diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/Android.common.mk mesa-11.0.0~git20150916+11.0.c4bae579/Android.common.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/Android.common.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/Android.common.mk	2015-09-16 14:36:08.000000000 +0000
@@ -42,6 +42,7 @@
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
 
 LOCAL_CFLAGS += \
+	-D__STDC_LIMIT_MACROS \
 	-DHAVE___BUILTIN_EXPECT \
 	-DHAVE___BUILTIN_FFS \
 	-DHAVE___BUILTIN_FFSLL \
@@ -68,7 +69,16 @@
 endif
 endif
 
+ifeq ($(MESA_ENABLE_LLVM),true)
+LOCAL_CFLAGS += \
+	-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
+	-D__STDC_CONSTANT_MACROS \
+	-D__STDC_FORMAT_MACROS \
+	-D__STDC_LIMIT_MACROS
+endif
+
 LOCAL_CPPFLAGS += \
+	$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-D_USING_LIBCXX) \
 	-Wno-error=non-virtual-dtor \
 	-Wno-non-virtual-dtor
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/Android.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/Android.mk	2015-09-16 14:36:08.000000000 +0000
@@ -24,7 +24,7 @@
 # BOARD_GPU_DRIVERS should be defined.  The valid values are
 #
 #   classic drivers: i915 i965
-#   gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx
+#   gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vc4 vmwgfx
 #
 # The main target is libGLES_mesa.  For each classic driver enabled, a DRI
 # module will also be built.  DRI modules will be loaded by libGLES_mesa.
@@ -45,10 +45,8 @@
 MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk
 MESA_PYTHON2 := python
 
-DRM_GRALLOC_TOP := hardware/drm_gralloc
-
 classic_drivers := i915 i965
-gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx
+gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx vc4
 
 MESA_GPU_DRIVERS := $(strip $(BOARD_GPU_DRIVERS))
 
@@ -80,6 +78,8 @@
 MESA_BUILD_GALLIUM := false
 endif
 
+MESA_ENABLE_LLVM := $(if $(filter radeonsi,$(MESA_GPU_DRIVERS)),true,false)
+
 # add subdirectories
 ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 
@@ -89,13 +89,8 @@
 	src/glsl \
 	src/mesa \
 	src/util \
-	src/egl/main
-
-ifeq ($(strip $(MESA_BUILD_CLASSIC)),true)
-SUBDIRS += \
-	src/egl/drivers/dri2 \
+	src/egl \
 	src/mesa/drivers/dri
-endif
 
 ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
 SUBDIRS += src/gallium
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/bin/test-driver mesa-11.0.0~git20150916+11.0.c4bae579/bin/test-driver
--- mesa-10.6.5~git20150829+10.6.fa342251/bin/test-driver	2015-08-29 14:01:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/bin/test-driver	2015-09-16 14:36:55.000000000 +0000
@@ -3,7 +3,7 @@
 
 scriptversion=2013-07-13.22; # UTC
 
-# Copyright (C) 2011-2014 Free Software Foundation, Inc.
+# Copyright (C) 2011-2013 Free Software Foundation, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -106,14 +106,11 @@
 # Test script is run here.
 "$@" >$log_file 2>&1
 estatus=$?
-
 if test $enable_hard_errors = no && test $estatus -eq 99; then
-  tweaked_estatus=1
-else
-  tweaked_estatus=$estatus
+  estatus=1
 fi
 
-case $tweaked_estatus:$expect_failure in
+case $estatus:$expect_failure in
   0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
   0:*)   col=$grn res=PASS  recheck=no  gcopy=no;;
   77:*)  col=$blu res=SKIP  recheck=no  gcopy=yes;;
@@ -122,12 +119,6 @@
   *:*)   col=$red res=FAIL  recheck=yes gcopy=yes;;
 esac
 
-# Report the test outcome and exit status in the logs, so that one can
-# know whether the test passed or failed simply by looking at the '.log'
-# file, without the need of also peaking into the corresponding '.trs'
-# file (automake bug#11814).
-echo "$res $test_name (exit status: $estatus)" >>$log_file
-
 # Report outcome to console.
 echo "${col}${res}${std}: $test_name"
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/ChangeLog mesa-11.0.0~git20150916+11.0.c4bae579/ChangeLog
--- mesa-10.6.5~git20150829+10.6.fa342251/ChangeLog	2015-08-29 14:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/ChangeLog	2015-09-16 14:36:57.000000000 +0000
@@ -1,1295 +1,1683 @@
-commit 0dc07acb5290fc00673e95f67e3b3cd5d858b7e8
-Author: Rico Tzschichholz <ricotz@ubuntu.com>
-Date:   Sat Aug 29 15:59:40 2015 +0200
+commit f67c992288a9c1d356f655106c129aa3f14aad79
+Author: Robert Hooker <sarvatt@ubuntu.com>
+Date:   Wed Sep 16 10:36:11 2015 -0400
 
-    Add debian tree from origin/ubuntu
+    Add debian tree from origin/ubuntu+1
 
-commit fa34225167396008e75e93f23696666caba8a7bf
+commit c4bae5792bb5515da42e23f166f5ba5d68f79615
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sat Aug 22 11:00:47 2015 +0100
+Date:   Sat Sep 12 13:32:56 2015 +0100
 
-    docs: add sha256 checksums for 10.6.5
+    docs: add sha256 checksums for 11.0.0
     
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit a43b3dd99bd4c114d0f3e90f4fd4792164fe7539
+commit 4f1e500150be2e82a2d7eb954f7198cc0c5cbec1
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sat Aug 22 10:20:54 2015 +0100
+Date:   Sat Sep 12 10:33:49 2015 +0100
 
-    docs: add release notes for 10.6.5
+    docs: Update 11.0.0 release notes
     
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit b9df15bef99c4c7a949070a8e065b0884645bc01
+commit bd460931037b4fc5c2b5ae9311aee4be3b261732
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sat Aug 22 10:15:00 2015 +0100
+Date:   Sat Sep 12 10:21:51 2015 +0100
 
-    Update version to 10.6.5
+    Update version to 11.0.0(final)
     
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit 76cc235e2b5605b9a9e93855c38e12cd19929acd
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sat Aug 22 10:12:52 2015 +0100
+commit 766d11e8f07c3baf56d6398c14c8074313496ed1
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Nov 11 23:16:13 2014 -0800
 
-    Revert "radeonsi: properly set the raster_config for KV"
+    glsl: Use hash tables for opt_constant_propagation() kill sets.
+    
+    Cuts compile/link time of the fragment shader in #91857 by 19%
+    (16.28 -> 13.05).
+    
+    I didn't bother with the acp sets because they're smaller, but it
+    might be worth doing as well.
     
-    This reverts commit 20bb0a771dded700ba1b213256bf47dfedbdfd77.
-    Requested-by: Alex Deucher <alexdeucher@gmail.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91857
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Tested-by: Tapani Pälli <tapani.palli@intel.com>
+    (cherry picked from commit 4654439fdd766f79a78fe0d812fd916f5815e7e6)
+    Nominated-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit 4a2a49040e6891355a80b582932c5a3cffaa3c16
-Author: Renaud Gaubert <renaud@lse.epita.fr>
-Date:   Sat Jul 11 19:38:10 2015 +0200
+commit 5923bd6d782f20273827e482e3f6f73d5fd8af10
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat Sep 5 00:51:33 2015 -0700
 
-    glsl: avoid compiler's segfault when processing operators with void arguments
+    i965: Use hash tables for brw_fs_vector_splitting().
     
-    This is done by returning an rvalue of type void in the
-    ast_function_expression::hir function instead of a void expression.
+    Cuts compile/link time of the fragment shader in #91857 by 25%
+    (21.64 -> 16.28).
     
-    This produces (in the case of the ternary) an hir with a call
-    to the void returning function and an assignment of a void variable
-    which will be optimized out (the assignment) during the optimization
-    pass.
+    v2: Drop unnecessary _mesa_hash_table_destroy call, and use
+        refs.ht->entries == 0 rather than ad-hoc checking (suggested by
+        Timothy Arceri).
     
-    This fix results in having a valid subexpression in the many
-    different cases where the subexpressions are functions whose
-    return values are void.
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91857
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Tested-by: Tapani Pälli <tapani.palli@intel.com>
+    (cherry picked from commit e20f30eb5181cddf8286d2247cfaf7e0fac7e417)
+    Nominated-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit d0cf5100b5564c2c8b06c733329248398e9ca6a3
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat Sep 5 00:22:57 2015 -0700
+
+    glsl: Use hash tables in opt_constant_variable().
     
-    Thus preventing to dereference NULL in the following cases:
-      * binary operator
-      * unary operators
-      * ternary operator
-      * comparison operators (except equal and nequal operator)
+    Cuts compile/link time of the fragment shader in bug #91857 by 31%
+    (31.79 -> 21.64).  It has over 8,000 variables so linked lists are
+    terrible.
     
-    Equal and nequal had to be handled as a special case because
-    instead of segfaulting on a forbidden syntax it was now accepting
-    expressions with a void return value on either (or both) side of
-    the expression.
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91857
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Tested-by: Tapani Pälli <tapani.palli@intel.com>
+    (cherry picked from commit 2fc0ce293ac58237f02cc5dd2eee4e35abea06b5)
+    Nominated-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit e36ca8c2bb04eb6be6b6a378ed7990baa041e71b
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Wed Sep 2 16:06:58 2015 -0700
+
+    meta: Always bind the texture
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85252
+    We may have been called from glGenerateTextureMipmap with CurrentUnit
+    still set to 0, so we don't know when we can skip binding the texture.
+    Assume that _mesa_BindTexture will be fast if we're rebinding the same
+    texture.
     
-    Signed-off-by: Renaud Gaubert <renaud@lse.epita.fr>
-    Reviewed-by: Gabriel Laskar <gabriel@lse.epita.fr>
-    Reviewed-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
-    (cherry picked from commit 7b9ebf879b6f35038996805a641667f00d93c4b7)
-    Nominated-by: Mark Janes <mark.a.janes@intel.com>
+    v2: Remove currentTexUnitSave because it is now unused.  Suggested by
+    both Neil and Anuj.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91847
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    (cherry picked from commit 767c33e88138afa64443417860b264a494eba33d)
 
-commit e9ab083702fb6be6444224074632b0d36e6a16da
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Mon Jul 13 18:01:13 2015 +0100
+commit 901744b2ff5ef4b6be6f6e8d9a886c5be0640508
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Sep 6 16:40:21 2015 +0200
 
-    i965/bdw: Fix setting the instancing state for the SGVS element
+    r600g: use pipe_resource::width0 instead pb_buffer::size
     
-    When gl_VertexID or gl_InstanceID is used a 3DSTATE_VF_SGVS
-    instruction is sent to create a sort of element to store the generated
-    values. The last instruction in this chunk of code looks like it was
-    trying to set the instancing state for the element using the
-    3DSTATE_VF_INSTANCING instruction. However it was sending
-    brw->vb.nr_buffers instead of the element index. This instruction is
-    supposed to take an element index and that is how it is used further
-    down in the function so the previous code looks wrong. Perhaps
-    previously the number of buffers coincidentally matched the number of
-    enabled elements so the value was generally correct anyway. In a
-    subsequent patch I want to change a bit how it chooses the SGVS
-    element index so this needs to be fixed.
+    pb_buffer::size was aligned by 29aaab2b5f55cc6d9a84f58ce2bb8607e76a9dde,
+    which broke the CMASK code I think.
     
-    v2 [by Ben]
-    Remove stable 10.5 stable tag (it's too late now)
-    Commit update as follows:
-    The number of vertex buffers emitted is always <= the number of vertex elements.
-    To maximize reuse (actually, to minimize relocations - according to the code
-    comments), a vertex buffer is only emitted once, even when we setup multiple
-    components (3DSTATE_VERTEX_ELEMENT) from that buffer. This meant that the
-    previous code would use the wrong indexed element for these reuse cases. This
-    patch by itself prevents hangs on BSW in the linked bug. It doesn't make the
-    test pass, the remaining patches are needed for that.
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91881
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91610
-    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
-    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
-    Tested-by: Mark Janes <mark.a.janes@intel.com>
-    Cc: <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit c03247bae010dfd81a08572a32067e9ea8637f63)
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    (cherry picked from commit 5c6c5b524649997805d0128d4df9dda5e8567cbb)
 
-commit e57c526b8705d06f305b3d758edd0312316650a1
-Author: Adam Jackson <ajax@redhat.com>
-Date:   Fri Jul 31 11:32:58 2015 -0400
+commit c62f82980c55a96189d1b32b4e56e6cdc67c3e81
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Sep 2 19:05:09 2015 +0200
 
-    glx: Fix __glXWireToEvent for BufferSwapComplete
+    radeonsi: enable VGPR spilling on VI
     
-    In the DRI2 path this event is magically synthesized from the
-    corresponding DRI2 event, but with Present, the server sends us the
-    event itself. The DRI2 path fills in the serial number, send_event, and
-    display fields of the XEvent struct that the app sees, but the Present
-    path did not.
+    This fixes corruption in Unigine Heaven on VI
     
-    This is likely related to a class of crashes seen in gtk/clutter apps:
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    (cherry picked from commit 7956eae1c76e298ca1ded46679c1a9bf875ec4ee)
+
+commit 151f84f2db398fa5e165daa048cfd598970bdef8
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Sep 2 19:04:25 2015 +0200
+
+    winsys/amdgpu: calculate the maximum number of compute units
     
-    https://bugzilla.redhat.com/attachment.cgi?id=1032631
+    Required for register spilling.
     
-    Note that the crashing instruction is looking up the lock_fns slot in
-    the Display *, and %rdi (holding the Display *) is 0x1.
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    (cherry picked from commit c6502e880bba00f8a68f004fe6be7a4bc275494a)
+
+commit 7d79ad95fd629730dc7bb8e56e4273505adbcf4b
+Author: Albert Freeman <albertwdfreeman@gmail.com>
+Date:   Tue Sep 8 13:06:40 2015 +0000
+
+    clover: Avoid using typename to allow compilation of clover by clang
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Signed-off-by: Adam Jackson <ajax@redhat.com>
-    Reviewed-by: Eric Anholt <eric@anholt.net>
-    (cherry picked from commit 8f7ebcb6fad53ea6d2f80fc5b7a046db07690032)
+    When parsing a variable declaration qualified with the typename
+    keyword, clang attempted to declare a variable with the type of non
+    type member "enum type type" of module::argument (within the header
+    file clover/core/module.hpp) instead of the typed member of
+    module::argument "enum type".
+    
+    Replaced "typename" with "enum" to force clang to declare the variable
+    marg_type with type "enum type" of module::argument.
+    
+    CC: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Signed-off-by: Albert Freeman <albertwdfreeman@gmail.com>
+    (cherry picked from commit 1691ead1b8ae4018a805af58977a43ef90af4203)
 
-commit 69649ea637c0729b72969b1f466d86708b95bccc
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat Aug 15 22:05:15 2015 -0400
+commit 2becc9864598eddc47c21f7d858f0d9c12894172
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Sep 8 15:41:11 2015 -0700
 
-    nv50,nvc0: take level into account when doing eng2d multi-layer blits
+    i965: Advertise 65536 for GL_MAX_UNIFORM_BLOCK_SIZE.
     
-    This fixes arb_get_texture_sub_image-get, and any situation where the 2d
-    engine was being used for multi-layer blits to a non-0 level.
+    Our old value of 16384 is the minimum value.  DirectX apparently
+    requires 65536 at a minimum; that's also what nVidia and the Intel
+    Windows driver advertise.  AMD advertises MAX_INT.
+    
+    Ilia Mirkin noticed that "Shadow Warrior" uses UBOs larger than 16k
+    on Nouveau, which advertises 65536 bytes for this limit.  Traces
+    captured on Nouveau don't work on i965 because our lower limit causes
+    the GLSL linker to reject the captured shaders.  While this isn't
+    important in and of itself, it does suggest that raising the limit
+    would be beneficial.
+    
+    We can read linear buffers up to 2^27 bytes in size, so raising this
+    should be safe; we could probably even go larger.  For now, matching
+    nVidia and Intel/Windows seems like a good plan.
+    
+    We have to reinitialize MaxCombinedUniformComponents as core Mesa will
+    have set it based on a stale value for MaxUniformBlockSize.
+    
+    According to Tapani, there's an unreleased game that asserts on this.
     
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit bf58a2c362d5afdba512f40b3eb300154201c7f0)
+
+commit 7cca7f71dadd43fbd92ccb3772c66a802cb31267
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Sep 10 05:02:26 2015 -0400
+
+    nv50/ir: don't fold immediate into mad if registers are too high
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91551
     Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 2514c78fba507ca8ab94d2e6de553b8b20d653d2)
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 74b86b971f3bf9b0482341b07c1cbc2e520fb1d0)
 
-commit 0a831196665f8a9c2c5001f9c1890e073bac9f14
+commit 94b8f60146f47709ff22a4f76ecf044176dd6a3d
 Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri Aug 14 14:10:36 2015 -0400
+Date:   Thu Sep 10 03:55:06 2015 -0400
 
-    gm107/ir: indirect handle goes first on maxwell also
+    nv50/ir: fix emission of 8-byte wide interp instruction
     
-    Fixes fs-simple-texture-size.shader_test
+    This can come up if the target register number is > 63, which is fairly
+    rare.
     
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91551
     Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit b346a84e270a50f0a8f1a6e474a51da04dd72f0e)
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit ce28ca713364dbe83cb3c371ca034bc2c2947616)
 
-commit 20bb0a771dded700ba1b213256bf47dfedbdfd77
-Author: Alex Deucher <alexander.deucher@amd.com>
-Date:   Wed Jun 10 11:39:30 2015 -0400
+commit 94bf2e2e0551413c149950cb924c0c33c3a330d2
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Sep 10 03:49:36 2015 -0400
 
-    radeonsi: properly set the raster_config for KV
+    nv50/ir: r63 is only 0 if we are using less than 63 registers
     
-    This enables the second RB on asics that support it which
-    should boost performance.
+    It is advantageous to use r63 instead of r127 since r63 can fit into the
+    shorter encoding. However if we've RA'd over 63 registers, we must use
+    r127 as the replacement instead.
     
-    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 649975e7162cc4ee0586ee76d24321cd7250581f)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 641eda0c792e10c2792730b1833353564479a557)
 
-commit 16c65ec37f5554041fa3dd8238d435041ac38526
-Author: Frank Binns <frank.binns@imgtec.com>
-Date:   Tue Aug 4 14:32:45 2015 +0100
+commit 78612aba51f0b7bb07419ed4b54c2d878b5e7b65
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Sep 10 01:54:30 2015 -0400
 
-    egl/x11: don't abort when creating a DRI2 drawable fails
-    
-    When calling either eglCreateWindowSurface or eglCreatePixmapSurface it
-    was possible for an application to be aborted as a result of it failing
-    to create a DRI2 drawable on the server. This could happen due to an
-    application passing in an invalid native drawable handle, for example.
+    nv50/ir: make edge splitting fix up phi node sources
     
-    v2: Handle the case where an error has been set on the connection
+    Unfortunately nv50_ir phi nodes aren't directly connected to the CFG, so
+    the mapping between source and the actual BB is by inbound edge order.
+    So when manipulating edges one has to be extremely careful. We were
+    insufficiently careful when splitting critical edges which resulted in
+    the phi nodes being confused as to where their sources were coming from.
+    
+    This primarily manifests itself with the TXL-lowering logic on nv50,
+    when it is inside of a conditional. I've been unable to trigger the
+    issue anywhere else so far. This resolves rendering failures
+    in a number of games like Two Worlds 2, Trine: Enchanted Edition, Trine 2,
+    XCOM:Enemy Unknown, Stacking. It also improves the situation in
+    Hearthstone, Sonic Generations, and The Raven: Legacy of a Master Thief.
+    However more work needs to be done there (splitting a lot more edges
+    solves it, so it's some other sort of RA-related issue).
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 9a4eae61c24858d69d731d63b141d2acaed40d69)
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90887
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit a072ef8748a65d286e9b542bb9ea6e020fdcc7f8)
 
-commit 23bbe418fcce69447fb425012e9ac8d149fb5455
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Tue Aug 11 21:37:59 2015 +0200
+commit 0878187488008facccbdae1b0e5258234a2b9dd4
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Sep 9 21:50:03 2015 -0400
 
-    r600g: allow setting geometry shader sampler states
+    nvc0: remove BGRA4 format support
     
-    We were ignoring them. This is both hilarious and sad.
+    Something is wrong with the support somewhere. I couldn't get the blob
+    driver to use it either, although it happily used RGB5_A1.
+    teximage-colors works, but WoW seems to fail in the menus for drawing
+    text.
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Reviewed-by: Edward O'Callaghan <eocallaghan at alterapraxis.com>
-    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
-    (cherry picked from commit 8c0b943e87b48e7359230825cc06fbdd059a9e58)
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91526
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 342e68dc60eebb20ac1be9f47800ee9e604354f0)
 
-commit f40be8799671e1195274ae846cd329d7a71c80bb
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Tue Aug 11 22:36:51 2015 +0200
+commit 4ae2ffbff142de4532b8e7222e4620c84dee3652
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Sep 9 03:17:38 2015 -0400
 
-    r600g: fix polygon offset scale
+    nvc0: keep track of cb bindings per buffer, use for upload settings
     
-    The value was copied from r300g, which uses 1/12 subpixels, but this hw
-    uses 1/16 subpixels.
+    CB updates to bound buffers need to go through the CB_DATA endpoints,
+    otherwise the shader may not notice that the updates happened.
+    Furthermore, these updates have to go in to the same address as the
+    bound buffer, otherwise, again, the shader may not notice updates.
     
-    Should fix piglit: gl-1.4-polygon-offset (formerly a glean test)
-    (untested, ported from radeonsi)
+    So we keep track of all the places where a constbuf is bound, and
+    iterate over all of them when updating data. If a binding is found that
+    encompasses the region to be updated, then we use the settings of that
+    binding for the upload. Otherwise we upload as a regular data update.
     
-    Reviewed-by: Edward O'Callaghan <eocallaghan at alterapraxis.com>
-    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit d335aad11b208bcdcc75a99d4b6c5fc8b69ce368)
+    This fixes piglit 'arb_uniform_buffer_object-rendering offset' as well
+    as blurriness in Witcher2.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91890
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit e50c01d5af305e07110cb4a38d5a655437058f04)
 
-commit e7e38e11c3ec7ae03d46451ce45b7226a56ac25c
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Tue Aug 11 22:36:51 2015 +0200
+commit b0578c0061275900b4ff2ffbe018fd039cb3ee9e
+Author: Hans de Goede <hdegoede@redhat.com>
+Date:   Wed Sep 9 15:52:09 2015 +0200
 
-    radeonsi: fix polygon offset scale
+    nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA
     
-    The value was copied from r300g, which uses 1/12 subpixels, but this hw
-    uses 1/16 subpixels.
+    Some modern apps try to use msaa without keeping in mind the
+    restrictions on videomem of older cards. Resulting in dmesg saying:
     
-    Fixes piglit: gl-1.4-polygon-offset (formerly a glean test)
+     [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+     [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+     [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
     
-    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit bfac8ba9d32be351277c7ea814ac9848bdcb1f16)
+    Because we are running out of video memory, after which the program
+    using the msaa visual freezes, and eventually the entire system freezes.
+    
+    To work around this we do not allow msaa visuals by default and allow
+    the user to override this via NV30_MAX_MSAA.
+    
+    Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+    [imirkin: move env var lookup to screen so that it's only done once]
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    
+    (cherry picked from commit 3e9df0e3af7a8a84147ae48f588e9c435bf65b98)
 
-commit b7a8003c588d928c1be224b595a1c43c76f67de6
-Author: Oded Gabbay <oded.gabbay@gmail.com>
-Date:   Wed Aug 12 18:22:53 2015 +0300
+commit b3dfd67feb21ff6ca15cfd1b57623d5b09cb2238
+Author: Hans de Goede <hdegoede@redhat.com>
+Date:   Wed Sep 9 15:52:08 2015 +0200
 
-    mesa/formats: don't byteswap when building array formats
+    nv30: Fix color resolving for nv3x cards
     
-    Because we build here an array format, we don't need to swap the
-    bytes for big endian.
-    If it isn't an array format, the bytes will be swapped in
-    _mesa_format_convert.
+    We do not have a generic blitter on nv3x cards, so we must use the
+    sifm object for color resolving.
     
-    v2: remove temp variable
+    This commit divides the sources and dest surfaces in to tiles which
+    match the constraints of the sifm object, so that color resolving
+    will work properly on nv3x cards.
     
-    Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 5f1d5b1c7857f8680b47a7a450ee9e4530e22c6f)
+    Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit ac066bf65cb585a4f6b4a2fb1d055b033f2b94ae)
 
-commit d18593b4160d75d21e8e5849aeb021514bd77b35
-Author: Jason Ekstrand <jason.ekstrand@intel.com>
-Date:   Mon Aug 10 01:32:23 2015 -0700
+commit 017085efafa5196ac77afffff051b06bcdeb9b07
+Author: Mauro Rossi <issor.oruam@gmail.com>
+Date:   Fri Aug 21 23:46:29 2015 +0200
 
-    mesa/formats: Don't flip channels of null array formats
+    android: Always define __STDC_LIMIT_MACROS.
     
-    Before, if we encountered an array format of 0 on a BE system, we would
-    flip all the channels even though it's an invalid format.  This would
-    result in a mostly invalid format with a swizzle of yyyy or wwww.  Instead,
-    we should just return 0 if the array format stashed in the format info is
-    invalid.
+    Analogous to commit 02a4fe22b13 (configure.ac: Always define
+    __STDC_LIMIT_MACROS.)
     
-    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit e3eb91af804f449005a2ff535c805eaa1d579d99)
+    v2: [Emil Velikov] keep the LLVM specific __STDC_FORMAT_MACROS
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 8056b3ffeb0cdca890cf9cde05dcd5afff4c50fc)
 
-commit 096282a662a303c24f5de5d8a0eeb16239f0c537
-Author: Jason Ekstrand <jason.ekstrand@intel.com>
-Date:   Sun Aug 9 23:45:44 2015 -0700
+commit 9e3528a84451e03687602fba0e12d451c3a50b6a
+Author: Mauro Rossi <issor.oruam@gmail.com>
+Date:   Fri Aug 21 23:46:28 2015 +0200
 
-    mesa/formats: Fix swizzle flipping for big-endian targets
+    android: rename LLVM_VERSION_PATCH to MESA_LLVM_VERSION_PATCH
     
-    The swizzle defines where in the format you should look for any given
-    channel.  When we flip the format around for BE targets, we need to change
-    the destinations of the swizzles, not the sources.  For example, say the
-    format is an RGBX format with a swizzle of xyz1 on LE.  Then it should be
-    wzy1 on BE;  however, the code as it was before, would have made it 1zyx on
-    BE which is clearly wrong.
+    Fixes: 797f4eacea8(configure.ac: rename LLVM_VERSION_PATCH to avoid
+    conflict with llvm-config.h)
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
     
-    Reviewed-by: Iago Toral <itoral@igalia.com>
-    Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
-    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 28d1a506c8d09fa66170978c85566c34cbf1cc0a)
+    (cherry picked from commit 5235bfe7b709d5cf0fcd38dff43a97909cfbc38c)
 
-commit c364a00cf957f4e0b5e4d039f1736f54c57e7fde
-Author: Jason Ekstrand <jason.ekstrand@intel.com>
-Date:   Sat Aug 8 09:00:21 2015 -0700
+commit 84060d35bb53d191a8b2297877d66c5a8e9113a3
+Author: Mauro Rossi <issor.oruam@gmail.com>
+Date:   Fri Aug 21 23:46:27 2015 +0200
 
-    mesa/formats: Only do byteswapping for packed formats
+    nouveau: android: add space before PRIx64 macro
     
-    Reviewed-by: Iago Toral <itoral@igalia.com>
-    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 3941539179b72fe25b6dffd1aacc0722d198a5ca)
+    Otherwise the android build fails with
+    
+       error : unable to find string literal operator ‘operator"" PRIx64’
+    
+    There are several resources referring to the problem, which is related
+    to c++11, in our case used when building mesa for lollipop.
+    
+    http://comments.gmane.org/gmane.comp.graphics.opensg.user/5883
+    
+    I've not investigated all the semantics, some people even suggested a
+    bug in the gcc compiler,
+    I just saw the building error was solved with one little space for
+    lollipop and no side effect when c++11 is not used.
+    
+    v2: [Emil Velikov] add an alternative commit message from Mauro.
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit e838d91b94c3d1d20db62a61bfd9163f675d3139)
 
-commit e5a198e4dd4771621e70f8a4c8f685e05a3cb22f
-Author: Alex Deucher <alexander.deucher@amd.com>
-Date:   Mon Aug 10 15:35:21 2015 -0400
+commit 2c581d04ccc7fcc4104d9ffd05f105623fdfb3bb
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 17 10:52:35 2015 +0100
 
-    radeonsi: add new OLAND pci id
+    auxiliary: rework the python generated sources rules
     
-    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
-    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 87cea61b9e2681e5365e989c7fa7a0298e4005fa)
+    There are a few bits this commit aims to resolve:
+    
+    One can generalise the mkdir rule to a simple MKDIR_P $(@D) which will
+    expand appropriately even if we change the subdir name, and/or add
+    new rules. We can also drop the explicit $(srcdir) prefix for the
+    dependency rules, as they are not strictly required, nor used
+    elsewhere in mesa.
+    
+    Finally replace $< with explicit filename to be consistent through the
+    file, and honour PYTHON_FLAGS.
+    
+    v2: Add comprehensive commit summary/message (Ian, Matt)
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 0d39279448bbda6e824bcfd4997b4583bc0481af)
 
-commit 0a7202385d6c129164feb151deec99a0e43ed7bf
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon Aug 10 17:41:36 2015 -0400
+commit a1ac93fc4bbfa15c3aefc78250787cd7dcd54233
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Sep 9 12:40:03 2015 +0100
 
-    nouveau: no need to do tnl wakeup, state updates are always hooked up
+    glsl: build: remove bogus dependency
     
-    A TNL state update now requires a DrawBuffer to be set, which it isn't
-    early on in context creation. Since we init swtnl from context init,
-    this caused crashes.
+    v2: rebase on top of the previous commit - don't touch the LOCAL_PATH
+    prefix for nir_constant_expressions.h
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91570
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 3fa1ca34cc0134bd16b3315a0695703c9f684bd4)
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    (cherry picked from commit c373eaedfc09ff2af7002b64ba0ae8ba71df86a1)
 
-commit d706b00522e23704ea1cee91fd0d5e7dedccca9e
-Author: Oded Gabbay <oded.gabbay@gmail.com>
-Date:   Tue Aug 4 21:39:32 2015 +0300
+commit 1f2b601f8b5855f1c01d81527d0692946c16791b
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 17 13:30:51 2015 +0100
 
-    mesa: clear existing swizzle info before bitwise-OR
+    glsl: build: use makefile.sources variables when possible
     
-    This patch fixes a bug in big-endian treatment, where the previous
-    swizzle info wasn't cleared before a new swizzle info was inserted into
-    the format field using a bitwise-OR operation.
+    Rather than folding one variable within the other only to unwrap them,
+    just use the ones we need.
     
-    v2: use MESA_ARRAY_FORMAT_SWIZZLE_*_MASK instead of numeric constants
-    v3: align according to coding style
+    v2: bring back LOCAL_PATH prefix for nir_constant_expressions.h
     
-    Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-    CC: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
-    (cherry picked from commit 2ac171a7db4e4ad2fa902e62bf18bc1f67e91643)
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com> (v1)
+    (cherry picked from commit a3b05e04921a4fcc05cfc994e415e3ceb39fd184)
 
-commit d02bb82d52da5029ae2e2cb77f6d2568f5ed425b
+commit 4ca5756766d538a30a12d37ec88f85c679da0c1f
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Fri Jul 17 12:52:27 2015 +0100
+Date:   Fri Jul 17 13:28:00 2015 +0100
 
-    vc4: add missing nir include, to fix the build
+    glsl: automake: reuse $(NIR_GENERATED_FILES) where possible
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Eric Anholt <eric@anholt.net>
-    (cherry picked from commit 75ce7919d6496981013a21a7055c668e47e7bed2)
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    (cherry picked from commit da5e4559ee3b239d2483645ed54b35aa6628fbaf)
 
-commit 3ebf4afbf704dd7f4b537694a6641de32f856669
-Author: Jason Ekstrand <jason.ekstrand@intel.com>
-Date:   Thu Jul 23 17:26:56 2015 -0700
+commit 7023899ab99efc85f0f99aa9b96d9d1305e99dcb
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Sep 9 12:28:37 2015 +0100
 
-    meta/copy_image: Stash off the scissor
+    glsl: automake: rework the sources generation rules
     
-    The meta CopyImageSubData path uses BlitFramebuffers to do the actual copy.
-    The only thing that can affect BlitFramebuffers other than the currently
-    bound framebuffers is the scissor so we need to save that off and reset it.
-    If we don't do this, applications that use a scissor together with
-    CopyImageSubData will get accidentally scissored copies.
+    The glsl equivalent of "mesa: automake: rework the source generation
+    rules". Plus let's make things consistent and always explicitly provide
+    the header name.
     
-    Tested-by: Markus Wick <markus at selfnet.de>
-    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
-    (cherry picked from commit 736c6f3cfc2c69e3c29268d4ebb7110dd36ac97f)
+    v2: Rebase on top of reverted "remove custom AM_V_LEX/YACC" (Matt)
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 9e0594418d8fa47e19bfe57450198d3fa7d087a0)
 
-commit 99793e2541510fe208d29e69fedf97a6fff006f8
+commit 2190f218ad2197d98fe9ebd8e1cf651e39d1120f
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Tue Aug 11 18:54:18 2015 +0100
+Date:   Fri Jul 17 10:44:30 2015 +0100
 
-    docs: add sha256 checksums for 10.6.4
+    mesa: automake: rework the source generation rules
+    
+    Same logic as previous commit applies.
+    
+    Additionally remove the odd (set -e/mv/INDENT) from the rules.
+    The last one is the only one we remotely care about, if reading the
+    generated sources.
+    
+    Upcoming work from DylanB which will replace the existing python
+    scripts with ones that produce more readable output anyway.
     
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    (cherry picked from commit fd913f47b7fcc724d8d191f2752f328d037abb20)
 
-commit 6b2fcee64edadbd4db2293f5f4fc1a70e80c7251
+commit 2c27775a4434b7133ccda223e34e36a9a8cd4c6a
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Tue Aug 11 16:39:10 2015 +0100
+Date:   Fri Jul 17 10:27:29 2015 +0100
 
-    docs: add release notes for 10.6.4
+    mapi: automake: rework the source generation rules
+    
+    Same logic as previous commit applies. Also fix bogus MESA_MAPI_DIR -
+    the sources are located in the source dir (duh).
     
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    (cherry picked from commit 96509aa80429db1884a78fae95c169aa40641e84)
 
-commit 95ecedf6d9af87b98aa07112048f495c964bd4cf
+commit b7b8d4982d62974bb30e8bb82b91e43cc5f2a699
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Tue Aug 11 16:35:06 2015 +0100
+Date:   Wed Sep 9 12:14:00 2015 +0100
 
-    Update version to 10.6.4
+    mapi: automake: rework the *api/glapi_mapi_tmp.h rules
+    
+    Same logic as previous commit applies.
     
+    v2: Merge with "inline glapi_gen_mapi define" (Matt)
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    (cherry picked from commit 449ce5d64f3d0e5840287040755df23e86ce6bb2)
 
-commit 736f6e16d9989f01cc55dcba15ba978ba90b7748
-Author: Francisco Jerez <currojerez@riseup.net>
-Date:   Wed Aug 5 16:29:30 2015 +0300
+commit 0d1f600c946d17e88ef5a2377cefd6db77ea872a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 17 10:21:06 2015 +0100
 
-    i965/fs: Fix fs_inst::regs_read() for sources in the ATTR file.
+    util: automake: rework the format_srgb.c rule
     
-    Otherwise it would crash on Gen8 with scalar VS.  The issue can easily
-    be reproduced with the following patch, but I don't see any reason why
-    it wouldn't be possible to end up with an ATTR argument here even
-    without it.
+    A handful of changes/cleanups paving the way to bmake support:
+     - Remove optional $(srcdir)/ prefix for files in the prereq list.
+     - Drop the space after the AM_V_GEN variable.
+     - Using $< in a non-suffix rule is a GNU make idiom.
+     - Use $(@D) over $(dir $@). The former is a POSIX standard.
     
-    CC: mesa-stable@lists.freedesktop.org
-    Reviewed-by: Connor Abbott <connor.w.abbott@intel.com>
-    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
-    (cherry picked from commit 42a18ca76057621ae7d8812b29ea2245d6ff282d)
+    v2: Cosmetic tweaks in the commit summary.
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com> (v1)
+    (cherry picked from commit d65bd7a7be48d7805f68cd45218794f3e4590408)
 
-commit f13ba8a5ab537e6dcdcc8b0c1a814012202d2497
-Author: Eduardo Lima Mitev <elima@igalia.com>
-Date:   Wed Jul 29 16:01:28 2015 +0200
+commit 0c9f66829c24a0102ee38fc222fb8cbe5d6c177b
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 15 10:28:05 2015 +0100
 
-    mesa: Fix error returned by glCopyTexImage2D() upon an invalid internal format
-    
-    Page 161 of the OpenGL-ES 3.1 (PDF) spec, and page 207 of the OpenGL 4.5 (PDF),
-    both on section '8.6. ALTERNATE TEXTURE IMAGE SPECIFICATION COMMANDS', states:
+    xmlpool: 'promote' LOCALEDIR variable
     
-        "An INVALID_ENUM error is generated if an invalid value is specified for
-         internalformat".
+    This is the only place in mesa that uses this construct which seems
+    to be GNUmake-ism. Attempting to build with POSIX make implementations
+    (bmake) would fail as below.
     
-    It is currently returning INVALID_OPERATION error because
-    _mesa_get_read_renderbuffer_for_format() is called before the internalformat
-    argument has been validated. To fix this, we move this call down the validation
-    process, after _mesa_base_tex_format() has been called. _mesa_base_tex_format()
-    effectively serves as a validator for the internal format.
+    --- options.h ---
+    LOCALEDIR := .
+    sh: line 2: LOCALEDIR: command not found
+    *** [options.h] Error code 127
     
-    Fixes 1 dEQP test:
-    * dEQP-GLES3.functional.negative_api.texture.copyteximage2d_invalid_format
+    So let's keep things consistent and compatible by making the variable
+    non target specific.
     
-    Fixes 1 piglit test:
-    * spec@oes_compressed_etc1_rgb8_texture@basic
+    v2:
+     - Bring back LOCALEDIR.
+     - Reword the commit message
+     - Change mesa-stable tag 10.6 > 11.0
     
-    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 4b07e9a033ddb6733eba206b5bd47a2373756f7d)
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Cc: Jonathan Gray <jsg@jsg.id.au>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit c8984a7a4686c2045666d32fbe5733ff5a5c3bd8)
 
-commit 791cf8a025ac0d610596cdfab17fc84b49df2288
-Author: Eduardo Lima Mitev <elima@igalia.com>
-Date:   Wed Jul 29 16:01:26 2015 +0200
+commit 11dc43424d691220e68eecf1d3f4b5387cca5d47
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Sep 8 08:36:32 2015 +1000
 
-    mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD
+    r600: don't use shader key without verifying shader type (v2)
     
-    Currently, glTexSubImageXD attempt to resolve the texture object
-    (by calling _mesa_get_current_tex_object()) before validating the given
-    target. However, that method explicitly states that target must have been
-    validated before calling it, so it never returns a user error.
+    Since 7a32652231f96eac14c4bfce02afe77b4132fb77
+    r600: Turn 'r600_shader_key' struct into union
     
-    The target validation occurs later when texsubimage_error_check() is called.
+    we were accessing key fields that might be aliased in the union
+    with other fields, so we should check what shader type we are
+    compiling for before using key values from it.
     
-    This patch reorganizes target validation, taking it out from the error check
-    function and into a point before the texture object is resolved.
+    v1.1: make it compile
+    v2: have caffeine, make it work - we don't set type
+    until later, so don't reference it until we've set it.
     
-    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 5d64cae8427b090c42d6d38da7fb474b3ddd4eb0)
-    [Emil Velikov: s/_mesa_enum_to_string/_mesa_lookup_enum_by_nr/]
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 6d2ceb10cd63b89892131a27d238620f00922dfb)
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
     
     Conflicts:
-    	src/mesa/main/teximage.c
+    	src/gallium/drivers/r600/r600_shader.c
 
-commit 58b2e95c1f09aafdfe5434f433ed7dc7a628e7a8
-Author: Eduardo Lima Mitev <elima@igalia.com>
-Date:   Wed Jul 29 16:01:23 2015 +0200
+commit ec9bafda7012c269927bb3ec12f4dafafc3ae635
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Sep 8 16:58:43 2015 -0400
 
-    mesa: Fix errors values returned by glShaderBinary()
+    st/mesa: increase viewport bounds limits for GL4 hw
     
-    Page 68, section 7.2 'Shader Binaries" of the of the OpenGL ES 3.1,
-    and page 88 of the OpenGL 4.5 specs state:
+    According to the ARB_viewport_array spec, GL4 limit is higher than the
+    GL3 limit. Also take this opportunity to fix the GL3 limit.
     
-        "An INVALID_VALUE error is generated if count or length is negative.
-         An INVALID_ENUM error is generated if binaryformat is not a supported
-         format returned in SHADER_BINARY_FORMATS."
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    (cherry picked from commit 458e55d7c5793b02af8b08ebec90906a829d3f65)
+
+commit 6654483bc661eef34ab933f812e94707191541df
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Sep 6 04:51:29 2015 -0400
+
+    nvc0: always emit a full shader colormask
     
-    Currently, an INVALID_OPERATION error is returned for all cases.
+    Indications are that if the colormask indicates a single bit set on
+    fermi, that value will always be read from $r0 instead of a potentially
+    higher register (if e.g. green is set). Not to upset the counting logic,
+    always set the header up with a full color mask for each RT. Such a
+    situation can basically only ever happen with generated blit shaders.
     
-    Fixes 1 dEQP test:
-    * dEQP-GLES3.functional.negative_api.shader.shader_binary
+    Fixes the following piglit on Fermi (Kepler is unaffected):
+      fbo-stencil blit GL_DEPTH32F_STENCIL8
     
-    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit b38a50f1e3edae6079c91f73a8d9c63a2dbf512a)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 39df725f731f75f488c75a4910169beb352213fb)
 
-commit 1f6798a70a6d7e6db636decc6af752f9a7714906
-Author: Frank Binns <frank.binns@imgtec.com>
-Date:   Fri Jul 31 09:11:45 2015 +0100
+commit 4b1ef5e84269a98867292fced0b5990f1c692f5f
+Author: Hans de Goede <hdegoede@redhat.com>
+Date:   Mon Sep 7 21:50:48 2015 +0200
 
-    egl: Add eglQuerySurface surface type check for EGL_LARGEST_PBUFFER attrib
-    
-    Calling eglQuerySurface on a window or pixmap with the EGL_LARGEST_PBUFFER
-    attribute resulted in the contents of the 'value' parameter being modified.
-    This is the wrong behaviour according to the EGL spec, which states:
+    nv30: Fix max width / height checks in nv30 sifm code
     
-        "Querying EGL_LARGEST_PBUFFER for a pbuffer surface returns the
-         same attribute value specified when the surface was created with
-         eglCreatePbufferSurface. For a window or pixmap surface, the
-         contents of value are not modified."
+    The sifm object has a limit of 1024x1024 for its input size and 2048x2048
+    for its output. The code checking this was trying to be clever resulting
+    in it seeing a surface of e.g 1024x256 being outside of the input size
+    limit.
     
-    Avoid this from happening by checking that the surface type is EGL_PBUFFER_BIT
-    before modifying the contents of the parameter.
+    This commit fixes this.
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Eric Anholt <eric@anholt.net>
-    (cherry picked from commit b2c5986ea1c8e66c4e0a05bcacbcf28c27f5b183)
+    Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 87073c69f3e253044bc235f34917aaa89041a63c)
 
-commit 84ef345dffec02d790db13fd6257e2c08eb0d56a
-Author: Frank Binns <frank.binns@imgtec.com>
-Date:   Fri Jul 31 09:11:46 2015 +0100
+commit 95bc059c50fa3679ee9d02cd1fd9f82239fb353f
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Sep 5 19:19:33 2015 +0100
 
-    egl/dri: Add error info needed for EGL_EXT_image_dma_buf_import extension
+    i965: Disallow fast blit paths for CopyTexImage with PixelTransfer ops
     
-    Update the DRI image interface error codes to reflect the needs of the
-    EGL_EXT_image_dma_buf_import extension. This means updating the existing error
-    code documentation and adding a new __DRI_IMAGE_ERROR_BAD_ACCESS error code
-    so that drivers can correctly reject unsupported pitches and offsets. Hook
-    the new error code up in EGL to return EGL_BAD_ACCESS.
+    glCopyTexImage behaves similarly to glReadPixels with respect to the
+    pixel transfer operations. Therefore if any are set we cannot use the
+    simple blit-only fast paths.
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Eric Anholt <eric@anholt.net>
-    (cherry picked from commit cfc3200a35647026a0b5cf188f378ce33802044b)
+    (Though it would be possible to relax the blorp path to handle
+    pixel zoom, or we can just enhance meta.)
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Jason Ekstrand <jason.ekstrand@intel.com>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Iago Toral <itoral@igalia.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+    (cherry picked from commit be519c2d50f4aaa48fdb8b27707114cc5bfd348f)
 
-commit 7722a24cab4b8880d45fb723205e2eedfada2055
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Sun Aug 2 15:18:36 2015 +0200
+commit 254a07841dbfe41eaa13250fc2dfbeb1787c46e5
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Sep 6 11:29:00 2015 -0400
 
-    r600g: fix the CB_SHADER_MASK setup
+    st/mesa: don't fall back to 16F when 32F is requested
     
-    This fixes the single-sample fast clear hang.
+    Nothing in the spec allows for the reduced precision, and this also
+    fixes st_QuerySamplesForFormat for nv50, which does not allow MS8 on
+    RGBA32F. Now this will be respected instead of reporting MS8 as
+    supported with an assumption that the format used will be RGBA16F.
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
-    Reviewed-by: Dave Airlie <airlied@redhat.com>
-    (cherry picked from commit d4ad4c20617f45f71152e292ee39f020ef352bfd)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    (cherry picked from commit e40f32d5626c87d9e77bbc261df3648cd54bd066)
 
-commit 880a0ce2e973d5ed9ee28c3f48fc5332128f9652
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Sat Jul 25 00:53:16 2015 +0200
+commit 271290f0774e123f221d6415e4b158e4d4b958cc
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Sep 6 19:30:23 2015 +0100
 
-    radeonsi: completely rework updating descriptors without CP DMA
+    Update version to 11.0.0-rc3
     
-    For 10.6: This fixes graphical corruption occuring on most Southern Islands
-    Radeon GPUs. This will allow closing a lot of bugs in the bugzilla.
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 7bf27c2393e3d07f6293b30cc859a6ef2aa07212
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 2 18:44:18 2015 -0400
+
+    nouveau: don't mark full range as used on unmap with explicit flush
     
-    The patch has a better explanation. Just a summary here:
-    - The CPU always uploads a whole descriptor array to previously-unused memory.
-    - CP DMA isn't used.
-    - No caches need to be flushed.
-    - All descriptors are always up-to-date in memory even after a hang, because
-      CP DMA doesn't serve as a middle man to update them.
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+    (cherry picked from commit a778831735ea45f789c247c40677cd26adc78e3e)
+
+commit 7f80a2383ea4ecdf85ea16eed1d3aac2acc0a5f4
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Aug 24 11:49:05 2015 -0400
+
+    nv50: avoid using inline vertex data submit when gl_VertexID is used
     
-    This should bring:
-    - better hang recovery (descriptors are always up-to-date)
-    - better GPU performance (no KCACHE and TC flushes)
-    - worse CPU performance for partial updates (only whole arrays are uploaded)
-    - less used IB space (no CP_DMA and WRITE_DATA packets)
-    - simpler code
-    - corruption issues are fixed on SI cards
+    The hardware only generates vertexid when vertices come from a VBO. This
+    fixes:
     
-    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-    (cherry picked from commit b0528118dfb1af00e7d08cdb637191b80c14c2ba)
+      vertexid-drawelements
+      vertexid-drawarrays
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit c830d193db5c90cf0af57ff73606e2aa12aed9a8)
 
-commit 842a3af20b6d5dc12e2025be8005531d5da6285c
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Mon Sep 15 23:34:28 2014 +0200
+commit 3e1fde76b6eea459ff4a22231c1d3cc73d9b6f9a
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 3 20:32:53 2015 -0400
 
-    radeonsi: rework how shader pointers to descriptors are set
+    nv50: don't flush vertex arrays when index buffer changes
     
-    For 10.6: This is a prerequisite for the next fix. The below comment is from
-    the original commit.
+    The index buffer is fed in inline over a pushbuf. It's not related to
+    vertices or any caching that might be done on them.
     
-    This is mainly needed for tessellation where a VS can be bound as VS, ES,
-    or LS, and TES (tess. evaluationshader) can be bound as VS or ES or neither.
-    Therefore we need the ability to move pointers to descriptors between
-    shaders arbitrarily.
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+    (cherry picked from commit 4a025c6bc835387a31007fdf30a130e612e54e19)
+
+commit 747e1b03bfac3e32878a6f68002b5bb83194fad3
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 3 20:16:48 2015 -0400
+
+    nv50: rebind bo to bufctx when invalidating idxbuf storage
     
-    The idea is that the context has a mapping from PIPE_SHADER_x to
-    SPI_SHADER_USER_DATA_x. After a shader is enabled or disabled,
-    si_shader_change_notify should be called to update this mapping accordingly.
+    There is nothing to be done on a dirty idxbuf, but the bo may have
+    changed, so we have to rebind it to the bufctx.
     
-    There is a dirty flag for each shader pointer, but only one emit function
-    for all pointers in the whole context, whose code and logic is separated
-    from descriptors.
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+    (cherry picked from commit 1f62d36ae21043c472fc182fd4b738ec1d54a2d2)
+
+commit b85ec1e34b317accd7f69bb5f23bf9a7a8d84561
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 3 19:21:21 2015 -0400
+
+    nv50: clear buffer status on all vertex bufs, not just the first one
     
-    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-    (cherry picked from commit 3ce91c727f2a00a05f414351266b0b45d677611e)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+    (cherry picked from commit 114cc18b98b6e016ab1986577aa3df12acc22cca)
 
-commit bc29f8f6b7b4a9bf575e401bd08bebe87d7dc863
-Author: Igor Gnatenko <i.gnatenko.brain@gmail.com>
-Date:   Tue Jul 7 13:05:04 2015 +0300
+commit acb822f1bdb7f8f812d6b3f1196b5913c066e15a
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jan 1 06:09:59 2015 -0500
 
-    opencl: use versioned .so in mesa.icd
+    nv50: fix drawing from tfb, direct-to-pushbuf submits
     
-    We must have versioned library in mesa.icd, because ICD loader would
-    fail if the mesa-devel package wasn't installed.
+    The stride was being set to 0, which is illegal (and also non-sensical).
+    Also we must wait for the buffer to become available for reading as
+    otherwise a wrong value may be prefetched. Since we must wait for the
+    buffer anyways, and it's mapped and in GART, we may as well avoid the
+    annoyance of the indirect pushbuf submit.
     
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Reported-by: Fabian Deutsch <fabian.deutsch@gmx.de>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73512
-    Signed-off-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Acked-by: Michel Dänzer <michel.daenzer@amd.com>
-    (cherry picked from commit 4d7e0fa8c731776ad5d630f37b36c535f1907371)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+    (cherry picked from commit 75e34d1df8b0ab56e5e658b8ef90ff6057ec954e)
 
-commit f2f62059dc5c9b548127c31bc7875c6836716944
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Mon Jul 13 20:01:39 2015 +0100
+commit ddf459492d04df02c9e3ed6471de0b21269694a4
+Author: Oded Gabbay <oded.gabbay@gmail.com>
+Date:   Thu Sep 3 19:00:26 2015 +0300
 
-    bugzilla_mesa.sh: sort the bugs list by number
+    llvmpipe: convert double to long long instead of unsigned long long
     
-    v2: Use change sed/sort based on Ilia's suggestion.
+    round(val*dscale) produces a double result, as val and dscale are double.
+    However, LLVMConstInt receives unsigned long long, so there is an
+    implicit conversion from double to unsigned long long.
+    This is an undefined behavior. Therefore, we need to first explicitly
+    convert the round result to long long, and then let the compiler handle
+    conversion from that to unsigned long long.
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit c505064b2cea14c9da115a26e9326b9c0c7dca3b)
+    This bug manifests itself in POWER, where all IMM values of -1 are being
+    converted to 0 implicitly, causing a wrong LLVM IR output.
+    
+    Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
+    CC: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    (cherry picked from commit 4f2290d1612569686284609059d29a85c9de67cf)
 
-commit 2a72e18abbe9286d16e388eb1a004147f0b6e54b
-Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
-Date:   Mon Jul 6 23:34:23 2015 +0200
+commit fcdaa190e558241607595d91cb1e6aa9e28746fc
+Author: Hans de Goede <hdegoede@redhat.com>
+Date:   Thu Sep 3 12:38:01 2015 +0200
 
-    nv50: avoid segfault with enabled but unbound vertex attrib
+    nv30: Implement color resolve for msaa
     
-    Before validating vertex arrays we need to check if a VBO is present.
-    Checking if vb->buffer is not NULL fixes the issue.
+    Note this is not ideal. Since the sifm can only do source sizes up to
+    1024x1024 we end up using the blitter on nv4x, which is not that fast.
     
-    Fixes the following piglit test:
-      gl-3.1-vao-broken-attrib
+    And on nv3x we end up using the cpu which is really slow.
     
-    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Hans de Goede <hdegoede@redhat.com>
     Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    (cherry picked from commit adc816a1e41812e6489a5bc388f80de65504be5b)
+    (cherry picked from commit 3c6c4d4f298ec81fe57992790a68aaab2e573519)
 
-commit b70176d96b1b5033d4bc1414fd63758ebd144787
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Wed Jul 29 15:44:32 2015 +0100
+commit 0abcd9c8fcc74428299630fa96bc51551b952324
+Author: Hans de Goede <hdegoede@redhat.com>
+Date:   Wed Aug 12 13:39:42 2015 +0200
 
-    winsys/radeon: don't leak the fd when it is 0
+    nv30: Fix creation of scanout buffers
     
-    Earlier commit added an extra dup(fd) to fix a ZaphodHeads issue.
-    Although it did not consider the (very unlikely) case where we might end
-    up with the valid fd == 0.
+    Scanout buffers on nv30 must always be non-swizzled and have special
+    width alignment constraints.
     
-    Fixes: 28dda47ae4d(winsys/radeon: Use dup fd as key in drm-winsys hash
-    table to fix ZaphodHeads.)
+    These constraints have been taken from the xf86-video-nouveau
+    src/nv_accel_common.c: nouveau_allocate_surface() function.
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
-    Reviewed-by: Mario Kleiner <mario.kleiner.de@gmail.com>
-    (cherry picked from commit 1307be519b8785249ee863a22115930299ff642a)
-
-commit d8116f8ec531980d287e5fcb1091acfe2fcffb8d
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Fri Jul 10 11:01:55 2015 +0100
-
-    egl/wayland: libdrm is a hard requirement, treat it as such
+    nouveau_allocate_surface() applies these width constraints only when a
+    tiled attribute is set, which it sets for all surfaces allocated via
+    dri, and this "tiling" is not the same as swizzling, scanout surfaces
+    must be linear / have a uniform_pitch or only complete garbage is shown.
     
-    Prompt at configure time if it's missing otherwise we'll fail later on
-    in the build. Remove ambiguous HAVE_LIBDRM guard.
+    This commit fixes dri3 on nv30 showing a garbled display, with dri3 the
+    scanout buffers are allocated by mesa, rather than by the ddx, and the
+    wrong stride of these buffers was causing the garbled display.
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit fa109d02dda118f756903b663879375c06353ae7)
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    (cherry picked from commit 3329703eb116a7ad73bc694356b43e014532240b)
 
-commit 8f8c842338b11185e8432e4b44e31a85abcbf9c6
-Author: Ben Widawsky <benjamin.widawsky@intel.com>
-Date:   Wed Jul 29 12:35:24 2015 -0700
+commit 0b14d3586338f304e2816e3395cdcc940d6073c1
+Author: Boyan Ding <boyan.j.ding@gmail.com>
+Date:   Wed Aug 26 19:52:50 2015 +0800
 
-    i965/skl: Add production thread counts and URB size
+    vc4: Initialize pack field of qreg to 0 in qir_get_temp
     
-    This patch adjusts the SKL values to the best known values we have.
+    This avoids generation of undefined packing in qir and qpu instructions,
+    fixing a lot of rendering errors.
     
-    v2: Remove HS/DS/CS fields. Adding this makes most sense to add to the
-    GEN9_FEATURES macro, however, doing that would require updating BXT values, and
-    Jordan requested I not do that. Conveniently, this request makes a lot of sense
-    wrt to stable backport as HS, and DS do not even exist there.
+    Fixes 8b36d107fdd (vc4: Pack the unorm-packing bits into a src MUL
+    instruction when possible.)
     
     Cc: mesa-stable@lists.freedesktop.org
-    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
-    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
-    (cherry picked from commit 7eaacc1678195738fab3bb98870828611cae066d)
-    [Emil Velikov: .supports_simd16_3src is missing in 10.6]
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    
-    Conflicts:
-    	src/mesa/drivers/dri/i965/brw_device_info.c
+    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 48de40ce9c45de154965490843f9e50407970c26)
 
-commit eddea78fb36039e03478e11780c8a32c06c1c435
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Wed Jul 22 20:08:23 2015 -0700
+commit a6710090af7bfda005388d9ee8f108b3aeb15e57
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Sep 4 19:02:28 2015 +0100
 
-    glsl: Fix a bug where LHS swizzles of swizzles were too small.
-    
-    A simple shader such as
-    
-       vec4 color;
-       color.xy.x = 1.0;
+    i965: Disallow PixelTransfer operations for tiled-memcpy TexImage/ReadPixels
     
-    would cause ir_assignment::set_lhs() to generate bogus IR:
+    The tiled memcpy fast paths perform a simple blit (with only a couple of
+    trivial pixel conversion routines) and do not accommodate PixelTransfer
+    operations. Therefore if any are set, fallback to the regular routines.
+    Note that PixelTransfer only applies to TexImage and ReadPixels, not to
+    GetTexImage.
     
-       (swiz xy (swiz x (constant float (1.0))))
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Jason Ekstrand <jason.ekstrand@intel.com>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+    (cherry picked from commit 099f5b3a62be1919add02a4cb887841c9f0f2fe4)
+
+commit 0c98ba7abffd91a127c7bc5cc0ceaa1f2d2106fd
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Sep 2 16:39:27 2015 -0700
+
+    i965: Fix copy propagation type changes.
     
-    We were setting the number of components of each new RHS swizzle based
-    on the highest channel used in the LHS swizzle.  So, .xy.y would
-    generate (swiz xy (swiz xx ...)), while .xy.x would break.
+    commit 472ef9a02f2e5c5d0caa2809cb736a0f4f0d4693 introduced code to
+    change the types of SEL and MOV instructions for moves that simply
+    "copy bits around".  It didn't account for type conversion moves,
+    however.  So it would happily turn this:
     
-    Our existing Piglit test happened to use .xzy.z, which worked, since
-    'z' is the third component, resulting in an xxx swizzle.
+       mov(8) vgrf6:D, -vgrf5:D
+       mov(8) vgrf7:F, vgrf6:UD
     
-    This patch sets the number of swizzle components based on the size of
-    the LHS swizzle's inner value, so we always have the correct number
-    at each step.
+    into this:
     
-    Fixes new Piglit tests glsl-vs-swizzle-swizzle-lhs-[23].
-    Fixes ir_validate assertions in in Metro 2033 Redux.
+       mov(8) vgrf6:D, -vgrf5:D
+       mov(8) vgrf7:D, -vgrf5:D
     
-    v2: Move num_components updating completely out of update_rhs_swizzle
-        (suggested by Timothy Arceri).  Simplify.
+    which erroneously drops the conversion to float.
     
-    Cc: mesa-stable@lists.freedesktop.org
+    Cc: "11.0 10.6" <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
-    (cherry picked from commit e235ca159f5f6de2bd29616fdda5c02dc69b0d7f)
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    (cherry picked from commit 2ace64fd598816fd1be9877962734242fc27b87b)
 
-commit 080c4713bcd4c0c3643b3fb3ede1aa09f891aecf
+commit eef8258a86b6df103cb31cfa6feeddc32ac4eb95
 Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Fri Jul 24 19:47:06 2015 +0200
+Date:   Tue Sep 1 04:14:43 2015 +0200
 
-    st/mesa: don't ignore texture buffer state changes
-    
-    Fixes piglit:
-      spec@arb_texture_buffer_range@ranges-2
+    winsys/radeon: remove exported buffers from the cache
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Reviewed-by: Brian Paul <brianp@vmware.com>
-    (cherry picked from commit e39ece0d7856d0532a0f011cd5cb17bc85ee82e2)
-    [Emil Velikov: resolve tess related conflicts.]
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    (cherry picked from commit efea7c3a3f91219db6e2fa3588388b6be4ecfa40)
+
+commit 747cd2c27382f4cdd1cb9149447b677af340335e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Sep 1 04:14:33 2015 +0200
+
+    winsys/amdgpu: remove exported buffers from the cache
     
-    Conflicts:
-    	src/mesa/state_tracker/st_atom_texture.c
-    	src/mesa/state_tracker/st_context.c
-    	src/mesa/state_tracker/st_context.h
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    (cherry picked from commit 54964c77510b060806615c842692c0f393e807e6)
 
-commit ba10c9ff5088b99591c13cad0b70929b49e6319c
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat Jul 11 12:47:03 2015 -0400
+commit ecdd69cd0509119adfd01c4fed512609963d0720
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Sep 1 04:07:54 2015 +0200
 
-    nvc0: fix geometry program revalidation of clipping params
+    gallium/pb_bufmgr_cache: add a way to remove buffers from the cache explicitly
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit a818faa6ddcfa6cd90a24b70c49ec76573954111)
+    This must be done before exporting a buffer as dmabuf fds, because
+    we lose track of who is using it and can't trust the reference counter.
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    (cherry picked from commit 35d0f12797237cdd38e7fd2c39d3c19e875875ca)
 
-commit f167d9b46cd41e8c750bdfd65a05c04545546236
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Tue May 12 05:46:04 2015 -0700
+commit 74fa10693227c08d227957e9544f60ee68b5762c
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Sep 2 10:42:57 2015 -0700
 
-    meta: Fix reading luminance texture as rgba in _mesa_meta_pbo_GetTexSubImage()
+    glsl: Handle attribute aliasing in attribute storage limit check.
     
-    After recent addition of pbo testing in piglit test getteximage-luminance,
-    it fails on i965. This patch makes a sub test pass.
+    In various versions of OpenGL and GLSL, it's possible to declare
+    multiple VS input variables with aliasing attribute locations.
     
-    This patch adds a clear color operation to meta pbo path, which I think is
-    better than falling back to software path.
+    So, when computing the storage requirements for vertex attributes,
+    we can't simply add up the sizes.  Instead, we need to look at the
+    enabled slots.
     
-    V2: Fix color mask for GL_LUMINANCE_ALPHA
+    This patch begins tracking which attributes are double types that
+    are larger than 128-bits (i.e. take up two vec4 slots).  We then
+    count normal attributes once, and count the double-size attributes
+    a second time.
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit aa40546b2de4cd572af02d31fd5c7d4045505ea2)
+    Fixes deQP functional.attribute_location.bind_aliasing.max_cond_* tests
+    on i965, which regressed with commit ad208d975a6d3aebe14f7c2c16039ee20.
+    
+    No Piglit changes on llvmpipe (which actually supports dvecs).
+    
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    (cherry picked from commit c3294ca5a13cf3f0eb3d9907a46ff8ce4bc2963b)
 
-commit def2d2e018abbab381ea329a8472bded18b45659
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Thu Jun 11 16:48:26 2015 -0700
+commit 1153420017873011a91367a4fd81cad6a3878023
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon Aug 31 18:44:42 2015 -0700
 
-    mesa: Add a helper function _mesa_need_luminance_to_rgb_conversion()
+    mesa: Don't allow wrong type setters for matrix uniforms
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit c59c0f8a42652603da7f89e3270897cb685fe76b)
+    Previously we would allow glUniformMatrix4fv on a dmat4 and
+    glUniformMatrix4dv on a mat4.  Both are illegal.  The latter also
+    overwrites the storage for the mat4 and causes bad things to happen.
+    
+    Should fix the (new) arb_gpu_shader_fp64-wrong-type-setter piglit test.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Cc: Dave Airlie <airlied@redhat.com>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 7237c937af3b495191bee2f7240901e3a9daf1fb)
 
-commit 831bf63e6b55c4f1f610a2fb988906550157172a
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Wed May 6 05:43:08 2015 -0700
+commit 5704d473c8d4b58aa51b521d23a6e2fdd79a0bfc
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon Aug 31 18:30:48 2015 -0700
 
-    meta: Don't do fragment color clamping in _mesa_meta_pbo_GetTexSubImage
-    
-    _mesa_meta_pbo_GetTexSubImage() uses _mesa_meta_BlitFrameBuffer(),
-    which will do fragment clamping if enabled. But fragment clamping
-    doesn't affect ReadPixels and GetTexImage.
+    mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type
     
-    Without this patch, piglit test arb_color_buffer_float-clear fails,
-    when forced to use the meta pbo path.
+    This matches _mesa_uniform, and it enables the bug fix in the next
+    patch.
     
-    v2: Apply this fix to both glReadPixels and glGetTexImage.
+    v2: s/type/basicType/ in the assert in _mesa_uniform_matrix.
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit ca4e17e03e9aeaa04fe6bb04bfe2d6f97991005b)
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au> [v1]
+    Cc: Dave Airlie <airlied@redhat.com>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit a6976f09727014730f45ec27c714c6a8140e074a)
 
-commit 6321bf72be877fe3efc81bd9e3c6d645d6b6ed9a
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Wed May 20 10:22:45 2015 -0700
+commit eb2b88c44b526d785800ca926afde5df1020382a
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Sep 1 22:00:24 2015 -0700
 
-    meta: Abort meta pbo path if readpixels need signed-unsigned conversion
+    i965/fs: Handle MRF destinations in lower_integer_multiplication().
     
-    Meta pbo path for ReadPixels rely on BlitFramebuffer which doesn't support
-    signed to unsigned integer conversions and vice versa.
+    The lowered code reads from the destination, which isn't possible from
+    message registers.
     
-    Without this patch, piglit test fbo_integer_readpixels_sint_uint fails, when
-    forced to use the meta pbo path.
+    Fixes the following dEQP tests on SNB:
     
-    v2: Make need_signed_unsigned_int_conversion() a static function. (Iago)
-        Bump up the comment and the commit message. (Jason)
+        dEQP-GLES3.functional.shaders.precision.int.highp_mul_fragment
+        dEQP-GLES3.functional.shaders.precision.int.mediump_mul_fragment
+        dEQP-GLES3.functional.shaders.precision.int.lowp_mul_fragment
     
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Cc: <mesa-stable@lists.freedesktop.org>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    Tested-by:  Mark Janes <mark.a.janes@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
     Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
-    Reviewed-by: Iago Toral <itoral@igalia.com>
-    (cherry picked from commit 0d207905e675b778739236072e7a4dfba7cd7959)
+    (cherry picked from commit 9390cb84593bda516e8c1521c87a08475574d1be)
 
-commit acaac69ccd3d8d70044aab98dbadc5b177510eb5
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Wed May 20 10:21:39 2015 -0700
+commit 5c08afc894a66fa51eb8531a93d4e9f8b7b74edd
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Sep 1 15:57:02 2015 +1000
 
-    meta: Fix transfer operations check in meta pbo path for readpixels
-    
-    Currently used ctx->_ImageTransferState check is not sufficient
-    because it doesn't include the read color clamping enabled with
-    GL_CLAMP_READ_COLOR. So, use the helper function
-    _mesa_get_readpixels_transfer_ops().
+    mesa/readpixels: check strides are equal before skipping conversion
     
-    Also, transfer operations don't affect glGetTexImage(). So, do
-    the check only for glReadPixles.
-    
-    Without this patch, arb_color_buffer_float-readpixels test fails, when
-    forced to use meta pbo path.
+    The CTS packed_pixels test checks that readpixels doesn't write
+    into the space between rows, however we fail that here unless
+    we check the format and stride match.
     
-    V2: Add a comment and bump up the commit message.
+    This fixes all the core mesa problems with CTS packed_pixels
+    tests.
     
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Cc: <mesa-stable@lists.freedesktop.org>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
     Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit 1252d53c19ec005c17ca666cecb7db072d77e5ce)
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 32769ac016dee4ce5767a922f91de47df4ce984d)
 
-commit fbc464803a777bea4f9b7b11eeba192122adf8cb
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Tue May 19 17:44:52 2015 -0700
+commit 5fb758a418e64e1967785bca964c57e81034a884
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Sep 1 15:44:46 2015 +1000
 
-    mesa: Turn get_readpixels_transfer_ops() in to a global function
+    texcompress_s3tc/fxt1: fix stride checks (v1.1)
     
-    This utility function is utilized in a later patch.
+    The fastpath currently checks the RowLength != width, but
+    if you have a RowLength of 7, and Alignment of 4, then
+    that shouldn't match.
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    align the rowlength to the pack alignment before comparing.
+    
+    This fixes compressed cases in CTS packed_pixels_pixelstore
+    test when SKIP_PIXELS is enabled, which causes row length
+    to get set.
+    
+    v1.1: add fxt1 fix (Iago)
+    
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
     Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit 7974e23be9ff7586e5250cff321b6ec7749ecc44)
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit b4a70401f52e5d7e08c94715b250ea1de8f63d15)
 
-commit 56e4cc67fec63d86d76f5b4de55678890cf06648
-Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
-Date:   Thu Jul 23 10:38:36 2015 +0200
+commit bb378249594f860b97d91d51956c019b2d111a05
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Sep 1 15:13:45 2015 +1000
 
-    glsl/glcpp: fix SIGSEGV when checking error condition for macro redefinition
+    st/readpixels: fix accel path for skipimages.
     
-    Commit a6e9cd14c does not take into account than node_{a,b}->next could be NULL
-    in some circumstances, such as in a shader containing this code:
-    
-      #define A 1 /* comment */
-      #define A 1 /* comment */
+    We don't need to use the 3d image address here as that will
+    include SKIP_IMAGES, and we are only blitting a single
+    2D anyways, so just use the 2D path.
     
-    This patch fixes the segmentation fault for cases like that.
+    This fixes some memory overruns under CTS
+     packed_pixels.packed_pixels_pixelstore when PACK_SKIP_IMAGES
+    is used.
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91290
-    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
-    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 30f97b5e52b324d501c56df8902d294fb755a5b7)
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 6a3e1fb958778e00e8fe2d860b6327fc4409c148)
 
-commit a31dfd91b500735c44bb89e194f02b718299bf2e
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Fri Jul 10 23:35:55 2015 +0200
+commit 8fc2cbb00e08986b5962e6cbb13c3155a17b9f05
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 30 02:48:37 2015 +0100
 
-    radeonsi: upload shader rodata after updating scratch relocations
+    mesa/formats: 8-bit channel integer formats addition
     
-    Cc: 10.5 10.6 <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
-    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-    (cherry picked from commit 50a957c5de842b18e10c361f7b0310aa46bb483f)
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Add enough 8-bit channel formats to handle all the
+    different things CTS throws at us.
     
-    Conflicts:
-    	src/gallium/drivers/radeonsi/si_shader.c
-    	src/gallium/drivers/radeonsi/si_shader.h
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit c3c242070e868225a81e1afe5fb424c33eb94c2f)
 
-commit 504903b827604f1a630a335d14231f88c2cf36be
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Thu Jul 16 17:14:07 2015 +0200
+commit b497b88dbe6001f11e3e12da1eac1595ab08a6f6
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 30 02:48:36 2015 +0100
 
-    st/mesa: don't call st_validate_state in BlitFramebuffer
+    mesa/formats: add some formats from GL3.3
     
-    None of the draw states are used here.
-    This fixes a crash in piglit: ext_framebuffer_blit/blit-early
+    GL3.3 added GL_ARB_texture_rgb10_a2ui, which specifies
+    a lot more things than just rgb10/a2ui.
     
-    Calling st_manager_validate_framebuffers is the minimum requirement here.
+    While playing with ogl conform one of the tests must have
+    attempted all valid formats for GL3.3 and hits the
+    unreachable here.
     
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit d082c5324914212f76e45be497229c7a0681f706)
+    This adds the first chunk of formats that hit the
+    assert.
+    
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 8185a02316cfb7dc3d64b8772af82ad2bb49754e)
 
-commit c33ca1696a0a967091937805fa198ffc3317a03f
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Thu Jul 9 21:19:15 2015 +0100
+commit dcb220f2f700bba0709fa246417f1e89676acbbb
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Tue Aug 25 21:13:13 2015 +1000
 
-    configure.ac: do not set HAVE_DRI(23) when libdrm is missing
+    mesa: handle SwapBytes in compressed texture get code.
     
-    These conditionals are used to guard both dri modules and loader(s).
+    This case just wasn't handled, so add support for it.
     
-    Currently if we try to build the gallium swrast dri module (without glx)
-    on a system that's missing libdrm the build will fail.
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 5b6c7da460b8f6c908df7060ec0709a9848ce160)
+
+commit d9534e4785b26a0c65153dad934ab8a03723d89c
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Aug 25 14:36:01 2015 +1000
+
+    mesa: fix SwapBytes handling in numerous places
     
-    v2: Make sure we assign prior to checking the have_libdrm variable.
+    In a number of places the SwapBytes handling didn't handle cases with
+    GL_(UN)PACK_ALIGNMENT set and 7 byte width cases aligned to 8 bytes.
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 16f6d432de07dcb537dafd0c9f3ef7614891ed6b)
+    This adds a common routine to swap bytes in a 2D image and uses this
+    code in:
     
-    Conflicts:
-    	configure.ac
+    texture storage
+    texture get
+    readpixels
+    swrast drawpixels.
+    
+    [airlied: updated with Brian's nitpicks].
+    
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 0ad3a475ef81dad3baf607d749b91dfa1700ca23)
 
-commit ccef8901de421eae5dcc8affa14218d46cc06593
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sun Jul 26 15:18:24 2015 +0100
+commit 63b4e6bfc931d0da1a31389f7f44e297c988971d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 30 03:44:03 2015 +0200
 
-    docs: Add checksums for mesa 10.6.3 tarballs
+    radeonsi: fix memory usage checking for big IBs
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Acked-by: Christian König <christian.koenig@amd.com>
+    (cherry picked from commit 05af645a951fd985d0dbe3c22614e1dee8dfb3f0)
 
-commit ddc976368fef367e464472ebcc2ac4fd89eb9fd8
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sun Jul 26 14:38:58 2015 +0100
+commit a5dee22767c4ea133b93dcbae16c0d08f161e8e1
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 30 00:12:03 2015 +0200
 
-    Add release notes for 10.6.3
+    radeonsi: set all 16 viewport Z bounds for GL 4.1
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Acked-by: Christian König <christian.koenig@amd.com>
+    (cherry picked from commit 08775a219628611989ab87c621255ac3c841dcda)
 
-commit 2eef0b7d8608faeecd7cf71b386d64edd7a08e24
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sun Jul 26 14:33:29 2015 +0100
+commit 1aea7812b0c55f82d0411cefba8a821d7b84b504
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Aug 29 22:59:23 2015 +0200
 
-    Update version to 10.6.3
+    radeonsi: fix a Unigine Heaven hang when drirc is missing
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Cc: 10.6 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Acked-by: Christian König <christian.koenig@amd.com>
+    (cherry picked from commit 9b510a9652297a63677f1d55b2bf444694fd94e1)
 
-commit 954c18fb5c95c125ef43a88b55af620dca32e829
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon Jul 20 00:19:56 2015 -0400
+commit f0180a37d79a881b02d164f4f9ff22143928cbaf
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sat Jun 6 09:33:33 2015 +0100
 
-    nv50: fix max level clamping on G80
+    i965: Prevent coordinate overflow in intel_emit_linear_blit
     
-    It appears that the G80 did not have support for the sampler view
-    first/last clamping. Put the view's last level in the place of the
-    texture's so that it doesn't go past what the sampler view allows.
+    Fixes regression from
+    commit 8c17d53823c77ac1c56b0548e4e54f69a33285f1
+    Author: Kenneth Graunke <kenneth@whitecape.org>
+    Date:   Wed Apr 15 03:04:33 2015 -0700
+    
+        i965: Make intel_emit_linear_blit handle Gen8+ alignment restrictions.
+    
+    which adjusted the coordinates to be relative to the nearest cacheline.
+    However, this then offsets the coordinates by up to 63 and this may then
+    cause them to overflow the BLT limits. For the well aligned large
+    transfer case, we can use 32bpp pixels and so reduce the coordinates by
+    4 (versus the current 8bpp pixels). We also have to be more careful
+    doing the last line just in case it may exceed the coordinate limit.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reported-and-tested-by: kaillasse91@hotmail.fr
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90734
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: Anuj Phogat <anuj.phogat@gmail.com>
     Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 801d41fa43eba996c6bd7c071282ad15e51609d3)
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    (cherry picked from commit d38a5601068ae1d923efece8f28757777f4474e4)
 
-commit 2a77b82a92494d91e90b516ad5fed8e6e0a10a6b
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat Jul 18 19:02:29 2015 -0400
+commit fe77d714f28fa876d3c863bd2c019282703eee0d
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Sep 1 12:29:58 2015 +1000
 
-    gm107/ir: fix indirect txq emission
+    r600g: fix calculation for gpr allocation
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 8c8a71f0d125bb655b17a32914ffecf8d159593b)
+    I've been chasing a geom shader hang on rv635 since I wrote
+    r600 geom code, and finally I hacked some values from fglrx
+    in and I could run texelfetch without failures.
+    
+    This is totally my fault as well, maths fail 101.
+    
+    This makes geom shaders on r600 not fail heavily.
+    
+    Cc: "10.6" "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 0de53ccc8cbee0f63ba25c9e72664b3cbd31be54)
 
-commit 7efc693ef26c91c545c78f4b751432e43011b541
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat Jul 18 18:38:42 2015 -0400
+commit fb119b22602afb9e137c3c59d9b846ea9a46356a
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Aug 31 14:22:23 2015 +1000
 
-    nvc0/ir: don't worry about sampler in txq handling
+    r600/sb: update last_cf for finalize if.
     
-    There's no need to deal with samplers for texture size queries. That
-    code also was accidentally setting an invalid sIndirectSrc position, but
-    it can now just be removed.
+    As Glenn did for finalize_loop we need to update_cf when we
+    add a POP at the end of a shader.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 346ce0b98832e33d5411200002571b3edea9e2bb)
+    I think this fixes one of the earlier shader going off end
+    of memory problems we've stopped.
+    
+    Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Cc: "10.6" "11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 3063913f77cd2db1a263cb824a5c8c3dcc1a51a0)
 
-commit 440f465f5f38c0f522eea3a79d94663954b63864
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat Jul 18 16:43:17 2015 -0400
+commit 50306a33b4eef89e735fc2dedf6f6540a445290c
+Author: Alexander von Gluck IV <kallisti5@unixzen.com>
+Date:   Tue Aug 18 20:47:59 2015 -0500
 
-    nvc0/ir: fix txq on indirect samplers
+    egl: scons: fix the haiku build, do not build the dri2 backend
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 20e484afa4874e87cd18daffd66286bb893cf3fb)
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 5abbd1caccf4653ac1a8760de68d8ed101c814d8)
+    Fixes: 78674631a2d(egl: remove the non-haiku scons build)
 
-commit 9656b34faef3a262ad0354a3194ed1ee1edd1e16
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Sat Jul 4 19:15:16 2015 -0700
+commit cf007af859119efe44566fa00dbd9b8ee13dca16
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Aug 15 11:57:22 2015 -0400
 
-    program: Allow redundant OPTION ARB_fog_* directives.
+    freedreno/a4xx: formats update
     
-    A fragment program from "Pixel Piracy" contains redundant OPTION
-    directives:
+    Fixes glamor, which wants to use R8 integer textures.
     
-    !!ARBfp1.0
-    OPTION ARB_precision_hint_fastest;
-    OPTION ARB_fog_exp2;
-    OPTION ARB_precision_hint_fastest;
-    OPTION ARB_fog_exp2;
-    ...
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    (cherry picked from commit 000e225360c020e8b3de142c4c898baad321d242)
+
+commit 7d576419b2aeffdc1b3b88b12b66a8f548a5c505
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Aug 24 12:58:08 2015 -0400
+
+    freedreno: update generated headers
     
-    We already allow redundant ARB_precision_hint_fastest directives, but
-    disallow the redundant (yet consistent) ARB_fog_exp2 directives, failing
-    to compile the program.
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    (cherry picked from commit afb6c24a207fe7b9917644b940e4c5d1870c5c92)
+
+commit 893caebf44b4f1c8484c50d95bd8b719e6a07a3a
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Sun Aug 30 20:40:31 2015 +1000
+
+    r600: move prim convert from geom shader to function.
     
-    The specification seems to contradict itself - the main text says that
-    only one fog application option may be specified, but then backpedals,
-    indicating the intent is to disallow /contradictory/ flags.  One of the
-    issues suggests that specifying contradictory ones is stupid, but
-    allowed, and only the last one should take effect.
+    This should avoid C++ fail including this header.
     
-    Accepting multiple redundant (but consistent) directives seems harmless,
-    and like a reasonable interpretation of the specification.  It also
-    fixes a fragment program found in the wild.
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 03b7ec877843cd622717b01c1047e08baf34facf)
+    Fixes: 69418831756 (r600: port si_conv_prim_to_gs_out from radeonsi)
+    Nominated-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 3f8d44210c70f4b16651f1faf5fd72c47c64ab07
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Aug 31 13:40:19 2015 +0100
+
+    Update version to 11.0.0-rc2
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 579ca506aefabc018d2cdd1856de4ce0e95bdfcf
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 23 12:57:09 2015 +0200
+
+    gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets
     
     Cc: mesa-stable@lists.freedesktop.org
-    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
-    (cherry picked from commit 4b17f0d9f58637300b0748d1fb702a7e4d51979f)
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    (cherry picked from commit 437cb1e3f482570447501526927df4d80c845bf5)
 
-commit 329763791b2a869f30a39b8d1f94b95dcb2c9e8e
-Author: Francisco Jerez <currojerez@riseup.net>
-Date:   Wed Jul 1 16:32:24 2015 +0300
+commit 94205d0aa229d0cbfb9a5f9ed0cbd1cb07ba9d33
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Aug 24 23:31:00 2015 -0400
 
-    i965/gen9: Use custom MOCS entries set up by the kernel.
+    freedreno/a3xx: add basic clip plane support
     
-    Instead of relying on hardware defaults the i915 kernel driver is
-    going program custom MOCS tables system-wide on Gen9 hardware.  The
-    "WT" entry previously used for renderbuffers had a number of problems:
-    It disabled caching on eLLC, it used a reserved L3 cacheability
-    setting, and it used to override the PTE controls making renderbuffers
-    always WT on LLC regardless of the kernel's setting.  Instead use an
-    entry from the new MOCS tables with parameters: TC=LLC/eLLC, LeCC=PTE,
-    L3CC=WB.
+    The hardware is capable of dealing with GL1-style user clip planes.
+    No clip vertex, no clip distances. Fixes a number of ucp tests, as well
+    as neverball.
     
-    The "WB" entry previously used for anything other than renderbuffers
-    has moved to a different index in the new MOCS tables but it should
-    have the same caching semantics as the old entry.
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 58e24b4761ec8c348bf6825c2355a6e047599306)
+
+commit 1b40221850d41b622e66f7bbea0eed6594b85c4a
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Fri Aug 28 10:46:10 2015 +1000
+
+    r600: port si_conv_prim_to_gs_out from radeonsi
     
-    Even though the corresponding kernel change ("drm/i915: Added
-    Programming of the MOCS") is in a way an ABI break it doesn't seem
-    necessary to check that the kernel is recent enough because the change
-    should only affect Gen9 which is still unreleased hardware.
+    This code was broken by the tess merge, and I totally missed it
+    until now. I'm not sure this fixes anything but it stops the assert.
     
-    v2: Update MOCS values for the new Android-incompatible tables
-        introduced in v7 of the kernel patch.
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 6941883175612ae602a8745945153ba064f53a7a)
+
+commit 2fe87a1b68c282846c119bdd930aa936c6504054
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Fri Aug 28 09:57:04 2015 +1000
+
+    gallium/util: fix debug_get_flags_option on 32-bit
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Reference: http://lists.freedesktop.org/archives/intel-gfx/2015-July/071080.html
-    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
-    (cherry picked from commit af768922cafa3eb3e78a2fdfee90380a74c79460)
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    On 32-bit we need to use PRIu64 flags for printfs,
+    otherwise this segfaults in R600_DEBUG=help.
     
-    Conflicts:
-    	src/mesa/drivers/dri/i965/brw_defines.h
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 8d6d0cc17d945317f44328a7761801e6781dc3fc)
 
-commit 8132c7ac41f07857368aef25cbc4a26979784ba4
-Author: Brian Paul <brianp@vmware.com>
-Date:   Wed Jul 15 06:15:06 2015 -0600
+commit b83b452eea6db023f50611e090f05c5dcafdbc93
+Author: Daniel Scharrer <daniel@constexpr.org>
+Date:   Fri Aug 28 11:45:35 2015 +0200
 
-    osmesa: fix OSMesaPixelsStore typo
+    mesa: add missing queries for ARB_direct_state_access
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91337
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    This adds index queries (glGet*i_v) for GL_TEXTURE_BINDING_* and
+    GL_SAMPLER_BINDING, as well as texture queries
+    (glGetTex{,ture}Parameter*) for GL_TEXTURE_TARGET.
     
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 141e1eb29fe80ad341e718147a1277cc3b1b9c11)
+    CC: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    
+    Reviewed-by: Fredrik Höglund <fredrik@kde.org>
+    Signed-off-by: Fredrik Höglund <fredrik@kde.org>
+    (cherry picked from commit 5aaaaebf22c920745d577c49e463d23b90ba5ea8)
 
-commit da8bc1673936b9c9ba83c3d60fd03ce383bc8e85
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Fri Jul 10 21:27:13 2015 +0100
+commit 68bd2ddda026afd5e45324d51a5b6b74227c5408
+Author: Glenn Kennard <glenn.kennard@gmail.com>
+Date:   Thu Aug 27 19:04:17 2015 +0200
 
-    auxiliary/vl: use the correct screen index
+    r600g/sb: Don't crash on empty if jump target
     
-    Inspired (copied) from Marek's commit for egl/x11
-    commit 0b56e23e7f3(egl/dri2: use the correct screen index)
+    Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 608c7b4a63d5818f7ae0b3d48496b02cf8458d9b)
+
+commit 9db5c2ca2ea8f4172611337db1cc81f19aab443e
+Author: Glenn Kennard <glenn.kennard@gmail.com>
+Date:   Thu Aug 27 19:04:16 2015 +0200
+
+    r600g/sb: Don't read junk after EOP
     
-    v2: Fix copy/pasta errors.
+    Shaders that contain instruction data after an instruction with EOP could end
+    up parsing that as an instruction, leading to various crashes and asserts in
+    SB as it gets very confused if it sees for instance a loop start instruction
+    jumping off to some random point.
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-    (cherry picked from commit 7a50bf6c7f7729f5eee3ddf7aa9b38a81873f2c6)
+    Add a couple of asserts, and print EOP bit if set in old asm printer.
+    
+    Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit a830225adbb77073272961df409885cca6b861ee)
 
-commit 6012eeca0b44240b5ddc40650266eeaeb076d6fa
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri Jul 10 16:42:18 2015 -0400
+commit 08c41221d7102106d0b70a6399e2270919458077
+Author: Glenn Kennard <glenn.kennard@gmail.com>
+Date:   Thu Aug 27 19:04:15 2015 +0200
 
-    nv50, nvc0: enable at least one color RT if alphatest is enabled
+    r600g/sb: Handle undef in read port tracker
     
-    Fixes the following piglits:
-      fbo-alphatest-nocolor
-      fbo-alphatest-nocolor-ff
+    e8e443 missed adding check for undef values also in
+    unreserve function, leading to an assert triggering.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 1bfa25e88d21f95b9e176232bb091af77c294578)
+    Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 36f1999a87258603b6720d55e6020d5d24c215c9)
 
-commit 9c7f5947058b17d2fd117e475a0395a6a1f745af
-Author: Chad Versace <chad.versace@intel.com>
-Date:   Thu Jul 9 18:46:21 2015 -0700
+commit 896ef5cb95557fbf68c75600d2bcbb01f8933b98
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Aug 27 15:28:24 2015 -0400
 
-    mesa: Fix generation of git_sha1.h.tmp for gitlinks
+    mesa: only copy the requested teximage faces
     
-    Don't assume that $(top_srcdir)/.git is a directory. It may be a
-    gitlink file [1] if $(top_srcdir) is a submodule checkout or a linked
-    worktree [2].
+    Cube maps are special in that they have separate teximages for each
+    face. We handled that by copying the data to them separately, but in
+    case zoffset != 0 or depth != 6 we would read off the end of the client
+    array or modify the wrong images.
     
-    [1] A "gitlink" is a text file that specifies the real location of
-        the gitdir.
-    [2] Linked worktrees are a new feature in Git 2.5.
+    zoffset/depth have already been verified by the time the code gets to
+    this stage, so no need to double-check.
     
-    Cc: "10.6, 10.5" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit 75784243df1f5bb0652fb243b37d69f36d493a86)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 2259b111003f2e8c55cae42677ec45345fb1b6e3)
 
-commit 9e62e1a1d3023fa2834c6715c6bed72c3f11dac6
-Author: Rob Clark <robclark@freedesktop.org>
-Date:   Wed Jul 8 13:30:22 2015 -0400
+commit 594388e5776312c1bdc9d5613369ed530bb7fbbc
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed Aug 19 14:29:53 2015 -0700
 
-    xa: don't leak fences
+    i965/fs: Split VGRFs after lowering pull constants
     
-    XA was never unref'ing last_fence in the various call paths to
-    pipe->flush().  Add this to xa_context_flush() and update the other
-    open-coded calls to pipe->flush() to use xa_context_flush() instead.
+    The split_virtual_grfs code doesn't properly rewrite reladdr so we need to
+    make sure that any uniform indirects are lowered away first.
     
-    This fixes a memory leak reported with xf86-video-freedreno.
+    This fixes the glsl-fs-uniform-indexed-by-swizzled-vec4.shader_test in piglit
     
-    Reported-by: Nicolas Dechesne <nicolas.dechesne@linaro.org>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Rob Clark <robclark@freedesktop.org>
-    (cherry picked from commit 0a8af6361eecaba0f34a668328746924b61caa6a)
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    (cherry picked from commit fee0c5af11dd0995de96e7053377d425a66d03a0)
+    
+    Conflicts:
+    	src/mesa/drivers/dri/i965/brw_fs.cpp
 
-commit 4a3d6d04e10db6e756ecc8f09e737958fab7246b
-Author: Christian König <christian.koenig@amd.com>
-Date:   Mon Jun 29 10:19:36 2015 +0200
+commit 812f2855dd601e23a4f813f53547d446ca484df7
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 24 00:22:37 2015 +0200
 
-    st/vdpau: fix mixer size checks
+    mesa: create multisample fallback textures like normal textures
     
-    We need to check what the 3D pipe is able to handle for the mixer, not what
-    the decoder is able to decode. This fixes output of resolutions like 720x1280.
+    This works if drivers upsample on upload (like all radeon ones do).
+    The alternative is an unexpected GL error from anything calling
+    _mesa_update_state and possibly other issues.
     
-    Signed-off-by: Christian König <christian.koenig@amd.com>
-    CC: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 2cfa64e159a68998b76bdbcd20f8c7810379fce0)
+    Cc: 10.6 11.0 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit f432ae899fb81468778dbeb17ac7615da3ed5c0d)
 
-commit 9850b9ca73056dc4e606b8cdfcd0d97659d25861
-Author: Christian König <christian.koenig@amd.com>
-Date:   Fri May 29 15:10:31 2015 +0200
+commit 5d8ce45d9031e9949dfbab27ab4d608853b86d72
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Aug 24 09:52:12 2015 +1000
 
-    vl: cleanup video buffer private when the decoder is destroyed
+    mesa/texgetimage: fix missing stencil check
     
-    Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=90728
+    GetTexImage can read to stencil8 but only from
+    a stencil or depthstencil texture.
     
-    Signed-off-by: Christian König <christian.koenig@amd.com>
-    CC: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit bbfdf5c17b695c31915e293e1ec858cbcb340894)
-
-commit 89cbd91b17989ec7eb1cb93ac427a84dca56cd79
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sat Jul 11 20:33:16 2015 +0100
-
-    docs: Add sha256 checksums for the 10.6.2 release
+    This fixes a bunch of failures in CTS
+    GL33-CTS.gtf32.GL3Tests.packed_pixels
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit c1452983b44cc8ee238b8c7e2cfca1105c707487)
 
-commit 9643cce94c8a1938e3342fb83d025a1e5c2aa79b
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sat Jul 11 19:46:49 2015 +0100
+commit 33b0f6e5e181c52c3eeb7ba4cb3bc0db13f10670
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed Jul 29 18:09:44 2015 +1000
 
-    Add release notes for the 10.6.2 release
+    mesa: enable texture stencil8 for multisample
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    This fixes GL45-CTS.gtf44.GL31Tests.texture_stencil8.texture_stencil8_gl44
+    from the ogl conform suite.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: 10.6 11.0 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 529acab22a3e21e0ed0c5243675aec6c0ee27e8f)
 
-commit 187f919c9004274cee95f7a4940b909c54923a1d
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sat Jul 11 19:42:36 2015 +0100
+commit 6659fba2c0ea9a5c358e3ef3770585381e619d39
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Sun Aug 23 09:24:57 2015 +0100
 
-    Update version to 10.6.2
+    i965: Always re-emit the pipeline select during invariant state emission
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    On the older platforms where we don't have logical contexts preserving
+    state across batches, we emit the invariant state setup on every batch
+    using the brw_invariant_state atom. This includes the pipeline selection
+    which is cached with the introduction of
+    
+    commit 0e0e23ef537c9add672ff322f34e129a07edc55e
+    Author: Jordan Justen <jordan.l.justen@intel.com>
+    Date:   Wed Apr 22 11:43:50 2015 -0700
+    
+        i965/state: Emit pipeline select when changing pipelines
+    
+    However, we do not reset the cache between batches on context-less
+    platforms resulting in us not setting the pipeline selection and can
+    cause GPU hangs if a media pipeline was loaded in the meantime (e.g.
+    mixing mplayer/gstreamer using libva and gnome-shell). A simple solution
+    is to just forcibly re-emit the pipeline select along with the invariant
+    state and reset the cache at that point.
+    
+    Reported-and-tested-by: Tomasz C. <tomaszc@o2.pl>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91254
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Jordan Justen <jordan.l.justen@intel.com>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit 4e5752e2b78243a71766538f62ca0a80488047a7)
 
-commit 5e9254194594b863f55f4efcafca7fbb5b21ec8f
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Fri Jun 26 17:54:15 2015 +0100
+commit adae777f24fd84d3a0c074c5f2c01a31d9f63cce
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 23 18:57:44 2015 +0200
 
-    i965: Don't try to print the GLSL IR if it has been freed
+    Revert "radeon/winsys: increase the IB size for VM"
     
-    Since commit 104c8fc2c2aa5621261f8 the GLSL IR will be freed if NIR is
-    being used. This was causing it to segfault if INTEL_DEBUG=wm is set.
-    This patch just makes it avoid dumping the GLSL IR in that case.
+    This reverts commit 567394112d904096abff1d994ab952f475dfb444.
     
-    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
-    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
-    (cherry picked from commit c0ca6c30eaf7f488f154c462a01a8945cb4a3103)
+    It regressed performance. It looks like smaller IBs are better, because
+    the GPU goes idle quicker and there is less waiting for buffers and fences.
+    
+    Cc: 11.0 <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit a83c36b5c0c64c717ced76db89bab900006648aa)
 
-commit 6b6e14ac35750e0a7f8194923877a842f9a43e3f
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Sun Jun 28 22:17:09 2015 -0700
+commit 0b690e39dc3f84dbce97d507fd7955ecdecbe5c2
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Aug 23 03:11:09 2015 -0400
 
-    Revert "i965: Delete linked GLSL IR when using NIR."
+    nv50: fix 2d engine blits for 64- and 128-bit formats
     
-    This reverts commit 104c8fc2c2aa5621261f80aa6b4f76c3163078f1.
+    This fixes bin/ext_framebuffer_multisample-formats all_samples
     
-    (cherry picked from commit cae701fc8ed0faeaaaafd1cf57f6143031edcab2)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit e18c29b03105567cf20bc235ce23cf08986cc537)
 
-commit 25daf2592c21881eed3cbe1e8439f32878b3eb2f
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Sun Jun 28 22:17:16 2015 -0700
+commit 67fc4b417a7e73feb840b6666b343f4d32d23f22
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Aug 23 02:56:45 2015 -0400
 
-    Revert "glsl: clone inputs and outputs during linking"
+    nv50: account for the int RT0 rule for alpha-to-one/cov
     
-    This reverts commit c2ff3485b3d48749ea9dcad07bc1a691627dc3e5.
+    Same as commit 1af0641db but for nvc0. If an integer texture is
+    bound to RT0, don't do alpha-to-one or alpha-to-coverage.
     
-    Ilia and I noticed a memory leak caused by this patch: at least with
-    fixed-function programs, we clone things using ProgramResourceList as
-    the context before reralloc makes it non-NULL.
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit a6ad49cbbd599aec054d0a3163fff5ad724f2b18)
+
+commit 7a8d2048bc830c77be7baf5eb71aaef645cb1bf6
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jul 27 13:13:49 2015 +1000
+
+    mesa/arb_gpu_shader_fp64: add support for glGetUniformdv
     
-    I believe Tapani found other bugs with these patches, so I'm just going
-    to revert them for now and let him pursue them further.
+    This was missed when I did fp64, I've sent a piglit test to cover
+    the case as well.
     
-    (cherry picked from commit 6218c68bece0cea671f2940a651119a87ab8b24e)
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    (cherry picked from commit 45971fd0df1cbfc400f89f2e8df206625b40d65f)
 
-commit b85e389d6caebdad3eb1e080146a3ef113aec8a5
-Author: Mike Stroyan <mike@lunarg.com>
-Date:   Wed Jul 1 10:16:28 2015 -0600
+commit bf84c85130d7b9160f62ce8b54e33d5228531217
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Aug 22 23:59:50 2015 -0400
 
-    i965: allocate at least 1 BLEND_STATE element
+    nv50,nvc0: disable depth bounds test on blit
     
-    When there are no color buffer render targets, gen6 and gen7 still
-    use the first BLEND_STATE element to determine alpha test.
-    gen6_upload_blend_state was allocating zero elements when
-    ctx->Color.AlphaEnabled was false.
-    That left _3DSTATE_CC_STATE_POINTERS or _3DSTATE_BLEND_STATE_POINTERS
-    pointing to random data from some previous brw_state_batch().
-    That sometimes suppressed depth rendering when those bits
-    happened to mean COMPAREFUNC_NEVER.
-    This produced flickering shadows for dota2 reborn.
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    (cherry picked from commit abbf05cfc2bea0787bcf710ef984d73ee8ba8f9e)
+
+commit aab6075613ec078257cc7008e40319f844d2ba9c
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Wed Aug 19 18:55:44 2015 -0700
+
+    i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80500
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    (cherry picked from commit fe2b748a39ff676949fcefccf739aff967fc38c5)
-    Nominated-by: Kenneth Graunke <kenneth@whitecape.org>
+    When the edge flag element is enabled then the elements are slightly
+    reordered so that the edge flag is always the last one. This was
+    confusing the code to upload the 3DSTATE_VF_INSTANCING state because
+    that is uploaded with a separate loop which has an instruction for
+    each element. The indices used in these instructions weren't taking
+    into account the reordering so the state would be incorrect.
+    
+    v2: Use nr_elements instead of brw->vb.nr_enabled so that it will cope
+        when gl_VertexID is used.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91292
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+    (cherry picked from commit 3a1ab2348050fd32f41553b9febfd9972b5761aa)
 
-commit 57a6f5208d2affe33bfef4bc1d632e4c63af7b1e
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Tue Jul 7 18:28:31 2015 +0200
+commit 2ef343432828f563d793908af6e1b0401d6cba5a
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Mon Jul 13 18:01:14 2015 +0100
 
-    st/dri: don't set PIPE_BIND_SCANOUT for MSAA surfaces
+    i965: Swap the order of the vertex ID and edge flag attributes
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91231
+    The edge flag data on Gen6+ is passed through the fixed function hardware as
+    an extra attribute. According to the PRM it must be the last valid
+    VERTEX_ELEMENT structure. However if the vertex ID is also used then another
+    extra element is added to source the VID. This made it so the vertex ID is in
+    the wrong register in the vertex shader and the edge attribute is no longer in
+    the last element.
     
-    Reviewed-by: Brian Paul <brianp@vmware.com>
-    (cherry picked from commit 6611f65047575054a38ce83ebfe0331e39e1774f)
-    Nominated-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    v2: Also implement for BDW+
+    
+    v3 [by Ben]: Remove 10.5 tag. Too late.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84677
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Tested-by: Ben Widawsky <ben@bwidawsk.net>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+    (cherry picked from commit fb02b4ec482762ccf2a9fedf24fe6f50787932a9)
+
+commit 3d58fea2e39e406ae0a4183317170451813f7794
+Author: Glenn Kennard <glenn.kennard@gmail.com>
+Date:   Sun Aug 23 01:01:31 2015 +0200
+
+    r600g: Fix assert in tgsi_cmp
+    
+    Fixes https://bugs.freedesktop.org/show_bug.cgi?id=91726
+    
+    Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Cc: "11.0" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Dave Airlie <airlied@gmail.com>
+    (cherry picked from commit 50932268aad0cc21511f370793e77c76e038bd06)
+
+commit ab94875352c47d3815d78166ee9826af6b17b143
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Aug 22 11:55:47 2015 +0100
+
+    Update version to 11.0.0-rc1
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit f3abea1577f8d5c17dacab17aeafb0c1fc1092cc
+commit f077632030425f820ebe96144b436e35259e06ab
 Author: Matt Turner <mattst88@gmail.com>
 Date:   Sat Feb 28 12:12:22 2015 -0800
 
     Revert SHA1 additions.
     
     The shader-cache isn't finished, so the configure checks are a bit
-    premature and will only stand to confuse users of Mesa 10.6.
+    premature and will only stand to confuse users of Mesa 11.0.
     
     This is a squash of the follow four reverts:
     
@@ -1300,2091 +1688,25659 @@
     
     Reviewed-by: Carl Worth <cworth@cworth.org>
 
-commit 6fbe4bf790c3ee20044928058ba64b17589df3cd
-Author: Chris Wilson <chris@chris-wilson.co.uk>
-Date:   Wed Jun 10 08:28:13 2015 +0100
+commit d7bafcafd329d23ab0c76e3a6ae06277506f50f3
+Author: Boyan Ding <boyan.j.ding@gmail.com>
+Date:   Fri Aug 21 21:44:36 2015 +0800
 
-    loader: Look for any version of currently linked libudev.so
-    
-    Since there was an ABI break and linking twice against libudev.so.0 and
-    libudev.so.1 causes the application to quickly crash, we first check if
-    the application is currently linked against libudev before dlopening a
-    local handle. However for backwards/forwards compatability, we need to
-    inspect the application for current linkage against all known versions
-    first. Not doing so causes a crash when both libraries are present and
-    so mesa chooses libudev.so.1 but the application was linked against
-    libudev.so.0.
-    
-    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-    
-    Emil Velikov:
+    egl/wayland: define set_cloexec_or_close only when mkostemp is not present
     
-    I'm ever so slightly conserned that RTLD_NOLOAD is not part of the POSIX
-    standard, thus it's missing on some platforms (*BSD seems ok, while
-    Solaris, MacOS are not).
+    Fixes a compiler warning of defined but not used function when
+    HAVE_MKOSTEMP is defined.
     
+    Fixes: eb3e2562a4b(configure.ac: check for mkostemp())
+    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
     Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit f2413457937f8f4a92e11379569be69e508d7477)
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
 
-commit 4c3a4ac6da7820226ecad5fd654899ae727e11e0
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Tue Jul 7 23:05:45 2015 -0400
+commit ec256eceedd0a00b41c2c94d5a32609f0e1870b2
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Aug 22 12:58:03 2015 +0100
 
-    nvc0: turn sample counts off during blit
-    
-    Fixes the following piglits:
-      occlusion_query_meta_fragments
-      occlusion_query_meta_no_fragments
+    mapi: ship ARB_tessellation_shader.xml
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 38c2ec5ff0bf626578db7b84387279342aa48844)
+    Fixes: e2b59a39cbb(mapi: add ARB_tessellation_shader)
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit 2ca2f3701b9928374911c603178cf92da1e5167b
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Wed Jul 1 20:13:00 2015 -0700
+commit 79da1b262de0df61c1bf74a55c9b18cd6eeb2fc5
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Aug 22 12:15:27 2015 +0100
 
-    i965/vs: Fix matNxM vertex attributes where M != 4.
+    nouveau: add codegen/unordered_set.h to the tarball
     
-    Matrix vertex attributes have their columns padded out to vec4s, which
-    I was failing to account for.  Scalar NIR expects them to be packed,
-    however.
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit a8f451cd0e06278699fb7139213bbc6c98a9cc52
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Aug 21 02:01:42 2015 +0100
+
+    winsys/sw/kms-dri: don't attempt to bundle the sconscript
     
-    Fixes 1256 dEQP tests on Broadwell.
+    The build/file was removed with an earlier commit while the EXTRA_DIST
+    was forgotten.
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-    Tested-by: Mark Janes <mark.a.janes@intel.com>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    (cherry picked from commit 73d0e7f3451eaeb62ac039d2dcee1e1c6787e3db)
+    Fixes: 66d77cd71c6 (scons: don't build the kms-dri winsys)
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    
-    Conflicts:
-    	src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 
-commit fcc9f9e06e770c709bba619836da8b3ee525cd7a
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Sat Jul 4 22:40:59 2015 +0100
+commit 4a21da709be3b2aa8d4b22830d56a5cc7661ccad
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Aug 20 22:55:49 2015 +0100
 
-    glsl: Make sure not to dereference NULL
+    winsys/amdgpu: automake: remove missing headers
     
-    In this bit of code point_five can be NULL if the expression is not a
-    constant. This fixes it to match the pattern of the rest of the chunk
-    of code so that it checks for NULLs.
+    The files are not referenced in any other place in whole of
+    mesa. They are likely remnants of the early development stage.
     
-    Cc: Matt Turner <mattst88@gmail.com>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Matt Turner <mattst88@gmail.com>
-    (cherry picked from commit 86a3557d7c95ac945eedf42ab095639b255c1bed)
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit 5de0e9f982de9cbb6823b7a3d130bb632a16b2ae
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Sat Jul 4 22:40:58 2015 +0100
+commit cfbcabe673f0657a96752c5db18671989b745a95
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Aug 20 22:52:49 2015 +0100
 
-    glsl: Add missing check for whether an expression is an add operation
+    automake: build all drivers but vc4 during distcheck
     
-    There is a piece of code that is trying to match expressions of the
-    form (mul (floor (add (abs x) 0.5) (sign x))). However the check for
-    the add expression wasn't checking whether it had the expected
-    operation. It looks like this was just an oversight because it doesn't
-    match the pattern for the rest of the code snippet. The existing line
-    to check whether add_expr!=NULL was added as part of a coverity fix in
-    3384179f.
+    vc4 conflicts with ilo when built on x86, as it's built for emulation
+    purposes. In that mode an i965-like symbol is exported by vc4, which
+    conflicts with the ilo one in the gallium-dri megadriver.
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91226
-    Cc: Matt Turner <mattst88@gmail.com>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Matt Turner <mattst88@gmail.com>
-    (cherry picked from commit 18039078e0254c7cb5e15b7186be05e2e4c10f38)
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit 083840d365e079ec4b63911dc4b4fb8dda5b98d2
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Wed Mar 4 15:46:57 2015 -0800
+commit 4b5936335b0a253e61473ed026d0f6309b169859
+Author: Mauro Rossi <issor.oruam@gmail.com>
+Date:   Tue Aug 18 11:53:32 2015 +0200
 
-    i965: Reserve more batch space to accomodate Gen6 perfmonitors.
+    android: enable amdgpu winsys in radeonsi driver
     
-    Ben noticed that I said each PIPE_CONTROL was 4 DWords, but it's
-    actually 5 DWords on Gen6-7.  We've been reserving insufficient space
-    for performance monitoring on Sandybridge, which means it would likely
-    break if you used that functionality.  (Thankfully, no one does...)
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 6aaa814995d922d6f9cc68bc26276fd752866ceb
+Author: Mauro Rossi <issor.oruam@gmail.com>
+Date:   Tue Aug 18 11:53:31 2015 +0200
+
+    android: fix cflags and includes for amdgpu winsys
     
-    Also, the existing number of 146 was the result of me flubbing up the
-    arithmetic: it should have actually been 140.
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 807b1e5b05dacd46b5f563f5c6e561e660a2872e
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Aug 22 11:04:11 2015 +0100
+
+    docs: add news item and link release notes for 10.6.5
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
-    (cherry picked from commit d9ab95b365f058a46bc43a8cb96b6fff10a13faf)
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
-commit afa43fa696e7dd65ebce4c1e95892a4886d6049e
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Fri Jul 3 13:15:21 2015 +0100
+commit 32cd1252b8c816c1662ac4b95f74e86d797bc894
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Aug 22 11:00:47 2015 +0100
 
-    i965/skl: Set the pulls bary bit in 3DSTATE_PS_EXTRA
+    docs: add sha256 checksums for 10.6.5
     
-    On Gen9+ there is a new bit in 3DSTATE_PS_EXTRA that must be set if
-    the shader sends a message to the pixel interpolator. This fixes the
-    interpolateAt* tests on SKL, apart from interpolateatsample-nonconst
-    but that is not implemented anywhere so it's not a regression.
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit fa34225167396008e75e93f23696666caba8a7bf)
+
+commit fa52cf0ccf3f109b3119ca0980bb1dcba45e8911
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Aug 22 10:20:54 2015 +0100
+
+    docs: add release notes for 10.6.5
     
-    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 493af150fb3b1c007d791b24dcd5ea8a92ad763c)
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit a43b3dd99bd4c114d0f3e90f4fd4792164fe7539)
+
+commit 6817e0f1ce71d2a6d347d4c182f2cf4742dd5deb
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Aug 21 15:28:22 2015 +0100
+
+    i965: Move control flush into pipelined conditional render
     
-     Conflicts:
-    	src/mesa/drivers/dri/i965/brw_fs_nir.cpp
-    	src/mesa/drivers/dri/i965/gen8_ps_state.c
+    The nv_conditional_render piglits were sporadically failing. Moving
+    the control flush from the write and placing it just before the read
+    was sufficient to make the piglits pass a 1000/1000 times. The bspec
+    says that the flush enable bit "waits until all previous writes of
+    immediate data from post sync circles are complete before executing the
+    next command" - the operative word being previous!
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90691
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Neil Roberts <neil@linux.intel.com>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
 
-commit 03cf14a71378eec10d8c496916c0bc0ff212ef7d
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Wed Jun 24 11:58:50 2015 +0200
+commit eb2776504ae32feaf41a5bad9f09f154045e96a3
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Aug 21 10:57:24 2015 -0700
 
-    radeonsi: fix a hang with DrawTransformFeedback on 4 SE chips
+    vc4: Actually allow math results to allocate into r4.
     
-    Cc: 10.6 10.5 <mesa-stable@lists.freedesktop.org>
-    Acked-by: Christian König <christain.koenig@amd.com>
-    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
-    (cherry picked from commit d3f4f6b2e9380a91ab61b93c55ab36106345e7b2)
+    I switched us to tracking whether the results *could* go to r4, but then
+    didn't make a separate register class for the class bits that included r4.
+    Switch the "any" class to actually be "any", and name the "any but r4"
+    class more appropriately.
+    
+    total instructions in shared programs: 96798 -> 94680 (-2.19%)
+    instructions in affected programs:     62736 -> 60618 (-3.38%)
 
-commit e529d5ffb4c0b951005948d06e8ce868f5659c24
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri Jul 3 19:09:09 2015 -0400
+commit 89b1b33f44bc6ce71109ac8668529c30b6d6d910
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Aug 21 00:08:13 2015 -0700
 
-    nv50/ir: UCMP arguments are float, so make sure modifiers are applied
+    vc4: Fold the 16-bit integer pack into the instructions generating it.
     
-    The first argument to UCMP needs to be compared against 0, but the
-    latter arguments are treated as float and need to be able to properly
-    apply neg/abs arguments. Adjust the inferSrcType function accordingly.
+    total instructions in shared programs: 97580 -> 96798 (-0.80%)
+    instructions in affected programs:     52826 -> 52044 (-1.48%)
+
+commit 7e0b868cf31003ada12063398fb91485ed8043dd
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Aug 21 00:04:36 2015 -0700
+
+    vc4: Reuse QPU dumping for packing bits in QIR.
+
+commit 4ae137534a8718db4611782dbfec773504b6e3be
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 18 20:26:05 2015 -0700
+
+    vc4: Make _dest variants of qir ALU helpers to provide an explicit dest.
+
+commit 2002438c91981b22991ae70fefc5d492dda72835
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Aug 21 09:22:32 2015 -0700
+
+    vc4: Use the SSA defs list for figuring out eligible MOVs for copy prop.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit f70719cc4b64e12310dfe8825a8e2d4bce970673)
+    I thought I'd converted this over previously.  It was copy propagating
+    MOVs badly with the new destination packing flags.
 
-commit 4d8c6edab419375b0b9e25d7e53af394258c1ab8
-Author: Mario Kleiner <mario.kleiner.de@gmail.com>
-Date:   Sun Jun 28 03:02:31 2015 +0200
+commit 20746c2e7d8d065445bbb30f0d1383c30459a784
+Author: Krzysztof Sobiecki <sobkas@gmail.com>
+Date:   Thu Aug 20 23:19:30 2015 +0200
 
-    winsys/radeon: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
+    st/nine: Always use user constant buffers
     
-    Same problem and fix as for nouveau's ZaphodHeads trouble.
+    We had several reports of users hitting bugs
+    with the other path to upload constants,
+    and switching to the user constant buffer
+    path solves the bugs.
     
-    See patch ...
+    User constant buffers are expected to be slower
+    for Nvidia cards, so ideally this patch should be
+    reverted when the path is fixed.
     
-    "nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads."
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Krzysztof Sobiecki <sobkas@gmail.com>
+
+commit f57e9c77e388e1c5373265f4bc15434e63477757
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Aug 16 13:11:50 2015 +0200
+
+    st/nine: Silent warning in nine_ff
     
-    ... for reference.
+    release build was complaining
     
-    Cc: "10.3 10.4 10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
-    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-    (cherry picked from commit 28dda47ae4d974e3e032d60e8e0965c8c068c6d8)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 8ef284366c4a0daf4bd0953dc8369362efb49055
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Fri Jul 3 16:20:32 2015 +0200
+commit 3cc205bbeb19d417b17be0f6200cb5cda9adca8a
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Aug 16 13:11:27 2015 +0200
 
-    r600g: disable single-sample fast color clear due to hangs
+    st/nine: Silent warning in sm1_declusage_to_tgsi
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73528
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82186
+    release build was complaining
     
-    Cc: 10.4 10.5 10.6 <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 97ec2c694fe568e375ec7a2b85c1acb1e4666b54)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 0b5a9660dc1210558e093d8690516c833a87a330
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Wed Jul 1 18:22:23 2015 -0400
+commit d48cab9fa6b75db054e3a53819b465611923e9bc
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Aug 16 12:58:41 2015 +0200
 
-    mesa/prog: relative offsets into constbufs are not constant
+    st/nine: Silent warning in NineCubeTexture9_ctor
     
-    The optimization logic relies on being able to read out constbuf values
-    from program parameters. However that only works if there's no relative
-    addressing involved.
+    The compiler was complaining the value may be uninitialised
+    when it is used (which is wrong). Initialize to NULL to silence
+    the warning.
+
+commit 2f02d5e814904d7a5c041d6869751731712e8195
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Aug 16 12:57:40 2015 +0200
+
+    st/nine: Silent warning in update_vertex_buffer
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91173
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    (cherry picked from commit 197a19f9ed0ba12cc431542ac09f2af0a8bd0bce)
+    There was an unused variable
 
-commit 1e8c43f4d0f6dd6699d5d72efbc02538733a38ec
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Thu Jul 2 00:13:36 2015 -0400
+commit 719f124620d3c9b4d6ce14db3dbfc7af05626e5b
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Aug 9 19:06:01 2015 +0200
 
-    nv50/ir: don't emit src2 in immediate form
+    st/nine: Catch setting the same shader
     
-    In the immediate form, src2 == dst, so it does not need to be emitted.
-    Otherwise it overlaps with the immediate value's low bits.
+    It is quite rare for an app to set the same shaders
+    again, but it isn't an expensive check
+    either.
     
-    Fixes: 09ee907266 (nv50/ir: Fold IMM into MAD)
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    (cherry picked from commit c3215ef204c0fdfc44230adbd423720169d44dcb)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 6902a36d2239a774fd07c0d7dce4f18bc649d9bf
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Wed Jul 1 15:18:47 2015 -0400
+commit eba3c390a3c6221781ca4d9c344c6f448e177b6c
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Aug 9 19:02:02 2015 +0200
 
-    mesa: reset the source packing when creating temp transfer image
+    st/nine: Avoid Constant upload when there is no change
     
-    Commit 4b249d2ee (mesa: Handle transferOps in texstore_rgba) introduced
-    proper transferops handling, but in updating the source to the newly
-    allocated temporary image neglected to reset the source packing. Set it
-    to the default which should be appropriate for the floats used.
+    It is very common for d3d9 apps to set again the constants
+    they need before every draw call, even if nothing changed.
     
-    Fixes: 4b249d2ee (mesa: Handle transferOps in texstore_rgba)
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91173
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
-    (cherry picked from commit 4f57cdba2767b56eb4752f14ba9853ba6bc06d0e)
+    Since we are mostly gpu bound, it is better to check
+    for change, and upload constants again (and thus use
+    gpu bandwidth) only if the constants changed.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 96bed9fea8d2ef68e43ba445b18570cbe52143ec
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Wed Jul 1 02:11:39 2015 -0400
+commit 1a747094ed0ab32541936dd18931f382901affe6
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat Jul 25 17:54:26 2015 +0200
 
-    nv50/ir: copy joinAt when splitting both before and after
+    st/nine: Fix the number of texture stages
     
-    The current implementation only moves the joinAt when splitting after
-    the given instruction, not before it. So if you have a BB with
+    The number of texture stages is 8.
     
-      foo
-      instr
-      bar
-      joinat
+    'tex_stage' array was too big, and thus
+    the checks with 'Elements(state->ff.tex_stage)' were passing,
+    causing some invalid API calls to pass, and crash because of
+    out of bounds write since bumpmap_vars was just the correct size.
     
-    and thus with joinAt set, we end up first splitting before instr, at
-    which point the instr's bb is updated to the new bb. Since that bb
-    doesn't have a joinAt set (despite containing one), when splitting after
-    the instr, there is nothing to copy over. Since the joinat will be in
-    the "split" bb irrespective of whether we're splitting before or after
-    the instruction, move it over in either case.
-    
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91124
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 5dcb28c3d26828ed1b0e2bd5a0589c5baab04b85)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 3fe9fe9cb90785244d4a464e0d9cd244b75a080d
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Tue May 19 15:01:49 2015 +0300
+commit f15ff98e2c53f6ce62443b24e3a9cf79c60ddff7
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu May 14 19:24:57 2015 +0200
 
-    glsl: validate sampler array indexing for 'constant-index-expression'
-    
-    Desktop GLSL < 130 and GLSL ES < 300 allow sampler array indexing where
-    index can contain a loop induction variable. This extra check will warn
-    during linking if some of the indexes could not be turned in to constant
-    expressions.
+    st/nine: Use CSO cache for sampler views
     
-    v2: warning instead of error for backends that did not enable
-        EmitNoIndirectSampler option (have dynamic indexing)
+    The CSO cache unbinds views that are not needed anymore,
+    which we don't do.
+    It checks for change before committing the views.
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 9350ea6979c48772e1fb55d4f1c7c5a3cfa987b0)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 1beb6738a784e69981208273e2cc4a6c8c6675f3
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Mon Jun 29 09:48:52 2015 +0300
+commit 98f786b270b3e55a1e8a7bdc8182e436702748a2
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu May 14 19:10:41 2015 +0200
 
-    mesa/st: use EmitNoIndirectSampler if !ARB_gpu_shader5
+    st/nine: Calculate dummy sampler state only once
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
-    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit f17c8c287f3581fccb52714fbd4b2ea09a58e3d3)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit f3e514a41d0c3f532e5633c20df91405e52a4a2a
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Mon Jun 29 09:53:45 2015 +0300
+commit f5effeb8eaf16e39a2f561b4b277f3810a69fc88
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu May 14 15:42:50 2015 +0200
 
-    i915: use EmitNoIndirectSampler
+    st/nine: Better check shader constant limits
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 2dc2b12ed15abb84c7e2b3c2726dcc1b735abcda)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit b150817c197a8e0772114641fed3eb19284f4540
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Wed Jun 24 13:22:43 2015 +0300
+commit bae2c7c15444b02a4820e3182c345545bd348561
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu Aug 13 19:24:02 2015 +0200
 
-    i965: use EmitNoIndirectSampler for gen < 7
+    st/nine: Remove NINED3DRS_ZBIASSCALE
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 8852e26e93af1fc4b72bf9d57e847f53e1a1371b)
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    It wasn't giving the expected result.
     
-    Conflicts:
-    	src/mesa/drivers/dri/i965/brw_shader.cpp
+    This fixes some objects being transparent
+    in games like FEAR.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 619b9e84bf828e716a7a5b8391fe15efab441828
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Tue Jun 9 13:33:39 2015 +0300
+commit 6379a28aa75a38676120891b355c434bec4125e1
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 17 00:22:33 2015 +0200
 
-    mesa/glsl: new compiler option EmitNoIndirectSampler
+    st/nine: Implement special DOTPRODUCT3 behaviour
     
-    Patch provides new compiler option for backend to force unroll loops
-    that have non-constant expression indexing on sampler arrays.
+    Taken from wine tests
     
-    This makes sure that we can never end up with a shader that uses loop
-    induction variable as sampler array index but does not unroll because
-    of having too much instructions. This would not work without dynamic
-    indexing support.
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+
+commit 791b794a849f4576e59bda29680bdd49ed0429a9
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat May 16 12:14:54 2015 +0200
+
+    st/nine: Implement ff vertex data passthrough
     
-    v2: change option name as EmitNoIndirectSampler
+    Fixes Wine tests
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit e4512e1581cf90f56d13cfa6a809832ef3517283)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit cc7caf9239903ca3604e90613c4696e7c0f7b0e1
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Tue Jun 9 13:28:44 2015 +0300
+commit fb6c76f1bac039cddc66cb823e85437981ce8bef
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat May 16 01:24:29 2015 +0200
 
-    glsl: Allow dynamic sampler array indexing with GLSL ES < 3.00
+    st/nine: Change nine_state_update order
     
-    Dynamic indexing of sampler arrays is prohibited by GLSL ES 3.00.
-    Earlier versions allow 'constant-index-expression' indexing, where
-    index can contain a loop induction variable.
+    nine_update_state called every draw call.
     
-    Patch allows dynamic indexing for sampler arrays when GLSL ES < 3.00.
-    This change makes 'sampler-array-index.frag' parser test in Piglit
-    pass + fishgl.com works when running Chrome on OpenGL ES 2.0 backend
+    This patch attempts to change the order
+    of the checks to have better control flow
     
-    v2: small change and some more commit message (Tapani)
-    v3: refactor checks to make it more readable (Ian Romanick)
-    v4: change warning comment in GLSL ES case (Curro)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+
+commit 4a00e4cdc970582df338ebf880168a4ef4ec05d9
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu May 14 01:27:33 2015 +0200
+
+    st/nine: Programmable ps D3DTTSS_PROJECTED support
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Signed-off-by: Kalyan Kondapally <kalyan.kondapally@intel.com>
-    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84225
-    (cherry picked from commit edb8383c98ee23385731d0fc23a6b6673528a8ec)
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    The implementation used Wine tests for conformance
     
-    Conflicts:
-    	src/glsl/ast_array_index.cpp
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit a70904bc78e3dd0ea369a6dd03e4c54fbf5ff808
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Tue Jun 30 02:46:26 2015 -0400
+commit b7261528ea964f6cba3173f72608c1afc0750d6b
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue May 12 23:49:54 2015 +0200
 
-    nv50/ir: fix emission of address reg in 3rd source
+    st/nine: Complete ff texture transform implementation
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91056
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit d5f1253b0c4637ad996fd0da45095165006d61d3)
+    Wine tests were used to get it right.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit e00aab165416b3281f729053ce003b2f298f543c
-Author: Mario Kleiner <mario.kleiner.de@gmail.com>
-Date:   Fri Jun 5 15:36:52 2015 +0200
+commit dcb6f764cfb13c21b5bc56e973daf05db5c1bc1c
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat Aug 15 22:52:19 2015 +0200
 
-    nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
-    
-    The dup'ed fd owned by the nouveau_screen for a device node
-    must also be used as key for the winsys hash table, instead
-    of using the original fd passed in for a screen, to make
-    multi-x-screen ZaphodHeads configurations work on nouveau.
+    st/nine: Change a few advertised caps
     
-    The original fd's lifetime differs from that of the nouveau_screen stored
-    in the hash. The hash key is the fd, and in order to compare hash entries
-    we fstat them, so the fd must be around for as long as the screen is.
+    There were flags all sm3 cards do advertise,
+    and we weren't.
+    Some games can trigger buggy rendering path
+    if the caps are not what they expect.
     
-    This is an extension of the fix in commit a59f2bb1 (nouveau: dup fd
-    before passing it to device).
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+
+commit c2480bbab1a15960d8b30dda8351a6869a424679
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Fri May 8 19:50:15 2015 +0200
+
+    st/nine: Advertise Fog flags
     
-    Cc: "10.3 10.4 10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
-    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    (cherry picked from commit a98600b0ebdfc8481c168aae6c5670071e22fc29)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 887a18018d1e4a532c671e642265a0853960d3b0
-Author: Mike Stroyan <mike@lunarg.com>
-Date:   Fri Jun 26 15:15:46 2015 -0600
+commit fcca7ff38a6ba923abd910f50a8e2bcf6560e6ae
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Aug 16 14:59:52 2015 +0200
 
-    meta: Only change and restore viewport 0 in mesa meta mode
+    st/nine: Revert to userbuf path when needed
     
-    The meta code was setting a default depth range for all viewports
-    and 'restoring' all viewports to depth range values saved from viewport 0.
+    Automatically switch to userbuf path when
+    we would need to upload fog or bumpmat
+    constants
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    (cherry picked from commit 2a210b797eacd27a556af9c5e0edca940f9486c5)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit a09b91792c0841e67b2dedebc97293a1007c5586
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Thu Jun 18 13:55:52 2015 -0700
+commit 8f39ffc11e8b6fdf7c940f9c1853a362a886977a
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue May 12 21:56:04 2015 +0200
 
-    i965/fs: Fix ir_txs in emit_texture_gen4_simd16().
-    
-    We were not emitting the LOD, which led to message lengths of 1 instead
-    of 3.  Setting has_lod makes us emit the LOD, but I had to make changes
-    to avoid emitting the non-existent coordinate as well.
+    st/nine: Finish Fog implementation
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91022
-    Cc: mesa-stable@lists.freedesktop.org
-    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
-    (cherry picked from commit 35d83793047b3de31a706fa2a62a233090ea7cfc)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 0eaf0e16ddea0d9369991d8ca0829fe38dca819c
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri Jun 26 15:01:22 2015 -0400
+commit 69de5d626f2e67e74e4de94ce13f7ac50fa52161
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Mon May 11 21:32:57 2015 +0200
 
-    nv50/ir: propagate modifier to right arg when const-folding mad
+    st/nine: Rework shader states
     
-    An immediate has to be the second arg of an ADD operation. However we
-    were mistakenly propagating the modifier of the non-folded value to the
-    folded immediate argument.
+    Separate state setting and commit
+    Changes how the shader key is computed
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91117
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit ad62ec8316a926682958e7ab52639992867c3755)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 542299185b4fa7c662839bf560b41a7796440ca3
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Thu Jun 11 10:41:53 2015 +0300
+commit 854778ea0fea474eea0c984f1c0fc32aed91f10e
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 10 11:42:25 2015 +0200
 
-    i965: Delete linked GLSL IR when using NIR.
+    st/nine: Remove some useless variables
     
-    This is based on Kenneth's patch to delete 'most of the IR'. Due to
-    linker changes to clone variables, we can now free all of IR.
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
+
+commit cb0816c0f6e2ccf81cbb433206b616f4662a803a
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 10 12:01:10 2015 +0200
+
+    st/nine: Fix nine_ff_ps_key padding
     
-    Saves 58MB of memory when replaying a Dota 2 trace on Broadwell.
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+
+commit dd4802c8deedc0b2bad7d361c487761be31f2b56
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Fri May 8 19:48:45 2015 +0200
+
+    st/nine: Begin programmable shader fog support
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 104c8fc2c2aa5621261f80aa6b4f76c3163078f1)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 16d35dbd56b8d4687981c0dda0eeab2de8b0dc4a
-Author: Tapani Pälli <tapani.palli@intel.com>
-Date:   Thu Jun 11 10:41:52 2015 +0300
+commit 2dd59a2d286cca8ccbc7a25622cd7036cdbf437d
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Fri May 8 19:26:20 2015 +0200
 
-    glsl: clone inputs and outputs during linking
+    st/nine: Fix fixed function fog support
     
-    This increases memory pressure during linking but makes it easier
-    for backend to free IR after it is not needed anymore.
+    Previous code had only a subcase of fog working right.
     
-    v2: use resource list as ralloc context in case of relink (Kenneth)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+
+commit 87ec6b56b28c25d4184422c40f576c24ebe7cd75
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue Mar 24 11:06:22 2015 +0100
+
+    st/nine: Rework ff constant buffers
     
-    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit c2ff3485b3d48749ea9dcad07bc1a691627dc3e5)
+    Always use a user constant buffer for ff.
+    It means we have to:
+    . commit the user constant buffer for ff when we use it
+    . commit back the non-ff constant buffer when we stop using it
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit e77d8eb4b67db51d2b31159189edbcf74022bf78
-Author: Michel Dänzer <michel.daenzer@amd.com>
-Date:   Thu May 21 10:49:05 2015 +0900
+commit 993e68fa6a431a7c7c451c738e07d02a84ea40e4
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue Mar 24 10:47:35 2015 +0100
 
-    winsys/radeon: Unmap GPU VM address range when destroying BO
+    st/nine: Rework constant buffer state handling
     
-    But only when doing so is safe according to the
-    RADEON_INFO_VA_UNMAP_WORKING kernel query.
+    We have two paths:
+    . One that uses a fixed constant buffer, and updates it when needed
+    . One that uses a user constant buffer, and uploads it when needed.
     
-    This avoids kernel GPU VM address range conflicts when the BO has other
-    references than the GEM handle being closed, e.g. when the BO is shared.
+    This patch separates the preparation of the constant buffer
+    and the commit.
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90537
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90873
+    It also removes NineDevice9_RestoreNonCSOState, which was
+    used to restore all states. Instead the commit of the constant
+    buffer is moved to nine_state, and the other field settings
+    moved to other functions where more appropriate.
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Christian König <christian.koenig@amd.com>
-    (cherry picked from commit 7796e8889a9a2cc1b454dc32d8da3d756404339a)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 1e84989ffc37d917b737b2537b75e5e390a01c39
-Author: Matt Turner <mattst88@gmail.com>
-Date:   Mon Jun 22 10:59:33 2015 -0700
+commit a3f0d21da9a33e58a4be41f65f77eebe1dd85841
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue Mar 24 10:10:25 2015 +0100
 
-    i965/fs: Don't mess up stride for uniform integer multiplication.
+    st/nine: Rework blend states
     
-    If the stride is 0, the source is a uniform and we should not modify the
-    stride.
+    Separate state preparation and state commit
     
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91047
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    (cherry picked from commit a49328d58d1e3e143f9434976d9f3574acefc4ea)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 6ff3ae8deb1d99037f2f8e5890b09bd984059cf0
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Mon Jun 29 09:00:24 2015 +0100
+commit b06f3ee6f4ebf6ad403e7ee917c54bef85899c19
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat Jan 3 11:36:09 2015 +0100
 
-    docs: Add sha256 checksums for the 10.6.1 release
+    st/nine: Improve fallback when driver doesn't support user buffers.
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    For now the path updated is only used by AMD drivers, but a later
+    patch will make it used by all drivers. Some drivers like llvmpipe
+    don't support the uploading of constants from user buffers, so improve
+    the path to work for all drivers.
+    
+    Inspired from the gl state tracker.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit a871e80fc6237fa029d6970f7e9b414fd097bd98
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Mon Jun 29 08:23:14 2015 +0100
+commit a7ce3cd0d3a9f3580d02afe5a4fc6588d20aba6b
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Feb 15 16:22:43 2015 +0100
 
-    Add release notes for the 10.6.1 release
+    st/nine: Avoid useless updates in SetSamplerState
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Check for redundant settings
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
 
-commit f513cc8836552ac3e8f8058b4f3f5a681c0d3215
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Mon Jun 29 08:17:10 2015 +0100
+commit 5a2302b5ece2edf944b474eeb19ad7fc17906e7d
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue Mar 24 10:16:59 2015 +0100
 
-    Update version to 10.6.1
+    st/nine: Rework rasterizer states
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Separate state preparation and state commit
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit da588875ce25e339d72b45b5cd60b39c96a5bd62
-Author: Boyan Ding <boyan.j.ding@gmail.com>
-Date:   Sat Jun 13 15:33:20 2015 +0800
+commit 71616d0c501077a04deb4f2a3cc115b50634763d
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Fri May 15 23:52:09 2015 +0200
 
-    egl/x11: Remove duplicate call to dri2_x11_add_configs_for_visuals
+    st/nine: Reorder DSA state settings
     
-    The call to dri2_x11_add_configs_for_visuals (previously
-    dri2_add_configs_for_visuals) was moved downwards in commit f8c5b8a1,
-    but appeared again in its original position after its rename in
-    d019cd81. Remove it.
+    Separate state preparation and state commit
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
-    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Chad Versace <chad.versace@intel.com>
-    (cherry picked from commit 3fa9bb81ec8b21f472de32e08d0caf917239da08)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 684c81a75fba70cafef8583a031e3d2af55bb429
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sun Jun 21 19:03:35 2015 -0400
+commit 06285530566ea3387b6eb3f8e1a0443132c3659b
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue Mar 24 09:35:18 2015 +0100
 
-    nv50,nvc0: make sure to pushbuf_refn before putting bo into pushbuf_data
+    st/nine: Reorder nine_state.
     
-    Without first running the bo through pushbuf_refn, the nouveau drm
-    library will have uninitialized structures regarding this bo, and will
-    insert incorrect data.
+    Instead of mixing state preparation (filling pipe_****)
+    and state commit (pipe->set_*****),
+    begin doing so in two separate functions.
     
-    This fixes supertuxkart 0.9 crash on start (where it ends up doing a lot
-    of indirect draws).
+    This will make it possible to implement efficient Stateblocks,
+    and eventually lead to optimisation where the complete
+    pipe_*** structure is only partially updated.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 78d58e642549fbf340fdb4fca06720d2891216a8)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 9ffa1f7a1b938d6103f97d064a7443ae03d9a9d9
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sun Jun 21 15:00:16 2015 -0400
+commit 99537f68db829bd4708eb8e1b1ef0948f3dd3c66
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Feb 15 09:19:16 2015 +0100
 
-    nvc0: always put all tfb bufs into bufctx
+    st/nine: Remove group_mask argument from nine_update_state
     
-    Since we clear the TFB bufctx binding point above, we need to put all of
-    the active tfb's back in, even if they haven't changed since last time.
-    Otherwise the tfb may get moved into sysmem and the underlying mapping
-    will generate write errors.
+    It was only used to discriminate update framebuffer vs update
+    everything. Instead use two functions.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 9fcbf515b431a92e0289f234ab77a796cf2a5612)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit c4dc2a5e2c0ccbd0cc30c9cdf205775b2cb690a8
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Tue Jun 23 00:16:59 2015 -0400
+commit 360ba5b74858b3854784e3d4678c242140088395
+Author: Tiziano Bacocco <tizbac2@gmail.com>
+Date:   Fri Jan 30 20:10:38 2015 +0100
 
-    glsl: binding point is a texture unit, which is a combined space
-    
-    This fixes compilation failures in Dota 2 Reborn where a texture unit
-    binding point was used that was numerically higher than the max
-    per stage.
+    st/nine: Implement TEXBEM,TEXBEML and BEM
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
-    Tested-by: Nick Sarnie <commendsarnex@gmail.com>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit fccf012adc0d3aad877de095244324aa1d2d046a)
+    Signed-off-by: Tiziano Bacocco <tizbac2@gmail.com>
 
-commit d93677eb48d5205fb495399291a2a0defb22a804
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Fri Jun 19 19:22:38 2015 +0100
+commit 620f3e9bfe4bc56c65638a49b1c4f1d5cbef057d
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Sun May 17 12:46:42 2015 +0200
 
-    gbm: do not (over)link against libglapi.so
+    st/nine: Fix use of uninitialized values
     
-    The whole of GBM does not rely on even a single symbol from the GL
-    dispatch library, unsuprisingly. The only need for it comes from the
-    unresolved symbols in the DRI modules, which are now correctly handled
-    with Frank's commit.
+    Set all values to 0 after allocation. Found using valgrind.
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit a0dc6b7824d3b9095919e29393a379ea7f9c1318)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit 0db9835d3ba3f84f601e7a60e1f1590400788b36
-Author: Frank Henigman <fjhenigman@chromium.org>
-Date:   Thu Nov 6 16:29:26 2014 -0500
+commit 248833ff4072da4f3362dc9f0eab84eb015f3964
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Sat May 16 19:47:00 2015 +0200
 
-    gbm: dlopen libglapi so gbm_create_device works
-    
-    Dri driver libs are not linked to pull in libglapi so gbm_create_device()
-    fails when it tries to dlopen them (unless the application is linked
-    with something that does pull in libglapi, like libGL).
-    Until dri drivers can be fixed properly, dlopen libglapi before trying
-    to dlopen them.
+    st/nine: Prevent possible crash
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Frank Henigman <fjhenigman@google.com>
-    [Emil Velikov: Drop misleading bugzilla link, mention that libname differs]
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    In case NineBaseTexture9_ctor returns an error
+    This->surfaces[l] might be NULL.
     
-    (cherry picked from commit 828f13330c9384f2b55c8b0f962d93a74ecd0601)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit ca079a77f92b8d60aa17a611ad93c92e840ed09f
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Fri Jun 19 17:46:41 2015 +0100
+commit bc6c80e54714e939e4bc116374f410680e7a3632
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Tue May 19 20:18:29 2015 +0200
 
-    configure: error out when building libEGL without shared-glapi
+    st/nine: Return correct error codes in NineDevice9_Reset
     
-    The latter is a hard requirement and without it we'll error out later
-    on in the build.
+    Allow more than two errors, and return D3DERR_INVALIDCALL
+    for failed display resolution changes.
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Eric Anholt <eric@anholt.net>
-    (cherry picked from commit 994be5143a097ae2cf504ba344362edfee388ac3)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit 9ba9c030ad4d7264a160412a422f7b2cade331e0
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Fri Jun 19 17:44:02 2015 +0100
+commit d0a4374e1a04ba16f36ea91fdb390633fe941978
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Wed May 13 19:51:30 2015 +0200
 
-    configure: error out when building backend-less libEGL
+    st/nine: Fail on D3DUSAGE_DYNAMIC for D3DPOOL_SCRATCH textures
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Eric Anholt <eric@anholt.net>
-    (cherry picked from commit ddc886b5bfe5976fa2e5f49eeefa918736f1aa97)
+    Textures with pool D3DPOOL_SCRATCH or D3DPOOL_MANAGED
+    cannot be used with flag D3DUSAGE_DYNAMIC.
+    
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit c96d9c23717564d5a898c7c17a42514507a28b8c
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Fri Jun 19 17:19:46 2015 +0100
+commit f396cd43ab148ff77b8df3344610ee6f2224d21d
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu May 14 17:22:26 2015 +0200
 
-    configure: warn about shared_glapi & xlib-glx only when both are set
-    
-    Printing out the message when shared_glapi is disabled only leads to
-    confusion.
+    st/nine: Fix Lock Checks for Compressed textures
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Eric Anholt <eric@anholt.net>
-    (cherry picked from commit 6d744aaf4e427b6b0b3d8d35d756592a50abbb97)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit f036512122428b65fdfa4ba3aacf8baefa675104
-Author: Ben Widawsky <benjamin.widawsky@intel.com>
-Date:   Wed Jun 3 21:35:51 2015 -0700
+commit d0daec1797a22b51f7a3f5aa585ad6826af06cd3
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu May 14 17:01:40 2015 +0200
 
-    i965/gen9: Implement Push Constant Buffer workaround
-    
-    This implements a workaround (exact excerpt as a comment in the code). The docs
-    specify [clearly, after you struggle for a while] that the offset isn't relative
-    to state base. This actually makes sense. This fixes hangs on SKL.
+    st/nine: Impose restrictions on DXTN texture sizes
     
-    Buffer #0 is meant to be used for normal uniforms.
-    Buffer #1 is typically used for gather constants when using RS.
-    Buffer #1-#3 could be used to push a bunch of UBO data which would just be
-      somewhere in memory, and not relative to the dynamic state.
+    This is the expected behaviour.
     
-    NOTE: I've moved away from the ternary operator for the new gen9 conditions.
-    Admittedly it's probably not great to do this, but I really want to fix this all
-    up in the subsequent patch and doing it here makes that diff a lot nicer. I want
-    to split out the gen8/9 code to make the function a bit more readable, but to
-    keep this easily cherry-pickable I am doing this fix first. If we decide not to
-    merge the cleanup patch then I can revisit this.
+    Fixes wine tests.
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
-    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Tested-by: Valtteri Rantala <Valtteri.rantala@intel.com>
-    (cherry picked from commit 90754d2df05eafe1a3ee3cd9bb1611a19099fc49)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
 
-commit 0c46196e1d172beac95dd0cb6ab43bcb4d1e2919
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Wed Jun 17 23:00:44 2015 -0400
+commit 48d895aa4b2475ef0af234b832d92d0ac4a47761
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Wed May 13 19:43:04 2015 +0200
 
-    mesa: add GL_PROGRAM_PIPELINE support in KHR_debug calls
+    st/nine: Return NULL pointer in lock error cases
     
-    This was apparently missed when ARB_sso support was added.
-    Add label support to pipeline objects just like all the other
-    debug-related objects.
+    Tests showed that in case of errors, the pBits pointer is set to NULL.
+    The pBits field isn't set to NULL in case of an already locked object.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 770f141866654dab969302f720228497f0fb35fd)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit 74f2c1c282ba3776c21647bf523081bdab7bedef
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Wed Jun 17 15:09:26 2015 -0400
+commit 74a77a4c1a5dafccbad0ff8c7fb55156c6643685
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Wed May 13 07:19:11 2015 +0200
 
-    glsl: add version checks to conditionals for builtin variable enablement
+    st/nine: Fix resource SetPriority/GetPriority
     
-    A number of builtin variables have checks based on the extension being
-    enabled, but were missing enablement via a higher GLSL version.
+    Return 0 for non MANAGED textures and surfaces.
+    Fixes failing wine d3d9 tests device.c test_resource_priority.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit b6e238023c4f8af2328dc3bcab1d73a3e19f4fbb)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit 8ed4c7acc2786758410ac257296cd058709030b1
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Wed Jun 17 15:07:14 2015 -0400
+commit 40a8943f53ff42f2a3b782e42d8a79711d814ab8
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Tue May 12 20:28:17 2015 +0200
 
-    glsl: handle conversions to double when comparing param matches
+    st/nine: Clean GetPrivateData
     
-    This allows mod(int, int) to become selected as float mod when doubles
-    are supported.
+    Move the assert to return error codes in the correct order.
+    Always set the pSizeOfData to the required buffer size.
+    Fixes failing wine test device.c test_private_data()
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit c40e7ee7c47cb24264fd77ef37fab99dea4c299a)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit fc3af254b11baddc14f59c4d5b691430ece1975b
-Author: Boyan Ding <boyan.j.ding@gmail.com>
-Date:   Tue Jun 16 11:08:33 2015 +0800
+commit 9ba3f83592730e98cb5be0fbc88f1e40ff5471a8
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Mon Apr 20 19:26:55 2015 +0200
 
-    egl/x11: Set version of swrastLoader to 2
+    st/nine: Allow lock coordinates outside range
     
-    which it actually implements instead of the newest version defined in
-    dri_interface.h
+    This fixes wine test device.c test_lockrect_invalid()
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
-    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 997fc807b2f71ef65b4601d6db33d0f912c18d3f)
+    Mimic Windows XP behaviour and allow negative values in the rectangle passed.
+    Add comment to point out behaviour used.
+    
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit 9d2b9e7724689ce7a74e16b9691a704b65ec6cfb
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Wed Jun 17 22:18:09 2015 -0400
+commit bb1c2c2aa3b15ee1fdf8b8085cc1ca9f8ef04ed9
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Tue May 12 20:07:09 2015 +0200
 
-    nvc0/ir: can't have a join on a load with an indirect source
-    
-    Triggers an INVALID_OPCODE warning on GK208. Seems rare enough to not
-    warrant verification on other chips. Fixes the new piglits:
+    st/nine: Fix GenerateMipSubLevels potential crash
     
-      ubo_array_indexing/fs-nonuniform-control-flow.shader_test
-      ubo_array_indexing/vs-nonuniform-control-flow.shader_test
+    For the case of D3DPOOL_MANAGED textures, This->base.resource can be NULL
+    at the start of the function. In this case, UploadSelf will take care
+    of the defining. Assign resource after the UploadSelf call
+    to prevent NULL pointer exception.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 36e3eb6a957f8f20ed187ec88a067fc65cb81432)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit acfaacb18b8ec3333fd8786a62b6abb0741f2928
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon Jun 15 15:48:58 2015 -0400
+commit 3bcab9ba75b24bd6cabcd9d31165ad4c1fe9d75c
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat May 16 18:40:02 2015 +0200
 
-    nv50,nvc0: clamp uniform size to 64k
+    st/nine: Fix FillColor Flag check
     
-    The state tracker will pass through requests from buggy applications
-    which will have the buffer size larger than the max allowed (64k). Clamp
-    the size to 64k so that we don't get errors when uploading the constbuf
-    data.
+    It is better to check if the surface was created with RT flag,
+    instead of checking capability (llvmpipe was complaining)
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 8b24388647f626a5cad10fd48e61335ed26a8560)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 0736a2aa795bab3856abea308ca99b708449e169
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri Jun 12 16:09:05 2015 +0200
+commit ef8ade6f09410b734fb84861a8bce23dc9e49afc
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Fri May 8 01:29:35 2015 +0200
 
-    nvc0/ir: fix collection of first uses for texture barrier insertion
+    st/nine: Fix StretchRect checks
     
-    One of the places we have to insert texbars is in situations where the
-    result of the tex gets overwritten by a different instruction (e.g. in a
-    conditional statement). However in some situations it can actually
-    appear as though the original tex itself is an overwriting instruction.
-    This can naturally never really happen, so just ignore the tex
-    instruction when it comes up.
+    Fixes Wine tests
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90347
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit a2af42c1d2dc91f4c31e25ff9fff15a89a9b6ead)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 1a153e1fd341c565245096fab939d41a83aecfd8
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Tue May 12 04:17:04 2015 -0700
+commit c57f777b095fbbb6273fccfe7ea2d1ced5fbc0a4
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Fri May 8 16:54:53 2015 +0200
 
-    meta: Abort meta path if ReadPixels need rgb to luminance conversion
+    st/nine: Implement EvictManagedResources
     
-    After recent addition of pbo testing in piglit test getteximage-luminance,
-    it fails on i965. This patch makes a sub test pass.
+    EvictManagedResources is used by apps to free
+    the gpu memory of MANAGED textures (which have
+    a cpu memory backing)
     
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
-    (cherry picked from commit a4ff47ade9d95a27c9c55afbf6dd77d3f3b10562)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 1f3ec929761d9aee986362ef90a4a150f4481496
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Fri May 1 00:05:18 2015 -0700
+commit 4c126f0b5837227d93e481fb04e43c96b3316413
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Thu May 7 00:03:21 2015 +0200
 
-    mesa: Turn need_rgb_to_luminance_conversion() in to a global function
+    st/nine: Track managed textures
     
-    This will be used by _mesa_meta_pbo_GetTexSubImage() in a later patch.
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+
+commit 41f54040e20d40e5e2ecbf73c09dcb4a154c4577
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue May 5 20:40:12 2015 +0200
+
+    st/nine: Only update dirty rect for UpdateTexture
     
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
-    (cherry picked from commit ba2b1f8668811eade97a4f134f6df900ff36c8aa)
+    UpdateTexture is supposed to optimise by uploading only for the
+    dirty region of the source (d3d9 doc, wine tests).
+    This patch adds the behaviour for surfaces, but not entirely for
+    volumes.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 2040c18ecc32262426541f5781daa783e3e4bf6b
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Thu Apr 30 23:36:18 2015 -0700
+commit 43d5c5a11b35dc09f369b08ff7066ccc8ab34e82
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Mon May 4 00:17:09 2015 +0200
 
-    mesa: Use helper function need_rgb_to_luminance_conversion()
+    st/nine: Textures start dirty
     
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit 0b13adcd0802d1ad60f625e7e557d2090a7c143e)
+    According to the spec all textures start
+    dirty.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit b590ee6d45a3c91dce3309739226b5f9d062637a
-Author: Anuj Phogat <anuj.phogat@gmail.com>
-Date:   Thu Apr 30 23:35:20 2015 -0700
+commit e139e0debd2e0dfa1f233adafab4a0c322598a7e
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 3 23:53:48 2015 +0200
 
-    mesa: Handle integer formats in need_rgb_to_luminance_conversion()
+    st/nine: Track dirty region for SYSTEMMEM too
     
-    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
-    Cc: <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
-    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
-    (cherry picked from commit 82abdf209a2fb5b95b2bae80045aecc61202b13c)
+    Dirty regions should be tracked for both MANAGED
+    and SYSTEMMEM.
+    Until now we didn't bother to track for SYSTEMMEM,
+    because we hadn't implemented using the dirty region
+    to avoid some copies
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 5d327b373531861f86a726db669b3d656f1b5f8d
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sun Jun 14 16:40:00 2015 +0100
+commit 8a61894cdb0f97df48775f5a16ba60d33519e36a
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 3 23:46:14 2015 +0200
 
-    docs: Add sha256sums for the 10.6.0 release
+    st/nine: Add missing BASETEX_REGISTER_UPDATE calls
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    If the texture is bound and dirty_mip is true,
+    BASETEX_REGISTER_UPDATE adds the texture to the list
+    of things to update before the next draw call.
+    
+    Some calls to it were missing.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 3b9cde5c8138fb5cc45c652f2a5c15c5fa222bd7
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sun Jun 14 16:26:40 2015 +0100
+commit e4f69bc394e1b92e3d1dabf320258425c2876091
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 3 23:28:25 2015 +0200
 
-    docs: Update 10.6.0 release notes
+    st/nine: SetAutoGenFilterType should regenerate the sublevels
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    It should regenerate the sublevels according to the spec
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 9719f26cc603f9eface0802a917f7b2167810b04
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Sun Jun 14 16:18:43 2015 +0100
+commit b75f830166eaf294e43746f1bf5630f7f2dcf30f
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 3 22:57:13 2015 +0200
 
-    Bump version to 10.6.0(final)
+    st/nine: Simplify NineVolume9_CopyVolume
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    We had only one usage for this function.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 81027ea1e2dba8541877255e969fb6c1a8d93904
-Author: Erik Faye-Lund <kusmabite@gmail.com>
-Date:   Wed Jun 10 23:35:04 2015 +0100
+commit bc42c29013209f80182a634e32c702edb2327b22
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun May 3 22:40:38 2015 +0200
 
-    mesa: build xmlconfig to a separate static library
+    st/nine: Split NineSurface9_CopySurface
     
-    As we use the file from both the dri modules and loader, we end up with
-    multiple definition of the symbols provided in our gallium dri  modules.
-    Additionally we compile the file twice.
+    NineSurface9_CopySurface was supporting more cases than what
+    we needed, and doing checks that were inappropriate for
+    some NineSurface9_CopySurface use cases.
     
-    Resolve both issues, effectively enabling the build on toolchains which
-    don't support -Wl,--allow-multiple-definition.
+    This patch splits it into two for the two use cases, and moves
+    the checks to the caller.
     
-    v2: [Emil Velikov]
-     - Fix the Scons/Android build.
-     - Resolve libgbm build issues (bring back the missing -lm)
+    This patch also adds a few checks to NineDevice9_UpdateSurface
     
-    Cc: Julien Isorce <j.isorce@samsung.com>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90310
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90905
-    Acked-by: Matt Turner <mattst88@gmail.com>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 634f2002563b4fca68490c0a39518ea838f28fb1)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit a439cafc7cdc93e2f9f45ed8a809cd34a04207a9
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Wed Apr 15 14:34:00 2015 +0100
+commit 3f36ad732c7fbe8fd38aa852c3699101fdd5041d
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Tue May 5 23:51:24 2015 +0200
 
-    targets/nine: link against libnir/libglsl_util
+    st/nine: Simplify Volume9 dirty region tracking
     
-    Based on commit 101142c4010(xa: support for drivers which use NIR)
+    Similar to what was done for Surface9, track the dirty region
+    only in VolumeTexture9.
     
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90466
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 83b5648a1e0b7c21536af18c0d29da2f2a31215e)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 1c2a462125acf644a17c07067c487dc3f07a32ca
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Wed Apr 15 12:46:30 2015 +0100
+commit ab0643225e2718884eea874b67b55eb4aa936e53
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Nov 8 13:02:47 2014 +0100
 
-    pipe-loader: add libnir and libglsl_util to the link
+    util/u_blitter: implement alpha blending for pipe->blit
+
+commit 23da32a9234065e0a16e91ef2f54f1e1d9bf52e5
+Author: Christoph Bumiller <e0425955@student.tuwien.ac.at>
+Date:   Sat May 31 23:13:24 2014 +0200
+
+    gallium: Add blending to pipe blit
     
-    Based on commit 101142c4010(xa: support for drivers which use NIR)
+    This type of blending is used for gallium nine software cursor
     
-    Cc: Rob Clark <robclark@freedesktop.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90466
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit ba512cc7fa5db0aeeb2fc0708920914cd3a5bf95)
+    Signed-off-by: David Heidelberg <david@ixit.cz>
 
-commit 279b1d85cc93cb6eda8eff92d1f3a1ce4415884b
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Wed Apr 15 13:40:55 2015 +0100
+commit a30684712ee9a3ef6738de8c357134a01a24924c
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat Mar 21 22:28:59 2015 +0100
 
-    mesa; add a dummy _mesa_error_no_memory() symbol  to libglsl_util
+    st/nine: Revert to sw cursor in case of failure to set hw cursor
     
-    Rather than forcing everyone to provide their own definition of the symbol
-    provide a common (dummy) one.
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
+
+commit df6f1f77cc63db3e6a7c105f3e7bab246b576eb9
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat Mar 21 22:23:56 2015 +0100
+
+    st/nine: Do not call ID3DPresent_GetCursorPos for sw cursor
     
-    This helps us resolve the build of the standalone pipe-drivers (amongst
-    others), which are missing the symbol.
+    For sw cursor we do not tell wine the cursor position (the app
+    tells us directly). We shouldn't use ID3DPresent_GetCursorPos.
     
-    Cc: Rob Clark <robclark@freedesktop.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 1df5a6c71ee4a3c08b5da3f8bae24880af16b74c)
+    device->cursor.pos already contains the coordinates the app
+    gave us.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
 
-commit 28b3e4f9257d1deaa8b860de4ae03dd7f6456d75
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Wed Apr 15 11:28:38 2015 +0100
+commit 78b304e2f9f9ad9b97ac8665a44af09567159800
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Mar 22 18:48:07 2015 +0100
 
-    freedreno: use CXX linker rather than explicit link against libstdc++
+    st/nine: Force hw cursor for Windowed mode
     
-    Cc: Rob Clark <robclark@freedesktop.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 3f5dc9b94fc47f25821cec0a052df3d8f4cb5a1f)
+    According to the spec, Windowed mode must
+    have hw cursor
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
 
-commit ea3d26eeb4db1a8510e5a99c702d5b9db828973d
-Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
-Date:   Wed May 13 12:18:31 2015 +0200
+commit 1b20eaff67fba9ac6310511c79d6de8851c39145
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat Mar 21 22:21:14 2015 +0100
 
-    mesa/main: validate name syntax for array variables only
+    st/nine: Hide hardware cursor when we don't use it
     
-    From ARB_program_interface_query:
+    We have either hardware cursor or software cursor.
+    When we use software cursor, we should hide the hardware
+    cursor.
     
-     "Note that if an interface enumerates a single active resource list
-     entry for an array variable (e.g., "a[0]"), a <name> identifying
-     any array element other than the first (e.g., "a[1]") is not
-     considered to match."
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
+
+commit 34708783833bb99e9e42cd40a745fed1354317f2
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sun Feb 15 21:30:44 2015 +0100
+
+    st/nine: fix D3DRS_DITHERENABLE wrong state group
     
-    It doesn't apply to arrays of interface blocks but just to array
-    variables.
+    D3DRS_DITHERENABLE was assigned to the rasterizer state
+    group, but it was used for the blend group.
     
-    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
-    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
-    (cherry picked from commit 4ee69a97bb0af0cc216539c48b246ea2abf8f208)
+    Assign it to the blend group.
+    
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 39aa6b8bea1383c7fe1ff04a3637248a3bbe4fc5
-Author: Dave Airlie <airlied@gmail.com>
-Date:   Wed Jun 10 13:51:59 2015 +1000
+commit 1b645df2f309833cca5dbeaa2deceeda36a7426c
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Sun Apr 19 10:14:30 2015 +0200
 
-    st/dri: check pscreen is valid before querying param
+    st/nine: Account POINTSIZE_MIN and POINTSIZE_MAX for point size
     
-    we don't check the validity of pscreen until dri_init_screen_helper
+    When using D3DRS_POINTSIZE make sure the value is at least
+    D3DRS_POINTSIZE_MIN but not greater than D3DRS_POINTSIZE_MAX.
     
-    hit this trying to init glamor on a device with no driver (udl).
+    Fixes some Wine tests.
     
-    Acked-by: Michel Dänzer <michel.daenzer@amd.com>
-    Signed-off-by: Dave Airlie <airlied@redhat.com>
-    (cherry picked from commit 563706c14641fde2ab604d590b5425680354f280)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit 1a47d37c994c51479d9c24a59b9d4944dd2db26c
-Author: Matt Turner <mattst88@gmail.com>
-Date:   Tue Jun 2 17:46:38 2015 -0700
+commit 886227d363b7e889e2a9162c27059e2a9fe79cb7
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Tue May 12 07:27:37 2015 +0200
 
-    i965: Use UW-typed immediate in multiply inst.
+    st/nine: Align texture memory
     
-    Some hardware reads only the low 16-bits even if the type is UD, but
-    other hardware like Cherryview can't handle this.
+    Align texture memory on 32 byte boundary to allow
+    SSE/AVX memcpy to work on locked rects.
     
-    Fixes spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple on
-    Cherryview.
+    This fixes some crashes with games using SSE.
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90830
-    Reviewed-by: Neil Roberts <neil@linux.intel.com>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    (cherry picked from commit d46d04529b9c1e55b4c3b65a7078bbbd7ab1a810)
+    Reviewed-by: David Heidelberg <david@ixit.cz>
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit a2f216b329b97c5e033615e269a11228007d5e32
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Fri May 29 13:41:48 2015 +0100
+commit 3c4864fa5539d6b34863c28f3650289fd2a4ce93
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat May 16 18:41:51 2015 +0200
 
-    i965: Don't add base_binding_table_index if it's zero
+    st/nine: Always set point_quad_rasterization to 1
     
-    When calculating the binding table index for non-constant sampler
-    array indexing it needs to add the base binding table index which is a
-    constant within the generated code. Often this base is zero so we can
-    avoid a redundant instruction in that case.
+    Both Points and Point Sprites are rasterized like quads,
+    according to d3d9 doc and gallium rasterizer doc.
     
-    It looks like nothing in shader-db is doing non-constant sampler array
-    indexing so this patch doesn't make any difference but it might be
-    worth having anyway.
-    
-    Reviewed-by: Matt Turner <mattst88@gmail.com>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    Acked-by: Ben Widawsky <ben@bwidawsk.net>
-    (cherry picked from commit 7f62fdae1629d75dd581d1c57b28c2f099c5ef6b)
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
 
-commit 9b8d49278183abb05861490f99a91fe6e433932b
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Thu May 28 15:27:31 2015 +0100
+commit 74de849bd44ee4a99d36d4d0c43d2a82a6ec07c2
+Author: Axel Davy <axel.davy@ens.fr>
+Date:   Sat May 16 22:41:26 2015 +0200
 
-    i965: Don't use a temporary when generating an indirect sample
-    
-    Previously when generating the send instruction for a sample
-    instruction with an indirect sampler it would use the destination
-    register as a temporary store. This breaks when used in combination
-    with the opt_sampler_eot optimisation because that forces the
-    destination to be null. This patch fixes that by avoiding the temp
-    register altogether.
+    st/nine: Fix Swizzle for ATI2 format
     
-    The reason the temporary register was needed was because it was trying
-    to ensure the binding table index doesn't overflow a byte by and'ing
-    it with 0xff. The result is then or'd with samper_index<<8. This patch
-    instead just and's the whole thing by 0xfff. This will ensure that a
-    bogus sampler index won't overflow into the rest of the message
-    descriptor but unlike the previous code it won't ensure that the
-    binding table index doesn't overflow into the sampler index. It
-    doesn't seem like that should matter very much though because if the
-    shader is generating a bogus sampler index then it's going to just get
-    garbage out either way.
+    We had red and green in the wrong channels
+    for the ATI2 format (RGTC2).
     
-    Instead of doing sampler_index<<8|(sampler_index+base_table_index) the
-    new code avoids one operation by doing
-    sampler_index*0x101+base_table_index which should be equivalent.
-    However if we wanted to avoid the multiply for some reason we could do
-    this by adding an extra or instruction still without needing the
-    temporary register.
+    Found thanks to wine tests.
     
-    This fixes a number of Piglit tests on Skylake that were using
-    indirect samplers such as:
+    Signed-off-by: Axel Davy <axel.davy@ens.fr>
+    Reviewed-by: David Heidelberg <david@ixit.cz>
+
+commit cb2d680232d64d614db93e2d011bb446e863e384
+Author: Patrick Rudolph <siro@das-labor.org>
+Date:   Mon May 25 10:36:21 2015 +0200
+
+    target/d3dadapter9: Return Windows like card names
     
-     spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple
+    Add support for multiple cards and fill in Win
+    like card name, driver name and version info.
+    Use fallback for unknown vendors and unknown card names.
     
-    Reviewed-by: Matt Turner <mattst88@gmail.com>
-    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
-    Acked-by: Ben Widawsky <ben@bwidawsk.net>
-    Tested-by: Anuj Phogat <anuj.phogat@gmail.com>
-    (cherry picked from commit 6c846dc57b1d6f3e015a604dba1976f96c4be9e9)
+    Reviewed-by: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Patrick Rudolph <siro@das-labor.org>
 
-commit ef9020570448b520ecdf7d088003130e36211589
-Author: Ben Widawsky <benjamin.widawsky@intel.com>
-Date:   Wed May 27 17:55:02 2015 -0700
+commit 56717c0b069a20b0c4438ac1dc9280cd9026b36f
+Author: David Heidelberg <david@ixit.cz>
+Date:   Sat Apr 11 00:13:53 2015 +0200
 
-    i965: Disable compaction for EOT send messages
+    st/nine: Require gcc >= 4.6
     
-    AFAICT, there is no real way to make sure a send message with EOT is properly
-    ignored from compact, nor can I see a way to actually encode EOT while
-    compacting. Before the single send optimization we'd always bail because we hit
-    the is_immediate && !is_compactable_immediate case. However, with single send,
-    is_immediate is not true, and so we end up trying to compact the un-compactible.
+    Nine code uses some C11 features, and this
+    leads to compile error on gcc <= 4.5
     
-    Without this, any compacting single send instruction will hang because the EOT
-    isn't there. I am not sure how I didn't hit this when I originally enabled the
-    optimization.  I didn't check if some surrounding code changed.
+    Another way would have been to use the
+    -fms-extensions CFLAG
     
-    I know Neil and Matt were both looking into this. I did a quick search and
-    didn't see any patches out there to handle this. Please ignore if this has
-    already been sent by someone. (Direct me to it and I will review it).
+    Signed-off-by: David Heidelberg <david@ixit.cz>
+    Cc: "10.4 10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 365d631eb220e7d546a726d6f38f76e2e3746d65
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Aug 21 15:08:15 2015 -0400
+
+    glsl: fix error message when validating tcs output decls
     
-    Reported-by: Neil Roberts <neil@linux.intel.com>
-    Reported-by: Mark Janes <mark.a.janes@intel.com>
-    Tested-by: Mark Janes <mark.a.janes@intel.com>
-    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
-    Reviewed-by: Matt Turner <mattst88@gmail.com>
-    (cherry picked from commit b307921c3ff3b36607752f881a180272366a79cf)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
 
-commit c6184b2b5c61b306e29b1a746b2b577520c9b691
-Author: Alexander Monakov <amonakov@gmail.com>
-Date:   Tue Jun 9 20:58:22 2015 +0300
+commit 3b4d03d44044f683d9b194c069cd3c1e6acb105f
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Aug 10 17:27:19 2015 -0400
 
-    i965: do_blit_drawpixels: decode array formats
+    relnote updates
     
-    Correct a regression introduced by commit 922c0c9fd526 by converting "array
-    format", if received from _mesa_format_from_format_and_type, to mesa_format.
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 3525aa1dc9c27fb2394a37788a29c272b3a81d1b
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Aug 20 20:06:50 2015 -0400
+
+    st/mesa: pass through 4th opcode argument in bitmap/pixel visitors
     
-    References: https://bugs.freedesktop.org/show_bug.cgi?id=90839
-    Signed-off-by: Alexander Monakov <amonakov@gmail.com>
-    Tested-by: AnAkkk <anakin.cs@gmail.com>
-    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit bd38f91f8d80897ca91979962d80d4bc0acef586)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
 
-commit bf538839ed48b29b81e00181f7dee6859c87cdee
-Author: Iago Toral Quiroga <itoral@igalia.com>
-Date:   Wed Jun 10 09:07:32 2015 +0200
+commit 681efdf7a18b73ce06989cb2d3299e3feabdb5f5
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Aug 20 19:59:04 2015 -0400
 
-    i965: do not round line width when multisampling or antialiaing are enabled
+    st/mesa: fix assignments with 4-operand arguments (i.e. BFI)
     
-    In commit fe74fee8fa721a we rounded the line width to the nearest integer to
-    match the GLES3 spec requirements stated in section 13.4.2.1, but that seems
-    to break a dEQP test that renders wide lines in some multisampling scenarios.
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit f142e64b2938ab3ebc42fd40436a1de74e3ed2e2
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Fri Aug 21 16:25:14 2015 +0300
+
+    i965: allow image_size on float images
     
-    Ian noted that the Open 4.4 spec has the following similar text:
+    This got missed because the piglit test only tested int images to avoid a
+    combinatorial explosion of format, targets, stages and sizes which
+    takes more than 5 minutes to test on nvidia's driver.
     
-        "The actual width of non-antialiased lines is determined by rounding the
-        supplied width to the nearest integer, then clamping it to the
-        implementation-dependent maximum non-antialiased line width."
+    This patch also drops the IMAGE_FUNCTION_AVAIL_ATOMIC which is not applicable
+    to the image_size codepath but was not hurting in any way.
     
-    and suggested that when ES removed antialiased lines, they removed
-    "non-antialised" from that paragraph but probably should not have.
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit df5cdec1329507d5ac52a6d3db49c2608b9226bc
+Author: Zoltan Gilian <zoltan.gilian@gmail.com>
+Date:   Wed Aug 19 11:56:08 2015 +0200
+
+    clover: fix llvm 3.5 build error
     
-    Going by that note, this patch restricts the quantization implemented in
-    fe74fee8fa721a only to regular aliased lines. This seems to keep the
-    tests fixed with that commit passing while fixing the broken test.
+    There is no MDOperand in llvm 3.5.
     
-    v2:
-      - Drop one of the clamps (Ken, Marius)
-      - Add a rule to prevent advertising line widths that when rounded go beyond
-        the limits allowed by the hardware (Ken)
-      - Update comments in the code accordingly (Ian)
-      - Put the code in a utility function (Ian)
+    v2: Check if kernel metadata is present to avoid crash (EdB).
+    v3: Second attempt to avoid crash: switch off metadata query for llvm < 3.6.
     
-    Fixes:
-    dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide
+    Reviewed-by: Serge Martin (EdB) <edb+mesa@sigluy.net>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 7eda897bf05dc572dbe83f3a1075b773b0c65708
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Thu Aug 20 10:25:59 2015 +0300
+
+    mesa: update fbo state in glTexStorage
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90749
+    We have to re-validate FBOs rendering to the texture like is done
+    with TexImage and CopyTexImage.
     
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91673
     Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit f9a18acb56c69b24c1e47cd326dc98e14fadcf94)
 
-commit 7abb5e3f1307f21738f0233a27410ce49a4274e1
-Author: Dave Airlie <airlied@gmail.com>
-Date:   Wed Jun 10 13:26:56 2015 +1000
+commit 8cae9f2fda37b9868ea973a665e1acc115172b45
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 18 22:19:12 2015 -0700
 
-    nouveau: set imported buffers to what the kernel gives us
+    vc4: Add algebraic opt for rcp(1.0).
     
-    When we import a dma-buf fd from another driver the kernel
-    gives us the right info, and this trashes it.
+    We're generating rcps as part of backend lowering of the packed coordinate
+    in the CS, and we don't want to lower them in NIR because of the extra
+    newton-raphson steps in the common case.  However, GLB2.7 is moving a
+    vertex attribute with a 1.0 W component to the position, and that makes us
+    produce some silly RCPs.
     
-    Convert the kernel bo flags into the domain flags.
+    total instructions in shared programs: 97590 -> 97580 (-0.01%)
+    instructions in affected programs:     74 -> 64 (-13.51%)
+
+commit c800fef2e2c65f7d81215cb316de6f73b15ba6c5
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 18 22:07:47 2015 -0700
+
+    vc4: Allow unpack_8[abcd]_f's src to stay in r4.
     
-    This helps getting reverse prime and glamor working.
+    I had QPU emit code to do it, but forgot to flag the register class.
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Acked-by: Ben Skeggs <bskeggs@redhat.com>
-    Signed-off-by: Dave Airlie <airlied@redhat.com>
-    (cherry picked from commit c6877c9e5983287a0741b26a358b7d744aebe232)
+    total instructions in shared programs: 97974 -> 97590 (-0.39%)
+    instructions in affected programs:     25291 -> 24907 (-1.52%)
 
-commit 2353b2197c409ed028d89bfa90ba489c576f32c3
-Author: Jason Ekstrand <jason.ekstrand@intel.com>
-Date:   Sat Jun 6 12:15:30 2015 -0700
+commit 8b36d107fdd6f6b91556fcdc3498df16803d4181
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 18 21:26:05 2015 -0700
 
-    i965/fs: Don't let the EOT send message interfere with the MRF hack
+    vc4: Pack the unorm-packing bits into a src MUL instruction when possible.
     
-    Previously, we just put the message for the EOT send as high in the file as
-    it would go.  This is because the register pre-filling hardware will stop
-    all over the early registers in the file in preparation for the next thread
-    while you're still sending the last message.  However, if something happens
-    to spill, then the MRF hack interferes with the EOT send message and, if
-    things aren't scheduled nicely, will stomp on it.
+    Now that we do non-SSA QIR instructions, we can take a NIR SSA src that's
+    only used by the unorm packing and just stuff the pack bits into it.
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90520
-    Reviewed-by: Neil Roberts <neil@linux.intel.com>
-    (cherry picked from commit 86e5afbfee5492235cab1a7be4ea49ac02be1644)
+    total instructions in shared programs: 98136 -> 97974 (-0.17%)
+    instructions in affected programs:     4149 -> 3987 (-3.90%)
 
-commit 10b7dba331dfeb9c8476e5e28709ca965a821be1
-Author: Chris Wilson <chris@chris-wilson.co.uk>
-Date:   Fri Jun 5 14:45:18 2015 +0100
+commit 572a48366d9dfac6a7f9ee8f4d29832c496125e2
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 18 21:43:42 2015 -0700
 
-    i965: Export format comparison for blitting between miptrees
+    vc4: Add a QIR helper for whether the op is a MUL type.
+
+commit fd74da11c48dcd9098d4f64508aae65775c68b75
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 18 20:18:51 2015 -0700
+
+    vc4: Drop an unused algebraic op.
     
-    Since the introduction of
+    NIR now handles this optimization for us.
+
+commit 98728ce0718e49864b872beb76fc3afbf341b38a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Aug 5 20:54:02 2015 -0700
+
+    vc4: Switch QPU_PACK_SCALED to be two non-SSA instructions.
     
-    commit 536003c11e4cb1172c540932ce3cce06f03bf44e
-    Author: Boyan Ding <boyan.j.ding@gmail.com>
-    Date:   Wed Mar 25 19:36:54 2015 +0800
+    total instructions in shared programs: 98159 -> 98136 (-0.02%)
+    instructions in affected programs:     12279 -> 12256 (-0.19%)
+
+commit 69ef08d303cdf153fe2432a7e40faccae5d62aab
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Aug 5 20:31:21 2015 -0700
+
+    vc4: Make the pack-to-unorm instructions be non-SSA.
     
-        i965: Add XRGB8888 format to intel_screen_make_configs
+    This helps ensure that the register allocator doesn't force the later pack
+    operations to insert extra MOVs.
     
-    winsys buffers no longer have an alpha channel. This causes
-    _mesa_format_matches_format_and_type() to reject previously working BGRA
-    uploads from using the BLT fast path. Instead of using the generic
-    routine for matching formats exactly, export the slightly more relaxed
-    check from intel_miptree_blit() which importantly allows the blitter
-    routine to apply a small number of format conversions.
+    total instructions in shared programs: 98170 -> 98159 (-0.01%)
+    instructions in affected programs:     2134 -> 2123 (-0.52%)
+
+commit 0bba4fa070583f5fd8a0f7208fbfa181dc25e71b
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Aug 3 19:25:47 2015 -0700
+
+    vc4: Allow QIR registers to be non-SSA.
     
-    References: https://bugs.freedesktop.org/show_bug.cgi?id=90839
-    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-    Cc: Jason Ekstrand <jason@jlekstrand.net>
-    Cc: Alexander Monakov <amonakov@gmail.com>
-    Cc: Kristian Høgsberg <krh@bitplanet.net>
-    Cc: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 922c0c9fd526ce19b87bc74a3159dec7705c1de1)
+    Now that we have NIR, most of the optimization we still need to do is
+    peepholes on instruction selection rather than general dataflow
+    operations.  This means we want to be able to have QIR be a lot closer to
+    the actual QPU instructions, just with virtual registers.  Allowing
+    multiple instructions writing the same register opens up a lot of
+    possibilities.
 
-commit 55104870a14d03c803b100b66cf5886478d52de1
-Author: Chris Wilson <chris@chris-wilson.co.uk>
-Date:   Fri Jun 5 14:33:36 2015 +0100
+commit ceb1a318424bf219eace29955ae473c1ccf9f8b8
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Aug 5 20:11:07 2015 -0700
 
-    i915: Blit RGBX<->RGBA drawpixels
+    vc4: We can now move TEX_RESULT accesses across other r4 ops.
     
-    The blitter already has code to accommodate filling in the alpha channel
-    for BGRX destination formats, so expand this to also allow filling the
-    alpha channgel in RGBX formats.
+    No difference on shader-db.
+
+commit ad89748541159968787dce02bb9c19d9367fddc6
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed May 27 20:12:42 2015 +1000
+
+    glsl: fix binding validation for interface blocks
     
-    More importantly for the next patch is moving the test into its own
-    function for the purpose of exporting the check to the callers.
+    V2: rebase on SSBO changes
     
-    v2: Fix alpha expansion as spotted by Alexander with the fix suggested by
-    Kenneth
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit dd6a6dbaf707c120f6db38036985fcc258ebe294
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sun Aug 16 14:26:23 2015 +1000
+
+    glsl: interleave constant propagation and folding
+    
+    The constant folding pass can take a long time to complete
+    so rather than running through the entire pass each time
+    a new constant is propagated (and vice versa) interleave them.
     
-    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-    Cc: Jason Ekstrand <jason@jlekstrand.net>
-    Cc: Alexander Monakov <amonakov@gmail.com>
-    Cc: Kristian Høgsberg <krh@bitplanet.net>
-    Cc: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by Kenneth Graunke <kenneth@whitecape.org>
-    Cc: mesa-stable@lists.freedesktop.org
+    This change helps ES31-CTS.arrays_of_arrays.InteractionFunctionCalls1
+    go from around 2 min -> 23 sec.
     
-    (cherry picked from commit c2d0606827412b710dcaed80268fc665de8c9c5d)
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
 
-commit fbc04dcddb9e24dec1e878050d4161c1f4da4093
-Author: Chris Wilson <chris@chris-wilson.co.uk>
-Date:   Fri Jun 5 13:49:08 2015 +0100
+commit 8483577f6b393c26dc21f6693e44760404ba6fcb
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Aug 20 22:13:48 2015 -0400
 
-    i965: Fix HW blitter pitch limits
-    
-    The BLT pitch is specified in bytes for linear surfaces and in dwords
-    for tiled surfaces. In both cases the programmable limit is 32,767, so
-    adjust the check to compensate for the effect of tiling.
-    
-    v2: Tweak whitespace for functions (Kenneth)
+    nv50/ir: pre-compute BFE arg when both bits and offset are imm
     
-    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-    Cc: Kristian Høgsberg <krh@bitplanet.net>
-    Cc: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by Kenneth Graunke <kenneth@whitecape.org>
-    Cc: mesa-stable@lists.freedesktop.org
+    Due to a quirk in how the nv50 opt passes run, the algebraic
+    optimization that looks for these BFE's happens before the constant
+    folding pass. Rearranging these passes isn't a great idea, but this is
+    easy enough to fix. Allows a following cvt to eliminate the bfe in
+    certain situations.
     
-    (cherry picked from commit 8da79b8378ae87474d8c47ad955e4833edf98359)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 8631c00acbf17b078d5572b7f8f6ec4a91fac7e9
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Thu Jun 4 17:00:17 2015 -0700
+commit ecebd3dbfcb769b44e99733279c8fb0745818708
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Aug 19 18:43:47 2015 -0400
 
-    prog_to_nir: Fix fragment depth writes.
-    
-    In the ARB_fragment_program specification, the result.depth output
-    variable is treated as a vec4, where the fragment depth is stored in the
-    .z component, and the other three components are undefined.
+    glsl: expose textureQueryLod in GLSL 4.00+ fragment shaders
     
-    This is different than GLSL, which uses a scalar value (gl_FragDepth).
+    See issue from the ARB_texture_query_lod spec for LOD vs Lod confusion:
     
-    To make this consistent for driver backends, this patch makes
-    prog_to_nir use a scalar output variable for FRAG_RESULT_DEPTH,
-    moving result.depth.z into the first component.
+        (3) The core specification uses the "Lod" spelling, not "LOD".  Should
+            this extension be modified to use "Lod"?
     
-    Fixes Glean's fragProg1 "Z-write test" subtest.
+          RESOLVED: The "Lod" spelling is the correct spelling for the core
+          specification and the preferred spelling for use. However, use of
+          "LOD" also exists, as the extension predated the core specification,
+          so this extension won't remove use of "LOD".
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90000
-    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Matt Turner <mattst88@gmail.com>
-    (cherry picked from commit 7b8f20ec5505a25958bcd98aabe73a7ca2b6cbba)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
 
-commit add82b672d05b9a6c47b7929cdcdddf25e227b01
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Tue May 12 22:53:00 2015 +0200
+commit 29e953b07b8c1e4d27f53c4a1430154a3d67f896
+Author: Nanley Chery <nanley.g.chery@intel.com>
+Date:   Thu Aug 20 18:00:20 2015 -0700
 
-    clover: clarify and fix the EGL interop error case
+    Revert "mesa/formats: refactor by collapsing cases in switch statement by type"
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 6acb61fc9c2c5f81569d17d90a480abc48ec6055)
+    This reverts commit ffe6c6ad5f719dedd1b6b95e8590e3f20b23d340.
+    
+    _mesa_format_num_components() does not include the padding bits in mesa formats
+    containing 'X' channels. This could cause mipmap generation for certain
+    uncompressed formats to underestimate the number of channels in the source
+    image by 1.
+    
+    Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
 
-commit 3514680b918c2605e082990a411d972b65d41602
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Tue May 12 00:44:20 2015 +0200
+commit 4237dfb97815a50de7be464a0d62bd19e62d17b7
+Author: Glenn Kennard <glenn.kennard@gmail.com>
+Date:   Thu Aug 13 20:30:07 2015 +0200
 
-    egl: fix setting context flags
+    r600g: Fix handling of TGSI_OPCODE_ARR with SB
     
-    Cc: 10.6 10.5 10.4 <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Reviewed-by: Chad Versace <chad.versace@intel.com>
-    (cherry picked from commit f9f894447e4e7442d5dfa489bb43f2823e2fc71d)
+    FLT_TO_INT goes in the vector pipes on evergreen/NI,
+    not the trans unit as on earlier chips.
+    
+    Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
 
-commit 4f68af254e55967d7b14f9cf8ced5f2509f0706b
-Author: Roland Scheidegger <sroland@vmware.com>
-Date:   Thu Jun 4 14:35:59 2015 +0200
+commit 7a32652231f96eac14c4bfce02afe77b4132fb77
+Author: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+Date:   Wed Aug 19 18:58:47 2015 +1000
+
+    r600: Turn 'r600_shader_key' struct into union
+    
+    This struct was getting a bit crowded, following the lead of
+    radeonsi, mirror the idea of having sub-structures for each
+    shader type. Turning 'r600_shader_key' into a union saves
+    some trivial memory and CPU cycles for the shader keys.
+    
+    [airlied: drop as_ls, and reorder so larger fields at start.]
+    Signed-off-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
 
-    draw: (trivial) fix NULL pointer dereference
+commit e2145de74d6333f099613c595c5c46f79f54e59f
+Author: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+Date:   Wed Aug 19 18:58:46 2015 +1000
+
+    r600: Rewrite r600_shader_selector_key() to use a switch stmt
     
-    This probably got broken when the samplers were converted to be indexed
-    by shader type.
-    Seen when looking at bug 89819 though I'm not sure if that really was what
-    the bug was about...
+    Signed-off-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit bbf8291bf869e219bd0e71063bf26a060682a000
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 3 08:17:42 2015 -0700
+
+    i965: Use NIR by default for vertex shaders
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Shader-db results for vec4 on i965:
     
-    Reviewed-by: Brian Paul <brianp@vmware.com>
-    (cherry picked from commit 6e5970ffee0129fb94d8b7f0ebd4fac3992e7dce)
+       total instructions in shared programs: 1499894 -> 1502261 (0.16%)
+       instructions in affected programs:     1414224 -> 1416591 (0.17%)
+       helped:                                2434
+       HURT:                                  10543
+       GAINED:                                1
+       LOST:                                  0
+    
+    Shader-db results for vec4 on g4x:
+    
+       total instructions in shared programs: 1437411 -> 1439779 (0.16%)
+       instructions in affected programs:     1362402 -> 1364770 (0.17%)
+       helped:                                2434
+       HURT:                                  10544
+       GAINED:                                0
+       LOST:                                  0
+    
+    Shader-db results for vec4 on Iron Lake:
+    
+       total instructions in shared programs: 1437214 -> 1439593 (0.17%)
+       instructions in affected programs:     1362205 -> 1364584 (0.17%)
+       helped:                                2433
+       HURT:                                  10544
+       GAINED:                                1
+       LOST:                                  0
+    
+    Shader-db results for vec4 on Sandy Bridge:
+    
+       total instructions in shared programs: 2022092 -> 1941570 (-3.98%)
+       instructions in affected programs:     1886838 -> 1806316 (-4.27%)
+       helped:                                7510
+       HURT:                                  10737
+       GAINED:                                0
+       LOST:                                  0
+    
+    Shader-db results for vec4 on Ivy Bridge:
+    
+       total instructions in shared programs: 1853749 -> 1804960 (-2.63%)
+       instructions in affected programs:     1686736 -> 1637947 (-2.89%)
+       helped:                                6735
+       HURT:                                  11101
+       GAINED:                                0
+       LOST:                                  0
+    
+    Shader-db results for vec4 on Haswell:
+    
+       total instructions in shared programs: 1853749 -> 1804960 (-2.63%)
+       instructions in affected programs:     1686736 -> 1637947 (-2.89%)
+       helped:                                6735
+       HURT:                                  11101
+       GAINED:                                0
+       LOST:                                  0
+    
+    Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 70ac6a2655334b3e4f662ca228c2f3681305e52d
-Author: Kenneth Graunke <kenneth@whitecape.org>
-Date:   Thu Jun 4 01:24:07 2015 -0700
+commit 6921f170b62d9f9c0e5bd2cb6666c15395addba8
+Author: Kai Wasserbäch <kai@dev.carbon-project.org>
+Date:   Fri Aug 14 14:49:43 2015 +0200
 
-    prog_to_nir: Make RSQ properly take the absolute value of its argument.
+    glsl: check if return_deref in lower_subroutine_visitor::visit_leave isn't NULL
     
-    I just botched this when writing the original code.
+    Fixes a crash in Piglit's
+    spec@arb_shader_subroutine@linker@no-mutual-recursion.vert for me.
     
-    From the ARB_vertex_program specification:
-    "The RSQ instruction approximates the reciprocal of the square root of
-     the absolute value of the scalar operand and replicates it to all four
-     components of the result vector."
+    Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 3e6adbd761f72b612aba57fd86bb5203aae07133
+Author: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
+Date:   Sun Jan 11 22:40:22 2015 +0100
+
+    nv50/ir: Handle OP_CVT when folding constant expressions
     
-    Fixes a Glean vertProg1 subtest:
-    RSQ test 2 (reciprocal square root of negative value)
+    [imirkin: handle more type combinations, use macro]
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit f5b926183ded75661ab3f786ac1739b1f912c6c5
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Aug 18 23:16:32 2015 -0400
+
+    nvc0/ir: undo more shifts still by allowing a pre-SHL to occur
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90547
-    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
-    (cherry picked from commit 15a12795c6f3edef0e1cbab39b6da3d5b8f64fc3)
+    This happens with unpackSnorm lowering. There's yet another
+    bitfield-extract behind it, but there's too much variation to be worth
+    cutting through.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 9dc43dc361434e7f88fa42e8b6e12f990afd29ce
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Wed Jun 3 18:12:31 2015 +0100
+commit 9ebe7dc09479d9a8df2733ef96525a2b5e758f6d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Aug 18 22:53:11 2015 -0400
 
-    Increment version to 10.6.0-rc3
+    nvc0/ir: don't require AND when the high byte is being addressed
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    unpackUnorm* lowering doesn't AND the high byte/word as it's
+    unnecessary. Detect that situation as well.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 038fc5a7f74b7a034c04d7b066b08f07886f2f92
+commit 63cb85e567ad1025ee990b38f43c2f1ef811821b
 Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Thu Mar 5 12:10:15 2015 -0500
+Date:   Tue Aug 18 21:09:12 2015 -0400
 
-    nv30: avoid doing extra work on clear and hitting unexpected states
+    nvc0/ir: detect i2f/i2i which operate on specific bytes/words
     
-    Clearing can happen at a time when various state objects are incoherent
-    and not ready for a draw. Some of the validation functions don't handle
-    this well, so only flush the framebuffer state. This has the advantage
-    of also not doing extra work.
+    Some Unigine shaders have been observed to unpack bytes out of 32-bit
+    integers and convert them to floats. I2F/I2I can handle this sort of
+    thing directly. Detect the handleable situations.
     
-    This works around some crashes that can happen when clearing.
+    This misses 16-bit word capabilities in nv50, but I haven't seen shaders
+    that would actually make use of that.
     
     Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
-    (cherry picked from commit aba3392541f38f82e3ebde251fdcca78e90adbf3)
 
-commit 66e1ee52ad698db60a2c1b4033054245523f6063
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Tue May 26 19:32:36 2015 +0200
+commit 51499bb5ff5626b893383545c494c7f808763404
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Aug 18 21:07:33 2015 -0400
 
-    st/dri: fix postprocessing crash when there's no depth buffer
+    nvc0/ir: detect AND/SHR pairs and convert into EXTBF
     
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89131
+    Some shaders appear to extract bits using shift/and combos. Detect
+    (some) of those and convert to EXTBF instead.
     
-    Cc: 10.6 10.5 <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Brian Paul <brianp@vmware.com>
-    (cherry picked from commit 25e9ae2b79f32631e7255807a242e5fc4e39984c)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit bc8fa4311eeca4fa748aba2c176363b6e60d75f0
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:56:45 2015 -0700
+commit 2a4af36517333ef61d5f7ca2264fec3f49ee3662
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Sat Jun 20 02:00:15 2015 +0800
 
-    mesa: Enable ARB_direct_state_access by default for core profile
+    nv50/ir: support different unordered_set implementations
     
-    And core profile only.
+    If built with the C++11 standard, use std::unordered_set.
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Reviewed-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 1ca60de4c00e864bffbee8265f631b2267c8ea29)
+    Otherwise, if built on an old Android version with stlport,
+    use std::tr1::unordered_set with a wrapper class.
+    
+    Otherwise use std::tr1::unordered_set.
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit eafe639aeef72f1c148dfa110357e8aab0599a09
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Wed May 20 20:19:07 2015 -0700
+commit 56ebd3314bfc5895fab47586fc8cda024aac4fd8
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Thu Aug 20 15:15:56 2015 +0300
 
-    dispatch_sanity: Validate the compatibility profile dispatch table too
+    i965: Fix "handle nir_intrinsic_image_size"
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Suggested-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit ef4dd0fc3e6b5ffbad6bd286ef9c6c25d0b25bae)
+    I pushed a half-baked version of "i965: handle nir_intrinsic_image_size" by
+    accident. Not having the Reviewed-by: tags on the last two commits should
+    have been a red flag but I somehow missed it after the QA check.
+    
+    This patch should fix image-size for non-int images. I will add support to
+    the piglit test for all the other image types.
+    
+    Sorry for the noise.
+    
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
 
-commit 97eb22e959e92da090fa7461255e116edffa195b
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Wed May 20 20:17:19 2015 -0700
+commit e5851cff45169f4e635299da4ed5b41aeb0d2f83
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Wed Apr 29 12:42:16 2015 +0300
 
-    dispatch_sanity: Split list of GL 3.1 functions in to core and common
+    i965: enable GL_ARB_shader_image_size
     
-    The next patch will add a test for compatibility profile dispatch, and
-    it seems to make more sense to share the lists.
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 50db9c1db645c1a4d5777d2cacfd7ac74ebbe544
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Wed Apr 29 12:39:16 2015 +0300
+
+    i965: handle nir_intrinsic_image_size
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 49ab670f52947dda04585cc5156e55b89f0c1c4a)
+    v2, Review from Francisco Jerez:
+    - avoid the camelCase for the booleans
+    - init the booleans using the sampler type
+    - force the initialization of all the components of the output register
     
-    Conflicts:
-    	src/mesa/main/tests/dispatch_sanity.cpp
+    v3:
+    - Rename a variable from CubeMapArray to CubeArray to re-use GLSL's name (Ilia)
+    - Fix some indentation and drop parentheses (Topi)
+    - Fix a signed/unsigned comparison warning
+    
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
 
-commit 964d358bc1fbb23ecf30250551350b17f79e6061
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Wed May 20 20:13:12 2015 -0700
+commit 80b1707e26734ac9c957cfc876ab5893f1749c74
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Tue Aug 11 17:42:12 2015 +0300
 
-    mesa: Don't install glVertexAttribL* functions in compatibility profile
+    nir: convert the glsl intrinsic image_size to nir_intrinsic_image_size
     
-    GL_ARB_vertex_attrib_64bit is exclusive to core profile, and none of the
-    other functions added by the extension are advertised in other profiles.
+    v2, review from Francisco Jerez:
+     - make the destination variable as large as what the nir intrinsic
+       defines (4) instead of the size of the return variable of glsl. This
+       is still safe for the already existing code because all the intrinsics
+       affected returned the same amount of components as expected by glsl IR.
+       In the case of image_size, it is not possible to do so because the
+       number of returned components depends on the image type and this case
+       is not well handled by nir.
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Cc: Dave Airlie <airlied@redhat.com>
-    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit a6fa74e6bb65f852ad1608f43dd0731e854ea42f)
+    v3:
+    - Style fix
+    
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
 
-commit 384ee736e76a49c0da8c27f0841867261778b292
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Tue May 19 11:48:11 2015 -0700
+commit 58a8689717a5965919934d1b4b44b4eef0a6bf64
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Mon Apr 27 19:25:34 2015 +0300
 
-    glapi: Make GL_ARB_direct_state_access functions exclusive to core profile
+    glsl: add support for the imageSize builtin
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Cc: Dave Airlie <airlied@redhat.com>
-    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: Dylan Baker <baker.dylan.c@gmail.com>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 4e5efa9e7ddb6d5273996cf9b09677d918759d17)
+    The code is heavily inspired by Francisco Jerez's code supporting the
+    image_load_store extension.
+    
+    Backends willing to support this builtin should handle
+    __intrinsic_image_size.
+    
+    v2: Based on the review of Ilia Mirkin
+    - Enable the extension for GLES 3.1
+    - Fix indentation
+    - Fix the return type (float to int, number of components for CubeImages)
+    - Add a warning related to GLES 3.1
+    
+    v3: Based on the review of Francisco Jerez
+    - Refactor the code to share both add_image_function and _image with the other
+      image-related functions
+    
+    v4: Based on Topi Pohjolainen's comments
+    - Do not add parentheses for the return value
+    
+    v5: based on Francisco Jerez's comments:
+    - Fix a few indent issues
+    - Reduce the size of a condition by testing the dimension and array properties
+      instead of enumerating all the formats.
+    
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
 
-commit 3599928fc6fb0ffd9e10d812f4a67b53265815ae
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Tue May 19 11:24:26 2015 -0700
+commit 3d93f65ef2dbecbf615ee8041d92354ae660d71b
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Mon Apr 27 20:05:14 2015 +0300
 
-    glapi: Store exec table version info outside the XML
+    main: add extension GL_ARB_shader_image_size
     
-    Currently on the functions that are exclusive to core-profile are
-    implemented.  The remainder continue to live in the XML.  Additional
-    functions can be moved later.
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit f6c622f58432b0c3cb80bc2ed41e314abf876e03
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Aug 20 13:46:53 2015 +0300
+
+    docs: Mark GLES 3.1 image load/store as done on i965.
+
+commit f5070c801c4885521df17a65c297f24ae628d414
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Aug 19 14:42:50 2015 +0300
+
+    mesa: Add ES31 API tag for the extension table.
     
-    The functions for GL_ARB_draw_indirect and GL_ARB_multi_draw_indirect
-    are put in the dispatch table inside the VBO module, so they do not need
-    to be moved over.
+    I'll mark the OES_shader_image_atomic extension entry with this tag to
+    make sure that we don't expose it on earlier GLES API versions
+    accidentally, because according to the extension:
     
-    The diff of src/mesa/main/api_exec.c before and after this patch is as
-    expected.  All of the functions listed in apiexec.py moved out of a 'if
-    (_mesa_is_desktop(ctx))' block into a new 'if (ctx->API ==
-    API_OPENGL_CORE)' block.
+     "OpenGL ES 3.1 and GLSL ES 3.10 are required."
     
-    v2: Remove stray shebang line in apiexec.py.  Suggested by Ilia.
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 6379f5cb2ab7c5ae3f3769204f95088e35c53217
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:47:50 2015 +0300
+
+    glsl: Parse the allowed image format qualifiers in GLSL ES 3.1.
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: Dave Airlie <airlied@redhat.com>
-    Cc: Dylan Baker <baker.dylan.c@gmail.com>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit f20899b7276b73e1b60c3ed8d8abdf959e787c0c)
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    This includes the minimum required desktop/ES GLSL version in the
+    format qualifier table in anticipation of new GLSL versions extending
+    the set of supported image formats.  According to section 4.4.7 of the
+    GLSL ES 3.1 spec:
+    
+    "The format layout qualifier identifiers for image variable
+     declarations are:
+     [...]
+     rgba32f
+     rgba16f
+     r32f
+     rgba8
+     rgba8_snorm
+     [...]
+     rgba32i
+     rgba16i
+     rgba8i
+     r32i
+     [...]
+     rgba32ui
+     rgba16ui
+     rgba8ui
+     r32ui"
     
-    Conflicts:
-    	src/mapi/glapi/gen/gl_genexec.py
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
 
-commit 03304290056a4f683f307f176605d2ae9450243c
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:55:04 2015 -0700
+commit e3fb2e1f0e160573c3d164818f556c7f6725835e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 19:12:00 2015 +0300
 
-    Revert "mesa: Add an extension flag for ARB_direct_state_access"
+    glsl: Recognise image memory qualifiers in GLSL ES 3.1.
     
-    This reverts commit 30dcaaec356cc117d7227c6680620cd50ff534e7.
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 307c382c1b514629c342587d2f320f5491de9b65
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 17:42:30 2015 +0300
+
+    glsl: Define image-related built-in constants required by GLSL ES 3.1.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 5c4aab58ee79a8bfa3d96f3ec442f37da587ff45)
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
 
-commit 828eeb65dc8ccff90bb6af6d29d86df391ba6e3e
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Wed May 20 17:19:29 2015 -0700
+commit a976b7255b2c84e6cccefb447029add02ddf86ae
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:39:38 2015 +0300
 
-    mesa: Use the profile instead of an extension bit to validate GL_TEXTURE_CUBE_MAP
+    glsl: Remove duplicate definition of gl_MaxTess*ImageUniforms built-in constants.
     
-    The extension on which this depends will always be enabled in core
-    profile, and the extension bit is about to be removed.
+    These seem to have been re-added at some point during the
+    ARB_tessellation_shader implementation work.  AFAICT the second
+    (correct) definition of each constant would have had no effect because
+    the symbols were already defined.
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Reviewed-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 832ea2345a96388950bb39ce8a2e4ca8bfdb4fe5)
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
 
-commit 74e7328281dbdfcdaa86057c3dcdcc6bf387f92a
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:55 2015 -0700
+commit 9d0bb6be097080e2568b7b6cc18f2bf800fed1b1
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:38:00 2015 +0300
 
-    Revert "mesa: Add ARB_direct_state_access checks in XFB functions"
+    glsl: Accept atomic_uint type in GLSL ES 3.1.
     
-    This reverts commit 7d212765a470972f4712e42caf6406b257220369.
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit d589df94013bd04b23ef88cdac6debe04e3075a1
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:37:12 2015 +0300
+
+    glsl: Accept supported image types in GLSL ES 3.1.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 90e98ea215906bb7e9ecadc4d30d2718ba2186ad)
+    These are a subset of the image types supported by desktop GL,
+    excluding 1D, 1D array, rectangle, buffer, cube array, 2D MS and 2D
+    MS array texture targets.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
 
-commit 9e71637022a505d1f464a91c02b06cffa137b885
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:39 2015 -0700
+commit 6da187f80560b44b59551757c1322e921d8ca025
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:34:41 2015 +0300
 
-    Revert "mesa: Add ARB_direct_state_access checks in buffer object functions"
+    glsl: Expose image load and store built-ins in GLSL ES 3.1.
     
-    This reverts commit 339ed0984d4f54fca91235a1df2ce3a850f6123f.
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 76a09c87c14f65d8ad8bd805ef03101f4455a24f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:34:13 2015 +0300
+
+    glsl: Use a separate availability class for image atomic built-ins.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit cab233f277936f4cdc49aa0bbfc7ed1a85c925f1)
+    These are not part of unextended GLSL ES 3.1.
+    
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
 
-commit 83eed1ea9028068f5fec5feca59035380d043382
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:35 2015 -0700
+commit 26b1141a78cfec0e2eface4b399009ee5eb421df
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:28:57 2015 +0300
 
-    Revert "mesa: Add ARB_direct_state_access checks in FBO functions"
+    glsl: Allow precision qualifiers on general opaque types.
     
-    This reverts commit 6ad0b7e07a0445e9e0f368e079c4f7b8a6757bb3.
+    From the GLSL ES 3.1 spec, section 4.7.3:
+     "Any floating point, integer, opaque type declaration can have the
+      type preceded by one of these precision qualifiers: [...] highp
+      [...], mediump [...], lowp [...]."
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 8bcd14fab9a86276980a8859740999a1db4c55d5)
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
 
-commit 7ddacf6df33fc0843cf99110cb1af6194bee0e7d
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:29 2015 -0700
+commit ee7bf349d865b18ca2827508fb947b7e549c7fc6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:27:43 2015 +0300
 
-    Revert "mesa: Add ARB_direct_state_access checks in renderbuffer functions"
+    glsl: Implement GLSL ES restriction on images being either readonly or writeonly.
     
-    This reverts commit cb49940766b581c6656473d89c221653c69fa0f9.
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 527ae5d4286e76fc2ec3d70f4b6cea3798539372
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:26:40 2015 +0300
+
+    glsl: Require that all image uniforms have a format qualifier in GLSL ES.
+    
+    Note that this is slightly more permissive than the spec language
+    requires: "Any image variable must specify a format layout qualifier."
+    
+    The GLSL ES spec seems really sketchy regarding format layout
+    qualifiers on function formal parameters -- On the one hand they are
+    required, but on the other hand it doesn't provide any syntax to
+    specify them (see section 6.1.1), they don't participate in parameter
+    type matching for overload resolution, and are in fact explicitly
+    forbidden ("Layout qualifiers cannot be used on formal function
+    parameters").  Of course none of the image built-in functions defined
+    by the spec specify format layout qualifiers (and they probably
+    couldn't sensibly), to contradict its own requirement.
+    
+    This probably qualifies for a spec bug, but in the meantime do the
+    sensible thing and require layout qualifiers on uniforms *only*.
+    
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 241774aa03d6dda5fe4cd86c1988f1678d4c0e5f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:25:11 2015 +0300
+
+    glsl: Add support for image binding qualifiers.
+    
+    Support for binding an image to an image unit explicitly in the shader
+    source is required by both GLSL 4.2 and GLSL ES 3.1, but not by the
+    original ARB_shader_image_load_store extension.
+    
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit ebf1196d82eeed2f7863984ec33d26538a97b531
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:21:01 2015 +0300
+
+    glsl: Forbid non-constant image array indexing in GLSL ES 3.1.
+    
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 6c161405dc191f3d4083a48dfc033d4401b8055a
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 02:05:43 2015 +0300
+
+    mesa: Refuse to bind image uniforms using glUniform in GLES.
+    
+    The GLES 3.1 spec removed support for updating the image unit bound to
+    an image uniform using glUniform1i() calls.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 86a64dfccfd3e98befa1e58f1a252cf993e5b680
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 02:02:17 2015 +0300
+
+    mesa: Refuse to bind a mutable texture object to an image unit in GLES.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit d70edfcfd53b50da0c4bb7aad909302dca6d4722
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 02:01:40 2015 +0300
+
+    mesa: Initialize image unit state to different defaults in GLES.
+    
+    There is no GL_R8 image format in GLES; according to state table
+    20.32 of the GLES 3.1 spec, the default value should be GL_R32UI.  The
+    ES31-CTS.shader_image_load_store.basic-api-bind Khronos conformance
+    test checks that this is the case.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 3b70f7900ba14e83081d8ebcd1b827ba605a8e8b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 02:00:48 2015 +0300
+
+    mesa: Reset image unit state to the default values when a bound image is deleted.
+    
+    The ES31-CTS.shader_image_load_store.basic-api-bind conformance test
+    expects the whole image unit state to be reset when the bound texture
+    object is deleted.  The ARB_shader_image_load_store extension is
+    rather vague regarding what should happen with image unit state other
+    than the texture object in that case, but the GL 4.2 and GLES 3.1
+    specifications (section "Automatic Unbinding of Deleted Objects")
+    explicitly require it to be reset to the default values.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 10b7cf1a01cf0467f5c8d1cea08d8f93e401a454
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:58:53 2015 +0300
+
+    mesa: Reject image formats not supported by GLES.
+    
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit b97d8c95a91773dc002e3ba42bd02e84a00eada3
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 01:53:48 2015 +0300
+
+    mesa: Don't lose track of the shader image layer originally specified by the user.
+    
+    The spec requires that all layers of the image starting from the 0-th
+    are bound to the image unit regardless of the Layer parameter when
+    Layered is true, so I was setting gl_image_unit::Layer to zero in that
+    case for the convenience of the driver back-end.  However the
+    ES31-CTS.shader_image_load_store.basic-api-bind conformance test
+    checks that the layer value returned by glGetInteger is the same that
+    was originally specified, regardless of the value of layered.  Rename
+    Layer to _Layer as is usual for other derived state and keep track of
+    the original layer value as gl_image_unit::Layer.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 47e0d5b9b28b0753adda70cbfb3ad111ba6169a8
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Aug 17 19:10:46 2015 +0300
+
+    mesa: Rename MaxCombinedImageUnitsAndFragmentOutputs to MaxCombinedShaderOutputResources.
+    
+    The name of both the GLSL built-in variable and the glGetInteger param
+    with the same value changed in GLSL ES 3.1 and GL 4.5.  Its semantics
+    also changed slightly, since the limit now also takes into account the
+    number of SSBs in use.  Switch our internal data structures to the
+    up-to-date name.
+    
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 3547d9855c2103ecc5001a082965d3dda5d69d34
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Sun Aug 16 07:37:37 2015 +1000
+
+    GL: update glext to svn 31811
+    
+    This brings in the new ARB extensions.
+    
+    Acked-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit ab83be590d4b45f50461a004b672f7640a4d8f53
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Aug 12 11:26:34 2015 -0700
+
+    nir: Use nir_builder in nir_lower_io's get_io_offset().
+    
+    Much more readable.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit ed2afec3fc2210ee737216981a41df8a396b11f6
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Aug 12 10:57:31 2015 -0700
+
+    nir: Pull nir_lower_io's load_op selection into a helper function.
+    
+    Makes the function a bit smaller.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit ffe6c6ad5f719dedd1b6b95e8590e3f20b23d340
+Author: Nanley Chery <nanley.g.chery@intel.com>
+Date:   Tue Aug 11 11:56:35 2015 -0700
+
+    mesa/formats: refactor by collapsing cases in switch statement by type
+    
+    Combine the adjacent cases which have the same GL type in the switch statement.
+    
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+    Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
+
+commit 0872b042b13388bc870a3acf167a6ce692b734dd
+Author: Nanley Chery <nanley.g.chery@intel.com>
+Date:   Fri Aug 7 14:36:23 2015 -0700
+
+    mesa/formats: add more MESA_FORMAT_LAYOUTs
+    
+    Add the classes of compressed formats as layouts. This allows the detection
+    of compressed formats belonging to a certain category of compressed formats.
+    
+    v2. simplify layout name construction (Ilia).
+    
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+    Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
+
+commit 2438e2fe326d7cb9f9d003f6edf77821e41ef22c
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon Aug 10 13:04:42 2015 +0200
+
+    glsl: Fix up GL_ARB_compute_shader for GLSL ES 3.1
+    
+    GL_ARB_compute_shader is limited to GLSL version 430.
+    This enables it for GLSL ES version 310.
+    
+    V2: Updated error string to also include GLSL 3.10
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@linux.intel.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 3df7856b4625f21ebf5465bad2556583b808c8bf
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Wed Aug 12 11:57:39 2015 +0200
+
+    mesa/main: Add GL_IMAGE_FORMAT_COMPATIBILITY_TYPE to glGetTexParameterfv
+    
+    According to the OpenGL ES 3.1 specification, section 8.10.2,
+    GL_IMAGE_FORMAT_COMPATIBILITY_TYPE should be supported by
+    glGetTexParameterfv.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@linux.intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 3b1e283d884b0c5c93c32d3e4a0325fbf2e96234
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Aug 19 00:56:33 2015 +0200
+
+    radeonsi: fix a typo as_es -> as_ls in a string
+    
+    Trivial.
+
+commit 5fb0180592b124857e2e0369e2cdee74bd552bb9
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 17 19:55:57 2015 +0200
+
+    winsys/amdgpu: fix the type of memory usage counters
+    
+    If the 32-bit types overflowed, the driver could submit an IB that uses much
+    more memory than is available.
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 421b809db10b939d9baf7b904560c37b68261aaf
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Aug 15 11:51:48 2015 +0200
+
+    radeonsi: fix indirect indexing of MSAA textures
+    
+    FMASK wasn't handled correctly.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit f33a7ab150ea01f3550904fe3c56fcad32ce85e0
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jul 20 15:19:53 2015 -0400
+
+    st/mesa: add fake ARB_copy_image support in Gallium
+    
+    This support should be removed in favor of something that actually works
+    in all the weird cases. However this is simple and is enough to allow
+    Bioshock Infinite to render properly on nvc0.
+    
+    Since the functionality is not implemented correctly, the extension will
+    not appear in the extension string and mesa will still return
+    INVALID_OPERATION for any glCopyImageSubData calls. In order to make use
+    of this functionality, run with
+    MESA_EXTENSION_OVERRIDE=GL_ARB_copy_image
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 89759381dbfe3784bc780a3ab6e0fe13e77e06ef
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Aug 17 12:11:03 2015 +0300
+
+    glsl: enable textureSize and texelFetch on GLSL ES 3.10 with MS samplers
+    
+    Patch separates array samplers from the texture_multisample check so that we
+    can enable only [iu]sampler2DMS; [iu]sampler2DMSArray are not supported.
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 6a8e08cb890b47394e667ec86d9a678060c19e61
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Aug 17 10:14:35 2015 +0300
+
+    mesa: validate size parameters for glTexStorage*Multisample
+    
+    v2: code cleanup
+    v3: check only dimensions, samples is checked separately later
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit a342becc490b38436396e98ee653a6b81c037663
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Aug 10 10:50:06 2015 +0300
+
+    mesa: expose dimension check for glTex*Storage functions
+    
+    This is done so that following patch can use it to verify dimensions
+    for multisample variants of glTex*Storage.
+    
+    v2: move function to header, use bool instead of GLboolean
+    v3: small changes, cleanup
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 2b40a144b7a589ebd5e80c9a8a8241c53201035e
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Wed Aug 19 04:17:49 2015 +0200
+
+    util/ra: (trivial) fix c99 loop variable initialization
+    
+    Fails with old msvc otherwise.
+
+commit 3f797ef0c038afecb8702cc90a452903694975a2
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Wed Aug 19 04:17:36 2015 +0200
+
+    util: (trivial) include c99_math.h in rounding.h
+    
+    Needed for rint/rintf.
+
+commit c03247bae010dfd81a08572a32067e9ea8637f63
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Mon Jul 13 18:01:13 2015 +0100
+
+    i965/bdw: Fix setting the instancing state for the SGVS element
+    
+    When gl_VertexID or gl_InstanceID is used a 3DSTATE_VF_SGVS
+    instruction is sent to create a sort of element to store the generated
+    values. The last instruction in this chunk of code looks like it was
+    trying to set the instancing state for the element using the
+    3DSTATE_VF_INSTANCING instruction. However it was sending
+    brw->vb.nr_buffers instead of the element index. This instruction is
+    supposed to take an element index and that is how it is used further
+    down in the function so the previous code looks wrong. Perhaps
+    previously the number of buffers coincidentally matched the number of
+    enabled elements so the value was generally correct anyway. In a
+    subsequent patch I want to change a bit how it chooses the SGVS
+    element index so this needs to be fixed.
+    
+    v2 [by Ben]
+    Remove 10.5 stable tag (it's too late now)
+    Commit update as follows:
+    The number of vertex buffers emitted is always <= the number of vertex elements.
+    To maximize reuse (actually, to minimize relocations - according to the code
+    comments), a vertex buffer is only emitted once, even when we setup multiple
+    components (3DSTATE_VERTEX_ELEMENT) from that buffer. This meant that the
+    previous code would use the wrong indexed element for these reuse cases. This
+    patch by itself prevents hangs on BSW in the linked bug. It doesn't make the
+    test pass, the remaining patches are needed for that.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91610
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+
+commit f01bdb0484dd5224b183526d020ee3f2888cac45
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sat Aug 15 09:58:32 2015 -0700
+
+    util/ra: Make allocating conflict lists optional
+    
+    Since i965 is now using make_reg_conflicts_transitive and doesn't need
+    q-value computations, they are disabled on i965.  They are enabled
+    everywhere else so that they get the old behavior.  This reduces the time
+    spent in eglInitialize() on BDW by around 10-15%.
+    
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit c3b21f2d56d77c8c11115bf110a5e25e9dd7e3d5
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sat Aug 15 09:50:11 2015 -0700
+
+    i965/reg_allocate: Use make_reg_conflicts_transitive
+    
+    Instead of adding transitive conflicts as we go, we now add regular
+    conflicts and then make them all transitive at the end.  This reduces
+    screen creation time substantially on BDW.  The time spent in eglInitialize
+    is reduced from 27.78 ms/call to 9.92 ms/call in debug mode and from 13.15
+    ms/call to 4.54 ms/call in release mode (about 65% in either case).
+    
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 9b49284c223b284295675775d4344f066b4455db
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sat Aug 15 09:43:05 2015 -0700
+
+    util/ra: Add a function for making all conflicts on a register transitive
+    
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 7c8e53f1bee370c1a8a0c640313c12df220f4114
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sat Aug 15 09:30:40 2015 -0700
+
+    util/bitset: Add a BITSET_FOREACH_SET macro
+    
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 6ff3341fc77c8e22a62505eb374938db3c95144f
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 4 10:43:58 2015 -0700
+
+    mesa: Move varying slots and FS output names to shader_enums.h
+    
+    They're used by glsl_to_nir.cpp, and I want to use them in TGSI-to-NIR as
+    well (our use of the var->index slot to store slot properties no longer
+    works since it got truncated).
+    
+    The *_MAX defines are left in mtypes.h, because they depend on config.h.
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit fdacadc87c708b519a8a4e35b1d551773ca95f4c
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Thu Aug 13 23:26:01 2015 +1000
+
+    mesa: undo split out of create shader code
+    
+    This code was split out into a separate function to be used also
+    by GL_EXT_separate_shader_objects which has since been removed from
+    Mesa, so move it back.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 4a0bea38635e29b20701855131fa2b5dd6d3978f
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Tue Aug 18 15:07:02 2015 -0400
+
+    freedreno: use fd_pipe_wait_timeout()
+    
+    To properly support the case of waiting on a fence with a 0 timeout, we
+    still need to call down to the kernel.  Which requires the use of the
+    new fd_pipe_wait_timeout() API.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit fd7a14f8ddeae5fc868a8d452445d708505cc930
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Aug 16 19:18:22 2015 -0400
+
+    freedreno: fence fix
+    
+    Don't take current timestamp/fence from current ring, as we might have
+    already rolled over to new rb.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 885762e18291eb4dc0b449297c3a78f7c036bcde
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Mon Aug 10 17:31:02 2015 +0100
+
+    Add mesa.icd to the .gitignore
+    
+    Since 4d7e0fa8c731776 this file is generated by the configure script.
+    Reviewed-by: Tapani Palli <tapani.palli@intel.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit ec6af4f54cbfe1a779af7245f1bd65c9e5a00dce
+Author: Richard Yao <ryao@gentoo.org>
+Date:   Wed Aug 12 12:48:22 2015 -0400
+
+    drirc: Add "Unigine Oil Rush" quirk (allow_glsl_extension_directive_midshader).
+    
+    Appears to fix shader compilation. Tested by starting the client and observing
+    that the screen was correct after the trailers ran when previously, it was
+    blank. Play tested on amd64.
+    
+    This was suggested by "Kuuchan" on the Steam forums:
+    
+    https://steamcommunity.com/app/200390/discussions/0/540731690861139279/?insideModal=1#c594820656479479870
+    
+    Acked-by: Matt Turner <mattst88@gmail.com>
+    Signed-off-by: Richard Yao <ryao@gentoo.org>
+
+commit 49d0a36bd6593ce09486678a7bf3d500af5e265c
+Author: Thomas Helland <thomashelland90@gmail.com>
+Date:   Thu Aug 6 13:36:05 2015 +0200
+
+    nir: Simplify feq(fneg(a), a) -> feq(a, 0.0)
+    
+    The positive and negative value of a float can only
+    be equal to each other if it is -0.0f or 0.0f.
+    This is safe for NaN and Inf, as -NaN != NaN, and -Inf != Inf.
+    This gives no changes in my shader-db.
+    
+    Signed-off-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit a39167d5949c76dfb48994caead4b59ab5f80318
+Author: Thomas Helland <thomashelland90@gmail.com>
+Date:   Thu Aug 6 13:36:04 2015 +0200
+
+    nir: Simplify fne(fneg(a), a) -> fne(a, 0.0)
+    
+    -NaN != NaN, and -Inf != Inf, so this should be safe.
+    Found while working on my VRP pass.
+    
+    Shader-db results on my IVB:
+    total instructions in shared programs: 1698267 -> 1698067 (-0.01%)
+    instructions in affected programs:     15785 -> 15585 (-1.27%)
+    helped:                                36
+    HURT:                                  0
+    GAINED:                                0
+    LOST:                                  0
+    
+    Some shaders were found to have the following pattern in NIR:
+    vec1 ssa_26 = fneg ssa_21
+    vec1 ssa_27 = fne ssa_21, ssa_26
+    
+    Make that:
+    vec1 ssa_27 = fne ssa_21, 0.0f
+    
+    This is found in Dota2 and Brutal Legend.
+    One shader is cut by 8%, from 323 -> 296 instructions in SIMD8
+    
+    Signed-off-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 29264d0d0cd5ca24314630d9dc22b3f971344a34
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:17 2015 +1000
+
+    i965/gen7: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/gen7_sol_state.c: In function 'gen7_upload_3dstate_so_decl_list':
+    mesa/src/mesa/drivers/dri/i965/gen7_sol_state.c:119:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < linked_xfb_info->NumOutputs; i++) {
+                          ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 94bdb50c0b02160d0b391eafc68259ab78310d37
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:16 2015 +1000
+
+    i965/gen6: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/gen6_vs_state.c: In function 'gen6_upload_push_constants':
+    mesa/src/mesa/drivers/dri/i965/gen6_vs_state.c:85:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+           for (i = 0; i < prog_data->nr_params; i++) {
+                         ^
+    mesa/src/mesa/drivers/dri/i965/gen6_vs_state.c:92:17: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+       for (i = 0; i < prog_data->nr_params; i++) {
+                     ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 1d1056c4e3b7125a5092730837fff2a9585e99ab
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:15 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_vs_surface_state.c: In function 'brw_upload_pull_constants':
+    mesa/src/mesa/drivers/dri/i965/brw_vs_surface_state.c:84:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < prog_data->nr_pull_params; i++) {
+                      ^
+    mesa/src/mesa/drivers/dri/i965/brw_vs_surface_state.c:89:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+           for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
+                         ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 30694b3f42d0da4fb106561fc898279babb498ad
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:14 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_wm_surface_state.c: In function 'brw_upload_abo_surfaces':
+    mesa/src/mesa/drivers/dri/i965/brw_wm_surface_state.c:961:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < prog->NumAtomicBuffers; i++) {
+                          ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 5fb58012be6f783d735ded79582aa46a2c71e0fd
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:13 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_wm_surface_state.c: In function 'brw_upload_ubo_surfaces':
+    mesa/src/mesa/drivers/dri/i965/brw_wm_surface_state.c:901:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < shader->NumUniformBlocks; i++) {
+                          ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 6625ca2370bc9b3927cf5c50be9220513320ca26
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:12 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_tex_layout.c: In function 'brw_miptree_layout_texture_array':
+    mesa/src/mesa/drivers/dri/i965/brw_tex_layout.c:560:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+           for (int q = 0; q < mt->level[level].depth; q++) {
+                             ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 1512b086d385ac9ee575e2f90cda6bec467505c7
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:11 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_state_cache.c: In function 'brw_try_upload_using_copy':
+    mesa/src/mesa/drivers/dri/i965/brw_state_cache.c:216:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < cache->size; i++) {
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 9febec0811a842644b6f4ea7cf20e845d9d55120
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:10 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_primitive_restart.c: In function 'can_cut_index_handle_prims':
+    mesa/src/mesa/drivers/dri/i965/brw_primitive_restart.c:94:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < nr_prims; i++) {
+                          ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 5be455281eca77955aff52f01c6adb9281f4d747
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:09 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_draw_upload.c: In function 'brw_prepare_vertices':
+    mesa/src/mesa/drivers/dri/i965/brw_draw_upload.c:434:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = j = 0; i < brw->vb.nr_enabled; i++) {
+                          ^
+    mesa/src/mesa/drivers/dri/i965/brw_draw_upload.c:557:17: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+       for (i = 0; i < nr_uploads; i++) {
+                     ^
+    mesa/src/mesa/drivers/dri/i965/brw_draw_upload.c:569:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < nr_uploads; i++) {
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 82bc45bb0863f895a6ac7c734bf127a7d0290eeb
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:08 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_draw.c: In function 'brw_draw_destroy':
+    mesa/src/mesa/drivers/dri/i965/brw_draw.c:630:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < brw->vb.nr_buffers; i++) {
+                      ^
+    mesa/src/mesa/drivers/dri/i965/brw_draw.c:636:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < brw->vb.nr_enabled; i++) {
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 4864977e51872bf4c810358f03cf56722f9efb9b
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:07 2015 +1000
+
+    mesa/egl: Resolve GCC sign-compare warning.
+    
+    mesa/src/egl/drivers/dri2/platform_drm.c: In function 'release_buffer':
+    mesa/src/egl/drivers/dri2/platform_drm.c:73:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
+                      ^
+    mesa/src/egl/drivers/dri2/platform_drm.c: In function 'has_free_buffers':
+    mesa/src/egl/drivers/dri2/platform_drm.c:87:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++)
+                      ^
+    mesa/src/egl/drivers/dri2/platform_drm.c: In function 'dri2_drm_destroy_surface':
+    mesa/src/egl/drivers/dri2/platform_drm.c:199:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
+                      ^
+    mesa/src/egl/drivers/dri2/platform_drm.c: In function 'get_back_bo':
+    mesa/src/egl/drivers/dri2/platform_drm.c:224:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+           for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
+                         ^
+    mesa/src/egl/drivers/dri2/platform_drm.c: In function 'dri2_drm_swap_buffers':
+    mesa/src/egl/drivers/dri2/platform_drm.c:425:24: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+              for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++)
+                            ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 1d8c6949285dc59e53c6145d5b93b6549981711a
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:06 2015 +1000
+
+    mesa/gbm: Resolve GCC sign-compare warning.
+    
+    mesa/src/gbm/main/backend.c: In function 'find_backend':
+    mesa/src/gbm/main/backend.c:70:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(backends); ++i) {
+                      ^
+    mesa/src/gbm/main/backend.c: In function '_gbm_create_device':
+    mesa/src/gbm/main/backend.c:95:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(backends) && dev == NULL; ++i) {
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit f5bb5b957a1ca33015400dd2274c7b73cb0e40f4
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:05 2015 +1000
+
+    mesa/glx: Resolve GCC sign-compare warning.
+    
+    mesa/src/glx/dri_common_query_renderer.c: In function 'dri2_convert_glx_query_renderer_attribs':
+    mesa/src/glx/dri_common_query_renderer.c:61:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(query_renderer_map); i++)
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit dc7a1effc41b1fea7a7138fae942ae57ccd63d1d
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:04 2015 +1000
+
+    mesa/glx: Resolve GCC sign-compare warning.
+    
+    mesa/src/glx/dri_common.c: In function 'scalarEqual':
+    mesa/src/glx/dri_common.c:259:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(attribMap); i++)
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit f4ef8d084ce68784037d7f0b2f68d263359e0e00
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:03 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/intel_screen.c: In function 'intel_screen_make_configs':
+    mesa/src/mesa/drivers/dri/i965/intel_screen.c:1222:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < ARRAY_SIZE(formats); i++) {
+                          ^
+    mesa/src/mesa/drivers/dri/i965/intel_screen.c:1259:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < ARRAY_SIZE(formats); i++) {
+                          ^
+    mesa/src/mesa/drivers/dri/i965/intel_screen.c:1291:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < ARRAY_SIZE(formats); i++) {
+                          ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit dc2b0b2067c686b802137ad34719e35545a67e6e
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:02 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/intel_fbo.c: In function 'intel_validate_framebuffer':
+    mesa/src/mesa/drivers/dri/i965/intel_fbo.c:734:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ARRAY_SIZE(fb->Attachment); i++) {
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 3637e1e7f69f9b344a98274e21f75220ca8674e6
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:01 2015 +1000
+
+    mesa: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/common/utils.c: In function 'driGetConfigAttrib':
+    mesa/src/mesa/drivers/dri/common/utils.c:457:19: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+         for (i = 0; i < ARRAY_SIZE(attribMap); i++)
+                       ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 65fe7c6ff112a979aa8c37bafcf54001e26e150f
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:34:00 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/intel_screen.c: In function 'aub_dump_bmp':
+    mesa/src/mesa/drivers/dri/i965/intel_screen.c:125:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+                          ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 2722284b94c5007d738a082c8c29c29972308e88
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:33:59 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/intel_fbo.c: In function 'intel_blit_framebuffer_with_blitter':
+    mesa/src/mesa/drivers/dri/i965/intel_fbo.c:836:21: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+           for (i = 0; i < drawFb->_NumColorDrawBuffers; i++) {
+                         ^
+    V2 (Thomas Helland):
+      -Use unsigned instead of GLuint (trivial)
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 89b285ba0e3503de05e97cbb915485302e2fd37d
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:33:58 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_wm_state.c: In function 'brw_color_buffer_write_enabled':
+    mesa/src/mesa/drivers/dri/i965/brw_wm_state.c:53:18: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+                      ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 3696e620f562aad5b14f01d70c68957b7fbe8a48
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Thu Aug 6 16:33:57 2015 +1000
+
+    i965: Resolve GCC sign-compare warning.
+    
+    mesa/src/mesa/drivers/dri/i965/brw_draw.c: In function 'brw_postdraw_set_buffers_need_resolve':
+    mesa/src/mesa/drivers/dri/i965/brw_draw.c:390:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
+        for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+                          ^
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 8f7ebcb6fad53ea6d2f80fc5b7a046db07690032
+Author: Adam Jackson <ajax@redhat.com>
+Date:   Fri Jul 31 11:32:58 2015 -0400
+
+    glx: Fix __glXWireToEvent for BufferSwapComplete
+    
+    In the DRI2 path this event is magically synthesized from the
+    corresponding DRI2 event, but with Present, the server sends us the
+    event itself. The DRI2 path fills in the serial number, send_event, and
+    display fields of the XEvent struct that the app sees, but the Present
+    path did not.
+    
+    This is likely related to a class of crashes seen in gtk/clutter apps:
+    
+    https://bugzilla.redhat.com/attachment.cgi?id=1032631
+    
+    Note that the crashing instruction is looking up the lock_fns slot in
+    the Display *, and %rdi (holding the Display *) is 0x1.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Signed-off-by: Adam Jackson <ajax@redhat.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 97f5d00648046a711b7806c3a47b28543c084f0e
+Author: Grazvydas Ignotas <notasas@gmail.com>
+Date:   Tue Aug 18 02:51:58 2015 +0300
+
+    radeon/uvd: remove unused variables
+    
+    Recent commits introduced new unused variable warnings, fix them.
+    
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit df97126731a745c1797c783414a44652be039d84
+Author: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
+Date:   Sun Aug 16 13:31:58 2015 -0300
+
+    nouveau: recognize tess stages in nouveau_compiler
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 723a5a2e6881e55b50b23c470d7591360f061dba
+Author: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
+Date:   Sun Aug 16 13:31:57 2015 -0300
+
+    tgsi: fix parsing of tessellation shader inputs/outputs
+    
+    Tessellation control shaders write to outputs as OUT[ADDR[0].x][0], make
+    sure to parse the indirect dimension on outputs.
+    
+    Also tess control inputs/outputs and tess eval input declarations need
+    to receive the same treatment as geometry shader inputs.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit a37fa7653bead4985668c359391bdf01dec8b084
+Author: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
+Date:   Sun Aug 16 13:31:56 2015 -0300
+
+    tgsi: set implicit array size for tess stages
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 46684d3ae3a4084b00355df3feeeb25159656a8e
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Thu Aug 13 18:44:51 2015 +1000
+
+    mesa: move non-generic samples validation
+    
+    The previous patch replaces the other use case.
+    
+    V2: remove the validation from its old location.
+    
+    Cc: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit d3ace603a97bdd031bdff7517728eff4d0fd6458
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Mon Aug 17 21:33:49 2015 +1000
+
+    mesa: check samples > 0 for glTex*Multisample
+    
+    The GL 4.5 spec says it's a GL_INVALID_VALUE error if samples equals 0 for
+    glTexImage*Multisample and a GL_INVALID_VALUE error if samples < 1 for
+    glTexStorage*Multisample.
+    
+    The spec says it's undefined what happens if glTexImage*Multisample is passed
+    a samples value < 0, but we already produce a GL_INVALID_VALUE error
+    in this case; this is also consistent with the Nvidia binary.
+    
+    Cc: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit 2450cbfcbc3671056afad9e858acadbb6edea068
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Aug 12 11:35:17 2015 -0700
+
+    i965/vec4/nir: Emit single MOV to generate a scalar constant.
+    
+    If an immediate is written to multiple channels, we can load it in a
+    single writemasked MOV.
+    
+    total instructions in shared programs: 6285144 -> 6261991 (-0.37%)
+    instructions in affected programs:     718991 -> 695838 (-3.22%)
+    helped:                                5762
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 5af71fb5ac4dafed3ecc4b849922208cab76070f
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Dec 3 03:29:38 2014 -0500
+
+    freedreno/a3xx: add s3tc texture format support
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 581cbfdec1eda9128a9ed7ff9b0ea13ec4d3e493
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Aug 16 13:15:55 2015 -0400
+
+    freedreno/a3xx: fix up logic for handling block formats
+    
+    This only appears in cubemaps which have packed layers, so are very
+    sensitive to any layout disagreement between sw and hw.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 12e1bf0b68b3c0e8dccb74384554f56d22eda961
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Aug 16 15:26:38 2015 -0400
+
+    freedreno/a3xx: double the polygon offset value
+    
+    A few other drivers do this, fixes the gl-1.4-polygon-offset piglit test
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 1af0641db345209c076e9b1ba4dca7524541671a
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Aug 17 04:08:01 2015 -0400
+
+    nvc0: implement the color buffer 0 is integer rule for alpha-to-one/cov
+    
+    The hardware checks for multisampling being enabled, but does not have
+    the rule about cbuf0 being an integer format. Only enable
+    alpha-to-one/alpha-to-coverage if cbuf0 is not an integer format.
+    
+    Fixes piglits
+      ext_framebuffer_multisample-int-draw-buffers-alpha-to-one
+      ext_framebuffer_multisample-int-draw-buffers-alpha-to-coverage
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 19a5a91ea49bd411f4d438d416000d49ecc2de7e
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Thu Aug 13 14:38:14 2015 +0200
+
+    mesa: Raise INVALID_VALUE from glCreateShaderProgramv if count < 0
+    
+    According to OpenGL version 4.5 and OpenGL ES 3.1 standards, section 7.3:
+    GL_INVALID_VALUE should be generated, if count is less than 0.
+    
+    V2: Changed title, eased OpenGL ES 3.1 restriction and added comments.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit dd9d2963d66d24394b20823fcffb809cc8d5389d
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Thu Aug 13 12:59:40 2015 +0200
+
+    mesa: AtomicBufferBindings should be initialized to zero.
+    
+    According to OpenGL specification version 4.5 table 23.46
+    and OpenGL ES specification version 3.1 table 20.31:
+    ATOMIC_COUNTER_BUFFER_START and ATOMIC_COUNTER_BUFFER_SIZE
+    should have the initial value of zero.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Erik Faye-Lund <kusmabite@gmail.com>
+
+commit a7e6f8cc9f72fcf4eaef814eb50d53ccffd58730
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Thu Aug 13 17:03:44 2015 +0300
+
+    mesa: fix target error checking in glGetTexLevelParameter
+    
+    With non-dsa functions we need to do target error checking before
+    _mesa_get_current_tex_object which would just call _mesa_problem without
+    raising GL_INVALID_ENUM error. In other places of Mesa, target gets checked
+    before this call.
+    
+    Fixes failures in:
+       ES31-CTS.texture_storage_multisample.APIGLGetTexLevelParameterifv.*
+    
+    v2: do the target check also for dsa functions (Timothy)
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 2f5ee9bf27b912726dea969a5e5159e1d6665f6c
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Aug 17 02:11:55 2015 -0400
+
+    gk110/ir: fix sched calculator to consider all registers in the ISA
+    
+    GK110/GK208 have 256 registers, not 64. Find out the number of registers
+    from the target to avoid unnecessary iteration for pre-GK110.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit e32325fc85f0d1485dec63b363c18d76f4e89714
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sat Aug 15 13:29:19 2015 +1000
+
+    mesa: rename texture function now that it's static
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit a0cea8f642688f9a51ee5cb96a3963372d14f14e
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Fri Aug 14 13:36:40 2015 +0300
+
+    glsl: add missing MS sampler builtin types for GLSL ES 3.10
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit f67dde0b0546779e422133f8b896a9b3669320c7
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Fri Aug 14 13:30:45 2015 +0200
+
+    mesa: Implement glMemoryBarrierByRegion
+    
+    The function glMemoryBarrierByRegion is part of OpenGL ES 3.1
+    and OpenGL 4.5 core and compatibility profiles.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit ae5cf4f3f7525c49d1cd012697e8e30db86a8890
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Aug 17 00:53:04 2015 -0400
+
+    nvc0: program smooth line width when multisampling is enabled
+    
+    There are separate line widths for smooth and aliased lines. The smooth
+    one is selected when multisampling is enabled even if line smoothing
+    isn't explicitly turned on.
+    
+    Fixes the ext_framebuffer_multisample-line-smooth piglits
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 884b4df3b6f3980bb75f20fd256f9e2cca4d9403
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jul 26 00:56:34 2015 -0400
+
+    nvc0: bind a fake tess control program when there isn't one available
+    
+    Apparently this is necessary in order for tess factors to work in a tess
+    eval program without a tess control program bound. Probably because it
+    uses the fake program's shader header to work out the number of patch
+    constants.
+    
+    Fixes vs-tes-tessinner-tessouter-inputs
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit f13073b7755e78306975a24f3286ff5a9c910a47
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 24 01:25:13 2015 -0400
+
+    gm107/ir: avoid letting the lowering pass get out of sync
+    
+    There's a lot of functionality duplicated in the gm107 lowering pass
+    from the nvc0 pass. As that one gets updated, the gm107 one falls
+    behind. Avoid this by sharing the code.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 2514c78fba507ca8ab94d2e6de553b8b20d653d2
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Aug 15 22:05:15 2015 -0400
+
+    nv50,nvc0: take level into account when doing eng2d multi-layer blits
+    
+    This fixes arb_get_texture_sub_image-get, and any situation where the 2d
+    engine was being used for multi-layer blits to a non-0 level.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit bb9881fccf4c3606e66ec607dfb0387c9e068e3d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Aug 15 21:42:02 2015 -0400
+
+    st/mesa: also move yoffset to zoffset for 1d array textures
+    
+    Do the same as in st_TexSubImage. This fixes
+    arb_get_texture_sub_image-get on llvmpipe when it is set to prefer
+    blits, and nouveau when it uses the 3d engine for blits.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit afccbd725655e6e607029a8b359886f62e4aa807
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue May 12 01:12:46 2015 -0700
+
+    nir: Add a glsl_uint_type() wrapper.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit ca628085b6753c1dc5c9bd59460b5fa8fe0b34f2
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Aug 7 23:11:45 2015 -0400
+
+    freedreno/a3xx: add per-texture seamless cubemap control
+    
+    The default is to enable seamless cubemap filtering, but there's a bit
+    to turn it off.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit b4ace13eeae7ec58262d8a3ec38adca63b6add76
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Aug 3 02:13:33 2015 -0400
+
+    freedreno/a4xx: add cube map array support
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 868b66fce7a156efda840c00088f89f4ba6163c9
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Aug 13 13:53:43 2015 -0400
+
+    freedreno/a4xx: fix srgb render targets
+    
+    Also fixes mipmap level generation for srgb textures.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit dd412c8fcb1c44846419523bb0e3956f38f578b5
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Aug 13 20:38:01 2015 -0400
+
+    freedreno: update generated headers
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit ee113bbbc51f7c19da5c873410fadabfdd4d4a6d
+Author: Vinson Lee <vlee@freedesktop.org>
+Date:   Fri Aug 14 15:19:49 2015 -0700
+
+    scons: Always define __STDC_LIMIT_MACROS.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91591
+    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit d19a98e2e63293e1dfd6947c18356e5bbefd2410
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Aug 14 10:49:46 2015 -0400
+
+    freedreno: expose OES exts for float linear filtering
+    
+    a4xx can do both float and half-float, while a3xx can only do half-float
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit d3e23f1ff915c01541f8df375b50b93b3da565a8
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Aug 14 15:58:28 2015 -0400
+
+    nvc0: disable tessellation on maxwell
+    
+    The address calculations are all different (e.g. see GP), there appear
+    to be syncs in programs, and probably a bunch of other differences.
+    Just disable it for now.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit a6e75e3cd74fd60200cc8dddc672a2d88495eb06
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Feb 6 16:24:36 2015 -0800
+
+    nir: Add support for CSE on textures.
+    
+    NIR instruction count results on i965:
+    total instructions in shared programs: 1261954 -> 1261937 (-0.00%)
+    instructions in affected programs:     455 -> 438 (-3.74%)
+    
+    One in yofrankie, two in tropics.  Apparently i965 had also optimized all
+    of these out anyway.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit fb2425a641dd7f891964e6f51b10cce63dff7d2c
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 11 17:10:35 2015 -0700
+
+    nir: Zero out texture instructions when creating them.
+    
+    There are so many flags in textures, that the CSE pass would have a hard
+    time referencing the correct set when figuring out if two texture ops are
+    the same.  By zeroing, we can avoid that fragility.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit bf3c50fba221f216e38d3f60f89161ced4c684c0
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Apr 13 21:36:24 2015 -0700
+
+    vc4: Move all of our fixed function fragment color handling to NIR.
+    
+    This massively reduces our dependency on VC4-specific optimization passes.
+    
+    shader-db:
+    total uniforms in shared programs: 32077 -> 32067 (-0.03%)
+    uniforms in affected programs:     149 -> 139 (-6.71%)
+    total instructions in shared programs: 98208 -> 98182 (-0.03%)
+    instructions in affected programs:     2154 -> 2128 (-1.21%)
+
+commit 38c6c0f5b499e2bcff2cc9607f67c0f1836f305b
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 31 09:02:01 2015 -0700
+
+    vc4: Add a helper for making driver-specific NIR load_uniform for GL state
+    
+    In order to move more of our lowering into NIR, we need the ability to
+    reference various pipeline state (like texture rectangle scaling factors
+    or blend colors), so we just set those up as a load_uniform with a big
+    offset to indicate that it's not within the shader's uniform storage and
+    is one of our state values.
+
+commit d50c182671a6aa315c83b5e203655e902996c0e7
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 31 15:35:22 2015 -0700
+
+    nir: Don't try to scalarize unpack ops.
+    
+    Avoids regressions in vc4 when trying to do our blending in NIR.
+    
+    v2: Add the other unpack ops I meant to when writing the original commit
+        message.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 9e6dc5b64d5e931c7ebc272096eccab102b75d76
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 4 16:25:24 2015 -0700
+
+    nir: Add a nir_opt_undef() to handle csels with undef.
+    
+    We may find a cause to do more undef optimization in the future, but for
+    now this fixes up things after if flattening.  vc4 was handling this
+    internally most of the time, but a GLB2.7 shader that did a conditional
+    discard and assign gl_FragColor in the else was still emitting some extra
+    code.
+    
+    total instructions in shared programs: 100809 -> 100795 (-0.01%)
+    instructions in affected programs:     37 -> 23 (-37.84%)
+    
+    v2: Use nir_instr_rewrite_src() to update def/use on src[0] (by Thomas
+        Helland).
+    v3: Make sure to flag metadata dirties, and copy the swizzle and abs/neg
+        over to src[0], too (by anholt).
+    
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com> (v2)
+    Tested-by: Thomas Helland <thomashelland90@gmail.com> (v2)
+
+commit b346a84e270a50f0a8f1a6e474a51da04dd72f0e
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Aug 14 14:10:36 2015 -0400
+
+    gm107/ir: indirect handle goes first on maxwell also
+    
+    Fixes fs-simple-texture-size.shader_test
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 7ff7d5d799a54f2b08a3019df7fd531501174182
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Aug 11 12:19:54 2015 -0400
+
+    nv30: add depth bounds test support for hw that has it
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit a6bf20d153f06639e1ae7d52d37ace9df440354d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Aug 11 11:59:56 2015 -0400
+
+    nv50: add depth bounds test support
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit d4087265f656c1998e20cbe2c9b6beaff6762b76
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Aug 11 11:46:22 2015 -0400
+
+    nvc0: add depth bounds test support
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit a90aa54fde37cbdf162bf909a9e895b764eb41ea
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Aug 13 23:46:13 2015 +0200
+
+    docs/relnotes: document amdgpu, GL 4.1 and other new features
+
+commit 7bfb9ee5ee0551ef2c2056e7fe2e63e35c629e3c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 16 22:59:41 2015 +0200
+
+    radeonsi: add all new VI PCI IDs including Fiji
+
+commit f47c59322e614d6304091207fc81cfa5beba6ea9
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 16:23:53 2015 +0200
+
+    radeonsi: revert a wrong DB bug workaround for VI
+    
+    The bug was misunderstood. Besides that, the bug affects a DB feature we
+    don't use yet.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 839bf82606ae9c7b1c7d8d5055ab5e3cadae9bf9
+Author: Boyuan Zhang <boyuan.zhang@amd.com>
+Date:   Wed Jul 8 16:54:48 2015 -0400
+
+    radeon/uvd: implement HEVC support
+    
+    add context buffer to fix H265 uvd decode issue.
+    fix H265 corruption issue caused by incorrect assigned ref_pic_list.
+    
+    v2: disable interlace for HEVC
+        add CZ sps flag workaround
+        fix coding style
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
+    Reviewed-by: Leo Liu <leo.liu@amd.com>
+
+commit 0654a9ca17c17fe140f70d126c878a0ce4736b76
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Mon Jul 13 13:36:27 2015 -0400
+
+    radeon/vce: disable VCE dual instance for harvest part
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 09def7e1e06827ab1eae091f0e765d91c6715cf9
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Thu Jun 25 10:14:14 2015 -0400
+
+    radeon/vce: implement VCE dual instance support
+    
+    VCE dual instances encode in parallel; this needs two frames, each with
+    its own parameters, in one IB. The master instance checks the task info
+    to find another frame and assigns it to the slave instance.
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 4dfcf6e3a91be97fcf9d3f44e76a7a389f8f40b2
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Thu Jun 25 12:12:12 2015 -0400
+
+    radeon/video: config encode stacked frame number based on HW
+    
+    since VCE 3.0 with dual instances, we need stack frames for them.
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 42bc4e6be434b398d9edaff0ed10dfb5bf89b6a6
+Author: Christian König <christian.koenig@amd.com>
+Date:   Mon Jun 15 20:19:48 2015 +0200
+
+    radeon/vce: make reloc offset signed
+    
+    We need a negative offset for FW 50.
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 57fabe9f3a21a2a370284575833637d37e987cb5
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Mon Jun 1 13:48:24 2015 -0400
+
+    radeon/vce: add config task and put task info into encoder v2
+    
+    The config task has its own task ID; extract the configuration functions
+    into the config task.
+    
+    v2 (chk): calculate offset automatically
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit e91a67abfa5112acd481ee4a3f07c03f6ff2708c
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Mon Jun 15 15:20:20 2015 -0400
+
+    radeon/vce: fix VCE fail after rebase
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit fa80c1fe20f1fc33864f04fd9cf49f8bddfa4448
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Mon Jun 15 14:11:57 2015 -0400
+
+    radeon/vce: add dual pipe support for VI
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 468fcdcb4fafeba466bb1006ece1f16cc38805c7
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Fri May 29 13:43:00 2015 -0400
+
+    radeon/vce: add new firmware support for VI and CI
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 1550790b3fab901c697e9d8e5b01ea67d8843e99
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Wed Apr 15 12:36:32 2015 -0400
+
+    radeon/vce: implement VCE two pipe support
+    
+    v2: rebase by Marek
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 22f71dbf7976d1803940bc2a0429c3d302dae9fa
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Thu Mar 12 16:24:57 2015 -0400
+
+    radeon/uvd: make 30M as minimum for MPEG4 dpb buffer size
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit baecc518c9adcd073e725268421a049dd610d22f
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Thu Mar 12 16:13:44 2015 -0400
+
+    radeon/uvd: recalculate dpb buffer size
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit c29f0d4722832a9d284aba899875955e60a41c03
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Mon Mar 9 16:24:48 2015 -0400
+
+    radeon/video: add 4K support for decode/encode parameters
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 261ed775475db8d328a772fc4ff151d63969c84a
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Mon Dec 15 12:51:50 2014 -0500
+
+    gallium/radeon: add h264 performance HW decoder support
+    
+    v2: -make tonga use new h264 performance HW decoder;
+        -integrate it scaling buffer to msg_fb buffer
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 67586c4b40881940535658c3c89b5b1a42f94027
+Author: Christian König <christian.koenig@amd.com>
+Date:   Thu Apr 10 17:18:32 2014 +0200
+
+    gallium/radeon: use VM for VCE
+    
+    v2: (leo) add checking for driver backend
+    v3: (leo) change variable name from use_amdgpu to use_vm
+    v4: rebase by Marek
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 0248c13a8b1e10e2c8c8d614473c701239627a71
+Author: Christian König <christian.koenig@amd.com>
+Date:   Wed Apr 9 19:41:06 2014 +0200
+
+    gallium/radeon: use VM for UVD
+    
+    v2: (leo) add checking for driver backend
+    v3: (leo) change variable name from use_amdgpu to use_vm
+    v4: rebase by Marek
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 767ad50a10d01274b1d1a877add12b5552ba6984
+Author: Alex Deucher <alexander.deucher@amd.com>
+Date:   Wed Jul 29 15:40:46 2015 -0400
+
+    radeonsi: add support for FIJI (v4)
+    
+    v2: incorporate comments from Marek
+    v3: add missing fiji case in winsys init
+        use tonga raster config (double check this)
+    v4: rebase on harvest patch
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v3)
+    Reviewed-by: Christian König <christian.koenig@amd.com> (v3)
+    Reviewed-by: David Zhang <david1.zhang@amd.com> (v3)
+    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 528a6ff5992e6710921d6e4157a8a51884bc277f
+Author: Alex Deucher <alexander.deucher@amd.com>
+Date:   Tue Jul 7 22:18:13 2015 -0400
+
+    winsys/amdgpu: add addrlib support for Fiji (v2)
+    
+    v2: fix tonga chip check
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+    Reviewed-by: David Zhang <david1.zhang@amd.com>
+    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit d69686f1d375c3a65a4398f69da843e833987b0e
+Author: Alex Deucher <alexander.deucher@amd.com>
+Date:   Wed Jul 8 22:19:55 2015 -0400
+
+    radeonsi: add harvest support for CI/VI parts (v3)
+    
+    Properly calculate the PA_SC_RASTER_CONFIG[_1] settings
+    for harvest chips.
+    
+    v2: - fix default raster config settings for CZ and KV
+        - Suggestions from Michel
+    v3: - handle multiple packers properly for CI+
+        - GRBM_GFX_INDEX is privileged on VI+
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com> (v2)
+    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 933d24b1768d769f1847a023ea3c70b6c9723e33
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 27 13:57:25 2015 +0200
+
+    gallium/radeon: enable the GPU load query for amdgpu
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit f0e24a7beae57f24501fa9d3b6b947fc20ca23bb
+Author: Alex Deucher <alexander.deucher@amd.com>
+Date:   Wed Jun 10 11:43:24 2015 -0400
+
+    radeonsi: properly handle raster_config setup on CZ
+    
+    Need to take into account the number of RBs.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 649975e7162cc4ee0586ee76d24321cd7250581f
+Author: Alex Deucher <alexander.deucher@amd.com>
+Date:   Wed Jun 10 11:39:30 2015 -0400
+
+    radeonsi: properly set the raster_config for KV
+    
+    This enables the second RB on asics that support it which
+    should boost performance.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit bf2c3422d7c12bdead944c3de8b37b809f4cbcbb
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 30 17:02:38 2015 +0200
+
+    radeonsi: add amdgpu support for querying the GPU reset state
+    
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 2d1952e2a5abd273983374b420371d263388bb20
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 16 20:44:54 2015 +0200
+
+    radeonsi: add VI hardware support
+
+commit 943a4b5e963a3bbeb3a0a39d0123e359fdf3ec07
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 11 13:22:22 2015 +0200
+
+    radeonsi: add definitions for VI status registers
+    
+    Useful for debugging hangs with the read-register interface.
+    I checked that this adds the same register fields as the kernel driver.
+    
+    Acked-by: Michel Dänzer <michel.daenzer@amd.com>
+    Acked-by: Christian König <christian.koenig@amd.com>
+
+commit 8f49f6ed19ba4ee6a26c77786dcbc151c6615d48
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 16 20:12:24 2015 +0200
+
+    radeonsi: add VI register definitions
+
+commit 8ba70e0a7405005c079eb72f94999245c992aa91
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 16 20:35:27 2015 +0200
+
+    radeonsi: fix DRM version checks for amdgpu DRM 3.0.0
+
+commit e7fc664b91a5d886c2709d05a498f6a1dfbaf136
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 16 19:41:33 2015 +0200
+
+    winsys/amdgpu: add addrlib - texture addressing and alignment calculator
+    
+    This is an internal project that Catalyst uses and now open source will do
+    too.
+    
+    v2: squashed these commits in:
+        - winsys/amdgpu: fix warnings in addrlib
+        - winsys/amdgpu: set PIPE_CONFIG and NUM_BANKS in tiling_flags
+
+commit 2eb067db0febcd71b4182153155e3e43f215624c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 16 22:43:23 2015 +0200
+
+    winsys/amdgpu: add a new winsys for the new kernel driver
+    
+    v2: - lots of changes according to Emil Velikov's comments
+        - implemented radeon_winsys::read_registers
+    
+    v3: - a lot of new work, many of them adapt to libdrm interface changes
+    Squashed patches:
+    winsys/amdgpu: implement radeon_winsys context support
+    winsys/amdgpu: add reference counting for contexts
+    winsys/amdgpu: add userptr support
+    winsys/amdgpu: allocate IBs like normal buffers
+    winsys/amdgpu: add IBs to the buffer list, adapt to interface changes
+    winsys/amdgpu: don't use KMS handles as reloc hash keys
+    winsys/amdgpu: sync buffer accesses to different rings
+    winsys/amdgpu: use dependencies instead of waiting for last fence v2
+    gallium/radeon: unify buffer_wait and buffer_is_busy in the winsys interface (amdgpu part)
+    winsys/amdgpu: track fences per ring and be thread-safe
+    winsys/amdgpu: simplify waiting on a variable in amdgpu_fence_wait
+    gallium/radeon: allow the winsys to choose the IB size (amdgpu part)
+    winsys/amdgpu: switch to new amdgpu_cs_query_fence_status interface
+    winsys/amdgpu: handle fence and dependencies merge
+    winsys/amdgpu follow libdrm change to move user fence into UMD
+    winsys/amdgpu: use amdgpu_bo_va_op for va map/unmap v2
+    winsys/amdgpu: use the new tiling flags
+    winsys/amdgpu: switch to new GTT_USWC definition
+    winsys/amdgpu: expose amdgpu_cs_query_reset_state to drivers
+    winsys/amdgpu: fix valgrind warnings
+    winsys/amdgpu: don't use VRAM with APUs that don't have much of it
+    winsys/amdgpu: require LLVM 3.6.1 for VI because of bug fixes there
+    winsys/amdgpu: remove amdgpu_winsys::num_cpus
+    winsys/amdgpu: align BO size to page size
+    winsys/amdgpu: reduce BO cache timeout
+    winsys/amdgpu: remove useless flushing and waiting in amdgpu_bo_set_tiling
+    winsys/amdgpu: use amdgpu_device_handle as a unique device ID instead of fd
+    winsys/amdgpu: use safer access to amdgpu_fence_wait::signalled
+    winsys/amdgpu: allow maximum IB size of 4 MB
+    winsys/amdgpu: add ip_instance into amdgpu_fence
+    gallium/radeon: add RING_COMPUTE instead of RADEON_FLUSH_COMPUTE
+    winsys/amdgpu: set the ring type at CS initialization
+    winsys/amdgpu: query the GART page size from the kernel
+    winsys/amdgpu: correctly wait for shared buffers to become idle
+    winsys/amdgpu: set the amdgpu_cs_fence structure only once at fence creation
+    winsys/amdgpu: add a specific error message for cs_submit -> -ENOMEM
+    winsys/amdgpu: check num_active_ioctls before calling amdgpu_bo_wait_for_idle
+    winsys/amdgpu: clear user fence BO after allocating it
+    winsys/amdgpu: fix user fences
+    winsys/amdgpu: make amdgpu_winsys_create public
+    winsys/amdgpu: remove thread offloading
+    winsys/amdgpu: flatten the amdgpu_cs_context structure and simplify more
+    
+    v4: require libdrm 2.4.63
+
+commit 5609a6986f3eb3c452d66d373b6081df5c6fb34c
+Author: Christian König <christian.koenig@amd.com>
+Date:   Wed Apr 29 15:35:02 2015 +0200
+
+    st/vdpau: add HEVC support v2
+    
+    v2: fix return code
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Reviewed-by: Leo Liu <leo.liu@amd.com>
+
+commit 5581f9f28aeaef63bc1495febb402435ddfde556
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Thu Jun 25 13:19:56 2015 -0400
+
+    st/omx/enc: stack frame tasks for the gathering
+    
+    Put tasks to the FIFO queue for results
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 0729c251bbff8375ab5d24b80cfc2f8becd6afff
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Fri May 29 14:50:44 2015 -0400
+
+    st/omx/enc: flush after eos handling v2
+    
+    v2 (chk): reorder the flush
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit facba49d839b01da139261e587a05c744cc9a1fa
+Author: Christian König <christian.koenig@amd.com>
+Date:   Tue Apr 28 15:31:37 2015 +0200
+
+    vl: add HEVC profiles and defines
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Reviewed-by: Leo Liu <leo.liu@amd.com>
+
+commit 716a67da12be0656a6dae2a448175946aaf57377
+Author: Leo Liu <leo.liu@amd.com>
+Date:   Thu Jun 25 12:09:11 2015 -0400
+
+    vl: add cap for stacking frames
+    
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 973988ab8dd4d04b925a5859d1da0801e858a6fe
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Aug 13 01:51:37 2015 +0200
+
+    swrast: fix EXT_depth_bounds_test
+    
+    zMin and zMax can't use _DepthMaxF, because the test is done in Z32_UNORM.
+    
+    Probably a useless patch given how popular swrast is nowadays, but it helped
+    create and validate the piglit test.
+    
+    v2: add an explicit cast to GLuint
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 97f58fb59a45f04c9d03709063a081f572509f51
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 02:23:21 2015 +0200
+
+    radeonsi: add support for EXT_depth_bounds_test
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 36a6f848bb03828aa9c4dc28774acf09055f2831
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 02:18:43 2015 +0200
+
+    st/mesa: add EXT_depth_bounds_test
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 3b7800e75089d4dc8ed9b2a0ce994760c167b93a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 02:11:48 2015 +0200
+
+    gallium: add an interface for EXT_depth_bounds_test
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 2ebb8efa08b4ea290b8a2bb9aa2e3784b8272d87
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 19:53:22 2015 +0200
+
+    st/mesa: small cleanup in st_extensions.c
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Edward O'Callaghan <eocallaghan at alterapraxis.com>
+
+commit 44dc1d307d7eacef0d6f1618ba0fb7f62e08f896
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 19:37:01 2015 +0200
+
+    gallium: add support for GLES texture float extensions (v3)
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74329
+    
+    v2: add a CAP for half floats
+        drivers should not expose the CAPs if they don't support the formats
+    
+    v3: update relnotes
+    
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 78493c33183bf2a4b8be0e58963162ef2e3aa54a
+Author: Zoltan Gilian <zoltan.gilian@gmail.com>
+Date:   Tue Jul 7 23:38:27 2015 +0200
+
+    r600,compute: setup compute sampler states and views
+    
+    v2: Add compute mode flag to sampler state setup (Marek).
+        Drop branches which avoid reference counting (Marek).
+        Simplify unset branch condition (Marek).
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 7f4ad692a10bf0f247dedd4968b7ffe9b07d2af2
+Author: Michel Dänzer <michel.daenzer@amd.com>
+Date:   Fri Aug 14 15:16:12 2015 +0900
+
+    st/clover: Fix build against LLVM 3.8 SVN r244928
+    
+    raw_svector_ostream::flush() is now unnecessary and forbidden:
+    
+      CXX      llvm/libclllvm_la-invocation.lo
+    ../../../../../src/gallium/state_trackers/clover/llvm/invocation.cpp: In function 'clover::module {anonymous}::build_module_llvm(llvm::Module*, unsigned int (&)[7])':
+    ../../../../../src/gallium/state_trackers/clover/llvm/invocation.cpp:574:29: error: use of deleted function 'void llvm::raw_svector_ostream::flush()'
+           bitcode_ostream.flush();
+                                 ^
+    In file included from /home/daenzer/src/llvm-git/llvm/include/clang/Basic/VirtualFileSystem.h:22:0,
+                     from /home/daenzer/src/llvm-git/llvm/include/clang/Basic/FileManager.h:20,
+                     from /home/daenzer/src/llvm-git/llvm/include/clang/Basic/SourceManager.h:38,
+                     from /home/daenzer/src/llvm-git/llvm/include/clang/Frontend/CompilerInstance.h:16,
+                     from ../../../../../src/gallium/state_trackers/clover/llvm/invocation.cpp:25:
+    /home/daenzer/src/llvm-git/llvm/include/llvm/Support/raw_ostream.h:512:8: note: declared here
+       void flush() = delete;
+            ^
+    Makefile:862: recipe for target 'llvm/libclllvm_la-invocation.lo' failed
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 7a144aaf64e5cfff5aa53d7fd340c91762e51aa5
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Thu Aug 13 09:30:35 2015 +0300
+
+    mesa: set correct error for non-renderable multisample textures
+    
+    v2: same common error on gles31 and desktop OpenGL
+        (spotted by Erik Faye-Lund)
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@linux.intel.com>
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 28ed1e08e8ba98ebd4ff0b56326372f0df9c73ad
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Fri Aug 7 13:58:37 2015 -0700
+
+    i965/skl: Remove early platform support
+    
+    We do not want bug reports from this early stepping of SKL. Few if any were ever
+    shipped outside of Intel to early enabling partners, and none will be sold.
+    
+    There is a functional change here. If you're using new mesa on an old
+    kernel/libdrm, the revid will be -1, and we'll use new SKL values instead of
+    early ones (a hopefully irrelevant improvement IMO).
+    
+    v2: Remove hunk which warned before dying. Instead, default to normal SKL
+    support (Ken)
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+
+commit d9603be038b6d30f17ca7c05e60cc78100a625ac
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Wed Aug 12 16:36:00 2015 +0100
+
+    egl: improve attribute checking for eglCreateContext
+    
+    The EGL 1.4 spec states for eglCreateContext:
+    
+    	"attribute EGL_CONTEXT_CLIENT_VERSION is only valid when the current
+    	 rendering API is EGL_OPENGL_ES_API"
+    
+    Additionally, if the EGL_KHR_create_context EGL extension is supported
+    (this is mandatory in EGL 1.5) then the EGL_CONTEXT_MAJOR_VERSION_KHR,
+    which is an alias for EGL_CONTEXT_CLIENT_VERSION, and
+    EGL_CONTEXT_MINOR_VERSION_KHR attributes are also accepted by
+    eglCreateContext with the extension spec stating:
+    
+    	"The values for attributes EGL_CONTEXT_MAJOR_VERSION_KHR and
+    	 EGL_CONTEXT_MINOR_VERSION_KHR specify the requested client API
+    	 version. They are only meaningful for OpenGL and OpenGL ES
+    	 contexts, and specifying them for other types of contexts will
+    	 generate an error."
+    
+    Add the necessary checks against the extension and rendering APIs when
+    validating these attributes as part of eglCreateContext.
+    
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    [Emil Velikov: Add newline before the spec quote (Matt)]
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 21b2c6fd5ea5ec2a810945c3c61b14d93a53991d
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Wed Aug 12 16:35:59 2015 +0100
+
+    egl: don't allow eglGetConfigs to set num_configs param to a negative value
+    
+    When a buffer is provided to eglGetConfigs it's supposed to set the value
+    of the num_config parameter to the total number of configs that have been
+    copied into this buffer. For some reason the EGL spec doesn't consider it
+    to be an error to pass this function a buffer while specifying its size to
+    be less than 0. Given this, one would expect this combination to result in
+    the num_config parameter being set to 0 but this wasn't the case. This was
+    due to the buffer size being copied straight into num_configs without being
+    clamped to 0.
+    
+    This was causing the following dEQP EGL test to fail:
+    dEQP-EGL.functional.query_config.get_configs.get_configs_bounds
+    
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 9a4eae61c24858d69d731d63b141d2acaed40d69
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Tue Aug 4 14:32:45 2015 +0100
+
+    egl/x11: don't abort when creating a DRI2 drawable fails
+    
+    When calling either eglCreateWindowSurface or eglCreatePixmapSurface it
+    was possible for an application to be aborted as a result of it failing
+    to create a DRI2 drawable on the server. This could happen due to an
+    application passing in an invalid native drawable handle, for example.
+    
+    v2: Handle the case where an error has been set on the connection
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 3b491cbc42f6cfad2e750957f720b15b95278acf
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Tue Aug 4 14:32:44 2015 +0100
+
+    egl/x11: set EGL_BAD_NATIVE_(PIXMAP|WINDOW) for invalid pixmaps/windows
+    
+    Both eglCreatePixmapSurface and eglCreateWindowSurface were incorrectly
+    setting the EGL error to be EGL_BAD_ALLOC when an invalid native drawable
+    handle was being passed in. The EGL spec states the following for
+    eglCreatePixmapSurface:
+    
+    	"If pixmap is not a valid native pixmap handle, then an EGL_BAD_-
+    	 NATIVE_PIXMAP error should be generated."
+    
+    (eglCreateWindowSurface has similar text)
+    
+    Correctly set the EGL error value based on xcb_get_geometry_reply returning
+    an error structure containing something other than BadAlloc.
+    
+    v2: Check for BadAlloc error and update commit message to reflect this
+    
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 2900e8ca9077d20c5b29bb5a4171ac59ea9d1767
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Tue Aug 4 14:32:43 2015 +0100
+
+    egl/x11: fix use of EGL_BAD_NATIVE_WINDOW
+    
+    Commit 4ed23fd590 introduced some calls to _eglError inappropriately
+    passing it EGL_BAD_NATIVE_WINDOW. This was actually harmless in two of the
+    cases as _eglError gets called later on with a more appropriate error code
+    but (just to be safe) switch these to _eglLog calls instead.
+    
+    The final case is a little trickier as it actually needs to set an error
+    of which the following are available (according to the EGL spec):
+    EGL_BAD_MATCH, EGL_BAD_CONFIG, EGL_BAD_NATIVE_(PIXMAP|WINDOW) and
+    EGL_BAD_ALLOC.
+    
+    Of these, EGL_BAD_ALLOC seems to be the most appropriate given that
+    failure can occur either as a result of xcb_get_setup failing due to an
+    earlier error on the connection (where the most commonly occurring error
+    code is XCB_CONN_CLOSED_MEM_INSUFFICIENT) or as a result of the
+    xcb_screen_iterator_t 'rem' field being 0.
+    
+    In addition to this, commit af2aea40d2 unconditionally set the error to
+    EGL_BAD_NATIVE_WINDOW when creating a window or pixmap surface with a NULL
+    native handle. Change this to correctly set the error based on surface
+    type.
+    
+    v2: Updated patch description (Emil Velikov)
+        Return EGL_BAD_NATIVE_PIXMAP when eglCreatePixmapSurface is called
+        with a NULL native pixmap handle
+    
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 8dffa89e013b611cdafbb2cc5216450fa248cb7c
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Thu Aug 13 18:42:54 2015 +1000
+
+    mesa: remove extern from texture function
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit b8f63b3c1002eea9cc6d54191bd41ea43c467e96
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Aug 12 17:01:52 2015 +1000
+
+    glsl: make linker error message more informative
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 1bba29ed403e735ba0bf04ed8aa2e571884fcaaf
+Author: Topi Pohjolainen <topi.pohjolainen@intel.com>
+Date:   Thu Jun 25 14:00:41 2015 +0300
+
+    i965: Stop aux data compare preventing program binary re-use
+    
+    Items in the program cache consist of three things: key, the data
+    representing the instructions and auxiliary data representing
+    uniform storage. The data consisting of instructions is stored into
+    a drm buffer object while the key and the auxiliary data reside in
+    malloced section. Now the cache uploading is equipped with a check
+    that iterates over existing items and seeks to find another item
+    using identical instruction data as the one being just uploaded.
+    If such is found there is no need to add another section into the
+    drm buffer object holding identical copy of the existing one. The
+    item just being uploaded should instead simply point to the same
+    offset in the underlying drm buffer object.
+    
+    Unfortunately the check for the matching instruction data is
+    coupled with a check for matching auxiliary data also. This
+    effectively prevents the cache from ever containing two items
+    that could share a section in the drm buffer object.
+    
+    The constraint for the instruction data and auxiliary data to
+    match is, fortunately, unnecessarily strong. When items are stored
+    into the cache they will anyway contain their own copy of the
+    auxiliary data (even if they matched - which they in real world
+    never will). The only thing the items would be sharing is the
+    instruction data and hence we should only check for that to match
+    and nothing else.
+    
+    No piglit regression in jenkins.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 12a66d91f6b0beff123fb6fd8a4f3c3796379532
+Author: Topi Pohjolainen <topi.pohjolainen@intel.com>
+Date:   Thu Jun 25 14:35:26 2015 +0300
+
+    i965: Only write program to cache when it doesn't exist yet
+    
+    Current logic re-writes the same data when existing data is found.
+    Not that this actually matters at the moment in practice, the
+    constraint for finding matching data is too severe to ever allow
+    data to be shared between two items in the cache.
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit b4897eb70a994c4630b0fde4a66dd6ace833c33a
+Author: Topi Pohjolainen <topi.pohjolainen@intel.com>
+Date:   Thu Jun 25 14:31:03 2015 +0300
+
+    i965: Rename brw_upload_item_data to brw_alloc_item_data
+    
+    and simplify the interface to take directly the size and to return
+    the offset. The routine does nothing more than allocate, it doesn't
+    upload anything.
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 853853b2ac527698215b4290629ec242333e264a
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Wed Aug 12 11:13:40 2015 +0300
+
+    mesa: update MaxShaderStorageBlockSize to 2^27
+    
+    Extension spec originally required 2^24 but 2^27 is the minimum value
+    required by OpenGL 4.5 and OpenGL ES 3.1 specifications.
+    
+    Fixes:
+       ES31-CTS.shader_storage_buffer_object.basic-max
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 24695f4b2738d930a2bc71b4ebc9e5d993980cae
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Aug 3 10:46:33 2015 +0300
+
+    mesa: fix name returned for XFB varyings
+    
+    _mesa_get_program_resource_name has logic to append '[0]' in name
+    if variable is an array, this should be skipped for XFB varyings
+    that have array index already appended.
+    
+    v2: fix comment, change also GL_NAME_LENGTH query to match
+        the behaviour
+    
+    Fixes:
+       ES31-CTS.program_interface_query.transform-feedback-types
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 86a72ee48eb371566765566fc778d790bc9ce201
+Author: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+Date:   Wed Jul 29 21:49:45 2015 +1000
+
+    mesa: Fix printf format specifier warn of the ptrdiff_t
+    
+    See §7.19.6.1, paragraph 7 of the ISO C specification.
+    
+    Signed-off-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 8c0b943e87b48e7359230825cc06fbdd059a9e58
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Aug 11 21:37:59 2015 +0200
+
+    r600g: allow setting geometry shader sampler states
+    
+    We were ignoring them. This is both hilarious and sad.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Reviewed-by: Edward O'Callaghan <eocallaghan at alterapraxis.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit d335aad11b208bcdcc75a99d4b6c5fc8b69ce368
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Aug 11 22:36:51 2015 +0200
+
+    r600g: fix polygon offset scale
+    
+    The value was copied from r300g, which uses 1/12 subpixels, but this hw
+    uses 1/16 subpixels.
+    
+    Should fix piglit: gl-1.4-polygon-offset (formerly a glean test)
+    (untested, ported from radeonsi)
+    
+    Reviewed-by: Edward O'Callaghan <eocallaghan at alterapraxis.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit bfac8ba9d32be351277c7ea814ac9848bdcb1f16
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Aug 11 22:36:51 2015 +0200
+
+    radeonsi: fix polygon offset scale
+    
+    The value was copied from r300g, which uses 1/12 subpixels, but this hw
+    uses 1/16 subpixels.
+    
+    Fixes piglit: gl-1.4-polygon-offset (formerly a glean test)
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 8ae88105b60be613126ea07492ffd9712e5e71eb
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 02:28:01 2015 +0200
+
+    radeonsi: enable VS_OUT_MISC_SIDE_BUS_ENA
+    
+    This is recommended for better performance.
+    Diag tests always enable this.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit e7a52a5cb810de49a8282cb9f9caea5d554c3348
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 01:50:11 2015 +0200
+
+    radeonsi: add support for gl_PrimitiveID in the fragment shader
+    
+    It must be obtained from the VS.
+    
+    The GS scenario A must be enabled for PrimID to be generated for the VS.
+    
+    + 4 piglits
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 8e11be0ddb0920633c5fab8d6a6460b7591a2627
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 10 00:52:21 2015 +0200
+
+    radeonsi: move VGT_GS_MODE to the VS state
+    
+    The VS will want to select GS scenario A here (VS with PrimitiveID).
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit bdc564b942ba292a897ea0d7d37f4bcafc236129
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Aug 12 11:39:24 2015 -0400
+
+    freedreno/a4xx: format updates
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 500025a23784877c8a61d8b3c7a8eab6fddf242a
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Tue Aug 11 16:47:16 2015 -0400
+
+    freedreno/a3xx+a4xx: add texture buffer object support
+    
+    Basic texture buffer support.  Should be straightforward to add first/
+    last_element support.  And with a bit of work in ir3 emulate larger
+    texture buffer sizes.  But this seems to be enough for stk gl31 render
+    paths.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit fb07c49f4883b12cef37748271d99e2fcf217a72
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Tue Aug 11 16:33:14 2015 -0400
+
+    ttn: add buffer texture type
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit aab3912f21508b0681962c68fdaca1435c06b2ea
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Tue Aug 11 16:11:04 2015 -0400
+
+    freedreno/ir3: 'keeps' need neighbors found too
+    
+    This shows up with a glamor shader, which does a TXF and uses the result
+    for conditional kill.  Before we wouldn't group the fanin (collect)
+    neighbors which need to be allocated adjacently at RA, resulting in
+    badness.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 6e04020dd7784bb44d5e04b41efce342f80840cf
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Tue Aug 11 16:09:48 2015 -0400
+
+    freedreno/ir3/print: print left/right neighbors too
+    
+    When debugging compiler, this is useful to see.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 0667962103034d7426c763a7793ce22baab46c8e
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Tue Aug 11 11:47:46 2015 -0400
+
+    freedreno/ir3: use nir pass to lower const to scalar
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 8885f2befaea68ce7f9d550c9b9ff5ae77524406
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Tue Aug 11 08:48:34 2015 -0400
+
+    freedreno/a4xx: point-size and spritelist fixes
+    
+    a4xx needs similar treatment as 995f55a6
+    
+    Also fixup a few point-size and vpsrepl issues and drop fix_blit_fp()
+    hack previously needed for mem2gmem.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit f72fead4a28d5d8a16bbc20781218ea7df0b9c9a
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Aug 10 20:41:45 2015 -0400
+
+    freedreno: cap cleanups
+    
+    Move a few things around to group stuff that is common to a3xx/a4xx
+    together.  Also, introduce is_ir3() for things that are more specific to
+    the compiler / shader-ISA than to the gpu generation.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 81d2fd91a90e5b2fd9fd74792a7a7c329f0e4d29
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Aug 10 06:58:37 2015 -0400
+
+    mesa: add NV_read_{depth,stencil,depth_stencil} extensions
+    
+    These extensions allow reading depth/stencil for GLES contexts, which is
+    useful for tools like apitrace.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 078aef0e97bf7e0cc8fae4d541d5035ff6c29ad7
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Aug 6 14:26:47 2015 -0700
+
+    i965/shader: Don't use OptimizeForAOS for NIR vec4 vertex shaders
+    
+    Shader-db results for vec4 programs using NIR on HSW:
+    
+       total instructions in shared programs: 1838157 -> 1828469 (-0.53%)
+       instructions in affected programs:     275978 -> 266290 (-3.51%)
+       helped:                                2827
+       HURT:                                  244
+       GAINED:                                0
+       LOST:                                  0
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
+
+commit 91698d1206b86ef1710291213145275a2dd06dd7
+Author: Nanley Chery <nanley.g.chery@intel.com>
+Date:   Fri Aug 7 16:37:47 2015 -0700
+
+    mesa/teximage: report the correct function which triggered the error
+    
+    This function would always report that a dimension or size error occurred
+    in glTexImage even when it was called from glCompressedTexImage. Replace
+    the static string with the dynamically determined caller name.
+    
+    Reviewed-by: Tapani Palli <tapani.palli@intel.com>
+    Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
+
+commit 5f1d5b1c7857f8680b47a7a450ee9e4530e22c6f
+Author: Oded Gabbay <oded.gabbay@gmail.com>
+Date:   Wed Aug 12 18:22:53 2015 +0300
+
+    mesa/formats: don't byteswap when building array formats
+    
+    Because we build here an array format, we don't need to swap the
+    bytes for big endian.
+    If it isn't an array format, the bytes will be swapped in
+    _mesa_format_convert.
+    
+    v2: remove temp variable
+    
+    Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit e3eb91af804f449005a2ff535c805eaa1d579d99
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 10 01:32:23 2015 -0700
+
+    mesa/formats: Don't flip channels of null array formats
+    
+    Before, if we encountered an array format of 0 on a BE system, we would
+    flip all the channels even though it's an invalid format.  This would
+    result in a mostly invalid format with a swizzle of yyyy or wwww.  Instead,
+    we should just return 0 if the array format stashed in the format info is
+    invalid.
+    
+    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
+
+commit 28d1a506c8d09fa66170978c85566c34cbf1cc0a
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sun Aug 9 23:45:44 2015 -0700
+
+    mesa/formats: Fix swizzle flipping for big-endian targets
+    
+    The swizzle defines where in the format you should look for any given
+    channel.  When we flip the format around for BE targets, we need to change
+    the destinations of the swizzles, not the sources.  For example, say the
+    format is an RGBX format with a swizzle of xyz1 on LE.  Then it should be
+    wzy1 on BE;  however, the code as it was before, would have made it 1zyx on
+    BE which is clearly wrong.
+    
+    Reviewed-by: Iago Toral <itoral@igalia.com>
+    Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
+    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
+
+commit 3941539179b72fe25b6dffd1aacc0722d198a5ca
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sat Aug 8 09:00:21 2015 -0700
+
+    mesa/formats: Only do byteswapping for packed formats
+    
+    Reviewed-by: Iago Toral <itoral@igalia.com>
+    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
+
+commit 02a4fe22b137d4bc8378bedd8319109fd23a50e3
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Aug 11 15:21:03 2015 -0700
+
+    configure.ac: Always define __STDC_LIMIT_MACROS.
+    
+    ... which ensures that we get defines like LONG_MAX in C++.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91591
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 2265321834608c26b2989a5a1f65bb375826a779
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Aug 10 18:50:48 2015 -0700
+
+    i965: Optimize brw_inst_set_bits() and brw_compact_inst_set_bits().
+    
+    Cuts about 2k of .text.
+    
+       text     data      bss      dec      hex  filename
+    5017141   197160    27672  5241973   4ffc75  i965_dri.so before
+    5014981   197160    27672  5239813   4ff405  i965_dri.so after
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 9fa70fef22bd458fbeb95a3b0ebb5f7919cba7f0
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Aug 10 16:57:58 2015 -0700
+
+    i965: Optimize brw_inst_bits() and brw_compact_inst_bits().
+    
+    Cuts about 1k of .text.
+    
+       text     data      bss      dec      hex  filename
+    5018165   197160    27672  5242997   500075  i965_dri.so before
+    5017141   197160    27672  5241973   4ffc75  i965_dri.so after
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 1e53df70642a970fa1bdf5e6b7d64f2c0a4699c7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Aug 11 19:00:03 2015 +0100
+
+    docs: add news item and link release notes for 10.6.4
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit d32c45ca7bd5a81b312504ba99cdab3d748251f7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Aug 11 18:54:18 2015 +0100
+
+    docs: add sha256 checksums for 10.6.4
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 99793e2541510fe208d29e69fedf97a6fff006f8)
+
+commit c4b4bad68a90510406c0bef97039f7d0b4f8f5fe
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Aug 11 16:39:10 2015 +0100
+
+    docs: add release notes for 10.6.4
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 6b2fcee64edadbd4db2293f5f4fc1a70e80c7251)
+
+commit b88f14702d9c02a34d517f95fe840527961631cd
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Aug 8 14:03:54 2015 +0200
+
+    gallium/radeon: fix r600g build if LLVM is disabled
+    
+    MESA_LLVM_VERSION_PATCH is undefined.
+    
+    Reviewed-by: Edward O'Callaghan <eocallaghan at alterapraxis.com>
+    Tested-by: Benjamin Bellec <b.bellec@gmail.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 50545882113b389decc3f05771764f6c62213af3
+Author: Grazvydas Ignotas <notasas@gmail.com>
+Date:   Mon Aug 10 00:42:35 2015 +0300
+
+    r600g: use a bitfield to track dirty atoms
+    
+    r600 currently has 73 atoms and looping through their dirty flags has
+    become costly because checking each flag requires a pointer
+    dereference before the read. To avoid having to do that add additional
+    bitfield which can be checked really quickly thanks to tzcnt instruction.
+    
+    id field was added to struct r600_atom but that doesn't affect memory
+    usage for both 32 and 64 bit CPUs because it was stuffed into padding.
+    
+    The performance improvement is ~2% for benchmarks that can have FPS in
+    the thousands but is hardly measurable in "real" programs.
+    
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+commit c58534c1384dc63bb1b13eb37c06bdb4652c13ff
+Author: Grazvydas Ignotas <notasas@gmail.com>
+Date:   Mon Aug 10 00:42:34 2015 +0300
+
+    r600g: don't mark unused atom dirty
+    
+    On evergreen config_state is not used, so don't mark it dirty.
+    
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 85adde30a4bb3e8e5ca44983308364559ff140ab
+Author: Grazvydas Ignotas <notasas@gmail.com>
+Date:   Mon Aug 10 00:42:33 2015 +0300
+
+    r600g: use a helper to add an initialized atom
+    
+    Instead of writing to rctx->atoms directly use a helper to take
+    advantage of assert checks.
+    
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 3206d4ed44e761186fee3c679801e57f8ce923cb
+Author: Grazvydas Ignotas <notasas@gmail.com>
+Date:   Mon Aug 10 00:42:32 2015 +0300
+
+    gallium/radeon: use helper functions to mark atoms dirty
+    
+    This is analogous to r300_mark_atom_dirty() used by r300, and will
+    be used by later patches. For common radeon code, appropriate helper
+    is called through a function pointer.
+    
+    No functional changes.
+    
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 3c04a90e91a64a4a09d77c76c6ddcaca949e9b0e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri May 1 17:00:02 2015 +0300
+
+    docs: Mark ARB_shader_image_load_store as done on i965.
+
+commit d03c65793a5ee31f1138cbd0fba6fac6cd942428
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu May 7 18:56:01 2015 +0300
+
+    i965: Expose ARB_shader_image_load_store.
+    
+    Reviewed-by: Paul Berry <stereotype441@gmail.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 13a04abc277089275217dce119e18acf4d4ce52d
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 14:33:06 2015 +0300
+
+    i965/fs: Clamp image array indices to the array bounds on IVB.
+    
+    This fixes the spec@arb_shader_image_load_store@invalid index bounds
+    piglit tests on IVB, which were causing a GPU hang and then a crash
+    due to the invalid binding table index result of the array index
+    calculation.  Other generations seem to behave sensibly when an
+    invalid surface is provided so it doesn't look like we need to care.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit a47ae8de2cf30fbe45318a18a2ea032f30ab7d10
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 16:26:52 2015 +0300
+
+    i965/fs: Translate image load, store and atomic NIR intrinsics.
+    
+    v2: Move array coordinate workaround into the surface builder.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 912ef52c29fdc373889594b963cc93c89fa9e3f7
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Sun Jun 28 21:16:31 2015 +0300
+
+    i965/fs: Handle image uniforms in NIR programs.
+    
+    v2: Move the image_params array back to brw_stage_prog_data.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 4af27145fe2fec6586ce95e80a76cdcbfe933db1
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue May 5 21:07:15 2015 +0300
+
+    i965: Implement logic to set up and upload an image uniform.
+    
+    v2: Move the image_params array back to brw_stage_prog_data.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 84431c1f1d343c85f3b7fa265293a1d245ba9cf3
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue May 5 21:05:45 2015 +0300
+
+    i965: Teach type_size() about the size of an image uniform.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit caae52561dabb2d20f2369c547e660d078974285
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jul 30 15:46:40 2015 +0300
+
+    i965/fs: Implement image load, store and atomic.
+    
+    v2: Drop VEC4 support.
+    v3: Rebase.
+    v4: Move array coordinate workaround into the surface builder.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7e8be000101cc6fe3846745b559f2d785430e253
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jul 30 15:51:58 2015 +0300
+
+    i965/fs: Import image format conversion primitives.
+    
+    Define bitfield packing, unpacking and type conversion operations in
+    terms of which the image format conversion code will be implemented.
+    These don't directly know about image formats: The packing and
+    unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
+    widths as arguments, determining the bitfield position of each
+    component.  Most of the remaining functions perform integer, fixed
+    point normalized, and floating point type conversions, mapping between
+    a target type with per-component bit widths given by a parameter and a
+    matching native representation of the same type.
+    
+    v2: Drop VEC4 support.
+    v3: Rebase.
+    v4: Fix clamping of negative floats in the unsigned case of
+        emit_convert_to_scaled().
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 26ca81ce3029cbd2531f52635258aecae19bf185
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Apr 22 16:45:28 2015 +0300
+
+    i965/fs: Import image format metadata queries.
+    
+    Define some utility functions to query the bitfield layout of a given
+    image format and whether it satisfies a number of more or less
+    hardware-specific properties.
+    
+    v2: Drop VEC4 support.
+    v3: Add SKL support.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 86dbd8af40deaa99aedf011e863b908173e63012
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jul 23 19:32:08 2015 +0300
+
+    i965/fs: Import code to transform image coordinates into surface coordinates.
+    
+    Accounting for the padding required for 1D arrays in certain cases.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 1a37619763a99b78aa574aca0058eda86de7a0dc
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Apr 22 16:44:18 2015 +0300
+
+    i965/fs: Import image memory offset calculation code.
+    
+    Define a function to calculate the memory address of the image
+    location given by a vector of coordinates.  This is required in cases
+    where we need to fall back to untyped surface access, which take a raw
+    memory offset and know nothing about surface coordinates, type
+    conversion or memory tiling and swizzling.  They are still useful
+    because typed surface reads don't support any 64 or 128-bit formats on
+    IVB, and they don't support any 128-bit formats on HSW and BDW.
+    
+    The tiling algorithm is implemented based on a number of parameters
+    which are passed in as uniforms and determine whether the surface
+    layout is X-tiled, Y-tiled or untiled.  This allows binding surfaces
+    of different tiling layouts to the pipeline without recompiling the
+    program.
+    
+    v2: Drop VEC4 support.
+    v3: Rebase.
+    v4: Add plenty of comments (Jason).
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit fb19df7a626d02cb54614d4610af2d14720a2ef3
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Apr 22 16:43:51 2015 +0300
+
+    i965/fs: Import image access validity checks.
+    
+    These utility functions check whether an image access is valid.
+    According to the spec an invalid image access should have no effect on
+    the image and yield well-defined results.  Typically the hardware
+    implements correct bounds and surface checking by itself, but in some
+    cases (typed atomics on IVB and untyped messages elsewhere) we need to
+    implement it in software to work around lacking hardware support.
+    
+    v2: Drop VEC4 support.
+    v3: Rebase.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 3569742ec458c0a881857d9deb782c1e11f195d8
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri Nov 22 16:00:33 2013 -0800
+
+    i965: Define implementation constants for ARB_shader_image_load_store.
+    
+    Reviewed-by: Paul Berry <stereotype441@gmail.com>
+    
+    v2: Drop VS support pre-Gen8, drop GS support.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 786e0853bebc3c4ab073bdbb48eec8ba5ea93842
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Feb 9 21:04:53 2015 +0200
+
+    i965/gen7-8: Set up early depth/stencil control appropriately for image load/store.
+    
+    v2: Store early fragment test mode in brw_wm_prog_data instead of
+        getting it from core mesa data structures (Ken).
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit ac7664e493655e290783c23a0412b9c70936da50
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 14:21:07 2015 +0300
+
+    i965/gen7-8: Poke the 3DSTATE UAV access enable bits.
+    
+    v2: Set the PS UAV-only bit on HSW (Ken).
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit acb6d90dc809283d9839685852f19f6b301b23d3
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Feb 3 17:14:10 2015 +0200
+
+    i965/gen7: Enable fragment shader dispatch if the program has image uniforms.
+    
+    Shaders with image uniforms may have side effects.  Make sure that
+    fragment shader threads are dispatched if the shader has any image
+    uniforms.
+    
+    v2: Use brw_stage_prog_data::nr_image_params to find out if the shader
+        has image uniforms instead of checking core mesa data structures
+        (Ken).
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 47f9b07e4cf79a8249c6f9f09148a6a0b4fabacc
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 20 17:13:17 2015 +0300
+
+    i965: Hook up image state upload.
+    
+    v2: Add CS support.  Move the image_params array back to
+        brw_stage_prog_data.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit 868f1ba0a4e6e3057be5b8c2458db4773cf82034
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 17:19:29 2015 +0300
+
+    i965: Reserve enough parameter entries for all image uniforms used in the program.
+    
+    v2: Add CS support.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit 87a3e02d9bec689e110f820bba7b125b3e801fdd
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jan 21 17:34:49 2015 +0200
+
+    i965: Define and initialize image parameter structure.
+    
+    This will be used to pass image meta-data to the shader when we cannot
+    use typed surface reads and writes.  All entries except surface_idx
+    and size are otherwise unused and will get eliminated by the uniform
+    packing pass.  size will be used for bounds checking with some image
+    formats and will be useful for ARB_shader_image_size too.  surface_idx
+    is always used.
+    
+    v2: Add CS support.  Move the image_params array back to
+        brw_stage_prog_data.
+    v3: Improve documentation.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit 3144844f5ca89cd5743bc9b0ac142ccf862af557
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Sat May 2 16:58:24 2015 +0300
+
+    i965: Implement surface state set-up for shader images.
+    
+    v2: Add SKL support.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 2cdb24a7c2238843d23b468275d479553f537e7e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue May 12 15:56:54 2015 +0300
+
+    i965: Fix brw_memory_barrier() for SKL.
+    
+    This works as-is on SKL, only the assertion needs to be relaxed.
+    
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit f9094691378722304dd94deb76ad013bd65c7a5b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue May 12 16:10:07 2015 +0300
+
+    i965: Add SKL support to brw_miptree_get_horizontal_slice_pitch().
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit fe55ab2d12202236ba5bf9beae09803dfe97a7ac
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sun Aug 9 14:44:30 2015 +1000
+
+    glsl: Add missing spec quote about atomic counter in structs
+    
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 87cea61b9e2681e5365e989c7fa7a0298e4005fa
+Author: Alex Deucher <alexander.deucher@amd.com>
+Date:   Mon Aug 10 15:35:21 2015 -0400
+
+    radeonsi: add new OLAND pci id
+    
+    Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 3fa1ca34cc0134bd16b3315a0695703c9f684bd4
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Aug 10 17:41:36 2015 -0400
+
+    nouveau: no need to do tnl wakeup, state updates are always hooked up
+    
+    A TNL state update now requires a DrawBuffer to be set, which it isn't
+    early on in context creation. Since we init swtnl from context init,
+    this caused crashes.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91570
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 8a688bee83ced46eb4bff741f05d2da033c07ade
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 10 11:52:50 2015 -0700
+
+    i965/fs: Make resolve_source_modifiers consistent with the vec4 version
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 7068a6409c897e44cd98377df310691592ef6d0d
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 10 11:48:14 2015 -0700
+
+    i965/vec4_visitor: Make some function arguments const references
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 1bb339493cd892c8065266b93a296a84b1dfce9b
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jul 31 08:36:35 2015 -0700
+
+    i965/fs: Don't do redundant RA setup on IVB+
+    
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 0ac65abb466578aafbc753189cdc40fd9a6000b8
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jul 31 08:35:57 2015 -0700
+
+    i965/fs: Use dispatch_width instead of reg_width in alloc_reg_sets
+    
+    reg_width is kind of an outdated concept.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit bdcc8f32304b67cd9c87f5f285c1faa00c51d3ad
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jul 30 20:53:04 2015 -0700
+
+    ra: Delete the conflict lists in ra_set_finalize
+    
+    They are never used after the set is finalized so there's no reason to keep
+    them around.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 7539ac7fe2077f7634250dcb34497e1ac643b0df
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jul 30 20:49:22 2015 -0700
+
+    ra: Refactor ra_set_finalize
+    
+    All this commit does is change an early return to an if with an else
+    clause.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit c1d9b3ae0bb0f1222719d7737dd9986e437bf5b9
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 3 15:21:59 2015 -0700
+
+    i965/vec4_nir: Properly handle integer multiplies on BDW+
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 1d658cf8795383dbef127e46f3740b516bfe21b9
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 3 14:37:41 2015 -0700
+
+    i965/vec4_nir: Do boolean source modifier resolves on BDW+
+    
+    On BDW+, the negation source modifier on NOT, AND, OR, and XOR, is actually
+    a boolean negate and not an integer negate.  However, NIR's source
+    modifiers are the integer version.  We have to resolve it with a MOV prior
+    to emitting the actual instruction.  This is basically the same thing we do
+    in the FS backend.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 5e1c1c2fcbdfb96a973ae3fd196e341ab2d41833
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 3 10:00:38 2015 -0700
+
+    i965/vec4-nir: Handle boolean resolves on ILK-
+    
+    The analysis code was already there and running, we just weren't doing
+    anything with the result of it yet.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 1d4e698466bdea735c5f06c2658322bdc527efce
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 3 16:25:18 2015 -0700
+
+    i965/nir: Don't mark bany or ball instructions for resolve
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 17c978166185a7d3a9759f828a4370c1f2169776
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Aug 3 14:12:35 2015 -0700
+
+    i965/nir: Use nir_op_info.output_type for determining when to resolve
+    
+    Previously, we were explicitly listing every instruction that needs a
+    resolve.  However, those instructions were precisely the ones that returned
+    booleans so there's no reason why we shouldn't just have that check.  Also,
+    all of the reduction opcodes such as bany and ball were missing so it
+    didn't properly flag stuff on vec4.  If an opcode gets added in the future
+    that returns a bool but doesn't need a resolve, we can special-case that.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 9901aeb1c74648cbe1aa1d18d590a689c844cbad
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sun Aug 9 22:03:00 2015 -0700
+
+    mesa/format_utils: Add src_bits == dst_bits cases to unorm_to_unorm
+    
+    This better ensures that the src_bits == dst_bits case gets optimized away.
+    
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 7e5d56394bd53607d0158b49f36ac1428acb7954
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 16:22:43 2015 +0200
+
+    gallium/radeon: add a debug flag not to use write combining (v2)
+    
+    v2: just clear the flag before the allocation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 7bfe8cf4a487aec4870df23f6f72c828f1caaa49
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Aug 5 18:14:49 2015 -0400
+
+    freedreno/a4xx: add s8/z32/z32_s8x24 support
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit fcb8a04c9ddcb46b7b8cca21e1203674ec04dde2
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Aug 5 14:21:06 2015 -0400
+
+    freedreno: update generated headers
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 2d6a889e8b786cd76d6711627c10be50615c2b62
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Aug 9 09:03:25 2015 -0400
+
+    freedreno/a4xx: fix vpsrepl for blit shaders
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit d2f669e6c72a16dede22f107c3b015ec0516bc56
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Aug 10 07:11:56 2015 -0400
+
+    freedreno/a4xx: clear cached fp when switching blit prog
+    
+    For gmem restore (mem2gmem), we swap blit programs, in order to have a
+    different frag shader for depth vs color restore.  But we weren't
+    actually clearing the cached fp, so it would not actually change the
+    frag shader as expected.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 6dabf455970f3a1fdbf384a53621ebe2bcd7545e
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Aug 9 08:38:25 2015 -0400
+
+    freedreno/a3xx: clear cached fp when switching blit prog
+    
+    For gmem restore (mem2gmem), we swap blit programs, in order to have a
+    different frag shader for depth vs color restore.  But we weren't
+    actually clearing the cached fp, so it would not actually change the
+    frag shader as expected.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 08f2dfe3430789085c165ce7c546d5afd2e295c2
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon Aug 10 13:48:11 2015 +0300
+
+    mesa/es3.1: Allow Multisampled FrameBufferTextures
+    
+    GLES 3.1 must be allowed to use multisampled framebuffer textures.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit b6d014f0ba010f0e61be43abdceb5f2201028a04
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon Jun 15 13:50:21 2015 +0200
+
+    mesa/es3.1: Pass sample count check for multisampled textures
+    
+    v3 : Removed space in comment.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 2ac171a7db4e4ad2fa902e62bf18bc1f67e91643
+Author: Oded Gabbay <oded.gabbay@gmail.com>
+Date:   Tue Aug 4 21:39:32 2015 +0300
+
+    mesa: clear existing swizzle info before bitwise-OR
+    
+    This patch fixes a bug in big-endian treatment, where the previous
+    swizzle info wasn't cleared before a new swizzle info was inserted into
+    the format field using a bitwise-OR operation.
+    
+    v2: use MESA_ARRAY_FORMAT_SWIZZLE_*_MASK instead of numeric constants
+    v3: align according to coding style
+    
+    Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
+    CC: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 1eaa29cb300e927409281ef0a9413072766eaa3d
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Sun Aug 9 22:36:37 2015 +0100
+
+    util: Use LONG_MAX instead of LONG_BIT.
+    
+    More portable.  Based on Roland Scheidegger's idea.
+    
+    Tested with roundevent_test on Linux, MinGW, and MSVC.
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=91591
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 497a22a727d3606c7327eb72efbf0d2c03607f0a
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Sun Aug 9 11:55:28 2015 +0100
+
+    scons: Build roundevent_test.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 21ccdbdb5dd87b2ee66c4e78b011ec4df29efb98
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Sun Aug 9 11:25:41 2015 +0100
+
+    util: Cope with LONG_BIT not being defined on Windows.
+    
+    Neither MSVC nor MinGW defines LONG_BIT.  For MSVC this was not a problem as
+    it doesn't define __x86_64__ macro (it's GCC specific.)
+    
+    However on Windows long type is guaranteed to be 32bits.
+    
+    Also add an #error, as GCC will just warn, not throw any error, when no
+    value is returned.
+    
+    Trivial.
+
+commit eb643db30e1bdf5171d0a012674016c317925b6e
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Sun Aug 9 11:21:03 2015 +0100
+
+    gallium: GCC 4.9 allows to include tmmintrin.h without -msse3.
+    
+    Fixes build with MinGW x86_64 build with GCC 4.9, due to conflicting
+    definition _mm_shuffle_epi8 of u_sse.h and system headers.
+    
+    Trivial.
+
+commit 512aa0647f328fff69b3ce328b6466f2da8b7c4d
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Fri Aug 7 13:07:40 2015 +0100
+
+    util: Rename PURE to ATTRIBUTE_PURE.
+    
+    To avoid collision with windows.h's PURE macro.
+    
+    We could consider eventually renaming to __pure, but that would require
+    further care, so it's left to the future.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 27141f984d6401dc466f0e9b0c5da2a9248045e3
+Author: Boyan Ding <boyan.j.ding@gmail.com>
+Date:   Sat Aug 8 17:23:28 2015 +0800
+
+    egl/x11: Fix driver_name acquisition
+    
+    We don't need to free driverName string from dri2 reply, on the other
+    hand, the driver name acquired from loader doesn't need duplication.
+    
+    Fixes: 45e110bad9d (egl/x11: trust our loader over the xserver for the
+    drivername)
+    
+    Reported-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
+    [Emil Velikov: use brackets for both branches of conditional]
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit a1adf0b3fe428a4bf690f166c2697d8c7ea2dcb0
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Fri Aug 7 13:46:30 2015 -0700
+
+    i965/skl: (trivial) Remove invalid comment about thread counts
+    
+    This should have been a part of:
+    commit 7eaacc1678195738fab3bb98870828611cae066d
+    Author: Ben Widawsky <benjamin.widawsky@intel.com>
+    Date:   Wed Jul 29 12:35:24 2015 -0700
+    
+        i965/skl: Add production thread counts and URB size
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit ffadfbf5d076638fa4022106cfe989bc5a145f20
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Aug 5 13:58:46 2015 +0100
+
+    i965: Fix HW binding tables editing
+    
+    Since the introduction of new gl_shader_stages in
+    
+    commit a2af956963b6bc4d29f37485e44c98008d2ef077
+    Author: Fabian Bieler <fabianbieler@fastmail.fm>
+    Date:   Fri Mar 7 10:19:09 2014 +0100
+    
+        mesa: add tessellation shader enums
+    
+    the translation table for the stage into the HW binding table edit
+    command was broken, and so we used illegal commands. Fix the array
+    initialisation to be impervious to changes in the gl_shader_stages enum
+    and add the asserts that would have caught the issue earlier.
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+    Cc: Jordan Justen <jordan.l.justen@intel.com>
+    Cc: Matt Turner <mattst88@gmail.com>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit ba651967a201b48f380cd30495e271317c1d8522
+Author: Alexander von Gluck IV <kallisti5@unixzen.com>
+Date:   Fri Aug 7 12:55:40 2015 -0500
+
+    egl/dri2: Fix include path of u_atomic.h introduced e7e29189
+    
+    This was causing a failure to build on SCons due to a missing
+    -Isrc/egl. Instead of adding in that path, let's just -Isrc/
+    and include "utils/u_atomic.h".
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 6de9a03bed400fca5672ef0c13c0039bbe94a679
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Aug 7 19:20:48 2015 +0100
+
+    egl/x11: don't crash if dri2_dpy->conn is NULL
+    
+    Identical to commit 60e9c35b3a0(egl/x11: bail out if we cannot fetch
+    the xcb connection) but for the swrast codepath.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 2c7b6cf512a775a37677b1e467d2af952c449dae
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 17:19:07 2015 +0100
+
+    egl/x11: auth with xserver before attempting to open the dri module
+    
+    No real change, apart from keeping the calls to the underlying winsys
+    (x11) next to each other. Just like platform_wayland.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 45e110bad9d5d31eb67d7d32937aa5a752108df8
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 17:19:06 2015 +0100
+
+    egl/x11: trust our loader over the xserver for the drivername
+    
+    This is a port of commit 7bd95ec437a(dri2: Trust our own driver name
+    lookup over the server's.) from glx/dri2.
+    
+    v2: Add newline between code and multiline comment. (Matt)
+    
+    Cc: Julien Isorce <julien.isorce@gmail.com>
+    Reported-by: Julien Isorce <julien.isorce@gmail.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit faf0f811e3f9fb724a89c463c0cb6a0d61715f95
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 17:19:05 2015 +0100
+
+    egl/x11: open the device from within dri2_x11_connect()
+    
+    Allows us, with the next commit, to use alternative driver_name rather
+    than the one from xserver.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit beddb0a2371059829b20240058931b8c9fd5be40
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 17:19:04 2015 +0100
+
+    egl/x11: fetch the device_name prior to driver_name
+    
+    With the follow up commits we're about to further reshuffle things. Thus
+    we'll honour our driver_name lookup (src/loader), and use the one
+    provided by xserver as a fall-back.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit bf66988b08786c123804c2be8846a6a21cf200ad
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 17:19:03 2015 +0100
+
+    egl/x11: remove dri2_dpy->conn checks
+    
+    If the connection is NULL we won't be able to get here.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 60e9c35b3a0384860ffcb01d902a69ee13254eb9
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 17:19:02 2015 +0100
+
+    egl/x11: bail out if we cannot fetch the xcb connection
+    
+    The documentation of xcb_connection_has_error() does not mention
+    what will happen, if NULL is fed to the function.
+    
+    Upon closer look (props to Matt), it seems that we'll crash as the
+    implementation dereferences conn.
+    
+    This will also allow us to remove the dri2_dpy->conn checking with the
+    next commit.
+    
+    v2: Reword commit message as per Matt's findings.
+    
+    Acked-by: Alex Deucher <alexander.deucher@amd.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 75ce7919d6496981013a21a7055c668e47e7bed2
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 17 12:52:27 2015 +0100
+
+    vc4: add missing nir include, to fix the build
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 4fa0cd17b77039ab67e81991002b4d5947298278
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 17 12:41:24 2015 +0100
+
+    vc4: automake: remove unused include
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit a97f1b697b01dca9f72d8559f8269188d76dccc9
+Author: Serge Martin (EdB) <edb+mesa@sigluy.net>
+Date:   Fri Aug 7 10:40:31 2015 +0200
+
+    clover: Stub missing CL 1.2 functions.
+    
+    As suggested by Tom a long time ago
+    and in order to be able to create Piglit tests
+    
+    v2:
+    replace NOT_SUPPORTED_BY_CL_1_1 macro with an inline function
+    remove extra space in clLinkProgram arg
+    
+    v3:
+    use __func__
+    
+    v4:
+    back to a macro, it makes more sense to use it with __func__
+    
+    [ Francisco Jerez: Rename to CLOVER_NOT_SUPPORTED_UNTIL and pass the
+      minimum API version required by the entry point so the error
+      messages don't become stale when support for additional CL versions
+      is introduced. ]
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 0508861f29f2d3b79fb803353e4ea8ab32654bc4
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Tue Jun 23 13:03:13 2015 +0200
+
+    mesa: NULL check InfoLog
+    
+    When a program is compiled but linking failed, the sh->InfoLog
+    could be NULL. This is exploited by OpenGL ES 3.1 conformance tests.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit a0b7c1c86e028309e639368b2b556b755761f68f
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Tue Aug 4 10:06:41 2015 +0200
+
+    i965/vec4: Fix indentation in vec4_visitor::evaluate_spill_costs
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit f246aa6bcab57f85a143cbfe7e9de24237921249
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Fri Jul 31 14:36:30 2015 +0200
+
+    i965/vec4: do not predicate scratch writes for BRW_OPCODE_SEL instructions
+    
+    The dst is always written, in this case the predicate is only used to select
+    the value to write, so if we are spilling the dst we always want to write
+    whatever value we selected to scratch.
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 42d283a0cc928a9e3ecddf1a90f9417ef1a34392
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Aug 5 21:05:52 2015 +1000
+
+    glsl: remove stage ref generation for transform feedback
+    
+    Stage ref cannot be queried for transform feedback.
+    
+    Also simplify the build_stageref function by passing the
+    correct mode for uniforms.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 6dea2456ca82d2c62afbd90327d265c5e78fca9c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Jul 15 21:14:24 2015 +0200
+
+    winsys/radeon: add a specific error message for cs_submit -> -ENOMEM
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 42d9f6323a523d786fc3797587fdf63048becceb
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Apr 30 16:07:12 2015 +0200
+
+    winsys/radeon: add an interface for contexts
+    
+    Same idea as in libdrm_amdgpu.
+    
+    A command stream can only be created for a specific context and it's always
+    submitted to that context.
+    
+    This will mainly be used by amdgpu and it's required by the GPU reset status
+    query too.
+    (radeon only has a basic version of the query and thus doesn't need this)
+    
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 592ce6e2d1b2c804a95cb00c06e7bbb9d83f554b
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Aug 6 23:41:38 2015 +0200
+
+    gallium/radeon: unify buffer_wait and buffer_is_busy in the winsys interface
+    
+    The timeout parameter covers both cases.
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 8118d3719aee5fdf313c33dbf3256dd78ff46bea
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 3 21:43:36 2015 +0200
+
+    radeonsi: rename enable_s3tc -> enable_compressed_formats
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit a3723fb9e32ab114dcffcf74946def92647c5f03
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Jul 20 00:15:59 2015 +0200
+
+    gallium/radeon: add DRM and LLVM version to the renderer string
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit a3e81f819c20dd50d551de9b7e1280b2bd9c18de
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 16 14:40:00 2015 +0200
+
+    radeonsi: always flush framebuffer caches at the beginning of IBs
+    
+    better safe than sorry
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 0615ad1c70777b515d00aa5b0c41b1073ad5a2d1
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 27 14:49:34 2015 +0200
+
+    radeonsi: don't count the exact needed CS space if the CS is large enough
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 6d6208a431f6a01a22f892c71258fd3567d969b6
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed May 6 19:34:09 2015 +0200
+
+    radeonsi: don't crash when cleaning up after an incomplete context
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 9f78e27fc60b3473b708ab4ca04e4ebd6be6cf4e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Aug 6 10:59:15 2015 -0700
+
+    i965: Rename MIPTREE_LAYOUT_ALLOC_* -> MIPTREE_LAYOUT_TILING_*.
+    
+    Ben suggested that I rename MIPTREE_LAYOUT_ALLOC_ANY_TILED since it
+    needed to include no tiling at all, but the name
+    MIPTREE_LAYOUT_ALLOC_ANY is pretty nondescriptive. We can avoid
+    confusion by replacing "ALLOC" with "TILING" in the identifiers.
+    
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 1c175fc2e3a685b531920dec247086463ab9a154
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Aug 4 22:58:08 2015 -0700
+
+    i965: Correct a mistake that always forced texture tiling.
+    
+    Regression since commit 3a31876600, when tiling modes were moved into
+    layout_flags.
+    
+    The relevant enum values are
+    
+       MIPTREE_LAYOUT_ALLOC_YTILED = 1 << 5
+       MIPTREE_LAYOUT_ALLOC_XTILED = 1 << 6
+       MIPTREE_LAYOUT_ALLOC_ANY_TILED = MIPTREE_LAYOUT_ALLOC_YTILED |
+                                        MIPTREE_LAYOUT_ALLOC_XTILED
+       MIPTREE_LAYOUT_ALLOC_LINEAR = 1 << 7
+    
+    so the expression (layout_flags & MIPTREE_LAYOUT_ALLOC_ANY_TILED) can
+    never produce a value of MIPTREE_LAYOUT_ALLOC_LINEAR.
+    
+    The enum this replaced was
+    
+       enum intel_miptree_tiling_mode {
+          INTEL_MIPTREE_TILING_ANY,
+          INTEL_MIPTREE_TILING_Y,
+          INTEL_MIPTREE_TILING_NONE,
+       };
+    
+    where "ANY" means "Y" or "NONE" (i.e., linear). As such, remove the
+    unused (and worse, unhandled) MIPTREE_LAYOUT_ALLOC_XTILED and redefine
+    MIPTREE_LAYOUT_ALLOC_ANY_TILED to mean what it did before.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91513
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 3d551c5c7036b650124f23e4e2e3f40b9a8ad426
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Aug 5 18:51:24 2015 -0700
+
+    i965: Request a miptree with no tiling in intel_miptree_map_blit().
+    
+    Regression since commit 3a31876600, when tiling modes were moved into
+    layout_flags.
+    
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 30a7e0c021c3a77c20c6f041dc80b7dc90ad238f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 21:12:18 2015 +0200
+
+    radeonsi: add a HUD query showing the number of shaders created
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 70f5e49ba5ca8eb063a0d7db94fbef1585b21b2d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 16:57:39 2015 +0200
+
+    radeonsi: add a HUD query showing the number of compiler invocations
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 028528215a8a6d0a5945256cc67709eef2e68189
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 17:28:20 2015 +0200
+
+    gallium/radeon: display cumulative results for some driver queries
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 18501ff468db2091fde6029f4ec674b8365513e6
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 17:47:38 2015 +0200
+
+    gallium/radeon: switch the buffer-wait-time query to microseconds
+    
+    This displays the units in the HUD.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 0257e1fbd24e2ab442996296e49e2ebe4c0f07b1
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 17:09:01 2015 +0200
+
+    gallium/radeon: change some driver query types to Hz
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit dbfeb0ec12d6550e68de1bcd164e422e79bccf2d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 18:11:55 2015 +0200
+
+    gallium/hud: automatically print % if max_value == 100
+    
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 4e2a3e0376ca4fe39ca05e80557edfaa12e93e2b
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 18:11:09 2015 +0200
+
+    gallium/hud: fix printing % next to panes
+    
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit cbad30344d6e0b1ccc9fc8d5a8e6560e97dd9188
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 18:00:57 2015 +0200
+
+    gallium/hud: replace assertions with clamping the unit index
+    
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 97a65d90fe88e6b4b4a42d866b23e73ce72f6dc2
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 17:24:30 2015 +0200
+
+    gallium,hud: allow displaying cumulative values instead of average
+    
+    The cumulative value is useful for queries like the number of shader
+    compilations.
+    
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 130a03e360e6aebe93e86b1d522ebf22371aa2d4
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 17:08:29 2015 +0200
+
+    gallium/hud: fix printing byte units
+    
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 6b47b8978101897cc0dab8f2017e3aa25d31582d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 17:06:17 2015 +0200
+
+    gallium,hud: add support for Hz units in driver queries
+    
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 60159bcfc66a067b50da06f5cabfa20d72e898ed
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Aug 3 01:34:32 2015 +0200
+
+    radeonsi: before storing tess levels, load them from LDS instead of temporary
+    
+    Also use only one store if stride <= 4.
+    All the fetches from and stores to temporaries can be removed now.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91461
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit c2a5d1dcb14acbd2db4a674453a8622d4b9a572a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 22:01:25 2015 +0200
+
+    winsys/radeon: loosen up the requirements for how much memory IBs can use
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit cc59c78b0aa202f1a76a8708ec318e19a8502c9c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 31 17:26:08 2015 +0200
+
+    gallium/radeon: always use the llvm. prefix in intrinsic names
+    
+    Acked-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+
+commit 567394112d904096abff1d994ab952f475dfb444
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 31 11:45:13 2015 +0200
+
+    radeon/winsys: increase the IB size for VM
+    
+    Luckily, there is a kernel query, so use the size from that.
+    It currently returns 256KB. It can be increased in the kernel.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit d587742650c262dea8007474b9956fd65472f8b2
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 27 14:19:41 2015 +0200
+
+    gallium/radeon: allow the winsys to choose the IB size
+    
+    Picked from the amdgpu branch.
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 57245cce52d544c61f03fc966850f0f94e8118d5
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 31 02:39:02 2015 +0200
+
+    gallium/radeon: suspend timer queries between IBs
+    
+    When we are measuring the time spent in a draw call, an unexpected flush
+    can distort the result.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit b2eb13d602f71f19216284a584834cdaa2550eb3
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 30 16:14:03 2015 +0200
+
+    st/mesa: implement DrawTransformFeedbackStream
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 7d3939f0de7dcb5e68eca638d5832c683a124775
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 30 16:11:50 2015 +0200
+
+    mesa: save which transform feedback buffer is associated with which stream
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit c63e8b1193fd380e999b8ef258a20e57884820f4
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 30 15:43:09 2015 +0200
+
+    vbo: pass the stream from DrawTransformFeedbackStream to drivers
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 115964052b25a958b2ad4ec42ae07133b2768cf9
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon Aug 3 15:06:42 2015 -0600
+
+    mesa: handle no-op cases sooner in _mesa_[Client]ActiveTexture()
+    
+    If the new texture unit is the current texture unit, we can return
+    before error checking.
+    
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit ee977183dcb543c919d0d70dde610cb191d5a3ea
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Aug 4 19:07:19 2015 +0300
+
+    i965/fs: Lower arithmetic instructions with register regions of unsupported width.
+    
+    This extends the SIMD lowering pass to enforce the hardware limitation
+    that no directly-addressed source may read more than 2 physical GRFs.
+    One can easily go over this limit when doing 64-bit arithmetic
+    (e.g. FP64 or extended-precision integer MULs) or SIMD32, so it's nice
+    to be able to just emit an instruction of the intended execution size
+    from the visitor and let the lowering pass deal with this restriction
+    transparently.
+    
+    Some hardware arithmetic instructions are not handled here, including
+    all instructions that use the accumulator implicitly (which the SIMD
+    lowering pass deliberately doesn't handle), instructions with
+    non-per-channel sources (e.g. LINE or PLANE) and SEND-like
+    instructions, which need special handling most likely as virtual
+    opcodes.
+    
+    Reviewed-by: Connor Abbott <connor.w.abbott@intel.com>
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit 42a18ca76057621ae7d8812b29ea2245d6ff282d
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Aug 5 16:29:30 2015 +0300
+
+    i965/fs: Fix fs_inst::regs_read() for sources in the ATTR file.
+    
+    Otherwise it would crash on Gen8 with scalar VS.  The issue can easily
+    be reproduced with the following patch, but I don't see any reason why
+    it wouldn't be possible to end up with an ATTR argument here even
+    without it.
+    
+    CC: mesa-stable@lists.freedesktop.org
+    Reviewed-by: Connor Abbott <connor.w.abbott@intel.com>
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit e77a4a9b1f66de383043df95aada40fd5a004913
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Aug 4 19:08:45 2015 +0300
+
+    i965/fs: Implement nir_op_imul/umul_high in terms of MULH.
+    
+    And get rid of another no16() call.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 3b48a0eeda20f5cf2dbc8de5e36f8fe3461f41bf
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Aug 6 14:04:00 2015 +0300
+
+    i965/fs: Lower the MULH virtual instruction.
+    
+    Translate MULH into the MUL/MACH sequence.  This does roughly the same
+    thing that nir_emit_alu() used to do but we can now handle 16-wide by
+    taking advantage of the SIMD lowering pass.  The force_sechalf
+    workaround near the bottom is required because the SIMD lowering pass
+    will emit instructions with non-zero quarter control and we need to
+    make sure we avoid that on integer arithmetic instructions with
+    implicit accumulator access due to a known hardware bug on IVB.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 2e731264382954beb1192cd7cc62e16e0b8e7978
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Aug 5 16:47:18 2015 +0300
+
+    i965/fs: Indent the implementation of 32x32-bit MUL lowering by one level.
+    
+    In order to make room for the code that will lower the MULH virtual
+    instruction.  Also move the hardware generation and execution type
+    checks into the same branch, they are going to have to be different
+    for MULH.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit f5b37fb1acad9cf044b7b6d4fa5f2582bd8bc7f4
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Aug 5 16:43:37 2015 +0300
+
+    i965/fs: Lower 32x32 bit multiplication on BXT.
+    
+    AFAIK BXT has the same annoying alignment limitation as CHV on the
+    source register regions of 32x32 bit MULs, give it the same treatment.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 8f5d0988ea2ccaba7f049f113b652f331524d2a6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Aug 4 19:04:55 2015 +0300
+
+    i965: Define virtual instruction to calculate the high 32 bits of a multiply.
+    
+    This instruction will translate to the MUL/MACH sequence that computes
+    the high 32-bits of the result of a 64-bit multiply.  Before Gen8
+    integer operations that used the accumulator were limited to 8-wide,
+    but the SIMD lowering pass can easily be hooked up to sidestep this
+    limitation, we just need a virtual opcode to represent the MUL/MACH
+    sequence in the IR.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit f7ac4ef4eeea737115d0b574fed7ecae46426072
+Author: Michel Dänzer <michel.daenzer@amd.com>
+Date:   Wed Aug 5 18:17:14 2015 +0900
+
+    glsl: Initialize patch member of glsl_struct_field
+    
+    There is apparently a subtle difference in C++ between
+    
+        F f;
+    
+    and
+    
+        F f();
+    
+    The former will use the default constructor.  If there is no default
+    constructor specified, the compiler provides one that simply invokes the
+    default constructor for each field.  For built-in basic types, the
+    default constructor does nothing.  The latter will (according to
+    http://stackoverflow.com/questions/2417065/does-the-default-constructor-initialize-built-in-types)
+    perform value-initialization of the type.  For built-in types this means
+    initializing to zero.
+    
+    The per_vertex_accumulator constructor is:
+    
+        per_vertex_accumulator::per_vertex_accumulator()
+           : fields(),
+             num_fields(0)
+        {
+        }
+    
+    This is the second form of constructor, so the glsl_struct_field
+    objects were previously zero initialized.  With the addition of an empty
+    default constructor in commit 7ac946e5, per_vertex_accumulator::fields
+    receive no initialization.
+    
+    Fixes a bunch of random (mostly tessellation related) piglit failures
+    since commit 7ac946e5 ("glsl: Add constuctors for the common cases of
+    glsl_struct_field").
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91544
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 2c61d583f8c931fc9834dd852b1c960c95acefb5
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Aug 5 20:27:24 2015 +1000
+
+    nir: add missing type to type_size_vec4()
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 03b7221dbb93e2439f30b2e0918f6215eb741979
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jul 29 16:01:27 2015 +0200
+
+    mesa: Add missing check of format and type in glTexSubImageXD on GLES 3.0
+    
+    Argument validation for glTexSubImageXD is missing a check of format and type
+    against texture object's internal format when profile is OpenGL-ES 3.0+.
+    
+    This patch also groups together all format and type checks on GLES into a
+    new function texture_format_error_check_gles(), to factorize similar
+    code in texture_format_error_check().
+    
+    Fixes 2 dEQP tests:
+    * dEQP-GLES3.functional.negative_api.texture.texsubimage2d
+    * dEQP-GLES3.functional.negative_api.texture.texsubimage3d
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 4b07e9a033ddb6733eba206b5bd47a2373756f7d
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jul 29 16:01:28 2015 +0200
+
+    mesa: Fix error returned by glCopyTexImage2D() upon an invalid internal format
+    
+    Page 161 of the OpenGL-ES 3.1 (PDF) spec, and page 207 of the OpenGL 4.5 (PDF),
+    both in section '8.6. ALTERNATE TEXTURE IMAGE SPECIFICATION COMMANDS', state:
+    
+        "An INVALID_ENUM error is generated if an invalid value is specified for
+         internalformat".
+    
+    It is currently returning INVALID_OPERATION error because
+    _mesa_get_read_renderbuffer_for_format() is called before the internalformat
+    argument has been validated. To fix this, we move this call down the validation
+    process, after _mesa_base_tex_format() has been called. _mesa_base_tex_format()
+    effectively serves as a validator for the internal format.
+    
+    Fixes 1 dEQP test:
+    * dEQP-GLES3.functional.negative_api.texture.copyteximage2d_invalid_format
+    
+    Fixes 1 piglit test:
+    * spec@oes_compressed_etc1_rgb8_texture@basic
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+
+commit 5d64cae8427b090c42d6d38da7fb474b3ddd4eb0
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jul 29 16:01:26 2015 +0200
+
+    mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD
+    
+    Currently, glTexSubImageXD attempts to resolve the texture object
+    (by calling _mesa_get_current_tex_object()) before validating the given
+    target. However, that method explicitly states that target must have been
+    validated before calling it, so it never returns a user error.
+    
+    The target validation occurs later when texsubimage_error_check() is called.
+    
+    This patch reorganizes target validation, taking it out from the error check
+    function and into a point before the texture object is resolved.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+
+commit b38a50f1e3edae6079c91f73a8d9c63a2dbf512a
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jul 29 16:01:23 2015 +0200
+
+    mesa: Fix error values returned by glShaderBinary()
+    
+    Page 68, section 7.2 'Shader Binaries' of the OpenGL ES 3.1,
+    and page 88 of the OpenGL 4.5 specs state:
+    
+        "An INVALID_VALUE error is generated if count or length is negative.
+         An INVALID_ENUM error is generated if binaryformat is not a supported
+         format returned in SHADER_BINARY_FORMATS."
+    
+    Currently, an INVALID_OPERATION error is returned for all cases.
+    
+    Fixes 1 dEQP test:
+    * dEQP-GLES3.functional.negative_api.shader.shader_binary
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+
+commit 784bea5a38c219a5ab587ff1ddce8879d4f7dce1
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Tue Aug 4 11:09:35 2015 +0300
+
+    mesa: do not modify args when errors with GetProgramResourceName
+    
+    Original purpose of these lines was to be more friendly against
+    GUI tools using the extension. However conformance suite explicitly
+    checks that buffers are not modified in error conditions.
+    
+    Fixes:
+       ES31-CTS.program_interface_query.buff-length
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 18c5cdb9433b472d9aad13175295a848bce03185
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Aug 3 08:48:32 2015 +0300
+
+    glsl: add variable mode check to build_stageref
+    
+    Currently stage reference mask is built using the variable name
+    only. However it can happen that input of one stage has same name
+    as output from another stage. Adding check of variable mode makes
+    sure we do not pick wrong variable.
+    
+    Fixes some subcases from
+       ES31-CTS.program_interface_query.no-locations
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 7d88413ade2c41054f79b20338253aacf1ac341d
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Fri Jul 31 09:11:47 2015 +0100
+
+    dri: set the __DRI_API_OPENGL bit based on max gl compat version
+    
+    This matches similar behaviour for the __DRI_API_OPENGL_CORE bit.
+    
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit b2c5986ea1c8e66c4e0a05bcacbcf28c27f5b183
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Fri Jul 31 09:11:45 2015 +0100
+
+    egl: Add eglQuerySurface surface type check for EGL_LARGEST_PBUFFER attrib
+    
+    Calling eglQuerySurface on a window or pixmap with the EGL_LARGEST_PBUFFER
+    attribute resulted in the contents of the 'value' parameter being modified.
+    This is the wrong behaviour according to the EGL spec, which states:
+    
+        "Querying EGL_LARGEST_PBUFFER for a pbuffer surface returns the
+         same attribute value specified when the surface was created with
+         eglCreatePbufferSurface. For a window or pixmap surface, the
+         contents of value are not modified."
+    
+    Prevent this from happening by checking that the surface type is EGL_PBUFFER_BIT
+    before modifying the contents of the parameter.
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit cfc3200a35647026a0b5cf188f378ce33802044b
+Author: Frank Binns <frank.binns@imgtec.com>
+Date:   Fri Jul 31 09:11:46 2015 +0100
+
+    egl/dri: Add error info needed for EGL_EXT_image_dma_buf_import extension
+    
+    Update the DRI image interface error codes to reflect the needs of the
+    EGL_EXT_image_dma_buf_import extension. This means updating the existing error
+    code documentation and adding a new __DRI_IMAGE_ERROR_BAD_ACCESS error code
+    so that drivers can correctly reject unsupported pitches and offsets. Hook
+    the new error code up in EGL to return EGL_BAD_ACCESS.
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Frank Binns <frank.binns@imgtec.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit ee47d13abbc6770b4e6513c894ede56b1e846785
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Aug 3 17:36:47 2015 -0700
+
+    vc4: Use nir_lower_load_const_to_scalar().
+
+commit 6c28ee20410afe97dd441b0c9c680b26eb4072fc
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Aug 3 17:20:33 2015 -0700
+
+    nir: Add a nir_lower_load_const_to_scalar() pass.
+    
+    This is useful to increase the CSE opportunities for a scalar backend.  It
+    avoids regressions when dropping vc4's custom CSE implementation.
+    
+    v2: Cleanups by Matt (decl in the for loop, and unreachable()).
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 45248d3640f5a0356085e26c44548bf3af5dec0f
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Aug 3 19:08:37 2015 -0700
+
+    vc4: Don't bother de-SSAing values that aren't part of phi webs.
+    
+    We can just support them the same way we do load_const's SSA values.
+
+commit a70f63ab20d8bf922a307a92020237b1dec36314
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 31 09:12:48 2015 -0700
+
+    nir: Add algebraic opt for no-op iand.
+    
+    I lazily generated some of these in VC4 NIR lowering.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 63eac5de8fd0e091d07f866a42584c057ca4bfa9
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 31 17:08:46 2015 -0700
+
+    vc4: Don't bother saturating the dst color for blending.
+    
+    Since we just pulled it out of the destination as 8-bit unorm, we know
+    it's in [0, 1] already.
+    
+    shader-db:
+    total instructions in shared programs: 100040 -> 98208 (-1.83%)
+    instructions in affected programs:     14084 -> 12252 (-13.01%)
+
+commit cc8fb2904673588d31b660dbfaf692615b5202dd
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 31 11:46:56 2015 -0700
+
+    vc4: Make r4-writes implicitly move to a temp, and allocate temps to r4.
+    
+    Previously, SFU values always moved to a temporary, and TLB color reads
+    and texture reads always lived in r4.  Instead, we can have these results
+    just be normal temporaries, and the register allocator can leave the
+    values in r4 when they don't interfere with anything else using r4.
+    
+    shader-db results:
+    total instructions in shared programs: 100809 -> 100040 (-0.76%)
+    instructions in affected programs:     42383 -> 41614 (-1.81%)
+
+commit 9b403c0756ecf806a8ff768bd73a4cbf42986bdb
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 31 10:02:45 2015 -0700
+
+    vc4: Drop a dead prototype.
+
+commit eae9c3286e2990879c6a01df3c9042b1e4031d5c
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Aug 4 17:18:43 2015 -0700
+
+    Revert "nir: Use a single bit for the dual-source blend index"
+    
+    This reverts commit ab5b7a0fe659ff6f9c1885d5cb047b6531959506.  We use more
+    than one bit of value in tgsi_to_nir.
+
+commit d6d7515bec2e7421dcbc17f31f94613643599e33
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Aug 1 16:17:49 2015 -0400
+
+    freedreno/a4xx: add independent blend function support
+    
+    needed for MRT
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 054526e49abb5e7fd49fed6f589cff6f1ab4c9f6
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 31 15:32:58 2015 -0400
+
+    freedreno/a4xx: MRT support
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit b37a97c97d6477d5062a75a0313162ed324a36ed
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 31 14:34:19 2015 -0400
+
+    freedreno: move the half-precision logic into core
+    
+    Both a3xx and a4xx need the same logic to decide if half-precision can
+    be used for blit shaders.  So move it to core and simplify things a bit
+    with a helper that considers all render targets.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 5ca032a9a8ece0a8a43151f988215484da3c1811
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 31 12:07:24 2015 -0400
+
+    freedreno: simplify/cleanup resource status tracking
+    
+    Collapse dirty/reading bools into status bitmask (and drop writing which
+    should really be the same as dirty).  And use 'used_resources' list for
+    all tracking, including zsbuf/cbufs, rather than special casing the
+    color and depth/stencil buffers.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit c7deea51d2b611564c91e146fbd1ed0b547f65c0
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 31 10:54:23 2015 -0400
+
+    freedreno: fix stream-out caps vec4->components
+    
+    Should be in units of components, not vec4's
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit a221f8d9ebb4ef43a83ef638458d1338dfe1e517
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 31 09:36:31 2015 -0400
+
+    freedreno: small bit of cleanup about max rendertargets
+    
+    We hard-coded 4 or 8 as the max in various places.  Switch it all to a
+    define since the limit will go up with a4xx (and maybe even again in the
+    future?)
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 5f247a9656cb8a0eccdc98ef5911ed15c1248dfb
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 18:05:58 2015 -0700
+
+    glx: Use _mesa_lroundevenf() in glPixelStoref().
+    
+    Functional change in which way half-way cases are rounded from towards
+    positive-infinity to even. The spec says "the passed value is rounded to
+    the nearest integer". Removes another case of bad half-up rounding.
+
+commit 680de24545d23d0c2b699020267ca484f81a04a9
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 29 09:38:34 2015 -0700
+
+    util: Use SSE intrinsics in _mesa_lroundeven{f,}.
+    
+    gcc actually generates this for us now that we use -fno-math-errno
+    (which is weird, since lrintf()/lrint() don't set errno) but clang still
+    does not. Presumably helps MSVC as well.
+    
+    Reduced .text size by 8.5k with gcc before -fno-math-errno.
+    
+       text     data      bss      dec      hex  filename
+    4935850   195136    26192  5157178   4eb13a  i965_dri.so before
+    4927225   195128    26192  5148545   4e8f81  i965_dri.so after
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 3c050222b0d5b47c885ca72f3c7af22c0d28b5ad
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Jun 25 21:43:30 2015 -0700
+
+    mesa: Use _mesa_lroundevenf() in some more places.
+
+commit 996349cb190154ebdc8cc9f23e5f8f9aabbd6b4d
+Author: Vinson Lee <vlee@freedesktop.org>
+Date:   Wed Jul 29 20:32:41 2015 -0700
+
+    vl/mpeg12: Silence GCC unused-variable warning.
+    
+    vl/vl_mpeg12_bitstream.c: In function 'decode_slice':
+    vl/vl_mpeg12_bitstream.c:928:19: warning: unused variable 'extra' [-Wunused-variable]
+              unsigned extra = vl_vlc_get_uimsbf(&bs->vlc, 1);
+                       ^
+    
+    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit e23cbaadaac0c67a72b10e3dd14b75abc19ab3c5
+Author: Alejandro Seguí <alesegdia@gmail.com>
+Date:   Mon Aug 3 02:15:20 2015 +0200
+
+    glsl: replace old hash table with new and faster one
+    
+    The util/hash_table was intended to be a fast hash table
+    replacement for the program/hash_table; see 35fd61bd99c1 and
+    72e55bb6888ff.
+    
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 7ac946e546bba440f87ce95ef022745201744f9c
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jul 30 06:00:56 2015 -0700
+
+    glsl: Add constructors for the common cases of glsl_struct_field
+    
+    Fixes a giant pile of GCC warnings:
+    
+    builtin_types.cpp:60:1: warning: missing initializer for member 'glsl_struct_field::stream' [-Wmissing-field-initializers]
+    
+    I had to add a default constructor because a non-default constructor
+    was added.  Otherwise the only constructor would be the one with
+    parameters, and all the places like
+    
+        glsl_struct_field foo;
+    
+    would fail to compile.
+    
+    I wanted to do this in two patches.  All of the initializers of
+    glsl_struct_field structures had to be converted to use the
+    constructor because C++ apparently forces you to do one or the other:
+    
+    builtin_types.cpp:61:1: error: could not convert '{glsl_type::float_type, "near", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0, -1}' from '<brace-enclosed initializer list>' to 'glsl_struct_field'
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit 93977d3a151675946c03ec28102c651691cdb0bd
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jun 25 10:45:34 2015 -0700
+
+    i965: Make gen7_upload_ps_state static
+    
+    It is only ever called from within the same file.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit 7a12e646d3874f4ff755e05cfb27560d11d075a7
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jun 25 08:43:13 2015 -0700
+
+    i965: Remove extern declaration for nonexistent state atom
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit d302f51a1ee949fae5dc53f3c872c2712021caf7
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jun 25 08:20:01 2015 -0700
+
+    i965: Trivial formatting changes in gen7_vs_state.c
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit f917a65b3eeaf0e201bd7e695a5d13403e7ad487
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jun 25 08:20:01 2015 -0700
+
+    i965: Trivial formatting changes in gen6_multisample_state.c
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit 07433760e3311ff17c0f909514ececdae9f6e9c6
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jun 25 08:20:01 2015 -0700
+
+    i965: Trivial formatting changes in brw_misc_state.c
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit 680d09b072af7ea1541cfd4fbc62c83e8bd02d0d
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jun 25 08:20:01 2015 -0700
+
+    i965: Trivial formatting changes in brw_draw_upload.c
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit 5b6218395c303ff82a19294c05c63c7b92d24e3f
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Thu Jun 18 18:45:44 2015 -0700
+
+    i965: Trivial formatting changes in brw_draw.c
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit 2b81cefb3fec3c5c17e7ef9f95c9681abfad5386
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Wed Jun 10 17:09:16 2015 -0700
+
+    i965: Trivial formatting changes in brw_wm.c
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+
+commit da1b1bf85cdc691ec27f379de84dec495cdd51e0
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Wed Jul 15 09:32:17 2015 +0200
+
+    i965/nir: Do not scalarize phis in non-scalar setups
+    
+    Significantly reduces register pressure in some piglit tests.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 34d162260f513a7eaec12611e3859bb34230cf33
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jul 8 16:08:17 2015 +0200
+
+    i965/vec4: Handle uniform and GRF array access on vertex programs (NIR)
+    
+    When the NIR-vec4 pass is enabled, handles uniform and GRF array access
+    on ARB_vertex_program like it is done on vertex shaders.
+    
+    When the old IR-vec4 pass is used, emit_program_code() emits pull constant
+    loads directly instead of using relative addressing, hence the call to
+    move_uniform_array_access_to_pull_constants() is not needed and it is enough
+    to call split_uniform_registers().
+    
+    The patch also calls move_grf_array_access_to_scratch() like it is
+    done for shaders, however I suspect this is a no-op for vertex programs and
+    we could remove it.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 82f2e706bfd646b91bc0b8beecdff4e54b1f7b04
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Mon Jun 29 14:21:38 2015 +0200
+
+    i965/nir/vec4: Handle uniforms on vertex programs
+    
+    The implementation takes into account that on ARB_vertex_program
+    only a single nir variable is generated to support all the uniform data.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 90825e3ca977057c8f3d6ad2d1aa38277cc3ff11
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jul 8 12:44:15 2015 +0200
+
+    i965/vec4: Enable NIR-vec4 pass on ARB_vertex_programs
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 287b006a673dabe3e21cc207a1b4622ef91a877e
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Wed Jul 1 10:12:10 2015 +0200
+
+    i965/nir/gs: Implement support for gl_InvocationID system value
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7eced3aa863394c6e74ac3f037ed1cf9c481fe37
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Mon Jul 13 15:51:17 2015 +0200
+
+    i965/gs/gen6: Refactor ir_emit_vertex and ir_end_primitive for gen6
+    
+    So the implementation is independent of GLSL IR and the visit methods of the
+    gen6 GS visitor. This way we will be able to reuse that implementation directly
+    from the NIR vec4 backend.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 1836201fde1826c82f579fb132455c8df4176ecd
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Jun 29 14:08:11 2015 +0200
+
+    i965/nir/gs: Implement EmitVertex and EndPrimitive
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 551af29d2d8be33b66641fe47ee5156489c16132
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Jun 29 13:52:30 2015 +0200
+
+    i965/nir/gs: Handle geometry shaders inputs
+    
+    Outputs from the vertex shader become array inputs in the geometry shader,
+    but the arrays are interleaved, so we need to map our inputs accordingly.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7ade42755f8900aaf67073214c073419f734e7a8
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Jun 29 13:37:31 2015 +0200
+
+    i965/gs: Refactor ir_emit_vertex and ir_end_primitive
+    
+    So the implementation is independent of GLSL IR and the visit methods of the
+    vec4 visitor. This way we will be able to reuse that implementation directly
+    from the NIR vec4 backend.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 38fc4a91cd5c04fdd5921b8776f8e203513ab517
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Wed Jul 1 09:51:25 2015 +0200
+
+    i965/nir: Enable NIR-vec4 pass on geometry shaders
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 418c004f802e63ca4e9f3456a46498d2fc543854
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Thu Jun 11 12:32:26 2015 +0200
+
+    nir: Fix output swizzle in get_mul_for_src
+    
+    Avoid copying an overwritten swizzle, use the original values.
+    
+    Example:
+    
+       Former swizzle[] = xyzw
+       src->swizzle[] = zyxx
+    
+    The expected output swizzle = zyxx but if we reuse swizzle in the loop,
+    then output swizzle would be zyzz.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 19cf934f7f18237e1a212b0a019026d5d36c6fac
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Mon Jul 6 15:08:15 2015 +0200
+
+    i965/nir/vec4: Add implementation of nir_emit_texture()
+    
+    Uses the nir structure to get all the info needed (sources,
+    dest reg, etc), and then it uses the common
+    vec4_visitor::emit_texture to emit the final code.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 1343f403b2d08a0877f17133abb6dccf0f51127b
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Mon Jul 6 14:33:21 2015 +0200
+
+    i965/ir/vec4: Refactor visit(ir_texture *ir)
+    
+    Split in two. The emission is moved to a new vec4_visitor
+    method, vec4_visitor::emit_texture, in order to be reused
+    on the nir path.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 0d43d27df742ad95a086580bae2ee08a0bc00e69
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Sat May 23 23:42:58 2015 +0200
+
+    i965/vec4: Add a new dst_reg constructor accepting a brw_reg_type
+    
+    This is useful for the upcoming texture support in NIR->vec4 pass,
+    as we found several cases where the brw_type is available, but not
+    the glsl_type.
+    
+    Without this new constructor, the alternative would be:
+    dst_reg reg(MRF, <reg>)
+    reg.type = <brw_type>
+    reg.writemask = <mask>
+    
+    Adding a new constructor makes code easier to read.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit c15eea2afa7a295992cde949b8e2a5d4552f6290
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Mon Jul 6 13:31:05 2015 +0200
+
+    i965/vec4: Change vec4_visitor::swizzle_result() method to allow reuse
+    
+    This patch changes the signature of swizzle_result() to accept lower
+    level arguments. The purpose is to reuse it in the upcoming NIR->vec4
+    pass.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 57182332b84b58fed6641314def67450893b7419
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Thu Jun 18 12:12:21 2015 +0200
+
+    i965/vec4: Change vec4_visitor::gather_channel() method to allow reuse
+    
+    This patch changes the signature of gather_channel() to accept the gather
+    component directly instead of fetching it internally from ir_texture.
+    This will allow reuse in the upcoming NIR->vec4 pass.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 72c8d7721feb966cf8530a3ee2642f0b842dc0f8
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Thu Jun 18 11:31:54 2015 +0200
+
+    i965/vec4: Change vec4_visitor::emit_mcs_fetch() method to allow reuse
+    
+    This patch changes the signature of emit_mcs_fetch() to accept lower level
+    arguments. The purpose is to reuse it in the upcoming NIR->vec4 pass.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 434481f3155040217c3e5a8da98dab4248435f0e
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Thu Jun 18 09:37:33 2015 +0200
+
+    i965/vec4: Move is_high_sample() method to vec4_visitor class
+    
+    The is_high_sample() method is currently accessible only in the implementation of
+    vec4_visitor. Since we need to reuse it in the upcoming NIR->vec4 pass, let's make
+    it a method of the class instead.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit db8a6de571bb72ef43209a415e5492001a87b1d8
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jun 17 10:59:10 2015 +0200
+
+    i965/nir: Add new utility method brw_glsl_base_type_for_nir_type()
+    
+    This method returns the glsl_base_type corresponding to a nir_alu_type.
+    It will factorize code currently present in fs_nir, that can be reused
+    in vec4_nir on its upcoming emit_texture support.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 583c1c61703826002ba0f202e8ef7bc2c822ef1d
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jun 17 10:20:19 2015 +0200
+
+    i965/nir/vec4: Implement nir_emit_jump
+    
+    This implementation is taken as-is from fs_nir.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 9b4a6fa4c09d36e0e5c00309e6ea37300ea38f78
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 10:10:44 2015 +0200
+
+    i965/nir/vec4: Mark as unreachable ops that should be already lowered
+    
+    NIR ALU operations:
+       * nir_op_fabs
+       * nir_op_iabs
+       * nir_op_fneg
+       * nir_op_ineg
+       * nir_op_fsat
+            should be lowered by lower_source mods
+    
+       * nir_op_fdiv
+            should be lowered in the compiler by DIV_TO_MUL_RCP.
+    
+       * nir_op_fmod
+            should be lowered in the compiler by MOD_TO_FLOOR.
+    
+       * nir_op_fsub
+       * nir_op_isub
+            should be handled by ir_sub_to_add_neg.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 16072834babc487f78472f7e7b59d35249a3aac8
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 10:08:27 2015 +0200
+
+    i965/nir/vec4: Implement vector "any" operation
+    
+    Adds NIR ALU operations:
+       * nir_op_bany2
+       * nir_op_bany3
+       * nir_op_bany4
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit fa4e3c3c9f6f3a72a032499fccaa6e222d6a7fa4
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 10:06:44 2015 +0200
+
+    i965/nir/vec4: Implement the dot product operation
+    
+    Adds NIR ALU operations:
+       * nir_op_fdot2
+       * nir_op_fdot3
+       * nir_op_fdot4
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 96106e2a9f214d98fc2e99c65398f95d41a3b879
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 10:05:29 2015 +0200
+
+    i965/nir/vec4: Implement conditional select
+    
+    Adds NIR ALU operations:
+       * nir_op_bcsel
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit b38fcd0aea8d17919ecd9cc7afc518cfb2c01c27
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:52:43 2015 +0200
+
+    i965/nir/vec4: Implement linear interpolation
+    
+    Adds NIR ALU operation:
+       * nir_op_flrp
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 314474872b77f291132a01f7c1df2788586fc943
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 10:01:07 2015 +0200
+
+    i965/vec4: Return the emitted instruction in emit_lrp()
+    
+    Needed in the NIR backend to set the "saturate" value of the
+    instruction.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit b64bd1fdc37eed1bb62d2b32ad22f0f77501f7f2
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:51:10 2015 +0200
+
+    i965/nir/vec4: Implement floating-point fused multiply-add
+    
+    Adds NIR ALU operation:
+       * nir_op_ffma
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit d12e165dbb403c3cf86ab7f1b8f28ab6188b479f
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:49:31 2015 +0200
+
+    i965/nir/vec4: Implement "shift" operations
+    
+    Adds NIR ALU operations:
+       * nir_op_ishl
+       * nir_op_ishr
+       * nir_op_ushr
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 798cb33a256f703ecaf56d4443e12055484d4bcc
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:47:41 2015 +0200
+
+    i965/nir/vec4: Implement the "sign" operation
+    
+    Follows the vec4_visitor IR implementation but
+    sets the saturate value in addition.
+    
+    Adds NIR ALU operations:
+       * nir_op_fsign
+       * nir_op_isign
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 8e1e6facbf828258a9a8ca09da846d1baa21d984
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:44:25 2015 +0200
+
+    i965/nir/vec4: Implement bit operations
+    
+    Same implementation as the IR case.
+    
+    Adds NIR ALU operations:
+       * nir_op_bitfield_reverse
+       * nir_op_bit_count
+       * nir_op_ufind_msb
+       * nir_op_ifind_msb
+       * nir_op_find_lsb
+       * nir_op_ubitfield_extract
+       * nir_op_ibitfield_extract
+       * nir_op_bfm
+       * nir_op_bfi
+       * nir_op_bitfield_insert
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 0e874985ce50d902535e1eb766bd252c921b5d8f
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:29:04 2015 +0200
+
+    i965/nir/vec4: Implement pack/unpack operations
+    
+    * Lowered floating-point pack and unpack operations are not valid in VS.
+    
+    * Pack and unpack 2x16 operations should be handled by lower_packing_builtins.
+    
+    * Adds NIR ALU operations:
+       * nir_op_pack_half_2x16
+       * nir_op_unpack_half_2x16
+       * nir_op_unpack_unorm_4x8
+       * nir_op_unpack_snorm_4x8
+       * nir_op_pack_unorm_4x8
+       * nir_op_pack_snorm_4x8
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 3f10c2f3d73ae41ff83afcdbe225121b8336f499
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:23:10 2015 +0200
+
+    i965/nir/vec4: "noise" ops should already be lowered
+    
+    Marked them as unreachable.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit fa4731f4a53aa21e53a62f42f3afdc19b0ce4c8e
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:21:30 2015 +0200
+
+    i965/nir/vec4: Implement "bool<->int,float" format conversion
+    
+    Used the same implementation as the vec4_visitor NIR.
+    
+    Adds NIR ALU operations:
+       * nir_op_b2i
+       * nir_op_b2f
+       * nir_op_f2b
+       * nir_op_i2b
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit f14199a8fb802f6672d559fa958a5ee84e3e13f1
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:07:20 2015 +0200
+
+    i965/nir/vec4: Implement logical operators
+    
+    Adds NIR ALU operations:
+       * nir_op_inot
+       * nir_op_ixor
+       * nir_op_ior
+       * nir_op_iand
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 51aeafaf96b3b349e007ad05738bc1e05663fedf
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 09:01:28 2015 +0200
+
+    i965/nir/vec4: Implement non-equality ops on vectors
+    
+    Adds NIR ALU operations:
+       * nir_op_bany_fnequal2
+       * nir_op_bany_inequal2
+       * nir_op_bany_fnequal3
+       * nir_op_bany_inequal3
+       * nir_op_bany_fnequal4
+       * nir_op_bany_inequal4
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 8be4b876c90192c3a5e6fcc9b526f43a3f7bfc11
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:55:24 2015 +0200
+
+    i965/nir/vec4: Implement equality ops on vectors
+    
+    Adds NIR ALU operations:
+       * nir_op_ball_fequal2
+       * nir_op_ball_iequal2
+       * nir_op_ball_fequal3
+       * nir_op_ball_iequal3
+       * nir_op_ball_fequal4
+       * nir_op_ball_iequal4
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 84d4a9dc2ca3d98f19cc9125a5ff1ac1225f360d
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:49:42 2015 +0200
+
+    i965/nir/vec4: Implement non-vector comparison ops
+    
+    Adds NIR ALU operations:
+       * nir_op_flt
+       * nir_op_ilt
+       * nir_op_ult
+       * nir_op_fge
+       * nir_op_ige
+       * nir_op_uge
+       * nir_op_feq
+       * nir_op_ieq
+       * nir_op_fne
+       * nir_op_ine
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit b9c41affcf67f30d7f6c74c17ea34bc42756d56d
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Fri Apr 17 17:58:35 2015 +0200
+
+    i965/nir: Add utility method for comparisons
+    
+    This method returns the brw_conditional_mod value used when emitting
+    comparative ALU operations.
+    
+    It could be moved to brw_nir in the future to reuse it in fs_nir backend.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit dae6025e8efdfb759458a3243c8cd1588f485135
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Apr 14 12:04:24 2015 +0200
+
+    i965/nir/vec4: Derivatives are not allowed in VS
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 5e6f1c38a591fa39cff1c32a2cfdda927145756a
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:34:57 2015 +0200
+
+    i965/nir/vec4: Implement min/max operations
+    
+    Adds NIR ALU operations:
+       * nir_op_fmin
+       * nir_op_imin
+       * nir_op_umin
+       * nir_op_fmax
+       * nir_op_imax
+       * nir_op_umax
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit d53098393e3929b0c8d82f56144c7497b184f5b7
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:32:58 2015 +0200
+
+    i965/vec4: Return the emitted instruction in emit_minmax()
+    
+    Needed in the NIR backend to set the "saturate" value of the
+    instruction.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7553a51a68c0b2030265fe741f9c511b65047914
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:25:02 2015 +0200
+
+    i965/nir/vec4: Implement various rounding functions
+    
+    Adds NIR ALU operations:
+       * nir_op_ftrunc
+       * nir_op_fceil
+       * nir_op_ffloor
+       * nir_op_ffrac
+       * nir_op_fround_even
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 0ce159ec7fbcdf00c488b77f63e565e89ef6cab5
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:22:14 2015 +0200
+
+    i965/nir/vec4: Implement carry/borrow for addition/subtraction
+    
+    Adds NIR ALU operations:
+       * nir_op_uadd_carry
+       * nir_op_usub_borrow
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 62cef7b0723ad6ca49ed06a6899a5852e41359e8
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:10:18 2015 +0200
+
+    i965/nir/vec4: Implement more math operations
+    
+    Adds NIR ALU operations:
+       * nir_op_frcp
+       * nir_op_fexp2
+       * nir_op_flog2
+       * nir_op_fexp
+       * nir_op_flog
+       * nir_op_fsin
+       * nir_op_fcos
+       * nir_op_idiv
+       * nir_op_udiv
+       * nir_op_umod
+       * nir_op_ldexp
+       * nir_op_fsqrt
+       * nir_op_frsq
+       * nir_op_fpow
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 068a41b349e8bc30293c44d96553184f7562949f
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Wed Jun 17 00:04:09 2015 +0200
+
+    i965/vec4: Return the last emitted instruction in emit_math()
+    
+    Needed in the NIR backend to set the "saturate" value of the
+    instruction.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 9acebf146184c35e6897b91fff414c5295d47996
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Jun 16 23:50:46 2015 +0200
+
+    i965/nir/vec4: Implement multiplication
+    
+    Implementation based on the vec4_visitor IR implementation
+    for the operations ir_binop_mul and ir_binop_imul_high.
+    
+    Adds NIR ALU operations:
+       * nir_op_fmul
+       * nir_op_imul
+       * nir_op_imul_high
+       * nir_op_umul_high
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 0675842b56a956befbac4a3b912823e73a95a500
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Jun 16 23:48:46 2015 +0200
+
+    i965/nir/vec4: Implement the addition operation
+    
+    Adds NIR ALU operations:
+       * nir_op_fadd
+       * nir_op_iadd
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 4f39b547da4f9949d1b1f9f0df07d08951f0358d
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Jun 16 23:04:32 2015 +0200
+
+    i965/nir/vec4: Implement int<->float format conversion ops
+    
+    Adds NIR ALU operations:
+       * nir_op_f2i
+       * nir_op_f2u
+       * nir_op_i2f
+       * nir_op_u2f
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit e4f02f47e70d384531ac68e6d33a62fdcdbd1f28
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Jun 16 22:58:15 2015 +0200
+
+    i965/nir/vec4: Lower "vecN" instructions and mark them unreachable
+    
+    This enables NIR pass "lower_vec_to_movs" on shaders that work on vec4.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 79154d99d6e760b1daf327b4594dded18f1d4191
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Jun 16 22:52:29 2015 +0200
+
+    i965/nir/vec4: Implement single-element "mov" operations
+    
+    Adds NIR ALU operations:
+       * nir_op_imov
+       * nir_op_fmov
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 9e5d827f455f3c72af6cb8d60b97890bab8d5ad0
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Thu Jun 25 09:52:35 2015 +0200
+
+    i965/nir: Disable alu_to_scalar pass on non-scalar shaders
+    
+    Disables nir_lower_alu_to_scalar when the shader stage being processed works
+    on vec4 vectors, like the upcoming NIR->vec4 backend.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit ef1b30ae637e613b384541324c199d2dbe6b44bd
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Jun 16 22:30:16 2015 +0200
+
+    i965/nir/vec4: Prepare source and destination registers for ALU operations
+    
+    This patch resolves and initializes the destination and the source
+    registers that are common to most ALU operations.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 168bbfa6ff22a586ad6307c187cfa3b8fff5f227
+Author: Antia Puentes <apuentes@igalia.com>
+Date:   Tue Jun 16 22:10:32 2015 +0200
+
+    i965/nir/vec4: Implement loading values from an UBO
+    
+    Based on the vec4_visitor IR implementation for the ir_binop_load_ubo
+    operation. Notice that unlike the vec4_visitor IR, adding the !=0
+    comparison for UBO bools is not needed here because that comparison is
+    already added by the nir_visitor when processing the ir_binop_load_ubo
+    (in UBOs "true" is any value different from zero, but for us it is ~0).
+    
+    Adds NIR intrinsics:
+    
+       * nir_intrinsic_load_ubo_indirect
+       * nir_intrinsic_load_ubo
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 98d07022f5312967bdfd54069869c8d6c65117a7
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Tue Jun 16 22:03:17 2015 +0200
+
+    i965/nir/vec4: Implement atomic counter intrinsics (read, inc and dec)
+    
+    The implementation is based on its fs_nir counterpart.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit e6cafb5dfdef8d8d25ee1e3375304cf35897d1f7
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Tue Jun 16 21:55:14 2015 +0200
+
+    i965/nir/vec4: Implement load_uniform intrinsic
+    
+    For the indirect case we need to take the index delivered by
+    NIR and compute the parent uniform that we are accessing (the one
+    that we uploaded to a surface) and the constant offset into that
+    surface.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit e76e8caecd30799500357a45468329f033a93932
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Tue Jun 16 21:36:49 2015 +0200
+
+    i965/nir/vec4: Implement intrinsics that load system values
+    
+    These include:
+    
+    nir_intrinsic_load_vertex_id_zero_base
+    nir_intrinsic_load_base_vertex
+    nir_intrinsic_load_instance_id
+    
+    The source register is fetched from the nir_system_values map initialized
+    during nir_setup_system_values stage.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 662c4c99065381b8e265310d176cfdef6698ca57
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 21:31:49 2015 +0200
+
+    i965/nir/vec4: Implement store_output intrinsic
+    
+    This implementation is based on the current URB setup in vec4_visitor, which
+    requires the output register to be stored in the output_reg array at variable's
+    original shader location index. But since nir_lower_io() pass uses the value
+    in var->data.driver_location, we need to put there var->data.location instead,
+    prior to calling nir_lower_io(), so that we end up with the correct index
+    in const_index[0].
+    
+    The driver_location is not used at all, so this patch also disables the
+    nir_assign_var_locations pass on non-scalar shaders.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 11ed02e1c81a2aa71b22b1d6847f58e41fd89271
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jul 21 20:21:21 2015 +0200
+
+    i965/vec4: Make sure that register types always match during emit_urb_slot()
+    
+    Instead of relying on backends (currently vec4_visitor and soon NIR-vec4) to
+    store registers in output_reg with the correct type, this patch makes sure
+    that the common code in emit_urb_slot() always emits MOVs from output registers
+    using the same type on source and destination.
+    
+    Since the actual type is not important, only that they match, we default to
+    float.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 167cb9663adc8c7c61807e503f66e85f955e7d5f
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 21:24:21 2015 +0200
+
+    i965/nir/vec4: Implement load_input intrinsic
+    
+    The source register is fetched from the nir_inputs map built during
+    nir_setup_inputs stage.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit afe085a0ca01f659c69456018e5f5076c9dde47d
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 20:25:55 2015 +0200
+
+    i965/nir/vec4: Implement loop statements (nir_cf_node_loop)
+    
+    This is taken as-is from fs_nir.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 5c0436dbf87fef76ba67456f215d9285c38f1816
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Tue Jun 16 20:16:15 2015 +0200
+
+    i965/nir/vec4: Implement conditional statements (nir_cf_node_if)
+    
+    The same as we do in the FS NIR backend, only that here we need to consider
+    the number of components in the condition and adjust the swizzle
+    accordingly.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit f3187ea31ede6bc181ee561573d127aa2e485657
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 17:43:02 2015 +0200
+
+    i965/nir/vec4: Add get_nir_dst() and get_nir_src() methods
+    
+    These methods are essential for the implementation of the NIR->vec4 pass. They
+    work similarly to their fs_nir counterparts.
+    
+    When processing instructions, these methods are invoked to resolve the
+    brw registers (source or destination) corresponding to the NIR sources
+    or destination. It uses the map of NIR register index to brw register for
+    all registers locally allocated in a block.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 97e205fd35bf77fd761caf24c611ff72cc0d85e2
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Fri Apr 17 18:10:50 2015 +0200
+
+    i965/nir: Move brw_type_for_nir_type() to brw_nir to allow reuse
+    
+    Upcoming NIR->vec4 pass can benefit from this method, so let's move it up.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit f7152525374015594e037fa11bb64e1c7174829b
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jul 1 16:10:49 2015 +0200
+
+    i965/nir/vec4: Implement load_const intrinsic
+    
+    Similar to fs_nir backend, a nir_local_values map will be filled with
+    newly allocated registers as the load_const intrinsic instructions are
+    processed. Later, get_nir_src() will fetch the registers from this map
+    for sources that are ssa.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit a5a3287f7392356386aa305c791d94b6d5dde6cc
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 20:53:28 2015 +0200
+
+    i965/vec4: Add auxiliary func to build a writemask from a component size
+    
+    New method brw_writemask_for_size() will return a writemask with the first
+    'size' components activated.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 6e58fc56a5a396020cd299db11895120ec3da520
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Fri Jul 3 08:23:33 2015 +0200
+
+    i965/nir: Do not assign direct uniform locations first for vec4-based shaders
+    
+    In the vec4 backend we want uniform locations to be assigned consecutively
+    since that way the offsets produced by nir_lower_io are exactly what we
+    need to implement nir_intrinsic_load_uniform. Otherwise we would need a
+    mapping to match the output of nir_lower_io to the actual uniform registers
+    we need to use.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 01f6235020f9f0c2bc1a6e6ea9bd15c22fb2bcf5
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Jun 18 13:52:21 2015 +0200
+
+    nir/nir_lower_io: Add vec4 support
+    
+    The current implementation operates in scalar mode only, so add a vec4
+    mode where types are padded to vec4 sizes.
+    
+    This will be useful in the i965 driver for its vec4 nir backend
+    (and possibly other drivers that have vec4-based shaders).
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 5e839727ed2378a01d3b657bad83abd4728e8da6
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jul 22 09:35:28 2015 +0200
+
+    i965/nir: Pass a is_scalar boolean to brw_create_nir()
+    
+    The upcoming introduction of NIR->vec4 pass will require that some NIR
+    lowering passes are enabled/disabled depending on the type of shader
+    (scalar vs. vector).
+    
+    With this patch we pass a 'is_scalar' variable to the process of
+    constructing the NIR, to let an external context decide how the shader
+    should be handled.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 59006d3ad3ed5d29e84afa5931f425344e2ef658
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Wed Jul 22 09:34:35 2015 +0200
+
+    i965/nir/vec4: Add shader function implementation
+    
+    It basically allocates registers local to a function in a nir_locals map,
+    then emits all its control-flow blocks.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 4023b55fdd7005a8a100637c229a1c40648cdd2b
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Tue Jun 16 17:08:04 2015 +0200
+
+    i965/nir/vec4: Add setup for system values
+    
+    Similar to other variable setups, system values will initialize the
+    corresponding register inside a 'nir_system_values' map, which will then
+    be queried later when processing the different system value intrinsics
+    for the appropriate register.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 01c5617c8edc2f392363e9f8861d62a9fc9aa973
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Tue Jun 16 17:01:29 2015 +0200
+
+    i965/vec4: Redefine make_reg_for_system_value() to allow reuse in NIR->vec4 pass
+    
+    The new virtual method is more flexible, it has a signature:
+    
+    dst_reg *make_reg_for_system_value(int location, const glsl_type *type);
+    
+    v2 (Jason Ekstrand):
+       Use the new version in unit tests so make check passes again
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 195156e571e851273c135847f91ed73b3bfc1914
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Tue Jun 16 14:30:31 2015 +0200
+
+    i965/nir/vec4: Add setup of uniform variables
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit b929acb6a8659fdc06623b766bdf59904d8a3558
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 13:50:43 2015 +0200
+
+    i965/nir/vec4: Add setup of input variables in NIR->vec4 pass
+    
+    This implementation sets up a map of input variable offsets to source registers
+    that are already initialized with the corresponding register offset.
+    
+    This map will then be queried when processing load_input intrinsic operations,
+    to obtain the correct register source from which the input data will be loaded.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 78e7ce2b7329f8cc3f771afbf39d3fa662e02d9e
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 13:39:48 2015 +0200
+
+    i965/vec4: Move type_size() method to brw_vec4_visitor class
+    
+    The type_size() method is currently accessible only in the implementation
+    of vec4_visitor. Since we need to reuse it in the upcoming NIR->vec4 pass,
+    let's make it a method of the class instead.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 47d68908f2c3ad3e9011a2cf910b04cd3300673a
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 12:26:39 2015 +0200
+
+    i965/nir/vec4: Select between new nir_vec4 or current vec4_visitor code-paths
+    
+    The NIR->vec4 pass will be activated if both the following conditions are met:
+    
+    * INTEL_USE_NIR environment variable is defined and is positive (1 or true)
+    * The stage is vertex shader (support for geometry shaders and
+      ARB_vertex_program will be added later).
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit abf4fa3c03ebe5716c90c8a310945c3621cf598f
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 16 12:08:09 2015 +0200
+
+    i965/nir/vec4: Add implementation placeholders for a new NIR->vec4 pass
+    
+    This patch will add a brw_vec4_nir.cpp file filled with entry point methods to
+    the main functionality, following a structure similar to brw_fs_nir.cpp.
+    
+    Subsequent patches in this series will be adding the implementations for these
+    methods, incrementally.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 594fc0f85953d11c455e7ab549308a773b312d70
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Jun 25 16:47:52 2015 -0700
+
+    mesa: Replace F_TO_I() with _mesa_lroundevenf().
+    
+    I'm not sure what the true meaning of "The rounding mode may vary." is,
+    but it is the case that the IROUND() path rounds differently than the
+    other paths (and does it wrong, at that).
+    
+    Like _mesa_roundeven{f,}(), just add and use _mesa_lroundeven{f,}() that
+    has known semantics.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit f55c408067a3ea3529fcf7cbbaa1a041a4a8849d
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Jul 31 12:19:46 2015 -0700
+
+    mesa: Add -fno-trapping-math to CFLAGS.
+    
+    Cuts about 1k of .text size.
+    
+       text    data     bss     dec     hex filename
+    4983676  197808   26328 5207812  4f7704 i965_dri.so before
+    4982522  197800   26328 5206650  4f727a i965_dri.so after
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 875458b778e8d389e00f42269e716a3cb2761fab
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Jul 31 12:18:37 2015 -0700
+
+    mesa: Add -fno-math-errno to CFLAGS.
+    
+    Cuts about 9k of .text size.
+    
+       text    data     bss     dec     hex filename
+    4992804  197808   26328 5216940  4f9aac i965_dri.so before
+    4983676  197808   26328 5207812  4f7704 i965_dri.so after
+    
+    Also, Darwin's libm does not ever set errno, so if we care about those
+    systems we shouldn't rely on errno anyway.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 44e90f2a556a9b8ede12ae18a7cfa3a71e32d40c
+Author: Zoltan Gilian <zoltan.gilian@gmail.com>
+Date:   Thu Jul 30 20:11:51 2015 +0200
+
+    r600,compute: force tiling on 2D and 3D texture compute resources
+    
+    To circumvent a problem occurring when LINEAR_ALIGNED array mode is
+    selected on a TEXTURE_2D RAT.
+    This configuration causes MEM_RAT STORE_TYPED to write to incorrect
+    locations.
+
+commit be3622dce383cb930a233b88bb056adb026dce1f
+Author: Zoltan Gilian <zoltan.gilian@gmail.com>
+Date:   Thu Jul 30 23:35:09 2015 +0200
+
+    clover: handle setKernelArg errors
+
+commit aa46fba7e61a77bb3b029c7a483b5a2a2a73ff4d
+Author: Zoltan Gilian <zoltan.gilian@gmail.com>
+Date:   Mon Jul 27 11:27:12 2015 +0200
+
+    clover: fix image resource depth and array_size
+
+commit ab5b7a0fe659ff6f9c1885d5cb047b6531959506
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sun Aug 2 14:57:38 2015 +1000
+
+    nir: Use a single bit for the dual-source blend index
+    
+    The only values allowed are 0 and 1, and the value is checked before
+    assigning.
+    
+    This is a copy of 8eeca7a56c that seems to have been made to the glsl
+    ir type after it was copied for use in nir but before nir landed.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 9ef5b7a23348291893a6bf61fcce7a306e787add
+Author: Zoltan Gilian <zoltan.gilian@gmail.com>
+Date:   Mon Jul 27 11:34:07 2015 +0200
+
+    clover: pass image attributes to the kernel
+    
+    Read-only and write-only image arguments are recognized and
+    distinguished.
+    Attributes of the image arguments are passed to the kernel as implicit
+    arguments.
+
+commit d2cd2c69b20fcb3f1fc3b7671745c5c84ef200cb
+Author: Zoltan Gilian <zoltan.gilian@gmail.com>
+Date:   Mon Jul 27 11:21:07 2015 +0200
+
+    clover: move find_kernels to functions
+
+commit cf5667108b2cdd6f37e1a561c18fb5c757258f06
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sun Aug 2 11:40:26 2015 +1000
+
+    mesa: fix type for array indexing validation
+    
+    parse_program_resource_name returns -1 when the index is invalid; this needs to
+    be tested before assigning the value to the unsigned array_index.
+    
+    In link_varyings.cpp (the other place parse_program_resource_name is used) after
+    the -1 check is done the value is just assigned to an unsigned variable so it
+    seems long is just used so we can return the -1 rather than actually expecting
+    index values to be ridiculously large.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 704e764f06e8e6ec75484e28271e502bbc4cf06a
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon May 11 15:03:56 2015 +0200
+
+    mesa/es3.1: Allow multisampled textures for GLES 3.1
+    
+    GLES 3.1 must be allowed to create multisampled textures.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 2253a296c9ad7b11f9844640024c5f0784e4e528
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon May 11 15:03:55 2015 +0200
+
+    mesa/es3.1: Allow query of GL_TEXTURE_MULTISAMPLE
+    
+    GLES 3.1 must allow a query for GL_TEXTURE_MULTISAMPLE.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 0fe81a25f7102d78dbe8f7e89d2b024b1741da1c
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon May 11 15:03:53 2015 +0200
+
+    mesa/es3.1: Allow enable of GL_SAMPLE_MASK
+    
+    GLES 3.1 must be able to enable GL_SAMPLE_MASK.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit d74645d3acc815f6129b4cb20e6570c127d5ab2b
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon May 11 15:03:52 2015 +0200
+
+    mesa/es3.1: Allow textures with target GL_TEXTURE_2D_MULTISAMPLE
+    
+    GLES 3.1 should be able to bind a texture with the target
+    GL_TEXTURE_2D_MULTISAMPLE.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit a4bde371c7172fd775dea4377f9bccc3a38992c0
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon May 11 15:03:51 2015 +0200
+
+    mesa/es3.1: Allow GL_DEPTH_STENCIL_TEXTURE_MODE
+    
+    GLES 3.1 must support the parameter GL_DEPTH_STENCIL_TEXTURE_MODE.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 4f8e4a95dbd806bc735bf93dda245be2bb2ea454
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon May 11 15:03:50 2015 +0200
+
+    mesa/es3.1: Allow GL_SAMPLE_MASK
+    
+    GLES 3.1 should be allowed to enable GL_SAMPLE_MASK.
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 2e0179e2b3b9ea369816597f789a5bda7e0c46b5
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon May 11 15:03:49 2015 +0200
+
+    mesa/es3.1: Allow binding GL_DRAW_INDIRECT_BUFFER with gles 3.1
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit de59a40f6898e20a61ac4ea0e5995334f6ed2932
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 15:19:19 2015 +0200
+
+    r600g: re-enable single-sample fast clear
+    
+    Fixed by the CB_SHADER_MASK fix.
+    
+    Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit d4ad4c20617f45f71152e292ee39f020ef352bfd
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 15:18:36 2015 +0200
+
+    r600g: fix the CB_SHADER_MASK setup
+    
+    This fixes the single-sample fast clear hang.
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 828d20bdb79c4b6e6cb761017ad030bd875f9ac2
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Aug 2 15:17:30 2015 +0200
+
+    r600g: fix the single-sample fast clear setup
+    
+    No effect, but this is what we should be doing.
+    
+    Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 08fd736a45c98bd0acd96dfc1a61e6a695d2703c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 16 19:55:42 2015 +0200
+
+    radeonsi: flush if the memory usage for an IB is too high
+    
+    Picked from the amdgpu branch.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 4d7e0fa8c731776ad5d630f37b36c535f1907371
+Author: Igor Gnatenko <i.gnatenko.brain@gmail.com>
+Date:   Tue Jul 7 13:05:04 2015 +0300
+
+    opencl: use versioned .so in mesa.icd
+    
+    We must have versioned library in mesa.icd, because ICD loader would
+    fail if the mesa-devel package wasn't installed.
+    
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Reported-by: Fabian Deutsch <fabian.deutsch@gmx.de>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73512
+    Signed-off-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 2b831334e95e80e1a53dcce2fab21b012d3384c7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jul 30 15:18:54 2015 +0100
+
+    includes/GL: remove duplicated extension declarations from glx.h
+    
+    All three of GLX_NV_float_buffer, GLX_EXT_texture_from_pixmap and
+    GLX_MESA_query_renderer have been in glxext.h for a while now.
+    
+    As such we can drop this workaround/hack from the header.
+    
+    v2: Remove the comment about GLX_NV_float_buffer.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com> (v1)
+
+commit 6f2d88927a77f902157704d16b70b1265e0ca357
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 18:13:50 2015 +0100
+
+    docs: rename/bump 10.7.0 release notes to 11.0.0
+    
+    Recently a few drivers have grown OpenGL 4+ support so we might as
+    well go all the way to... 11 ;-)
+    
+    v2: Don't forget to update the version file (Ilia)
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 1307be519b8785249ee863a22115930299ff642a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 29 15:44:32 2015 +0100
+
+    winsys/radeon: don't leak the fd when it is 0
+    
+    Earlier commit added an extra dup(fd) to fix a ZaphodHeads issue.
+    However, it did not consider the (very unlikely) case where we might end
+    up with the valid fd == 0.
+    
+    Fixes: 28dda47ae4d(winsys/radeon: Use dup fd as key in drm-winsys hash
+    table to fix ZaphodHeads.)
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Reviewed-by: Mario Kleiner <mario.kleiner.de@gmail.com>
+
+commit eb3e2562a4bf728082818b46dcae1ab88340786e
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 12:28:23 2015 +0100
+
+    configure.ac: check for mkostemp()
+    
+    We can make use of it over mkstemp + fcntl in the egl/wayland code.
+    
+    Cc: Axel Davy <axel.davy@ens.fr>
+    Suggested-by: Matt Turner <mattst88@gmail.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 175d9752796bbcc52f1df90b1466c879bccfc406
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 12:27:06 2015 +0100
+
+    egl/wayland: use drmGetNodeTypeFromFd helper instead of opencoding it
+    
+    Cc: Axel Davy <axel.davy@ens.fr>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Boyan Ding <boyan.j.ding@gmail.com>
+
+commit 5567494403938940f61d44888c436a20a6635ef3
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 12:24:11 2015 +0100
+
+    egl/wayland: use designated initializers
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 720125ff99a8563d1f5991bd7428b8d884f1f618
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 11:22:13 2015 +0100
+
+    egl: remove ifdef $(egl_extension) compile guards
+    
+    All of these are already defined in the headers provided.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit fa109d02dda118f756903b663879375c06353ae7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 11:01:55 2015 +0100
+
+    egl/wayland: libdrm is a hard requirement, treat it as such
+    
+    Prompt at configure time if it's missing otherwise we'll fail later on
+    in the build. Remove ambiguous HAVE_LIBDRM guard.
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 57c670a823e55f5dd1fb2eb3d15e7db0a4f5c07a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 11:01:35 2015 +0100
+
+    egl: consolidate ifdef HAVE_LIBDRM blocks
+    
+    Move the code around rather than having it scattered. No functional
+    change.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit b0a929960384ffebf3b4f693fa0db4231ed897d4
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 00:16:21 2015 +0100
+
+    configure.ac: null,android,gdi are not valid egl-platforms
+    
+    ... and update the documentation to reflect reality.
+    null and gdi are gone, and surfaceless is a recent addition.
+    
+    v2: s/platforms/platform/ (spotted by Thomas)
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 5d29eaef85c15663cde317c2df58ea81637c53f9
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Aug 1 00:51:00 2015 +0200
+
+    Revert "gallium/radeon: re-enable unsafe math for graphics shaders"
+    
+    This reverts commit 8559f6ce62a9d5b52fa8189ba2352cd48bdabccf.
+    
+    It causes hangs in DOTA 2 Reborn.
+
+commit a40179f47ba11e78097ae1a839df6f3911a6749f
+Author: EdB <edb+mesa@sigluy.net>
+Date:   Fri Jul 31 19:14:45 2015 +0200
+
+    clover: make dispatch matches functions def
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 8477dd7c2e4416838c54da75a769109b4c5cc48e
+Author: Vinson Lee <vlee@freedesktop.org>
+Date:   Wed Jul 29 20:17:36 2015 -0700
+
+    gallivm: Fix GCC unused-variable warning.
+    
+    lp_bld_tgsi_soa.c: In function 'lp_emit_immediate_soa':
+    lp_bld_tgsi_soa.c:3065:18: warning: unused variable 'size' [-Wunused-variable]
+           const uint size = imm->Immediate.NrTokens - 1;
+                      ^
+    
+    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit bafdafa7b2e6649791188b5acf235ba166ceae50
+Author: Adam Jackson <ajax@redhat.com>
+Date:   Fri Jul 31 13:36:21 2015 -0400
+
+    glx: Fix missing bit decl for EXT_texture_integer
+    
+    Missing from:
+    
+        commit b15aba940a3b6fc7c9bebc692968e7e9b72b9f29
+        Author: Adam Jackson <ajax@redhat.com>
+        Date:   Tue Jul 21 11:43:42 2015 -0400
+    
+    	glx: Fix image size computation for EXT_texture_integer (v2)
+    
+    Signed-off-by: Adam Jackson <ajax@redhat.com>
+
+commit 616355160d3ee6edff3429b1abef82f0706dad3d
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jul 29 10:47:51 2015 -0700
+
+    glsl: Initialize parse-state in constructor of lower_subroutine.
+    
+    Static analysis tools don't like partial object initializations.
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit b15aba940a3b6fc7c9bebc692968e7e9b72b9f29
+Author: Adam Jackson <ajax@redhat.com>
+Date:   Tue Jul 21 11:43:42 2015 -0400
+
+    glx: Fix image size computation for EXT_texture_integer (v2)
+    
+    Without this this extension basically can't work in indirect contexts,
+    TexImage2D will compute the image size as 0 and we'll send no image data
+    to the server.
+    
+    v2: Add EXT_texture_integer to the client extension list too (Ian)
+    
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+    Signed-off-by: Adam Jackson <ajax@redhat.com>
+
+commit 30509788641a413742098f21a4ee0087b1f86e18
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 30 17:38:44 2015 +0200
+
+    radeonsi: copy *8_SNORM bits exactly in resource_copy_region
+    
+    Disabling the FP16 mode didn't help.
+    
+    If needed, we can use this trick for blits too, but not for scaled blits.
+    
+    + 4 piglits
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 64d3130994bde98b0be44a5c54511e376b6d994e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jul 28 11:39:35 2015 +0200
+
+    r600g: early exit in r600_clear if there's nothing to do
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit f9c4953f99e75e45bc4f0f07315ee643b62b0c23
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jul 28 11:39:35 2015 +0200
+
+    radeonsi: early exit in si_clear if there's nothing to do
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 190a40580fdfccf00db93f5c8f15bbf16914be2c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Jul 27 19:01:21 2015 +0200
+
+    radeonsi: fix a regression since the resource_copy_region cleanup
+    
+    Broken since:
+        46b2b3b - radeonsi: don't change pipe_resource in resource_copy_region
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91444
+    
+    Reviewed-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 3ca21320583a4c0ba9bee755935df5e1f1637fdf
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 26 21:08:18 2015 +0200
+
+    radeonsi: fix broken st/nine from merging tessellation
+    
+    st/nine uses GENERIC slots greater than 60.
+
+commit 2d3ae154ba36546485468b9552e6da905b42aaa4
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 01:25:07 2015 +0200
+
+    radeonsi: move CP DMA functions to their own file
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 3063c5e3d3fefdc5eed7600882bd08f56bf86db8
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 16:15:48 2015 +0200
+
+    radeonsi: add a debug flag that disables printing ISA in shader dumps
+
+commit 2dcbd427da74c8f2b6f46e789924a7ced67be260
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 16:15:48 2015 +0200
+
+    radeonsi: add a debug flag that disables printing TGSI in shader dumps
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit ac19a896d3de13b7d064d01c575f46f4191ef37c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 16:15:48 2015 +0200
+
+    radeonsi: add a debug flag that disables printing the LLVM IR in shader dumps
+    
+    This is for shader-db and should reduce size of shader dumps.
+
+commit 7dd1f45bc41c4a936b0ff84400840524bb9f8871
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 11 00:17:48 2015 +0200
+
+    radeonsi: store shader disassemblies in memory for future users
+    
+    This will be used by the new ddebug pipe. I'm including it now to avoid
+    conflicts with other patches.
+
+commit 1bbe40836306549414408bb7f30b9288c020db75
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 12:17:19 2015 +0200
+
+    radeonsi: don't use llvm.AMDIL.fraction for FRC and DFRAC
+    
+    There are 2 reasons for this:
+    - LLVM optimization passes can work with floor
+    - there are patterns to select v_fract from floor anyway
+    
+    There is no change in the generated code.
+
+commit 8559f6ce62a9d5b52fa8189ba2352cd48bdabccf
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 12:01:07 2015 +0200
+
+    gallium/radeon: re-enable unsafe math for graphics shaders
+    
+    This reverts commit 4db985a5fa9ea985616a726b1770727309502d81.
+    
+    The grass no longer disappears, which was the reason the commit was reverted.
+    This might affect tessellation. We'll see.
+    
+    Totals from affected shaders:
+    SGPRS: 151672 -> 150232 (-0.95 %)
+    VGPRS: 90620 -> 89776 (-0.93 %)
+    Code Size: 3980472 -> 3920836 (-1.50 %) bytes
+    LDS: 67 -> 67 (0.00 %) blocks
+    Scratch: 1357824 -> 1202176 (-11.46 %) bytes per wave
+    
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+
+commit 12a197b2d58125e4dbe2942204df1bbe3258e54b
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 17:24:08 2015 +0200
+
+    gallium/radeon: don't use rsq_action
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 681dbcf69040883e91423df56fcb34f4fee57110
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 17:12:39 2015 +0200
+
+    gallium/radeon: move r600-specific code to r600g
+    
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+
+commit 9a4c57afe48c391bb335f74c88b447f83704b413
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 16:53:29 2015 +0200
+
+    gallium/radeon: remove unused variables and old comments
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit b9dad585e66b1031bdcbb148a19524ee2705baf7
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 11:26:18 2015 +0200
+
+    gallium/radeon: remove build_intrinsic and build_tgsi_intrinsic
+    
+    duplicated now
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 0c805b6240769891d55db601f91b8dd84d69d43d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 11:17:48 2015 +0200
+
+    gallivm: add LLVMAttribute parameter to lp_build_intrinsic
+    
+    This will help remove some duplicated code from radeon.
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 488a83637fe726d445775ee301e42003f749cb9f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 15:55:45 2015 +0200
+
+    gallium/util: clear up that debug_get_flags_option returns a 64-bit mask
+    
+    Reviewed-by: Kai Wasserbäch <kai@dev.carbon-project.org>
+
+commit b0528118dfb1af00e7d08cdb637191b80c14c2ba
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 00:53:16 2015 +0200
+
+    radeonsi: completely rework updating descriptors without CP DMA
+    
+    The patch has a better explanation. Just a summary here:
+    - The CPU always uploads a whole descriptor array to previously-unused memory.
+    - CP DMA isn't used.
+    - No caches need to be flushed.
+    - All descriptors are always up-to-date in memory even after a hang, because
+      CP DMA doesn't serve as a middle man to update them.
+    
+    This should bring:
+    - better hang recovery (descriptors are always up-to-date)
+    - better GPU performance (no KCACHE and TC flushes)
+    - worse CPU performance for partial updates (only whole arrays are uploaded)
+    - less used IB space (no CP_DMA and WRITE_DATA packets)
+    - simpler code
+    - hopefully, some of the corruption issues with SI cards will go away.
+      If not, we'll know the issue is not here.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 781dc7c0e1f41502f18e07c0940af949a78d2792
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jul 30 14:45:57 2015 +0300
+
+    i965/fs: Fix regression with SIMD8 VS since b5f1a48e234d47b24df38cb562cffb8941d43795.
+    
+    With num_direct_uniforms == 0 there's no space allocated in the
+    param_size array for the one block of direct uniforms -- On the FS
+    stage this would be a harmless no-op because it would simply re-set
+    one of the param_size entries allocated for the sampler units to zero,
+    but on the VS stage it has been reported to cause memory corruption
+    followed by a crash -- Surprising how a full piglit run on Gen8 didn't
+    catch it.
+    
+    Reported-and-reviewed-by: "Lofstedt, Marta" <marta.lofstedt@intel.com>
+
+commit 383558c56427b0e8b4e56cce8737771ad053f753
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu Jul 30 19:16:32 2015 -0700
+
+    i965/gen9: Add hs, ds, and cs thread + urb info
+    
+    For SKL: These are the production values.
+    
+    For BXT: These are low estimates to enable platforms.
+    
+    This patch was originally part of
+    i965/skl: Add production thread counts and URB size
+    but was split out at Jordan's request (which I found to be reasonable).
+    
+    Note on stable inclusion: 10.6 does not care about hs, and ds. It does care
+    about cs, but since Jordan was the one that asked me to extract it, I'll leave
+    it up to him to deal with a backport to stable if required.
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 3cb58010037bad24890785007fd8f47d67249f2f
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu Jul 30 19:12:15 2015 -0700
+
+    i965/bxt: Use more conservative thread counts
+    
+    Since we really do not know what may occur in the future, pick a more
+    conservative value for thread counts until we know better what values are
+    correct. As far as I can tell, the old values will work fine, but some of the
+    registers seem to indicate that going even lower is possible and the purpose of
+    having early support is to enable as many configurations that can possibly
+    exist (we can trim things down after platforms begin shipping later).
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 7eaacc1678195738fab3bb98870828611cae066d
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Wed Jul 29 12:35:24 2015 -0700
+
+    i965/skl: Add production thread counts and URB size
+    
+    This patch adjusts the SKL values to the best known values we have.
+    
+    v2: Remove HS/DS/CS fields. Adding this makes most sense to add to the
+    GEN9_FEATURES macro, however, doing that would require updating BXT values, and
+    Jordan requested I not do that. Conveniently, this request makes a lot of sense
+    wrt stable backport, as HS and DS do not even exist there.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 7830e465a5f446616ce49a7f8219256a5503a68b
+Author: Eric Anholt <eric@anholt.net>
+Date:   Thu Jul 30 11:16:13 2015 -0700
+
+    vc4: Lower uniform loads to scalar in NIR.
+    
+    This also moves the vec4-to-byte-addressing math into NIR, so that
+    algebraic has a chance at it.
+
+commit 5a8c57b52287ba2bb8faa4447e7d1cc46ef1a3d4
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 17:27:54 2015 -0700
+
+    vc4: Move some FS input lowering into NIR.
+
+commit 13ddd48b97474c261ef2d7412629748d6d91f2ad
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 17:29:39 2015 -0700
+
+    vc4: Move program keys to the header file.
+    
+    I want to be able to inspect them from other files for lowering passes in
+    NIR.
+
+commit 27f728cdc5d90f63839fbeb1942e6f27339b102a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 17:16:26 2015 -0700
+
+    vc4: Lower NIR inputs to scalar as well.
+    
+    For now this is just scalarizing, but it also means we'll get to dump a
+    bunch of QIR-based lowering in a moment.
+
+commit b85f6ae4b24ee50948f14a9effa982eb0b9b3681
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 15:52:18 2015 -0700
+
+    vc4: Start adding a NIR-based output lowering pass.
+    
+    For now, this just splits up store_output intrinsics to be scalars, and
+    drops unused outputs in the coordinate shader.  My goal is to be able to
+    drop a bunch of my VC4-specific optimization by letting NIR handle it.
+
+commit c93ffd661a46f0f6d20c9ec2e97d4d9393e28111
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 12:16:50 2015 -0700
+
+    vc4: Mark our shaders as single-threaded.
+    
+    I had my understanding of this bit flipped.  We're using the full register
+    space, so we need to say so.
+
+commit df3005de189f5120bc06f6cba35ecaf5c4503229
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 14:41:22 2015 -0700
+
+    vc4: Avoid leaking indirect array access UBOs.
+
+commit 86541cf8cea77f4b887dd061e7d6e3e4767f86fd
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 14:40:10 2015 -0700
+
+    vc4: Avoid overflowing various static tables.
+
+commit d0173bce371e3aafa732600c1456a9282ff5d900
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 29 12:20:33 2015 -0700
+
+    vc4: Fix return values from recent validation changes.
+
+commit a5b3b24958b5e4344e7d8d1e029dbf7e5afb183c
+Author: Kai Wasserbäch <kai@dev.carbon-project.org>
+Date:   Thu Jul 30 20:32:36 2015 +0200
+
+    docs: trivial cleanup of GL3.txt, remove redundant radeonsi entries.
+    
+    Follow-up to 1b2b0e42ce47bfd1fcb5513ed2c23b9bb7a5a5b8
+    
+    Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 518abd0bbe1886550b43c62679a2ebd41e8199e9
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Thu Jul 30 20:44:50 2015 +1000
+
+    st/mesa: don't draw instead of asserting in transform feedback
+    
+    if we get a request to take the count from feedback, but there
+    is no buffer to take it from, just draw as if we got 0 vertices
+    so nothing.
+    
+    This fixes this assert killing the ogl conform, and a piglit
+    test I've sent.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit fdb84876134ed074563b842eae20fd10dbe9e8d6
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sat Jul 4 08:35:35 2015 +1000
+
+    mesa: remove now unused _mesa_get_uniform_location
+    
+    Cc: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 7f5f7d15fbbd3d306e43e1e9ff215750b8aaa7bf
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sat Jul 25 12:39:43 2015 +1000
+
+    mesa: remove now unused subscript validations
+    
+    Cc: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 8cd2f88845acd45ebcbaae2e68a8a47b3c17e6d5
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sat Jul 25 12:33:53 2015 +1000
+
+    mesa: fix and simplify resource query for arrays
+    
+    This removes the need for multiple functions designed to validate an array
+    subscript and replaces them with a call to a single function.
+    
+    The change also means that validation is now only done once and the index
+    is retrieved at the same time, as a result the getUniformLocation code can
+    be simplified saving an extra hash table lookup (and yet another
+    validation call).
+    
+    This change also fixes some tests in:
+    ES31-CTS.program_interface_query.uniform
+    
+    V3: rebase on subroutines, and move the resource index array == 0
+    check into _mesa_GetProgramResourceIndex() to simplify things further
+    
+    V2: Fix bounds checks for program input/output, split unrelated comment fix
+    and _mesa_get_uniform_location() removal into their own patch.
+    
+    Cc: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 3a21e4bd263002dd600e7a693536c93f68b285a5
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Wed Jul 29 17:40:37 2015 +0100
+
+    i965/bxt: Don't use brw_device_info_skl_early on BXT
+    
+    Previously it could end up using the “SKL early” device on BXT
+    depending on the revision number. This would probably break things
+    because for example has_llc would be wrong.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 75a96cedf7b0e5613560be0962dec973a4d2f2fe
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sat Jul 4 15:43:15 2015 +1000
+
+    glsl: set stage flag for structs and arrays in resource list
+    
+    This fixes the remaining failing tests in:
+    ES31-CTS.program_interface_query.uniform-types
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 1b2b0e42ce47bfd1fcb5513ed2c23b9bb7a5a5b8
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed Jul 29 10:51:46 2015 +0100
+
+    docs: consolidate radeonsi in GL3.txt
+    
+    move into DONE for GL4.0 and GL4.1
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit af1e6aa75b7f518cc6b08717fa8844370be3f05c
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed Jul 22 01:24:39 2015 +0100
+
+    radeonsi: enable GL4.1 and update documentation (v2)
+    
+    This enables GL4.1 for radeonsi, and updates the
+    docs in the correct places.
+    
+    v2: enable only for llvm 3.7 which has fixes in place.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 3c73c418713adec52389e2723e38bf47df13a24b
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jul 20 02:37:14 2015 +0100
+
+    radeonsi: add GS multiple streams support (v2)
+    
+    This is the final piece for ARB_gpu_shader5,
+    
+    The code is based on the r600 code from Glenn Kennard,
+    and myself.
+    
+    While developing this, I'm not 100% sure of all the calculations
+    made in the GS registers, this is why the max_stream is worked
+    out there and used to limit the changes in registers. Otherwise
+    my initial attempts either regressed GS texelFetch tests
+    or primitive-id-restart. The current code has no regressions
+    in piglit.
+    
+    This commit doesn't enable ARB_gpu_shader5, since that just
+    bumps the glsl level to 4.00, so I'll just do a separate patch
+    for 4.10.
+    
+    v1.1: fix bug introduced in rebase.
+    v2: Address Marek's review comments,
+    remove my llvm stream code for simpler C,
+    move gsvs_ring and gs_next_vertex to arrays.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit c73a13e9538cab1772b71fb5599e4944c540412e
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Wed Jul 29 10:15:03 2015 -0700
+
+    Delete unused functions in format parser
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+
+commit 92994742d079bffdc4c25fdc5a22c7438b7da9c7
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Wed Jul 29 09:57:26 2015 -0700
+
+    i965: Change the type of max_{vs, hs, ...}_threads variables to unsigned
+    
+    Fixes following compiler warning:
+    brw_cs.cpp:386:27: warning: comparison between signed and unsigned
+    integer expressions [-Wsign-compare]
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 2484263fe97cebc9fa7a5c9de04c757dc6cc7713
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Wed Jul 29 09:41:18 2015 -0700
+
+    Delete duplicate function is_power_of_two() and use _mesa_is_pow_two()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 8413822c8cfaf9110625c1a4a66ee916c2a916e3
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Wed Jul 29 20:45:09 2015 +0100
+
+    gallium/auxiliary: Ensure c99_math.h is included.
+    
+    As it is needed for exp2.
+    
+    Trivial.
+
+commit 2b916c6e47862d82b5545e962ebb83b811904c3b
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Wed Jul 29 22:20:04 2015 +0200
+
+    c99_math: (trivial) implement exp2 for MSVC too
+    
+    Unsurprisingly doesn't build otherwise with old msvc.
+
+commit e933d545997de9e50a8ed5247722c1c786bf4858
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Tue Jul 28 19:52:49 2015 -0700
+
+    i965/bxt: Support 3src simd16 instructions
+    
+    This is easily accomplished by moving simd16 3src to GEN9_FEATURES.
+    
+    v2: small cleanup to make it more similar to GEN8_FEATURES
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit c0731a1b14dc7385f4238b4508b88bfca2ef43cf
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 22 16:04:28 2015 +0100
+
+    targets/dri: scons: add missing link against libdrm
+    
+    Otherwise the final dri module will have (additional) unresolved
+    symbols.
+    
+    Cc: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 4fc86f183eee43117925499d8d1315be481ba636
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 17 18:18:20 2015 +0100
+
+    svga: scons: remove unused HAVE_SYS_TYPES_H define
+    
+    There isn't a single instance in mesa that
+    mentions HAVE_SYS_TYPES_H, other than this file.
+    
+    Cc: Jose Fonseca <jfonseca@vmware.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 23bba717e1178d54927c4968a0466d706a630432
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jul 15 21:29:21 2015 -0700
+
+    glsl: Avoid double promotion.
+
+commit a562313f378a056c8d886e418b518063ab077c39
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 23:15:42 2015 -0700
+
+    mesa: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 7adc9fa1f1d12683c5855bf5854dec814629093d
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 23:15:32 2015 -0700
+
+    mesa/math: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 076f73edb34f2a83092c6c8ad04b53def2792bb8
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 23:15:19 2015 -0700
+
+    program: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 04aa8b58a09e3b415916fa569111c1f76d07a8d5
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 23:15:10 2015 -0700
+
+    swrast: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit c92b2a1d7b286de8641512970a87c94809fbbc3f
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 23:15:01 2015 -0700
+
+    tnl: Avoid double promotion.
+    
+    There are a couple of unrelated changes in t_vb_lighttmp.h that I hope
+    you'll excuse -- there's a block of code that's duplicated modulo a few
+    trivial differences that I took the liberty of fixing.
+
+commit 2b47ef715ad33f6c4a4881b10240d792ba9e60b2
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 23:14:54 2015 -0700
+
+    vbo: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit b568a5f6a8c6bb07b27e9badce01a8a73ba56c03
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 18:01:54 2015 -0700
+
+    util: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 29ef7a9f19265308e7852c0f8920e0f520f08df3
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 18:01:42 2015 -0700
+
+    gallium/auxiliary: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 4251ccb47b79c719918e7c372aebb6b2d9719922
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 12:37:00 2015 -0700
+
+    nir: Avoid double promotion.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit c1da15709a0c0c2775bd9e534f67c60f7dc95ce8
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sun Jul 12 00:13:45 2015 -0700
+
+    i965: Use float calculations when double is unnecessary.
+    
+    Literals without an f/F suffix are of type double, and implicit
+    conversion rules specify that the float in (float op double) be
+    converted to a double before the operation is performed. I believe float
+    execution was intended (in nearly all cases) or is sufficient (in the
+    case of gen7_urb.c).
+    
+    Removes a lot of float <-> double conversion instructions and replaces
+    many double instructions with float instructions which are cheaper.
+    
+       text     data      bss      dec      hex  filename
+    4928659   195160    26192  5150011   4e953b  i965_dri.so before
+    4928315   195152    26192  5149659   4e93db  i965_dri.so after
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit c67ce2bd3b27a26d7f5665f296d307c0de39b720
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jul 13 15:19:54 2015 -0700
+
+    gallium/auxiliary: Use exp2(x) instead of pow(2.0, x).
+
+commit b73782bf184b7053026e8dda54800d48e64e20da
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jul 13 15:19:33 2015 -0700
+
+    program: Use exp2(x) instead of pow(2.0, x).
+
+commit f8a647883a14694f1b758c12187b3f35b9d039a7
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sat Jul 11 22:46:19 2015 -0700
+
+    mesa: Use floats for viewport bounds.
+    
+    ARB_viewport_array specifies that DEPTH_RANGE consists of double-
+    precision parameters (corresponding commit d4dc35987), and a preparatory
+    commit (6340e609a) added _mesa_get_viewport_xform() which returned
+    double-precision scale[3] and translate[3] vectors, even though X, Y,
+    Width, and Height were still floats.
+    
+    All users of _mesa_get_viewport_xform() immediately convert the double
+    scale and translation vectors into floats (which were floats originally,
+    but were converted to doubles in _mesa_get_viewport_xform(), sigh).
+    
+    i965 at least cannot consume doubles (see SF_CLIP_VIEWPORT). If we want
+    to pass doubles to hardware, we should have a different function that
+    does that.
+    
+    Acked-by: Mathias Froehlich <Mathias.Froehlich@web.de>
+
+commit ecc559218d0a544f8a5f878c500f125c2d588d82
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jul 15 21:28:56 2015 -0700
+
+    c99_math: Implement exp2f for MSVC.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 5c7fd670459ebff452adeec335c77854af903842
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jul 15 20:54:46 2015 -0700
+
+    glsl: Remove MSVC implementations of copysign and isnormal.
+    
+    Non-Gallium parts of Mesa require MSVC 2013 which provides these.
+
+commit 02425d3ec2af6945a03583cadcaa5f3f330bbc0e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 18:51:01 2015 +0300
+
+    i965/fs: Make the default builder 64-wide before entering the optimization loop.
+    
+    Not a typo.  Replace the default builder with one of bogus width to
+    catch cases in which optimization passes assume that the default
+    dispatch width is good enough.  The execution controls of instructions
+    emitted during optimization should in general match the original code
+    that is being manipulated.  Many of the problems fixed in this series
+    were caught by the assertions introduced in this patch.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 4529916dfd227af6c4e151f45261db22157fe45f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 18:42:31 2015 +0300
+
+    i965/fs: Don't set exec_all on instructions wider than the original in lower_simd_width.
+    
+    This could have led to somewhat increased bandwidth usage for lowered
+    texturing instructions on Gen4 (which is the only case in which
+    lower_width may be greater than inst->exec_size).  After the previous
+    patches the invariant mentioned in the comment should no longer be
+    assumed by any of the other optimization and lowering passes, so the
+    exec_all() call shouldn't be necessary anymore.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit eaba922582cfdccc7b198f9b23d8bd3c26197d03
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 18:28:39 2015 +0300
+
+    i965/fs: Initialize a builder explicitly in the gen4 send dependency work-arounds.
+    
+    Instead of relying on the default one.  This shouldn't lead to any
+    functional changes because DEP_RESOLVE_MOV overrides the execution
+    size of the instruction anyway and other execution controls are
+    irrelevant.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 8b838fa9f01f7ee13fd9fc3e6545a677397f8023
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Jul 10 19:49:49 2015 -0700
+
+    i965/cfg: Assert that cur_do/while/if pointers are non-NULL.
+    
+    More.. like in commit 4d93a07c.
+
+commit 9da9adcfd7df45a0a337e0fbf482f60ff5566499
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 29 11:01:08 2015 -0400
+
+    nvc0/ir: cache vertex out base so that we don't recompute again
+    
+    The global CSE pass stinks and is unable to pull this out. Easy enough
+    to handle it here and avoid generating unnecessary special register
+    loads (which can allegedly be quite slow).
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit ad75620863392b2164a415186087beb831ccfa4c
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 29 09:37:14 2015 -0400
+
+    nvc0/ir: output base for reading is based on laneid
+    
+    PFETCH retrieves the address for incoming vertices, not output vertices
+    in TCS. For output vertices, we must use the laneid as a base.
+    
+    Fixes barrier piglit test, which was failing for entirely non-barrier
+    reasons, but rather that it was (a) trying to draw multiple patches and
+    (b) the incoming patch size was not the same as the outgoing patch size.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit e42d2948d3c58b86d3770d296b96fafcd1218858
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jul 29 15:37:52 2015 +0300
+
+    Revert "pipe-loader: simplify pipe_loader_drm_probe"
+    
+    This reverts commit a27ec5dc460b91dc44675f48cddbbb2631ee824f.  It
+    breaks the intended behaviour of pipe_loader_probe() with ndev==0 as
+    relied upon by clover to query the number of devices available to the
+    pipe loader in the system.
+    
+    Acked-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 5e645e68d6672cac2872fa509fb22bc2581f4b67
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 19:18:51 2015 +0300
+
+    i965/fs: Switch opt_cse() to the fs_builder constructor from instruction.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 992cda2c8a452ec86386a0f98eaf522afe206695
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 18:41:18 2015 +0300
+
+    i965/fs: Switch lower_logical_sends() to the fs_builder constructor from instruction.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 930ebb258524762c765fa864ef7063bd8bb754a1
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 18:34:43 2015 +0300
+
+    i965/fs: Switch lower_load_payload() to the fs_builder constructor from instruction.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit a0b192d3d9fa64f6f8bff5f1e456e40e72f4875e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 20:14:41 2015 +0300
+
+    i965/fs: Don't rely on the default builder to create a null register in emit_spill.
+    
+    It's not guaranteed to have the same width as the instruction
+    generating the spilled variable.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit bfad71606a987f14f20d2c3607846648f8537f2b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 18:15:44 2015 +0300
+
+    i965/fs: Set up the builder execution size explicitly in opt_sampler_eot().
+    
+    opt_sampler_eot() was relying on the default builder to have the same
+    width as the sampler and FB write opcodes it was eliminating, the
+    channel selects didn't matter because the builder was only being used
+    to allocate registers, no new instructions were being emitted with it.
+    A future commit will change the width of the default builder which will
+    break this assumption, so initialize it explicitly here.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 09039f4bc120481219d01ed17e1552ca8ad66455
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 19:20:50 2015 +0300
+
+    i965/fs: Initialize a builder explicitly in opt_peephole_predicated_break().
+    
+    This wasn't taking into account the execution controls of the original
+    instruction, but it was most likely not a bug because control flow
+    instructions are typically full width.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit e1f4724097d1074ec9afdc9ce9ad024add125923
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 19:27:30 2015 +0300
+
+    i965/fs: Set execution controls explicitly in opt_peephole_sel().
+    
+    Emit the SELs and MOVs with the same execution controls as the
+    original MOVs, and the CMP with the same execution controls as the IF.
+    Also explicitly check that the execution controls of any pair of MOVs
+    being folded into a SEL are compatible (which is almost always going
+    to be the case), since otherwise it would seem wrong to initialize the
+    builder object below from the then_mov instruction only.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit ff463af436bcf07430807512c9f0bf0f627288ce
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 18:38:59 2015 +0300
+
+    i965/fs: Set execution controls correctly in lower_integer_multiplication().
+    
+    lower_integer_multiplication() was ignoring the execution controls of
+    the original MUL instruction.  Fix it by using the new fs_builder
+    constructor.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit ce90227c71c8cbe6ca4317f1873ff12c70081c4c
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 17:55:49 2015 +0300
+
+    i965/fs: Set execution controls correctly for lowered pull constant loads.
+    
+    demote_pull_constants() was ignoring the execution size and channel
+    selects of the instruction that wanted the constant, which doesn't
+    matter for uniform pull constant loads because all channels get the
+    same scalar value, but it might for varying pull constant loads.  Fix
+    it by using the new fs_builder() constructor that takes care of
+    setting execution controls compatible with the instruction passed as
+    argument.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 53077aee6670022e634a4775d8abbb59c458b7d7
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 19:09:45 2015 +0300
+
+    i965/fs: Set the execution size of the MOVs correctly in opt_combine_constants().
+    
+    The execution size was being left equal to the default of 8/16, which
+    AFAICT would have overwritten components other than the one we wanted
+    to initialize and could potentially have corrupted other registers.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 6f7dea0b3212aa4ce49fcf9e94bf7aab130eeab2
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 17:54:46 2015 +0300
+
+    i965/fs: Define a new fs_builder constructor taking an instruction as argument.
+    
+    We have a number of optimization passes that repeat the same pattern
+    before inserting new instructions into the program based on some
+    previous instruction: They point the default builder at the original
+    instruction, then call exec_all() and group() to select the same
+    execution controls the original instruction had, and then maybe call
+    annotate() to clone the debug annotation from the original
+    instruction.
+    
+    In fact an optimization pass missing any of these steps is likely to
+    be broken if the intention was to emit new code based on a preexisting
+    instruction, so let's make it easy for passes to do the right thing by
+    having an fs_builder constructor that automates the task of setting up
+    a builder to emit a given instruction provided as argument.
+    
+    The following patches fix all cases I've found in which we weren't
+    explicitly initializing the execution controls of the emitted
+    instructions, and clean-up optimization passes which were already
+    doing the right thing to use the new constructor.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7cb60d770fc24bf00b6f7e5898cca1426e55c026
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 16:25:55 2015 +0300
+
+    i965/fs: Translate memory barrier NIR intrinsics.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit b5f1a48e234d47b24df38cb562cffb8941d43795
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Sun Jun 28 21:15:28 2015 +0300
+
+    i965/fs: Execute nir_setup_uniforms, _inputs and _outputs unconditionally.
+    
+    Images take up zero uniform slots in the nir_shader::num_uniforms
+    calculation, but nir_setup_uniforms needs to be executed even if the
+    program has no non-image uniforms so the driver-specific image
+    parameters are uploaded.  nir_setup_uniforms is a no-op if there are
+    really no uniforms, so checking the num_uniform count is useless in
+    any case.
+    
+    The nir_setup_inputs and _outputs changes shouldn't lead to any
+    functional change, they are just meant to preserve the symmetry
+    between them and nir_setup_uniforms.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 3e5a90792d14aeb599dd236f830e6e344b35c905
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue May 5 22:12:03 2015 +0300
+
+    i965/fs: Don't overwrite fs_visitor::uniforms and ::param_size during the SIMD16 run.
+    
+    Image variables need to allocate additional uniform slots over
+    nir_shader::num_uniforms.  nir_setup_uniforms() overwrites the values
+    imported from the SIMD8 visitor and then exits early before entering
+    the nir_shader::uniforms loop, so image uniforms are never re-created.
+    Instead leave the imported values alone, they *must* be the same for
+    the uniform layout of both runs to be compatible.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit ea0ac53f059c418d5797c495b87020f2ca2ec842
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jun 29 16:50:49 2015 +0300
+
+    i965/fs: Drop unused untyped surface read and atomic emit methods.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 854c4d8b37416d3e5593099a8e5441f3cf861173
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue May 5 20:52:58 2015 +0300
+
+    i965/fs: Revisit NIR atomic counter intrinsic translation.
+    
+    Rewrite the NIR atomic counter intrinsics translation code making use
+    of the recently introduced surface builder.  This will allow the
+    removal of some of the functionality duplicated between the visitor
+    and surface builder.
+    
+    v2: Drop VEC4 support.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 1aab58f39450213ea2ac43549eefb8acd1e6584a
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Apr 30 19:31:44 2015 +0300
+
+    i965/fs: Import surface message builder helper functions.
+    
+    Implement helper functions that can be used to construct and send
+    untyped and typed surface read, write and atomic messages to the
+    shared dataport unit easily.
+    
+    v2: Drop VEC4 support.
+    v3: Reimplement in terms of logical send opcodes.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 03846696ce2deaaaff42b2acd7745b51a7f115f2
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 15:39:03 2015 +0300
+
+    i965/fs: Handle zero-size allocations in fs_builder::vgrf().
+    
+    This will be handy to avoid some ugly ternary operators in the next
+    patch, like:
+     fs_reg reg = (size == 0 ? null_reg_ud() : vgrf(..., size));
+    
+    Because a zero-size register allocation is guaranteed not to ever be
+    read or written we can just return the null register.  Another
+    possibility would be to actually allocate a zero-size VGRF which would
+    involve defining a zero-size register class in the register allocator
+    and a considerable amount of churn.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 3352724dfa4eb5c93290db92ae99d26d9b89e630
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 14 18:42:57 2015 +0300
+
+    i965/fs: Implement lowering of logical surface instructions.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 086d29f4d747bbcfe37beeb18ba77fb2cb84dbdc
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Sat Jul 18 16:16:19 2015 +0300
+
+    i965/fs: Hook up SIMD lowering to unroll surface instructions of unsupported width.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7a594a95a930f1658062e4d86d0f37d491b372b3
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 21 18:45:32 2015 +0300
+
+    i965/fs: Define logical typed and untyped surface opcodes.
+    
+    Each logical variant is largely equivalent to the original opcode but
+    instead of taking a single payload source it expects its arguments
+    separately as individual sources, like:
+    
+     typed_surface_write_logical null, coordinates, source, surface,
+                                        num_coordinates, num_components
+    
+    This patch defines the opcodes and usual instruction boilerplate,
+    including a placeholder lowering function provided mainly as
+    documentation for their source registers.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 3af2623da5167aa686bcb2cff01d27058a507026
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 20 17:38:15 2015 +0300
+
+    i965: Lift the constness restriction on surface indices passed to untyped ops.
+    
+    v2: Update NIR atomic intrinsic handling too (Ken).
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit a0c02d2bbb765b0e997ad524d8e51838e529d9c0
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Sun Jun 28 21:04:17 2015 +0300
+
+    i965: Define the setup_vector_uniform_values() backend_visitor interface.
+    
+    This cleans up the VEC4 implementation of setup_uniform_values()
+    somewhat and will avoid duplication of the image uniform upload code
+    by having a common interface to upload a vector of uniforms on either
+    back-end.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit bd0d6a9cce8b28357888bb261fac639e2833c51f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri Jul 24 16:51:14 2015 +0300
+
+    i965/fs: Remove the emit_texture_gen*() fs_visitor methods.
+    
+    This is now dead code.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 59979b133dd16bf46803f87e78677eba944cc757
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri Jul 17 18:23:31 2015 +0300
+
+    i965/fs: Reimplement emit_mcs_fetch() in terms of logical sends.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit ba78a5007171afaa5f2d76d71be131f01a5b5023
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 16:07:45 2015 +0300
+
+    i965/fs: Reimplement emit_texture() in terms of logical send messages.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 4be99438e6e40280f9dc071882ce3bfbfabadb4a
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 21:19:52 2015 +0300
+
+    i965/fs: Hook up SIMD lowering to handle texturing opcodes of unsupported width.
+    
+    This should match the set of cases in which we currently call fail()
+    or no16() from the emit_texture_*() methods and the ones in which
+    emit_texture_gen4() enables the SIMD16 workaround.
+    
+    Hint for reviewers: It's not a big deal if I happen to have missed
+    some case here, it will just lead to an assertion failure down the
+    road which is easily fixable, however being stricter than necessary
+    won't cause any visible breakage, it would just decrease performance
+    silently due to the unnecessary message splitting, so feel free to
+    double-check that all cases listed here already cause a SIMD8/16
+    fall-back with the current texturing code -- You may want to skip over
+    the Gen5-6 cases though if you don't have pencil and paper at hand.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 2cd466f6c3192015ea1794afc57eb453d7f13818
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Sat Jul 18 17:09:37 2015 +0300
+
+    i965/fs: Implement lowering of logical texturing opcodes on Gen4.
+    
+    Unlike its Gen5 and Gen7 counterparts this patch isn't a plain
+    refactor of the previous Gen4 texturing code, it's more of a rewrite
+    largely based on emit_texture_gen4_simd16().  The reason is that on
+    the one hand the original emit_texture_gen4() code didn't seem easily
+    fixable to be SIMD width-invariant and had plenty of clutter to
+    support SIMD-width workarounds which are no longer required.  On the
+    other hand emit_texture_gen4_simd16() was missing a number of
+    SIMD8-only opcodes.  This should generalize both and roughly match
+    their current behaviour where there is overlap.
+    
+    Incidentally this will fix the following piglits on Gen4:
+    
+        arb_shader_texture_lod.execution.arb_shader_texture_lod-texgrad
+        arb_shader_texture_lod.execution.tex-miplevel-selection *gradarb 2d
+        arb_shader_texture_lod.execution.tex-miplevel-selection *gradarb 3d
+        arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 2d
+        arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 2d_projvec4
+        arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 3d
+    
+    Acked-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 501134b9fe02633ca0cdda66a9b670ae38e791f7
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Sat Jul 18 16:52:06 2015 +0300
+
+    i965/fs: Implement lowering of logical texturing opcodes on Gen5-6.
+    
+    This should be largely equivalent to emit_texture_gen5() except for
+    slight codestyle changes and the use of i965 opcodes instead of the
+    ir_texture_opcode enum, see "i965/fs: Implement lowering of logical
+    texturing opcodes on Gen7+." for the mapping between them.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 03582f95b256e483fc1b0d78bd6a49203a448a23
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri Jul 17 18:50:27 2015 +0300
+
+    i965/fs: Lower SHADER_OPCODE_TXF_UMS/MCS_LOGICAL too on Gen7+.
+    
+    These weren't being handled by emit_texture_gen7() but we can easily
+    lower them here for consistency with other texturing opcodes.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 8be01e3548bdd900b7cadb5c9a77e52b01151cfe
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 18:08:51 2015 +0300
+
+    i965/fs: Implement lowering of logical texturing opcodes on Gen7+.
+    
+    This should be largely equivalent to emit_texture_gen7() except that
+    we now get i965 sampling opcodes directly rather than
+    ir_texture_opcode enum values.  The mapping is as follows:
+    
+     - ir_tex -> SHADER_OPCODE_TEX
+     - ir_txb -> FS_OPCODE_TXB
+     - ir_txl -> SHADER_OPCODE_TXL
+     - ir_txd -> SHADER_OPCODE_TXD
+     - ir_txf -> SHADER_OPCODE_TXF
+     - ir_txf_ms -> SHADER_OPCODE_TXF_CMS
+     - ir_txs -> SHADER_OPCODE_TXS
+     - ir_query_levels -> SHADER_OPCODE_TXS too, the visitor will make
+                          sure that the provided lod value is zero in this
+                          case.
+     - ir_lod -> SHADER_OPCODE_LOD
+     - ir_tg4 -> SHADER_OPCODE_TG4_OFFSET if the offset value is not
+                 immediate, SHADER_OPCODE_TG4 otherwise.
+    
+    Other than that there are only minor changes and style fixes like the
+    implementation now being factored out in static functions to improve
+    encapsulation.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit a69332a31243a7733dab926b765964ba6df827b2
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri Jul 24 16:41:19 2015 +0300
+
+    i965/fs: Fix misleading comment regarding the message header in emit_texture_gen7.
+    
+    This hasn't been overallocating space for the header for a long time.
+    It still leaves the header uninitialized though until the generator
+    fixes it.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit fc2273a3400963e478582ee1efbfc8cdaae3eae7
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri Jul 17 18:46:21 2015 +0300
+
+    i965/fs: Pass a BAD_FILE header source to LOAD_PAYLOAD in emit_texture_gen7().
+    
+    So that it's left uninitialized by LOAD_PAYLOAD, we only need to
+    reserve space for it in the message since it will be initialized
+    implicitly by the generator.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 44a8cf488e0370d7e5abe363c1fd2d21247a6e32
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 15:33:04 2015 +0300
+
+    i965/fs: Fix opt_zero_samples() for texturing ops not matching dispatch_width.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 8fbb3d3569e6d353dee6e558eb9fd961b5a8a12c
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 15:42:20 2015 +0300
+
+    i965/fs: Use exec_size instead of dispatch_width to determine the message variant.
+    
+    dispatch_width is global for a single compilation and doesn't
+    necessarily match the desired execution width if we had to lower the
+    original full-width instruction due to hardware limitations.  These
+    were all inside a Gen4-specific branch so this patch shouldn't have
+    any effect on more recent hardware.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 33deff4f0582d2c073d34d4d6ec8344d2b1fbf7d
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 21 18:42:27 2015 +0300
+
+    i965/fs: Define logical texture sampling opcodes.
+    
+    Each logical variant is largely equivalent to the original opcode but
+    instead of taking a single payload source it expects the arguments
+    separately as individual sources, like:
+    
+     tex_logical dst, coordinates, shadow_c, lod, lod2,
+                      sample_index, mcs, sampler, offset,
+                      num_coordinate_components, num_grad_components
+    
+    This patch defines the opcodes and usual instruction boilerplate,
+    including a placeholder lowering function provided mostly as
+    documentation for their source registers.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit f18792aa10cedba2034762eade816c4c77ca46c6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jul 16 16:12:48 2015 +0300
+
+    i965/fs: Reimplement emit_single_fb_write() in terms of logical framebuffer writes.
+    
+    The only non-trivial thing it still has to do is figure out where to
+    take the src/dst depth values from and predicate the instruction if
+    discard is in use.  The manual SIMD unrolling logic in the dual-source
+    case goes away because this is now handled transparently by the SIMD
+    lowering pass.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 59e7e6f7a21f13ff8963cf21af2e969f1f7961f5
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 17:59:34 2015 +0300
+
+    i965/fs: Implement lowering of logical framebuffer writes.
+    
+    This does essentially the same thing as
+    fs_visitor::emit_single_fb_write(), with some slight differences:
+    
+     - We don't have to worry about exec_size and use_2nd_half anymore,
+       16-wide sources have already been lowered to 8-wide thanks to the
+       previous commit and the manual argument unzipping is no longer
+       required.
+    
+     - The src/dst_depth and sample_mask values are now explicit sources
+       of the instruction instead of being taken from the visitor state
+       directly.  The same goes for the kill-pixel mask that will be
+       passed to the instruction explicitly as predicate.
+    
+     - Everything is now done in static functions to improve
+       encapsulation.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 633938afd349f2b423146969688c11f1e29ca17a
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 21:19:28 2015 +0300
+
+    i965/fs: Hook up SIMD lowering to unroll FB writes of unsupported width.
+    
+    This shouldn't have any effect because we don't emit logical
+    framebuffer writes yet.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit cecf738b0fbe8ebafe304c717e847f1d3f41d3ca
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 14:49:27 2015 +0300
+
+    i965/fs: Remove the FS_OPCODE_SET_OMASK pseudo-opcode.
+    
+    This is now unused.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 98b0122e0a194e0d6c5d3eb05fd3f29a5286b3b3
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jul 15 17:31:04 2015 +0300
+
+    i965/fs: Don't attempt to copy the useless half of oMask for SIMD8 FB writes.
+    
+    There's no need to initialize the wrong half of oMask in the payload
+    when we're doing an 8-wide framebuffer write because it will be
+    ignored by the hardware anyway.  By doing it this way we can let the
+    SIMD lowering pass split the sample_mask source as a regular
+    per-channel source, otherwise we would have to introduce some sort of
+    per-instruction source query or use fs_inst::header_size for the
+    lowering pass to be able to find out whether some source is
+    header-like, and leave the source untouched in that case.
+    
+    As a bonus this achieves the same purpose as the previous code without
+    making use of the SET_OMASK pseudo-instruction, which will be removed
+    in a future commit.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit b1abfc49476f0277ddee0df269b56fc3de714c4b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jul 15 18:50:59 2015 +0300
+
+    i965/fs: Move up Gen6 no16 check to emit_fb_writes().
+    
+    And update the comment.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit b145855df624d0031eb2399503389948ebfcdd26
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jul 15 18:49:55 2015 +0300
+
+    i965/fs: Move prog_data->uses_omask assignment up to brw_codegen_wm_prog().
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 6bd991a1377862e3b1b9c05e835289fff9d6785f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jul 15 17:05:27 2015 +0300
+
+    i965/fs: Simplify control flow in emit_single_fb_write().
+    
+    Flatten the if ladder to match the way that the ordering of these
+    fields is specified in the hardware documentation a bit more closely.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 1ad928ed9f4e7723f709f91d18d17726c92f0b7b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jul 15 16:42:57 2015 +0300
+
+    i965/fs: Fix slight layering violation in emit_single_fb_write().
+    
+    In cases where the color0 argument wasn't being provided,
+    emit_single_fb_write() would take the alpha channel directly from the
+    visitor state instead of taking it from its arguments.  This sort of
+    hack didn't fit nicely into the logical send-message approach because
+    all parameters of the instruction have to be visible to the SIMD
+    lowering pass for it to be able to split them into halves at all.
+    
+    Fix it by using LOAD_PAYLOAD in fs_visitor::emit_fb_writes() to
+    provide an actual color0 vector with undefined contents except for the
+    alpha component to match the previous behavior when no color buffers
+    are enabled.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit f68ec2baf49e37f9ce4fffe95f13177eb7225015
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 15:40:18 2015 +0300
+
+    i965/fs: Make sure that the type sizes are compatible during copy propagation.
+    
+    It's surprising that we weren't checking for this already.  A future
+    patch will cause code like the following to be emitted:
+    
+     MOV(16) tmp<1>:uw, src
+     MOV(8) dst<1>:ud, tmp<8,8,1>:ud
+    
+    The second MOV comes from the expansion of a LOAD_PAYLOAD header copy,
+    so I don't have control over its types.  Copy propagation will happily
+    turn this into:
+    
+     MOV(8) dst<1>:ud, src
+    
+    Which has different semantics.  Fix it by preventing propagation in
+    cases where a single channel of the instruction would span several
+    channels of the copy (this requirement could in fact be relaxed if the
+    copy is just a trivial memcpy, but this case is unusual enough that I
+    don't think it matters in practice).
+    
+    I'm deliberately only checking if the type of the instruction is
+    larger than the original, because the converse case seems to be
+    handled correctly already in the code below.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit fa75f2d56616cba81014d4fc02931dcfaedaf5b9
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 15:41:34 2015 +0300
+
+    i965/fs: Honour the instruction force_sechalf and exec_size fields for FB writes.
+    
+    We were previously guessing the half based on the EOT flag which seems
+    rather gross.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit a9f31a032b0a1068a4e2ceed9ed4680ecf13e28b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 27 16:14:36 2015 +0300
+
+    i965/fs: Define logical framebuffer write opcode.
+    
+    The logical variant is largely equivalent to the original opcode but
+    instead of taking a single payload source it expects its arguments
+    that make up the payload separately as individual sources, like:
+    
+     fb_write_logical null, color0, color1, src0_alpha,
+                            src_depth, dst_depth, sample_mask, num_components
+    
+    This patch defines the opcode and usual instruction boilerplate,
+    including a placeholder lowering function provided mainly as
+    self-documentation.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 8368939e5d94f8d4ae55a1f22a755922ee77132b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 21:15:31 2015 +0300
+
+    i965/fs: Implement pass to lower instructions of unsupported SIMD width.
+    
+    This lowering pass implements an algorithm to expand SIMDN
+    instructions into a sequence of SIMDM instructions in cases where the
+    hardware doesn't support the original execution size natively for some
+    particular instruction.  The most important use-cases are:
+    
+     - Lowering send message instructions that don't support SIMD16
+       natively into SIMD8 (several texturing, framebuffer write and typed
+       surface operations).
+    
+     - Lowering messages that don't support SIMD8 natively into SIMD16
+       (*cough*gen4*cough*).
+    
+     - 64-bit precision operations (e.g. FP64 and 64-bit integer
+       multiplication).
+    
+     - SIMD32.
+    
+    The algorithm works by splitting the sources of the original
+    instruction into chunks of width appropriate for the lowered
+    instructions, and then interleaving the results component-wise into
+    the destination of the original instruction.  The pass is controlled
+    by the get_lowered_simd_width() function that currently just returns
+    the original execution size making the whole pass a no-op for the
+    moment until some user is introduced.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    
+    v2: Reverse order of the source transformations and split_inst emit
+        call to make the code a bit easier to understand.
+
+commit 86ae788baefefdb2fa77fe3c242ad2d81c8e834e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jul 16 15:58:56 2015 +0300
+
+    i965/fs: Fix return value of fs_inst::regs_read() for BAD_FILE.
+    
+    Typically BAD_FILE sources are used to mark a source as not present
+    which implies that no registers are read.  This will become much more
+    frequent with logical send opcodes which have a large number of
+    sources, many of them optionally used and marked as BAD_FILE when they
+    aren't applicable.  It will prove to be useful to be able to rely on
+    the value of regs_read() regardless of whether a source is present or
+    not.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 5a5607a16ce7bf5eace2cf4b267af304aef05e90
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 14 19:32:03 2015 +0300
+
+    i965/fs: Add builder emit method taking a variable number of source registers.
+    
+    And start using it in fs_builder::LOAD_PAYLOAD().  This will be used
+    to emit logical send message opcodes which have an unusually large
+    number of arguments.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 1dd3543ac1bebe089bfe3a8ae5efbe3f564e1144
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 17:44:58 2015 +0300
+
+    i965/fs: Add stub lowering pass for logical send-message opcodes.
+    
+    This pass will house ad-hoc lowering code for several send
+    message-like virtual opcodes that will represent their logically
+    independent arguments as separate instruction sources rather than as a
+    single payload blob.  This pass will basically just take the separate
+    arguments that are supposed to be part of the payload and concatenate
+    them to construct a message in the form required by the hardware.
+    Virtual instructions in separate-source form will eventually allow
+    some simplification of the visitor code and make several
+    transformations easier like lowering SIMD16 instructions to SIMD8
+    algorithmically in cases where the hardware doesn't support the former
+    natively.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit fb7eba97d7235d49ac712a21fb51009c86f3bc64
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 21 17:28:39 2015 +0300
+
+    i965/fs: Factor out source components calculation to a separate method.
+    
+    This cleans up fs_inst::regs_read() slightly by disentangling the
+    calculation of "components" from the handling of message payload
+    arguments.  This will also simplify the SIMD lowering and logical send
+    message lowering passes, because it will avoid expressions like
+    'regs_read * REG_SIZE / component_size' which are not only ugly, they
+    may be inaccurate because regs_read rounds up the result to the
+    closest register multiple so they could give incorrect results when
+    the component size is lower than one register (e.g. uniforms).  This
+    didn't seem to be a problem right now because all such expressions
+    happen to be dealing with per-channel GRFs only currently, but that's
+    by no means obvious so better be safe than sorry.
+    
+    v2: Split PIXEL_X/Y and LINTERP into separate case blocks.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 24d74b66883da1955f8c2223367d41470d99df6d
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 28 12:07:56 2015 +0300
+
+    i965/fs: Simplify instruction rewrite loop in the register coalesce pass.
+    
+    For some reason the loop that rewrites all occurrences of the
+    coalesced register was iterating over all possible offsets until it
+    would find one that compares equal to the offset of a source or
+    destination of any instruction in the program.  Since the mapping
+    between old and new offsets is already available in the regs_to_offset
+    array and we know that the whole register has been coalesced we can
+    just look it up.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 170200e0fcb0b16d20bff86e1258e0a1b2034c10
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 14:20:32 2015 +0300
+
+    i965/fs: Fix rewrite of the second half of 16-wide coalesced registers.
+    
+    The register coalesce pass wasn't rewriting the destination and
+    sources of instructions that accessed the second half of a coalesced
+    register previously copied with a 16-wide MOV instruction.  E.g.:
+    
+    | ADD (16) vgrf0:f, vgrf0:f, 1.0:f
+    | MOV (16) vgrf1:f, vgrf0:f
+    | MOV (8)  vgrf2:f, vgrf0+1:f { sechalf }
+    
+    would get incorrectly register-coalesced into:
+    
+    | ADD (16) vgrf1:f, vgrf1:f, 1.0:f
+    | MOV (8)  vgrf2:f, vgrf0+1:f { sechalf }
+    
+    The reason is that the mov[i] pointer was being left equal to NULL for
+    every other register.  The fact that we've made it to the rewrite loop
+    implies that the whole register will be coalesced, so it doesn't seem
+    right not to update something that uses it depending on whether mov[i]
+    is NULL or not.  Fixes a number of texturing and image_load_store
+    piglit tests on my SIMD-lowering branch.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit d0a42b457fb905ce2cc12bb05110ef63656221c9
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 28 11:25:59 2015 +0300
+
+    i965/fs: Detect multi-register MOVs correctly in register_coalesce.
+    
+    register_coalesce() was considering the exec_size of the MOV
+    instruction alone to decide whether the register at offset+1 of the
+    source VGRF was being copied to inst->dst.reg_offset+1 of the
+    destination VGRF, which is only a valid assumption if the move has a
+    32-bit execution type.  Use regs_read() instead to find out the number
+    of registers copied by the instruction.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 2294ba9565fbae49f1fc77ca171e9d6aafa34005
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 9 16:34:59 2015 +1000
+
+    radeon: add support for streams to the common streamout code. (v2)
+    
+    This adds to the common radeon streamout code, support
+    for multiple streams.
+    
+    It updates radeonsi/r600 to set the enabled mask up.
+    
+    v2: update for changes in previous patch.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 3f0e7c28fe5252f0613b548efd1cbf8e4bc0eb9a
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Sun Jul 26 01:27:17 2015 +0100
+
+    radeon: move streamout buffer config to streamout enable function. (v2)
+    
+    This will be used here later.
+    
+    v2: update atom sizes
+    add check for old vs new enabled mask
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 19d88e3f9f621643ba0524ff37e9a33272353941
+Author: Fabio Pedretti <fabio.ped@libero.it>
+Date:   Tue Jul 28 20:53:25 2015 +0200
+
+    docs: consolidate nvc0 status
+
+commit cb1cfb710c5a30f2e9b9ea1bca9d7ae0f23bcdfc
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Thu Jul 23 16:38:38 2015 +0200
+
+    mesa/es3.1: enable GL_ARB_explicit_uniform_location for GLES 3.1
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 49db765debf9d1a810810935fafc3eef229e1511
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Wed Jul 29 10:10:40 2015 +0300
+
+    mesa/es3.1: enable GL_ARB_compute_shader for GLES 3.1
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 49021e5058130db299ac6843e34c5f5c53e565ad
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon Jul 27 15:22:51 2015 +0200
+
+    mesa/es3.1: enable GL_ARB_texture_gather for GLES 3.1
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit c561b2faa80d07eedfe201ffdbb3f7746e33a049
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon Jul 27 15:22:50 2015 +0200
+
+    mesa/es3.1: enable GL_ARB_texture_multisample for GLES 3.1
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit cd14fcbca0a1dcecfdbee97a3524123ba87f901d
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Thu Jul 23 16:38:34 2015 +0200
+
+    mesa/es3.1: enable GL_ARB_shader_atomic_counters for GLES 3.1
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 9ec50dc6bb192818dde221e561fb6be6c4bd417b
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Mon Jul 27 15:22:49 2015 +0200
+
+    mesa/es3.1: enable GL_ARB_shader_image_load_store for GLES 3.1
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@intel.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit d1bb3b4910e6c02344550b8982aa8442cd7efd29
+Author: Marta Lofstedt <marta.lofstedt@intel.com>
+Date:   Thu Jul 23 16:38:32 2015 +0200
+
+    mesa/es3.1: Add ES 3.1 handling to get.c and get_hash_generator.py
+    
+    Signed-off-by: Marta Lofstedt <marta.lofstedt@linux.intel.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit c00d093c8f247c41f9122143c49ffa93865a0ded
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Feb 10 16:40:40 2015 +0100
+
+    mesa: Return INVALID_ENUM in glClearBufferiv() when buffer is not color or stencil
+    
+    Page 497 of the PDF, section '17.4.3.1 Clearing Individual Buffers' of the
+    OpenGL 4.5 spec states:
+    
+        "An INVALID_ENUM error is generated by ClearBufferiv and
+         ClearNamedFramebufferiv if buffer is not COLOR or STENCIL."
+    
+    Fixes 1 dEQP test:
+    * dEQP-GLES3.functional.negative_api.buffer.clear_bufferiv
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 055e3a3f87d8be5374902d2ae6fecb0eb5c66714
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Jul 28 18:45:32 2015 -0700
+
+    i965: Use real stage in "Unsupported form of variable indexing" warning.
+    
+    Other stages can be miserably slow too!
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit e235ca159f5f6de2bd29616fdda5c02dc69b0d7f
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jul 22 20:08:23 2015 -0700
+
+    glsl: Fix a bug where LHS swizzles of swizzles were too small.
+    
+    A simple shader such as
+    
+       vec4 color;
+       color.xy.x = 1.0;
+    
+    would cause ir_assignment::set_lhs() to generate bogus IR:
+    
+       (swiz xy (swiz x (constant float (1.0))))
+    
+    We were setting the number of components of each new RHS swizzle based
+    on the highest channel used in the LHS swizzle.  So, .xy.y would
+    generate (swiz xy (swiz xx ...)), while .xy.x would break.
+    
+    Our existing Piglit test happened to use .xzy.z, which worked, since
+    'z' is the third component, resulting in an xxx swizzle.
+    
+    This patch sets the number of swizzle components based on the size of
+    the LHS swizzle's inner value, so we always have the correct number
+    at each step.
+    
+    Fixes new Piglit tests glsl-vs-swizzle-swizzle-lhs-[23].
+    Fixes ir_validate assertions in Metro 2033 Redux.
+    
+    v2: Move num_components updating completely out of update_rhs_swizzle
+        (suggested by Timothy Arceri).  Simplify.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit e17056f5a20beb752a530180fce1aba0e68877b6
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Fri Jul 3 10:19:23 2015 +0300
+
+    glsl: verify location when dual source blending
+    
+    Same check is made for glBindFragDataLocationIndexed but it was missing
+    when using layout qualifiers.
+    
+    Fixes following Piglit test:
+    	arb_blend_func_extended-output-location
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit b868971e786b849e70675852a0043538bcce0739
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Jul 27 13:29:20 2015 +0300
+
+    glsl: move max_index calc to assign_attribute_or_color_locations
+    
+    Change function to get all gl_constants for inspection, this is used
+    by follow-up patch.
+    
+    v2: rebase, update function documentation
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 2e04492a142102823dfb8fc8599cfd417b84c97a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 28 11:00:58 2015 -0700
+
+    vc4: Skip re-emitting the shader_rec if it's unchanged.
+    
+    It's a bunch of work for us to emit it (and its uniforms), more work for
+    the kernel to validate it, and additional work for the CLE to read
+    it. Improves es2gears framerate by about 50%.
+    
+    Signed-off-by: Eric Anholt <eric@anholt.net>
+
+commit aefec4fa226d06e4b414170739be18dd24d3eed7
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 28 19:59:45 2015 -0700
+
+    vc4: Drop unused vpm_offset value.
+    
+    It's been dead since we started doing VS/CS attr offset setup during
+    shader compile.
+
+commit 1f5e070dd7ddd344a913f2f5daddebb4c51abb8a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 28 10:20:10 2015 -0700
+
+    vc4: Simplify vc4_use_bo and make sure it's not a shader.
+    
+    Since the conversion to keeping validated shaders around for the BO's
+    lifetime, we haven't been checking that rendering doesn't happen to
+    shaders.  Make vc4_use_bo check that always, and just don't use it for the
+    VC4_MODE_SHADER case (so now modes are unused)
+
+commit 044f7bbda077ea7029fb1004183b29127307bd84
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 28 10:11:08 2015 -0700
+
+    vc4: Keep the validated shader around for the simulator execution.
+    
+    This more closely matches the kernel behavior on shader validation now.
+
+commit 22954db71cd1d8d9ef6e5a16f568e4b3c7845777
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 28 09:51:37 2015 -0700
+
+    vc4: Make the object be the return value from vc4_use_bo().
+    
+    Drops 40 bytes of code from validation.
+
+commit cbb7477e8a796211b664ff7e47334cb1b642556d
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 28 00:29:31 2015 -0700
+
+    vc4: Ensure that the bin CL is properly capped by increment/flush.
+    
+    We don't want anything to appear after we've kicked off the render (and
+    thus job flush), since that might then get written out to the tile
+    allocation state.
+    
+    Signed-off-by: Eric Anholt <eric@anholt.net>
+
+commit 601733da6708722ceedd35afc7727c28779012f7
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 28 00:05:33 2015 -0700
+
+    vc4: Drop NV shader reloc validation.
+    
+    It wasn't validating enough, and we don't need the packet.
+
+commit 95faf2c6397ce231e94176d18cf8fd2c3265bb8a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jul 27 23:23:57 2015 -0700
+
+    vc4: Fix raster surface shadow updates under DRI2.
+    
+    Glamor asks GBM for the handle of the BO, then flinks it itself.  We
+    were marking the bo non-private in the flink and dmabuf (DRI3) paths,
+    but not the GEM handle path.  As a result, non-pageflipping DRI2
+    swapbuffers (EGL apps, in particular) were never updating the texture.
+
+commit b0193adbe9403545b0d9f7c7f24a1c30f1491a48
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jul 27 23:15:39 2015 -0700
+
+    vc4: Fix bus errors on dumping CL on hardware.
+    
+    The kernel can't fixup unaligned float traps for us, so deref as a
+    uint32_t first.
+
+commit 736c6f3cfc2c69e3c29268d4ebb7110dd36ac97f
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jul 23 17:26:56 2015 -0700
+
+    meta/copy_image: Stash off the scissor
+    
+    The meta CopyImageSubData path uses BlitFramebuffers to do the actual copy.
+    The only thing that can affect BlitFramebuffers other than the currently
+    bound framebuffers is the scissor so we need to save that off and reset it.
+    If we don't do this, applications that use a scissor together with
+    CopyImageSubData will get accidentally scissored copies.
+    
+    Tested-by: Markus Wick <markus at selfnet.de>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit bf4019a1c89755af94218055e86544f7823dc4ac
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 9 16:33:59 2015 +1000
+
+    radeon: add streamout status 1-3 queries.
+    
+    This adds support for queries against the non-0 vertex streams.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 5142564734bd68f165b02e29e384ebbcf91cce38
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jul 28 20:41:16 2015 +0200
+
+    st/mesa: remove st_context::missing textures and get_passthrough_fs
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 72f31c63d7b73abcdf47bc303d09987f299aff7a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 17:26:10 2015 +0200
+
+    st/mesa: remove st_finalize_textures atom
+    
+    It only checks fragment textures and ignores other shaders, which makes it
+    incomplete, and textures are already finalized in update_single_texture.
+    
+    There are no piglit regressions.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 6ca3ff982a9e6a54286158b457d479715be5ab17
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 25 20:25:18 2015 +0200
+
+    st/mesa: add shader dumping for shader-db
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 768b4a25b95b95989dae3ff2f5a06172a2f4ab85
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 23 21:57:19 2015 +0200
+
+    st/mesa: fix GLSL 1.30 texture shadow functions with the GL_ALPHA depth mode (v2)
+    
+    Fixes piglit:
+        spec@glsl-1.30@execution@fs-texture-sampler2dshadow-10
+        spec@glsl-1.30@execution@fs-texture-sampler2dshadow-11
+    
+    v2: use st_shader_stage_to_ptarget
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 82546729e3533c9a5ec0392585a60833bd93acca
+Author: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+Date:   Mon Jul 27 11:01:47 2015 +1000
+
+    r600,radeonsi: GL_ARB_conditional_render_inverted
+    
+    Using Tobias Klausmann's piglit test-suite patch, we obtain
+    a full 12/12 passes with this patch.  By merely 'faking' a claim of
+    support for this extension we obtain 7 fails and 5 passes.
+    
+    Signed-off-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
+    Tested-by: Furkan Alaca <falaca@gmail.com>
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+commit aa25a2c1ba2ea14efdab405707f15dace323cd48
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Fri Jul 17 04:44:18 2015 +0100
+
+    radeonsi: add support for interpolateAt functions (v2)
+    
+    This is part of ARB_gpu_shader5, and this passes
+    all the piglit tests currently available.
+    
+    v2: use macros from the fine derivs commit.
+    add comments.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 56f1f47eda881d6281e9c7531bc17e72b25d9bb9
+Author: Chad Versace <chad.versace@intel.com>
+Date:   Tue Jun 23 15:48:40 2015 -0700
+
+    i965: Support importing R8 and GR88 dma_bufs
+    
+    EGL_EXT_image_dma_buf_import now supports those formats.
+    
+    Tests:
+      - Tested by Piglit ext_image_dma_buf_import-transcode-nv12-as-r8-gr88.
+      - Tested by Peter in Kodi/XBMC to obtain 60fps NV12 transcode at 4K.
+    
+    Tested-by: Peter Frühberger <peter.fruehberger@gmail.com>
+    Signed-off-by: Chad Versace <chad.versace@intel.com>
+
+commit fd865d56d2229d8c5d7ea893ac1dba525d88e647
+Author: Chad Versace <chad.versace@intel.com>
+Date:   Tue Jun 23 15:48:17 2015 -0700
+
+    egl: Add support for DRM_FORMAT_R8, RG88, and GR88
+    
+    The Kodi/XBMC developers want to transcode NV12 to RGB with OpenGL shaders,
+    importing the two source planes through EGL_EXT_image_dma_buf_import. That
+    requires importing the Y plane as an R8 EGLImage and the UV plane as either an
+    RG88 or GR88 EGLImage.
+    
+    This patch teaches the driver-independent part of EGL about the new
+    formats. Real driver support is left for follow-up patches.
+    
+    The new formats landed in airlied's kernel branch 'drm-next' on July 24.
+    
+    Tested-by: Peter Frühberger <peter.fruehberger@gmail.com>
+    Signed-off-by: Chad Versace <chad.versace@intel.com>
+
+commit 313940b03cf7c857143b9e3ec0ab969ce4472c83
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Jul 28 02:37:51 2015 -0400
+
+    nvc0/ir: trim out barrier sync for non-compute shaders
+    
+    It seems like they're never necessary, and actively cause harm. This
+    fixes some of the barrier-related piglits.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit ab63610a3603ae1e40a36d238b5938621bb9e8cc
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Jul 28 02:00:20 2015 -0400
+
+    nvc0/ir: fix barrier emission
+    
+    immediate arguments require a flag to be set for each one
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 7850774f2118ae87c7e6a4f6c17751e405edfb34
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jul 22 12:14:40 2015 -0700
+
+    vc4: Add support for ARB_draw_elements_base_vertex.
+    
+    Gallium exposes it unconditionally, so do our best to support it.  It
+    fails on the negative index cases, but those seem unlikely to be used in
+    the wild.
+
+commit 98a4b111fbb9e3ae45e907ddd4d2407e5ab669ec
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jul 25 12:53:23 2015 -0400
+
+    freedreno/ir3: add transform-feedback support
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 96d4db683f90f02e72d34ece544de7eedfa873ee
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jul 25 13:51:16 2015 -0400
+
+    freedreno/ir3: track "keeps" in ir
+    
+    Previously we had a fixed array to track kills, since they don't
+    generate an SSA value, and then cheated by stuffing them in the
+    outputs array before sending things through depth/sched/etc.  But
+    store instructions will need similar treatment.  So convert this
+    over to a more general array of instructions that must be kept
+    and fix up the places that were previously relying on kills being
+    in the output array.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 020301baccc77e5753ead1e890c0cf24a9675517
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jul 25 13:48:07 2015 -0400
+
+    freedreno/ir3: add support for store instructions
+    
+    For store instructions, the "dst" register is a read register, not a
+    written register.  (I.e. it is the address to store to.)  Let's not
+    confuse register allocation, scheduling, etc, with these details.
+    Instead just leave a dummy instr->regs[0], and take "dst" from
+    instr->regs[1] and srcs following.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit a240748de52f2e469e91b60d29ae872828a594d7
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jul 25 12:48:18 2015 -0400
+
+    freedreno/ir3: cleanup driver-param stuff
+    
+    Add 'enum ir3_driver_param' to track driver-param slots, and a
+    create_driver_param() helper to avoid having the knowledge about
+    where driver params are placed in const regs spread throughout
+    the code as we add additional driver-params.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit be8a8ebe578267ab24e343c3c1347936a221468e
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jul 25 10:56:39 2015 -0400
+
+    freedreno: add transform-feedback state
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit bda1354aac9d32e236048af4d353d5530f644c34
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Jul 26 13:30:26 2015 -0400
+
+    freedreno: add resource tracking support for written buffers
+    
+    With stream-out (transform-feedback) we have the case where resources
+    are *written* by the gpu, which needs basically the same tracking to
+    figure out when rendering must be flushed.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 65d36a109a7dd333c15180a0f30ad919eb01d78f
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 24 17:07:23 2015 -0400
+
+    freedreno/a3xx+a4xx: add support for vtxcnt semantic
+    
+    This will be used for stream-out (transform-feedback)
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 1b1ef6b4573ab9f21abd5fb374bc74d03390146d
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 24 17:06:01 2015 -0400
+
+    freedreno/ir3: add stream-output support to cmdline compiler
+    
+    A bit hard-coded configuration at the moment, but sufficient for now.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 810763deb514c3fec41c3e95761de34e6211d291
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 24 16:42:10 2015 -0400
+
+    freedreno/ir3: drop unused create_input() arg
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 56462a30080c1f25a81ae566d59a25d2ad6bb809
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jul 24 13:07:33 2015 -0400
+
+    freedreno/ir3: move emit_const to ir3
+    
+    Details of the cmdstream packets are different between a3xx and a4xx,
+    but the logic about the layout of const registers is the same, as that
+    is dictated by the ir3 shader compiler.  So rather than duplicating
+    logic that is tightly coupled to ir3 between a3xx and a4xx, move this
+    into ir3 and use per-generation callbacks to build the cmdstream
+    packets.
+    
+    This should make it easier to pass additional const regs (such as for
+    transform feedback).  And it also keeps the layout internal to ir3 in
+    case we want to make the layout more dynamic some day.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 0815729d964f4e8e6e263acf70b5b91577de027a
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 23 15:51:13 2015 -0400
+
+    freedreno/ir3: bit of shader API refactoring
+    
+    Since for transform-feedback, we'll need more than just the TGSI
+    tokens from the state object, just pass the entire state object to
+    ir3_shader_create().  This also cleans things up a bit for some
+    day in the future when we could take shader either as TGSI or
+    directly NIR (for ex, glsl2nir or spirv2nir paths).  In the same
+    spirit, drop extra args from ir3_compile_shader_nir() (since it
+    can anyways get what it needs from the ir3_shader_variant).
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit bc5e2bec303acd7fd962996bf369be5ce0e15cd2
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 23 15:31:13 2015 -0400
+
+    freedreno/ir3: updated cat6 encoding
+    
+    Sync updated cat6 encoding from freedreno.git, needed to properly encode
+    store instructions.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 4b15cb6daa29d4bdd268eac6c2e40fb1503e98fa
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 25 01:06:20 2015 -0400
+
+    glsl: enable conservative depth, ssbo based on GLSL version
+    
+    Add in missed version checks in the GLSL parser
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit d69da58e84448188808488ad1c1c0181b5630a74
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jul 26 15:20:31 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.6.3
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 11516b8bd1af2709b74a135787b43de55fe6238e
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jul 26 15:18:24 2015 +0100
+
+    docs: Add checksums for mesa 10.6.3 tarballs
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit ccef8901de421eae5dcc8affa14218d46cc06593)
+
+commit e1dcd158785606d4e7e9ca5513732b7e6e7b93d9
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jul 26 14:38:58 2015 +0100
+
+    Add release notes for 10.6.3
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit ddc976368fef367e464472ebcc2ac4fd89eb9fd8)
+
+commit bb9d59aed5b01133f4c8e9f131a83b45fce91fdc
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Fri Jul 17 05:35:30 2015 +0100
+
+    radeonsi: add fine derivative control (v2.1)
+    
+    This adds support for fine derivatives and enables
+    ARB_derivative_control on radeonsi.
+    
+    (just fell out of my working out interpolation)
+    
+    v2: cleanup some bits, write a comment
+    v2.1: take Michel's comment from the mailing list
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 9deb614cacbeca3e99724f08254ab1789f34b56c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 24 00:54:08 2015 +0200
+
+    radeonsi: fix GLSL textureGrad(samplerCube*) functions
+    
+    +4 piglits
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit e39ece0d7856d0532a0f011cd5cb17bc85ee82e2
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 24 19:47:06 2015 +0200
+
+    st/mesa: don't ignore texture buffer state changes
+    
+    Fixes piglit:
+      spec@arb_texture_buffer_range@ranges-2
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit a818faa6ddcfa6cd90a24b70c49ec76573954111
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 11 12:47:03 2015 -0400
+
+    nvc0: fix geometry program revalidation of clipping params
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 7b40d92f0d0661c05c1afa59555905b2c37e594f
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jul 13 09:12:18 2015 +0100
+
+    radeonsi: ubo indexing support (v2)
+    
+    This is required as part of ARB_gpu_shader5.
+    
+    no backend changes are required for this, or if
+    any are, it's the same ones as for samplers.
+    
+    v2: use get_indirect_index (Marek)
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit b0654e368b1741083055efd281b981db4fb5724b
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jul 13 00:07:09 2015 +0100
+
+    radeonsi: add support for indirect samplers (v2)
+    
+    This adds the frontend support.  The llvm backend currently produces
+    the wrong pattern, but we can conditionalise enabling ARB_gpu_shader5
+    on whatever version of llvm we fix this in.
+    
+    v2: drop unneeded sampler_indirect checks (Marek)
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 4b6c1efb225777231459de54903484367d0b1ca1
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Fri Jul 17 04:43:09 2015 +0100
+
+    radeonsi: split out interpolation input selection
+    
+    This is prep work for using it in the interpolation code
+    later.
+    
+    Also add storage for the input interpolation mode so we
+    can pick it up later.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 730e8c4410d73bf9db2c1768af8cf6e98e24cc73
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 16 04:38:41 2015 +0100
+
+    radeonsi: separate out load sample position
+    
+    This is prep work for reusing this in the interpolation
+    code later.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit b42444ffed87114e82522dd81d3e5540c21a128c
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 24 17:06:22 2015 -0400
+
+    glsl: recognize ARB_shading_language_420pack to be enabled with 4.20+
+    
+    The 420pack extension enables various GLSL rules that need to be applied
+    to any GLSL 4.20+ shader even if the extension is not explicitly
+    enabled.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit f8059c9f3fdd270370737c9eff369eb6d14caa0b
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 23 20:18:57 2015 -0400
+
+    mesa: fix error checking for getting zero-sized texture images
+    
+    Commit 17f714836 (mesa: rearrange texture error checking order) moved
+    the width/height/depth == 0 allowance before checking if the image was
+    there. This was in part due to depth having to be == 1 for 2D images and
+    width having to be == 1 for 1D images. Instead relax the height/depth
+    checks to also accept 0 as valid.
+    
+    With this change,
+    
+      bin/arb_direct_state_access-get-textures
+    
+    starts passing again.
+    
+    Fixes: 17f714836 (mesa: rearrange texture error checking order)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 56980f107ef64d0a5bfc5d292cc891661e47d0f0
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 26 15:39:40 2015 -0700
+
+    mesa: Fix typo in a comment
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit a9cbb2c722615e11818066cbe33006c5cfc43381
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 12 14:58:46 2015 -0700
+
+    meta: Use _mesa_need_rgb_to_luminance_conversion() in decompress_texture_image()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 4b8745680ff45cd7adc7896c06263e14b8d347ce
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 12 14:42:57 2015 -0700
+
+    mesa: Change the signature of _mesa_need_rgb_to_luminance_conversion()
+    
+    This allows us to handle cases when texImage->_BaseFormat doesn't match
+    _mesa_format_get_base_format(texImage->Format). _BaseFormat is what we
+    care about in this function.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit aa40546b2de4cd572af02d31fd5c7d4045505ea2
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue May 12 05:46:04 2015 -0700
+
+    meta: Fix reading luminance texture as rgba in _mesa_meta_pbo_GetTexSubImage()
+    
+    After recent addition of pbo testing in piglit test getteximage-luminance,
+    it fails on i965. This patch makes a sub test pass.
+    
+    This patch adds a clear color operation to meta pbo path, which I think is
+    better than falling back to software path.
+    
+    V2: Fix color mask for GL_LUMINANCE_ALPHA
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit be405ee334ec758a2609d8780221f4f1a1ed3343
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Thu Jun 11 17:23:34 2015 -0700
+
+    meta: Use _mesa_need_luminance_to_rgb_conversion() in decompress_texture_image()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit c59c0f8a42652603da7f89e3270897cb685fe76b
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Thu Jun 11 16:48:26 2015 -0700
+
+    mesa: Add a helper function _mesa_need_luminance_to_rgb_conversion()
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 9fff00d387cacf7820c344324820cab764541762
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 12 12:11:01 2015 -0700
+
+    meta: Use _mesa_unpack_format_to_base_format() to handle integer formats
+    
+    Replace a call to mesa_base_tex_format() that handles only internal
+    formats with a call to the new _mesa_unpack_format_to_base_format()
+    function that handles allowed unpack formats and does not care for
+    internal formats at all.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 0127580647ee23d543228f0b7f42bd688e76f2bd
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 12 12:09:05 2015 -0700
+
+    mesa: Add a helper function _mesa_unpack_format_to_base_format()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit bbbefec7323d0a338346233ab4ab715bcf4e1b78
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Thu Jun 11 16:44:45 2015 -0700
+
+    mesa: Set green, blue channels to zero only for formats with these components
+    
+    This is an optimization which avoids setting pixel transfer operations
+    when not required. _mesa_ReadPixels falls back to slower path if
+    transfer operations are set.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit ca4e17e03e9aeaa04fe6bb04bfe2d6f97991005b
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Wed May 6 05:43:08 2015 -0700
+
+    meta: Don't do fragment color clamping in _mesa_meta_pbo_GetTexSubImage
+    
+    _mesa_meta_pbo_GetTexSubImage() uses _mesa_meta_BlitFrameBuffer(),
+    which will do fragment clamping if enabled. But fragment clamping
+    doesn't affect ReadPixels and GetTexImage.
+    
+    Without this patch, piglit test arb_color_buffer_float-clear fails,
+    when forced to use the meta pbo path.
+    
+    v2: Apply this fix to both glReadPixels and glGetTexImage.
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 0d207905e675b778739236072e7a4dfba7cd7959
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Wed May 20 10:22:45 2015 -0700
+
+    meta: Abort meta pbo path if readpixels need signed-unsigned conversion
+    
+    Meta pbo path for ReadPixels relies on BlitFramebuffer which doesn't support
+    signed to unsigned integer conversions and vice versa.
+    
+    Without this patch, piglit test fbo_integer_readpixels_sint_uint fails, when
+    forced to use the meta pbo path.
+    
+    v2: Make need_signed_unsigned_int_conversion() a static function. (Iago)
+        Bump up the comment and the commit message. (Jason)
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Iago Toral <itoral@igalia.com>
+
+commit 1252d53c19ec005c17ca666cecb7db072d77e5ce
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Wed May 20 10:21:39 2015 -0700
+
+    meta: Fix transfer operations check in meta pbo path for readpixels
+    
+    Currently used ctx->_ImageTransferState check is not sufficient
+    because it doesn't include the read color clamping enabled with
+    GL_CLAMP_READ_COLOR. So, use the helper function
+    _mesa_get_readpixels_transfer_ops().
+    
+    Also, transfer operations don't affect glGetTexImage(). So, do
+    the check only for glReadPixels.
+    
+    Without this patch, arb_color_buffer_float-readpixels test fails, when
+    forced to use meta pbo path.
+    
+    V2: Add a comment and bump up the commit message.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 7974e23be9ff7586e5250cff321b6ec7749ecc44
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue May 19 17:44:52 2015 -0700
+
+    mesa: Turn get_readpixels_transfer_ops() into a global function
+    
+    This utility function is utilized in a later patch.
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 013d731a67538a2eb8f508fa54bb86191f0e5491
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Tue Jul 21 11:12:57 2015 +0100
+
+    i965: Use updated kernel interface for accurate TIMESTAMP reads
+    
+    I was mistaken, I thought we already had fixed this in the kernel a
+    couple of years ago. We had not, and the broken read (the hardware
+    shifts the register output on 64bit kernels, but not on 32bit kernels) is
+    now enshrined into the ABI. I also had the buggy architecture reversed,
+    believing it to be 32bit that had the shifted results. On the basis of
+    those mistakes, I wrote
+    
+    commit c8d3ebaffc0d7d915c1c19d54dba61fd1e57b338
+    Author: Chris Wilson <chris@chris-wilson.co.uk>
+    Date:   Wed Apr 29 13:32:38 2015 +0100
+    
+        i965: Query whether we have kernel support for the TIMESTAMP register once
+    
+    Now that we do have an extended register read interface for always
+    reporting the full 36bit TIMESTAMP (irrespective of whether the hardware
+    is buggy or not), make use of it and in the process fix my reversed
+    detection of the buggy reads for unpatched kernels.
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Martin Peres <martin.peres@linux.intel.com>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: Michał Winiarski <michal.winiarski@intel.com>
+    Cc: Daniel Vetter <daniel@ffwll.ch>
+    Tested-and-acked-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
+
+commit 30f97b5e52b324d501c56df8902d294fb755a5b7
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Thu Jul 23 10:38:36 2015 +0200
+
+    glsl/glcpp: fix SIGSEGV when checking error condition for macro redefinition
+    
+    Commit a6e9cd14c does not take into account that node_{a,b}->next could be NULL
+    in some circumstances, such as in a shader containing this code:
+    
+      #define A 1 /* comment */
+      #define A 1 /* comment */
+    
+    This patch fixes the segmentation fault for cases like that.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91290
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 24a7d4e437e27c758c2848e887ceaf1d4a55ae50
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 24 00:21:28 2015 -0400
+
+    nvc0/ir: per-patch vars are in a separate address space
+    
+    There's no need to attempt to avoid overlapping generic i/o with patch
+    i/o. By the same token, we can't merge patch and non-patch loads/stores.
+    
+    This fixes at least the
+    
+      tes-both-input-array-*-index-rd
+    
+    tessellation variable-indexing tests.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 9d60793a03e40e1d139b78fce0144cad57438741
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 23 23:03:53 2015 -0400
+
+    nvc0/ir: kepler can't do indirect shader input/output loads directly
+    
+    There's a special AL2P instruction (called AFETCH in nv50 ir) which
+    computes a "physical" value to be used with indirect addressing with ALD.
+    
+    Fixes
+    
+      tcs-input-array-*-index-rd
+      tcs-output-array-*-index-wr
+    
+    varying-indexing tessellation tests on Kepler.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 22c9339abf00c2ecf40e0d8fd740faafba3ec37b
+Author: Vinson Lee <vlee@freedesktop.org>
+Date:   Tue Jul 21 21:50:29 2015 -0700
+
+    radeon: Silence GCC unused-but-set-variable warnings.
+    
+    radeon_fbo.c: In function 'radeon_map_renderbuffer_s8z24':
+    radeon_fbo.c:162:9: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
+         int ret;
+             ^
+    radeon_fbo.c: In function 'radeon_map_renderbuffer_z16':
+    radeon_fbo.c:200:9: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
+         int ret;
+             ^
+    radeon_fbo.c: In function 'radeon_map_renderbuffer':
+    radeon_fbo.c:242:8: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
+        int ret;
+            ^
+    radeon_fbo.c: In function 'radeon_unmap_renderbuffer':
+    radeon_fbo.c:419:14: warning: variable 'ok' set but not used [-Wunused-but-set-variable]
+        GLboolean ok;
+                  ^
+    
+    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 00fb21e744b045cc9f945021305b85595c35dd69
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Wed Jul 22 22:14:00 2015 -0600
+
+    doxygen: Link GLvector4f struct members properly, avoiding invalid XML/HTML warning
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit bc893e3dad74622b971e295f60a022f179ca9942
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Wed Jul 22 22:14:00 2015 -0600
+
+    doxygen: Correct grammatical typo in math/m_vector.h
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 28db89fa8b3f59d35032c0576fbd0c74739b3c87
+Author: Brian Paul <brianp@vmware.com>
+Date:   Thu Jul 23 10:04:13 2015 -0600
+
+    mesa: minor clean-ups in shaderapi.c
+    
+    80-column wrapping.  Move break statements.  Indentation fixes.
+
+commit dd86fbeaaa136c4ddfd255286f4975d869e799a0
+Author: Brian Paul <brianp@vmware.com>
+Date:   Thu Jul 23 07:47:25 2015 -0600
+
+    mesa: fix _mesa_error() compiler warnings in shaderapi.c
+    
+    Fix many instances of:
+    main/shaderapi.c: In function '_mesa_GetSubroutineUniformLocation':
+    main/shaderapi.c:2176:7: warning: format not a string literal and no format arguments [-Wformat-security]
+           _mesa_error(ctx, GL_INVALID_OPERATION, api_name);
+           ^
+    
+    Ideally, many of these error messages should be improved to indicate
+    which argument is incorrect as we do in other parts of Mesa.
+    
+    Reviewed-by: Kai Wasserbäch <kai@dev.carbon-project.org>
+    Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
+
+commit 43b69aad195f5abfc2c8c75bfa2ff31e5b99fbab
+Author: Brian Paul <brianp@vmware.com>
+Date:   Thu Jul 23 07:43:11 2015 -0600
+
+    st/mesa: remove unused 'samp' function parameters
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit d7cb3f76f5c175c95ffbbec40eea5976493f8681
+Author: Brian Paul <brianp@vmware.com>
+Date:   Thu Jul 23 07:41:09 2015 -0600
+
+    st/mesa: add comments on a few sampler view functions
+    
+    Trivial.
+
+commit 3afa40e43368b29ca99018999936336c3879fa4d
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jul 22 07:53:01 2015 -0600
+
+    mesa: do more thorough target checking in compressed_subtexture_target_check()
+    
+    When we're error-checking the target, we also need to check if the
+    corresponding extension is supported.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 05a44ab32832efe61a252ef4ac2d128c1101c286
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jul 22 07:42:12 2015 -0600
+
+    mesa: another target fix in compressed_subtexture_target_check()
+    
+    The previous fix added GL_TEXTURE_CUBE_MAP_ARRAY but we also need
+    to support GL_TEXTURE_CUBE_MAP (via DSA).
+    
+    So in the end, GL_TEXTURE_3D is the only (legal) target for
+    glCompressedTex*SubImage3D() which needs additional compression
+    format checking.  GL_TEXTURE_2D_ARRAY, GL_TEXTURE_CUBE_MAP_ARRAY
+    and GL_TEXTURE_CUBE_MAP are basically 2D images which support all
+    compressed formats.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 81e2c256e921ad4f5c13bb0d95bbe0ad232ec37c
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jul 22 07:32:36 2015 -0600
+
+    mesa: simplify format check in compressed_subtexture_target_check()
+    
+    Lose the invalidformat local variable.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit dbefffa5b4c438008d44db106b5774f575cb495f
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:42:41 2015 -0600
+
+    mesa: initialize variables to silence compiler warnings
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit 319b83b3ee2629f443a8734256bbf33b3fb4a7a9
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Fri Jul 24 12:02:57 2015 +1000
+
+    apiexec: remove leading gl from shader subroutine interfaces
+    
+    Remove the gl at the start, stared at this for a while
+    yesterday, totally missed it.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91441
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 0a51acbb467bce5afddc7edf53db426ac697ccf1
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 23 16:57:25 2015 -0400
+
+    docs: remove expanded ARB_dsa notes
+    
+    This doesn't provide much value since it's all done. The qbo interaction
+    is fairly trivial.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 7e0036a49258326cc2d875f2960d18c6b3665036
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 23 21:41:38 2015 -0400
+
+    nvc0/ir: tess factors are now sysvals, adapt codegen to expect that
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 7c4768540dacab8a4853f1310413cb976b5fb351
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 23 11:19:15 2015 +1000
+
+    docs/GL3.txt: ARB_shader_precision
+    
+    This extension is about setting expectation on GL4.1 implementations
+    rather than actually enforcing things. So once you support GLSL 410
+    then you support this in theory.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 80511d176a49e754a18ce585bab413db7af63bf7
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Jul 21 14:22:11 2015 +1000
+
+    i965: add support for ARB_shader_subroutine
+    
+    This just adds some missing pieces to nir/i965,
+    it is lightly tested on my Haswell.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 17f71483698a4e134a0c85ef0aa3da80fdfdb180
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 22 12:59:46 2015 -0400
+
+    mesa: rearrange texture error checking order
+    
+    This moves the width/height/depth == 0 check to the front and avoids
+    doing any other checking when that is the case.
+    
+    Also moves the dimensions check after the format/type checks so that we
+    don't bail out with success on a width/height/depth == 0 request when
+    the format/type don't match.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91425
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit c844afe94eaecc66e00cc4869f700ac1236bdc89
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 22 12:39:47 2015 -0400
+
+    mesa: adjust error message when there's a missing teximage
+    
+    The current message makes it seem like the zoffset is invalid.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit a6f39ec1c568c38e7ef42d60eaf6c9ab8397af2a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 23 21:51:48 2015 +0200
+
+    Revert "Match swrast modes more loosely."
+    
+    This reverts commit f3728a16c9c6a02fc1f44b8069b0060e2358f22e.
+    
+    It broke glxgears on radeonsi. The window was just black.
+
+commit d6b50ba980b733a82fefe2a0f115635a359c445f
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Thu Jul 23 16:54:02 2015 +0100
+
+    gallivm: Fix profile build.
+
+commit c6267ebd6c8a73d51a0c82d0f516177c70e05c81
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Wed Jul 22 13:21:24 2015 +0100
+
+    gallium/util: Stop bundling our snprintf implementation.
+    
+    Use MSVCRT functions instead.  Their semantics are slightly
+    different but they can be made to work as expected.
+    
+    Also, use the same code paths for both MSVCRT and MinGW.
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=91418
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit f3728a16c9c6a02fc1f44b8069b0060e2358f22e
+Author: Tom Hughes <tom@compton.nu>
+Date:   Tue Jun 2 13:40:37 2015 +0100
+
+    Match swrast modes more loosely.
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=90817
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit b469cf10efd4734038dcab294f23ca38e9fc7a97
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Thu Jul 23 16:25:21 2015 +0200
+
+    mesa: Fix error in target validation of glCompressedTex(ture)SubImage3D() calls
+    
+    Basically, two different target error checks are chained consecutively, and the
+    second one is executed regardless of the result of the first one. This produces an
+    incorrect error if the first check fails but is overridden by the second.
+    
+    This patch conditions the execution of the second check to a successful pass of
+    the first one.
+    
+    Fixes 1 dEQP test:
+    * dEQP-GLES3.functional.negative_api.texture.compressedtexsubimage3d
+    
+    Reviewed-by: Laura Ekstrand <laura@jlekstrand.net>
+
+commit a3b53beaa0351cf1322c6e1a580dc7cc3d0cad0c
+Author: Tom Stellard <thomas.stellard@amd.com>
+Date:   Mon Jul 20 11:24:13 2015 -0400
+
+    gallivm: Add ifdefs so raw_debug_stream is only defined when used
+    
+    Its only use is to implement a custom version of LLVMDumpValue
+    on some Windows and embedded platforms.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 9f7a68feafc86a51a7c5165672b29cb7182da738
+Author: Tom Stellard <thomas.stellard@amd.com>
+Date:   Mon Jul 20 06:49:05 2015 -0700
+
+    gallivm: Don't use raw_debug_ostream for disassembling
+    
+    All LLVM API calls that require an ostream object have been removed from
+    the disassemble() function, so we don't need to use this class to wrap
+    _debug_printf(); we can just call this function directly.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 6d8e466792c284e79125bab33fcfb0872d0df2c3
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 23 03:39:13 2015 -0400
+
+    docs: mark off tess for nvc0
+
+commit 88818c4cd6de9d8855a9ba3c3a85306d42f5e9d3
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 22 20:34:30 2015 -0400
+
+    gk110/ir: fake BAR support
+    
+    Makes things sorta work until we figure out the real way to do this.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit fd092328e1e05fe4a3fc82a2e79bdba884bc798d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 17 00:45:12 2015 -0400
+
+    nvc0/ir: cleanup private enums that have graduated to gallium
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit da89e75d9c6399c8fb0286460c91a77778c0eec9
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Apr 30 02:00:20 2015 -0400
+
+    nvc0/ir: allow tess eval output loads to be CSE'd
+    
+    These only happen for gl_TessCoord which are constant.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 77672cdb64e9c19e974fe5985050709fc317498e
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 23 02:27:04 2015 -0400
+
+    nvc0/ir: add hazard for 2nd dim of vfetch/load indirect argument
+    
+    Apparently a multi-word load can potentially overwrite the indirect
+    sources, so make sure that RA picks different registers for those.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 7cf2bffe8254de6808202d866598ec4c9afe1a51
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Feb 21 03:12:54 2015 -0500
+
+    nvc0/ir: patch vertex count is stored in the upper bits
+
+commit e3e2df01bf855f3b435e03224a762649081c6558
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jul 20 16:23:16 2014 -0400
+
+    nvc0/ir: add support for reading outputs in tess control shaders
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 71744c069264d32e6eb9d095350300b42633a1f8
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jul 20 13:36:37 2014 -0400
+
+    nvc0/ir: set perPatch flag on load/stores to per-patch varyings
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit c2350fb3dbb1b8d348125e22758da266c15bc198
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jul 20 13:12:38 2014 -0400
+
+    nvc0/ir: populate info structure based on new tess properties
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 59438a4d0e8ffd7cc4c741d00eff0c87d9813b5f
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jul 20 12:17:46 2014 -0400
+
+    nvc0/ir: mark varyings as per-patch based on semantic name
+    
+    Also add proper handling for PATCH semantics
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 4b2a58a523715c28c96267286054baf511e15303
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 12 22:08:44 2014 -0400
+
+    nvc0: TESSCOORD comes in as a sysval, not an input
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit c8e5337a9a240befcc953695c8822b0749c7a042
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jul 20 15:50:43 2014 -0400
+
+    nvc0: add handling for set_tess_state callback
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit d1ffdebce6d03497fa6c2e4c8eb754e9075e29f4
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 17 00:40:20 2015 -0400
+
+    nvc0: add support for setting patch vertices at draw time
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit b9ea557fd04da9eb199388c14d64862d18118de3
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jun 21 13:35:53 2015 -0400
+
+    nvc0: support MAX_SHADER_PATCH_VARYINGS
+
+commit f97c14f9e4ff5ae2b7313eb0098f99816fead71d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 12 15:40:14 2014 -0400
+
+    nvc0: preliminary tess support
+    
+    Uncomment the various functionality that was already there and add in
+    obvious missing bits that parallel vp/gp/fp functionality.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 65d84daf29adb0da779e9b49291cb4e26f021e1e
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed Jul 22 11:04:52 2015 +1000
+
+    docs/GL3.txt: update ARB_shader_subroutine status.
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit c3fad009c54fb526d236fd10f4377ce7fbb54459
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:30:53 2015 +1000
+
+    st/mesa: enable shader subroutine
+    
+    since this touches drivers, only enable it on gallium
+    for now for drivers reporting GLSL 1.30 or above.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit a922c279930ec1ab34506ca2e24d8a62a297ea33
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:29:42 2015 +1000
+
+    st/mesa: add subroutine bits (v1.1)
+    
+    Just add support for the subroutine type to the
+    glsl->tgsi convertor.
+    
+    v1.1: add subroutine to int support.
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 6f57fda494a6b4ccf30cab000ca28154fbabcb78
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:29:12 2015 +1000
+
+    mesa: fill out the ARB_shader_subroutine APIs
+    
+    This fleshes out the APIs, using the program resource
+    APIs where they should match.
+    
+    It also sets the default values to valid subroutines.
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 3f4f3e2d4877e1e2bda064cc323fb7b3667e12fe
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:28:40 2015 +1000
+
+    program: add subroutine uniform support (v1.1)
+    
+    Add support for the subroutine uniform type ir->mesa.cpp
+    
+    v1.1: add subroutine to int to switch
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 0a18f160159b93c57943e5cb4d9d9a78a5b72996
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:27:58 2015 +1000
+
+    program_resource: add subroutine support (v3.1)
+    
+    This fleshes out the ARB_program_query support for the
+    APIs that ARB_shader_subroutine introduces, leaving
+    some TODOs for later addition.
+    
+    v2: reworked for lots of the ARB_program_interface_query
+    entry points and tests
+    v3: use common function to test for subroutine support
+    v3.1: add tess, fix missing breaks
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 60266863d80bb2af94fa5c189ccd23ee20607ea9
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:27:36 2015 +1000
+
+    glsl: add uniform and program resource support (v2)
+    
+    This adds linker support for subroutine uniforms, they
+    have some subtle differences from real uniforms, we also hide
+    them and they are given internal uniform names.
+    
+    This also adds the subroutine locations and subroutine uniforms
+    to the program resource tracking for later use.
+    
+    v1.1: drop is_subroutine_def
+    
+    v2: handle explicit location properly, ARB_explicit_location
+    has a lot of language for subroutine shaders.
+    Calculate at link time the number of compatible subroutines
+    for a uniform, to make program resource easier later.
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 44ea8b9b8edc5f59da546683fe64129a1c1be449
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Jul 21 14:59:01 2015 +1000
+
+    mesa/mtypes: add gl_subroutine_function and uniform storage to shader (v2)
+    
+    This adds the necessary storage for subroutine info to gl_shader.
+    
+    v2: add comments, rename one member
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 7dd429e8f74302d44af00d051e59911439152369
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Apr 23 13:34:14 2015 +1000
+
+    glsl/ir: add subroutine lowering pass (v2.3)
+    
+    This lowers the enhanced ir_call using the lookaside table
+    of subroutines into an if ladder. This initially was done
+    at the AST level but it caused some ordering issues so a separate
+    pass was required.
+    
+    v2: clone return value derefs.
+    v2.1: update for subroutine->int convert.
+    v2.2: add a clone for the array index
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 65ac360823ee12ac2d1f3bb6758d352fcd0d9210
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jun 1 10:55:47 2015 +1000
+
+    glsl: add ast/parser support for subroutine parsing storage (v3.2)
+    
+    This is the guts of the GLSL parser and AST support for
+    shader subroutines.
+    
+    The code creates a subroutine type in the parser, and
+    uses that there to validate the identifiers. The parser
+    also distinguishes between subroutine types/function prototypes
+    /uniforms and subroutine definitions for functions.
+    
+    Then in the AST conversion it recreates the types, and
+    stores the subroutine definition info or subroutine info
+    into the ir_function along with a side lookup table in
+    the parser state. It also converts subroutine calls into
+    the enhanced ir_call.
+    
+    v2: move to handling method calls in
+    function handling not in field selection.
+    v3: merge Chris's previous parser patches in here, to
+    make it clearer what's changed in one place.
+    v3.1: add more documentation, drop unused include
+    v3.2: drop is_subroutine_def
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 884df9ef834d6b77226d0dfd778c5317365a2394
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Fri Apr 24 10:47:03 2015 +1000
+
+    glsl/ir: allow ir_call to handle subroutine calling
+    
+    This adds a ir_variable which contains the subroutine uniform
+    and an array rvalue for the deref of that uniform, these
+    are stored in the ir_call and lowered later.
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 30681c3bb80ad78392f1740aa915efa072c837e8
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:22:57 2015 +1000
+
+    glsl/ir: add subroutine information storage to ir_function (v1.1)
+    
+    We need to store two sets of info into the ir_function,
+    if this is a function definition with a subroutine list
+    (subroutine_def) or if it is a subroutine prototype.
+    
+    v1.1: add some more documentation.
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit d8a250ce5edc3da092ede6d62d433fbb37aa6cf6
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 23 10:23:36 2015 +1000
+
+    mesa: add function to check if shader subroutines are enabled.
+    
+    This checks if core profile and shader subroutine extension
+    is enabled.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 670b9e56da588581c90d6c68f0a55ecd9153504d
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:20:06 2015 +1000
+
+    mesa: add inline conversion functions for ARB_shader_subroutine (v2)
+    
+    This handles converting the shader stages to the internal
+    prefix along with the program resource interfaces.
+    
+    v2: add tess support
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit f73ef824869dbb1f91c32ad563c95ca917f40c12
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Jul 21 14:52:40 2015 +1000
+
+    glsl: don't eliminate subroutine types.
+    
+    This stops dead code from removing subroutines types,
+    we need these for the queries to work properly.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 57f24299b7fe0f7b20c2a3cf1e94c747825b568d
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Apr 20 10:16:55 2015 +1000
+
+    glsl/types: add new subroutine type (v3.2)
+    
+    This type will be used to store the name of subroutine types
+    
+    as in subroutine void myfunc(void);
+    will store myfunc into a subroutine type.
+    
+    This is required so the parser can identify a subroutine
+    type in a uniform declaration as a valid type, and also for
+    looking up the type later.
+    
+    Also add contains_subroutine method.
+    
+    v2: handle subroutine to int comparisons, needed
+    for lowering pass.
+    v3: do subroutine to int with its own IR
+    operation to avoid hacking on asserts (Kayden)
+    v3.1: fix warnings in this patch, fix nir,
+    fix tgsi
+    v3.2: fixup tests
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    
+    tests: fix warnings
+
+commit d16ff8ac783874c8ee74ef796b1c853829ff237d
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 10 21:38:23 2014 +1200
+
+    glsl: Make `subroutine` a reserved keyword
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 24b0e5068348aacabbd3e0012de95d34866e4b99
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 23 10:49:12 2015 +1000
+
+    dispatch_sanity: add shader subroutine to fix make check
+    
+    Add the shader subroutine to the core only API list,
+    and fixup dispatch_sanity to suit.
+    
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit cc172fddf3fc37991c6d85f2d8e4f6dc63a62809
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 10 21:31:06 2014 +1200
+
+    glsl: Add extension plumbing and define for ARB_shader_subroutine
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 25d6f56c08801909e784f81e9b9ced48977630f4
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 10 21:07:33 2014 +1200
+
+    mesa: Add glGet support for ARB_shader_subroutine implementation limits
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 4c7b007104c63475ec080d0777a41603c78786f6
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 10 20:43:45 2014 +1200
+
+    mesa: Add extension tracking for arb_shader_subroutine (v2)
+    
+    v2: [airlied]: merge version check update.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit b8f3e316bca2c9abd3c885a9447ecf29446d0ccb
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 10 20:38:53 2014 +1200
+
+    glapi: Add ARB_shader_subroutine functions and enums (v2)
+    
+    v2: fix output="true" and LENGTH typo
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 2ffe9b542116580571b157de8a89476b22694ea9
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 10 21:00:34 2014 +1200
+
+    mesa: Add stubs for ARB_shader_subroutine entrypoints
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 2ca1f767818b02354735b58cef896abb8677e4ed
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 23 10:39:47 2015 +1000
+
+    dispatch_sanity.cpp: remove commented out tess entries
+    
+    These entries were put in the GL4.0 section, so remove the
+    commented-out ones.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 18955e8a80ee2b344eaf3eb1d24eed90f6ba8334
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 23 11:13:36 2015 +1000
+
+    glsl/tests: fix varying_test since tess changes.
+    
+    This fixes make check since the tess changes.
+    
+    Tested-by: Michel Dänzer <michel.daenzer@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit b406c34a65677cac2517336d93ab279c3d35fce6
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 23 10:39:26 2015 +1000
+
+    i965: fix warning since tess merge.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit bac12c8948681a23fd1a8f8a6bbb5523ccfe0939
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 18:46:53 2015 +0100
+
+    radeonsi: enable tessellation, update GL3.txt & release notes
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit a193c4978b0b536266afc7887457ab11473671d7
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 14:41:35 2015 +0200
+
+    radeonsi: add scratch buffer support for tessellation shaders
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 12df9a7876ed0e6cfffb7871dc37bf66c95edca3
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 18:16:02 2015 +0100
+
+    radeonsi: update invariant registers for tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 99bf47f603502cd6f3a6040ba17c0881e3b0c15f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 18:10:38 2015 +0100
+
+    radeonsi: add assertions into draw_vbo and check tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 5aa5f9082347941fd8ac2fc3e94cd91aa1489982
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 18:09:18 2015 +0100
+
+    radeonsi: set the rasterization primitive type for tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 3344699243b856c3bc7b8ea08a949d2e3274e871
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 18:07:51 2015 +0100
+
+    radeonsi: set VGT_LS_HS_CONFIG for tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 09d02fa463b7207464c99ca887e253476fde851e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 18:06:34 2015 +0100
+
+    radeonsi: update IA_MULTI_VGT_PARAM for tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 74c1001d13f07538e349c157598f9de83f252c49
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 18:01:18 2015 +0100
+
+    radeonsi: add derived tessellation state
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit db267a04ceee51ca1698c3a68127508fa1e31c86
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 01:59:37 2015 +0200
+
+    radeonsi: implement a fixed-function tessellation control shader and its state
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit b6f4fdf6a9238bdb9e0589eafb22396da347b792
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 17:25:37 2015 +0100
+
+    radeonsi: set up a ring buffer for tessellation factors
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit ebfd9e007191d582e22d252e9ff9b93fe4f8c593
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 17:07:34 2015 +0100
+
+    radeonsi: add tessellation shader states
+    
+    ls_rsrc# will be emitted as part of the derived tessellation state
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit aa2fa6723a0f8ab86ce2e55b1ac093f2cffd87c6
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 16:07:04 2015 +0100
+
+    radeonsi: update si_get_vs_info and si_get_vs_state for tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit fff16e4ad2cf51749e01e04805908effe49217d1
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 15:09:35 2015 +0100
+
+    radeonsi: add shader code generation for tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 4805685b6fe6efb7891dbc6dbab6ae4edce7e19e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Oct 6 13:19:53 2014 +0200
+
+    radeonsi: implement TGSI_OPCODE_BARRIER
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 59b3556f4c69f0e6e5430ca6ab384d2ac9372bfc
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Sep 19 00:16:12 2014 +0200
+
+    radeonsi: program VGT_SHADER_STAGES_EN for tessellation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit d9d0de4d289fa0b18bf23c85586e0111d64bf3b7
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Sep 18 23:39:44 2014 +0200
+
+    radeonsi: add translation of PATCH primitives
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 55b6f1caaeef4fa81fcc34d552aee4f0448417bb
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Sep 18 22:54:40 2014 +0200
+
+    radeonsi: add support for tessellation shader resources and samplers
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit d1f43a7e5b889b30106c4db55ec1caac1ed6ca4a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Sep 18 22:50:52 2014 +0200
+
+    radeonsi: add code for creating, binding and destroying tessellation shaders
+    
+    This doesn't do anything yet.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit c2670463fd50f5b74066f0e0ab8f9a31dcb37429
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Sep 18 22:26:02 2014 +0200
+
+    radeonsi: add debug flags for dumping tessellation shaders
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 3ce91c727f2a00a05f414351266b0b45d677611e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Sep 15 23:34:28 2014 +0200
+
+    radeonsi: rework how shader pointers to descriptors are set
+    
+    This is mainly needed for tessellation where a VS can be bound as VS, ES,
+    or LS, and TES (tess. evaluation shader) can be bound as VS or ES or neither.
+    Therefore we need the ability to move pointers to descriptors between
+    shaders arbitrarily.
+    
+    The idea is that the context has a mapping from PIPE_SHADER_x to
+    SPI_SHADER_USER_DATA_x. After a shader is enabled or disabled,
+    si_shader_change_notify should be called to update this mapping accordingly.
+    
+    There is a dirty flag for each shader pointer, but only one emit function
+    for all pointers in the whole context, whose code and logic is separated
+    from descriptors.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 57b6f8d9f9bfafd931974eae6942663e2ba6db02
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 14:54:54 2015 +0100
+
+    radeonsi: rename build_streamout_store -> build_tbuffer_store_dwords
+    
+    It will be reused later.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit f66844820e3ae2403d66d3275b1bf3e77087189c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 14:46:20 2015 +0100
+
+    radeonsi: separate primitive ID computation
+    
+    Support for new shader stages will be added here.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit aa1f2af572a0285e9f5779e17b2d753119e0ec85
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 14:33:49 2015 +0100
+
+    radeonsi: move declaring streamout parameters to its own function
+    
+    It will be reused later.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 2ecb06b946ff8bf4a96de79ab81926fa1bf5a93f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon Oct 6 00:17:42 2014 +0200
+
+    radeonsi: make ES2GS offset sgpr location dynamic
+    
+    It will have a different location in the tessellation evaluation shader.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 1bc0fba572363f5460be7343cff8b8b7a315d755
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Mar 3 15:11:27 2015 +0100
+
+    gallium/radeon: expose emit_fetch
+    
+    Radeonsi will use this.
+
+commit a3be59b4a91e25d47535f192194ff669cfe2ef6e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Oct 5 20:20:18 2014 +0200
+
+    gallium/radeon: expose LLVM functions implementing emit_store
+    
+    emit_store will be reimplemented for tessellation control shader outputs
+    where only radeon_llvm_saturate will be used, but radeonsi will want to
+    fall back to radeon_llvm_emit_store for other register types.
+    
+    This exposes both functions.
+
+commit 7626ad8d6daad147bf9a1a82fa4c3ac9e2d3347c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 19:50:42 2015 +0100
+
+    st/mesa: enable tessellation if the driver supports it
+
+commit bda9094f1d69817ed1a51677d38e157ec3b37826
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 12:49:10 2015 +0200
+
+    st/mesa: set default tessellation levels
+
+commit 82f7fad96691480b9ffdeb3e8e1b3345ede713ef
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Sep 7 18:36:06 2014 -0400
+
+    st/mesa: add barrier support
+
+commit 8f40428afbbfa9080964df3cd4f38f24122c4c5e
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jul 21 20:45:29 2014 -0400
+
+    st/mesa: disable copy propagation for tessellation shaders
+    
+    This can't work due to shared inputs and outputs and barriers.
+
+commit f4c13fad6550f42524786c70b6f13fc510abaf0b
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jul 21 18:49:40 2014 -0400
+
+    st/mesa: set vertices_per_patch when drawing
+
+commit 37d1809dd7cdfedbee4fcfef148fcdb1c7b43068
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jul 13 15:02:53 2014 -0400
+
+    st/mesa: add 2d indexing support to outputs
+
+commit c9998617a8f40ad7e65aca9c581f5bcc7f1d0f4c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 20:47:31 2015 +0100
+
+    st/mesa: handle tessellation 2D varyings correctly
+
+commit 05c847433f5a3f3b2032bef32284ad7d6a2db850
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 12 20:09:06 2014 -0400
+
+    st/mesa: lower gl_TessLevel from float[] to vecn
+
+commit d00e2763b153c212e8f01af610ae305606044bcc
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 12 17:28:24 2014 -0400
+
+    st/mesa: query shader CAPs for tessellation
+    
+    The MaxTessPatchComponents query added by Marek.
+
+commit 40bc1c32d2fb42207ea860053045fa49e45d80b9
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 12 17:15:51 2014 -0400
+
+    st/mesa: add texture updates for tessellation programs
+
+commit bda79139d4579b5105c45561401960a82bab2f7e
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 12 17:10:59 2014 -0400
+
+    st/mesa: handle constbufs/ubos for tessellation shaders
+
+commit a58a66fe8577940cf07530b6235a386950ae04f7
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 20:44:39 2015 +0100
+
+    st/mesa: add conversion for tessellation shaders
+    
+    Based on code from Ilia Mirkin <imirkin@alum.mit.edu>.
+
+commit ba9fb96f86344f1631b82114bb0ce6f926d3853a
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 11 22:11:21 2014 -0400
+
+    st/mesa: add tessellation shader states
+    
+    additional fixes by Marek
+
+commit df4ee8ef366c60ad41502d4e45e0347c1ef1e348
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jun 16 13:07:04 2015 +0200
+
+    mesa: implement GL_IS_PER_PATCH
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 0af240e9401c12f4237f4a36a2474fe2cc590404
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 13 13:50:12 2015 +0200
+
+    glsl: use separate varying slots for patch varyings
+    
+    The idea is to allow 32 normal varyings and 32 patch varyings,
+    a total of 64. Previously, only a total of 32 was allowed.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit d07023894434325de850faabf005224f7b8ef4b8
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat May 16 21:12:54 2015 +0200
+
+    glsl: fix locations of 2-dimensional varyings without varying packing (v2)
+    
+    v2: renamed producer/consumer_type -> producer/consumer_stage
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 41acdae2e9eedb697a0f91815e201daf92d74ab4
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Oct 4 00:13:42 2014 +0200
+
+    glsl: don't demote tess control shader outputs
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 3a4b87f26d6c8c12eb119d72bf46461a7a384ab9
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Oct 4 00:15:33 2014 +0200
+
+    glsl: disable varying packing between tessellation shaders
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit d5787e7eef7c42e4a90cbd89dee81efbf1491487
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jun 16 02:43:55 2015 +0200
+
+    glsl: allow indexing of gl_out with a non-const if length isn't known
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 19f46d0540d9557a4d458ceb72f27ece28fa935e
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Fri Sep 12 21:27:26 2014 +1200
+
+    glsl: allow redeclaration of TCS gl_out[]
+    
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 8cf72972ce2fc7df83d0572745968bbcb41a8c92
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 7 21:42:50 2014 +1200
+
+    glsl: validate restrictions on use of barrier()
+    
+    With the exception of always-taken switch cases (which are
+    indistinguishable from straight line code in our IR), this
+    disallows use of the builtin barrier() function in all the
+    places it may not appear.
+    
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 799afadf51ad1ff0775a1bf7b4f3954a8d368b09
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jun 14 20:14:22 2015 +0200
+
+    glsl: allow barrier() in tessellation control shaders
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit df16e0dd63dfeb7d5086339113ff7d7197010847
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Tue Sep 9 19:25:02 2014 +1200
+
+    glsl: analyze TES usage of gl_ClipDistance
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 0e94f350eeecd84cd5f15b10837b285bc9120684
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 7 18:19:15 2014 +1200
+
+    glsl: push vertex count determination down one level
+    
+    We have the prog here, so we don't need the caller to work this out for
+    us.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 567f1b2ee89bf05f0600e9e79847140555f0a035
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jul 21 21:59:37 2014 -0400
+
+    glsl: pass shader stage to lower_output_reads and handle tess control
+    
+    Tessellation control outputs can be read in directly without first
+    having been written. Accessing these will require some special logic
+    anyways, so just let them through.
+    
+    V2: Never lower tess control output reads, whether patch or not -- both
+    can be read back by other threads.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 61846f222fffeba846f9f7277aba9cc7d48323ed
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Mon Sep 1 20:48:09 2014 +1200
+
+    glsl: properly size unsized arrays in tess stages
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit d563946a4064d50f6fa7ce5e9e8ccb1479d1205e
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 31 19:35:46 2014 +1200
+
+    glsl: restrict indexing for writes to TCS outputs to gl_InvocationID
+    
+    Marek: handle ir_swizzle
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit da7adb99e85fc6efa7f0e570ab93bd7b625975ae
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 17 22:37:16 2014 +1200
+
+    glsl: add builtin constants for ARB_tessellation_shader
+    
+    Limits from other extensions added by Marek.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit b7f98f9f094090c6e8a24407dab67e4873c68694
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Aug 24 16:46:40 2014 +1200
+
+    glsl: allow nonconst indexing of arrays where we can work out an implicit size
+    
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 64a0ae88b971e549852348b169de48d1d0b0869d
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Mon Aug 18 21:51:46 2014 +1200
+
+    glsl: relax unsized input/output block arrays for TCS/TES
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 2abbe941e1bfaf494eb739b9fb81503736298f14
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jun 16 01:32:28 2015 +0200
+
+    glsl: add the tessellation extension to the list for the "layout" qualifier
+    
+    This is technically not needed, but it makes the compiler return a better
+    error message if tessellation is used with GLSL < 1.50.
+    
+    Instead of:
+        error: syntax error, unexpected NEW_IDENTIFIER, expecting $end
+    It returns:
+        error: #version 150 layout qualifier `triangles' used
+    
+    And the tessellation spec says:
+        OpenGL 3.2 and GLSL 1.50 are required.
+    So it makes perfect sense.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit fb800b3dcd32ddb6f57143b46105d677eb01da80
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 23:24:08 2015 +0200
+
+    glsl: don't lower variable indexing on non-patch tessellation inputs/outputs
+    
+    There is no way to lower them, because the array sizes are unknown
+    at compile time.
+    
+    Based on a patch from: Fabian Bieler <fabianbieler@fastmail.fm>
+    
+    v2: add comments
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 0cfac917554aeb46bd78ba5b5f5ee1c8ed1d68bc
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Thu Mar 20 22:34:42 2014 +0100
+
+    glsl: make stand-alone compiler work with tessellation shaders.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit c53aa26379ccee9d53fe1d1ea9bfa26d4d469618
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Thu Mar 20 22:37:37 2014 +0100
+
+    glsl: add "in" or "out" prefix to name when flattening interface blocks
+    
+    This is to prevent a name conflict in tessellation shaders built-in interface
+    blocks.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 73a9a1539a85ae8fe22e11b4064105d588597736
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Mon Mar 10 17:55:36 2014 +0100
+
+    glsl: lower gl_TessLevel* from float[n] to vecn.
+    
+    Similar to gl_ClipDistance -> gl_ClipDistanceMESA
+    
+    v2: - renamed is_mesa_var to lowered_builtin_array_variable
+        - moved LowerTessLevel into gl_constants
+        - cosmetic changes in lower_tess_level.cpp
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 54f29502972cdd33302e69e029c8d07fb31b7bdf
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Thu Mar 20 22:33:05 2014 +0100
+
+    glsl: make lower_clip_distance work with tessellation shaders.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 7c758c5a216b0a72a089c4fe9b4facde0e7b2726
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 21 13:33:14 2014 +1200
+
+    glsl: allow linking of tessellation shaders.
+    
+    Marek: require a tess eval shader if a tess control shader is present
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit 1009b3311febe3909e82d4b5be38ceecad6afcc1
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Wed Mar 5 13:43:17 2014 +0100
+
+    glsl: add the patch in/out qualifier (v2)
+    
+    v2: Dropped some unrelated reordering in glsl_parser.yy as Ken suggested.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 1036b024d4c8ce2376ac41219dfda01d5a59b3ef
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Thu Mar 20 22:41:40 2014 +0100
+
+    glsl: add tessellation shader defines and built-in variables.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 497eb295838baccde1420adfcc4ef7e8fdddd774
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Thu Mar 20 22:44:43 2014 +0100
+
+    glsl: add tessellation shader parsing support (v2)
+    
+    v2: Fixed things that Ken suggested.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 206af9d049cab6e794db5abf63e3d11281343423
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 22:08:55 2015 +0200
+
+    mesa: don't allow drawing with tess ctrl shader and without tess eval shader
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 3d528e7c476f25f24bca35d09d1f4c2b00123234
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Apr 22 23:05:34 2015 +0200
+
+    mesa: handle tessellation shaders in use_shader_program
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 882413f1c5926503550a42554a83f57f85fec82d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jun 14 01:21:02 2015 +0200
+
+    mesa: add program interface queries for tessellation shaders
+    
+    Based on a patch by Chris Forbes <chrisf@ijw.co.nz>.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 550a570c5325cc64a547fe4d6e1e75af2d0e9587
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Fri Mar 7 10:39:39 2014 +0100
+
+    mesa: add misc tessellation shader stuff
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 6823d713c68dfb5679a7c96d06f72c31f755d686
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Fri Mar 7 10:39:18 2014 +0100
+
+    mesa: add tessellation shader getters (v3)
+    
+    Tessellation dependencies added by Marek.
+    
+    v2: require tessellation in addition to atomics/images for some glGet queries
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit cb0c12512cf83ac412ecc78d4d4c5318c46c9b22
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 13 23:06:06 2015 +0200
+
+    mesa: allow setting of patch parameters.
+    
+    Based on a patch from Fabian Bieler <fabianbieler@fastmail.fm>.
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit e32e546c17932161e5417a952db3fb7a19cdc93c
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 21 12:08:22 2014 +1200
+
+    mesa: require VS if TCS or TES is present in pipeline
+    
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit a30cc2882934ef25f41e1e41eb56d0b768f00b26
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 21 11:16:06 2014 +1200
+
+    mesa: allow tess stages in glUseProgramStages
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 6435b2909e4f1b82268a1c5769c0c228cda768e0
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Fri Mar 7 10:33:54 2014 +0100
+
+    mesa: support tess stages in glGetProgramPipelineiv
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 5852b5d2fa02d7716c2fbf859d058a2881416e9c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 13 22:26:56 2015 +0200
+
+    mesa: take tessellation into account when validating GS input primitive mode
+    
+    I've reported the bug in the Khronos bugzilla.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 8e758c3a74a35f8ee6c5969d5bb5f788b4ef4337
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 21 12:37:47 2014 +1200
+
+    mesa: allow drawing of patch primitives
+    
+    Cosmetic changes and fixes by Marek.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit fa602c208815c3e4d757072cadc00e617e30b933
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 19:11:07 2015 +0200
+
+    mesa: add _mesa_has_tessellation
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit a894ed82931840713aac25634ed469ac65889bfa
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Mon Mar 10 11:58:37 2014 +0100
+
+    mesa: add misc tessellation shader support
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 78d3054980edd1a12e56ad0362e889915cff335b
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Fri Mar 7 10:28:03 2014 +0100
+
+    mesa: add tessellation shader init functions.
+    
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit bb97cc66c149d0782ec269aab29700252fda9db0
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 21 12:41:07 2014 +1200
+
+    mesa: add tessellation shader state and limits
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit a2af956963b6bc4d29f37485e44c98008d2ef077
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Fri Mar 7 10:19:09 2014 +0100
+
+    mesa: add tessellation shader enums
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit df3860a3e3269bfe77562058fd87b39ae2f57fcc
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Fri Mar 7 10:13:16 2014 +0100
+
+    mesa: add tessellation shader structs
+    
+    Marek: remove unused members, cleanup
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit e2b59a39cbb64f6759f463f7bad162f5f03807b4
+Author: Fabian Bieler <fabianbieler@fastmail.fm>
+Date:   Fri Mar 7 09:59:11 2014 +0100
+
+    mapi: add ARB_tessellation_shader
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 5ead448719f39d27bfbf4cabf138324dfee34a4f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jun 16 22:13:34 2015 +0200
+
+    drirc: drop support for Heaven 3.0, fixes tessellation in 4.0
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 6b37643b820b32c3e15e4a8661448a11af8321dd
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 16 15:27:34 2015 +0200
+
+    winsys/radeon: implement buffer_unmap
+    
+    This has been a no-op due to performance concerns. From now on, drivers
+    should decide when they don't want to unmap, not the winsys.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 5e3974338ed7ea49a41405f8c2e4bcd5fd1f5c80
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 16 14:54:50 2015 +0200
+
+    gallium/radeon: remove buffer_unmap calls that can potentially decrease perf
+    
+    buffer_unmap is currently a no-op on radeon and done correctly on amdgpu.
+    I plan to fix it for radeon, but before that, all occurrences of buffer_unmap
+    that can negatively affect performance in the future must be removed.
+    
+    There are 2 reasons for removing buffer_unmap calls:
+    - There is a likelihood that buffer_map will be called again, so we don't
+      want to unmap yet.
+    - The buffer is being released, which automatically unmaps it.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 0aa2446e2c18e4a54ccf8555a8ff3426e4eb3ded
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 16 14:42:38 2015 +0200
+
+    radeonsi: remove switch statement in si_create_context
+    
+    and make si_init_config static
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 46b2b3bda8d962fce02838e09c742ac06fbec45f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 16 13:17:14 2015 +0200
+
+    radeonsi: don't change pipe_resource in resource_copy_region
+    
+    Copied from r600g. pipe_resource can be shared by multiple threads, so we
+    shouldn't change it.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 50a957c5de842b18e10c361f7b0310aa46bb483f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 10 23:35:55 2015 +0200
+
+    radeonsi: upload shader rodata after updating scratch relocations
+    
+    Cc: 10.5 10.6 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit e4d738f6c6b98a78830c10ab7b89704d847637a5
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 10 23:29:04 2015 +0200
+
+    radeonsi: remove redundant parameter in si_shader_binary_read
+    
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 2369dc83826c7b1f413ff78f55e460c38d7a0660
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 16:57:58 2015 +0200
+
+    cso: eliminate some sampler function wrappers
+
+commit 68dcbf4c4679ad4e62d55e4f2632311aeef38eed
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 12 16:12:59 2015 +0200
+
+    gallium/tests: use cso_set_samplers
+
+commit 85f5722f70075e7a93b7a6cc41abee1bc493f4e2
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 12 15:52:44 2015 +0200
+
+    gallium/util: use cso_set_samplers
+
+commit 5ef1782b9ff8aa06f5b7fdbc7ade3e80131d1fda
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 16:54:44 2015 +0200
+
+    st/mesa: use cso_set_samplers
+
+commit 4ef7d93a941257b18506eae056631e8f4a11f893
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 16:34:59 2015 +0200
+
+    cso: remove clip state handling
+    
+    There is no need for this.
+    
+    v2: handle redundant clip state changes in st/mesa
+
+commit b7492a1f45866a01b00263f9e252ddc3835304e9
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 16:32:49 2015 +0200
+
+    cso: only allow saving and restoring fragment sampler states
+
+commit 4e8bbed926729fe280701412d85aff64ab79856c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 16:10:54 2015 +0200
+
+    cso: drop inefficient checking for redundant sampler state changes
+    
+    Drivers can do this better, because they can skip redundant state changes
+    at per-slot granularity.
+
+commit 3639d66a473591e21aa2ec7692c95c827b479632
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 15:53:10 2015 +0200
+
+    cso: only allow saving and restoring fragment sampler views
+    
+    Not needed for other shader stages.
+
+commit 2d8213bfa9023b47a5fd6599596e1b02fdcdd4f6
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 4 13:18:11 2015 +0200
+
+    gallium/util: improve dump functions
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 8141b4cee514bb673e394f6fbe2cbe02e5b0faf2
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jul 4 13:17:07 2015 +0200
+
+    tgsi: allow dumping to a file directly
+
+commit d082c5324914212f76e45be497229c7a0681f706
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jul 16 17:14:07 2015 +0200
+
+    st/mesa: don't call st_validate_state in BlitFramebuffer
+    
+    None of the draw states are used here.
+    This fixes a crash in piglit: ext_framebuffer_blit/blit-early
+    
+    Calling st_manager_validate_framebuffers is the minimum requirement here.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 2f50fc040c223339dc14f2975c45d35dd4513c13
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Apr 29 17:57:46 2015 +0200
+
+    docs/relnotes: document new EGL extensions and EGL 1.5
+
+commit 1828357629721e53a305a29047c0eb18be10915b
+Author: Anatoli Antonovitch <anatoli.antonovitch@amd.com>
+Date:   Wed Jun 10 14:47:03 2015 +0200
+
+    st/dri: enable 3D textures and sRGB colorspace for EGL
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 4f57ccd02d4c5f214c7e59e7302c1dc650cff31d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Jun 10 14:45:58 2015 +0200
+
+    egl,dri_interface: use DRI2rendererQueryExtension to enable 3D textures & sRGB
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit c2c2e9ab604793c6e01f85497f3f5bf645f962fa
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Jun 10 02:49:29 2015 +0200
+
+    egl: implement EGL_KHR_gl_colorspace (v2)
+    
+    v2: add missing "break"
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 956ebf41aca6b74052cf6876cc479b404777700c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Jun 10 01:49:36 2015 +0200
+
+    st/dri: expose sRGB visuals (v2)
+    
+    v2: The fix for the darkness in Ubuntu Unity is in the hunk
+        with the 4-line comment.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit b06a6852ff782bb20d9e91a3a67eccb92e856ed3
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Thu Jul 2 10:25:41 2015 -0700
+
+    glapi: fix argument parsing in glX_proto_recv.py
+    
+    One of the plugins I use with vim "helpfully" added an underscore to the
+    front of mode for kicks.
+    
+    Obviously this isn't a feature used very often because it's been broken
+    since d986cb7c70db (since May 20th), and no one has noticed.
+    
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+
+commit 461b4b103f545027beb59c1d747c85892c6c1f63
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 21 15:34:19 2015 +0100
+
+    egl: android: remove DRM_GRALLOC_TOP hack
+    
+    Now that the drm_gralloc module exports the correct includes we can get
+    rid of this hack.
+    
+    Cc: Chih-Wei Huang <cwhuang@android-x86.org>
+    Cc: Eric Anholt <eric@anholt.net>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Tested-by: Varad Gautam <varadgautam@gmail.com>
+
+commit e2ef659c2ed36cca5a5c4a09440edb227eedcf60
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 00:28:46 2015 +0100
+
+    egl: remove old makefile.sources
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 1040a861a80486502c7ac86a258741d5cdf6459a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 00:28:10 2015 +0100
+
+    android: rework the EGL build
+    
+    See previous two commits for details.
+    
+    v2: Don't forget git mv, bring back DRM_GRALLOC_TOP. Spotted by Varad.
+    
+    Cc: Chih-Wei Huang <cwhuang@android-x86.org>
+    Cc: Eric Anholt <eric@anholt.net>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+    Tested-by: Varad Gautam <varadgautam@gmail.com>
+
+commit e7e29189e27bb404bf84d757a8f1dd617126808a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 02:04:30 2015 +0100
+
+    scons: rework the EGL build
+    
+    The scons equivalent of the previous commit - just fold the almost
+    identical driver + main Sconscripts.
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit e3420396124c75ec9679c4d1cf3a42c185207e5a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 01:58:33 2015 +0100
+
+    automake: rework the EGL build
+    
+    Simplify things by merging the two makefiles. This way we can combine
+    the duplicated HAVE_PLATFORM_ checks, and build the library without
+    having a separate static library.
+    
+    v2: use $() when referencing variables, use correct define (Matt)
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 0399d7ab3f69624b7f0b7b39e948432959fe270e
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jun 30 22:43:50 2015 +0100
+
+    gbm: do not build intermediate libgbm_dri static library
+    
+    The only user of it (libgbm.la) immediately links it. Just build it
+    directly into the library.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 8e5e18ac286f0782380c72cebffd4c54c98e4ccb
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 13 22:35:25 2015 +0100
+
+    egl: automake: remove unused HAVE_XCB_DRI2 define
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit a1202807dccc6e2ac02ff52639a49f6c2d06648d
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 01:58:12 2015 +0100
+
+    egl: remove unused _EGL_DRIVER_SEARCH_DIR define
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 0b915856bac4871b90c101b44a2830b9a0e22e05
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 13 22:16:11 2015 +0100
+
+    egl/haiku: remove unused DEFAULT_DRIVER_DIR define
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit c17e01748e9efc2d638b7b5bc9d4344521334f48
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 01:57:23 2015 +0100
+
+    egl: remove final Windows specific workaround
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 3593f37fd7b599e217bd1f894ac671a14a058b8d
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 00:19:54 2015 +0100
+
+    egl: remove custom string functions
+    
+    Support for Windows has been removed for a while now, and virtually
+    every POSIX compliant system provides strcasecmp, strdup and snprintf.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit d62879565a5c8479c3cfea513aa4e90f0d90b304
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 13 23:36:19 2015 +0100
+
+    egl: remove _EGL_PLATFORM_WINDOWS enum
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 32debea337755cf15550844955b14c29bb3006fa
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 13 22:05:46 2015 +0100
+
+    egl: remove final references of platform_null
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit dc1ece3748f3a97d685c9c72ad26684fd35f1944
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 13 22:20:38 2015 +0100
+
+    egl: remove flatten HAVE_SHARED_GLAPI
+    
+    It is simply not possible to use the dri backend without shared glapi,
+    as the alternative provider (libGL) is not always present. We have fixed
+    the build for a while now, so we can rip this out.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 78674631a2d0ff1eb538470e2a1d516201361f03
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 14 01:52:51 2015 +0100
+
+    egl: remove the non-haiku scons build
+    
+    It has been broken since 2011 with commit c98ea26e16b(egl: Make
+    egl_dri2 and egl_glx built-in drivers.). When the backends got merged
+    into the main library each entry point was guarded by a
+    _EGL_BUILT_IN_DRIVER_* define.
+    
+    As the define was missing, the linker kindly removed the whole of the
+    dri2 backend, thus we did not notice any errors due to the unresolved
+    link to xcb and friends.
+    
+    Cc: Chia-I Wu <olv@lunarg.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit ce2a4bd541241dade00a36e9f2d8e5ca16c6ff03
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 13 21:12:01 2015 +0100
+
+    dri/common: remove unused drm_version variable
+    
+    As of last commit the only user of it (radeon/r200) no longer uses it.
+    As such let's remove it and cleanup the nasty hacks that we had in place
+    to support this.
+    
+    v2: Leave LIBDRM_CFLAGS around.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com> (v1)
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v1)
+
+commit 5284e9e2c4922479b28db96ae88121a053a6e66b
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jul 9 18:06:14 2015 +0100
+
+    radeon,r200: allow hyperz for radeon DRM module v2
+    
+    The original code only half considered hyperz as an option. As per
+    previous commit "major != 2 cannot occur" we can simplify things, and
+    allow users to set the option if they choose to do so.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 48926da0f7a1d1656bfbaf9d5344cc1fa0b6e089
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jul 9 17:34:30 2015 +0100
+
+    radeon,r200: remove support for UMS radeon DRM module
+    
+    As mentioned by Michel Dänzer
+     "FWIW though, any code which is specific to radeon DRM major version 1
+      can be removed, because that's the UMS major version."
+    
+    and Marek Olšák
+     "major != 2" can't occur. You don't have to check the major version at
+      all and you can just assume it's always 2."
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit bf6247f608969c3f1fa987e297c6063c02896b5a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jul 9 17:29:57 2015 +0100
+
+    radeon,r200: remove unused variable texmicrotile
+    
+    Dead since at least 2009 with commit ccf7814a315(radeon: major cleanups
+    removing old dead codepaths.)
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 66d77cd71c6359cddfd21c128afe95bad860e231
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 8 01:44:31 2015 +0100
+
+    scons: don't build the kms-dri winsys
+    
+    Same as previous commit - unused (gbm is not a thing outside the
+    autotools build).
+    
+    v2: Remove trailing HAVE_LIBDRM.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit a29a8b92ff05f3a63dd3b6ae8c7d0e07f039c0ad
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 21:02:40 2015 +0100
+
+    android: don't build the kms-dri winsys
+    
+    GBM (the only user of kms-dri) is currently not available under Android.
+    Considering we have no way of testing/using this let's not bother
+    building it for now.
+    
+    Cc: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Cc: Eric Anholt <eric@anholt.net>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit fe1503fe38602c91e030ca206cb392a26a343f91
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 16:04:27 2015 +0100
+
+    android: dri: correctly set HAVE_LIBDRM
+    
+    Set the macro if we're not building swrast alone.
+    
+    Cc: Eric Anholt <eric@anholt.net>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+
+commit 787995bffb52d955f3046618286d831b76b72119
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 15:59:32 2015 +0100
+
+    swrast: remove unneeded __NOT_HAVE_DRM_H define
+    
+    No longer applicable since the cleanup of dri_interface.h.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 9ab5b644ef64ee23f88a6687cc541ee4e745234a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 15:57:41 2015 +0100
+
+    dri/common: use HAVE_LIBDRM over __NOT_HAVE_DRM_H
+    
+    See previous commit message for details.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 1c328b8aa79b0644160082b7e9e02df18ab3ca48
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 15:42:15 2015 +0100
+
+    loader: use HAVE_LIBDRM instead of ! __NOT_HAVE_DRM_H
+    
+    Double negatives in English language are normally avoided, plus the
+    former seems cleaner and more consistent.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 72c784347bf66b61385cb57bb666033e5234ba69
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 22 16:34:15 2015 +0100
+
+    st/dri: unwrap/remove __NOT_HAVE_DRM_H magic
+    
+    With the dri_interface.h clean of the macro, we can remove the final
+    only st/dri specific use of the very same.
+    
+    Seemingly it was incorrectly used, as the build-time presence of dri2 is
+    not libdrm specific. At run-time, the code is already limited to dri2
+    use-cases plus returning true, when the extension is not present (or too
+    old) will likely lead to a crash as one tries to use it shortly after
+    the dri_with_format() call.
+    
+    As a side effect this gives us a nice cleanup of the builds.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 0efd773f719dd2deddb4b6703edf022b294cd349
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 15:13:46 2015 +0100
+
+    dri_interface: drop __NOT_HAVE_DRM_H magic
+    
+    v2: use HAVE_LIBDRM macro.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 816e4c1b5e2887c45ffa69d41c8106e7b31977fb
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jul 22 16:22:44 2015 +0100
+
+    dri/swrast: automake: add LIBDRM_CFLAGS
+    
+    With the follow up commit we'll remove the __NOT_HAVE_DRM_H macro. As
+    requested by Ian HAVE_LIBDRM will be used instead, which will lead to
+    swrast including drm.h when libdrm package is available, even though we
+    don't need/make use of the header.
+    
+    As the define is added after the AM_CFLAGS we cannot use -UHAVE_LIBDRM,
+    but instead let's just add LIBDRM_CFLAGS. The latter of which will
+    expand to NULL when the libdrm package is not around.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 16f6d432de07dcb537dafd0c9f3ef7614891ed6b
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jul 9 21:19:15 2015 +0100
+
+    configure.ac: do not set HAVE_DRI(23) when libdrm is missing
+    
+    These conditionals are used to guard both dri modules and loader(s).
+    
+    Currently if we try to build the gallium swrast dri module (without glx)
+    on a system that's missing libdrm the build will fail.
+    
+    v2: Make sure we assign prior to checking the have_libdrm variable.
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit fe4290200942b1103cdc1a238876143b61b731f0
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jul 22 08:04:49 2015 -0600
+
+    mesa: fix typo s/glGetTextImage/glGetTexImage/
+    
+    Trivial.
+
+commit 800efb0690e962750b9a072bcbab279fdaae24a1
+Author: Michel Dänzer <michel.daenzer@amd.com>
+Date:   Wed Jul 22 16:11:39 2015 +0900
+
+    radeonsi: Flush when we're asked to return a fence but don't have one yet
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit fcc1949cc4d97d8ed714020d5b86b31b70eca774
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sat Jul 4 08:34:32 2015 +1000
+
+    mesa: fix misleading comment
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 13322a6590b9e64a9a9f8dd304898e9ab6bedd49
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Jun 17 23:03:52 2015 +1000
+
+    mesa: fix active sampler conflict validation
+    
+    The type stored in gl_uniform_storage is the type of a single array
+    element not the array type so size was always 1.
+    
+    V2: Don't validate sampler units pointing to 0
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 09c440c718992d48ef118c1aad6929ad215ccd3b
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Fri Jul 3 08:45:30 2015 +1000
+
+    glsl: check for leading zeros in array index validation
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 7fccebf9803973c7403318da20afe23e80b5b59f
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:44:07 2015 -0600
+
+    swrast: remove unneeded & operators in _swrast_choose_texture_sample_func()
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 300926def04afc76d67ce964f10247b2e787c5dc
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:44:07 2015 -0600
+
+    mesa: move check for no-op glShadeModel call earlier
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit d323f26830c1ce7e157cfeeb4f1e38b1a4d19d31
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:44:07 2015 -0600
+
+    mesa: move check for no-op glAlphaFunc call earlier
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 61ed88b1ddf8aea6f74518bcae5c13d9bf4ae822
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:44:07 2015 -0600
+
+    mesa: move check for no-op glFrontFace call earlier
+    
+    If the new mode matches the current mode, there can be no error.
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 24799c422365f609403ccde0759c77b0179328d3
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: s/GLint/GLsizei/ for consistency
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit b94367ba8d14ea999d2b81cb24db21fa72d4bab8
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    docs: document that GL_ARB_get_texture_sub_image is completed
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 89212f9d06cbd5dd7cebefe7b4e535692525e3e9
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: enable GL_ARB_get_texture_sub_image for all drivers
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 48f9f0bfdd6bfb0ab4844cf005c1534c86cd3836
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: add API dispatch for GL_ARB_get_texture_sub_image
+    
+    This adds the new glGetTextureSubImage() and
+    glGetCompressedTextureSubImage() functions.  Also update the
+    dispatch sanity test program.
+    
+    v2: remove stray brace, move xi:include line in gl_API.xml, fix extension
+    number typo, s/program/texture/ in xml file.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 2494f91fb82eb545fd59ed4c9850ff378fc0c591
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: add new _mesa_Get[Compressed]TextureSubImage() functions
+    
+    Simple implementations in terms of get_[compressed_]texture_image().
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit a92f0277d854d6ac5dd524134f113632c990b1b0
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: overhaul the glGetCompressedTexImage code
+    
+    Same idea as the previous patch.
+    v2: a few clean-ups spotted by Ilia
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit f20cfc5a409b69d5ae3d10a870d90e0b4e493ddf
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: overhaul the glGetTexImage code
+    
+    1. Reorganize the error checking code.
+    2. Lay groundwork for getting sub images by passing image offset and
+       dimensions to the error checking code.
+    3. Implement _mesa_GetnTexImageARB(), _mesa_GetTexImage() and
+       _mesa_GetTextureImage() all in terms of get_texture_image().
+    
+    v2: pass offset/width/height/depth arguments to the error checking
+    function, avoid using magic width/height/depth values.
+    v3: remove unused bufSize param to get_texture_image()
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 613f1e00b8f8493a0cac7dbeec6647ce3a5a0355
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: 80-column wrapping in texgetimage.c
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 2a95fd153158e20e6b44548d4f247a5763713fb3
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: replace Driver.GetCompressedTexImage() w/ GetCompressedTexSubImage()
+    
+    For now, pass offsets of zero and width/height/depth equal to the
+    whole image.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 5bfc360e40680b3fe2b6f74ac487fa76502559e3
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: make _mesa_get_[compressed_]texture_image() static
+    
+    These functions are only called from teximage.c
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 1ad305b612f389fb04c6d51847427d5ec72fae03
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: plumb offset/size parameters through GetTexSubImage code
+    
+    Needed for GL_ARB_get_texture_sub_image.  But at this point, the
+    offsets are always zero and the sizes match the whole texture image.
+    
+    v2: Fixes, suggestions from Laura Ekstrand:
+    * Fix calls to ctx->Driver.UnmapTextureImage() to pass the correct
+      slice value.
+    * Added comments and assertions to check zoffset+depth<=tex->Depth before
+      the 'img' loops.
+    * Added a new zoffset==0 assert in get_tex_memcpy().
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit e693fc299f1f78502b9201f1e1e8f333566c9fb6
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: replace Driver.GetTexImage with GetTexSubImage()
+    
+    The new driver hook has x/y/zoffset and width/height/depth parameters
+    for the new glGetTextureSubImage() function.
+    
+    The meta code and gallium state tracker are updated to handle the
+    new parameters.
+    
+    Callers to Driver.GetTexSubImage() pass in offsets=0 and sizes equal
+    to the whole texture size.
+    
+    v2: update i965 driver code, s/GLint/GLsizei/ in GetTexSubImage hook
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 096371879098c315bc054b6fe1ef6f4b8f18554f
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    meta: add offset, width, height parameters to decompress_texture_image()
+    
+    In preparation for decompressing texture sub images.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 2a2c9469425bc794c98dcf57237457ba41d10ce2
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    meta: handle subimages in _mesa_meta_setup_texture_coords()
+    
+    v2: fix depth, total_depth mix-up in meta.h, per Laura Ekstrand.
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit d7bd9fa1a363c288324d73fbde86f2257dfc0a15
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: assorted whitespace, formatting fixes in teximage.c
+    
+    Trivial.
+
+commit 98a6c5ea1129f18ed5f097fad5bbebc86eb0e862
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 21 18:35:38 2015 -0600
+
+    mesa: allow GL_TEXTURE_CUBE_MAP_ARRAY case for glCompressedTexSubImage3D()
+    
+    Since s3tc works for cube maps and 2D arrays, it should also work for
+    cube arrays.  NVIDIA's driver supports this too.  Seems like the spec
+    should say this.
+    
+    This is a minor follow-on fix for the commit "mesa: fix up some texture
+    error checks".
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 4c7196b684fe384599c1a02bf20aec7b6447968d
+Author: Jonathan Gray <jsg@jsg.id.au>
+Date:   Thu Jul 16 01:17:37 2015 +1000
+
+    mesa: include stdarg.h for va_list
+    
+    Include stdarg.h for va_list.  Unbreaks the build on OpenBSD:
+    
+    In file included from mesa/program/dummy_errors.c:24:
+    ../src/mesa/main/errors.h:85: error: expected declaration specifiers or '...' before 'va_list'
+    
+    Signed-off-by: Jonathan Gray <jsg@jsg.id.au>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit a2a1a5805fd617e7f3cc8be44dd79b50da07ebb9
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jul 20 19:58:43 2015 -0400
+
+    gallium: replace INLINE with inline
+    
+    Generated by running:
+    git grep -l INLINE src/gallium/ | xargs sed -i 's/\bINLINE\b/inline/g'
+    git grep -l INLINE src/mesa/state_tracker/ | xargs sed -i 's/\bINLINE\b/inline/g'
+    git checkout src/gallium/state_trackers/clover/Doxyfile
+    
+    and manual edits to
+    src/gallium/include/pipe/p_compiler.h
+    src/gallium/README.portability
+    
+    to remove mentions of the inline define.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Acked-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 958b5c31116f46a81249d11033164354ec158556
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Tue Jul 21 21:58:08 2015 +0200
+
+    nvc0: force cache flush when binding a new ubo
+    
+    This fixes the following piglit test:
+      ext_transform_feedback-immediate-reuse-uniform-buffer
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit a62ccdec622ea43a7cdbf572a32dfae19ba9c904
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Tue Jul 21 21:35:43 2015 +0200
+
+    nv50: force cache flush when binding a new ubo
+    
+    This fixes the following piglit test:
+      ext_transform_feedback-immediate-reuse-uniform-buffer
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 912921059d137085faef676504bea265328bdde4
+Author: Vinson Lee <vlee@freedesktop.org>
+Date:   Mon Jul 20 21:52:40 2015 -0700
+
+    st/mesa: Silence GCC unused-variable warning.
+    
+    Silence a release build warning.
+    
+    st_glsl_to_tgsi.cpp: In function 'pipe_error st_translate_program(gl_context*, uint, ureg_program*, glsl_to_tgsi_visitor*, const gl_program*, GLuint, const GLuint*, const GLuint*, const ubyte*, const ubyte*, const GLuint*, const GLuint*, GLuint, const GLuint*, const GLuint*, const ubyte*, const ubyte*, boolean, boolean)':
+    st_glsl_to_tgsi.cpp:5461:36: warning: unused variable 'pscreen' [-Wunused-variable]
+                    struct pipe_screen *pscreen = st->pipe->screen;
+                                        ^
+    
+    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 5b4a7ec8f1d2eee12895541bb5c7d15382370884
+Author: Adam Jackson <ajax@redhat.com>
+Date:   Tue Jul 21 12:08:20 2015 -0400
+
+    r600/sb: Fix an &/&& mistake
+    
+    gcc says:
+    
+        sb/sb_sched.cpp: In member function 'bool r600_sb::alu_group_tracker::try_reserve(r600_sb::alu_node*)':
+        sb/sb_sched.cpp:492:7: warning: suggest parentheses around operand of '!' or change '&' to '&&' or '!' to '~' [-Wparentheses]
+          if (!trans & fbs)
+    
+    It happens to be harmless; if fbs is ever non-zero, it will be VEC_210,
+    which is 5, so (!trans & 5) == 1 and the branch works as expected.  But
+    logical AND is clearly what was meant.
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+    Signed-off-by: Adam Jackson <ajax@redhat.com>
+
+commit 545dec5b3efeab7691ab3eb1436747048f241cf9
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jul 17 14:14:29 2015 -0700
+
+    Revert "i965/gen9: Plugin the code for selecting YF/YS tiling on skl+"
+    
+    Commit c9dbdc0 introduced some dead code which is supposed to be used
+    once we have Yf/Ys tiling working and performing better. Ken reported
+    the issue that static analysis tool now shows warnings due to the dead
+    code. To fix these warnings, this patch reverts the changes made in
+    commit c9dbdc0.
+    
+    It'll be better to add the Yf/Ys tiling selection code later, when we
+    are ready to use it.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Acked-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit fadf34773527779eef4622b2586d87ec00476c0f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 15:52:28 2015 +0300
+
+    i965: Fix stride field for the result of emit_uniformize().
+    
+    This is essentially the same problem fixed in an earlier patch for
+    immediates.  Setting the stride to zero will be particularly useful
+    for my future SIMD lowering pass, because we will be able to just
+    check whether the stride of a source register is zero and skip
+    emitting the copies required to unzip it in that case.
+    
+    Instead of setting stride to zero in every caller of emit_uniformize()
+    I've changed the function to return the result as its return value
+    (previously it was being written into a caller-provided destination
+    register), because this way we can enforce that the result is used with
+    the correct regioning from the function itself.
+    
+    The changes to the prototype of its VEC4 counterpart are mainly for
+    the sake of symmetry, VEC4 registers don't have stride.
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 9383664a9cbc5bc4858fc50d7fa565f43028d779
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 15:29:39 2015 +0300
+
+    i965/fs: Fix stride field for uniforms.
+    
+    This fixes essentially the same problem as for immediates.  Registers
+    of the UNIFORM file are typically accessed according to the formula:
+    
+     read_uniform(r, channel_index, array_index) =
+        read_element(r, channel_index * 0 + array_index * 1)
+    
+    Which matches the general direct addressing formula for stride=0:
+    
+     read_direct(r, channel_index, array_index) =
+        read_element(r, channel_index * stride +
+                        array_index * max{1, stride * width})
+    
+    In either case if reladdr is present the access will be according to
+    the composition of two register regions, the first one determining the
+    per-channel array_index used for the second, like:
+    
+     read_indirect(r, channel_index, array_index) =
+        read_direct(r, channel_index,
+                    read(r.reladdr, channel_index, array_index))
+    
+    where:
+     read(r, channel_index, array_index) = if r.reladdr == NULL
+        then read_direct(r, channel_index, array_index)
+        else read_indirect(r, channel_index, array_index)
+    
+    In conclusion we can handle uniforms consistently with the other
+    register files if we set stride to zero.  After lowering to a GRF
+    using VARYING_PULL_CONSTANT_LOAD in demote_pull_constant_loads() the
+    stride of the source is set to one again because the result of
+    VARYING_PULL_CONSTANT_LOAD is generally non-uniform.
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 5f8d9ae5a54961deb02eb52e924a84b99b60f035
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 13 14:50:24 2015 +0300
+
+    i965/fs: Fix stride for immediate registers.
+    
+    When the width field was removed from fs_reg the BROADCAST handling
+    code in opt_algebraic() started to miss a number of trivial
+    optimization cases resulting in the ugly indirect-addressing sequence
+    to be emitted unnecessarily for some variable-indexed texturing and
+    UBO loads regardless of one of the sources of BROADCAST being
+    immediate.  Apparently the reason was that we were setting the stride
+    field to one for immediates even though they are typically uniform.
+    Width used to be set to one too which is why this optimization used to
+    work previously until the "reg.width == 1" check was removed.
+    
+    The stride field of vector immediates is intentionally left equal to
+    one, because they are strictly speaking not uniform.  The assertion in
+    fs_generator makes sure that immediates have the expected stride as
+    consistency check.
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit b298311d517017834841e53b7e641738e6067cdc
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Jul 20 12:58:12 2015 +0200
+
+    i965/vec4: Fix liveness analysis with BRW_OPCODE_SEL
+    
+    We only consider a vgrf defined by a given block if the block writes to it
+    unconditionally. So far we have been checking this by testing that the
+    instruction is not predicated, however, in the case of BRW_OPCODE_SEL,
+    the predication is used to select the value to write, not to decide if
+    the write is actually done. The consequence of this was increased life
+    spans for affected vgrfs, which could lead to additional register pressure.
+    
+    Since NIR generates selects for conditional writes this was causing massive
+    register pressure in a handful of piglit and dEQP tests that had a large
+    number of select operations with the NIR-vec4 backend.
+    
+    Fixes the following piglit tests with the NIR-vec4 backend:
+    spec/glsl-1.50/execution/variable-indexing/vs-output-array-vec4-index-wr-before-gs
+    spec/glsl-1.50/execution/variable-indexing/gs-input-array-vec4-index-rd
+    spec/glsl-1.50/execution/variable-indexing/vs-output-array-vec2-index-wr-before-gs
+    spec/glsl-1.50/execution/variable-indexing/vs-output-array-vec3-index-wr-before-gs
+    spec/glsl-1.50/execution/variable-indexing/vs-output-array-float-index-wr-before-gs
+    
+    Fixes 80 dEQP tests with the NIR-vec4 backend in the following category:
+    dEQP-GLES3.functional.ubo.*
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 2f11e92cef51c88a09bc778e2ceca4ab50cf0017
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat Jul 18 01:22:00 2015 -0700
+
+    mesa: Rename _mesa_lookup_enum_by_nr() to _mesa_enum_to_string().
+    
+    Generated by sed; no manual changes.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit cd0dec0d9dfab642c51774c3f5788cbdf00b8c9b
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Mon Jul 20 21:32:43 2015 +0200
+
+    nouveau: use bool instead of boolean
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Acked-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 4be30fcd058209966fc72fbfa51bbe881c307ed5
+Author: Tom Stellard <thomas.stellard@amd.com>
+Date:   Mon Jul 20 15:12:56 2015 +0000
+
+    gallivm: Initialize LLVM Modules's DataLayout to an empty string.
+    
+    This fixes crashes in llvmpipe with LLVM 3.8 and also some piglit tests
+    on radeonsi that use the draw module.
+    
+    This is just a temporary solution.  The correct solution will require
+    creating a TargetMachine during gallivm initialization and pulling the
+    DataLayout from there.  This will be a somewhat invasive change, and it
+    will need to be validated on multiple LLVM versions.
+    
+    https://llvm.org/bugs/show_bug.cgi?id=24172
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 5b7dd4d41900e3c795af134e0fad59cac9e0e7b4
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Thu Jul 16 23:05:05 2015 +0200
+
+    nvc0: add a missing parameter to nvc0_set_shader_images()
+    
+    This fixes a compilation warning introduced in commit 05a12c5
+    (gallium: add interface for writable shader images).
+    
+    While we are at it, fix indentation and rename parameters according to
+    the gallium interface.
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit c2cb771354d2d738e0ab3ca7c8008748c5f57953
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Mon Jul 20 18:47:17 2015 +0200
+
+    nouveau: always align buffers to 0x100
+    
+    Only constbufs must be aligned to 0x100, but since all buffers can be
+    rebound as constant buffers they must also be aligned.
+    
+    This patch prevents this behaviour by aligning everything to 256-byte
+    increments at buffer creation.
+    
+    This fixes dmesg fails for the following piglit test:
+      ext_transform_feedback-immediate-reuse-uniform-buffer -auto -fbo
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 19a6214b0ff707ae52e9624c263b7d6c1c20e6d3
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Mon Jul 13 13:34:31 2015 +0200
+
+    nv50: limit the maximum number of samplers to 16
+    
+    NV50_3D_BIND_TSC only allows to bind 16 samplers, and since we don't
+    want to do anything with NV50_3D_BIND_TSC2, just limit the maximum
+    number of samplers to 16 like for nvc0.
+    
+    This fixes dmesg fails with the following piglit test:
+     max-samplers
+    
+    But the test still fails.
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 6d207b8e3548cd7832a5edc7b847a5e7d06c0925
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Mon Jul 13 12:52:57 2015 +0200
+
+    nv50: turn samples counts off during blit
+    
+    Fixes the following piglit test:
+      occlusion_query_meta_no_fragments
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit d246a96bbc4253a8339a505df97742fd252ebc55
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Mon Jul 13 12:51:21 2015 +0200
+
+    nv50: add nesting support for occlusion queries
+    
+    This is loosely based on nvc0.
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 8ba1982b1e37aa69680e243fe391254211ae273a
+Author: Alejandro Piñeiro <apinheiro@igalia.com>
+Date:   Fri Jul 17 11:54:34 2015 +0200
+
+    i965/nir/fs: removed unneeded support for global variables
+    
+    As functions are inlined, and nir_lower_global_vars_to_local gets
+    run, all global variables are lowered to local variables.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 801d41fa43eba996c6bd7c071282ad15e51609d3
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jul 20 00:19:56 2015 -0400
+
+    nv50: fix max level clamping on G80
+    
+    It appears that the G80 did not have support for the sampler view
+    first/last clamping. Put the view's last level in the place of the
+    texture's so that it doesn't go past what the sampler view allows.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 8c8a71f0d125bb655b17a32914ffecf8d159593b
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 18 19:02:29 2015 -0400
+
+    gm107/ir: fix indirect txq emission
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 346ce0b98832e33d5411200002571b3edea9e2bb
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 18 18:38:42 2015 -0400
+
+    nvc0/ir: don't worry about sampler in txq handling
+    
+    There's no need to deal with samplers for texture size queries. That
+    code also was accidentally setting an invalid sIndirectSrc position, but
+    it can now just be removed.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 20e484afa4874e87cd18daffd66286bb893cf3fb
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jul 18 16:43:17 2015 -0400
+
+    nvc0/ir: fix txq on indirect samplers
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 670914ea7cf7808ff37ca54db2844f711436031c
+Author: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+Date:   Wed May 20 18:02:44 2015 +0300
+
+    i965: Disable resource streamer in BLORP
+    
+    Switch off hardware-generated binding tables and gather push
+    constants in the blorp. Blorp requires only a minimal set of
+    simple constants. There is no need for the extra complexity
+    to program a gather table entry into the pipeline.
+    
+    Cc: kenneth@whitecape.org
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+
+commit fc65b6eb610f4b1e42930cae7594131fa9ea566e
+Author: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+Date:   Wed Oct 2 16:37:20 2013 +0300
+
+    i965: Upload binding tables in hw-generated binding table format.
+    
+    When hardware-generated binding tables are enabled, use the hw-generated
+    binding table format when uploading binding table state.
+    
+    Normally, the CS will just consume the binding table pointer commands
+    as pipelined state. When the RS is enabled however, the RS flushes whatever
+    edited surface state entries of our on-chip binding table to the binding
+    table pool before passing the command on to the CS.
+    
+    Note that the binding table pointer offset is relative to the binding table
+    pool base address when the resource streamer is enabled, instead of the surface state base address.
+    
+    v2: Fix possible buffer overflow when allocating a chunk out of the
+        hw-binding table pool (Ken).
+    v3: Remove extra newline and add missing brace around if-statement (Matt).
+    v4: Fix broken INTEL_DEBUG=shader_time for hw-generated binding tables.
+        Document PRM WaStateBindingTableOverfetch workaround.
+    
+    Cc: kenneth@whitecape.org
+    Cc: mattst88@gmail.com
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+
+commit 2133980bc7dff52bdeb142301184e464d113ce7c
+Author: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+Date:   Fri Jul 17 12:20:18 2015 +0300
+
+    i965: Implement interface to edit binding table entries
+    
+    Unlike normal software binding tables where the driver has to manually
+    generate and fill a binding table array which are then uploaded to the
+    hardware, the resource streamer instead presents the driver with an option
+    to fill out slots for individual binding table indices. The hardware
+    accumulates the state for these combined edits which it then automatically
+    flushes to a binding table pool when the binding table pointer state
+    command is invoked.
+    
+    v2: Clarify binding table edit bit alignment (Topi).
+    v3: Make comments and function names clearer (Ken).
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+
+commit 190756482e62cb57e2bc8c798181e5f0171726fb
+Author: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+Date:   Wed Apr 15 13:04:45 2015 +0300
+
+    i965: Enable hardware-generated binding tables on render path.
+    
+    This patch implements the binding table enable command which is also
+    used to allocate a binding table pool where hardware-generated
+    binding table entries are flushed into. Each binding table offset in
+    the binding table pool is unique per each shader stage that are
+    enabled within a batch.
+    
+    Also insert the required brw_tracked_state objects to enable
+    hw-generated binding tables in normal render path.
+    
+    v2: - Use MOCS in binding table pool alloc for GEN8
+        - Fix spurious offset when allocating binding table pool entry
+          and start from zero instead.
+    v3: - Include GEN8 fix for spurious offset above.
+    v4: - Fixup wrong packet length in enable/disable hw-binding table
+          for GEN8 (Ville).
+        - Don't invoke HW-binding table disable command when we don't
+          have resource streamer (Chris).
+    v5: - Reorder the state cache invalidate flush so it happens in-between
+          enabling hw-generated binding tables and the previous sw-binding
+          table GPU state (Chris).
+    v6: - Do the same fix in v5 for gen7_disable_hw_binding_tables().
+        - Adhere to coding guidelines and make comments more informative.
+    
+    Cc: kenneth@whitecape.org
+    Cc: syrjala@sci.fi
+    Cc: chris@chris-wilson.co.uk
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+
+commit 090529af1828817344e0850ef27eebd1f096eb5f
+Author: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+Date:   Tue Jul 2 11:48:22 2013 -0400
+
+    i965: Enable resource streamer for the batchbuffer
+    
+    Check first if the hardware and kernel support resource streamer. If this
+    is allowed, tell the kernel to enable the resource streamer enable bit on
+    MI_BATCHBUFFER_START by specifying I915_EXEC_RESOURCE_STREAMER
+    execbuffer flags.
+    
+    v2: - Use new I915_PARAM_HAS_RESOURCE_STREAMER ioctl to check if kernel
+          supports RS (Ken).
+        - Add brw_device_info::has_resource_streamer and toggle it for
+          Haswell, Broadwell, Cherryview, Skylake, and Broxton (Ken).
+    v3: - Update I915_PARAM_HAS_RESOURCE_STREAMER to match updated kernel.
+    v4: - Always inspect the getparam.value (Chris Wilson).
+    v5: - Fold redundant devinfo->has_resource_streamer check in context create
+          into init screen.
+    
+    Cc: kenneth@whitecape.org
+    Cc: chris@chris-wilson.co.uk
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+
+commit ccf9598ad7681f5c9c87e9ca8bf856fcb5198b45
+Author: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+Date:   Mon May 18 14:32:17 2015 +0300
+
+    i965: Define HW-binding table and resource streamer control opcodes
+    
+    v2: Use macros for HW binding table edits (Topi)
+    v3: Add Broadwell support.
+    v4: Make hardware binding table bit definitions even clearer (Ken)
+    
+    Cc: kenneth@whitecape.org
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
+
+commit ff7896a398f55baefd00e695c8f45f2ffa57bceb
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 17 10:01:48 2015 -0700
+
+    vc4: Switch to using a separate ioctl for making shaders.
+    
+    This gives the kernel a chance to validate and lock down the data,
+    without having to deal with mmap zapping.
+    
+    With this, GLBenchmark stops on a texture relocations, because we'd
+    recycled a shader BO as another shader and failed to revalidate, since we
+    weren't clearing the cached validation state on mmap faults.
+
+commit e42cfe5d032e97e0444df39421a9f93f84452d68
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Fri Jul 17 18:01:01 2015 +0200
+
+    mesa: fix up some texture error checks
+    
+    In particular, we were incorrectly accepting s3tc (and lots of others)
+    for CompressedTexSubImage3D (but not CompressedTexImage3D) calls with 3d
+    targets. At this time, the only allowed formats for these calls are the
+    bptc ones, since none of the specific extensions allow it (astc hdr would).
+    Also, fix up a bug in _mesa_target_can_be_compressed - 3d target needs to
+    be allowed for bptc formats.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 27aa31fab40783356207ba5dabd839b430496e7b
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 17 11:52:09 2015 -0700
+
+    vc4: Fix printing of shader-db debug when shader-db isn't turned on.
+
+commit 5341349dde6f5c70af188e48ef0082e6e7d5361f
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 17 11:22:40 2015 -0700
+
+    vc4: Add debugging on texture relocation validation failures.
+
+commit be7adc2ecad0d04037cb0c99754703dde86ee73a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Thu Jul 16 14:30:28 2015 -0700
+
+    vc4: Also consider uniform 0 in uniform lowering.
+    
+    The hash table considers key 0 to be the empty key.
+
+commit 90dfabc3b5ce5b485a1bbcd7e815a72588f7153d
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 10 16:30:27 2015 -0700
+
+    vc4: Use the pure/const attributes on a bunch of our QPU functions.
+    
+    On a release build, this makes the rest of vc4_qpu_validate.c go away
+    (the compiler didn't know that our qpu helper function calls had no
+    side effects).
+
+commit be1f49bda90425b7fd009ac177b307e61da0f994
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 10 16:25:26 2015 -0700
+
+    mesa: Detect and provide macros for function attributes pure and const.
+    
+    These are really useful hints to the compiler in the absence of link-time
+    optimization, and I'm going to use them in VC4.
+    
+    I've made the const attribute be ATTRIBUTE_CONST unlike other function
+    attributes, because we have other things in the tree #defining CONST for
+    their own unrelated purposes.
+    
+    v2: Alphabetize.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1)
+
+commit bde4c8ec1fd69e312fe21e36c8ce07139916811a
+Author: Connor Abbott <cwabbott0@gmail.com>
+Date:   Tue Jun 30 13:42:15 2015 -0700
+
+    i965/fs: don't make unused payload registers interfere
+    
+    Before, we were setting payload_last_use_ip for unused payload
+    registers to 0, which made them interfere with whatever the first
+    instruction wrote to due to the workaround for SIMD16 uniform arguments.
+    Just use -1 to mean "unused" instead, and then skip setting any
+    interferences for unused payload registers.
+    
+    instructions in affected programs:     0 -> 0
+    helped:                                0
+    HURT:                                  0
+    GAINED:                                1
+    LOST:                                  0
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Signed-off-by: Connor Abbott <connor.w.abbott@intel.com>
+
+commit 18e73bf7f8b12022e02db3230ee109657581900b
+Author: Connor Abbott <cwabbott0@gmail.com>
+Date:   Tue Jun 30 13:38:20 2015 -0700
+
+    i965/fs: remove special case in setup_payload_interference()
+    
+    regs_read() will handle LINTERP for us since the previous commit. In
+    addition, we were being too conservative, since it will only read 2
+    registers on SIMD8.
+    
+    instructions in affected programs:     9061 -> 8893 (-1.85%)
+    helped:                                10
+    HURT:                                  0
+    GAINED:                                0
+    LOST:                                  0
+    
+    All of the changes were due to spills being eliminated, mostly in KSP
+    shaders.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Signed-off-by: Connor Abbott <connor.w.abbott@intel.com>
+
+commit c4a2217e79ac78c59cec3eb97542ceb819f92a44
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Mon Feb 2 14:23:35 2015 -0800
+
+    i965/fs: Mark last used ip for all regs read in the payload
+    
+    If a source register in the push constant registers uses more than one
+    register, then we wouldn't update payload_last_use_ip for subsequent
+    registers.
+    
+    Unlike most uniform data pushed into registers, the CS gl_LocalInvocationID
+    data varies per execution channel. Therefore for SIMD16 mode, we have vec16
+    data in the payload. In this case we then need to mark 2 registers in
+    payload_last_use_ip as last used by the instruction. There's a similar
+    situation for the z and w coordinates of gl_FragCoord for fragment shaders,
+    where it had only happened to work before because of some bogus interferences
+    which the next commit removes.
+    
+    (Connor: added bit about gl_FragCoord to commit message)
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Connor Abbott <connor.w.abbott@intel.com>
+
+commit 9f344b908a95440d215f29c0b05b8ea8dba2839e
+Author: Connor Abbott <connor.w.abbott@intel.com>
+Date:   Wed Jul 1 09:58:47 2015 -0700
+
+    i965/fs: fix regs_read() for LINTERP
+    
+    The second source always stays within the same SIMD8 register.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Signed-off-by: Connor Abbott <connor.w.abbott@intel.com>
+
+commit eaf799ddff9f2583d6dee5a0db36fa0a1162fde6
+Author: Connor Abbott <connor.w.abbott@intel.com>
+Date:   Wed Jul 15 12:01:20 2015 -0700
+
+    nir: add nir_foreach_instr_safe_reverse()
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Signed-off-by: Connor Abbott <connor.w.abbott@intel.com>
+
+commit 8eea091747c9b12b21688b738145632b90d923cb
+Author: Connor Abbott <connor.w.abbott@intel.com>
+Date:   Wed Jul 15 12:00:47 2015 -0700
+
+    nir: add nir_instr_is_first() and nir_instr_is_last() helpers
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Signed-off-by: Connor Abbott <connor.w.abbott@intel.com>
+
+commit 01cdbba341b47972a743e7f192d3554010d0da84
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Thu Jul 16 15:07:05 2015 -0700
+
+    i965/cs: Use dispatch width of 8 for cs terminate payload setup
+    
+    This prevents an assertion failure in brw_fs_live_variables.cpp,
+    fs_live_variables::setup_one_write: Assertion `var < num_vars' failed.
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7e337859ff98a0caf00fd201a5389933d42d0baa
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Thu Jul 16 15:04:43 2015 -0700
+
+    i965/cs: Return 1 for regs_read on CS_OPCODE_CS_TERMINATE
+    
+    This prevents an assertion failure in brw_fs_live_variables.cpp,
+    fs_live_variables::setup_one_read: Assertion `var < num_vars' failed.
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 4b17f0d9f58637300b0748d1fb702a7e4d51979f
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat Jul 4 19:15:16 2015 -0700
+
+    program: Allow redundant OPTION ARB_fog_* directives.
+    
+    A fragment program from "Pixel Piracy" contains redundant OPTION
+    directives:
+    
+    !!ARBfp1.0
+    OPTION ARB_precision_hint_fastest;
+    OPTION ARB_fog_exp2;
+    OPTION ARB_precision_hint_fastest;
+    OPTION ARB_fog_exp2;
+    ...
+    
+    We already allow redundant ARB_precision_hint_fastest directives, but
+    disallow the redundant (yet consistent) ARB_fog_exp2 directives, failing
+    to compile the program.
+    
+    The specification seems to contradict itself - the main text says that
+    only one fog application option may be specified, but then backpedals,
+    indicating the intent is to disallow /contradictory/ flags.  One of the
+    issues suggests that specifying contradictory ones is stupid, but
+    allowed, and only the last one should take effect.
+    
+    Accepting multiple redundant (but consistent) directives seems harmless,
+    and like a reasonable interpretation of the specification.  It also
+    fixes a fragment program found in the wild.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 3a31876600cb5c4d90c998ecb5635c602eeb2bd1
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Tue Jul 14 09:56:09 2015 -0700
+
+    i965: Push miptree tiling request into flags
+    
+    With the last few patches a way was provided to influence lower layer miptree
+    layout and allocation decisions via flags (replacing bools). For simplicity, I
+    chose not to touch the tiling requests because the change was slightly less
+    mechanical than replacing the bools.
+    
+    The goal is to organize the code so we can continue to add new parameters and
+    tiling types while minimizing risk to the existing code, and not having to
+    constantly add new function parameters.
+    
+    v2: Rebased on Anuj's recent Yf/Ys changes
+    Fix non-msrt MCS allocation (was only happening in gen8 case before)
+    
+    v3: small fix in assertion requested by Chad
+    
+    v4: Use parens to get the order right from v3.
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit ef42352ff4e1feeea7338db73f540038c6755472
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu Jul 16 16:52:08 2015 -0700
+
+    Revert "i965: Push miptree tiling request into flags"
+    
+    This reverts commit 51e8d549e110f86cb7107cf712843aebd956fb9a.
+
+commit 51e8d549e110f86cb7107cf712843aebd956fb9a
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Tue Jul 14 09:56:09 2015 -0700
+
+    i965: Push miptree tiling request into flags
+    
+    With the last few patches a way was provided to influence lower layer miptree
+    layout and allocation decisions via flags (replacing bools). For simplicity, I
+    chose not to touch the tiling requests because the change was slightly less
+    mechanical than replacing the bools.
+    
+    The goal is to organize the code so we can continue to add new parameters and
+    tiling types while minimizing risk to the existing code, and not having to
+    constantly add new function parameters.
+    
+    v2: Rebased on Anuj's recent Yf/Ys changes
+    Fix non-msrt MCS allocation (was only happening in gen8 case before)
+    
+    v3: small fix in assertion requested by Chad
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> (v2)
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com> (v2)
+    Reviewed-by: Chad Versace <chad.versace@intel.com> (v2)
+
+commit 4bddd82bf3dae44c2b75cef34e9e85e15d63df7f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jul 14 15:43:44 2015 +0300
+
+    i965/fs: Factor out universally broken calculation of the register component size.
+    
+    This in principle simple calculation was being open-coded in a number
+    of places (in a series I haven't yet sent for review there will be a
+    couple more), all of them were subtly broken in one way or another:
+    None of them were handling the HW_REG case correctly as pointed out by
+    Connor, and fs_inst::regs_read() was handling the stride=0 case rather
+    naively.  This patch solves both problems and factors out the
+    calculation as a new fs_reg method.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit b00cd6e4a0f9a84d514f428428be348900236e2e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jul 9 21:42:28 2015 +0300
+
+    i965: Implement nir_op_uadd_carry and _usub_borrow without accumulator.
+    
+    This gets rid of two no16() fall-backs and should allow better
+    scheduling of the generated IR.  There are no uses of usubBorrow() or
+    uaddCarry() in shader-db so no changes are expected.  However the
+    "arb_gpu_shader5/execution/built-in-functions/fs-usubBorrow" and
+    "arb_gpu_shader5/execution/built-in-functions/fs-uaddCarry" piglit
+    tests go from 40 to 28 instructions.  The reason is that the plain ADD
+    instruction can easily be CSE'ed with the original addition, and the
+    b2i negation can easily be propagated into the source modifier of
+    another instruction, so effectively both operations are performed with
+    just one instruction.
+    
+    v2: Rely on carry_to_arith() and borrow_to_arith() to lower these
+        (Ilia Mirkin).
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 3ee2daf23dc91b8dfc017b5c89c10ab1376ba4df
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Fri Jul 10 19:18:39 2015 +0300
+
+    i965: Implement b2f and b2i using negation.
+    
+    Booleans are represented as 0/-1 on modern hardware which means we can
+    just negate them to convert them into a numeric type.  Negation has
+    the benefit that it can be implemented using a source modifier which
+    can easily be propagated into some other instruction.  shader-db
+    results on HSW:
+    
+    total instructions in shared programs: 6349082 -> 6346693 (-0.04%)
+    instructions in affected programs:     40948 -> 38559 (-5.83%)
+    helped:                                123
+    HURT:                                  1
+    GAINED:                                1
+    LOST:                                  0
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 8fba933ca2dd3c3487281135a9063b6ca9bed359
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 15:00:22 2015 +0200
+
+    gallium: add interface for writable shader buffers
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 05a12c53a308965aba1c00f0caf36d8e0f32e035
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 14:48:33 2015 +0200
+
+    gallium: add interface for writable shader images
+    
+    PIPE_CAPs will be added some other time.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit b73bec0ecd43861337daf9663e242d2b44f36dbd
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 14:34:13 2015 +0200
+
+    gallium: add new limits for shader buffers and images
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit f9f79d29ce75c681c46bdbac5aa3f19ee1adb93b
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jul 5 13:51:16 2015 +0200
+
+    gallium: add BIND flags for R/W buffers and images
+    
+    PIPE_CAPs and TGSI support will be added later. The TGSI support should be
+    straightforward. We only need to split TGSI_FILE_RESOURCE into TGSI_FILE_IMAGE
+    and TGSI_FILE_BUFFER, though duplicating all opcodes shouldn't be necessary.
+    
+    The idea is:
+    * ARB_shader_image_load_store should use set_shader_images.
+    * ARB_shader_storage_buffer_object should use set_shader_buffers(slots 0..M-1)
+      if M shader storage buffers are supported.
+    * ARB_shader_atomic_counters should use set_shader_buffers(slots M..N)
+      if N-M+1 atomic counter buffers are supported.
+    
+    PIPE_CAPs can describe various constraints for early DX11 hardware.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 26222932c013da3688e39dc831179659cc65c39a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 12 14:24:17 2015 +0200
+
+    gallium: add PIPE_CAP_MAX_SHADER_PATCH_VARYINGS
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit af768922cafa3eb3e78a2fdfee90380a74c79460
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jul 1 16:32:24 2015 +0300
+
+    i965/gen9: Use custom MOCS entries set up by the kernel.
+    
+    Instead of relying on hardware defaults the i915 kernel driver is
+    going to program custom MOCS tables system-wide on Gen9 hardware.  The
+    "WT" entry previously used for renderbuffers had a number of problems:
+    It disabled caching on eLLC, it used a reserved L3 cacheability
+    setting, and it used to override the PTE controls making renderbuffers
+    always WT on LLC regardless of the kernel's setting.  Instead use an
+    entry from the new MOCS tables with parameters: TC=LLC/eLLC, LeCC=PTE,
+    L3CC=WB.
+    
+    The "WB" entry previously used for anything other than renderbuffers
+    has moved to a different index in the new MOCS tables but it should
+    have the same caching semantics as the old entry.
+    
+    Even though the corresponding kernel change ("drm/i915: Added
+    Programming of the MOCS") is in a way an ABI break it doesn't seem
+    necessary to check that the kernel is recent enough because the change
+    should only affect Gen9 which is still unreleased hardware.
+    
+    v2: Update MOCS values for the new Android-incompatible tables
+        introduced in v7 of the kernel patch.
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Reference: http://lists.freedesktop.org/archives/intel-gfx/2015-July/071080.html
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 7e0180d57d330bd8d3047e841086712376b2a1cc
+Author: EdB <edb+mesa@sigluy.net>
+Date:   Tue Jul 7 17:58:56 2015 +0200
+
+    clover: little OpenCL status code logging clean
+    
+    s/build_error/compile_error in order to match the stored OpenCL status code.
+    Make program::build catch and log every OpenCL error.
+    Make tgsi error triggering uniform with the llvm one.
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 7b9ebf879b6f35038996805a641667f00d93c4b7
+Author: Renaud Gaubert <renaud@lse.epita.fr>
+Date:   Sat Jul 11 19:38:10 2015 +0200
+
+    glsl: avoid compiler's segfault when processing operators with void arguments
+    
+    This is done by returning an rvalue of type void in the
+    ast_function_expression::hir function instead of a void expression.
+    
+    This produces (in the case of the ternary) an hir with a call
+    to the void returning function and an assignment of a void variable
+    which will be optimized out (the assignment) during the optimization
+    pass.
+    
+    This fix results in having a valid subexpression in the many
+    different cases where the subexpressions are functions whose
+    return values are void.
+    
+    This prevents dereferencing NULL in the following cases:
+      * binary operator
+      * unary operators
+      * ternary operator
+      * comparison operators (except equal and nequal operator)
+    
+    Equal and nequal had to be handled as a special case because
+    instead of segfaulting on a forbidden syntax it was now accepting
+    expressions with a void return value on either (or both) side of
+    the expression.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85252
+    
+    Signed-off-by: Renaud Gaubert <renaud@lse.epita.fr>
+    Reviewed-by: Gabriel Laskar <gabriel@lse.epita.fr>
+    Reviewed-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+
+commit 779cabfc7d022de8b7b9bc7fdac0caffa8646c51
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Thu Jul 16 03:55:59 2015 +0200
+
+    r200: fix some potential big endian issues
+    
+    The formats chosen (both by texture format chooser, fbo storage allocation)
+    are different for big endian not just for rgba8 but also lower bit width
+    formats (why I don't actually know). Even the function to test for renderable
+    formats used different formats, however the actual colorbuffer setup did not.
+    And the blitter did not take that into account either.
+    Untested (what could possibly go wrong...).
+    Same as for r100.
+    
+    Acked-by: Marek Olšák <marek.olsak@amd.com>
+
+commit d21320f6258b2e1780a15c1ca718963d8a15ca18
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Thu Jul 16 03:18:20 2015 +0200
+
+    radeon: fix some potential big endian issues
+    
+    The formats chosen (both by texture format chooser, fbo storage allocation)
+    are different for big endian not just for rgba8 but also lower bit width
+    formats (why I don't actually know). Even the function to test for renderable
+    formats used different formats, however the actual colorbuffer setup did not.
+    And the blitter did not take that into account either.
+    Untested (what could possibly go wrong...).
+    
+    Acked-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 882476fea3ba4fdd05d21582eeb968f84523fb9a
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Sat Jul 11 20:03:27 2015 +0200
+
+    radeon/r200: mark state atoms as dirty after blits
+    
+    Blit submits lots of packets which are usually handled by state atoms, so
+    these must be dirtied.
+    Not sure if this fixes anything, but it was a concern raised by bug 51658
+    (with this all issues there seen as actual bugs should be fixed, with the
+    exception of the patch to upload non-used texenv state atoms which I just
+    don't understand).
+    
+    Acked-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 26c1361ac386bd5b108d79289a3f82d15b01d014
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Thu Jul 16 03:06:47 2015 +0200
+
+    r200: fix fbo rendering by disabling optimized texture format chooser
+    
+    It is rather unfortunate that we don't know if a texture is going to be used
+    as a rt later, and we lack the means to do something about a format chosen
+    which we can't render to directly, so disable this and always choose renderable
+    format for rgba8 textures.
+    This addresses an issue raised on (old) bug,
+    https://bugs.freedesktop.org/show_bug.cgi?id=51658 with gnome-shell, don't
+    know if that's still applicable but it might fix other things as well.
+    
+    Acked-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 642f289824dc9a07e8209c905badef31b4841ae1
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Jul 7 12:23:33 2015 -0700
+
+    i965: Fix 32 bit build warnings in intel_get_yf_ys_bo_size()
+    
+    Along with fixing the type of pitch parameter, patch also changes
+    the types of a few local variables and function return type.
+    
+    Warnings fixed are:
+    intel_mipmap_tree.c:671:7: warning: passing argument 3 of
+    'intel_get_yf_ys_bo_size' from incompatible pointer type
+    
+    intel_mipmap_tree.c:563:1: note: expected 'uint64_t *' but
+    argument is of type 'long unsigned int *'
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit f11c6f09cf36909ff399353b20195a31cf0f1907
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jul 8 19:00:48 2015 -0700
+
+    i965: Optimize batchbuffer macros.
+    
+    Previously OUT_BATCH was just a macro around an inline function which
+    does
+    
+       brw->batch.map[brw->batch.used++] = dword;
+    
+    When making consecutive calls to intel_batchbuffer_emit_dword() the
+    compiler isn't able to recognize that we're writing consecutive memory
+    locations or that it doesn't need to write batch.used back to memory
+    each time.
+    
+    We can avoid both of these problems by making a local pointer to the
+    next location in the batch in BEGIN_BATCH().
+    
+    Cuts 18k from the .text size.
+    
+       text     data      bss      dec      hex  filename
+    4946956   195152    26192  5168300   4edcac  i965_dri.so before
+    4928956   195152    26192  5150300   4e965c  i965_dri.so after
+    
+    This series (including commit c0433948) improves performance of Synmark
+    OglBatch7 by 8.01389% +/- 0.63922% (n=83) on Ivybridge.
+    
+    Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 131573df7aea0b10e97d9d5db0d26d89f8dfef54
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Sat Jul 11 14:36:25 2015 -0700
+
+    i965: Add and use USED_BATCH macro.
+    
+    The next patch will replace the .used field with an on-demand
+    calculation of batchbuffer usage.
+    
+    Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 09348c12fceba59c22219fe3272260eb8ea6051e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jul 8 18:59:51 2015 -0700
+
+    i965: Split batch emission from relocation functions.
+    
+    So that everything writing to the batch between BEGIN_BATCH() and
+    ADVANCE_BATCH() goes through OUT_BATCH.
+    
+    Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit fbf3aebf1f33fbec559c5b69bdf3b5dec6031612
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jul 8 18:56:52 2015 -0700
+
+    i965: Move BEGIN_BATCH() into same control flow as ADVANCE_BATCH().
+    
+    BEGIN_BATCH() and ADVANCE_BATCH() will contain "do {" and "} while (0)"
+    respectively to allow declaring local variables used by intervening
+    OUT_BATCH macros. As such, BEGIN_BATCH() and ADVANCE_BATCH() need to be
+    in the same control flow.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 141e1eb29fe80ad341e718147a1277cc3b1b9c11
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jul 15 06:15:06 2015 -0600
+
+    osmesa: fix OSMesaPixelsStore typo
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91337
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 7124feba1b879deb88dbf2baf600ed42309d9839
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 14 12:32:04 2015 -0700
+
+    vc4: Cache the texture p1 for the sampler.
+    
+    Cuts another 12% of vc4_uniforms.o, in exchange for computing it at
+    CSO creation time.
+
+commit 0f4d2b0a2dd3fa39426f2789bf2a8fc939adf001
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 14 12:18:40 2015 -0700
+
+    vc4: Cache texture p0/p1 setup for the sampler view.
+    
+    In exchange for a bit of space and computation in CSO setup, we cut
+    vc4_uniform.c (draw time) code size by 4.8%.
+
+commit 1835ce6e35e6a186c2ba1bdf39b73783a2cb2ad5
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 14 12:21:23 2015 -0700
+
+    vc4: Move uniforms handling to a separate file.
+    
+    The rest of vc4_program.c is about compiling, while this is about
+    uniform emit at draw time.
+
+commit 9476b11d6edc67403dd7c5aaddbc375400e02425
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jul 14 11:54:15 2015 -0700
+
+    vc4: Fix some -Wdouble-promotion warnings.
+    
+    No code generation changes from this, but it'll be useful to have this
+    next time I go checking -Wdouble-promotion.
+
+commit 320089dbd63de3ac1bd3d42ee8cec41837486d8c
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Thu Jun 11 19:17:03 2015 -0700
+
+    i965/cs: Initialize GPGPU Thread Count
+    
+    This field should always be set for gen8. In the bdw PRM, Volume 2d:
+    Command Reference: Structures under INTERFACE_DESCRIPTOR_DATA, DWORD
+    6, Bits 9:0, Number of Threads in GPGPU Thread Group:
+    
+    "This field should not be set to 0 even if the barrier is disabled,
+    since an accurate value is needed for proper pre-emption."
+    
+    In the HSW PRM, it doesn't mention that it must always be set, but
+    it should not hurt.
+    
+    Reported-by: Kristian Høgsberg <krh@bitplanet.net>
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit cd7dd45bfec9ad68719c5e4e04b66ea4bcc1a2c1
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 10 17:00:34 2015 -0700
+
+    vc4: Fix compiler warnings on release builds.
+
+commit 1e80c9fab98d7de216937a47f8e231f3beb78403
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 10 17:01:37 2015 -0700
+
+    vc4: Add better debug for register allocation failure.
+
+commit 3df78928786134874eafa6f68186c8edbbdd3ae7
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 10 16:11:23 2015 -0700
+
+    vc4: Drop reloc_count tracking for debug asserts on non-debug builds.
+    
+    Cuts another 88 bytes of compiled code.
+
+commit 7432017f65174e82a3de7afef3e4e6f60932356c
+Author: Eric Anholt <eric@anholt.net>
+Date:   Thu Jul 9 22:51:06 2015 -0700
+
+    vc4: Rework cl handling to be friendlier to the compiler.
+    
+    Drops 680 bytes of code, from avoiding a bunch of extra updates to the
+    next pointer in the struct.
+
+commit a0d3915663fb7cbd3c1a5561450e256e00ecf11b
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jul 10 14:46:42 2015 -0700
+
+    vc4: Make a helper function for getting the current offset in the CL.
+    
+    I needed to rewrite this a bit for safety checking in the next commit.
+    Despite being a static inline of the same thing that was being done, we
+    lose 36 bytes of code for some reason.
+
+commit 748bf459b46b44e184ee1d425ce612da61a0800e
+Author: Eric Anholt <eric@anholt.net>
+Date:   Thu Jul 9 22:48:17 2015 -0700
+
+    vc4: Drop separate cl*_reloc_hindex().
+    
+    Now that RCL generation is in the kernel, we don't have any other
+    callers.  Oddly, the compiler generates another 8 bytes of code for
+    this, but the simplification is worth it.
+
+commit e4c540f6d09390013a9cb66060a29f236ad7dcfc
+Author: Eric Anholt <eric@anholt.net>
+Date:   Thu Jul 9 22:42:22 2015 -0700
+
+    vc4: Store reloc pointers as pointers, not offsets.
+    
+    Now that we don't resize the CL as we build (it's set up at the top by
+    vc4_start_draw()), we can store the pointers instead of offsets from
+    the base.  Saves a bit of math in emitting relocs (about 60 bytes of
+    code).
+
+commit ab80519b3cd08401dff2d07343064a27f32b33ca
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 29 22:32:03 2015 -0700
+
+    vc4: Add perf debug for when we wait on BOs.
+
+commit 759ed0bd03818c912e7f1fa62bafc50ef52ef291
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jul 13 15:40:41 2015 -0700
+
+    i965: Mark constant static data as const.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit ea633db65ffa684ea5237b8cb5bd96fbc1a7769a
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Wed Jul 8 17:30:44 2015 +0200
+
+    glsl: Lower shader storage buffer object loads to GLSL IR intrinsics
+    
+    Extend the existing lower_ubo_reference pass to also detect SSBO loads
+    and lower them to __intrinsic_load_ssbo intrinsics.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 1966ea57728a1c05300982ddd83de989e363613c
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Wed Jul 8 17:03:06 2015 +0200
+
+    glsl: Lower shader storage buffer object writes to GLSL IR intrinsics
+    
+    Extend the existing lower_ubo_reference pass to also detect SSBO writes
+    and lower them to __intrinsic_store_ssbo intrinsics.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 2a66ee6fc1fa1e64f2d9a22271187d4462d9e042
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Fri Apr 24 11:17:15 2015 +0200
+
+    glsl: Don't do copy propagation on buffer variables
+    
+    Since the backing storage for these is shared we cannot ensure that
+    the value won't change by writes from other threads. Normally SSBO
+    accesses are not guaranteed to be synchronized with other threads,
+    except when memoryBarrier is used. So, we might be able to optimize
+    some SSBO accesses, but for now we always take the safe path and emit
+    the SSBO access.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 5dfea83ee6bf85fb3962679d043eb06b33bfd4c1
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Fri Apr 24 11:15:48 2015 +0200
+
+    glsl: Don't do constant variable on buffer variables
+    
+    Since the backing storage for these is shared we cannot ensure that
+    the value won't change by writes from other threads. Normally SSBO
+    accesses are not guaranteed to be synchronized with other threads,
+    except when memoryBarrier is used. So, we might be able to optimize
+    some SSBO accesses, but for now we always take the safe path and emit
+    the SSBO access.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 0b1111d985714816fad20c99b4e6ea762df17b46
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Fri Apr 24 11:14:17 2015 +0200
+
+    glsl: Don't do constant propagation on buffer variables
+    
+    Since the backing storage for these is shared we cannot ensure that
+    the value won't change by writes from other threads. Normally SSBO
+    accesses are not guaranteed to be synchronized with other threads,
+    except when memoryBarrier is used. So, we might be able to optimize
+    some SSBO accesses, but for now we always take the safe path and emit
+    the SSBO access.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 5360ff30c4de966422fde6a574e3959c81bf5037
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Apr 6 10:19:50 2015 +0200
+
+    glsl: Do not kill dead assignments to buffer variables or SSBO declarations.
+    
+    If we kill dead assignments we lose the buffer writes.
+    
+    Also, we never kill UBO declarations even if they are never referenced
+    by the shader, they are always considered active. Although the spec
+    does not seem to say this specifically for SSBOs, it is probably implied
+    since SSBOs are pretty much the same as UBOs, only that you can write
+    to them.
+    
+    v2:
+    - Fix the comment (Jordan)
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 3ad92589f29466383c0218aa4a73bff52019c4be
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Apr 6 09:37:58 2015 +0200
+
+    glsl: Don't do tree grafting on buffer variables
+    
+    Otherwise we can lose writes into the buffers backing the variables.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 173ed05a6d9e851b2b7b2f9f2d8993e5da115c40
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Mar 19 11:42:33 2015 +0100
+
+    mesa: Implement _mesa_BindBufferRange for target GL_SHADER_STORAGE_BUFFER
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 8a1d58bd6129d61ec4efb79cc6f2b61ac777b85b
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Mar 19 11:37:43 2015 +0100
+
+    mesa: Implement _mesa_BindBufferBase for target GL_SHADER_STORAGE_BUFFER
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 7b0d0a2bf2d147c6024ff1a4b1eaaad955e7d297
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Mar 19 11:21:52 2015 +0100
+
+    mesa: Implement _mesa_BindBuffersRange for target GL_SHADER_STORAGE_BUFFER
+    
+    v2:
+    - Fix error message (Jordan)
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 0aa83f3e90a5ca547593631bc1557412e5305bdd
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Mar 19 10:47:17 2015 +0100
+
+    mesa: Implement _mesa_BindBuffersBase for target GL_SHADER_STORAGE_BUFFER
+    
+    v2:
+    - Add space before const (Jordan)
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit e72f5ef50211c3ce31abaab4ed1bf82df2884157
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Mar 19 10:31:23 2015 +0100
+
+    mesa: Implement _mesa_DeleteBuffers for target GL_SHADER_STORAGE_BUFFER
+    
+    v2:
+    - Remove the extra spaces (Jordan)
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 98a1a2c7302526d649a727d63400407727d7aad9
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Mar 19 11:50:51 2015 +0100
+
+    mesa: Initialize and free shader storage buffers
+    
+    v2:
+    - Fix indentation, used tabs instead of whitespaces. (Jordan)
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 2747d566f187cdab5d6bdc508e460a76e5cbd6c4
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Thu Mar 19 10:22:00 2015 +0100
+
+    glsl: fix error messages in invalid declarations of shader storage blocks
+    
+    Due to GL_ARB_shader_storage_buffer_object extension, shader storage blocks
+    have the same limitations as uniform blocks.
+    
+    This patch fixes the corresponding error messages.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 9f651dbf7924938a8aa2c9c940ae3ed1366d6198
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Wed Mar 18 10:52:53 2015 +0100
+
+    glsl: buffer variables cannot be defined outside interface blocks
+    
+    Section 4.3.7 "Buffer Variables", GLSL 4.30 spec:
+    
+    "Buffer variables may only be declared inside interface blocks
+    (section 4.3.9 “Interface Blocks”), which are then referred to as
+    shader storage blocks. It is a compile-time error to declare buffer
+    variables at global scope (outside a block)."
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 20b2907db7b93656cbafe1d24302498e5817dbe2
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Wed Mar 18 10:25:10 2015 +0100
+
+    glsl: shader buffer variables cannot have initializers
+    
+    Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+    
+        "Buffer variables cannot have initializers."
+    
+    v2:
+    - Rewrite error message (Jordan)
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit fa0a86c057ac9bff9b208f93db75c5ce5bd7136f
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Wed Mar 18 09:02:51 2015 +0100
+
+    glsl: enable binding layout qualifier usage for shader storage buffer objects
+    
+    See GLSL 4.30 spec, section 4.4.5 "Uniform and Shader Storage Block
+    Layout Qualifiers".
+    
+    v2:
+    - Add whitespace in an error message. Delete period '.' at the end of that
+    error message (Jordan).
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit c717604dc4b5119fa9091241535c3efd1370438c
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Thu May 14 12:37:07 2015 +0200
+
+    mesa: add MaxShaderStorageBlocks to struct gl_program_constants
+    
+    v2:
+    - Set MaxShaderStorageBlocks to 8.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit cd50906e0334d7ad0102e5733a152d55d672776b
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Mar 19 10:15:30 2015 +0100
+
+    mesa: Add shader storage buffer support to struct gl_context
+    
+    This includes the array of bindings, the current buffer bound to the
+    GL_SHADER_STORAGE_BUFFER target and a set of general limits and default
+    values for shader storage buffers.
+    
+    v2:
+    - Use spec values for the new defined constants (Jordan)
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit df89ed1591c9d1c55e79fe8effb976c21b172a7d
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Mar 23 11:19:12 2015 +0100
+
+    glsl: Identify active uniform blocks that are buffer blocks as such.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit a78a589efc5440443439d474e45fa1ef8b79178c
+Author: Kristian Høgsberg <krh@bitplanet.net>
+Date:   Wed May 13 11:17:23 2015 +0200
+
+    glsl: link buffer variables and shader storage buffer interface blocks
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 84fc5fece006f2bd95287496e32482ac08bfd399
+Author: Kristian Høgsberg <krh@bitplanet.net>
+Date:   Wed May 13 10:53:46 2015 +0200
+
+    glsl: Implement parser support for 'buffer' qualifier
+    
+    This is used to identify shader storage buffer interface blocks where
+    buffer variables are declared.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 6b09598d63b8b6069b230fbe8283c75cf86f711a
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon May 18 15:47:18 2015 +0200
+
+    nir: add nir_var_shader_storage
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 1146696f75ea0f2b49e6379c2a62602dfeb51190
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Fri Jun 5 09:11:53 2015 +0200
+
+    mesa: rename is_in_uniform_block to is_in_buffer_block
+    
+    Since this now checks if a variable is inside a uniform or a shader
+    storage block.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 18feaa8f36b311c443fd56666507ec1768fb9582
+Author: Kristian Høgsberg <krh@bitplanet.net>
+Date:   Wed May 13 10:41:55 2015 +0200
+
+    glsl: Add ir_var_shader_storage
+    
+    This will be used to identify buffer variables inside shader storage
+    buffer objects, which are very similar to uniforms except for a few
+    differences, most important of which is that they are writable.
+    
+    Since buffer variables are so similar to uniforms, we will almost always
+    want them to go through the same paths as uniforms.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 3095ee9b8bd4154cc63b6332c21b16954555e241
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Tue Mar 17 12:17:27 2015 +0100
+
+    mesa: define ARB_shader_storage_buffer_object extension
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 75df8f00192415eb4ad378708ff3745390931b4e
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Tue Jul 14 07:41:26 2015 +1000
+
+    glsl: free interface_types
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 431a0658616575953868f6d16bb9641306cceea8
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Tue Jul 14 07:37:59 2015 +1000
+
+    glsl: replace some more old hash_table uses
+    
+    The util/hash_table was intended to be a fast hash table
+    replacement for the program/hash_table see 35fd61bd99c1 and 72e55bb6888ff.
+    
+    This change replaces some more uses of the old hash table.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 7a50bf6c7f7729f5eee3ddf7aa9b38a81873f2c6
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jul 10 21:27:13 2015 +0100
+
+    auxiliary/vl: use the correct screen index
+    
+    Inspired (copied) from Marek's commit for egl/x11
+    commit 0b56e23e7f3(egl/dri2: use the correct screen index)
+    
+    v2: Fix copy/pasta errors.
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 10a7b579fdc0e3f3b38920ae5c103c058cc63eec
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 14:44:11 2015 +0100
+
+    radeon: remove dri_mirror state
+    
+    Most of the data stored (duplicated) was unused, and for the one that is,
+    follow the approach set by other drivers.
+    This eliminates the use of legacy (dri1) types.
+    
+    Cc: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 82b9b2e523ad53f54d5620f47f7aea4f11397b81
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jul 7 14:13:33 2015 +0100
+
+    i915: remove unused driFd variable
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit a025e539e430b7bbfae9b786bd79d0d608f1acf8
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 6 09:42:01 2015 +0100
+
+    i965: bump libdrm requirement to 2.4.61 and drop in-tree workaround
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit c505064b2cea14c9da115a26e9326b9c0c7dca3b
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jul 13 20:01:39 2015 +0100
+
+    bugzilla_mesa.sh: sort the bugs list by number
+    
+    v2: Change to use sed/sort, based on Ilia's suggestion.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 9027d53b2a00b3073f904cc3cb995e8953e41036
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jun 30 14:37:19 2015 +0100
+
+    radeonsi: directly include radeon/* headers
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit dd50ccf0f4dcba2fd586d5b5c58750259e29c357
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 14:03:22 2015 +0100
+
+    auxiliary/vl: use loader_open_device() over open()
+    
+    The former handles O_CLOEXEC (and the lack of it) appropriately.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit cc32d25454c382a971e81ae584a4296fdf492e70
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 14:01:39 2015 +0100
+
+    pipe-loader: use loader_open_device() rather than open()
+    
+    The former handles O_CLOEXEC (and the lack of it) appropriately.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 132031b110a9fd652f3c9d5727502134ef9c22c1
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 13:28:33 2015 +0100
+
+    pipe-loader: remove pipe_loader_sw_probe_xlib
+    
+    It was only useful for st/egl, although I've never got to merging the
+    pipe-loader and inline-helpers before it was removed. There are no users
+    for it ATM.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit c73d30dfe90d9aa096fc64024612a6543bd748c7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 13:08:06 2015 +0100
+
+    automake: remove empty GALLIUM_PIPE_LOADER_LIBS
+    
+    Cc: Rob Clark <robclark@freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit abc20120e4aa5a3782f40f7d4a7c6a4f953fca9c
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 13:02:21 2015 +0100
+
+    automake: pipe-loader: remove the 'client' pipe-loader
+    
+    Was only around as opencl's pipe-loader wanted to link against xcb in
+    some cases.
+    
+    Cc: Rob Clark <robclark@freedesktop.org>
+    Cc: Tom Stellard <thomas.stellard@amd.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 0959d7312d37dd9841cbf7a53cb40b3cfa6e5fc9
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 12:44:44 2015 +0100
+
+    pipe-loader: remove pipe_loader_drm_probe_fd() x_auth argument
+    
+    No longer used by anyone, as of last commit.
+    
+    Cc: Tom Stellard <thomas.stellard@amd.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit a27ec5dc460b91dc44675f48cddbbb2631ee824f
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Tue Jun 30 15:53:27 2015 +0100
+
+    pipe-loader: simplify pipe_loader_drm_probe
+    
+    Do not iterate and (attempt to) open the render device, if we're over
+    the requested number of devices.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 69a1b9959e59653da262185c4e2cf57d24939b19
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 12:36:45 2015 +0100
+
+    pipe-loader: drop support for non-render node devices
+    
+    Render nodes have been around for quite some time. Removing support via
+    the master/primary node allows us to clean up the conditional
+    compilation and simplify the build greatly.
+    
+    For example, currently we have the pipe-loader, which explicitly links against
+    xcb and friends (for X auth) if found at compile-time. That
+    would cause problems as one will be forced to use X/xcb, even if it's a
+    headless system that is used for opencl.
+    
+    v2: Clarify the linking topic in the commit message.
+    
+    Cc: Tom Stellard <thomas.stellard@amd.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit de5c2b6f2b53924bceab6f4b8255d8e9dcad21b4
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jul 13 09:11:20 2015 +0100
+
+    radeonsi: direct emit intrinsic for DFRAC.
+    
+    Michel reported this still failed, and this fixed it
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 4cbf0a0ccf2fb4545b206066b756fd9a07acab92
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed Jul 1 04:58:24 2015 +0100
+
+    radeonsi: ARB_gpu_shader_fp64 + ARB_vertex_attrib_64bit support.
+    
+    This adds the translation from TGSI to AMDGPU llvm backend, for the
+    64-bit opcodes. The backend pretty much handles everything for us
+    fine. There is one patch required for SI DFRAC support, that I know
+    of.
+    
+    [airlied: fixed missing comma, updated relnotes]
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 8108de4774f2542a8fe65de71b82221821f73434
+Author: Guillaume Desmottes <guillaume.desmottes@collabora.co.uk>
+Date:   Fri Apr 17 15:13:36 2015 +0200
+
+    loader: don't leak udev_enumerate
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90073
+    Signed-off-by: Guillaume Desmottes <guillaume.desmottes@collabora.co.uk>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit f7008ebcdc4d936e8b2b1a317d870e907e4d369f
+Author: Guillaume Desmottes <guillaume.desmottes@collabora.co.uk>
+Date:   Fri Apr 17 15:13:35 2015 +0200
+
+    dri3_open: don't leak the reply
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90073
+    Signed-off-by: Guillaume Desmottes <guillaume.desmottes@collabora.co.uk>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 5d219908ce045805647b85d1d302b58887e63c1b
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Sat Jun 27 13:14:38 2015 +1000
+
+    doxygen: Remove doxygen_sqlite3.db with 'make clean'
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 6cc29cf5e2c21dc0937b2c794758be61d3281324
+Author: Rhys Kidd <rhyskidd@gmail.com>
+Date:   Sat Jun 27 13:14:37 2015 +1000
+
+    doxygen: Add doxygen_sqlite3.db to .gitignore
+    
+    Signed-off-by: Rhys Kidd <rhyskidd@gmail.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 846c60fc7df587a24d5bf0835497aa25034538b3
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jul 11 20:36:44 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.6.2
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 6dfce109c27b4e15373adcbbde981140912001ae
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jul 11 20:33:16 2015 +0100
+
+    docs: Add sha256 checksums for the 10.6.2 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 89cbd91b17989ec7eb1cb93ac427a84dca56cd79)
+
+commit 66d354384505fd5ef67b8683db94e8967aba338b
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jul 11 19:46:49 2015 +0100
+
+    Add release notes for the 10.6.2 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 9643cce94c8a1938e3342fb83d025a1e5c2aa79b)
+
+commit ad2c3905d3460a6ddfc6756fc58a78332d82e72f
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed Jul 1 06:31:13 2015 +0100
+
+    tgsi: add DFMA to the opcode infer functions.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit e70d0515603df081916f6f31bb9e0455298b10cc
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 9 15:49:56 2015 +1000
+
+    r600g: move sampler/ubo index registers before temp reg
+    
+    temp_reg needs to be last, as we increment things
+    away from it, otherwise on cayman some tests were overwriting
+    the index regs.
+    
+    Fixes 2 piglit with ARB_gpu_shader5 forced on cayman.
+    
+    Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit c397bd14077b760125604426a99aba00d6193788
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jul 9 15:22:09 2015 +1000
+
+    r600g: fix sampler/ubo indexing on cayman
+    
+    Cayman needs a different method to upload the CF IDX0/1
+    
+    This fixes 31 piglits when ARB_gpu_shader5 is forced on
+    with cayman.
+    
+    Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 1bfa25e88d21f95b9e176232bb091af77c294578
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 10 16:42:18 2015 -0400
+
+    nv50, nvc0: enable at least one color RT if alphatest is enabled
+    
+    Fixes the following piglits:
+      fbo-alphatest-nocolor
+      fbo-alphatest-nocolor-ff
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 4fe15717ce2fc0b1c239d3d7bf9a7bb04fb50dd5
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu Jun 25 10:08:06 2015 -0700
+
+    i965: Remove special case for layered drawbuffer attachments.
+    
+    When binding a layered texture, the layer is already 0.  There's no need
+    to special case this.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit 6be024f44dea7df6608e5a3111deffc61dbf6d6d
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu Jun 25 09:17:38 2015 -0700
+
+    i965/gen6: Set up layer constraints properly for depth buffers.
+    
+    This ports over Chris Forbes' equivalent fixes in gen7_misc_state.c
+    from commit 77d55ef4819436ebbf9786a1e720ec00707bbb19.
+    
+    No Piglit changes on Sandybridge.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit f3a620e2a63956a37367b9e393d4c1ecd41e5d43
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Apr 22 17:46:08 2015 -0700
+
+    i965: Label the repclear shader "meta repclear" rather than "meta clear".
+    
+    Color clears can be performed via two separate shaders - one is the
+    generic "meta clear" shader (in meta.c); the other is the i965 specific
+    "repclear" shader (in brw_meta_fast_clear.c).
+    
+    Giving them separate names makes them distinguishable when reading
+    INTEL_DEBUG=shader_time output.
+    
+    v2: Call it "meta repclear", as suggested by Jason.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit a078e13a7cfba9275bea2a1c7f80ac54bcf40036
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jul 1 17:01:54 2015 -0700
+
+    i965: Fix indentation in emit_control_data_bits().
+    
+    The last patch left the code indented too far.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 0edb084f9d0444c451a08fd2ed7daee2eb8a6f4a
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jul 1 17:01:24 2015 -0700
+
+    i965/gs: Move vertex_count != 0 check up a level; skip one caller.
+    
+    Paul's original code had emit_control_data_bits() skip the URB write if
+    vertex_count was 0.  This meant wrapping every control data write in a
+    conditional write.
+    
+    We accumulate control data bits in a single UD (32-bit) register.  For
+    simple shaders that don't emit many vertices, the control data header
+    will be <= 32-bits long, so we only need to write it once at the end of
+    the shader.
+    
+    For shaders with larger headers, we write out batches of control data
+    bits at EmitVertex(), when (vertex_count * bits_per_vertex) % 32 == 0.
+    On the first EmitVertex() call, the above expression will evaluate to
+    true simply because vertex_count == 0.  But we want to avoid emitting
+    the control data bits, because we haven't accumulated 32-bits worth yet.
+    
+    In other words, the vertex_count != 0 check is really only necessary in
+    the EmitVertex() batching case, not the end-of-thread case.
+    
+    This saves a CMP/IF/ENDIF in every shader that uses EndPrimitive() or
+    multiple streams.  The only downside is that a shader which emits no
+    vertices at all will execute an additional URB write---but such shaders
+    are pointless and not worth optimizing.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 0fae4e451bc60de1138729d20e03100e93cc6f38
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Jul 8 21:26:02 2015 +1000
+
+    glsl: use set rather than old hash table for ir_validate
+    
+    When the new hash table implementation was added to Mesa it claimed to be much
+    faster, see commits 35fd61bd99c1 and 72e55bb6888ff.
+    
+    The set implementation follows the same implementation strategy so this should
+    be faster and there was no need to store a data field.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 75784243df1f5bb0652fb243b37d69f36d493a86
+Author: Chad Versace <chad.versace@intel.com>
+Date:   Thu Jul 9 18:46:21 2015 -0700
+
+    mesa: Fix generation of git_sha1.h.tmp for gitlinks
+    
+    Don't assume that $(top_srcdir)/.git is a directory. It may be a
+    gitlink file [1] if $(top_srcdir) is a submodule checkout or a linked
+    worktree [2].
+    
+    [1] A "gitlink" is a text file that specifies the real location of
+        the gitdir.
+    [2] Linked worktrees are a new feature in Git 2.5.
+    
+    Cc: "10.6, 10.5" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 15d3524ad24a698095cc542cf9a527c8a8615f78
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 2 18:07:27 2015 -0400
+
+    freedreno/a4xx: occlusion query support
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 2b7a54452fbb7e6436aa4ecc700cb2fe2f96ad86
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 9 18:14:36 2015 -0400
+
+    freedreno: update generated headers
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit e44845472a4e04e7b6a82ab6c768f9648729d7e9
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Jul 5 20:17:56 2015 -0400
+
+    freedreno/ir3/sched: fixup new instr's block
+    
+    If we split addr/pred, the original instruction could have originated
+    from a different block.  If we don't fixup the block ptr we hit asserts
+    later (in debug builds).
+    
+    NOTE: perhaps we don't want to try to preserve addr/pred reg's across
+    block boundaries.. this at least needs some thought in case addr/pred
+    writes end up inside a conditional block..
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit a1a6f007823f203755fb54a1f3b7f53ae6cbfef0
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Jul 5 19:53:10 2015 -0400
+
+    freedreno/ir3/ra: fix failed assert for a0/p0
+    
+    The address and predicate register are special, they don't get assigned
+    in RA.  So do a better job of ignoring them rather than hitting later
+    asserts.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 65b2ae510bb07b75f583ecedfd59766621e1cb43
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Jul 5 18:23:25 2015 -0400
+
+    freedreno/ir3: shader-db traces
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 422296e38d04789cc4ca336b46979b44abd19b5d
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 2 18:15:43 2015 -0400
+
+    freedreno: fix crash in fd_invalidate_resource()
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit ab3ba21f979605b90b2fb44482138732b42514b0
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jul 8 14:51:46 2015 -0400
+
+    vc4: unref old fence
+    
+    Some, but not all, state trackers will explicitly unref (and set to
+    NULL) the previous *fence before calling pipe->flush().  So driver
+    should use fence_ref() which will unref the old fence if not NULL.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Acked-by: Eric Anholt <eric@anholt.net>
+
+commit 749dced4b363963b2230a18b0776fa92653116b8
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jul 8 14:51:10 2015 -0400
+
+    ilo: unref old fence
+    
+    Some, but not all, state trackers will explicitly unref (and set to
+    NULL) the previous *fence before calling pipe->flush().  So driver
+    should use fence_ref() which will unref the old fence if not NULL.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Acked-by: Chia-I Wu <olvaffe@gmail.com>
+
+commit 7e0a26defe65dad7ffc8e7a95b5577be51feb2bc
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jul 8 14:48:01 2015 -0400
+
+    freedreno: unref old fence
+    
+    Some, but not all, state trackers will explicitly unref (and set to
+    NULL) the previous *fence before calling pipe->flush().  So driver
+    should use fence_ref() which will unref the old fence if not NULL.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit f60354ee72fdee988fd604994e8b8c8d75fe78be
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jul 8 15:00:51 2015 -0400
+
+    gallium: clarify reference counting for fence
+    
+    Nowhere was it spelled out that the state tracker may expect the pipe
+    driver to unref the old fence.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 0a8af6361eecaba0f34a668328746924b61caa6a
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jul 8 13:30:22 2015 -0400
+
+    xa: don't leak fences
+    
+    XA was never unref'ing last_fence in the various call paths to
+    pipe->flush().  Add this to xa_context_flush() and update the other
+    open-coded calls to pipe->flush() to use xa_context_flush() instead.
+    
+    This fixes a memory leak reported with xf86-video-freedreno.
+    
+    Reported-by: Nicolas Dechesne <nicolas.dechesne@linaro.org>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit f12302b89836a24255674a251f7a6902b4e9af7c
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sun Jun 28 21:16:29 2015 -0700
+
+    i965/vs: Get rid of brw_vs_compile completely.
+    
+    After tearing it out another level or two, and just passing the key and
+    vp directly, we can finally remove this struct.  It also eliminates a
+    pointless memcpy() of the key.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 64390967c1abc326875e495f233afec6e685db72
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Mon Jun 29 22:07:37 2015 -0700
+
+    i965/vs: Remove 'c'/vs_compile from vec4_vs_visitor.
+    
+    At this point, the brw_vs_compile structure only contains the key and
+    gl_vertex_program pointer.  We may as well pass and store them directly;
+    it's simpler and more convenient (key-> instead of vs_compile->key...).
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 13372a0ce746cde6fa6e0aa3c5130e4227f123e0
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sun Jun 28 21:02:15 2015 -0700
+
+    i965/vec4: Move c->last_scratch into vec4_visitor.
+    
+    Nothing outside of vec4_visitor uses it, so we may as well keep it
+    internal.
+    
+    Commit db9c915abcc5ad78d2d11d0e732f04cc94631350 for the vec4 backend.
+    
+    (The empty class will be going away soon.)
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 8524deb8c8fc37abc2cb2717be64a533746a92f9
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sun Jun 28 20:55:25 2015 -0700
+
+    i965/vec4: Move total_scratch calculation into the visitor.
+    
+    This is more consistent with how we do it in the FS backend, and reduces
+    a tiny bit of duplication.  It'll also allow for a bit more tidying.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit dc776ffb900b21421158ef8efbd675bdd47593bc
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sun Jun 28 20:45:47 2015 -0700
+
+    i965/vec4: Move perf_debug about register spilling into the visitor.
+    
+    This patch makes us only issue the performance warning about register
+    spilling if we actually spilled registers.  We also use scratch space
+    for indirect addressing and the like.
+    
+    This is basically commit c51163b0cf7aff0375b1a5ea4cb3da9d9e164044 for
+    the vec4 backend.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 0163c99e8f6959b5d6c7a937a322127cfdf9315f
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Mon Jun 29 21:58:47 2015 -0700
+
+    i965/vec4: Plumb log_data through so the backend_shader field gets set.
+    
+    Jason plumbed this through a while back in the FS backend, but
+    apparently we were just passing NULL in the vec4 backend.
+    
+    This patch passes brw in as intended.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 308c0bf74307af0f3385cdcbb00aa0534ec3e5da
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu Mar 12 10:43:23 2015 -0700
+
+    i965: Switch on shader stage in nir_setup_outputs().
+    
+    Adding new shader stages to a switch statement is less confusing than an
+    if-else-if ladder where all but the first case are fragment shader
+    specific (but don't claim to be).
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 04a57a7ee92403a1d9e01eada69f1ab133fc0b47
+Author: Brian Paul <brianp@vmware.com>
+Date:   Thu Jul 9 16:58:04 2015 -0600
+
+    tgsi: whitespace fixes in tgsi_parse.c
+    
+    Trivial.
+
+commit 1f02a82c8bcac67ced81243631bad6ee1bb810ee
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jul 8 18:05:27 2015 -0600
+
+    gallium: fix comment typo in p_shader_tokens.h
+
+commit 27d8a690c41748b39c65d1ff51bb63e9f860bae1
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jul 8 15:56:15 2015 -0600
+
+    gallium/docs: s/treaded/treated/ typo in tgsi.rst
+    
+    Trivial.
+
+commit a2dde3a8dabbbd45fb3155771bc1802866ff5f61
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Jul 7 23:33:57 2015 -0700
+
+    util: Don't link to SHA1 library if shader-cache is disabled.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit c04339486a26b7bee3575bf30dde4f7152a70211
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Jul 7 18:51:30 2015 -0700
+
+    i965: Set brw->batch.emit only #ifdef DEBUG.
+    
+    It's only used inside #ifdef DEBUG. Cuts ~1.7k of .text, and more
+    importantly prevents a larger code size regression in the next commit
+    when the .used field is replaced and calculated on demand.
+    
+       text     data      bss      dec      hex  filename
+    4945468   195152    26192  5166812   4ed6dc  i965_dri.so before
+    4943740   195152    26192  5165084   4ed01c  i965_dri.so after
+    
+    And surround the emit and total fields with #ifdef DEBUG to prevent
+    such mistakes from happening again.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 0166b4c165271bd7525a91049e58e390cb596c60
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu Jul 9 10:35:19 2015 -0700
+
+    i965/hsw: Implement end of batch workaround
+    
+    This patch can cause an infinite recursion if the previous patch titled, "i965:
+    Track finished batch state" isn't present (backporters take notice).
+    
+    v2: Sent out the wrong patch originally. This patch switches the order of
+    flushes, doing the generic flush before the CC_STATE, and the required
+    workaround flush afterwards
+    
+    v3: Only perform workaround for render ring
+    Add text to the BATCH_RESERVE comments
+    
+    v4 (By Ken): Rebase; update citation to mention PRM and Wa name; combine two
+    blocks.
+    
+    http://otc-mesa-ci.jf.intel.com/job/bwidawsk/171/
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 2cfa64e159a68998b76bdbcd20f8c7810379fce0
+Author: Christian König <christian.koenig@amd.com>
+Date:   Mon Jun 29 10:19:36 2015 +0200
+
+    st/vdpau: fix mixer size checks
+    
+    We need to check what the 3D pipe is able to handle for the mixer, not what
+    the decoder is able to decode. This fixes output of resolutions like 720x1280.
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    CC: mesa-stable@lists.freedesktop.org
+
+commit bbfdf5c17b695c31915e293e1ec858cbcb340894
+Author: Christian König <christian.koenig@amd.com>
+Date:   Fri May 29 15:10:31 2015 +0200
+
+    vl: cleanup video buffer private when the decoder is destroyed
+    
+    Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=90728
+    
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    CC: mesa-stable@lists.freedesktop.org
+
+commit adc816a1e41812e6489a5bc388f80de65504be5b
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Mon Jul 6 23:34:23 2015 +0200
+
+    nv50: avoid segfault with enabled but unbound vertex attrib
+    
+    Before validating vertex arrays we need to check if a VBO is present.
+    Checking if vb->buffer is not NULL fixes the issue.
+    
+    Fixes the following piglit test:
+      gl-3.1-vao-broken-attrib
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit ec151e2f72bd4a239573770aea563d47d0268708
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Mon Jul 6 22:06:08 2015 +0200
+
+    nvc0: fix wrong use of BLIT_SRC_Y_INT for 2D texture copy
+    
+    According to nv50, this should be src->ms_y instead of src->ms_x. This
+    code has been here since 2012, so it's probably a typo which has gone
+    undetected for a long time. I didn't do a full piglit run to check
+    if it fixes some other weird issues.
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit efb36271a92b44ee0e35c4f833610dbea776badd
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jul 8 01:57:00 2015 -0700
+
+    nir: Fix comment above nir_convert_from_ssa() prototype.
+    
+    Connor renamed the parameter, inverting the sense.
+    Update the comment accordingly.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+
+commit e27ea996444743b8cbdca096a4aab47dd405ebf9
+Author: Julien Isorce <julien.isorce@gmail.com>
+Date:   Thu Jun 18 06:53:52 2015 +0100
+
+    egl/dri2: load libglapi.0.dylib on osx
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90903
+    Signed-off-by: Julien Isorce <j.isorce@samsung.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 7d642442d9339e5b65c30802c44091816cdf18be
+Author: Julien Isorce <j.isorce@samsung.com>
+Date:   Thu Jul 2 23:10:38 2015 +0100
+
+    egl: use unix defines on osx with clang
+    
+    I also created a bug in Khronos's bugzilla as you suggested:
+    https://www.khronos.org/bugzilla/show_bug.cgi?id=1356
+    I'll let you know if I get feedback from this bug or elsewhere.
+    
+    Patch with updated error messages:
+    
+    [PATCH] eglplatform: treat __APPLE__ the same way as __unix__ to handle X11 types
+    
+      CC       eglapi.lo
+    ./egldisplay.h:258:19: error: unknown type name 'Display'
+    _eglGetX11Display(Display *native_display, const EGLint *attrib_list);
+    eglapi.c:290:4: error: array size is negative
+       STATIC_ASSERT(sizeof(void*) == sizeof(nativeDisplay));
+    eglapi.c:291:25: warning: cast to 'void *' from smaller integer type
+       'EGLNativeDisplayType' (aka 'int') [-Wint-to-void-pointer-cast]
+       native_display_ptr = (void*) nativeDisplay;
+    eglapi.c:307:32: error: use of undeclared identifier 'Display'
+          dpy = _eglGetX11Display((Display*) native_display, attrib_list);
+    eglapi.c:776:35: error: use of undeclared identifier 'Window'
+          native_window = (void*) (* (Window*) native_window);
+    eglapi.c:847:35: error: use of undeclared identifier 'Pixmap'
+          native_pixmap = (void*) (* (Pixmap*) native_pixmap);
+    
+    Bugzilla Mesa: https://bugs.freedesktop.org/show_bug.cgi?id=90249
+    Bugzilla Khronos: https://www.khronos.org/bugzilla/show_bug.cgi?id=1356
+    Signed-off-by: Julien Isorce <j.isorce@samsung.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit c7f3657450683827446072ad6b1e8fce04078162
+Author: Julien Isorce <julien.isorce@gmail.com>
+Date:   Wed Jul 1 00:33:14 2015 +0100
+
+    darwin: Suppress type conversion warnings for GLhandleARB
+    
+    This patch and its description are inspired by Jose Fonseca's
+    explanations and suggestions.
+    
+    With this patch the following logic applies and only if __APPLE__:
+    
+    When building mesa, GLhandleARB is defined as unsigned long and
+    at some point cast to GLuint in gl function implementations.
+    These exact points are where these errors and warnings appear.
+    
+    When building an application GLhandleARB is defined as void*.
+    Later when calling a gl function, for example glBindAttribLocationARB,
+    it will be dispatched to _mesa_BindAttribLocation. So internally
+    void* will be treated as unsigned long which has the same size.
+    So the same truncation happens when casting it to GLuint.
+    
+    Same when GLhandleARB appears as return value.
+    For mesa it will be GLuint -> unsigned long.
+    For an application it will be GLuint -> unsigned long -> void*.
+    Note that the value will be preserved when casting back to GLuint.
+    
+    When GLhandleARB appears as a pointer there are also separate
+    entry-points, i.e. _mesa_FuncNameARB. So the same logic can
+    be applied.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66346
+    Signed-off-by: Julien Isorce <julien.isorce@gmail.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 64cb014037551c4b7bbed1cf2ca8f1126c970146
+Author: Varad Gautam <varadgautam@gmail.com>
+Date:   Sat Jun 27 11:32:26 2015 +0530
+
+    android: freedreno: add missing components to the build
+    
+    Freedreno requires {a4xx,ir3}_SOURCES and NIR to build.
+    
+    Signed-off-by: Varad Gautam <varadgautam@gmail.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit f1d08c4f75794add30d1714a4cd9ce2bf335148d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri May 1 11:25:20 2015 +0100
+
+    i965: Move pipecontrol workaround bo to brw_pipe_control
+    
+    With the exception of gen8, the sole use of the workaround bo is for
+    emitting pipe controls. Move it out of the purview of the batchbuffer
+    and into the pipecontrol.
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit f2413457937f8f4a92e11379569be69e508d7477
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Jun 10 08:28:13 2015 +0100
+
+    loader: Look for any version of currently linked libudev.so
+    
+    Since there was an ABI break and linking twice against libudev.so.0 and
+    libudev.so.1 causes the application to quickly crash, we first check if
+    the application is currently linked against libudev before dlopening a
+    local handle. However for backwards/forwards compatibility, we need to
+    inspect the application for current linkage against all known versions
+    first. Not doing so causes a crash when both libraries are present and
+    so mesa chooses libudev.so.1 but the application was linked against
+    libudev.so.0.
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    
+    Emil Velikov:
+    
+    I'm ever so slightly concerned that RTLD_NOLOAD is not part of the POSIX
+    standard, thus it's missing on some platforms (*BSD seems ok, while
+    Solaris, MacOS are not).
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit c8d3ebaffc0d7d915c1c19d54dba61fd1e57b338
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Wed Apr 29 13:32:38 2015 +0100
+
+    i965: Query whether we have kernel support for the TIMESTAMP register once
+    
+    Move the query for the TIMESTAMP register from context init to the
+    screen, so that it is only queried once for all contexts.
+    
+    On 32bit systems, some old kernels trigger a hw bug resulting in the
+    TIMESTAMP register being shifted and the low 32bits always zero. Detect
+    this by repeating the read a few times and check the register is
+    incrementing every 80ns as expected and not stuck on zero (as would be
+    the case with the buggy kernel/hw.).
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Martin Peres <martin.peres@linux.intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 38c2ec5ff0bf626578db7b84387279342aa48844
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Jul 7 23:05:45 2015 -0400
+
+    nvc0: turn sample counts off during blit
+    
+    Fixes the following piglits:
+      occlusion_query_meta_fragments
+      occlusion_query_meta_no_fragments
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 87d2e15b1aa6f438983405aa25bf067034c898b0
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Jul 8 09:20:40 2015 +1000
+
+    mesa: use implementation specified MAX_VERTEX_ATTRIBS rather than hardcoded value
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 73d0e7f3451eaeb62ac039d2dcee1e1c6787e3db
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jul 1 20:13:00 2015 -0700
+
+    i965/vs: Fix matNxM vertex attributes where M != 4.
+    
+    Matrix vertex attributes have their columns padded out to vec4s, which
+    I was failing to account for.  Scalar NIR expects them to be packed,
+    however.
+    
+    Fixes 1256 dEQP tests on Broadwell.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 6611f65047575054a38ce83ebfe0331e39e1774f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jul 7 18:28:31 2015 +0200
+
+    st/dri: don't set PIPE_BIND_SCANOUT for MSAA surfaces
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91231
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 10cff5e1ae55406799f4b0ad6b327d4c45dbca11
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 7 13:17:01 2015 -0600
+
+    gallium/hud: display percentages with % suffix
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit a804f5824352e4f714779bd9445c09b66d54bc4a
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 7 09:15:59 2015 -0600
+
+    gallium/hud: add PIPE_DRIVER_QUERY_TYPE_MICROSECONDS for HUD
+    
+    This allows drivers to report queries in units of microseconds and
+    have the HUD display "us" (microseconds), "ms" (milliseconds) or "s"
+    (seconds) on the graph.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 86ebd31c672f389f354e11b7aef4513dc8b76f13
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jul 7 09:13:02 2015 -0600
+
+    gallium/hud: replace byte units flag with pipe_driver_query_type
+    
+    Instead of using a boolean 'is bytes' value, use the pipe_driver_query_type
+    enum type.  This will let us add support for time values in the next patch.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit f025aec906fce0f2918b6f4acb15548dc957ba67
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon Jul 6 15:28:59 2015 -0600
+
+    gallium/os: minor whitespace fixes in os_time.h
+    
+    Trivial.
+
+commit 7009e2683ebb917393d87639f549588f22c03a32
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 6 18:55:26 2015 +0300
+
+    i965/gen4-5: Enable 16-wide dispatch on shaders with control flow.
+    
+    This was probably disabled due to a combination of several bugs in the
+    generator code (fixed earlier in this series) and a misunderstanding
+    of the hardware spec.  The documentation for most control flow
+    instructions mentions among other restrictions:
+    
+     "Instruction compression is not allowed."
+    
+    This however doesn't have any implications on 16 wide not being
+    supported, because none of the control flow instructions have
+    multi-register operands (control flow instructions are not compressed
+    on more recent hardware either, except maybe SNB's IF with inline
+    compare).  In fact Gen4-5 had 16-wide control flow masks and stacks,
+    and the spec mentions in several places that control flow instructions
+    push and pop 16 channels worth of data -- Otherwise there doesn't seem
+    to be any indication that it shouldn't work.
+    
+    Causes no piglit regressions, and gives the following shader-db
+    results on ILK:
+    
+     total instructions in shared programs: 4711384 -> 4711384 (0.00%)
+     instructions in affected programs:     0 -> 0
+     helped:                                0
+     HURT:                                  0
+     GAINED:                                1215
+     LOST:                                  0
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 24842e18aabdaeff41668b0e71e52d32975d2ccd
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 6 19:11:54 2015 +0300
+
+    i965/gen4-5: Program the execution size correctly for DO/WHILE instructions.
+    
+    From the hardware docs for the DO instruction:
+    
+     "Execution size is ignored for this instruction."
+    
+    My observation on ILK hardware contradicts the spec though, channels
+    over the execution size of a DO instruction won't enter the loop, and
+    channels over the execution size of a WHILE instruction will exit the
+    loop after the first iteration -- The latter is consistent with the
+    spec though, there's no claim about the execution size being ignored
+    for the WHILE instruction so it's not completely unexpected that it
+    has an influence on the evaluation of EMask.
+    
+    The execute_size argument of brw_DO() shouldn't have any effect on
+    Gen6 and newer hardware.  On Gen4-5 WHILE instructions inherit the
+    execution size from the matching DO, so this patch should fix them
+    too.  The execution size of BREAK and CONT instructions was already
+    being set correctly.
+    
+    Fixes some 50 piglit tests on Gen4-5 when forced to run shaders with
+    conditional and loop instructions 16-wide,
+    e.g. shaders/glsl-fs-continue-inside-do-while.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 40e2102e528498dd4c03c4567d3522241f4d1f22
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jul 6 18:23:57 2015 +0300
+
+    i965/gen4-5: Set ENDIF dst and src0 fields to the null register.
+    
+    The hardware docs don't mention explicitly what these fields should
+    be, but I've verified experimentally on ILK that using a GRF as
+    destination causes the register to be corrupted when the execution
+    size of an ENDIF instruction is higher than 8 -- and because the
+    destination we were using was g0, eventually a hang.
+    
+    Fixes some 150 piglit tests on Gen4-5 when forced to run shaders with
+    if conditionals 16-wide, e.g. shaders/glsl-fs-sampler-numbering-3.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 248b26429f52d0f19949a083aa3e0aeebcbe2138
+Author: Michel Dänzer <michel.daenzer@amd.com>
+Date:   Mon Jul 6 17:23:07 2015 +0900
+
+    radeonsi: Use param export count from si_llvm_export_vs in si_shader_vs
+    
+    This eliminates the error prone logic in si_shader_vs recalculating this
+    value.
+    
+    It also fixes TGSI_SEMANTIC_CLIPDIST outputs incorrectly not being
+    counted for VS exports. They need to be counted because they are passed
+    to the pixel shader as parameters as well.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91193
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit b0334a9aeb9369fd20854ab2ef4b2ee0087492ab
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Jun 25 16:57:20 2015 -0700
+
+    mesa: Convert some asserts into STATIC_ASSERT.
+    
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 7b06af9d3ca7310197d39d55fc52c265da4bc59e
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Sat Jul 4 03:03:33 2015 +0200
+
+    gallivm: fix lp_build_compare_ext
+    
+    The expansion should always be to the same width as the input arguments
+    no matter what, since these functions should work with any bit width of
+    the arguments (the sext is a no-op on any sane simd architecture).
+    Thus, fix the caller expecting differently.
+    
+    This fixes https://bugs.freedesktop.org/show_bug.cgi?id=91222
+    
+    Tested-by: Vinson Lee <vlee@freedesktop.org>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 128de6f6d7cd0eb5386dcc622afc6e28a8512e7f
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Mon Jul 6 11:04:19 2015 -0700
+
+    mesa: Add a MUST_CHECK macro for __attribute__((warn_unused_result)).
+    
+    In the kernel, this is called __must_check; all our attribute macros in
+    Mesa appear to be uppercase, so I went with that.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 86a3557d7c95ac945eedf42ab095639b255c1bed
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Sat Jul 4 22:40:59 2015 +0100
+
+    glsl: Make sure not to dereference NULL
+    
+    In this bit of code point_five can be NULL if the expression is not a
+    constant. This fixes it to match the pattern of the rest of the chunk
+    of code so that it checks for NULLs.
+    
+    Cc: Matt Turner <mattst88@gmail.com>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 18039078e0254c7cb5e15b7186be05e2e4c10f38
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Sat Jul 4 22:40:58 2015 +0100
+
+    glsl: Add missing check for whether an expression is an add operation
+    
+    There is a piece of code that is trying to match expressions of the
+    form (mul (floor (add (abs x) 0.5) (sign x))). However the check for
+    the add expression wasn't checking whether it had the expected
+    operation. It looks like this was just an oversight because it doesn't
+    match the pattern for the rest of the code snippet. The existing line
+    to check whether add_expr!=NULL was added as part of a coverity fix in
+    3384179f.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91226
+    Cc: Matt Turner <mattst88@gmail.com>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit d9ab95b365f058a46bc43a8cb96b6fff10a13faf
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Mar 4 15:46:57 2015 -0800
+
+    i965: Reserve more batch space to accommodate Gen6 perfmonitors.
+    
+    Ben noticed that I said each PIPE_CONTROL was 4 DWords, but it's
+    actually 5 DWords on Gen6-7.  We've been reserving insufficient space
+    for performance monitoring on Sandybridge, which means it would likely
+    break if you used that functionality.  (Thankfully, no one does...)
+    
+    Also, the existing number of 146 was the result of me flubbing up the
+    arithmetic: it should have actually been 140.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 493af150fb3b1c007d791b24dcd5ea8a92ad763c
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Fri Jul 3 13:15:21 2015 +0100
+
+    i965/skl: Set the pulls bary bit in 3DSTATE_PS_EXTRA
+    
+    On Gen9+ there is a new bit in 3DSTATE_PS_EXTRA that must be set if
+    the shader sends a message to the pixel interpolator. This fixes the
+    interpolateAt* tests on SKL, apart from interpolateatsample-nonconst
+    but that is not implemented anywhere so it's not a regression.
+    
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: "10.6 10.5" <mesa-stable@lists.freedesktop.org>
+
+commit fc2726e4afa6dfb691affed576a38d2b0573465b
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 26 19:01:04 2015 +0200
+
+    winsys/radeon: use os_wait_until_zero in radeon_bo_set_tiling
+
+commit f1be3d8cdde17a9b9ae283e1bab2f46b992d3bf3
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 27 14:03:46 2015 +0200
+
+    radeonsi: don't flush an empty IB if the only thing we need is a fence
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 7316cc92f3810c9e53a22c35343190d8fb7980be
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 27 00:05:26 2015 +0200
+
+    gallium/os: add conversion and wait functions for absolute timeouts
+    
+    Absolute timeouts are used with the amdgpu kernel driver.
+    It also makes waiting for several variables and fences at the same time
+    easier (the timeout doesn't have to be recalculated after every wait call).
+    
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 3836857a777a248dd212ce7a1d7307d2984fda7d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jun 25 20:39:34 2015 +0200
+
+    gallium/os: add os_wait_until_zero (v2)
+    
+    This will be used by radeon and amdgpu winsyses.
+    Copied from the amdgpu winsys.
+    
+    v2: use volatile and p_atomic_read
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 245b464d5caa21680373ae5929dccd294078cc50
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 26 19:01:23 2015 +0200
+
+    gallium/radeon: mark the gpu load thread stop trigger as volatile
+
+commit 872ede6fd136c7f9701cc60268ab195a48e75e67
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 27 13:57:06 2015 +0200
+
+    st/mesa: if a fence isn't returned, assume it's signalled
+    
+    The reason might be that no commands have been submitted before the flush
+    and the GPU is idle.
+
+commit 5a69929683b15d48e4f2fd47e2c816e716ab60ef
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 26 16:34:31 2015 +0200
+
+    gallium: remove redundant pipe_context::fence_signalled
+    
+    fence_finish(timeout=0) does the same thing
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit bd214f030f1cb102a7fe41f40f140d4de2b304c0
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 26 16:28:53 2015 +0200
+
+    gallium: use fence_finish instead of fence_signalled in state trackers
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 3da1c7919d0dffee3887f390fcf29893016e3043
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 26 13:13:16 2015 +0200
+
+    gallium: handle fence_finish timeout in various drivers
+    
+    I copied what fence_signalled does.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit d50598fbad16bfb2b46800b664d382f42af64db0
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 26 13:19:45 2015 +0200
+
+    gallium/docs: remove out-of-date document about D3D11 features
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit d3f4f6b2e9380a91ab61b93c55ab36106345e7b2
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Jun 24 11:58:50 2015 +0200
+
+    radeonsi: fix a hang with DrawTransformFeedback on 4 SE chips
+    
+    Cc: 10.6 10.5 <mesa-stable@lists.freedesktop.org>
+    Acked-by: Christian König <christain.koenig@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit ff0a41b5d524d7f10494e0c9006389d184ed6330
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jul 4 12:53:22 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.5.9
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit c427daa23ef4879550ed3b756d6a901475432c32
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jul 4 12:48:39 2015 +0100
+
+    docs: Add sha256sums for the 10.5.9 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 4a0bd3dcff3c07965828e648e14d89314d262169)
+
+commit 24bf11e9c7846a2ad9624e421c85aaa1d4411cd9
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jul 4 12:09:10 2015 +0100
+
+    Add release notes for the 10.5.9 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 7f40d083748f3a8276e08a2fa0ae7149269ea379)
+
+commit 939dc2850645786b4ff76aa162e44eb9f77be805
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sat Mar 14 12:40:20 2015 +1100
+
+    glsl: update types for unsized arrays of members
+    
+    Assigns a new array type based on the max access of
+    unsized array members. This is to support arrays of arrays.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 7ecb11c81c1e2fc816b36c82657ab139eb1d84b6
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Sun Feb 22 23:35:43 2015 +1100
+
+    glsl: update assert to support arrays of arrays
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 9565e345285c71af064e2bb5e0ee762655310802
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed May 27 12:02:40 2015 +1000
+
+    glsl: allow precision qualifiers for AoA
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit f70719cc4b64e12310dfe8825a8e2d4bce970673
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jul 3 19:09:09 2015 -0400
+
+    nv50/ir: UCMP arguments are float, so make sure modifiers are applied
+    
+    The first argument to UCMP needs to be compared against 0, but the
+    latter arguments are treated as float and need to be able to properly
+    apply neg/abs arguments. Adjust the inferSrcType function accordingly.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 83984f134b4a1e2829cb238c404bc82c98be6082
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Fri Jul 3 09:46:01 2015 +0200
+
+    glsl: add a missing call to _mesa_locale_init
+    
+    After c61bc6e ("util: port _mesa_strto[df] to C"), "make check"
+    fails due to a missing _mesa_locale_init. Fixup this oversight,
+    by moving the stand-alone compiler initializer inside
+    initialize_context_to_defaults().
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
+
+commit 28dda47ae4d974e3e032d60e8e0965c8c068c6d8
+Author: Mario Kleiner <mario.kleiner.de@gmail.com>
+Date:   Sun Jun 28 03:02:31 2015 +0200
+
+    winsys/radeon: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
+    
+    Same problem and fix as for nouveau's ZaphodHeads trouble.
+    
+    See patch ...
+    
+    "nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads."
+    
+    ... for reference.
+    
+    Cc: "10.3 10.4 10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 97ec2c694fe568e375ec7a2b85c1acb1e4666b54
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jul 3 16:20:32 2015 +0200
+
+    r600g: disable single-sample fast color clear due to hangs
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73528
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82186
+    
+    Cc: 10.4 10.5 10.6 <mesa-stable@lists.freedesktop.org>
+
+commit 7744687ddb7f1b223da6a862c282173123921023
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Apr 29 17:57:46 2015 +0200
+
+    docs/relnotes: document create_context_robustness extensions
+
+commit 914365c0eb039f66370cff166428c703e02ad510
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Apr 29 15:27:50 2015 +0200
+
+    r600g,radeonsi: implement get_device_reset_status
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit a34e8714491022a2efde8a44972ac582f098b7ad
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Jun 10 02:50:42 2015 +0200
+
+    dri/common: allow BGRX sRGB visuals
+
+commit 9e127325ef461a11345df7ba6884e77c7168ab37
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed Jun 10 02:53:33 2015 +0200
+
+    mesa: fix sRGB rendering for GLES1
+
+commit 32aa1d769de070c4e8756922571c35deaf12a40a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Jun 9 23:08:57 2015 +0200
+
+    egl: sort extension lists alphabetically
+    
+    and add the missing KHR_gl_colorspace case.
+
+commit b193f2b9b6ae4d071e2cdef62d4398fec5d9aad8
+Author: Anatoli Antonovitch <anatoli.antonovitch@amd.com>
+Date:   Wed Jun 10 14:42:31 2015 +0200
+
+    egl: implement EGL_KHR_gl_texture_3D_image
+    
+    Most of the code has been in place already.
+
+commit a84505c71920f2c70bc8d83cee3e223cd2d976ad
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 2 15:38:34 2015 -0400
+
+    freedreno/ir3: don't be confused by eliminated indirects
+    
+    If an instruction using address register value gets eliminated, we need
+    to remove it from the indirects list, otherwise it causes mayhem in
+    sched for scheduling address register usage.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 2215ff2a5d5f1df5791399e1ff78b56bf06e9102
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 2 14:59:08 2015 -0400
+
+    freedreno/ir3: sched fixes for addr register usage
+    
+    A handful of fixes and cleanups:
+    
+    1) If we split addr/pred, we need the newly created instruction to
+       end up in the unscheduled_list
+    2) Avoid scheduling a write to the address register if there is no
+       instruction using the address register that is otherwise ready
+       to schedule.  Note that I currently don't bother with the same
+       logic for predicate register, since the only instructions using
+       predicate (br/kill) don't take any other src registers, so this
+       situation should not arise.
+    3) few other cosmetic cleanups
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 6b9f5cd5f7b25e9e03104fe279df74817f69fe87
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Jul 2 13:52:38 2015 -0400
+
+    freedreno/ir3: fix indirects tracking
+    
+    cp would update instr->address but not update the indirects array
+    resulting in sched getting confused when it had to 'spill' the address
+    register.  Add an ir3_instr_set_address() helper to set instr->address
+    and also update ir->indirects, and update all places that were writing
+    instr->address to use helper instead.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 0a155538eb7e7870b99fb8b3fd8e2a268361d2c8
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat Jun 27 17:38:57 2015 -0400
+
+    gallium/ttn: mark location specially in nir for color0-writes-all
+    
+    We need to distinguish a shader that has separate writes to each MRT
+    from one which is supposed to write the data from MRT 0 to all the MRTs.
+    In TGSI this is done with a property. NIR doesn't have that, so encode
+    it as a funny location and decode on the other end.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 959b47262b339ad6d1a072c17a1abe9735ead41d
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 15:05:32 2015 -0400
+
+    nir/lower_phis_to_scalar: undef is trivially scalarizable
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+
+commit 29addf50e038d7323a7ac8093d93422c28ad8635
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 19:11:53 2015 -0400
+
+    gallium/ttn: IN/OUT are only array if ArrayID != 0
+    
+    Fixes issue with gallium HUD.  See this thread for details:
+    http://lists.freedesktop.org/archives/mesa-dev/2015-June/087140.html
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit fc73f8ab8cd3975993546b5e0312d595b76d03be
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 19:04:39 2015 -0400
+
+    tgsi: update docs for ArrayID usage
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 7abc1e3286bc4729e144d3a247c2a275e46aaf53
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Thu Jul 2 17:49:19 2015 +0100
+
+    i965/fs: Don't disable SIMD16 when using the pixel interpolator
+    
+    There was a comment saying that in SIMD16 mode the pixel interpolator
+    returns coords interleaved 8 channels at a time and that this requires
+    extra work to support. However, this interleaved format is exactly
+    what the PLN instruction requires so I don't think anything needs to
+    be done to support it apart from removing the line to disable it and
+    to ensure that the message lengths for the send message are correct.
+    
+    I am more convinced that this is correct because as it says in the
+    comment this interleaved output is identical to what is given in the
+    thread payload. The code generated to apply the plane equation to
+    these coordinates is identical on SIMD16 and SIMD8 except that the
+    dispatch width is larger which implies no special unmangling is
+    needed.
+    
+    Perhaps the confusion stems from the fact that the description of the
+    PLN instruction in the IVB PRM seems to imply that the src1 inputs are
+    not interleaved so it wouldn't work. However, in the HSW and BDW PRMs,
+    the pseudo-code is different and looks like it expects the interleaved
+    format. Mesa doesn't seem to generate different code on IVB to
+    uninterleave the payload registers and everything is working so I can
+    only assume that the PRM is wrong.
+    
+    I tested the interpolateAt tests on HSW and did a full Piglit run on
+    IVB and there were no regressions.
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 89bd5ee64c5aa1b977f4ba832cf7772e81ee286d
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed Jul 1 16:00:08 2015 -0700
+
+    nir: Don't allow copying SSA destinations
+    
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+
+commit 197a19f9ed0ba12cc431542ac09f2af0a8bd0bce
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 1 18:22:23 2015 -0400
+
+    mesa/prog: relative offsets into constbufs are not constant
+    
+    The optimization logic relies on being able to read out constbuf values
+    from program parameters. However that only works if there's no relative
+    addressing involved.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91173
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit fe2b748a39ff676949fcefccf739aff967fc38c5
+Author: Mike Stroyan <mike@lunarg.com>
+Date:   Wed Jul 1 10:16:28 2015 -0600
+
+    i965: allocate at least 1 BLEND_STATE element
+    
+    When there are no color buffer render targets, gen6 and gen7 still
+    use the first BLEND_STATE element to determine alpha test.
+    gen6_upload_blend_state was allocating zero elements when
+    ctx->Color.AlphaEnabled was false.
+    That left _3DSTATE_CC_STATE_POINTERS or _3DSTATE_BLEND_STATE_POINTERS
+    pointing to random data from some previous brw_state_batch().
+    That sometimes suppressed depth rendering when those bits
+    happened to mean COMPAREFUNC_NEVER.
+    This produced flickering shadows for dota2 reborn.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80500
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 9d408a41a3ab2fe456ebf2f7af7bad8f6c4bca17
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon Jun 29 10:44:52 2015 +0200
+
+    mesa/st: Add checks for signed/unsigned integer conversions in ReadPixels
+    
+    These checks were in Mesa prior to commit fbba25bba, but they were
+    not necessary for the purpose that Mesa intended (check if we could
+    resolve ReadPixels via memcpy), so that commit took them away.
+    
+    Unfortunately, it seems that some Gallium drivers rely on these
+    checks to make the decision of whether they should fallback to Mesa's
+    implementation of ReadPixels correctly. Michel Dänzer reported that
+    the following piglit test would fail on radeonsi after commit
+    fbba25bba:
+    
+    spec@ext_texture_integer@fbo_integer_readpixels_sint_uint
+    
+    This patch puts the checks back in Gallium, where they are needed.
+    
+    Tested-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit c3215ef204c0fdfc44230adbd423720169d44dcb
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jul 2 00:13:36 2015 -0400
+
+    nv50/ir: don't emit src2 in immediate form
+    
+    In the immediate form, src2 == dst, so it does not need to be emitted.
+    Otherwise it overlaps with the immediate value's low bits.
+    
+    Fixes: 09ee907266 (nv50/ir: Fold IMM into MAD)
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 1087c566e3496d08fe70bc0725073e3022716dc5
+Author: Alexandre Courbot <acourbot@nvidia.com>
+Date:   Thu Jul 2 11:36:55 2015 +0900
+
+    nvc0: tune PREFER_BLIT_BASED_TEXTURE_TRANSFER capability
+    
+    Prefer blit-based texture transfers only if the chip has dedicated VRAM
+    since it would translate to a copy into the same memory on shared-memory
+    chips.
+    
+    Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
+    Reported-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 4f57cdba2767b56eb4752f14ba9853ba6bc06d0e
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 1 15:18:47 2015 -0400
+
+    mesa: reset the source packing when creating temp transfer image
+    
+    Commit 4b249d2ee (mesa: Handle transferOps in texstore_rgba) introduced
+    proper transferops handling, but in updating the source to the newly
+    allocated temporary image neglected to reset the source packing. Set it
+    to the default which should be appropriate for the floats used.
+    
+    Fixes: 4b249d2ee (mesa: Handle transferOps in texstore_rgba)
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91173
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit e212a80db37b0fc9d57beb91dbca1c43ae4476a0
+Author: Alexandre Courbot <acourbot@nvidia.com>
+Date:   Tue Jun 30 22:37:40 2015 +0900
+
+    nvc0: create screen fence objects with coherent attribute
+    
+    This is required on non-coherent architectures to ensure the value of
+    the fence is correct at all times. Failure to do this results in the
+    display freezing for a few seconds every now and then on Tegra.
+    
+    The NOUVEAU_BO_COHERENT is a no-op for coherent architectures, so behavior
+    on x86 should not be affected by this patch.
+    
+    Also bump the required libdrm version to 2.4.62, which introduced this
+    flag.
+    
+    Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
+    Reviewed-by: Martin Peres <martin.peres@free.fr>
+
+commit 2c8f251369072ce382f651ba73ca280517d26e7f
+Author: Nanley Chery <nanley.g.chery@intel.com>
+Date:   Wed Jun 24 10:59:13 2015 -0700
+
+    i965/gen9: use an unreserved surface alignment value
+    
+    Although the horizontal and vertical alignment fields are ignored here,
+    0 is a reserved value for them and may cause undefined behavior. Change
+    the default value to an arbitrary valid one.
+    
+    v2: add comment about chosen value (Topi).
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
+
+commit 80fc9c01dfe4cbbcf1c6b101fcdfdecbda63131e
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Tue Jun 30 17:04:52 2015 -0700
+
+    i965/fs: Use the builder directly for the gen6 interpolation add(32)
+    
+    Now that we can create builders with a bigger width than their parent as
+    long as it's exec_all, we don't need to create the instruction manually.
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit dabec9c293ee29335f5a6d5d1d3c2b7a715605c1
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Tue Jun 30 15:15:44 2015 +0300
+
+    i965/fs: Relax fs_builder channel group assertion when force_writemask_all is on.
+    
+    This assertion was meant to catch code inadvertently escaping the
+    control flow jail determined by the group of channel enable signals
+    selected by some caller, however it seems useful to be able to
+    increase the default execution size as long as force_writemask_all is
+    enabled, because force_writemask_all is an explicit indication that
+    there is no longer a one-to-one correspondence between channels and
+    SIMD components so the restriction doesn't apply.
+    
+    In addition reorder the calls to fs_builder::group and ::exec_all in a
+    couple of places to make sure that we don't temporarily break this
+    invariant in the future for instructions with exec_size higher than
+    the dispatch width.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 8276ba260e5500664b8d8748f3224f73ef221887
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 1 03:47:41 2015 -0400
+
+    nouveau: rename var name for nouveau_vieux to avoid conflict with nouveau
+    
+    We want to require different versions for nouveau and nouveau_vieux.
+    autoconf will only check for NOUVEAU once if both drivers are enabled,
+    meaning both version checks don't get executed. Rename the nouveau_vieux
+    one to NVVIEUX to avoid the issue.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Tested-by: Alexandre Courbot <acourbot@nvidia.com>
+    Tested-by: Martin Peres <martin.peres@free.fr>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit f045b8b2ff5ac75da3e092f482fd1717571d8462
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Jun 29 15:23:45 2015 +0300
+
+    glsl: create program resource list after LinkShader
+    
+    Resource list can be created properly only after LinkShader hook
+    has been called to make sure all dead variables have been removed.
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90925
+
+commit 73afa31f07fe4af605088f6590edc4227652c482
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Jun 29 14:39:05 2015 +0300
+
+    glsl: expose build_program_resource_list function
+    
+    This is required so that we can move resource list creation
+    to happen later.
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit ccaf37f4496eb836866c9daacf21f1f5ac8c6d66
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Jun 29 14:19:00 2015 +0300
+
+    glsl: build stageref mask using IR, not symbol table
+    
+    Instead of using symbol table, build mask by inspecting IR. This
+    change is required by further patches to move resource list creation
+    to happen later when symbol table does not exist anymore.
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 19ea623586aacc995b3f4a1a3ea321ead12dc43c
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:58:17 2015 +0800
+
+    ilo: remove ilo_image_params
+    
+    It suffices to use ilo_image_layout directly.
+
+commit b4c66e4d3eadc04bdffbf4821636299bc49c89a4
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:51:46 2015 +0800
+
+    ilo: add image_init_gen6_transfer_layout()
+    
+    It replaces img_init_for_transfer().
+
+commit 3c6af396f9526bdc8351ff61bcc6c42a3892e6b8
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:46:34 2015 +0800
+
+    ilo: add image_set_gen6_bo_size()
+    
+    It replaces img_calculate_bo_size().
+
+commit 0896d629fded96178daa79c393ba4dae0d56f2ff
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:42:04 2015 +0800
+
+    ilo: add image_set_gen6_{hiz,mcs}
+    
+    They replace img_calculate_{hiz,mcs}_size().
+
+commit 0da3b732ad156f63e32e7520bc1af97e1b733be7
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:38:49 2015 +0800
+
+    ilo: add image_get_gen6_monolithic_size()
+    
+    It replaces img_align().
+
+commit 0faeb21dc0c029b345eaf5545b17b97d5fb8d8da
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:25:32 2015 +0800
+
+    ilo: add image_get_gen6_lods()
+    
+    It replaces img_init_lods() and img_init_layer_height().
+
+commit f1946546c7d4ac22799a8b4944d6c36b77e22626
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:16:11 2015 +0800
+
+    ilo: add image_get_gen{6,7}_alignment()
+    
+    They replace img_init_alignments().
+
+commit c88e6cdfbfd7a7727dbae6b47a803b18aca5d9f4
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:14:36 2015 +0800
+
+    ilo: add image_get_gen6_{hiz,mcs}_enable()
+    
+    They replace img_init_aux().
+
+commit c3b205dbeba9534e0cf707ddd9c075170ccad1bf
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:11:09 2015 +0800
+
+    ilo: add image_get_gen6_tiling()
+    
+    It replaces img_init_tiling().
+
+commit 9e13f5c85f23ff67e685b41a4d439fc443de2dd0
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 29 16:02:52 2015 +0800
+
+    ilo: add image_get_gen6_layout()
+    
+    It replaces only img_init_walk() right now.  It will replace all img_init_*().
+
+commit 5dcb28c3d26828ed1b0e2bd5a0589c5baab04b85
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jul 1 02:11:39 2015 -0400
+
+    nv50/ir: copy joinAt when splitting both before and after
+    
+    The current implementation only moves the joinAt when splitting after
+    the given instruction, not before it. So if you have a BB with
+    
+      foo
+      instr
+      bar
+      joinat
+    
+    and thus with joinAt set, we end up first splitting before instr, at
+    which point the instr's bb is updated to the new bb. Since that bb
+    doesn't have a joinAt set (despite containing one), when splitting after
+    the instr, there is nothing to copy over. Since the joinat will be in
+    the "split" bb irrespective of whether we're splitting before or after
+    the instruction, move it over in either case.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91124
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 4caaa2681e727fa6405ff6de6d1f6c6a356ede34
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jun 29 17:11:59 2015 +1000
+
+    docs: update for llvmpipe fp64 support
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit e35c5717837d9ac6d9722b011852bdf187f29776
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Sat Jun 27 14:21:54 2015 +1000
+
+    gallivm: add fp64 support. (v2.1)
+    
+    This adds support for ARB_gpu_shader_fp64 and ARB_vertex_attrib_64bit to
+    llvmpipe.
+    
+    Two things that don't mix well are SoA and doubles, see
+    emit_fetch_double, and emit_store_double_chan in this.
+    
+    I've also had to split emit_data.chan, to add src_chan,
+    which can be different for doubles.
+    
+    It handles indirect double fetches from temps, inputs, constants
+    and immediates. It doesn't handle double stores to indirects,
+    however it appears the mesa/st doesn't currently emit these,
+    it always does UARL/MOV combos, which will work fine.
+    
+    tested with piglit, no regressions, all the fp64 tests seem to pass.
+    
+    v2:
+    switch to using shuffles for fetch/store (Roland)
+    assert on indirect double stores - mesa/st never emits these (it uses MOV)
+    fix indirect temp/input/constant/immediates (Roland)
+    typos/formatting fixes (Roland)
+    
+    v2.1:
+    cleanup some long lines, emit_store_double_chan cleanups.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 5ccd61217d873567b8d9a7a0fa8f678522ec78cb
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Sat Jun 27 14:21:27 2015 +1000
+
+    tgsi: add infer support for double opcodes.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 1de93f94991c41081c3d9e01c2097401970f4095
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Tue Jun 23 07:53:24 2015 +1000
+
+    freedreno: use consistent version string format
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 5afed936fea56a60300c6ed1228eaccf60c8cbd6
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Tue Jun 23 07:47:58 2015 +1000
+
+    glsl: use consistent version string format
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit ebe3043eeacb073c7dbb6162d8f0aee3bc66eeb1
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Tue Jun 30 17:47:53 2015 -0700
+
+    i965/fs: Fix PIXEL_X/Y in regs_read()
+    
+    PIXEL_X/Y takes a vec2 in the first argument
+
+commit 830f67046ace3c0b95a7f093fe373eeb417a1aad
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:44:35 2015 -0700
+
+    i965/fs: Remove the width field from fs_reg
+    
+    As of now, the width field is no longer used for anything.  The width field
+    "seemed like a good idea at the time" but is actually entirely redundant
+    with the instruction's execution size.  Initially, it gave us the ability
+    to easily set the instruction's execution size based entirely on register
+    widths.  With the builder, we can easily set the sizes explicitly and the
+    width field doesn't have as much purpose.  At this point, it's just
+    redundant information that can get out of sync so it really needs to go.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 7f77abc9edf1348b8c6b82dfff102896cd4a2a58
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 13:57:37 2015 -0700
+
+    i965/fs_generator: Use inst->exec_size for determining hardware reg widths
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 83458e7c53cfc1f344280da6eb9a3b4e2dfdbc00
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 13:49:22 2015 -0700
+
+    i965/fs: Use exec_size instead of dst.width for computing component size
+    
+    There are a variety of places where we use dst.width / 8 to compute the
+    size of a single logical channel.  Instead, we should be using exec_size.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 9a0c883292cf48910a32634f7cc8b855e08c09d5
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 25 11:00:01 2015 -0700
+
+    i965/fs: Use the builder dispatch_width for computing register offsets
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 21803b7b3304f053a48e313951ffddf1d2cd0bd9
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 13:41:38 2015 -0700
+
+    i965/fs: Use the builder dispatch width instead of dst.width for pull constants
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit c9676329dd6c69b2e0b12405c3b4078f7d216f2f
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:34:52 2015 -0700
+
+    i965/fs: Remove exec_size guessing from fs_inst::init()
+    
+    Now that all of the non-explicit constructors are gone, we don't need to
+    guess anymore.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit b624ccc206cbf19989c6562416d7c21b66270577
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:51:51 2015 -0700
+
+    i965/fs_builder: Use the dispatch width for setting exec sizes
+    
+    Previously we used dst.width but the two *should* be the same.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 500525e96019aff551afa8fee841d00ca9ec4c4f
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:50:09 2015 -0700
+
+    i965/fs: Use exec_size for determining regs read/written and partial writes
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 89bc4c78c394e50ddb16cc089bd3ec90681342d7
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:30:43 2015 -0700
+
+    i965/fs: Remove fs_inst constructors that don't take an explicit exec_size
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 67c4c9e1a709508b88d6d31eb1f7cb61d187189e
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:24:27 2015 -0700
+
+    i965/fs: Make better use of the builder in shader_time
+    
+    Previously, we were just depending on register widths to ensure that
+    various things were exec_size of 1 etc.  Now, we do so explicitly using the
+    builder.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit f7dcc1160331462a071c54ca1067f9e2f57b55be
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:07:27 2015 -0700
+
+    i965/fs: Add a builder argument to offset()
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 7fcbe141076d18bf0245de1fd589c82f7c543fdf
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 25 10:55:51 2015 -0700
+
+    i965/fs: Move offset(fs_reg, unsigned) to brw_fs.h
+    
+    Shortly, offset() will depend on the builder so we need it moved to some
+    place where it has access to that.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit b535ba55ed6023f402374aeff79f9f37dbb21df0
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 12:00:54 2015 -0700
+
+    i965/blorp: Explicitly set execution sizes for new'd instructions
+    
+    This doesn't affect instructions allocated using the builder.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 362eff7741f9ca6e49074509120a2e6c03ef7ae6
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 15:58:59 2015 -0700
+
+    i965/fs: Set the builder group for emitting FB-write stencil/AA alpha
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 438e9c8b88c8faf7cbc2a20b03c077342be214e3
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed Jun 17 17:32:24 2015 -0700
+
+    i965/fs: Explicitly set the exec_size on the add(32) in interpolation setup
+    
+    Soon we will start using the builder to explicitly set all the execution
+    sizes.  We could make a 32-wide builder, but the builder asserts that we
+    never grow it which is usually a reasonable assumption.  Since this one
+    instruction is a bit of an odd-ball, we just set the exec_size explicitly.
+    
+    v2: Explicitly new the fs_inst instead of using the builder and setting
+        exec_size after the fact.
+    
+    v3: Set force_writemask_all with the builder instead of directly.  The
+        builder over-writes it if we set it manually.  Also, if we don't have
+        force_writemask_all in the builder it will assert-fail on SIMD32.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit c5a8da5f24eae4479b4ebe6301d780f781e24ed2
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Tue Jun 30 15:51:13 2015 -0700
+
+    i965/fs: Properly handle LOAD_PAYLOAD in fs_inst::regs_read
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 12bc22ef58377191508af91a918efd18e2da7500
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 17:48:27 2015 -0700
+
+    i965/fs: Report the right value in fs_inst::regs_read() for PIXEL_X/Y
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit aca5228011e7b9e96f3bd3a621c88e63ba47a4f3
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed Jun 17 18:02:11 2015 -0700
+
+    i965/fs: Fix fs_inst::regs_read() for uniform pull constant loads
+    
+    Previously, fs_inst::regs_read() fell back to depending on the register
+    width for the second source.  This isn't really correct since it isn't a
+    SIMD8 value at all, but a SIMD4x2 value.  This commit changes it to
+    explicitly be always one register.
+    
+    v2: Use mlen for determining the number of registers read
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 241317d59ab440bdcda25bacaadacfb3b4c2dd93
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jun 19 12:58:37 2015 -0700
+
+    i965/fs: Actually set/use the mlen for gen7 uniform pull constant loads
+    
+    Previously, we were allocating the payload with different sizes per gen and
+    then figuring out the mlen in the generator based on gen.  This meant,
+    among other things, that the higher level passes knew nothing about it.
+    
+    Acked-by: Francisco Jerez <currojerez@riseup.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 3258e1b80d66ec26f14a24a5eae0629a2d23a444
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 18 11:53:08 2015 -0700
+
+    i965/fs: Use a switch statement in fs_inst::regs_read()
+    
+    This makes things a little simpler, more efficient, and quite a bit more
+    readable.
+    
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit aa7d4cecec1a1236d237b83ebf035285f438ee67
+Author: Connor Abbott <cwabbott0@gmail.com>
+Date:   Wed Jun 24 12:55:41 2015 -0700
+
+    nir: remove parent_instr from nir_register
+    
+    It's no longer used.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit f49e51ef44ac6400967731b75db871129b6c45f5
+Author: Connor Abbott <cwabbott0@gmail.com>
+Date:   Wed Jun 24 12:43:15 2015 -0700
+
+    nir: remove nir_src_get_parent_instr()
+    
+    It's now unused.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 0ecdf04060518149e99a098caf4f6025fd6482a4
+Author: Connor Abbott <cwabbott0@gmail.com>
+Date:   Thu Jun 25 16:22:26 2015 -0700
+
+    i965/fs: emit constants only once
+    
+    Before, we would lazily emit a MOV whenever we encountered a use of a
+    constant. Now that we have a dedicated file for SSA values, we can
+    instead only emit the MOV's once, which is more consistent and prevents
+    us from relying on CSE to re-combine the constants when they aren't
+    absorbed into the instruction.
+    
+    total instructions in shared programs: 6078991 -> 6073118 (-0.10%)
+    instructions in affected programs:     402221 -> 396348 (-1.46%)
+    helped:                                1527
+    HURT:                                  0
+    GAINED:                                8
+    LOST:                                  2
+    
+    v2: split this out from the previous commit (Jason)
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 864907e2f14523c130e6ff24c081789bb079bae1
+Author: Connor Abbott <cwabbott0@gmail.com>
+Date:   Wed Jun 24 12:28:47 2015 -0700
+
+    i965/fs: use SSA values directly
+    
+    Before, we would use registers, but set a magical "parent_instr" field
+    to indicate that it was actually purely an SSA value (i.e., it wasn't
+    involved in any phi nodes). Instead, just use SSA values directly, which
+    lets us get rid of the hack and reduces memory usage since we're not
+    allocating a nir_register for every value. It also makes our handling of
+    load_const more consistent compared to the other instructions.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 2b1a1d8b1294f91b7ac563da1f395deba4384765
+Author: Connor Abbott <cwabbott0@gmail.com>
+Date:   Wed Jun 24 05:28:34 2015 -0700
+
+    nir/from_ssa: add a flag to not convert everything from SSA
+    
+    We already don't convert constants out of SSA, and in our backend we'd
+    like to have only one way of saying something is still in SSA.
+    
+    The one tricky part about this is that we may now leave some undef
+    instructions around if they aren't part of a phi-web, so we have to be
+    more careful about deleting them.
+    
+    v2: rename and flip meaning of flag (Jason)
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit af2aea40d29dffd5e584432e0652db114113469b
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 18 20:39:28 2015 +0100
+
+    egl/x11: handle when invalid drawable is passed in create_surface
+    
+    0 is not used as a valid drawable id, as such there is no point in
+    attempting to query its geometry. Just bail out early and provide the
+    more meaningful EGL_BAD_NATIVE_WINDOW to the user.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 4ea5223a95436b76a3f808732c565e9833f84551
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 18 20:22:54 2015 +0100
+
+    egl/wayland: cleanup dri2_wl_create_surface error path
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 0afa6335079093627b47ff08da38bed00972c217
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 18 20:19:32 2015 +0100
+
+    egl/wayland: handle NULL native_window in create_surface
+    
+    Raise EGL_BAD_NATIVE_WINDOW instead of crashing.
+    
+    v2: s/Rise/Raise/ (spotted by Michel)
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 6098ef824467f685fb34914eb3fef73b3ba18c6f
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 18 20:16:46 2015 +0100
+
+    egl/drm: plug memory leak
+    
+    Free the memory for dri2_surf in the unlikely case that one provides
+    NULL for native_window. Also set the relevant EGL_ERROR to provide
+    feedback to the user.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 879dcf07f6a3ab56f23d540b0df94c57e0706094
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 14:24:08 2015 -0400
+
+    gallium/ttn: don't upset nir_validate w/ BRK's
+    
+    Previously we were unconditionally doing ttn_get_src() even for
+    instructions with no src's.  Which created a lot of unnecessary
+    load_const instructions.  These were mostly harmless since NIR opt
+    passes would strip them back out.  But for an ENDIF following a
+    BRK, it would result in load_const instructions created after the
+    NIR break instruction.  Which nir_validate dislikes.
+    
+    But we can actually just dtrt by using NumSrcRegs instead.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit d1f0e019797863b23388bfef53a77f659f749d3c
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 13:48:29 2015 -0400
+
+    gallium/ttn: add TXB2
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 6082515de7c7b4885bd685d88aee32fc9e5103a1
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jun 27 09:58:28 2015 -0400
+
+    gallium/ttn: partial fix for output arrays
+    
+    It isn't quite practical to enable TGSI_ANY_INOUT_DECL_RANGE shader
+    cap yet, at least not in drivers that need lower_to_scalar pass (which
+    right now is all of the ttn users), since the register arrays do not get
+    converted to SSA, which angers nir_lower_alu_to_scalar.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit dc7e6463d3ec6980f1517ff10048e0dbf5bb38ad
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jun 27 10:07:18 2015 -0400
+
+    nir: cleanup open-coded instruction casts
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit 00b6b41482985ba4a81fbb479a47c06ec83f3797
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Jun 29 14:49:08 2015 -0400
+
+    freedreno/ir3: cache defining instruction
+    
+    It is silly to traverse back to find first instruction that writes part
+    of a larger "virtual" register many times per instruction (plus per use
+    as a src to later instructions).  Cache this information so we only
+    figure it out once.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 906da495272b1be4c278f5f7402594e3c52521c1
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sun Jun 28 11:13:58 2015 -0400
+
+    freedreno/ir3: fix RA issue with fanin
+    
+    The fanin source could be grouped, for example with shaders like:
+    
+        VERT
+        DCL IN[0]
+        DCL IN[1]
+        DCL OUT[0], POSITION
+        DCL OUT[1], GENERIC[9]
+        DCL SAMP[0]
+        DCL SVIEW[0], 2D, FLOAT
+        DCL TEMP[0], LOCAL
+          0: MOV TEMP[0].xy, IN[1].xyyy
+          1: MOV TEMP[0].w, IN[1].wwww
+          2: TXF TEMP[0], TEMP[0], SAMP[0], 2D
+          3: MOV OUT[1], TEMP[0]
+          4: MOV OUT[0], IN[0]
+          5: END
+    
+    The second arg to the isaml is IN[1].w, so we need to look at the fanin
+    source to get the correct offset.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit db5105b4b35e064f3934154b45de15422a1bdb0a
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Jun 29 10:21:08 2015 -0400
+
+    freedreno/ir3: add ir3_shader_disasm()
+    
+    Split out most of dump_info() from ir3_cmdline compiler into a function
+    that can be used both by cmdline compiler and also for the disasm debug
+    option.  This way, for FD_MESA_DEBUG=disasm we also get to see input/
+    output registers, etc.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 3244195f48affec1d3c2eb5d0e267c75b046db9f
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 13:55:49 2015 -0400
+
+    freedreno/a4xx: fix for sparse-samplers
+    
+    Some piglit tests, like arb_fragment_program-sparse-samplers, result in
+    having a null samp#0 but valid samp#1.
+    
+    TODO: a3xx probably needs similar fix
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 0a8c8fa770db4cc4ef3db89a5dae1d136361495d
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 13:38:03 2015 -0400
+
+    freedreno/ir3: fix crash in fail path
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 1370fde8af1b0b5c5e6204c0dea6ebffb85dce0a
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 14:32:08 2015 -0400
+
+    freedreno/ir3: fix crash in RA
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit bb2c4b68f78f0105088c11408f8902fb22802125
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 26 10:52:34 2015 -0400
+
+    freedreno/ir3: fixes for indirect writes
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 01b5f1336330f1c0f937fb08a444efc593b43435
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jun 24 18:57:22 2015 -0400
+
+    freedreno/ir3: fix constlen in case of load_uniform_indirect
+    
+    We can't rely on what we get from the assembler if we have indirect
+    addressing of constant file, since the assembler doesn't know the array
+    index.  This got lost in the transition to NIR.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 9350ea6979c48772e1fb55d4f1c7c5a3cfa987b0
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Tue May 19 15:01:49 2015 +0300
+
+    glsl: validate sampler array indexing for 'constant-index-expression'
+    
+    Desktop GLSL < 130 and GLSL ES < 300 allow sampler array indexing where
+    index can contain a loop induction variable. This extra check will warn
+    during linking if some of the indexes could not be turned into constant
+    expressions.
+    
+    v2: warning instead of error for backends that did not enable
+        EmitNoIndirectSampler option (have dynamic indexing)
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit f17c8c287f3581fccb52714fbd4b2ea09a58e3d3
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Jun 29 09:48:52 2015 +0300
+
+    mesa/st: use EmitNoIndirectSampler if !ARB_gpu_shader5
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 2dc2b12ed15abb84c7e2b3c2726dcc1b735abcda
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Jun 29 09:53:45 2015 +0300
+
+    i915: use EmitNoIndirectSampler
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 8852e26e93af1fc4b72bf9d57e847f53e1a1371b
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Wed Jun 24 13:22:43 2015 +0300
+
+    i965: use EmitNoIndirectSampler for gen < 7
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit e4512e1581cf90f56d13cfa6a809832ef3517283
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Tue Jun 9 13:33:39 2015 +0300
+
+    mesa/glsl: new compiler option EmitNoIndirectSampler
+    
+    Patch provides new compiler option for backend to force unroll loops
+    that have non-constant expression indexing on sampler arrays.
+    
+    This makes sure that we can never end up with a shader that uses loop
+    induction variable as sampler array index but does not unroll because
+    of having too many instructions. This would not work without dynamic
+    indexing support.
+    
+    v2: change option name as EmitNoIndirectSampler
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit edb8383c98ee23385731d0fc23a6b6673528a8ec
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Tue Jun 9 13:28:44 2015 +0300
+
+    glsl: Allow dynamic sampler array indexing with GLSL ES < 3.00
+    
+    Dynamic indexing of sampler arrays is prohibited by GLSL ES 3.00.
+    Earlier versions allow 'constant-index-expression' indexing, where
+    index can contain a loop induction variable.
+    
+    Patch allows dynamic indexing for sampler arrays when GLSL ES < 3.00.
+    This change makes 'sampler-array-index.frag' parser test in Piglit
+    pass + fishgl.com works when running Chrome on OpenGL ES 2.0 backend
+    
+    v2: small change and some more commit message (Tapani)
+    v3: refactor checks to make it more readable (Ian Romanick)
+    v4: change warning comment in GLSL ES case (Curro)
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Signed-off-by: Kalyan Kondapally <kalyan.kondapally@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+    Cc: "10.5" and "10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84225
+
+commit d5f1253b0c4637ad996fd0da45095165006d61d3
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Jun 30 02:46:26 2015 -0400
+
+    nv50/ir: fix emission of address reg in 3rd source
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91056
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 21b7c58b8a0cbf18c9ed90c260f01d00fefe0db2
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Jun 23 23:57:31 2015 -0700
+
+    i965: Don't use GCC extension for ?: with only two operands.
+    
+    From the "apparently I don't know C" files...GCC apparently supports:
+    
+        x ?: y
+    
+    which is equivalent to
+    
+        x ? x : y
+    
+    except that it doesn't cause side-effects to occur twice.  See:
+    https://gcc.gnu.org/onlinedocs/gcc/Conditionals.html#Conditionals
+    
+    This was confusing and looked like a typo.  It doesn't really buy us
+    anything, so just write the obvious code in normal C.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit e22e0de0d7c3a412bdd53c6d53825b7646624e3d
+Author: Alexander von Gluck IV <kallisti5@unixzen.com>
+Date:   Mon Jun 29 23:29:44 2015 -0500
+
+    egl/haiku: fix Mesa build under Haiku
+    
+    Performing a goto crosses the initialization of 'BWindow* win'
+    breaking the build. We also fix a missing semicolon.
+
+commit 089e7c378838e7972d2c0588bb84a316fb929a59
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jun 29 21:58:54 2015 -0400
+
+    nv30: align transfer stride to 64, required by blit, sifm transfer impls
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit dacf9efd6326bed1166750680bfaa4e173315eba
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jun 29 21:58:11 2015 -0400
+
+    nv30: allow vertex state creation with 0 elements
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit bad107f2ec24b16118f4d99c54b853277b1a966d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jun 29 02:38:38 2015 -0400
+
+    nv30: reset fragprog bufctx at bind time
+    
+    A clear will do a partial validate, which will in turn reference all the
+    buffers in the bufctx again. However the fragprog last validated might
+    have already been deleted. So reset the bufctx when updating state.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit b875198f1f0b7c90bcb22511c0050b06d8a33ac4
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jun 29 02:16:23 2015 -0400
+
+    nv30: modernize fp upload logic
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 54afb10f0e0a3b72a977c239c0aee04ea5dec967
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jun 29 22:04:50 2015 -0400
+
+    nv30: provide a minimum map buffer alignment
+    
+    Otherwise we return 0, which is out of spec. Return 64 like all the
+    other nouveau drivers.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 3df5aaaa158bfb878e9e5ce467dd654466942880
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Wed May 27 19:28:34 2015 -0700
+
+    i965/skl: Extract the blit command setup in to a helper
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 412c8c8e7eaeec2763bb21a30626544b5a711cb2
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:49 2015 -0700
+
+    i965/gen9: Add XY_FAST_COPY_BLT support to intelEmitCopyBlit()
+    
+    This patch enables using XY_FAST_COPY_BLT only for Yf/Ys tiled buffers.
+    It can be later turned on for other tiling patterns (X,Y) too.
+    
+    V3: Flush in between sequential fast copy blits.
+        Fix src/dst alignment requirements.
+        Make can_fast_copy_blit() helper.
+        Use ffs(), is_power_of_two()
+        Move overlap computation inside intel_miptree_blit().
+    
+    V4: Use _mesa_regions_overlap() function.
+        Add check for src_buffer == dst_buffer.
+        Simplify horizontal and vertical alignment computations.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit ca21c9ab28df24ef015ead28df1dcccd90387df6
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Jun 9 15:18:13 2015 -0700
+
+    mesa/swrast: Use global function _mesa_regions_overlap()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 2a397c7958089f766aa0d3c66016742fdf7494dd
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 5 19:23:46 2015 -0700
+
+    mesa/st: Use global function _mesa_regions_overlap()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 7f282d05a11e0c29bddc1fac8c7028c7e823234f
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 5 19:18:19 2015 -0700
+
+    mesa: Add a new helper function _mesa_regions_overlap()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 69ee316c1daf93b4a53b1c02301ffe9df9598d28
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:48 2015 -0700
+
+    i965/gen9: Allocate YF/YS tiled buffer objects
+    
+    In case of I915_TILING_{X,Y} we need to pass tiling format to libdrm
+    using drm_intel_bo_alloc_tiled(). But in case of YF/YS tiled buffers
+    libdrm need not know about the tiling format because these buffers
+    don't have hardware support to be tiled or detiled through a fenced
+    region. libdrm still needs to know buffer alignment value for its use
+    in kernel when resolving the relocation.
+    
+    Using drm_intel_bo_alloc_for_render() for YF/YS tiled buffers
+    satisfies both the above conditions.
+    
+    V2: Delete min/max buffer size restrictions not valid for i965+.
+        Remove redundant align to tile size statements.
+        Remove some redundant code now when there are no min/max buffer size.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit a1afd59662449803fa4a40a79bdf0db16ffcbcf5
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 5 10:56:40 2015 -0700
+
+    i965: Make a helper function intel_miptree_can_use_tr_mode()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 385cd3e0bed8113659f2db8976b677b090acc9d8
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri Jun 5 10:41:24 2015 -0700
+
+    i965: Make a helper function intel_miptree_release_levels()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit c9dbdc08b9de016ab3b076feac3df4c81009996e
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:49 2015 -0700
+
+    i965/gen9: Plugin the code for selecting YF/YS tiling on skl+
+    
+    Buffers with Yf/Ys tiling end up using meta upload / download
+    paths or the blitter for cases where they used tiled_memcpy paths
+    in case of Y tiling. This has exposed some bugs in meta path. To
+    avoid any piglit regressions on SKL this patch keeps the Yf/Ys
+    tiling disabled at the moment.
+    
+    V3: Make brw_miptree_choose_tr_mode() actually choose TRMODE. (Ben)
+        Few cosmetic changes.
+    V4: Get rid of brw_miptree_choose_tr_mode().
+        Take care of all tile resource modes {Yf, Ys, none} for all
+        generations at one place.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 06f76b7fa68db1ac74ecca015412f71b3a5e9f9c
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Thu Jun 4 16:57:02 2015 -0700
+
+    i965: Make a helper function intel_miptree_set_alignment()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit e566e5203aaba98109a67766cf28991de3358490
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Sun Jun 28 14:51:09 2015 +0200
+
+    mesa/main: free locale at exit
+    
+    In order to save a small leak if mesa is continuously loaded and
+    unloaded, let's free the locale when the shared object is unloaded.
+    
+    Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit c61bc6ed844b39e600cc64e3e552c7bf1894d7ba
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Sun Jun 28 14:51:08 2015 +0200
+
+    util: port _mesa_strto[df] to C
+    
+    _mesa_strtod and _mesa_strtof are only used from the GLSL compiler and
+    the ARB_[vertex|fragment]_program code, meaning that the locale doesn't
+    need to be initialized before the first OpenGL context gets initialized.
+    
+    So let's use explicit initialization from the one-time init code instead
+    of depending on a C++ compiler to initialize at image-load time.
+    
+    Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit de3e323be1bdc40a2a7d724d0f3db7a81a93bbbb
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Sun Jun 28 14:51:07 2015 +0200
+
+    glsl: No need to lock in _mesa_glsl_release_types
+    
+    This function only gets called while mesa is unloading, so there's
+    no potential of racing or multiple calls at the same time. So let's
+    just get rid of the locking.
+    
+    Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 195ab79ddecbdbf1f1714c233df278bff46c13e8
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Sun Jun 28 14:51:06 2015 +0200
+
+    mesa/main: only call _mesa_destroy_shader_compiler once on exit
+    
+    There's no point in calling _mesa_destroy_shader_compiler multiple
+    times on exit; the resources will only be released once anyway.
+    
+    So let's move the atexit-call into the part that is only called
+    once.
+    
+    Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit ba5e1612c892282b930e278b5b98f1578cbe7dbb
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Sun Jun 28 14:51:05 2015 +0200
+
+    dri: don't touch the shader compiler
+    
+    This function is for deleting per-screen resources, and the shader
+    compiler resources are not of such nature. Besides, dri shouldn't
+    need to even know about the presence of a shader compiler.
+    
+    These resources will already be released when mesa gets unloaded,
+    and that should be sufficient.
+    
+    Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 73d2b5af526676fd3f34243cdc155b3e1341b988
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Sun Jun 28 14:51:04 2015 +0200
+
+    mesa/main: Get rid of outdated GDB-hack
+    
+    All of these enums are now in use around in the code, so there's no need
+    to explicitly use them here any more.
+    
+    Signed-off-by: Erik Faye-Lund <kusmabite@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit d15b32ebded278243eb648bb9ecd4c5f5d6d0569
+Author: Grigori Goronzy <greg@chown.ath.cx>
+Date:   Thu May 28 13:01:51 2015 +0200
+
+    clover: implement CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
+    
+    Work-group size should always be aligned to subgroup size; this is a
+    basic requirement, otherwise some work-items will be no-operation.
+    
+    It might make sense to refine the value according to a kernel's
+    resource usage, but that's a possible optimization for the future.
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 249a9df7fce0a6bebc70852ab583c5324208bf06
+Author: Grigori Goronzy <greg@chown.ath.cx>
+Date:   Thu May 28 12:40:29 2015 +0200
+
+    gallium: add PIPE_COMPUTE_CAP_SUBGROUP_SIZE
+    
+    We need this to implement OpenCL's
+    CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit c0ca6c30eaf7f488f154c462a01a8945cb4a3103
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Fri Jun 26 17:54:15 2015 +0100
+
+    i965: Don't try to print the GLSL IR if it has been freed
+    
+    Since commit 104c8fc2c2aa5621261f8 the GLSL IR will be freed if NIR is
+    being used. This was causing it to segfault if INTEL_DEBUG=wm is set.
+    This patch just makes it avoid dumping the GLSL IR in that case.
+    
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit dd9ceb0219f6ca7864940ee1961f1b1890d27cea
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 09:03:19 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.6.1
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 24df6cd0f7723e163d75ed3eb0b7e22adc3ffd7f
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 09:00:24 2015 +0100
+
+    docs: Add sha256 checksums for the 10.6.1 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 6ff3ae8deb1d99037f2f8e5890b09bd984059cf0)
+
+commit 07158c508ac9b933d60dd3e2cd1e748601c44b68
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Mon Jun 29 08:23:14 2015 +0100
+
+    Add release notes for the 10.6.1 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit a871e80fc6237fa029d6970f7e9b414fd097bd98)
+
+commit 6218c68bece0cea671f2940a651119a87ab8b24e
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sun Jun 28 22:17:16 2015 -0700
+
+    Revert "glsl: clone inputs and outputs during linking"
+    
+    This reverts commit c2ff3485b3d48749ea9dcad07bc1a691627dc3e5.
+    
+    Ilia and I noticed a memory leak caused by this patch: at least with
+    fixed-function programs, we clone things using ProgramResourceList as
+    the context before reralloc makes it non-NULL.
+    
+    I believe Tapani found other bugs with these patches, so I'm just going
+    to revert them for now and let him pursue them further.
+
+commit cae701fc8ed0faeaaaafd1cf57f6143031edcab2
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sun Jun 28 22:17:09 2015 -0700
+
+    Revert "i965: Delete linked GLSL IR when using NIR."
+    
+    This reverts commit 104c8fc2c2aa5621261f80aa6b4f76c3163078f1.
+
+commit 61912036d1cb67e52b1cc191bdff8ebded439e8c
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jun 29 00:23:55 2015 -0400
+
+    nv30: avoid leaking blit fp/vp
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit b5622313ea2e070cc0c20c7cdccd844d383713d0
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jun 28 22:30:27 2015 -0400
+
+    nv40: enable base vertex
+    
+    Still appears to have issues with negative indices less than -1M, but
+    that's a corner case of a corner case.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 19a0ba130fd0d0f3b86181a8d05cf5391420360d
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Fri Jun 26 15:05:13 2015 -0700
+
+    i965/vs: Move compute_clip_distance() out of emit_urb_writes().
+    
+    Legacy user clipping (using gl_Position or gl_ClipVertex) is handled by
+    turning those into the modern gl_ClipDistance equivalents.
+    
+    This is unnecessary in Core Profile: if user clipping is enabled, but
+    the shader doesn't write the corresponding gl_ClipDistance entry,
+    results are undefined.  Hence, it is also unnecessary for geometry
+    shaders.
+    
+    This patch moves the call up to run_vs().  This is equivalent for VS,
+    but removes the need to pass clip distances into emit_urb_writes().
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 17e8fca626c908dcbedabf57ce175113840e65c2
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Fri May 29 22:40:07 2015 -0700
+
+    i965: Write at least some data in SIMD8 URB write messages.
+    
+    According to the "URB SIMD8 Write > Write Data Payload" documentation,
+    "The write data payload can be between 1 and 8 message phases long."
+    
+    Apparently, the simulator considers it an error if you issue an URB
+    SIMD8 message with only a header and no actual data to write.
+    
+    v2: Try to put in a better PRM citation, now that the Broadwell docs
+        actually exist (requested by Jordan).
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit b4b4406e1e8dcf577551087cc6eb068e5303efdf
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Wed Jun 24 21:11:27 2015 +0200
+
+    gallium/hud: prevent NULL pointer dereference with pipe_query functions
+    
+    The HUD doesn't check if query_create() fails and it calls other
+    pipe_query functions with NULL pointer instead of a valid query object.
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit a98600b0ebdfc8481c168aae6c5670071e22fc29
+Author: Mario Kleiner <mario.kleiner.de@gmail.com>
+Date:   Fri Jun 5 15:36:52 2015 +0200
+
+    nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
+    
+    The dup'ed fd owned by the nouveau_screen for a device node
+    must also be used as key for the winsys hash table, instead
+    of using the original fd passed in for a screen, to make
+    multi-x-screen ZaphodHeads configurations work on nouveau.
+    
+    The original fd's lifetime differs from that of the nouveau_screen stored
+    in the hash. The hash key is the fd, and in order to compare hash entries
+    we fstat them, so the fd must be around for as long as the screen is.
+    
+    This is an extension of the fix in commit a59f2bb1 (nouveau: dup fd
+    before passing it to device).
+    
+    Cc: "10.3 10.4 10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 2a210b797eacd27a556af9c5e0edca940f9486c5
+Author: Mike Stroyan <mike@lunarg.com>
+Date:   Fri Jun 26 15:15:46 2015 -0600
+
+    meta: Only change and restore viewport 0 in mesa meta mode
+    
+    The meta code was setting a default depth range for all viewports
+    and 'restoring' all viewports to depth range values saved from viewport 0.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 556dd4af76ca0be9b0698139c06e6d12d52e8ff3
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jun 25 03:55:54 2015 +0100
+
+    radeonsi: add support for geometry shader invocations.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 7e5064360c03b8dbdd60298b46e1595418c6cea3
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Thu Jun 25 03:36:23 2015 +0100
+
+    radeonsi: add support for viewport array (v3)
+    
+    This isn't pretty and I'd suggest the pm4 interface builder
+    could be tweaked to do this more efficiently, but I'd need
+    guidance on how that would look.
+    
+    This seems to pass the few piglit tests I threw at it.
+    
+    v2: handle passing layer/viewport index to fragment shader.
+    fix crash in blit changes,
+    add support to io_get_unique_index for layer/viewport index
+    update docs.
+    v3: avoid looking up viewport index and layer in es (Marek).
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 35d83793047b3de31a706fa2a62a233090ea7cfc
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu Jun 18 13:55:52 2015 -0700
+
+    i965/fs: Fix ir_txs in emit_texture_gen4_simd16().
+    
+    We were not emitting the LOD, which led to message lengths of 1 instead
+    of 3.  Setting has_lod makes us emit the LOD, but I had to make changes
+    to avoid emitting the non-existent coordinate as well.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91022
+    Cc: mesa-stable@lists.freedesktop.org
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit ad62ec8316a926682958e7ab52639992867c3755
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jun 26 15:01:22 2015 -0400
+
+    nv50/ir: propagate modifier to right arg when const-folding mad
+    
+    An immediate has to be the second arg of an ADD operation. However we
+    were mistakenly propagating the modifier of the non-folded value to the
+    folded immediate argument.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91117
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 052b3d4e2f159038137504f01e9ff2380a67af8b
+Author: Boyan Ding <boyan.j.ding@gmail.com>
+Date:   Sat Jun 13 15:36:27 2015 +0800
+
+    egl_dri2: Remove trailing whitespaces
+    
+    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 3cf90bb183c7f403ded4c069a78eae1fd71f8eab
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Tue Jun 16 13:53:40 2015 +0100
+
+    i965/skl: Fix aligning mt->total_width to the block size
+    
+    brw_miptree_layout_2d tries to ensure that mt->total_width is a
+    multiple of the compressed block size, presumably because it wouldn't
+    be possible to make an image that has a fraction of a block. However
+    it was doing this by aligning mt->total_width to align_w. Previously
+    align_w has been used as a shortcut for getting the block width
+    because before Gen9 the block width was always equal to the alignment.
+    Commit 4ab8d59a2 tried to fix these cases to use the block width
+    instead of the alignment but it missed this case.
+    
+    I think in practice this probably won't make any difference because
+    the buffer for the texture will be allocated to be large enough to
+    contain the entire pitch and libdrm aligns the pitch to the tile width
+    anyway. However I think the patch is worth having to make the
+    intention clearer.
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 404a90b82786080564fe32716f83ce055b9a934f
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed Jun 10 16:30:56 2015 -0700
+
+    mesa: Enable subdir-objects globally.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 229450520a23ba211fd9f7b3c9bc80f291229ec1
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jun 24 14:06:33 2015 +0100
+
+    mesa: fold duplicated GL/GL_CORE/GLES3 entry in get_hash_params.py
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 7de85694fa606b112b8badd4f07969aef782efb8
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 26 11:38:46 2015 +0800
+
+    ilo: define ILO_IMAGE_MAX_LEVEL_COUNT
+    
+    Define ILO_IMAGE_MAX_LEVEL_COUNT for ilo_image and remove unnecessary header
+    includes.
+
+commit cbdc26aa3f76dc20285caa7e62ca8809cb2fe638
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 25 22:27:04 2015 +0800
+
+    ilo: replace pipe_format by gen_surface_format
+    
+    Replace pipe_format by gen_surface_format in ilo_image.  Change how depth
+    format is specified in ilo_state_zs.
+
+commit 2ee95f6d64aca9e9490c1ac293dd711b5f60a16b
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 25 07:43:47 2015 +0800
+
+    ilo: always use the specified image format
+    
+    Move silent promotion of PIPE_FORMAT_ETC1_RGB8 or combined depth/stencil out
+    of core.
+
+commit dc2e92b2d3d216fc9657f2ef594d7c5d0b03370e
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Wed Jun 24 22:47:30 2015 +0800
+
+    ilo: replace pipe_texture_target by gen_surface_type
+    
+    Replace pipe_texture_target by gen_surface_type in ilo_image.  Change how
+    GEN6_SURFTYPE_CUBE is specified in ilo_state_surface and ilo_state_zs.
+
+commit 934e4a469fd37dac03b8280cce41df4d9f4ed123
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Wed Jun 24 22:46:36 2015 +0800
+
+    ilo: initialize ilo_image from ilo_image_info
+    
+    Convert pipe_resource to ilo_image_info for image initialization.
+
+commit f825fe8e13adfec4cd488bac3663b7e9c90a8c06
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 25 07:18:31 2015 +0800
+
+    ilo: remove ilo_image_disable_aux()
+    
+    Fail resource creation when aux bo allocation fails.
+
+commit 07acf9cb167d4e1f7aebd6837d22e3523ad63109
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Wed Jun 24 12:57:57 2015 +0800
+
+    ilo: improve SURFTYPE_BUFFER validations
+    
+    Reorganize the validations to make them more systematic.
+
+commit 9871646c132ba137709b0bfebfe285985dc351e6
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 26 13:08:32 2015 +0800
+
+    ilo: remove ilo_buffer
+    
+    Since the addition of ilo_vma, it was used only to pad a bo for sampling
+    engine surfaces.  Replace it entirely with these functions
+    
+      ilo_state_surface_buffer_size()
+      ilo_state_vertex_buffer_size()
+      ilo_state_index_buffer_size()
+      ilo_state_sol_buffer_size()
+
+commit 36d107e92cc4c1d2b60e0017dbe998af3a2e8b75
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Tue Jun 23 23:59:31 2015 -0600
+
+    ilo: introduce ilo_vma
+    
+    This cleans up the code a bit and makes ilo_state_vector_resource_renamed()
+    simpler and more robust.  It also allows a single bo to back multiple VMAs.
+
+commit fbba25bba017b3dde5f6613698004b0086bdea00
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Tue Jun 23 08:42:14 2015 +0200
+
+    mesa: remove unnecessary checks in _mesa_readpixels_needs_slow_path
+    
+    readpixels_can_use_memcpy will later call _mesa_format_matches_format_and_type
+    which does much tighter checks than these to decide if we can use
+    memcpy for readpixels.
+    
+    Also, the checks do not seem to be extensive enough anyway, since we are
+    checking for signed/unsigned conversion only when the framebuffer has integers,
+    but the same checks could be done for other types anyway, since as long as
+    there is a signed/unsigned conversion we can't memcpy.
+    
+    No regressions observed on i965/llvmpipe.
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 316206ee9ea06419c9a2ea6fe48d66a0b805319d
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Jun 25 08:08:27 2015 -0700
+
+    i965/vec4_live_variables: Do liveness analysis bottom-to-top
+    
+    From Muchnick's Advanced Compiler Design and Implementation:
+    
+    "To determine which variables are live at each point in a flowgraph, we
+    perform a backward data-flow analysis"
+    
+    Previously, we were walking the blocks forwards and updating the livein and
+    then the liveout.  However, the livein calculation depends on the liveout
+    and the liveout depends on the successor blocks.  The net result is that it
+    takes one full iteration to go from liveout to livein and then another
+    full iteration to propagate to the predecessors.  This works out to an
+    O(n^2) computation where n is the number of blocks.  If we run things in
+    the other order, it's O(nl) where l is the maximum loop depth which is
+    practically bounded by 3.
+    
+    In b2c6ba0c4b21391dc35018e1c8c4f7f7d8952bea, we made this same change in
+    the FS backend to great effect.  Might as well keep it consistent and make
+    the same change for vec4.  Also, this took the time to run the test:
+    
+    ES31-CTS.arrays_of_arrays.InteractionFunctionCalls1
+    
+    from 6:49.62 to 3:31.40 on Timothy Arceri's machine.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit c1151b18f2dce7c6f238f057e9c4fa8d912ce6b5
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Wed Jun 24 20:07:54 2015 -0700
+
+    i965/skl: Use more compact hiz dimensions
+    
+    gen8 had some special restrictions which don't seem to carry over to gen9.
+    Quoting the spec for SKL:
+    "The Z_Height and Z_Width values must equal those present in
+    3DSTATE_DEPTH_BUFFER incremented by one."
+    
+    This fixes nothing in piglit (and regresses nothing).
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 101a73846b48ebac8e2386a25b24659f013c66a4
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jun 25 14:58:37 2015 +0200
+
+    radeonsi: don't fail in si_shader_io_get_unique_index
+    
+    Trivial. Picked from my tessellation branch.
+
+commit c97105ee12e54ab893351ebbda8c2348c899adde
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jun 24 00:04:11 2015 -0700
+
+    i965: Drop brw->depthstencil.stencil_offset from gen8_depth_state.c.
+    
+    This is always 0 - only brw_workaround_depthstencil_alignment ever sets
+    it, and that doesn't run on Gen6+.  My initial Broadwell depth state
+    commit had this mistake.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit 6026f7e8fb993a34f3e2ad1638d7a842a5cefd80
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu Jun 11 01:59:44 2015 -0700
+
+    nir: Recognize max(min(a, 1.0), 0.0) as fsat(a).
+    
+    We already recognize min(max(a, 0.0), 1.0) as a saturate, but neglected
+    this variant (which is also handled by the GLSL IR pass).
+    
+    shader-db results on Broadwell:
+    total instructions in shared programs: 7363046 -> 7362788 (-0.00%)
+    instructions in affected programs:     11928 -> 11670 (-2.16%)
+    helped:                                64
+    HURT:                                  0
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 77a78c65f80323059d892c501ca551ccf324b17d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Jun 25 00:56:32 2015 +0200
+
+    softpipe,llvmpipe: fix PIPE_SHADER_CAP_MAX_INPUTS value
+    
+    PIPE_MAX_SHADER_INPUTS was recently bumped to 80 because of tessellation.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91099
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91101
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit d1663ccb4c664b0f544ed5d6f0761f3ae2435199
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Wed Jun 17 15:50:11 2015 -0700
+
+    i965/bxt: Add basic Broxton infrastructure
+    
+    The thread counts and URB information are all speculative numbers that were
+    based on some CHV numbers at the time.
+    
+    v2:
+    Originally this patch had PCI IDs. I've moved that to a new patch at the end of
+    the series.
+    Remove is_cherryview hack.
+    Add PCI ids. These match the ones defined in the kernel. The only one tested by
+    us is 0x0a84.
+    Capitalize the hex string (Mark)
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Tested-by: "Lecluse, Philippe" <Philippe.Lecluse@intel.com>
+    Reviewed-by: Mark Janes <mark.a.janes@intel.com>
+
+commit 9f261dc18dba0aa4dc43fc560d343ba9ffd486e9
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon Jun 22 11:09:27 2015 -0700
+
+    radeon: Advertise correct GL_QUERY_COUNTER_BITS/GL_SAMPLES_PASSED value
+    
+    Commit b765119c changed the default value of all the counter bits to
+    64.  However, older hardware only has 32 counter bits.
+    
+    This has only been build-tested.  We don't have any tests that verify
+    the advertised value against implementation behavior, so I don't know
+    what additional testing could be done.
+    
+    NOTE: It appears that many Gallium drivers (at least r300 and i915g)
+    have the same problem, but I don't see a way for the state-tracker to
+    determine the counter size.  Marek says, "For Gallium, a new PIPE_CAP or
+    new get_xxx_param function will be needed."
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Cc: Alex Deucher <alexander.deucher@amd.com>
+
+commit b2c6ba0c4b21391dc35018e1c8c4f7f7d8952bea
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Jun 8 16:03:19 2015 -0700
+
+    i965/fs_live_variables: Do liveness analysis bottom-to-top
+    
+    From Muchnick's Advanced Compiler Design and Implementation:
+    
+    "To determine which variables are live at each point in a flowgraph, we
+    perform a backward data-flow analysis"
+    
+    Previously, we were walking the blocks forwards and updating the livein and
+    then the liveout.  However, the livein calculation depends on the liveout
+    and the liveout depends on the successor blocks.  The net result is that it
+    takes one full iteration to go from liveout to livein and then another
+    full iteration to propagate to the predecessors.  This works out to an
+    O(n^2) computation where n is the number of blocks.  If we run things in
+    the other order, it's O(nl) where l is the maximum loop depth which is
+    practically bounded by 3.
+    
+    On my HSW desktop, one particular shadertoy test gets a 20% improvement in
+    compile times:
+    
+    N           Min           Max        Median           Avg        Stddev
+    x  10        15.965        16.884        16.026       16.1822    0.34736846
+    +  10        12.813        13.052        12.876       12.8891    0.06913666
+    Difference at 95.0% confidence
+            -3.2931 +/- 0.235316
+            -20.3501% +/- 1.45417%
+            (Student's t, pooled s = 0.250444)
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 104c8fc2c2aa5621261f80aa6b4f76c3163078f1
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Thu Jun 11 10:41:53 2015 +0300
+
+    i965: Delete linked GLSL IR when using NIR.
+    
+    This is based on Kenneth's patch to delete 'most of the IR'. Due to
+    linker changes to clone variables, we can now free all of IR.
+    
+    Saves 58MB of memory when replaying a Dota 2 trace on Broadwell.
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit c2ff3485b3d48749ea9dcad07bc1a691627dc3e5
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Thu Jun 11 10:41:52 2015 +0300
+
+    glsl: clone inputs and outputs during linking
+    
+    This increases memory pressure during linking but makes it easier
+    for backend to free IR after it is not needed anymore.
+    
+    v2: use resource list as ralloc context in case of relink (Kenneth)
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 4b35ab9bdb4e663f41ff5c9ae5bbcc650b6093f9
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Apr 30 17:04:51 2015 +0100
+
+    i965: Rename intel_emit* to reflect their new location in brw_pipe_control
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 9d4b9f1e0c661e5ed8ce2e71c76ce8cc1adf90dd
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Thu Apr 30 16:53:12 2015 +0100
+
+    i965: Transplant PIPE_CONTROL routines to brw_pipe_control
+    
+    Start trimming the fat from intel_batchbuffer.c. First by moving the set
+    of routines for emitting PIPE_CONTROLS (along with the lore concerning
+    hardware workarounds) to a separate brw_pipe_control.c
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 147cdb53ecd225ea21d8d552607d384217346ecb
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Jun 23 23:17:53 2015 -0700
+
+    nir: Use a switch statement for detecting move-like operations.
+    
+    Suggested by Jason Ekstrand.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit e31bce4041122cd00712b60b4dc1eae6486f6579
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jun 24 10:41:52 2015 -0600
+
+    svga: silence warnings about unexpected shader type
+    
+    Trivial.
+
+commit c1de7df6d4086070e63369ab0af3950f53a03592
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon Jun 22 14:04:09 2015 -0600
+
+    st/mesa: remove unneeded pipe_surface_release() in st_render_texture()
+    
+    This caused us to always free the pipe_surface for the renderbuffer.
+    The subsequent call to st_update_renderbuffer_surface() would typically
+    just recreate it.  Remove the call to pipe_surface_release() and let
+    st_update_renderbuffer_surface() take care of freeing the old surface
+    if it needs to be replaced (because of change to mipmap level, etc).
+    
+    This can save quite a few calls to pipe_context::create_surface() and
+    surface_destroy().
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit a552c897caea31bbff3f16d2af8f5028a58bd344
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jun 24 12:59:55 2015 +0100
+
+    st/wgl: add stw_nopfuncs.h to the sources lists
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 30d67d38246410274713380664be87cd1df9486a
+Author: Julien Isorce <julien.isorce@gmail.com>
+Date:   Tue Jun 23 22:47:05 2015 +0100
+
+    loader: move loader_open_device out of HAVE_LIBUDEV block
+    
+    Fixes the following build issue, when building without libudev.
+    
+    CCLD   libGL.la
+    ./.libs/libglx.a(dri2_glx.o): In function `dri2CreateScreen':
+    src/glx/dri2_glx.c:1186: undefined reference to `loader_open_device'
+    collect2: ld returned 1 exit status
+    
+    CCLD     libEGL.la
+    Undefined symbols for architecture x86_64:
+    "_loader_open_device", referenced from:
+      _dri2_initialize_x11_dri2 in libegl_dri2.a(platform_x11.o)
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91077
+    Signed-off-by: Julien Isorce <j.isorce@samsung.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 390f94e3581384838595185a06d5943089d3f9ab
+Author: Grigori Goronzy <greg@chown.ath.cx>
+Date:   Wed Jun 24 03:40:38 2015 +0200
+
+    winsys/radeon: reduce BO cache timeout
+    
+    1000 ms is an extreme value for typical interactive loads. A large
+    cache has some disadvantages. Search for reusable BOs can take a long
+    time and memory might get exhausted.
+    
+    Let's be rather conservative and use half of the old value,
+    500ms. This is beneficial to some loads on my test system and there
+    are no regressions.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 29aaab2b5f55cc6d9a84f58ce2bb8607e76a9dde
+Author: Grigori Goronzy <greg@chown.ath.cx>
+Date:   Wed Jun 24 03:38:02 2015 +0200
+
+    winsys/radeon: align BO size to page size
+    
+    This is the basic granularity for BO allocations. The alignment also
+    helps with BO reuse by the cached bufmgr.
+    
+    This results in a huge 45% speedup in Metro 2033 Redux on my test
+    system. The game relies on buffer orphaning with very small buffers
+    (hundreds of bytes in size) and that did not work efficiently
+    before. This change may also affect other applications and games.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 32a220f1f60980de50ecefb3b9ab1f754ade8c83
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Tue Jun 9 11:06:56 2015 +0300
+
+    glsl: remove cross validation of interpolation qualifier with GLSL 4.40
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+
+commit 23132cd13baa7b3e9688a118466261a282594b8e
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Jun 23 23:15:22 2015 -0700
+
+    i965: Fix whitespace error in gen8_depth_state.c
+    
+    Trivial.
+
+commit c8b8e8b29b755cd3d80fc5e470f441cb3716152a
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Mon Jun 22 14:20:20 2015 -0700
+
+    i965: Don't count NIR instructions for shader-db.
+    
+    Matt, Jason, and I haven't found this useful in a long time.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 7796e8889a9a2cc1b454dc32d8da3d756404339a
+Author: Michel Dänzer <michel.daenzer@amd.com>
+Date:   Thu May 21 10:49:05 2015 +0900
+
+    winsys/radeon: Unmap GPU VM address range when destroying BO
+    
+    But only when doing so is safe according to the
+    RADEON_INFO_VA_UNMAP_WORKING kernel query.
+    
+    This avoids kernel GPU VM address range conflicts when the BO has other
+    references than the GEM handle being closed, e.g. when the BO is shared.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90537
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90873
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Christian König <christian.koenig@amd.com>
+
+commit 3fd4c80b32e3080d761e176d129a1e46c618584a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 22 17:38:14 2015 -0700
+
+    vc4: Also dump VC4_PACKET_LOAD_TILE_BUFFER_GENERAL.
+
+commit 5458ac01ae046010f3f7e4ddbf8ef18cca04d96c
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 22 17:34:24 2015 -0700
+
+    vc4: Add dumping for VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER.
+
+commit 997f6778414a352457162b73ff5295e51e09ad63
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jun 23 18:08:49 2015 -0700
+
+    vc4: Don't try to CSE color reads.
+    
+    It returns a new value for each sample in the TLB.  We've already avoided
+    trying to get the same index's color multiple times at the vc4_program.c
+    level, so we're not losing anything by doing this.
+
+commit 0f69d59b1c8f5314c1abe18659b96adcfc51a0e5
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jun 23 18:04:00 2015 -0700
+
+    vc4: Make a helper for TLB color writes, too.
+    
+    We've done so for all the other QIR instruction generation in this file.
+
+commit af83eb25812fbda89de62b58f9e59a5408ad4654
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jun 23 17:53:07 2015 -0700
+
+    vc4: Pull the blending operation out to a separate function.
+    
+    It's fairly separate from the rest of the TLB operations at frag end time,
+    and we'll need to run it multiple times to support MSAA blending.
+
+commit 76851f49a5beac01b4eee7892ca95f44b5e18e29
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 22 11:45:27 2015 -0700
+
+    vc4: Clarify size calculation for Z/S writes.
+    
+    It's the same value for loads and stores, because they're basically the
+    same packet.
+
+commit 8fbcabc41a4b2c7d7571585bde2e009e57982da4
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 22 13:14:57 2015 -0700
+
+    vc4: Add an "args" temporary for RCL setup.
+
+commit 19056d04296444afefe71ad8094d327ed38967bf
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 22 17:31:24 2015 -0700
+
+    vc4: Reuse (and extend) the packet.h sizes for dumping.
+
+commit fc0da629b502bb072b945932bae0477eb9b62bd5
+Author: Eric Anholt <eric@anholt.net>
+Date:   Sat Jun 20 15:30:19 2015 -0700
+
+    vc4: Fix printfs for blit fallbacks.
+
+commit e70f5617f1125e1f39a75d7a8c92ddda86a8056d
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jun 23 11:02:12 2015 -0700
+
+    tgsi_to_nir: Fix translation of TXF on MSAA targets.
+    
+    Noticed while trying to add GL_ARB_texture_multisample support to vc4.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 6844d6b7f8398a25eff511541b187afeb1199ce0
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Tue Jun 23 15:39:42 2015 -0700
+
+    i965/fs: Get rid of an unused variable in emit_barrier()
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 40801295d5a3d747661abb1e2ca64d44c0e3dc05
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Jun 22 17:17:56 2015 -0700
+
+    i965: Remove the brw_context from the visitors
+    
+    As of this commit, nothing actually needs the brw_context.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit bcaf4a3f077e3e3fbc66f264fe9124fa920ee70c
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Jun 22 17:30:23 2015 -0700
+
+    i965/vec4_vs: Add an explicit use_legacy_snorm_formula flag
+    
+    This way we can stop doing is_gles3 checks inside of the compiler.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 924b15d7de2a4ae9057cdf6d5d589c9b677d3325
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Jun 22 17:17:51 2015 -0700
+
+    i965/vec4: Turn some _mesa_problem calls into asserts
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 663f8d121d792edee5c012461bfd0b650011ff4a
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jun 19 17:29:42 2015 -0700
+
+    i965/vs: Pass the current set of clip planes through run() and run_vs()
+    
+    Previously, these were pulled out of the GL context conditionally based on
+    whether we were running ff/ARB or a GLSL program.  Now, we just pass them
+    in so that the visitor doesn't have to grab them itself.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 4af62c0f5cbadc762abb1bd2e59f44ca220e3f0a
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jun 19 17:25:28 2015 -0700
+
+    i965/fs: Add a do_rep_send flag to run_fs
+    
+    Previously, we were pulling it from brw->do_rep_send
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 1b0f6ffa15b25e8601d60fe1ea74e893f7d33cf5
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jun 19 15:40:09 2015 -0700
+
+    i965: Pull calls to get_shader_time_index out of the visitor
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit c7893dc3c590b86787d8118e3920debaea3f16da
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jun 19 14:46:03 2015 -0700
+
+    i965: Use a single index per shader for shader_time.
+    
+    Previously, each shader took 3 shader time indices which were potentially
+    at arbitrary points in the shader time buffer.  Now, each shader gets a
+    single index which refers to 3 consecutive locations in the buffer.  This
+    simplifies some of the logic at the cost of having a magic 3 in a few places.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 6e255a3299c9ec5208cb5519b5da2edb0ce2972b
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Apr 16 15:28:17 2015 -0700
+
+    i965: Add compiler options to brw_compiler
+    
+    This creates the options at screen creation time and then we just copy them
+    into the context at context creation time.  We also move is_scalar to the
+    brw_compiler structure.
+    
+    We also end up manually setting some values that the core would have set by
+    default for us.  Fortunately, there are only two non-zero shader compiler
+    option defaults that we aren't overriding anyway so this isn't a big deal.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 073294d3ef20d0dbeffcc38aff3d69eda624ee75
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Jun 22 17:01:22 2015 -0700
+
+    i965/fs: Plumb compiler debug logging through brw_compiler
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 3fd457c9ddd4b9f730e70bfd19b2f9eeeeaef089
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Jun 22 16:32:06 2015 -0700
+
+    i965/fs: Do the no16 perf logging directly in fs_visitor::no16()
+    
+    While we're at it, we'll drop the note about 10-20% performance loss.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit f45bf97f30f2feacf8f976271a43feea70e5c382
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon Jun 22 16:30:04 2015 -0700
+
+    i965/fs: Make no16 non-variadic
+    
+    We never used the fact that it was variadic anyway.
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 1bc3b62d4aad22b94b8031c29c654a8f90ccc24d
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Apr 16 17:39:13 2015 -0700
+
+    i965: Move INTEL_DEBUG variable parsing to screen creation time
+    
+    v2: Do bufmgr set_debug and set_aub_dump at screen time as well.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit d7565b7d65f8203c20735a61b86e9158b8ec4447
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Apr 16 14:34:04 2015 -0700
+
+    i965: Remove the dependence on brw_context from the generators
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit e639a6f68e701f23b977a49c45d646c164991d36
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Thu Apr 16 14:13:52 2015 -0700
+
+    i965: Plumb compiler debug logging through a function pointer in brw_compiler
+    
+    v2 (Ken): Make shader_debug_log a printf-like function.
+    v3 (Jason): Add a void * to pass the brw_context through
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit b0ad3ce4e7b9a23ab8fad4823e3c1094d6cf42a6
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat Apr 18 12:23:33 2015 -0700
+
+    mesa: Add a va_args variant of _mesa_gl_debug().
+    
+    This will be useful for wrapper functions.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 630764407aeba4acf9364739bafb0e3516f72e31
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Fri Jun 19 17:19:38 2015 -0700
+
+    i965: Replace some instances of brw->gen with devinfo->gen
+
+commit ae097580ac49fbfaf184c89c68cb42b755f62939
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 22 11:42:15 2015 -0700
+
+    i965: Initialize backend_shader::mem_ctx in its constructor.
+    
+    We were initializing it in each subclasses' constructors for some
+    reason.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit d8eeb4917ca39a0698731f64933c85a7c44e9247
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 22 11:20:32 2015 -0700
+
+    i965: Assert that the GL primitive isn't out of range.
+    
+    Coverity sees the if (mode >= BRW_PRIM_OFFSET (128)) test and assumes
+    that the else-branch might execute for mode up to 127, which would be out
+    of bounds.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 4d93a07c45c8aa4cb3adbfcb9d61dcb54d8c404f
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 22 11:09:49 2015 -0700
+
+    i965/cfg: Assert that cur_do/while/if pointers are non-NULL.
+    
+    Coverity sees that the functions immediately below the new assertions
+    dereference these pointers, but is unaware that an ENDIF always follows
+    an IF, etc.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 04758d25b4240129d4fa8784608a54c40bff3568
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Fri Jun 19 20:40:15 2015 -0700
+
+    mesa: Delete unused ICEIL().
+    
+    Can't find any uses of it in git history.
+    
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit a49328d58d1e3e143f9434976d9f3574acefc4ea
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 22 10:59:33 2015 -0700
+
+    i965/fs: Don't mess up stride for uniform integer multiplication.
+    
+    If the stride is 0, the source is a uniform and we should not modify the
+    stride.
+    
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91047
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 3fa9bb81ec8b21f472de32e08d0caf917239da08
+Author: Boyan Ding <boyan.j.ding@gmail.com>
+Date:   Sat Jun 13 15:33:20 2015 +0800
+
+    egl/x11: Remove duplicate call to dri2_x11_add_configs_for_visuals
+    
+    The call to dri2_x11_add_configs_for_visuals (previously
+    dri2_add_configs_for_visuals) was moved downwards in commit f8c5b8a1,
+    but appeared again in its original position after its rename in
+    d019cd81. Remove it.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 20dca37a20f90762df13efd0e0ec97002b6a89f2
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Wed Jun 17 15:50:13 2015 -0700
+
+    i965/gen9: Don't use encrypted MOCS
+    
+    On gen9+ MOCS is an index into a table. It is 7 bits, and AFAICT, bit 0 is for
+    doing encrypted reads.
+    
+    I don't recall how I decided to do this for BXT. I don't know whether this patch was
+    ever needed, since it seems nothing is broken today on SKL. Furthermore, this
+    patch may no longer be needed because of the ongoing changes with MOCS setup. It
+    is what is being used/tested, so it's included in the series.
+    
+    The chosen values are the old values left shifted. That was also an arbitrary
+    choice.
+    
+    v2: Use shift in MOCS to make it clear what we're doing. (Ken)
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 78d58e642549fbf340fdb4fca06720d2891216a8
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jun 21 19:03:35 2015 -0400
+
+    nv50,nvc0: make sure to pushbuf_refn before putting bo into pushbuf_data
+    
+    Without first running the bo through pushbuf_refn, the nouveau drm
+    library will have uninitialized structures regarding this bo, and will
+    insert incorrect data.
+    
+    This fixes supertuxkart 0.9 crash on start (where it ends up doing a lot
+    of indirect draws).
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 9fcbf515b431a92e0289f234ab77a796cf2a5612
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun Jun 21 15:00:16 2015 -0400
+
+    nvc0: always put all tfb bufs into bufctx
+    
+    Since we clear the TFB bufctx binding point above, we need to put all of
+    the active tfb's back in, even if they haven't changed since last time.
+    Otherwise the tfb may get moved into sysmem and the underlying mapping
+    will generate write errors.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit fccf012adc0d3aad877de095244324aa1d2d046a
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Tue Jun 23 00:16:59 2015 -0400
+
+    glsl: binding point is a texture unit, which is a combined space
+    
+    This fixes compilation failures in Dota 2 Reborn where a texture unit
+    binding point was used that was numerically higher than the max
+    per stage.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Tested-by: Nick Sarnie <commendsarnex@gmail.com>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 59f8d4ee793a1b620fb385f53b4dfe10e4b70f19
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 19:35:19 2015 +0100
+
+    android: egl: do not link against libglapi
+    
+    The only reason we touch glapi is to dlopen it in order to:
+     - make sure that the unresolved _glapi* symbols in the dri modules are
+    provided.
+     - fetch glFlush() and use it at various stages in the dri2 driver.
+    
+    Cc: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Cc: Eric Anholt <eric@anholt.net>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit a0dc6b7824d3b9095919e29393a379ea7f9c1318
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 19:22:38 2015 +0100
+
+    gbm: do not (over)link against libglapi.so
+    
+    The whole of GBM does not rely on even a single symbol from the GL
+    dispatch library, unsurprisingly. The only need for it comes from the
+    unresolved symbols in the DRI modules, which are now correctly handled
+    with Frank's commit.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 828f13330c9384f2b55c8b0f962d93a74ecd0601
+Author: Frank Henigman <fjhenigman@chromium.org>
+Date:   Thu Nov 6 16:29:26 2014 -0500
+
+    gbm: dlopen libglapi so gbm_create_device works
+    
+    Dri driver libs are not linked to pull in libglapi so gbm_create_device()
+    fails when it tries to dlopen them (unless the application is linked
+    with something that does pull in libglapi, like libGL).
+    Until dri drivers can be fixed properly, dlopen libglapi before trying
+    to dlopen them.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Frank Henigman <fjhenigman@google.com>
+    [Emil Velikov: Drop misleading bugzilla link, mention that libname differs]
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 6ed52f78a05a2b56eb521c50767b7a991df51564
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 17:48:30 2015 +0100
+
+    configure: drop unused variable GBM_BACKEND_DIRS
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 994be5143a097ae2cf504ba344362edfee388ac3
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 17:46:41 2015 +0100
+
+    configure: error out when building libEGL without shared-glapi
+    
+    The latter is a hard requirement and without it we'll error out later
+    on in the build.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit ddc886b5bfe5976fa2e5f49eeefa918736f1aa97
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 17:44:02 2015 +0100
+
+    configure: error out when building backend-less libEGL
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 2752e629e71a8b4345b61f55b09d5ed04fc5e4b8
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 17:37:18 2015 +0100
+
+    drivers/x11: drop unneeded HAVE_X11_DRIVER check
+    
+    Already handled in the Makefile which includes the drivers/x11 subdir.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 92dc50786224411dc0aeff18b80a1995fcb04dc1
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 19:43:23 2015 +0100
+
+    configure: allow building shared-glapi powered libgl-xlib
+    
+    Cc: Brian Paul <brianp@vmware.com>
+    Cc: Adam Jackson <ajax@redhat.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+    Acked-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 5c37ababae6069ed73522bee35bca6228a80be77
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 17:30:13 2015 +0100
+
+    targets/libgl-xlib: fix the build against shared_glapi
+    
+    Cc: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+    Acked-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit b92233f2a57ec09e9266ba4ed7f200904b784b9a
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 17:28:25 2015 +0100
+
+    drivers/x11: fix the build against shared_glapi
+    
+    Cc: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+    Acked-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 6d744aaf4e427b6b0b3d8d35d756592a50abbb97
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Fri Jun 19 17:19:46 2015 +0100
+
+    configure: warn about shared_glapi & xlib-glx only when both are set
+    
+    Printing out the message when shared_glapi is disabled only leads to
+    confusion.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 06109db47b69867da0c7537f97b1aa8650598a08
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 18 23:30:34 2015 +0100
+
+    glapi: remap_helper.py: remove unused argument 'es'
+    
+    Identical to the previous commit - not used by any of the Autotools,
+    Android or SCons builds.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Dylan Baker <baker.dylan.c@gmail.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit ec16bb62acfdfe6023d1ba6456ae8a19f14c5d80
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 18 23:28:05 2015 +0100
+
+    glapi: gl_table.py: remove unused variable 'es'
+    
+    None of the three build systems ever set it, as such we can clear things
+    up a bit.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Dylan Baker <baker.dylan.c@gmail.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 4f8f790525f1adcb5259cb72b7c9dbfd121867c6
+Author: Derek Foreman <derekf@osg.samsung.com>
+Date:   Wed Jun 17 11:28:51 2015 -0500
+
+    egl: Use the loader_open_device() helper to do open with CLOEXEC
+    
+    We've moved the open with CLOEXEC idiom into a helper function, so
+    call it instead of duplicating the code.
+    
+    This also replaces a couple of opens that didn't properly do CLOEXEC.
+    
+    Signed-off-by: Derek Foreman <derekf@osg.samsung.com>
+    Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 324ee9b391ea2db4b74709d30a131e79055bf071
+Author: Derek Foreman <derekf@osg.samsung.com>
+Date:   Wed Jun 17 11:28:50 2015 -0500
+
+    glx: Use loader_open_device() helper
+    
+    We've moved the open with CLOEXEC idiom into a helper function, so
+    call it instead of duplicating the code here.
+    
+    Signed-off-by: Derek Foreman <derekf@osg.samsung.com>
+    Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 9c927463492dea14d82ebdd77f711f86b0e6fc5e
+Author: Derek Foreman <derekf@osg.samsung.com>
+Date:   Wed Jun 17 11:28:49 2015 -0500
+
+    loader: Rename drm_open_device() to loader_open_device() and share it
+    
+    This is already our common idiom for opening files with CLOEXEC and
+    it's a little ugly, so let's share this one implementation.
+    
+    Signed-off-by: Derek Foreman <derekf@osg.samsung.com>
+    Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit aaac913e901229d11a1894f6aaf646de6b1a542c
+Author: Derek Foreman <derekf@osg.samsung.com>
+Date:   Wed Jun 17 11:28:48 2015 -0500
+
+    egl/drm: Duplicate fd with F_DUPFD_CLOEXEC to prevent leak
+    
+    Replacing dup() with fcntl F_DUPFD_CLOEXEC creates the duplicate
+    file descriptor with CLOEXEC so it won't be leaked to child
+    processes if the process fork()s later.
+    
+    Signed-off-by: Derek Foreman <derekf@osg.samsung.com>
+    Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit be5f71d4a52c9ef72f63bb6c339fe0110f2027af
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Tue Jun 23 12:18:06 2015 +0100
+
+    draw,tgsi: Assume TGSI_PROPERTY_GS_INVOCATIONS default of 1.
+    
+    If the shader doesn't specify number of invocations, assume one.
+    
+    This fixes geometry shaders on state trackers other than Mesa (and
+    probably graw tests too.)
+    
+    Trivial.
+
+commit 634cfb9a458bcc1051b60ab13bd12e17bba0f71b
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Fri Jun 19 13:53:46 2015 +0100
+
+    glsl: Specify the shader stage in linker errors due to too many in/outputs.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 4731be701f3094666c24c143d9d6ddf53c456c39
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue Jun 23 15:55:30 2015 +1000
+
+    docs: update GL3 with softpipe/llvmpipe gpu_shader5 pieces.
+    
+    This just updates the bits I've added in the previous few patches.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 1a71fbe28ca0525b618f6fb9d7354f3a6589af2f
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jun 22 13:59:25 2015 +1000
+
+    draw/gallivm: add invocation ID support for llvmpipe.
+    
+    This extends the draw code to add support for invocations.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 40d225803ecfa805b4dea4ee0ebd04df00ca8827
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed May 20 10:32:32 2015 +1000
+
+    draw/tgsi: implement geom shader invocation support.
+    
+    This is just for softpipe, llvmpipe won't work without
+    some changes.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 24e77cb09fda9a57d4a8288ced3e01df4c8ac280
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Wed May 27 18:37:17 2015 +1000
+
+    tgsi: handle indirect sampler arrays. (v2)
+    
+    This is required for ARB_gpu_shader5 support in softpipe.
+    
+    v2: add support to txd/txf/txq paths.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 1762568fd39b9be42d963d335e36daea25df7044
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jun 10 00:52:07 2015 -0700
+
+    nir: Allow vec2/vec3/vec4 instructions in the select peephole pass.
+    
+    These are basically just moves, so they should be safe as well.
+    
+    When disabling i965's GLSL IR level scalarizer (channel expressions)
+    pass, I started seeing NIR code like this:
+    
+            if ssa_21 {
+                    block block_1:
+                    /* preds: block_0 */
+                    vec4 ssa_120 = vec4 ssa_82, ssa_83, ssa_84, ssa_30
+                    /* succs: block_3 */
+            } else {
+                    block block_2:
+                    /* preds: block_0 */
+                    /* succs: block_3 */
+            }
+            block block_3:
+            /* preds: block_1 block_2 */
+            vec4 ssa_33 = phi block_1: ssa_120, block_2: ssa_2
+    
+    Previously, the GLSL IR scalarizer pass would break the vec4 into a
+    series of fmovs, which were allowed by the peephole pass.  But with
+    the vec4 operation, they were not.  We want to keep getting selects.
+    
+    Normal i965 on Broadwell:
+    instructions in affected programs:     200 -> 176 (-12.00%)
+    helped:                                4
+    
+    With brw_fs_channel_expressions() disabled:
+    instructions in affected programs:     1832 -> 1646 (-10.15%)
+    helped:                                30
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+
+commit 94e3864707e48d4b1d5fb5f88a01370a73ddb0cb
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Fri May 15 09:58:42 2015 -0700
+
+    i965: Add and fix comments in brw_vue_map.c.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 38eb9015e3be9b93248e64b6befce16872107a7c
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Fri May 15 09:54:23 2015 -0700
+
+    i965: Split VUE map handling out of brw_vs.c into brw_vue_map.c.
+    
+    This was originally only used by the vertex shader, but it's now used by
+    the geometry shader as well, and will also eventually be used for
+    tessellation control and evaluation shaders.
+    
+    I suspect it will be easier to find in a file named after the concept.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 90754d2df05eafe1a3ee3cd9bb1611a19099fc49
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Wed Jun 3 21:35:51 2015 -0700
+
+    i965/gen9: Implement Push Constant Buffer workaround
+    
+    This implements a workaround (exact excerpt as a comment in the code). The docs
+    specify [clearly, after you struggle for a while] that the offset isn't relative
+    to state base. This actually makes sense. This fixes hangs on SKL.
+    
+    Buffer #0 is meant to be used for normal uniforms.
+    Buffer #1 is typically used for gather constants when using RS.
+    Buffer #1-#3 could be used to push a bunch of UBO data which would just be
+      somewhere in memory, and not relative to the dynamic state.
+    
+    NOTE: I've moved away from the ternary operator for the new gen9 conditions.
+    Admittedly it's probably not great to do this, but I really want to fix this all
+    up in the subsequent patch and doing it here makes that diff a lot nicer. I want
+    to split out the gen8/9 code to make the function a bit more readable, but to
+    keep this easily cherry-pickable I am doing this fix first. If we decide not to
+    merge the cleanup patch then I can revisit this.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Tested-by: Valtteri Rantala <Valtteri.rantala@intel.com>
+
+commit 2b07b8d104a93c26ac92edb3ba72328cdc2dcb52
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon Jun 22 08:29:49 2015 -0600
+
+    mesa: use _mesa_lookup_enum_by_nr() in print_array()
+    
+    Print GL_FLOAT, etc. instead of hex value.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 878714142999ca6a6aa03d962e01da94d44c8574
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 22 14:27:19 2015 +0800
+
+    ilo: emit 3DPRIMITIVE from gen6_3dprimitive_info
+    
+    It allows us to remove ilo_ib_state::draw_start_offset and
+    ILO_PRIM_RECTANGLES.  gen6_3d_translate_pipe_prim() is also replaced by
+    ilo_translate_draw_mode().
+
+commit 58f95b332d0cbad226f5bb2e96cd0cad8864fe79
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 22 14:15:52 2015 +0800
+
+    ilo: align vertex buffer size in buf_create()
+    
+    With ilo_format.[ch] moved out of core, the aligning of vertex buffers does
+    not belong to core anymore.
+
+commit 513bc5d90b8e9237bd6a04da5d0dee175ff134f6
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 22 14:06:13 2015 +0800
+
+    ilo: move ilo_format.[ch] out of core
+    
+    They provide PIPE_FORMAT_x to GEN6_FORMAT_x translation as well as some
+    convenient helpers.  Move them out of core.
+
+commit 3547bb078307995e92d509037bc86af7fd60c8c3
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 22 13:37:05 2015 +0800
+
+    ilo: add ilo_state_surface_valid_format()
+    
+    Check if a surface format can be used for the specified access type.
+
+commit aa3e5e0dded4d732ea46083201940bd23214785c
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 22 13:15:24 2015 +0800
+
+    ilo: add ilo_state_vf_valid_element_format()
+    
+    Check if a surface format can be used as a VE format.
+
+commit da8300cb03e8cf1f37b5573a2db026fd28e0a3c5
+Author: Alexandre Courbot <acourbot@nvidia.com>
+Date:   Fri Oct 17 15:05:32 2014 +0900
+
+    nvc0: use NV_VRAM_DOMAIN() macro
+    
+    Use the newly-introduced NV_VRAM_DOMAIN() macro to support alternative
+    VRAM domains for chips that do not have dedicated video memory.
+    
+    Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Martin Peres <martin.peres@free.fr>
+
+commit f22406837ff5dc881d8496d05ab001204b14eaf5
+Author: Alexandre Courbot <acourbot@nvidia.com>
+Date:   Fri Oct 17 14:58:11 2014 +0900
+
+    nouveau: support for custom VRAM domains
+    
+    Some GPUs (e.g. GK20A, GM20B) do not embed VRAM of their own and use
+    the system memory as a backend instead. For such systems, allocating
+    objects in VRAM results in errors since the kernel will not allow
+    VRAM objects allocations.
+    
+    This patch adds a vram_domain member to struct nouveau_screen that can
+    optionally be initialized to an alternative domain to use for VRAM
+    allocations. If left untouched, NOUVEAU_BO_VRAM will be used for
+    systems that embed VRAM, and NOUVEAU_BO_GART will be used for VRAM-less
+    systems.
+    
+    Code that uses GPU objects is then expected to use the NV_VRAM_DOMAIN()
+    macro in place of NOUVEAU_BO_VRAM to ensure correct behavior on
+    VRAM-less chips.
+    
+    Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Martin Peres <martin.peres@free.fr>
+
+commit 57bdcae9e0fbf639014cd375543a8dd356406ac0
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sat Jun 20 23:27:08 2015 +0800
+
+    ilo: add ilo_state_compute
+    
+    Replace gen6_idrt_data with ilo_state_compute, which has a bunch of
+    validations and is now preferred.
+
+commit 2bf5a4211ef305d90ca6133ca09c3b79e6088d50
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Mon Jun 22 13:36:41 2015 +1000
+
+    r600g: ignore sampler views for now.
+    
+    This fixes a regression in that r600 stopped working when
+    sampler views were pushed.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 66a93a0ff9aa402c37aa9d00b4489715d611b496
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Jun 13 09:14:31 2015 -0400
+
+    freedreno/ir3: pass sz to split_dest()
+    
+    For query_levels, we generate a getinfo with writemask of (z), which RA
+    will consider as size==3.  But we were still generating four fanouts.
+    Which meant that RA would see it as two different register classes,
+    depending on the path to definer.  Ie. on the getinfo instruction itself
+    it would see size==3, but when chasing back through the fanouts it would
+    see size==4.
+    
+    Easiest way to solve that is to just generate the chain of neighboring
+    fanouts to have the correct size in the first place.
+    
+    Note: we may eventually want split_dest() to take start/end or wrmask
+    instead, since really we only need size==1.  But RA is not clever enough
+    for that, query_levels is not that common, and the other two registers
+    that get allocated are never used so those register slots can be
+    immediately re-used.  So bunch of work for probably no real gain.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 1ee4d51e7a68f8f2dcb52a0e2f9af81fdbe078a2
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri Jun 12 14:27:44 2015 -0400
+
+    freedreno/ir3/nir: add more opcodes
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 43048c7093c367897fbcbb3ca8580e9c122cd6f6
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Jun 8 14:45:47 2015 -0400
+
+    freedreno/ir3: only unminify txf coords on a3xx
+    
+    Seems like a4xx gets this right.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 0f008082b184072159e5aedc7fc103efba8740ed
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Jun 8 14:23:49 2015 -0400
+
+    freedreno: remove int sampler shader variants
+    
+    We get this information from NIR (which gets it from sview decl in tgsi
+    when translating from tgsi), so no need to maintain shader variants for
+    this.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 457f7c2a2a93b45396ac66e0d4b3896d2db8fdf3
+Author: Rob Clark <robdclark@gmail.com>
+Date:   Tue Jun 9 17:17:06 2015 -0400
+
+    freedreno/ir3: block reshuffling and loops!
+    
+    This shuffles things around to allow the shader to have multiple basic
+    blocks.  We drop the entire CFG structure from nir and just preserve the
+    blocks.  At scheduling we know whether to schedule conditional branches
+    or unconditional jumps at the end of the block based on the # of block
+    successors.  (Dropping jumps to the following instruction, etc.)
+    
+    One slight complication is that variables (load_var/store_var, ie.
+    arrays) are not in SSA form, so we have to figure out where to put the
+    phis ourselves.  For this, we use the predecessor set information from
+    nir_block.  (We could perhaps use NIR's dominance frontier information
+    to help with this?)
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 660d5c1646f5d63f9626b24beabc9cfc318849d4
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Jun 1 12:35:19 2015 -0400
+
+    freedreno/ir3: a4xx encodes larger immed offset
+    
+    Without this, negative branch/jump offsets look like very large positive
+    offsets.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit d646d3ae9d221104db0e9daec33ef470b1bdd957
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon May 25 10:59:21 2015 -0400
+
+    freedreno/ir3: simplify find_neighbors stop condition
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit c8fb5f8a011e1db78af3ceaf91c5cb3b1acaee14
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon May 25 10:30:54 2015 -0400
+
+    freedreno/ir3: move inputs/outputs to shader
+    
+    These belong in the shader, rather than the block.  Mostly a lot of
+    churn and nothing too interesting.  But splitting this out from the
+    rest of ir3_block reshuffling to cut down the noise in the later
+    patch.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit d52fb2f5ad828f879286b9068023b82b9897bc17
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Fri May 1 12:21:12 2015 -0400
+
+    freedreno/ir3/ra: use register_allocate
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 694beb8b830c993e9bfb744655be3dbd558ab3a8
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat May 23 13:37:41 2015 -0400
+
+    freedreno/ir3: introduce ir3_compiler object
+    
+    Right now, just provides a cleaner way to get at the gpu-id, given the
+    separation between compiler and context.  But we will need this also to
+    hold the reg-set for new register allocation.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 5c1e153467a50dec91df49239654017e9ed86d69
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Apr 25 16:30:55 2015 -0400
+
+    freedreno/ir3: dump nocp option
+    
+    No longer used, or even possible, with NIR frontend.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 7674ab12e826d2ea33f13fb2e6ca8ae2a62fe460
+Author: Rob Clark <robdclark@gmail.com>
+Date:   Tue Jun 9 17:42:16 2015 -0400
+
+    freedreno/ir3: silence warnings
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 0f6faa8ff317634ffb75e6040f2de2019dd80d13
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Apr 25 10:22:49 2015 -0400
+
+    freedreno/ir3: remove tgsi f/e
+    
+    Also remove ir3_flatten which was only used by tgsi f/e.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 7273cb4e933f8be65fc73b9d8c69c76d1078cb14
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Apr 30 13:57:15 2015 -0400
+
+    freedreno/ir3/sched: convert to priority queue
+    
+    Use a more standard priority-queue based scheduling algo.  It is simpler
+    and will make things easier once we have multiple basic blocks and flow
+    control.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit adf1659ff5f07d907eca552be3b566e408c8601e
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Apr 30 11:38:43 2015 -0400
+
+    freedreno/ir3: use standard list implementation
+    
+    Use standard list_head doubly-linked list and related iterators,
+    helpers, etc, rather than weird combo of instruction array and next
+    pointers depending on stage.  Now block has an instrs_list.  In
+    certain stages where we want to remove and re-add to the blocks list
+    we just use list_replace() to copy the list to a new list_head.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 67d994c6761e09205dbc9a0515c510fc9dde02c7
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Apr 30 10:10:14 2015 -0400
+
+    freedreno/ir3: drop dot graph dumping
+    
+    At least for now.. right now the instruction and instruction list
+    printing should suffice, and the re-working of ir3_block would require
+    a lot of changes in that code.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit 5c8c2e2f97394436effbdd3e0f61eec4590accb2
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Sat Apr 25 11:05:27 2015 -0400
+
+    freedreno/ir3: more builder helpers
+    
+    Use ir3_MOV() builder in a couple of spots, rather than open-coding the
+    instruction construction.  Also add ir3_NOP() builder and use that
+    instead of open coding.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit b33015f8895a37fcae1da2984796cb1ef30f8b13
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Thu Apr 30 15:20:03 2015 -0400
+
+    gallium/ttn: add missing SNE
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit c79b2e626c60a29f684bc389f07a712b59fa99cc
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Apr 29 08:38:45 2015 -0400
+
+    util/list: add list_first/last_entry
+    
+    I need an easier way to get at head/tail in ir3.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+
+commit b3d2e367167b675c0b402c90220f40f8cd567d7c
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Jun 8 14:09:09 2015 -0400
+
+    gallium/ttn: add texture-type support
+    
+    v2: rebased on using SVIEW to hold type information
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit cb258c1dec1ff348d508a6b02fbc9aa11eb9f829
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Mon Jun 8 13:20:30 2015 -0400
+
+    glsl_to_tgsi: add SVIEW decl support
+    
+    Freedreno needs sampler type information to deal with int/uint textures.
+    To accomplish this, start creating sampler-view declarations, as
+    suggested here:
+    
+     http://lists.freedesktop.org/archives/mesa-dev/2014-November/071583.html
+    
+    create a sampler-view with index matching the sampler, to encode the
+    texture type (ie. SINT/UINT/FLOAT).  Ie:
+    
+       DCL SVIEW[n], 2D, UINT
+       DCL SAMP[n]
+       TEX OUT[1], IN[1], SAMP[n]
+    
+    For tgsi texture instructions which do not take an explicit SVIEW
+    argument, the SVIEW index is implied by the SAMP index.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 93379748f7e4f5ab22040cdb7a4cccdcfb7954c1
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jun 10 20:02:55 2015 -0400
+
+    util/blitter (and friends): generate appropriate SVIEW decls
+    
+    Some hardware needs to know the sampler type.  Update the blit related
+    shaders to include SVIEW decl.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit e53699298640df7d7659a8ce88b68e43918b600c
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jun 10 20:01:11 2015 -0400
+
+    util/pstipple: updates for SVIEW decls
+    
+    To allow for shaders which use SVIEW decls for TEX* instructions, we
+    need to preserve the constraint that the shader either has no SVIEWs or
+    it has one matching SVIEW for each SAMP.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit b516e68afb548894eff6b1f375c01f6dfafb6aed
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jun 10 19:59:20 2015 -0400
+
+    draw: updates to support SVIEW decls
+    
+    To allow for shaders which use SVIEW decls for TEX* instructions, we
+    need to preserve the constraint that the shader either has no SVIEWs or
+    it has one matching SVIEW for each SAMP.
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit f481af110e6ab42b2d184f225bfe7eb1e66df393
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jun 10 19:51:32 2015 -0400
+
+    tgsi/transform: add support for SVIEW decls
+    
+    TODO single return_type (use enum)
+    
+    v2: single return_type arg, and use enum
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit b13135e06671468d296a33abf4150060f2b2a061
+Author: Rob Clark <robclark@freedesktop.org>
+Date:   Wed Jun 10 19:49:55 2015 -0400
+
+    tgsi: update docs for SVIEW usage with TEX* instructions
+    
+    Based on mailing list discussion here:
+    
+    http://lists.freedesktop.org/archives/mesa-dev/2014-November/071583.html
+    
+    Signed-off-by: Rob Clark <robclark@freedesktop.org>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 717376155d2082d7bf94122a1e1d383b39e0b070
+Author: Eric Anholt <eric@anholt.net>
+Date:   Sat Jun 20 15:02:50 2015 -0700
+
+    mesa: Back out an accidental change I had in a VC4 commit.
+    
+    This was a hack as part of debugging some glamor-on-GLES2 behavior that
+    ended up being an xserver bug.  I suspect we can just flip this extension
+    on for GLES2, but the spec says it requires 3.1.
+
+commit 104bff037665075aa2b92964ad2895f45d9a5866
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jun 20 16:40:56 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.5.8
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit aa28423bcc7cd6b5f2c5f9c8f1a385a79469a439
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jun 20 16:37:16 2015 +0100
+
+    docs: Add sha256sums for the 10.5.8 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit a81b1d5512f64ffca1c13a5937e7eb0de24713ae)
+
+commit 97caf2054f6ebd3106ed22ef73622483ef193bf7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat Jun 20 15:14:45 2015 +0100
+
+    Add release notes for the 10.5.8 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 24b043aab73ce066ded6e4bc93f589008dfc8484)
+
+commit c00903867417f1522047b7c50ea9248e1aa2f50c
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jun 19 19:47:44 2015 -0700
+
+    vc4: Use a defined t value for 1D textures.
+    
+    This doesn't fix the broken 1D cases of texsubimage, but it does prevent
+    segfaulting when dumping the QIR code generated in fbo-1d.
+
+commit bb107110a4d97191841985076dd9f2fbd0937dfc
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jun 19 19:41:25 2015 -0700
+
+    vc4: Fix write-only texsubimage when we had to align.
+    
+    We need to make sure that when we store the aligned box, we've got
+    initialized contents in the border.  We could potentially just load the
+    border area, but for now let's get text rendering working in X (and fix
+    the GL_TEXTURE_2D errors in piglit's texsubimage test and
+    gl-2.1-pbo/test_tex_image)
+
+commit 028590cbc758e877b963ba430f0a0cb49e882a6b
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 18 22:48:14 2015 +0800
+
+    ilo: clean up header includes
+    
+    Core is more self-contained now.
+
+commit 244caba2502402b93876cb89952ac05e6d87c5b2
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sat Jun 20 00:34:29 2015 +0800
+
+    ilo: avoid ilo_ib_state in genX_3DPRIMITIVE()
+    
+    ilo_ib_state is not in core.
+
+commit dcb5bad3a3a8ff116c32ecb01827ea8461fa2baa
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 18 22:47:20 2015 +0800
+
+    ilo: move gen6_so_SURFACE_STATE() out of core
+    
+    It does not belong to core.
+
+commit e3372c4bfb8d5960714651ca7d3f1acc0018a8fa
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 15 15:17:45 2015 +0800
+
+    ilo: add ilo_state_sol_buffer
+    
+    It serves the same purpose as ilo_state_vertex_buffer does.
+
+commit 9904e647cca0a15c80557ed7bcc6893faf147436
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 19 15:10:02 2015 +0800
+
+    ilo: add ilo_state_index_buffer
+    
+    It serves the same purpose as ilo_state_vertex_buffer does.
+
+commit da4878cb807f46e6053731a177c3c75497aaf4fb
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 19 15:06:50 2015 +0800
+
+    ilo: add ilo_state_vertex_buffer
+    
+    Being a parameter-like state, we may want to get rid of
+    ilo_state_vertex_buffer_info or ilo_state_vertex_buffer eventually.  But we
+    want them now as they are how we do cross-validation right now.
+
+commit 4555211028394673f8ad68f3de9c12e9a1f93160
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 18 14:26:29 2015 +0800
+
+    ilo: add 3DSTATE_VF_INSTANCING to ilo_state_vf
+    
+    3DSTATE_VF_INSTANCING specifies instancing enable and step rate.  They are
+    specified along with 3DSTATE_VERTEX_BUFFERS instead prior to Gen8.  Both
+    commands are added.
+
+commit e8d297b7a108fcf1cb688fe1db89e83b8f85e091
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Tue Jun 16 23:11:06 2015 +0800
+
+    ilo: add 3DSTATE_VF to ilo_state_vf
+    
+    3DSTATE_VF specifies cut index enable and cut index.  Cut index enable is
+    specified in 3DSTATE_INDEX_BUFFER instead prior to Gen7.5.  Both commands are
+    added.
+
+commit 7b3432b62d25494b3662d12634e34d75e29ec865
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 18 13:55:32 2015 +0800
+
+    ilo: embed pipe_index_buffer in ilo_ib_state
+    
+    Make it obvious that we save a copy of pipe_index_buffer.
+
+commit 73f0d6d22db21f1fa553d8a26687edc5083e3c23
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 19 23:29:32 2015 +0800
+
+    ilo: fix a buffer overrun
+    
+    Add missing parentheses in SURFTYPE_NULL initialization.
+
+commit aa3ec8bc465f8c82cb38e0ed067dbdd9122dbd44
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 19 23:24:17 2015 +0800
+
+    ilo: fix a -Wmaybe-uninitialized warning
+    
+    ilo_shader.c: In function ‘ilo_shader_select_kernel_sbe’:
+    ilo_shader.c:1140:27: warning: ‘src_skip’ may be used uninitialized in this
+    function [-Wmaybe-uninitialized]
+
+commit a1f84453a2f104a92f9efd353c629177e00e4b5e
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri Jun 19 16:45:44 2015 -0600
+
+    glsl: fix formatting glitch in _mesa_print_ir()
+    
+    Print the closing ) before the newline.  Trivial.
+
+commit 7c3da3592e8799059abca9cd7c92d61ebfd09f29
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu Jun 18 18:45:47 2015 -0700
+
+    i965/gen8: Use HALIGN_16 for single sample mcs buffers
+    
+    The original code meant to do this, but was only checking num_samples == 1 to
+    figure out if a surface was fast clear capable. However, we can allocate single
+    sample miptrees with num_samples == 0 (when it's an internally created buffer).
+    
+    This fixes a bunch of the piglit tests on gen8. Other gens should have been
+    fine.
+    
+    Here is the order of events that allowed this to slip through:
+    t0: I wrote halign patches and tested them. These alignment assertions are for
+       gen8 fast clear surfaces, basically.
+    t1: I pushed bogus perf patch which made fast clears never happen
+    t2: Reworked halign patches based on Chad's feedback and introduced the bug this
+       patch fixes.
+    t2.5: I tested reworked patches, but assertion wasn't hit because of t1.
+    t3: Matt fixed issue in t1 which made fast clears happen here:
+    commit 22af95af8316f2888a3935cdf774ff0997b3dd42
+    Author: Matt Turner <mattst88@gmail.com>
+    Date:   Thu Jun 18 16:14:50 2015 -0700
+    
+        i965: Add missing braces around if-statement.
+    
+    This logic should match that of the v1 of my halign patch series.
+    
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: Matt Turner <mattst88@gmail.com>
+    Reported-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+
+commit 539cb2b76efd02f14798cad0a5462ee9ed27aa83
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jun 19 12:08:24 2015 -0400
+
+    mesa: move ARB_gs5 enums to core, EXT_polygon_offset_clamp to desktop
+    
+    When adding EXT_polygon_offset_clamp, I first made it core-only, and
+    never moved the enum getter back to the GL/GL_CORE section. Similarly,
+    ARB_gs5 is a core-only extension, so move its getters to the GL_CORE
+    section.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 6ec4e9c28d54877fbaca04b080c249048c6e7634
+Author: Brian Paul <brianp@vmware.com>
+Date:   Thu Jun 18 18:03:29 2015 -0600
+
+    u_vbuf: fix src_offset alignment in u_vbuf_create_vertex_elements()
+    
+    If the driver says PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY=1,
+    the driver should never receive a pipe_vertex_element::src_offset value
+    that's not a multiple of four.  But the vbuf code wasn't actually adjusting
+    the src_offset value when creating the vertex element state object.
+    
+    We just need to align the src_offset values put in the driver_attribs[]
+    array.
+    
+    See the piglit gl-1.5-vertex-buffer-offsets test.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit c40f44cc991d9499243063cba95fbdc947c53371
+Author: Brian Paul <brianp@vmware.com>
+Date:   Thu Jun 18 17:53:42 2015 -0600
+
+    gallium: whitespace, formatting clean-up in p_state.h
+    
+    Remove trailing whitespace, move some braces, 78-column wrapping.
+    Trivial.
+
+commit 4c11008eba9f58621bbbae430f8717176045b0ce
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 16 15:32:46 2015 -0600
+
+    st/wgl: fix WGL_SWAP_METHOD_ARB query
+    
+    There are three possible return values (not two): WGL_SWAP_COPY_ARB,
+    WGL_SWAP_EXCHANGE_EXT and WGL_SWAP_UNDEFINED_ARB.
+    
+    VMware bug 1431184
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+    Reviewed-by: Charmaine Lee <charmainel@vmware.com>
+
+commit 73bdf4ba86751983dff011ac488ac60321d70a7f
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 16 15:32:46 2015 -0600
+
+    stw: use new stw_get_nop_function() function to avoid Viewperf 12 crashes
+    
+    Also, print a warning if we do return NULL from wglGetProcAddress() to
+    help spot this sort of problem in the future.
+    
+    Reviewed-by: José Fonseca <jfonseca@vmware.com>
+
+commit 8d005a643ed94c1871ec854bc069366cdda6581f
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 16 15:32:46 2015 -0600
+
+    stw: add some no-op functions for GL_EXT_dsa, GL_NV_half_float
+    
+    Viewperf 12 calls wglGetProcAddress() to get pointers to some unsupported
+    DSA and half-float functions.  We return NULL but Viewperf doesn't check
+    for null before trying to jump through the pointer.  That causes a crash.
+    
+    This patch adds no-op functions to call instead (used by the next patch).
+    This avoids the crash but the rendering is incorrect.
+    
+    Some DSA functions are being added to Mesa at this time so we may be
+    able to remove some of these no-ops in the future.
+    
+    More no-op functions may be added as needed.
+    
+    VMware PR1383421
+    
+    Reviewed-by: José Fonseca <jfonseca@vmware.com>
+
+commit eee9247018d710659f14678715a85e4ad6f54366
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Tue Jun 16 15:32:46 2015 -0600
+
+    st/wgl: Don't return core profile for 3.1 contexts.
+    
+    WGL_CONTEXT_PROFILE_MASK_ARB doesn't apply to desktop OpenGL versions
+    less than 3.2 -- applications can't specify whether they want a core or
+    a compat 3.1 context -- instead they are supposed to check whether the
+    returned context advertises the GL_ARB_compatibility extension.
+    
+    Mesa doesn't support compatibility contexts for versions higher than 3.1,
+    so we used to return core profile context, but this makes several Windows
+    applications unhappy, because they just assume they got a compatibility
+    context without checking.
+    
+    So it seems safer on Windows to never return core profile for 3.1,
+    i.e., just fail the context creation.
+    
+    VMware PR1365920.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 528bd94432b20becc1f436da75f7a102416dabeb
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 16 15:32:46 2015 -0600
+
+    st/wgl: set PIPE_BIND_SAMPLER_VIEW for window color buffers
+    
+    To allow sampling from the surface for things like glCopyPixels
+    or glCopyTexSubImage.
+    
+    Reviewed-by: Charmaine Lee <charmainel@vmware.com>
+
+commit 9405c1b3b0b207409931166a608276198a068cb8
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 16 15:32:45 2015 -0600
+
+    st/wgl: add support for multisample pixel formats
+    
+    Create pixel formats with 0, 4, 8 and 16 samples per pixel.
+    Add a SVGA_FORCE_MSAA env var to force creating all pixel formats
+    with a particular sample count.  This is useful for testing Mesa/GLUT/
+    etc. programs which don't ordinarily use multisample.
+    
+    Reviewed-by: Matthew McClure <mcclurem@vmware.com>
+
+commit 0925e5f5bc843237e534313dd5b99095ecbdd987
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 16 15:32:45 2015 -0600
+
+    st/wgl: respect sample count when creating framebuffer surfaces
+    
+    Use the visual/pixel format's sample count instead of zero.
+    
+    Reviewed-by: Matthew McClure <mcclurem@vmware.com>
+
+commit b8249de646e75f8af0c84d7f06b7805fe555e13e
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 16 15:32:45 2015 -0600
+
+    st/wgl: fix WGL_SAMPLE_BUFFERS_ARB query
+    
+    Only report 1 for WGL_SAMPLE_BUFFERS_ARB if the number of samples
+    per pixel > 1.
+    
+    Reviewed-by: Matthew McClure <mcclurem@vmware.com>
+
+commit 5ad5d44af57a815c6eb16d4d61070135acb55f37
+Author: Brian Paul <brianp@vmware.com>
+Date:   Sat Jun 13 08:07:08 2015 -0600
+
+    tgsi: add comments for ureg_emit_label()
+
+commit 12c1c0706d4356819cfbaa15c3d71402a42e3539
+Author: Brian Paul <brianp@vmware.com>
+Date:   Sat Jun 13 07:58:53 2015 -0600
+
+    tgsi: new comments, assertion for executing TGSI_OPCODE_CAL
+
+commit 2ce2b80c6fd7ba5effbdf86ca3affe10a9c70492
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Fri Jun 19 13:03:36 2015 +1000
+
+    docs: update developer info
+    
+    Update piglit link to the current Piglit website.
+    
+    Add note about updating patchwork when sending patch revisions.
+    
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit afeb92220690c8f27cdc56c30e109ca175d51d83
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Thu Jun 18 15:47:00 2015 +0100
+
+    llvmpipe: Truncate the binned constants to max const buffer size.
+    
+    Tested with Ilia Mirkin's gzdoom.trace and
+    "arb_uniform_buffer_object-maxuniformblocksize fsexceed" piglit test
+    without my earlier fix to fail linkage when UBO exceeds
+    GL_MAX_UNIFORM_BLOCK_SIZE.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit f734d2556013e9239e91f43b563b5b1d8f03ada4
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Mon Jun 15 18:29:02 2015 +0100
+
+    glsl: Fail linkage when UBO exceeds GL_MAX_UNIFORM_BLOCK_SIZE.
+    
+    It's not totally clear whether other Mesa drivers can safely cope with
+    over-sized UBOs, but at least for llvmpipe receiving a UBO larger than
+    its limit causes problems, as it won't fit into its internal display
+    lists.
+    
+    This fixes piglit "arb_uniform_buffer_object-maxuniformblocksize
+    fsexceed" without regressions for llvmpipe.
+    
+    NVIDIA driver also fails to link the shader from
+    "arb_uniform_buffer_object-maxuniformblocksize fsexceed".
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65525
+    
+    PS: I don't recommend cherry-picking this for Mesa stable, as some app
+    might inadvertently have been relying on UBOs larger than
+    GL_MAX_UNIFORM_BLOCK_SIZE to work on other drivers, so even if this
+    commit is universally accepted it's probably best to let it mature in
+    master for a while.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 5974841fd0be7e2c336f63bd9ef416723e1923dc
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Jun 18 19:08:24 2015 -0400
+
+    glsl: guard gl_NumSamples enablement on ARB_sample_shading
+    
+    gl_NumSamples should only be enabled when ARB_sample_shading is enabled.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit 22af95af8316f2888a3935cdf774ff0997b3dd42
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Thu Jun 18 16:14:50 2015 -0700
+
+    i965: Add missing braces around if-statement.
+    
+    Fixes a performance problem caused by commit b639ed2f.
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90895
+
+commit 2310a65c28f809442c24fc8893c65ce7c7a4dca3
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Tue Jun 16 14:27:15 2015 -0700
+
+    i965/compute: Fix undefined code with right_mask for SIMD32
+    
+    Although we don't support SIMD32, krh pointed out that the left shift
+    by 32 is undefined by C/C++ for 32-bit integers.
+    
+    Suggested-by: Kristian Høgsberg <krh@bitplanet.net>
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 770f141866654dab969302f720228497f0fb35fd
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jun 17 23:00:44 2015 -0400
+
+    mesa: add GL_PROGRAM_PIPELINE support in KHR_debug calls
+    
+    This was apparently missed when ARB_sso support was added.
+    Add label support to pipeline objects just like all the other
+    debug-related objects.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit b6e238023c4f8af2328dc3bcab1d73a3e19f4fbb
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jun 17 15:09:26 2015 -0400
+
+    glsl: add version checks to conditionals for builtin variable enablement
+    
+    A number of builtin variables have checks based on the extension being
+    enabled, but were missing enablement via a higher GLSL version.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit c40e7ee7c47cb24264fd77ef37fab99dea4c299a
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jun 17 15:07:14 2015 -0400
+
+    glsl: handle conversions to double when comparing param matches
+    
+    This allows mod(int, int) to become selected as float mod when doubles
+    are supported.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 6b0378e483ba53359545ac8b774dbdd81c2fab3f
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 18 12:59:28 2015 +0100
+
+    ilo: remove missing ilo_fence.h from the sources list
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 997fc807b2f71ef65b4601d6db33d0f912c18d3f
+Author: Boyan Ding <boyan.j.ding@gmail.com>
+Date:   Tue Jun 16 11:08:33 2015 +0800
+
+    egl/x11: Set version of swrastLoader to 2
+    
+    which it actually implements instead of the newest version defined in
+    dri_interface.h
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 1d45e44b2f9e52d6eebe84ab08da6b7393011f95
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 17 13:24:06 2015 -0700
+
+    vc4: Move tile state/alloc allocation into the kernel.
+    
+    This avoids a security issue where userspace could have written the tile
+    state/tile alloc behind the GPU's back, and will apparently be necessary
+    for fixing stability bugs (tile state buffers are missing some top bits
+    for the tile alloc's address).
+
+commit 9adcd2d80aceec90b9c3712b53d8e7839dc5634b
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 10 12:36:47 2015 -0700
+
+    vc4: Move RCL generation into the kernel.
+    
+    There weren't that many variations of RCL generation, and this lets us
+    skip all the in-kernel validation for what we generated.
+
+commit 91c73a9a280b749a781cd3f071fc377fcb9758e1
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 17 13:51:55 2015 -0700
+
+    vc4: Add dumping of VC4_PACKET_TILE_BINNING_MODE_CONFIG.
+
+commit dc1fbad2eb5454ed36a066d2a69b575cd5a8abaf
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 17 23:49:19 2015 -0700
+
+    vc4: Fix memory leak from simple_list conversion.
+    
+    I accidentally shadowed the outside declaration, so we always returned
+    NULL even when we'd found something in the cache.
+
+commit 62d153ea37b1bf572c39aab8ec46099fc903362d
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 17 22:56:15 2015 -0700
+
+    vc4: Track the number of BOs allocated and their size.
+    
+    This is useful for BO leak debugging.
+
+commit 2b1cdb0eddb73f62e4848d4b64840067f1f70865
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Tue Feb 24 19:02:50 2015 +0100
+
+    i965: Fix textureGrad with cube samplers
+    
+    We can't use sampler messages with gradient information (like
+    sample_g or sample_d) to deal with this scenario because according
+    to the PRM:
+    
+    "The r coordinate and its gradients are required only for surface
+    types that use the third coordinate. Usage of this message type on
+    cube surfaces assumes that the u, v, and gradients have already been
+    transformed onto the appropriate face, but still in [-1,+1] range.
+    The r coordinate contains the faceid, and the r gradients are ignored
+    by hardware."
+    
+    Instead, we should lower this to compute the LOD manually based on the
+    gradients and use a different sample message that takes the computed
+    LOD instead of the gradients. This is already being done in
+    brw_lower_texture_gradients.cpp, but it is restricted to shadow
+    samplers only, although there is a comment stating that we should
+    probably do this also for samplerCube and samplerCubeArray.
+    
+    Because of this, both dEQP and Piglit test cases for textureGrad with
+    cube maps currently fail.
+    
+    This patch does two things:
+    1) Activates the texturegrad lowering pass for all cube samplers.
+    2) Corrects the computation of the LOD value for cube samplers.
+    
+    I had to do 2) because for cube maps the calculations implemented
+    in the lowering pass always compute a value of rho that is twice
+    the value we want (so we get a LOD value one unit larger than we
+    want). This only happens for cube map samplers (all kinds). I am
+    not sure about why we need to do this, but I suspect that it is
+    related to the fact that cube map coordinates, when transported
+    to a specific face in the cube, are in the range [-1, 1] instead of
+    [0, 1] so we probably need to divide the derivatives by 2 when
+    we compute the LOD. Doing that would produce the same result as
+    dividing the final rho computation by 2 (or removing a unit
+    from the computed LOD, which is what we are doing here).
+    
+    Fixes the following piglit tests:
+    bin/tex-miplevel-selection textureGrad Cube -auto -fbo
+    bin/tex-miplevel-selection textureGrad CubeArray -auto -fbo
+    bin/tex-miplevel-selection textureGrad CubeShadow -auto -fbo
+    
+    Fixes 10 dEQP tests in the following category:
+    dEQP-GLES3.functional.shaders.texture_functions.texturegrad.*cube*
+    
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 36e3eb6a957f8f20ed187ec88a067fc65cb81432
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Wed Jun 17 22:18:09 2015 -0400
+
+    nvc0/ir: can't have a join on a load with an indirect source
+    
+    Triggers an INVALID_OPCODE warning on GK208. Seems rare enough to not
+    warrant verification on other chips. Fixes the new piglits:
+    
+      ubo_array_indexing/fs-nonuniform-control-flow.shader_test
+      ubo_array_indexing/vs-nonuniform-control-flow.shader_test
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit ff06901082b84c91ee64d3a54bf372f0c809f4bf
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:59 2015 +0300
+
+    docs: mark GL_ARB_framebuffer_no_attachments done for i965
+    
+    Mark GL_ARB_framebuffer_no_attachments as done for i965.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit 83199998310591b9162ab12e922ed79ee235b5c8
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:58 2015 +0300
+
+    i965: enable ARB_framebuffer_no_attachments for Gen7+
+    
+    Enable GL_ARB_framebuffer_no_attachments in i965 for Gen7 and higher.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit 9ded6369754910f7f58f896c1627ba0bbfb0f864
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:57 2015 +0300
+
+    i965: execution of frag-shader when it has atomic buffer
+    
+    Ensure that the GPU spawns the fragment shader thread for those
+    fragment shaders with atomic buffer access.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit bbb700967e9991a03ed6e8073c9bdc2ca0d1381d
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:56 2015 +0300
+
+    mesa: function for testing if current frag-shader has atomics
+    
+    Add a helper function that checks whether the currently active fragment
+    shader of the gl_context has atomic buffer access.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit 41b6db225f42a5d81beec1b4455ec7b504e2416d
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:55 2015 +0300
+
+    i965: Use _mesa_geometric_ functions appropriately
+    
+    Change references to gl_framebuffer::Width, Height, MaxNumLayers
+    and Visual::samples to use the _mesa_geometric_ convenience functions
+    for those places where the geometry of the gl_framebuffer is needed
+    (in contrast to the geometry of the intersection of the attachments
+    of the gl_framebuffer).
+    
+    This patch is to pave the way to enable GL_ARB_framebuffer_no_attachments
+    on Gen7 and higher in i965.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit 51f4b51151cb08988b5de466f3c2348876784cc5
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:54 2015 +0300
+
+    mesa: helper function for scissor box of gl_framebuffer
+    
+    Add helper convenience function that intersects the scissor values
+    against a passed bounding box. In addition, to avoid replicated code,
+    make the function _mesa_scissor_bounding_box() use this new function.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit 74987977a36a7111281e8fb53568dc05dbd3a8b4
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:53 2015 +0300
+
+    mesa: add helper functions for geometry of gl_framebuffer
+    
+    Add convenience helper functions for fetching geometry of gl_framebuffer
+    that return the geometry of the gl_framebuffer instead of the geometry of
+    the buffers of the gl_framebuffer when the gl_framebuffer has no
+    attachments.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit 6aa12994bdf0068a9804204a8f1b197cc0f46ec6
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:52 2015 +0300
+
+    mesa: Complete ARB_framebuffer_no_attachments in Mesa core
+    
+    Implement GL_ARB_framebuffer_no_attachments in Mesa core
+     - changes to conditions for framebuffer completeness
+     - implement set/get functions for framebuffers for
+       new functions in GL_ARB_framebuffer_no_attachments
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit c9d26f201aca58c72629d1ba1bb13c32c158d9dd
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:51 2015 +0300
+
+    mesa: Constants and functions for ARB_framebuffer_no_attachments
+    
+    Define the enumeration constants, function entry points and
+    glGet for the GL_ARB_framebuffer_no_attachments.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit da81999bee7b1f1bc0bb296e903deb03617ae22c
+Author: Kevin Rogovin <kevin.rogovin@intel.com>
+Date:   Wed Jun 17 13:29:50 2015 +0300
+
+    mesa: Define infrastructure for ARB_framebuffer_no_attachments
+    
+    Define the infrastructure for the extension GL_ARB_framebuffer_no_attachments:
+     - extension table
+     - additions to gl_framebuffer
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Signed-off-by: Kevin Rogovin <kevin.rogovin@intel.com>
+
+commit a0cd1a4060fdb55a57609b460629c7059bbe7047
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 15 15:05:36 2015 -0700
+
+    vc4: Make sure that direct texture clamps have a minimum value of 0.
+    
+    I was thinking of the MIN opcode in terms of unsigned math, but it's
+    signed, so if you used a negative array index, you could read before the
+    UBO.  Fixes segfaults under simulation in piglit array indexing tests with
+    mprotect-based guard pages.
+
+commit d4d27361499cac73da4716b571519ecb71cef551
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 15 17:47:12 2015 -0700
+
+    vc4: Swap around which src we spill to ra31/rb31.
+    
+    I wanted to assert that src1 came from a non-unspilled register in shader
+    validation, and this easily gets us that.  And, as a bonus:
+    
+    total instructions in shared programs: 93347 -> 92723 (-0.67%)
+    instructions in affected programs:     60524 -> 59900 (-1.03%)
+
+commit 507f3e708cbd10a4272aeffa0f066f1a80b48239
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jun 16 12:03:10 2015 -0700
+
+    vc4: R4 is not a valid register for clamped direct texturing.
+    
+    Our array only goes to R3, and R4 is a special case that shouldn't be
+    used.
+
+commit 2eac356467cef898ed05d0699077d9a9f4fa9156
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 15 14:54:26 2015 -0700
+
+    vc4: Factor out the live clamp register getter.
+
+commit 596532cc7d477671f87116e0788b4214ae1d0559
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 15 11:41:06 2015 -0700
+
+    vc4: Drop the unused "stride" field of surfaces.
+    
+    We're always looking at the slice anyway, when we would have needed it.
+
+commit 6dd55b49090da22d3a8e9226507a95e914eaf10f
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri Jun 12 12:47:47 2015 -0700
+
+    vc4: Handle refcounting the exec BO like we do in the kernel.
+    
+    This reduces the diff to the kernel, and will be useful when I make the
+    kernel allocate more BOs as part of validation.
+
+commit 731ac05cc4e444175288032a76a29c95059af038
+Author: Eric Anholt <eric@anholt.net>
+Date:   Thu Jun 11 16:08:11 2015 -0700
+
+    vc4: Use VC4_SET/GET_FIELD for some RCL packets.
+
+commit e22a1927844cdda499ea15f539028c16e47394ea
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 10 13:20:25 2015 -0700
+
+    vc4: Make symbolic values for packet sizes.
+
+commit c2f82876014c9acb0518cf31a6f675fcc73c955a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 10 12:58:47 2015 -0700
+
+    vc4: Use symbolic values in texture ptype validation.
+
+commit 5fbbec9aae8185b96aa4cf6d778901dea44fefa4
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 10 12:47:56 2015 -0700
+
+    vc4: Move vc4_packet.h to the kernel/ directory, since it's also shared.
+    
+    I want to notice discrepancies when I diff -u between Mesa and the kernel.
+
+commit e20345204d8fe8864240be2428ac0f225b92b7cc
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:50 2015 -0700
+
+    i965/gen9: Disable Mip Tail for YF/YS tiled surfaces
+    
+    Disabling miptails fixed the buffer corruption happening in FBO
+    which use YF/YS tiled renderbuffer or texture as color attachment.
+    
+    Spec recommends disabling mip tails only for non-mip-mapped surfaces.
+    But, without disabling miptails I couldn't get correct data out of
+    mipmapped YF/YS tiled surface.
+    
+    We need a better understanding of miptails before we start using them.
+    For now this patch helps move things forward.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 54591bb67f189820ef0d61b040179abbd5ecf78a
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:48 2015 -0700
+
+    i965/gen9: Set vertical and horizontal surface alignments
+    
+    Patch sets the alignments for texture and renderbuffer surfaces.
+    
+    V3: Make changes inside horizontal_alignment() and
+        vertical_alignment() (Topi)
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 6c380d42b161da977d164ccf75ccc25a2e056bb1
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:48 2015 -0700
+
+    i965: Use BRW_SURFACE_* in place of GL_TEXTURE_*
+    
+    Makes no functional changes in the code.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit af0853033296a4db3c48352e8cb60f8209424f79
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:48 2015 -0700
+
+    i965: Rename use_linear_1d_layout() and make it global
+    
+    This function will be utilised in later patches.
+    
+    V2: Make both pointers constants (Topi)
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 06687564479be1a2eed5842cfe4ad85dd099261b
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:48 2015 -0700
+
+    i965/gen9: Set tiled resource mode in surface state
+    
+    This patch sets the tiled resource mode for texture and renderbuffer
+    surfaces.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 6b8accb36b541f77774109ea42533c02bb90bc68
+Author: Haixia Shi <hshi@chromium.org>
+Date:   Fri Jun 12 10:10:58 2015 -0700
+
+    egl/dri2: implement platform_surfaceless
+    
+    The surfaceless platform is for off-screen rendering only. Render node support
+    is required.
+    
+    Only consider the render nodes. Do not use normal nodes as they require
+    auth hooks.
+    
+    v3: change platform_null to platform_surfaceless
+    v4: make libdrm required for surfaceless
+    v5: remove modified include guards with defined(HAVE_SURFACELESS_PLATFORM)
+    v6: use O_CLOEXEC for drm fd
+    
+    Signed-off-by: Haixia Shi <hshi@chromium.org>
+    Signed-off-by: Zach Reizner <zachr@google.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit c753866cc4ae15313430f9b6edba1b82e44b003a
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Thu May 28 19:35:44 2015 +0100
+
+    i965/vec4: Fix the source register for indexed samplers
+    
+    Previously when setting up the sample instruction for an indirect
+    sampler the vec4 backend was directly passing the pseudo opcode's
+    src0. However vec4_visitor::visit(ir_texture *) doesn't set the
+    texture operation's src0 -- it's left as BAD_FILE, which when
+    translated into a brw_reg gives the null register. In brw_SAMPLE,
+    gen6_resolve_implied_move() inserts a MOV from the inst->base_mrf and
+    sets the src0 appropriately. The indirect sampler case did not have a
+    call to gen6_resolve_implied_move().
+    
+    The fs backend avoids this because the platforms that support dynamic
+    indexing of samplers (IVB+) have been converted to not use the
+    fake-MRF hack, and instead send from proper GRFs.
+    
+    This patch makes it call gen6_resolve_implied_move before setting up
+    the indirect message. This is similar to what is done for constant
+    sampler numbers in brw_SAMPLE.
+    
+    The Piglit tests for sampler array indexing didn't pick this up
+    because they were using a texture with a solid colour so it didn't
+    matter what texture coordinates were actually used. The tests have now
+    been changed to be more thorough in this commit:
+    
+    http://cgit.freedesktop.org/piglit/commit/?id=4f9caf084eda7
+    
+    With that patch the tests for gs and vs are currently failing on
+    Ivybridge, but this patch fixes them. There are no other changes to a
+    Piglit run on Ivybridge.
+    
+    On Skylake the gs tests were failing even without the Piglit patch
+    because Skylake needs the source registers to work correctly in order
+    to send a message header to select SIMD4x2 mode.
+    
+    (The explanation in the commit message is partially written by Matt
+    Turner)
+    
+    Tested-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit aab55b0bc6086a032f44c99ad6569ea2eac128ca
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue Apr 7 01:10:17 2015 +0200
+
+    st/mesa: improve assertions in vp/fp translation
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 42a3c1ec8471fc76eb6d3d6f1bd1739e24a5f33a
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Jun 14 16:37:02 2015 +0200
+
+    mesa: don't rebind constant buffers after every state change if GS is active
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 358b6bb7a71663ab5e1c62f2b7767c20acebc2fa
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 21 12:07:55 2014 +1200
+
+    mesa: generalize sso stage interleaving check
+    
+    For tessellation.
+    
+    v2: cleanup by Marek Olšák
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 8af11afc38532c65a242f7d45c31cf098ce2fa2f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 13 13:02:20 2015 +0200
+
+    mesa: remove unused variables from gl_program
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit fa49536ab10748f6ab05e930d4b01fe714ea6b59
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 7 19:24:15 2014 +1200
+
+    glsl: add ir reader support for ir_barrier
+    
+    Picked from the tessellation branch.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 2f86c22e75a3273a7541f88ffedd2edefaf6f482
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu Mar 19 23:28:25 2015 +0100
+
+    glsl: print locations of variables
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 797f4eacea8a6b08b7c9143a74c7f2b422d1535d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 6 13:24:11 2015 +0200
+
+    configure.ac: rename LLVM_VERSION_PATCH to avoid conflict with llvm-config.h
+    
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+
+commit da6996485f2ca636218e3d83b53cf6a450bb9b38
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Mon Jun 15 21:00:47 2015 +1000
+
+    Revert "glsl: remove restriction on unsized arrays in GLSL ES 3.10"
+    
+    This reverts commit adee54f8269c5e9f4fde91d19f0e465afc8f14d8.
+    
+    Further down in the GLSL ES 3.10 spec it says:
+    
+    "If an array is declared as the last member of a shader storage block
+    and the size is not specified at compile-time, it is sized at run-time.
+    In all other cases, arrays are sized only at compile-time."
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 7d88ab42b9dda825feddbae774a2a48ddf3cbec2
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Tue Jun 16 13:46:47 2015 +0300
+
+    mesa: set override_version per api version override
+    
+    Before 9b5e92f get_gl_override was called only once, but now it is
+    called for multiple APIs (GLES2, GL), version needs to be set always.
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90797
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+    Tested-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 1a6220b416f02e56575894efbbd1717c9427c763
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Thu Jun 11 16:59:07 2015 +0100
+
+    i965: Fix aligning to the block size in intel_miptree_copy_slice
+    
+    This function was trying to align the width and height to a multiple
+    of the block size for compressed textures. It was using align_w/h as a
+    shortcut to get the block size as up until Gen9 this always happens to
+    match. However in Gen9+ the alignment values are expressed as
+    multiples of the block size so in effect the alignment values are
+    always 4 for compressed textures as that is the minimum value we can
+    pick. This happened to work for most compressed formats because the
+    block size is also 4, but for FXT1 this was breaking because it has a
+    block width of 8.
+    
+    This fixes some Piglit tests testing FXT1 such as
+    
+    spec@3dfx_texture_compression_fxt1@fbo-generatemipmap-formats
+    
+    Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
+
+commit 8b24388647f626a5cad10fd48e61335ed26a8560
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon Jun 15 15:48:58 2015 -0400
+
+    nv50,nvc0: clamp uniform size to 64k
+    
+    The state tracker will pass through requests from buggy applications
+    which will have the buffer size larger than the max allowed (64k). Clamp
+    the size to 64k so that we don't get errors when uploading the constbuf
+    data.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit a2af42c1d2dc91f4c31e25ff9fff15a89a9b6ead
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri Jun 12 16:09:05 2015 +0200
+
+    nvc0/ir: fix collection of first uses for texture barrier insertion
+    
+    One of the places we have to insert texbars is in situations where the
+    result of the tex gets overwritten by a different instruction (e.g. in a
+    conditional statement). However in some situations it can actually
+    appear as though the original tex itself is an overwriting instruction.
+    This can naturally never really happen, so just ignore the tex
+    instruction when it comes up.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90347
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 932d1613d1e15ec22555e5ec09105c49eb850e36
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jun 9 12:16:19 2015 -0700
+
+    egl: Drop check for driver != NULL.
+    
+    Back in 2013, a patch was added (with 2 reviewers!) at the end of the
+    block to early exit the loop in this case, without noticing that the loop
+    already did.  I added another early exit case, again without noticing, but
+    Rob caught me.  Just drop the loop condition that apparently surprises
+    most of us, instead of leaving the end of the loop conspicuously not
+    exiting on success.
+    
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Rob Clark <robdclark@gmail.com>
+
+commit bcd8a64f32f6387cbd8ed8d0bda0f49bd7dd4251
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Jun 9 11:45:05 2015 -0700
+
+    gallium: Drop the gallium-specific Android sw winsys.
+    
+    This was part of gallium_egl, and we now have the normal libEGL Android
+    winsys support to handle it.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 6ce0b0e31754d88a542d4e3c90062e3f6a67f7b9
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 3 10:15:31 2015 -0700
+
+    vc4: Add support for building on Android.
+    
+    v2: Add a comment explaining why we link libmesa_glsl.  Drop warning
+        option from freedreno.  Add vc4 to the documentation for
+        BOARD_GPU_DRIVERS.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit fd3234891f7203d6b2b0992c34e880df325f75ea
+Author: Eric Anholt <eric@anholt.net>
+Date:   Sun Jun 7 11:57:46 2015 -0700
+
+    gallium: Enable build of NIR support on Android.
+    
+    v2: Add a comment explaining why we link libmesa_glsl.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 71aaf62fca3ed8b18fc2dcd69be0fd6bb7e58a91
+Author: Eric Anholt <eric@anholt.net>
+Date:   Sun Jun 7 16:47:25 2015 -0700
+
+    egl/dri2: Fix Android Lollipop build on ARM.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 8e9eec5cbf73bf977bc7e808a4e653737ee94c38
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri May 15 06:01:15 2015 -0700
+
+    meta: Abort texture upload if pixels == null and no pixel unpack buffer set
+    
+    in case of glTexImage{1,2,3}D(). Texture has already been allocated
+    at this point and we have no data to upload. Without this patch,
+    with create_pbo = true, we end up creating a temporary pbo and then
+    uploading uninitialized texture data.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+
+commit a4ff47ade9d95a27c9c55afbf6dd77d3f3b10562
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue May 12 04:17:04 2015 -0700
+
+    meta: Abort meta path if ReadPixels need rgb to luminance conversion
+    
+    After recent addition of pbo testing in piglit test getteximage-luminance,
+    it fails on i965. This patch makes a sub test pass.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit ba2b1f8668811eade97a4f134f6df900ff36c8aa
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Fri May 1 00:05:18 2015 -0700
+
+    mesa: Turn need_rgb_to_luminance_conversion() in to a global function
+    
+    This will be used by _mesa_meta_pbo_GetTexSubImage() in a later patch.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 0b13adcd0802d1ad60f625e7e557d2090a7c143e
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Thu Apr 30 23:36:18 2015 -0700
+
+    mesa: Use helper function need_rgb_to_luminance_conversion()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 82abdf209a2fb5b95b2bae80045aecc61202b13c
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Thu Apr 30 23:35:20 2015 -0700
+
+    mesa: Handle integer formats in need_rgb_to_luminance_conversion()
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Cc: <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+
+commit 6c14b66e40d34104c841ee6dfaeb65617e47be80
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Mon Jun 1 09:32:55 2015 -0700
+
+    meta: Use is_power_of_two() helper function
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 278460279b4e089d51a24fb01dc56dc1e88dcb72
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Mon May 4 23:10:28 2015 -0700
+
+    i965: Check for miptree pitch alignment before using intel_miptree_map_movntdqa()
+    
+    We have an assert() in intel_miptree_map_movntdqa() which expects
+    the pitch to be 16 byte aligned.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 84d27c32d238ca7a7b115bf190e7e527b7f70e92
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Thu May 28 14:48:51 2015 -0700
+
+    i965: Remove break after return
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 2e42deb29c878fb4c52aed6d2d54833aacba18ae
+Author: Jürgen Rühle <j-r@online.de>
+Date:   Sat Jun 6 18:37:20 2015 +0200
+
+    nv50/ir: OP_JOIN is a flow instruction
+    
+    OP_JOIN instructions are assumed to be flow instructions and mercilessly
+    cast to FlowInstruction.
+    
+    This patch fixes an instance where an OP_JOIN is created as a plain
+    instruction. This can cause crashes in the ir printer.
+    
+    [imirkin: add ->fixed = 1]
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 061c9bc2042b0686867e4321d94ba18761a6a1a7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jun 14 16:43:21 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.6.0
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit f9e04413284ce29214527b4d6369c8462000cb3d
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jun 14 16:40:00 2015 +0100
+
+    docs: Add sha256sums for the 10.6.0 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 5d327b373531861f86a726db669b3d656f1b5f8d)
+
+commit 311abe7fbd590505fd86e22a3030e00445218cb0
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jun 14 16:26:40 2015 +0100
+
+    docs: Update 10.6.0 release notes
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 3b9cde5c8138fb5cc45c652f2a5c15c5fa222bd7)
+
+commit 94ab56367169ba2902e83aded409db2df3d25eb1
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 15 11:24:47 2015 +0800
+
+    ilo: add ilo_state_raster_{line,poly}_stipple
+    
+    Initialize hardware stipple states on bound instead of on emission.
+
+commit 7cb853d52ae795b76adec41c98870166b41c9e6f
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 15 12:01:29 2015 +0800
+
+    ilo: add ilo_state_sample_pattern
+    
+    Move sample pattern initialization from ilo_render to
+    ilo_state_sample_pattern.
+
+commit 8f37e8e64fc897180603a7247e2fd47bf0ffb834
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon Jun 15 11:57:10 2015 +0800
+
+    ilo: add 3DSTATE_AA_LINE_PARAMETERS to ilo_state_raster
+    
+    Utilize ilo_state_raster to avoid redundant state change.
+
+commit b0a2280e45e5abc56e5301f84f33226469000d6c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 20:35:15 2015 +0200
+
+    gallium/util: add util_last_bit64
+    
+    This will be needed by radeonsi.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 2489054f663baa69e659e0878cb39f4e7197ee0b
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat Jun 6 14:12:34 2015 +0200
+
+    glsl: fix "tesselation" typo
+    
+    Trivial.
+
+commit 790510808e614ee6c5f55ba773734838041902cb
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri Jun 5 19:09:21 2015 +0200
+
+    r600g: handle TGSI input/output array declarations correctly
+    
+    Most of this code could be removed if r600g used tgsi_shader_info.
+
+commit 117926debb72e5027faae885f9aa7f1ca61f6a9c
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu Jun 11 07:36:28 2015 +0800
+
+    ilo: merge ilo_state_3d*.[ch] to ilo_state.[ch]
+    
+    With most code replaced to ilo_state_*, what was left did not belong there
+    anymore.
+
+commit 54e0a8ed5dcaaa0ef483d5960ae86f88e0bf8990
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 12 15:08:02 2015 +0800
+
+    ilo: add ilo_state_ps to ilo_shader_cso
+
+commit 30fcb31c9b095451ce5ac5a10c3c6b177dc03e20
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 12 14:47:02 2015 +0800
+
+    ilo: add ilo_state_{vs,hs,ds,gs} to ilo_shader_cso
+
+commit da6e45fcbc4570df0ec4b8c8885f33a206da3552
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Tue Jun 2 23:09:53 2015 +0800
+
+    ilo: embed ilo_state_sbe in ilo_shader
+
+commit 5a52627c4f9215649b0f244af96512b9aafceaa1
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Tue Jun 2 14:57:48 2015 +0800
+
+    ilo: embed ilo_state_vf in ilo_ve_state
+
+commit 9bfa987fb00a4e0471bcdb4948c8f416d7c5b562
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu May 28 13:43:56 2015 +0800
+
+    ilo: embed ilo_state_urb in ilo_state_vector
+
+commit eaf2c738991d43ec8e7b36bed05727deaf8151b6
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri May 29 15:25:13 2015 +0800
+
+    ilo: embed ilo_state_sol in ilo_shader
+
+commit 960ca7d5e32997a5367cf798f7930cbb890b3ab4
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon May 11 19:48:52 2015 +0800
+
+    ilo: embed ilo_state_cc in ilo_blend_state
+
+commit 402e155cd3a757a583f81fa6545c855b63947e7c
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 5 10:23:24 2015 +0800
+
+    ilo: embed ilo_state_raster in ilo_rasterizer_state
+
+commit ded7d412d04cf702596e91f36ba586b18f1933a2
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon May 18 00:00:37 2015 +0800
+
+    ilo: embed ilo_state_viewport in ilo_viewport_state
+
+commit 4b5c0a83415137ba1f894d70a6cf73db83d21f15
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu May 21 17:18:37 2015 +0800
+
+    ilo: replace ilo_sampler_cso with ilo_state_sampler
+
+commit 745ef2c07b23e1cf227eb26871fc464198b956e8
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Wed May 20 21:44:30 2015 +0800
+
+    ilo: replace ilo_view_surface with ilo_state_surface
+
+commit c10c1ac0cfb0ae42742f369d9f3fa2f4fba8639a
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon May 18 23:32:10 2015 +0800
+
+    ilo: replace ilo_zs_surface with ilo_state_zs
+
+commit 6dad848d1acfe781c735120c3db97f1a2f0c28fa
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 12 14:56:56 2015 +0800
+
+    ilo: add ilo_state_ps
+    
+    We want to make ilo_shader_cso a union of ilo_state_{vs,hs,ds,gs,ps}.
+
+commit df9f846ac6153e171fbcf661bad19168b336a703
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sat May 30 00:58:51 2015 +0800
+
+    ilo: add ilo_state_{vs,hs,ds,gs}
+    
+    We want to make ilo_shader_cso a union of ilo_state_{vs,hs,ds,gs} and ps
+    payload.
+
+commit a0bb1c2d1787cf2bd14620bf81d6d59cebfa766a
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri Jun 12 14:02:37 2015 +0800
+
+    ilo: add ilo_state_sbe
+    
+    We want to replace ilo_kernel_routing with ilo_state_sbe.
+
+commit 1ccab943b66de70b49cdbf3f14071fec9fe833cc
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sun May 31 00:00:49 2015 +0800
+
+    ilo: add ilo_state_vf
+    
+    We want to replace ilo_ve_state with ilo_state_vf.
+
+commit 9c77ebef2499a79fc9a0816971a6d16d50cf2954
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu May 28 13:21:02 2015 +0800
+
+    ilo: add ilo_state_urb
+
+commit 3ff40be0eecfd6bbcc17471590e44042b3ffa5d3
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri May 29 13:08:18 2015 +0800
+
+    ilo: add ilo_state_sol
+
+commit 62bb6437187b439d5959ccab094762163713a992
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Mon May 11 14:23:49 2015 +0800
+
+    ilo: add ilo_state_cc
+    
+    We want to replace ilo_dsa_state and ilo_blend_state with ilo_state_cc.
+
+commit 6be8b6053de356a679707a0de92b083a4ea83937
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sun May 10 13:52:21 2015 +0800
+
+    ilo: add ilo_state_raster
+    
+    We want to replace ilo_rasterizer_state with ilo_state_raster.
+
+commit 4fa7ed99a1e9334d96c1efd42344774dae19f466
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Tue May 12 23:43:50 2015 +0800
+
+    ilo: add ilo_state_viewport
+    
+    We want to replace ilo_viewport_cso and ilo_scissor_state with
+    ilo_state_viewport.
+
+commit 61fea171af64288bdf622e7ecf07e3ca42f83974
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Wed May 13 13:10:54 2015 +0800
+
+    ilo: add ilo_state_sampler
+    
+    We want to replace ilo_sampler_cso with ilo_state_sampler.
+
+commit f5f2007322b5468aa3025e7e259b4c50c7a7a0bd
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu May 14 09:46:42 2015 +0800
+
+    ilo: add ilo_state_surface
+    
+    We want to replace ilo_view_surface with ilo_state_surface.
+
+commit b91250a56b0af51b82bf4152a4f98e74fab22ed4
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sat May 16 08:27:24 2015 +0800
+
+    ilo: add ilo_state_zs
+    
+    We want to replace ilo_zs_surface with ilo_state_zs.  One noteworthy
+    difference is that ilo_state_zs always aligns level 0 to 8x4 when HiZ is
+    enabled.  HiZ will not be enabled for 1D surfaces as a result.
+
+commit 9af1fc590d90fdda65aa0cf145773480af52a4e5
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sat May 9 21:39:34 2015 +0800
+
+    ilo: update genhw headers
+    
+    Generate these new enums
+    
+      enum gen_reorder_mode;
+      enum gen_clip_mode;
+      enum gen_front_winding;
+      enum gen_fill_mode;
+      enum gen_cull_mode;
+      enum gen_pixel_location;
+      enum gen_sample_count;
+      enum gen_inputattr_select;
+      enum gen_msrast_mode;
+      enum gen_prefilter_op;
+    
+    Correct the type of GEN6_SAMPLER_DW0_BASE_LOD.  Rename gen_logicop_function,
+    gen_sampler_mip_filter, gen_sampler_map_filter, gen_sampler_aniso_ratio, and
+    others.
+
+commit 9cb0df4b50593e69f65b65704f5b64f3a12be9b5
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri May 22 14:21:22 2015 +0800
+
+    ilo: add ilo_image_disable_aux()
+    
+    When aux bo allocation fails, ilo_image_disable_aux() should be called to
+    disable aux buffer.
+
+commit f0de65cbc29b45fffbe4bf4e1ce299ddb8be9eda
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Tue May 26 15:46:44 2015 +0800
+
+    ilo: add array_size and level_count to ilo_image
+    
+    We will use them for bound checking.
+
+commit f9d2bbe967fb1fbbe7102c0765f067b3155f5ca6
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Sun May 17 11:55:05 2015 +0800
+
+    ilo: add pipe_texture_target to ilo_image
+    
+    Save the target in ilo_image instead of passing it around.
+
+commit 9da9cf729ff74684902cbb4b53b5cccd442df28e
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri May 15 10:39:05 2015 +0800
+
+    ilo: fix "Render Cache Read Write Mode"
+    
+    It needs to be set to R/W only when using certain messages via DP render cache.
+    Since we only use RT writes with the render cache, we never need to set it.
+
+commit 1885ac490834e70d831b5b4a287c272b4148761c
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Thu May 21 16:30:03 2015 +0800
+
+    ilo: avoid resource owning in core
+    
+    It is up to the users whether to reference count the BOs or not.
+
+commit ab7229b9b6b160e805d14d600a432e76a5e88ef8
+Author: Chia-I Wu <olvaffe@gmail.com>
+Date:   Fri May 22 13:49:20 2015 +0800
+
+    ilo: assert core objects are zero-initialized
+    
+    Core objects are usually embedded inside calloc()'ed objects and we expect
+    them to be zero-initialized.
+
+commit 4d35eef326e49cc8da50879d30a1c5088d4775e1
+Author: Tom Stellard <thomas.stellard@amd.com>
+Date:   Thu Jun 11 15:42:25 2015 +0000
+
+    radeon/llvm: Handle LLVM backend rename from R600 to AMDGPU
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+
+commit 3e7412233739c882548f50fe01d9f6c5f0dd4bbb
+Author: Tom Stellard <thomas.stellard@amd.com>
+Date:   Wed May 27 16:51:43 2015 -0700
+
+    gallivm: Only build lp_profile() body when PROFILE is defined
+    
+    The only use of lp_profile() is wrapped in #if defined(PROFILE),
+    so there is no reason to build it unless this macro is defined.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit faf7670ee86253cb7bf9422bf7937a0a63f4956f
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Jun 10 18:35:08 2015 +1000
+
+    glsl: fix compile error message
+    
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit 935f1f60da71df07aa45a3da92fa764a1830e0fb
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu May 14 09:28:37 2015 -0700
+
+    i965/gen8+: Add aux buffer alignment assertions
+    
+    This helped find the incorrect HALIGN values from the previous patches.
+    
+    v2: Add PRM references for assertions (Chad)
+    
+    v3: Remove duplicated part of commit message, assert num_samples > 1, instead of
+    num_samples > 0. (Chad)
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit a2421623db9b900d2ab0026539e8f7f6294475ea
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Fri May 22 15:57:37 2015 -0700
+
+    i965/gen9: Set HALIGN_16 for all aux buffers
+    
+    Just like the previous patch, but for the GEN9 constraints.
+    
+    v2:
+    bugfix: Gen9 HALIGN was being set for all miptree buffers (Chad). To address
+    this, move the check to where the gen8 check is, and do the appropriate
+    conditional there.
+    
+    v3:
+    Remove stray whitespace introduced in v2 (Chad)
+    Rework comment to show AUX_CCS and AUX_MCS specifically. Remove misworded part
+    about gen7 (Chad).
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com> (v1)
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> (v1)
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit c4aa041a611dfeb0a880c2173cb35c9c08dc79ca
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu May 14 09:30:02 2015 -0700
+
+    i965/gen8: Correct HALIGN for AUX surfaces
+    
+    This restriction was attempted in this commit:
+    commit 47053464630888f819ef8cc44278f1a1220159b9
+    Author: Anuj Phogat <anuj.phogat@gmail.com>
+    Date:   Fri Feb 13 11:21:21 2015 -0800
+    
+       i965/gen8: Use HALIGN_16 if MCS is enabled for non-MSRT
+    
+    However, the commit itself doesn't achieve the desired goal as determined by the
+    asserts which the next patch adds. mcs_mt is NULL (never set) because we're in the
+    process of allocating the mcs_mt miptree when we get to this function. I didn't
+    check, but perhaps this would work with blorp, however, meta clears allocate the
+    miptree structure (which AFAICT needs the alignment also) way before it
+    allocates using meta clears where the renderbuffer is allocated way before the
+    aux buffer.
+    
+    The restriction is referenced in a few places, but the most concise one [IMO]
+    from the spec is for Gen9. Gen8 loosens the restriction in that it only requires
+    this for non-msrt surface.
+    
+       When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN 16 must
+       be used.
+    
+    With the code before the miptree layout flag rework (patches preceding this),
+    accomplishing this workaround is very difficult.
+    
+    v2:
+    bugfix: Don't set HALIGN16 for gens before 8 (Chad)
+    
+    v3:
+    non-trivial rebase
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Cc: Neil Roberts <neil@linux.intel.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit e92fbdcf9cf69e6b135c17c2851d50e256da8c29
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu May 21 22:47:37 2015 -0700
+
+    i965: Extract tiling from fast clear decision
+    
+    There are several constraints when determining if one can fast clear a surface.
+    Some of these are alignment, pixel density, tiling formats, and others that vary
+    by generation. The helper function which exists today does a suitable job,
+    however it conflates "BO properties" with "Miptree properties" when using
+    tiling. I consider the former to be attributes of the physical surface, things
+    which are determined through BO allocation, and the latter being attributes
+    which are derived from the API, and having nothing to do with the underlying
+    surface.
+    
+    Determining tiling properties and creating miptrees are related operations
+    (when we allocate a BO for a miptree) with some disjoint constraints. By
+    extracting the decisions into two distinct choices (tiling vs. miptree
+    properties), we gain flexibility throughout the code to make determinations
+    about when we can or cannot fast clear strictly on the miptree.
+    
+    To signify this change, I've also renamed the function to indicate it is a
+    distinction made on the miptree. I am torn as to whether or not it was a good
+    idea to remove "non_msrt" since it's a really nice thing for grep.
+    
+    v2:
+    Reword some comments (Chad)
+    intel_is_non_msrt_mcs_tile_supported->intel_tiling_supports_non_msrt_mcs (Chad)
+    Make full if ladder for gens in above function (Chad)
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Cc: Topi Pohjolainen <topi.pohjolainen@intel.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@linux.intel.com>
+
+commit b91a110d5ce946abe0ee84625498676ac4bcf7aa
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Fri May 22 18:13:24 2015 -0700
+
+    i965/gen9: Only allow Y-Tiled MCS buffers
+    
+    For GEN9, much of the logic to use X-Tiled buffers has been stripped out. It is
+    still supported in some places, but it's never desirable. Unfortunately we don't
+    yet have the ability to have Y-Tiled scanout (see:
+    http://patchwork.freedesktop.org/patch/46984/),
+    
+    NOTE: This patch shouldn't actually do anything since SKL doesn't yet use fast
+    clears (they are disabled because they are causing regressions). Therefore, the
+    only case we can get to this function on SKL is by way of
+    intel_update_winsys_renderbuffer_miptree.
+    
+    v2: Update commit message to be more clear that the NOTE is for SKL only.
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit b5c5aac687ca4e203695790e334c1f7f3315c240
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu May 21 16:04:43 2015 -0700
+
+    i965: Consolidate certain miptree params to flags
+    
+    I think pretty much everyone agrees that having more than a single bool as a
+    function argument is bordering on a bad idea. What sucks about the current
+    code is in several instances it's necessary to propagate these boolean
+    selections down to lower layers of the code. This requires plumbing (mechanical,
+    but still churn) pretty much all of the miptree functions each time.  By
+    introducing the flags parameter, it is possible to add miptree constraints very
+    easily.
+    
+    The use of this, as is already the case, is sometimes we have some information
+    at the time we create the miptree that needs to be known all the way at the
+    lowest levels of the create/allocation, disable_aux_buffers is currently one
+    such example. There will be another example coming up in a few patches.
+    
+    v2:
+    Tab fix. (Ben)
+    Long line fixes (Topi)
+    Use anonymous enum instead of #define for layout flags (Chad)
+    Use 'X != 0' instead of !!X (everyone except Chad)
+    
+    v3:
+    Some non-trivial conflict resolution on top of Anuj's patches.
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Cc: "Pohjolainen, Topi" <topi.pohjolainen@intel.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
+
+commit 0d2068a92d74f421960947e589cf56a2b125035f
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Jun 10 19:40:07 2015 +1000
+
+    glsl: enforce restriction on AoA interface blocks in GLSL ES 3.10
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 94d669b0d2f56d58a494a40f9e8acdef01306496
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Fri Jun 12 16:03:56 2015 +1000
+
+    glsl: enforce fragment shader input restrictions in GLSL ES 3.10
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit 3d78bdea3155ff3f19a782e0eb3a55612bfd8dd0
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Wed Jun 10 18:46:22 2015 +1000
+
+    glsl: enforce output variable rules for GLSL ES 3.10
+    
+    Some rules are already applied; this just adds the missing ones.
+    
+    Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
+
+commit f0e772392f1c61df6e3f253dc236eb9737fb6146
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Fri Mar 13 12:03:52 2015 -0700
+
+    i965/nir: Support barrier intrinsic function
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit f7ef8ec9d8f56b77029534952628c3204c4d5f63
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Wed Aug 27 11:32:08 2014 -0700
+
+    i965/fs: Implement support for ir_barrier
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 7953c000731ec1310fdbb5d8a13720fe0cdbf6f4
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Tue Nov 4 18:11:37 2014 -0800
+
+    i965: Add brw_barrier to emit a Gateway Barrier SEND
+    
+    This will be used to implement the Gateway Barrier SEND needed to implement
+    the barrier function.
+    
+    v2:
+     * notify => gateway_notify (Ken)
+     * combine short lines of brw_barrier proto/decl (mattst88)
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 0d250cc210f971f566bbe5b1e54cf3cd114537e9
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Tue Nov 4 18:05:04 2014 -0800
+
+    i965: Add brw_WAIT to emit wait instruction
+    
+    This will be used to implement the barrier function.
+    
+    v2:
+     * Rename to brw_WAIT (mattst88)
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit b925f1a1df86120d2846bf09797bb0967040f9c6
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Tue Nov 4 17:52:42 2014 -0800
+
+    i965: Add notification register
+    
+    This will be used by the wait instruction when implementing the barrier()
+    function.
+    
+    v2:
+     * Changes suggested by mattst88
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit bdbbec33cf23193e1c81e0ecf28f2cc793d507bf
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Tue Nov 4 17:51:19 2014 -0800
+
+    i965: Disassemble Gateway SEND messages
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 69659546a6a352239c5989624f9d9f084c643d7d
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Tue Nov 4 18:07:52 2014 -0800
+
+    i965/inst: Add gateway_notify and gateway_subfuncid fields
+    
+    These fields will be used when emitting a send for the barrier function.
+    
+    Reference: IVB PRM Volume 4, Part 2, Section 1.1.1 Message Descriptor
+    
+    v2:
+     * notify => gateway_notify (Ken)
+     * define bits for gen4-gen6 (bwidawsk, Ken)
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 1b9cc257d4b805e86af4860bb356dbedf5e054c6
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Tue Nov 4 17:48:44 2014 -0800
+
+    i965: Add GATEWAY_SFID definitions
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 2867f2e8cd54e2cbb38140e2e0f5521973091ace
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Fri Mar 13 12:03:15 2015 -0700
+
+    nir: Add barrier intrinsic function
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 86855365b4059c60a9e1dcc0b7713941a2507bd0
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 7 19:29:50 2014 +1200
+
+    glsl: Add builtin barrier() function
+    
+    [jordan.l.justen@intel.com: Add CS support]
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit e7f628c2fc5ef42672e3281e224226c3d47b1bac
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Sun Sep 7 19:24:15 2014 +1200
+
+    glsl: Add ir node for barrier
+    
+    v2:
+     * Changes suggested by mattst88
+    
+    [jordan.l.justen@intel.com: Add nir support]
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 86b4acb409a2103d6a12f83de7ec04af6cc05fec
+Author: Jordan Justen <jordan.l.justen@intel.com>
+Date:   Thu Jun 11 09:44:54 2015 -0700
+
+    i965/cs: Use exec all for CS terminate
+    
+    This prevents an assertion from being hit with SIMD16:
+    
+    Assertion `inst->exec_size == dispatch_width() || force_writemask_all' failed.
+    
+    Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit cfc175b40995ca4e590cd30897f6bb017e1376a3
+Author: Chad Versace <chad.versace@intel.com>
+Date:   Wed Jun 10 09:50:47 2015 -0700
+
+    i965/fs: Fix unused variable warning
+    
+    Annotate offset_components with attribute 'unused'.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit d15c06b514936fb927b174a716c24af8f5892542
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Jun 10 23:50:21 2015 +0100
+
+    vc4: automake: enable subdir-objects
+    
+    Silence the warnings about the future incompatibility with automake 2.0
+    
+    Cc: Eric Anholt <eric@anholt.net>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 634f2002563b4fca68490c0a39518ea838f28fb1
+Author: Erik Faye-Lund <kusmabite@gmail.com>
+Date:   Wed Jun 10 23:35:04 2015 +0100
+
+    mesa: build xmlconfig to a separate static library
+    
+    As we use the file from both the dri modules and loader, we end up with
+    multiple definitions of the symbols provided in our gallium dri modules.
+    Additionally we compile the file twice.
+    
+    Resolve both issues, effectively enabling the build on toolchains which
+    don't support -Wl,--allow-multiple-definition.
+    
+    v2: [Emil Velikov]
+     - Fix the Scons/Android build.
+     - Resolve libgbm build issues (bring back the missing -lm)
+    
+    Cc: Julien Isorce <j.isorce@samsung.com>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90310
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90905
+    Acked-by: Matt Turner <mattst88@gmail.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 83b5648a1e0b7c21536af18c0d29da2f2a31215e
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Apr 15 14:34:00 2015 +0100
+
+    targets/nine: link against libnir/libglsl_util
+    
+    Based on commit 101142c4010(xa: support for drivers which use NIR)
+    
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90466
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit ba512cc7fa5db0aeeb2fc0708920914cd3a5bf95
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Apr 15 12:46:30 2015 +0100
+
+    pipe-loader: add libnir and libglsl_util to the link
+    
+    Based on commit 101142c4010(xa: support for drivers which use NIR)
+    
+    Cc: Rob Clark <robclark@freedesktop.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90466
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 1df5a6c71ee4a3c08b5da3f8bae24880af16b74c
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Apr 15 13:40:55 2015 +0100
+
+    mesa: add a dummy _mesa_error_no_memory() symbol to libglsl_util
+    
+    Rather than forcing everyone to provide their own definition of the symbol
+    provide a common (dummy) one.
+    
+    This helps us resolve the build of the standalone pipe-drivers (amongst
+    others), which are missing the symbol.
+    
+    Cc: Rob Clark <robclark@freedesktop.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 4722743f4b920c6986a7148ef3ce76b4fd12db46
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Apr 15 11:42:55 2015 +0100
+
+    gallium: use $(top_builddir) when referencing static archives
+    
+    Just like every other place in gallium.
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 3f5dc9b94fc47f25821cec0a052df3d8f4cb5a1f
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Wed Apr 15 11:28:38 2015 +0100
+
+    freedreno: use CXX linker rather than explicit link against libstdc++
+    
+    Cc: Rob Clark <robclark@freedesktop.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 0e55db3b8a9a360511d8679953b8e4b890d66ed7
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 13:08:00 2015 +0100
+
+    egl/haiku: coding style fixes
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit b0f33e9736116a1a6a7bd8bade51d473d7373daa
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 13:07:08 2015 +0100
+
+    egl/haiku: plug some obvious memory leaks
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit e77a32fcaed30815d0f95e0d05432e8637ab0f3e
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 12:33:55 2015 +0100
+
+    egl/haiku: minor surface management cleanups
+    
+    Drop the stub/unused function haiku_create_surface() and add some basic implementation for destroy_surface()
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit d38a80ba6c75b8f594a4ff88e59ede254075a859
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 12:22:28 2015 +0100
+
+    egl/haiku: kill off haiku_log()
+    
+    It's an incomplete copy of the default _eglLog() implementation. Just
+    use the default logger.
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 667fe2f5e9508a9591eeabdd7a01596006d87e5f
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 12:20:34 2015 +0100
+
+    egl/haiku: we don't use src/loader, drop all the references to it
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit d0af2833039dca2963f3ddf241e3084e4bf7e840
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 12:18:35 2015 +0100
+
+    egl/haiku: remove unused variables in struct haiku_egl_driver
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 46f87b2c19dc0a326c963c652b174384d59e3943
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 12:17:23 2015 +0100
+
+    egl/haiku: handle memory allocation failure
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit ed9dcdf927b9badd1325130b6b88ad26b04d2ec1
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 12:02:45 2015 +0100
+
+    egl/haiku: use CALL/TRACE/ERROR over _eglLog() for haiku specifics
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 0b652fedb5e097bcdea79e3b922e946d143148f6
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 11:31:40 2015 +0100
+
+    egl/haiku: remove commented out code
+    
+    It serves little to no purpose. As the driver gets updated, one can
+    look at the existing implementation (dri2) for reference rather than
+    letting the commented functions bitrot.
+    
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit c3036f4bb1c4ad788200afc877d42e63b64f330e
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Thu Jun 11 11:24:17 2015 +0100
+
+    egl/haiku: use correct version variable
+    
+    Earlier commit folded the two separate variables into one, but forgot to
+    update the haiku driver.
+    
+    Fixes: 0e4b564ef28(egl: combine VersionMajor and VersionMinor into one
+    variable)
+    Cc: Marek Olšák <marek.olsak@amd.com>
+    Cc: Alexander von Gluck IV <kallisti5@unixzen.com>
+    Acked-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 0dde821bcc96c579ac1f26e26fc03ca117caa377
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Fri Jun 12 12:13:41 2015 +0100
+
+    trace: Add missing p_compiler.h include.
+    
+    For boolean.
+    
+    Trivial.
+
+commit 8d3c48eed24f351c86361707978647c78010bb7f
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 10 14:40:33 2015 +0300
+
+    i965/fs: Remove one more fixed brw_null_reg() from the visitor.
+    
+    Instead use fs_builder::null_reg_f() which has the correct register
+    width.  Avoids the assertion failure in fs_builder::emit() hit by the
+    "ES3-CTS.shaders.loops.for_dynamic_iterations.unconditional_break_fragment"
+    GLES3 conformance test introduced by 4af4cfba9ee1014baa4a777660fc9d53d57e4c82.
+    
+    Reported-and-reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 16658f426dbd81fcbc317b21ae9a3f7c9b6448fb
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Jun 9 09:20:58 2015 -0700
+
+    Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."
+    
+    This reverts commit f3b709c0ac073cd0ec90a3a0d91d1ee94668e043.
+    
+    The "dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_4.
+    interpolation.lines_wide" test appears to be broken on Cherryview when
+    we expose line widths greater than 12.0.  I'm not sure why.
+    
+    For now, just go back to the limits we used on older platforms.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90902
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit f4310cdbd08f20276237fbefa3eba406aa109636
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Jun 10 01:46:13 2015 -0700
+
+    i965: Re-index SSA definitions before printing NIR code.
+    
+    This makes the SSA definitions use sequential numbers (0, 1, 2, ...)
+    instead of seemingly random ones.  There's not much point normally,
+    but it makes debug output much easier to read.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+
+commit 1a6e4f46ed117b393e26aff326e5b05d4aea7fb0
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jun 10 10:59:37 2015 -0600
+
+    gallium: remove explicit values from PIPE_CAP_ enums
+    
+    The other PIPE_CAPF_ and PIPE_SHADER_CAP_ enums don't have explicit values.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 9fed4f9bf5146af1fcd093422b39353845f2267a
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Thu Jun 11 13:32:21 2015 +0100
+
+    mesa/main: Don't use ONCE_FLAG_INIT as a r-value.
+    
+    It should only be used as an initializer expression.
+    
+    Trivial, and fixes Windows builds.
+    
+    Nevertheless, overwriting an once_flag like this seems dangerous and
+    should be revised.
+
+commit 0f1fe649b7fdfb3ab8c7b14e642bc0e3831fc092
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu Jun 11 08:49:46 2015 +0200
+
+    i965/gen8: Fix antialiased line rendering with width < 1.5
+    
+    The same fix Marius implemented for gen6 (commit a9b04d8a) and
+    gen7 (commit 24ecf37a).
+    
+    Also, we need the same code to handle special cases of line width
+    in gen6, gen7 and now gen8, so put that in the helper function
+    we use to compute the line width.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 5b61cb12366f65a5d7e21b47fa3501a03fd884ee
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Tue May 26 15:32:21 2015 +0300
+
+    glsl: fix constructing a vector from a matrix
+    
+    Without this patch, the following constructs (not an extensive list)
+    would crash mesa:
+    
+    - mat2 foo = mat2(1); vec4 bar = vec4(foo);
+    - mat3 foo = mat3(1); vec4 bar = vec4(foo);
+    - mat3 foo = mat3(1); ivec4 bar = ivec4(foo);
+    
+    The first case is explicitly allowed by the GLSL spec, as seen on
+    page 101 of the GLSL 4.40 spec:
+    
+    	"vec4(mat2) // the vec4 is column 0 followed by column 1"
+    
+    The other cases are implicitly allowed also.
+    
+    The actual changes are quite minimal. We first split each column of
+    the matrix to a list of vectors and then use them to initialize the
+    vector. An additional check to make sure that we are not trying to
+    copy 0 elements of a vector fixes the (i)vec4(mat3) case as the last
+    vector (3rd column) is not needed at all.
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 83624c141d3568217190933945c3243913e7ba2c
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Wed May 6 13:43:54 2015 +0300
+
+    mesa/es3.1: enable DRAW_INDIRECT_BUFFER_BINDING for gles3.1
+    
+    (increases ES31-CTS.draw_indirect.basic.* passing tests)
+    
+    v2: only expose DRAW_INDIRECT_BUFFER_BINDING for GL core + ES3.1
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 56e9f3b493a8677e60e4473ca0faf0e3d1a79888
+Author: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+Date:   Fri Mar 20 15:40:26 2015 +0200
+
+    mesa/main: avoid null access in format_array_table_init()
+    
+    If _mesa_hash_table_create failed we'd get null pointer. Report
+    error and go away.
+    
+    Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit fd00c738c08e54c9dfdc195e59f780f30d2f9e07
+Author: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+Date:   Fri Mar 20 15:13:14 2015 +0200
+
+    mesa/main: Remove _mesa_HashClone()
+    
+    I didn't find this being used anywhere.
+    
+    Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
+    Reviewed-by: Timothy Arceri <t_arceri@yahoo.com.au>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit bd38f91f8d80897ca91979962d80d4bc0acef586
+Author: Alexander Monakov <amonakov@gmail.com>
+Date:   Tue Jun 9 20:58:22 2015 +0300
+
+    i965: do_blit_drawpixels: decode array formats
+    
+    Correct a regression introduced by commit 922c0c9fd526 by converting "array
+    format", if received from _mesa_format_from_format_and_type, to mesa_format.
+    
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=90839
+    Signed-off-by: Alexander Monakov <amonakov@gmail.com>
+    Tested-by: AnAkkk <anakin.cs@gmail.com>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit f9a18acb56c69b24c1e47cd326dc98e14fadcf94
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Wed Jun 10 09:07:32 2015 +0200
+
+    i965: do not round line width when multisampling or antialiasing are enabled
+    
+    In commit fe74fee8fa721a we rounded the line width to the nearest integer to
+    match the GLES3 spec requirements stated in section 13.4.2.1, but that seems
+    to break a dEQP test that renders wide lines in some multisampling scenarios.
+    
+    Ian noted that the OpenGL 4.4 spec has the following similar text:
+    
+        "The actual width of non-antialiased lines is determined by rounding the
+        supplied width to the nearest integer, then clamping it to the
+        implementation-dependent maximum non-antialiased line width."
+    
+    and suggested that when ES removed antialiased lines, they removed
+    "non-antialiased" from that paragraph but probably should not have.
+    
+    Going by that note, this patch restricts the quantization implemented in
+    fe74fee8fa721a only to regular aliased lines. This seems to keep the
+    tests fixed with that commit passing while fixing the broken test.
+    
+    v2:
+      - Drop one of the clamps (Ken, Marius)
+      - Add a rule to prevent advertising line widths that when rounded go beyond
+        the limits allowed by the hardware (Ken)
+      - Update comments in the code accordingly (Ian)
+      - Put the code in a utility function (Ian)
+    
+    Fixes:
+    dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90749
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit f83b9e58f6e8a748def367c7d523eb7285b1aeb7
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Jun 9 14:33:47 2015 -0700
+
+    i965: Momentarily pretend to support ARB_texture_stencil8 for blits.
+    
+    Broadwell's stencil blitting code attempts to bind a renderbuffer as a
+    texture, using dd->BindRenderbufferTexImage().
+    
+    This calls _mesa_init_teximage_fields(), which then attempts to set
+    img->_BaseFormat = _mesa_base_tex_format(ctx, internalFormat), which
+    assert fails if internalFormat is GL_STENCIL_INDEX8 but
+    ARB_texture_stencil8 is unsupported.
+    
+    To work around this, just pretend to support the extension momentarily,
+    during the blit.  Meta has already munged a variety of other things in
+    the context (including the API!), so it's not that much worse than what
+    we're already doing.
+    
+    Fixes regressions since commit f7aad9da20b13c98f77d6a690b327716f39c0a47
+    (mesa/teximage: use correct extension for accept stencil texture.).
+    
+    v2: Add an XXX comment explaining the situation (requested by Jason
+        Ekstrand and Martin Peres), and an assert that we don't support
+        the extension so we remember to remove this hack (requested by
+        Neil Roberts).
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 7217faf39f63f81b74f268d62fbdd94d445b0e6f
+Author: Brian Paul <brianp@vmware.com>
+Date:   Wed Jun 10 07:28:40 2015 -0600
+
+    llvmpipe: simplify lp_resource_copy()
+    
+    Just implement it in terms of util_resource_copy_region().  Both the
+    original code and util_resource_copy_region() boil down to mapping,
+    calling util_copy_box() and unmapping.
+    
+    No piglit regressions.  This will also help to implement GL_ARB_copy_image.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 5b0d6f5c1bc3f7bd37c6efebf48f80ca6ff3ef87
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Tue Jun 9 12:26:48 2015 +0300
+
+    mesa: add GL_RED, GL_RG support for floating point textures
+    
+    Mesa supports EXT_texture_rg and OES_texture_float. This patch adds
+    support for using unsized enums GL_RED and GL_RG for floating point
+    targets and writes proper checks for internalformat when format is
+    GL_RED or GL_RG and type is of GL_FLOAT or GL_HALF_FLOAT.
+    
+    Later, internalformat will get adjusted by adjust_for_oes_float_texture
+    after these checks.
+    
+    v2: simplify to check vs supported enums
+    v3: follow the style and break out if internalFormat ok (Kenneth)
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90748
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 07e4f12e66f64c8075c0d3fd1c23cbd7c657970c
+Author: Tapani Pälli <tapani.palli@intel.com>
+Date:   Mon Jun 8 14:53:26 2015 +0300
+
+    mesa: allow unsized formats GL_RG, GL_RED for GLES 3.0 with half float
+    
+    v2: && -> ||, we enable on gles3 or if ARB_texture_rg is enabled
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90748
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit adee54f8269c5e9f4fde91d19f0e465afc8f14d8
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Tue Jun 9 16:53:55 2015 +1000
+
+    glsl: remove restriction on unsized arrays in GLSL ES 3.10
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 563706c14641fde2ab604d590b5425680354f280
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Wed Jun 10 13:51:59 2015 +1000
+
+    st/dri: check pscreen is valid before querying param
+    
+    we don't check the validity of pscreen until dri_init_screen_helper
+    
+    hit this trying to init glamor on a device with no driver (udl).
+    
+    Acked-by: Michel Dänzer <michel.daenzer@amd.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit c6877c9e5983287a0741b26a358b7d744aebe232
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Wed Jun 10 13:26:56 2015 +1000
+
+    nouveau: set imported buffers to what the kernel gives us
+    
+    When we import a dma-buf fd from another driver the kernel
+    gives us the right info, and this trashes it.
+    
+    Convert the kernel bo flags into the domain flags.
+    
+    This helps getting reverse prime and glamor working.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Acked-by: Ben Skeggs <bskeggs@redhat.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 9dca3beb62e894bbd720c5eecb47c0fd2c6132f9
+Author: Eric Anholt <eric@anholt.net>
+Date:   Sun Jun 7 12:02:02 2015 -0700
+
+    vc4: Drop qir include from vc4_screen.h
+    
+    We didn't need any of it except for the list header, and qir.h pulls in
+    nir.h, which is not really interesting to winsys.
+
+commit 8d10b2a0460ca01a5c65a87184717c6c2e4bcaeb
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed Jun 3 10:18:04 2015 -0700
+
+    vc4: Drop subdirectory in vc4 build.
+    
+    Just because we put the source in a subdir, doesn't mean we need helper
+    libraries in the build.  This will also simplify the Android build setup.
+
+commit e67b12eaf89acc9c446de77b77120a2f6cdbbe12
+Author: Eric Anholt <eric@anholt.net>
+Date:   Mon Jun 1 12:50:49 2015 -0700
+
+    vc4: Update to current kernel validation code.
+    
+    After profiling on real hardware, I found a few ways to cut down the
+    kernel overhead.
+
+commit c5e11e5f7f67fe5a1d28b1446f87af7aa3ba68d8
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:39 2015 +0800
+
+    android: build with libcxx on android lollipop
+    
+    On Lollipop, apparently stlport is gone and libcxx must be used instead.
+    We still support stlport when building on earlier android releases.
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 1842832660c4eade037caa760110b58a2d7f055b
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:34 2015 +0800
+
+    android: enable the radeonsi driver
+    
+    Based on the nice work of Paulo Sergio Travaglia <pstglia@gmail.com>.
+    
+    The main modifications are:
+    
+    - Include paths for LLVM header files and shared/static libraries
+    - Set C++ flag "c++11" to avoid compiling errors on LLVM header files
+    - Set defines for LLVM
+    - Add GALLIVM source files
+    - Changes path of libelf library for lollipop
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Acked-by: Eric Anholt <eric@anholt.net>
+
+commit 1e4081f54aa5c6cba566ed549389d847bf7e6799
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:33 2015 +0800
+
+    android: generate files by $(call es-gen)
+    
+    Use the pre-defined macro es-gen to generate new added files
+    instead of writing new rules manually. The handmade rules
+    that may generate the files before the directory is created
+    result in such an error:
+    
+    /bin/bash: out/target/product/x86/gen/STATIC_LIBRARIES/libmesa_st_mesa_intermediates/main/format_pack.c: No such file or directory
+    make: *** [out/target/product/x86/gen/STATIC_LIBRARIES/libmesa_st_mesa_intermediates/main/format_pack.c] Error 1
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit c3b5afbd4e682f76e16ea85883af571165bd24ee
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:30 2015 +0800
+
+    android: try to load gallium_dri.so directly
+    
+    This avoids needing hardlinks between all of the DRI driver .so names,
+    since we're the only loader on the system.
+    
+    v2: Add early exit on success (like previous block) and log message on
+        failure.
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit ac296aee58158ccf1953e812a04f99eb5f8eb57b
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:29 2015 +0800
+
+    android: Depend on gallium_dri from EGL, instead of linking in gallium.
+    
+    The Android gallium build used to use gallium_egl, which was removed back
+    in March.  Instead, we will now use a normal Mesa libEGL loader with
+    dlopen()ing of a DRI module.
+    
+    v2: add a clean step to rebuild all dri modules properly.
+    v3: Squish the 2 patches doing this together (change by anholt).
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 933df3d3350867282d7334c94abf1ec677d78029
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:28 2015 +0800
+
+    android: add rules to build a gallium_dri.so
+    
+    This single .so includes all of the enabled gallium drivers.
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit f4f609b27e4fbefb52b84b617051fb4cdba45c8f
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:27 2015 +0800
+
+    android: add rules to build gallium/state_trackers/dri
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 581aa208fa8fc653dce50d95c1f3400bb0c68ab2
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:26 2015 +0800
+
+    android: export more dirs from libmesa_dri_common
+    
+    The include paths of libmesa_dri_common are also used by modules
+    that need libmesa_dri_common.
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit b8213bbe4cec5bab89e07aab8d225e617d4a2087
+Author: Chih-Wei Huang <cwhuang@android-x86.org>
+Date:   Wed May 20 11:25:25 2015 +0800
+
+    android: loader: export the path to be included
+    
+    Signed-off-by: Chih-Wei Huang <cwhuang@linux.org.tw>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit 30ba4faf5dcb9f55352eed1b37a3e820e8efe2ad
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Mon Jun 8 14:52:07 2015 -0700
+
+    i965/gen9: Use raw PS invocation count for queries
+    
+    Previously the number needed to be divided by 4 to get the proper results. Now
+    the hardware does the right thing. Through experimentation it seems Braswell
+    (CHV) does also need the division by 4.
+    
+    Fixes piglit test:
+    arb_pipeline_statistics_query-frag
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit c10dc485f395d3b8d616bf2857bcdef9712dc47b
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue Jun 9 09:14:17 2015 -0600
+
+    glsl: fix comment typo: s/accpet/accept/
+
+commit 37e0677870febefdd8b89be335f0e97bfd4a7c9b
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri Jun 5 08:00:09 2015 -0600
+
+    mesa: remove some MAX_NV_FRAGMENT_PROGRAM_* macros
+    
+    GL_NV_fragment_program support was removed a while ago.  This is just
+    some clean-up.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 670862a5069f2759418450698aa4ab7d9f0e079f
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sat Jun 6 12:08:00 2015 -0700
+
+    fs/reg_allocate: Remove the MRF hack helpers from fs_visitor
+    
+    These are helpers that only exist in this one file.  No reason to put them
+    in the visitor.
+    
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+
+commit 86e5afbfee5492235cab1a7be4ea49ac02be1644
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Sat Jun 6 12:15:30 2015 -0700
+
+    i965/fs: Don't let the EOT send message interfere with the MRF hack
+    
+    Previously, we just put the message for the EOT send as high in the file as
+    it would go.  This is because the register pre-filling hardware will stomp
+    all over the early registers in the file in preparation for the next thread
+    while you're still sending the last message.  However, if something happens
+    to spill, then the MRF hack interferes with the EOT send message and, if
+    things aren't scheduled nicely, will stomp on it.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90520
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+
+commit 65bd4159b35c7213e0ac27c6299495e08a105ab4
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Tue Jun 9 14:43:21 2015 +0100
+
+    rtasm: Generalize executable memory allocator to all Unices.
+    
+    We're only using fairly portable standard Unix calls here, so might as
+    well save ourselves future trouble by enabling on all Unices by default.
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=90904
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 698c391521561a1f0e4ff2570e35417be9968eaa
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 18:17:50 2015 +0300
+
+    i965/fs: Drop fs_inst::force_uncompressed.
+    
+    This is now unused.  Saves a whole bit of memory per instruction.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 44928b799adbbf2671c482431b3b7a390118725c
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jun 8 19:32:18 2015 +0300
+
+    i965/fs: Remove dead IR construction code from the visitor.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 51948085a2e5d97dbf2cd3c255a5873d509773eb
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 18:16:30 2015 +0300
+
+    i965/fs: Migrate test_fs_cmod_propagation to the IR builder.
+    
+    v2: Use set_predicate/condmod.  Use fs_builder::OPCODE instead of
+        ::emit.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 76c8142d0af45ab9907ebc9cfd2855fa753735b6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 18:15:22 2015 +0300
+
+    i965/fs: Migrate test_fs_saturate_propagation to the IR builder.
+    
+    v2: Use set_saturate.  Use fs_builder::OPCODE instead of ::emit.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit bf83a1a219af8bf82c3c721888bbe0dfc3eced34
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:02:57 2015 +0300
+
+    i965/fs: Migrate translation of NIR texturing instructions to the IR builder.
+    
+    v2: Don't remove assignments of base_ir just yet.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 979fe2ffee3956186017fe6c115aed53fc87ad3d
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:01:32 2015 +0300
+
+    i965/fs: Migrate translation of NIR intrinsics to the IR builder.
+    
+    v2: Use fs_builder::SEL instead of ::emit.  Use set_condmod().
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit fe88c7ae38c72ea09ced69fb12ff00f58bdf1d6e
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 20:59:26 2015 +0300
+
+    i965/fs: Migrate translation of NIR ALU instructions to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 3632c28bde071950dc57e42eb62a65fb838c8bdc
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 20:57:12 2015 +0300
+
+    i965/fs: Migrate translation of NIR control flow to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 9976731485abb68eb3b5ae6f11a7838977b95b5b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:17:36 2015 +0300
+
+    i965/fs: Migrate NIR variable handling to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 09733f220ac9921ce7d8c3524bc5327d8203c446
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:12:49 2015 +0300
+
+    i965/fs: Migrate NIR emit_percomp() to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit d5cb2e513794f6c26259665bc93cf507e86ae3b8
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:08:43 2015 +0300
+
+    i965/fs: Migrate CS terminate message to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit e522f12f03bcb0edb1384adff894918bf8d6d1b6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 22:43:00 2015 +0300
+
+    i965/fs: Migrate VS output writes to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit e32c16c47f7a3cf25e2b4d2f3b97d0f8f89669c0
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:07:52 2015 +0300
+
+    i965/fs: Migrate FS framebuffer writes to the IR builder.
+    
+    The explicit call to fs_builder::group() in emit_single_fb_write() is
+    required by the builder (otherwise the assertion in fs_builder::emit()
+    would fail) because the subsequent LOAD_PAYLOAD and FB_WRITE
+    instructions are in some cases emitted with a non-native execution
+    width.  The previous code would always use the channel enables for the
+    first quarter, which is dubious but probably worked in practice
+    because FB writes are never emitted inside non-uniform control flow
+    and we don't pass the kill-pixel mask via predication in the cases
+    where we have to fall-back to SIMD8 writes.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 840cbef416b47fa1a92d6491cdd2895442f063bc
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:07:34 2015 +0300
+
+    i965/fs: Migrate FS alpha test to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit ad68853f17868081a69b3f73f4bf4c1bc8b2571d
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 20:45:54 2015 +0300
+
+    i965/fs: Migrate FS discard handling to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 46f264638ad97a0b806e6fad7117d62a2cf914b6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:56:20 2015 +0300
+
+    i965/fs: Migrate FS gl_SamplePosition/ID computation code to the IR builder.
+    
+    v2: Use fs_builder::AND/SHR/MOV instead of ::emit.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 31477226ec6cbe956a4bbdcae81cc7ca5ad28cc6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:54:54 2015 +0300
+
+    i965/fs: Migrate FS interpolation code to the IR builder.
+    
+    v2: Fix some preexisting trivial codestyle issues.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit d3c10ad42729c1fe74a7f7c67465bd2beb7f9e75
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 20:43:09 2015 +0300
+
+    i965/fs: Migrate shader time to the IR builder.
+    
+    v2: Change null register destination type to UD so it can be compacted.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 35e64f2a769c915bedeafdb86152b0c4a2067b35
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:06:13 2015 +0300
+
+    i965/fs: Migrate untyped surface read and atomic to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit db83d9d2d0f2743cf64ece731c753f21aba87da6
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:05:28 2015 +0300
+
+    i965/fs: Migrate texturing implementation to the IR builder.
+    
+    v2: Remove tabs from modified lines.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 546839ef639bf871feaa62ab7d811f2fc783bdcd
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 22:22:39 2015 +0300
+
+    i965/fs: Migrate pull constant loads to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 8f626c14989f005599f7841b89144d2bf58b5704
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 22:22:10 2015 +0300
+
+    i965/fs: Migrate Gen4 send dependency workarounds to the IR builder.
+    
+    v2: Change brw_null_reg() to bld.null_reg_f().
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 4af4cfba9ee1014baa4a777660fc9d53d57e4c82
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 20:49:32 2015 +0300
+
+    i965/fs: Migrate lower_integer_multiplication to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit efa60e49f2e5dd56f1c81487e9aad9f89136d8b4
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 20:36:47 2015 +0300
+
+    i965/fs: Migrate lower_load_payload to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 8f8c6b7bdab1fc25fe8277705ebb1818ab220821
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 19:05:54 2015 +0300
+
+    i965/fs: Migrate register spills and fills to the IR builder.
+    
+    Yes, it's incorrect to use the 0-th channel enable group
+    unconditionally without considering the execution and regioning
+    controls of the instruction that uses the spilled value, but it
+    matches the previous behaviour exactly, the builder just makes the
+    preexisting problem more obvious because emitting an instruction of
+    non-native SIMD width without having called .group() or .exec_all()
+    explicitly would have led to an assertion failure.
+    
+    I'll fix the problem in a follow-up series, as the solution is going
+    to be non-trivial.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 3e6ac0bcedfe1b5d092d6ee19323c3ef87b99dba
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:05:45 2015 +0300
+
+    i965/fs: Migrate try_replace_with_sel to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 6114ba4dccfdb8f7c657feeed8f8c9b69debba91
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 20:46:31 2015 +0300
+
+    i965/fs: Migrate opt_sampler_eot to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit a800ec04ad84abeb6243897a276facc4ef6cac82
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 19:51:47 2015 +0300
+
+    i965/fs: Migrate opt_peephole_sel to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 78f7c9edeb21ec4e7a4f96aa12b51cecc40e9688
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 19:33:44 2015 +0300
+
+    i965/fs: Create and emit instructions in one step in opt_peephole_sel.
+    
+    This simplifies opt_peephole_sel() slightly by emitting the SEL
+    instructions immediately after they are created, which makes the
+    sel_inst and mov_imm_inst arrays unnecessary and will make it possible
+    to get rid of the explicit inserts when the pass is migrated to the IR
+    builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 74c2458ecf492f2dd344b4f6114b13a376f90657
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jun 4 16:13:35 2015 +0300
+
+    i965/fs: Migrate opt_cse to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit e7069fbc701de68b65a876e1b4bfde4f111dd084
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jun 4 16:09:47 2015 +0300
+
+    i965/fs: Don't drop force_writemask_all and _sechalf when copying a CSE temporary.
+    
+    LOAD_PAYLOAD instructions need the same treatment as any other
+    generator instructions, at least FB writes and typed surface messages
+    will need a payload built with non-zero execution controls.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 497d238ae72aa59fb32b21191a1a0444ca09fc10
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jun 4 16:05:33 2015 +0300
+
+    i965/vec4: Take into account all instruction fields in CSE instructions_match().
+    
+    Most of these fields affect the behaviour of the instruction, but
+    apparently we currently don't CSE the kind of instructions for which
+    these fields could make a difference in the VEC4 back-end.  That's
+    likely to change soon though when we start using send-from-GRF for
+    texture sampling and surface access messages.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 8013b8147ae5fc652799c7ff01c2d419ebebe3db
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Thu Jun 4 15:09:10 2015 +0300
+
+    i965/fs: Take into account all instruction fields in CSE instructions_match().
+    
+    Most of these fields affect the behaviour of the instruction so it
+    could actually break the program if we CSE a pair of otherwise
+    matching instructions with different values of these fields.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit d86c2e6e539db518dca162145c096b7440d043a7
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 18:22:17 2015 +0300
+
+    i965/fs: Migrate opt_peephole_predicated_break to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 35e5f118a5116685b30ad3305c1c153f1af37f66
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 18:20:50 2015 +0300
+
+    i965/fs: Migrate opt_combine_constants to the IR builder.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit e04b4156a745fc09afa066c892c1913362eae9df
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 19:59:44 2015 +0300
+
+    i965/fs: Allocate a common IR builder object in fs_visitor.
+    
+    v2: Call fs_builder::at_end() to point the builder at the end of the
+        program explicitly.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 8ea8f83c8f6b932749ada32ac666d151a9636508
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Apr 22 14:02:47 2015 +0300
+
+    i965/fs: Introduce FS IR builder.
+    
+    The purpose of this change is threefold: First, it improves the
+    modularity of the compiler back-end by separating the functionality
+    required to construct an i965 IR program from the rest of the visitor
+    god-object, which in turn will reduce the coupling between other
+    components and the visitor allowing a more modular design.  This patch
+    doesn't yet remove the equivalent functionality from the visitor
+    classes, as it involves major back-end surgery.
+    
+    Second, it improves consistency between the scalar and vector
+    back-ends.  The FS and VEC4 builders can both be used to generate
+    scalar code with a compatible interface or they can be used to
+    generate natural vector width code -- 1 or 4 components respectively.
+    
+    Third, the approach to IR construction is somewhat different to what
+    the visitor classes currently do.  All parameters affecting code
+    generation (execution size, half control, point in the program where
+    new instructions are inserted, etc.) are encapsulated in a stand-alone
+    object rather than being quasi-global state (yes, anything defined in
+    one of the visitor classes is effectively global due to the tight
+    coupling with virtually everything else in the compiler back-end).
+    This object is lightweight and can be copied, mutated and passed
+    around, making helper IR-building functions more flexible because they
+    can now simply take a builder object as argument and will inherit its
+    IR generation properties in exactly the same way that a discrete
+    instruction would from the same builder object.
+    
+    The emit_typed_write() function from my image-load-store branch is an
+    example that illustrates the usefulness of the latter point: Due to
+    hardware limitations the function may have to split the untyped
+    surface message in 8-wide chunks.  That means that the several
+    functions called to help with the construction of the message payload
+    are themselves required to set the execution width and half control
+    correctly on the instructions they emit, and to allocate all registers
+    with half the default width.  With the previous approach this would
+    require the used helper functions to be aware of the parameters that
+    might differ from the default state and explicitly set the instruction
+    bits accordingly.  With the new approach they would get a modified
+    builder object as argument that would influence all instructions
+    emitted by the helper function as if it were the default state.
+    
+    Another example is the fs_visitor::VARYING_PULL_CONSTANT_LOAD()
+    method.  It doesn't actually emit any instructions, they are simply
+    created and inserted into an exec_list which is returned for the
+    caller to emit at some location of the program.  This sort of two-step
+    emission becomes unnecessary with the builder interface because the
+    insertion point is one more of the code generation parameters which
+    are part of the builder object.  The caller can simply pass
+    VARYING_PULL_CONSTANT_LOAD() a modified builder object pointing at the
+    location of the program where the effect of the constant load is
+    desired.  This two-step emission (which pervades the compiler back-end
+    and is in most cases redundant) goes away: E.g. ADD() now actually
+    adds two registers rather than just creating an ADD instruction in
+    memory, emit(ADD()) is no longer necessary.
+    
+    v2: Drop scalarizing VEC4 builder.
+    v3: Take a backend_shader as constructor argument.  Improve handling
+        of debug annotations and execution control flags.
+    v4: Drop Gen6 IF with inline comparison.  Rename "instr" variable.
+        Initialize cursor to NULL by default and add method to explicitly
+        point the builder at the end of the program.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 6e040657292d8d0a6fe8fe7d4d94e9808f29e924
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:24:50 2015 +0300
+
+    i965: Define consistent interface to enable instruction result saturation.
+    
+    v2: Use set_ prefix.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 7624f8410f64a7ce0ba125a2025904c70610c076
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:24:18 2015 +0300
+
+    i965: Define consistent interface to enable instruction conditional modifiers.
+    
+    v2: Use set_ prefix.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 239dfc5410d98f3b31a06652ceff13d9858c1f9b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Wed Jun 3 21:23:46 2015 +0300
+
+    i965: Define consistent interface to predicate an instruction.
+    
+    v2: Use set_ prefix.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit f9367191b30956b9cfe578dd8e426b28d2417b6b
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jun 8 14:49:31 2015 +0300
+
+    mesa: Drop include of simple_list.h from mtypes.h.
+    
+    simple_list.h defines a number of macros with short non-namespaced
+    names that can easily collide with other declarations (first_elem,
+    last_elem, next_elem, prev_elem, at_end), and according to the comment
+    it was only being included because of struct simple_node, which is no
+    longer used in this file.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 277b94f172c44cb4199a740722f42bc701d591dc
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jun 8 14:48:29 2015 +0300
+
+    dri/nouveau: Include simple_list.h explicitly in nv*_state_tnl.c.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 7065c8153b5bea3fe4f364dbb922488f755bc1db
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jun 8 14:47:17 2015 +0300
+
+    tnl: Include simple_list.h explicitly in t_context.c.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 08a1046f6777c589f90eae3fd1e7e41ca364c45c
+Author: Francisco Jerez <currojerez@riseup.net>
+Date:   Mon Jun 8 14:46:58 2015 +0300
+
+    mesa: Include simple_list.h explicitly in errors.c.
+    
+    This seems to be the only user of simple_list in core mesa not
+    including the header explicitly.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit f7aad9da20b13c98f77d6a690b327716f39c0a47
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Sun Apr 5 16:48:47 2015 +1000
+
+    mesa/teximage: use correct extension for accept stencil texture.
+    
+    This was using the wrong extension, ARB_stencil_texturing
+    doesn't mention any changes in this area.
+    
+    Fixes "dEQP-GLES3.functional.fbo.completeness.renderable.texture.
+    stencil.stencil_index8."
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90751
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 556b2fbd240bff5d20c5137827757e053c00c3a8
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:47 2015 -0700
+
+    i965: Make a helper function intel_miptree_set_total_width_height()
+    
+    and some more code refactoring. No functional changes in this patch.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 9111377978edf1c688811f877896942be9f8a332
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:48 2015 -0700
+
+    i965/gen9: Set vertical alignment for the miptree
+    
+    v3: Use ffs() and a switch loop in
+        tr_mode_horizontal_texture_alignment() (Ben)
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 447410b66436acde4440aeae45f701b0e4502e97
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:48 2015 -0700
+
+    i965/gen9: Set horizontal alignment for the miptree
+    
+    v3: Use ffs() and a switch loop in
+        tr_mode_vertical_texture_alignment() (Ben)
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 126078faca7a9da0f825d3ad07ce9b1183737240
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:47 2015 -0700
+
+    i965/gen9: Set tiled resource mode for the miptree
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit ef6b9985ea6b60a562daed3a9ed3be0f91f21e01
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:47 2015 -0700
+
+    i965: Pass miptree pointer as function parameter in intel_vertical_texture_alignment_unit
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 9edac38f2a7aaa55bc4f33eb268155ba76908925
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:47 2015 -0700
+
+    i965: Move intel_miptree_choose_tiling() to brw_tex_layout.c
+    
+    and change the name to brw_miptree_choose_tiling().
+    
+    V3: Remove redundant function parameters. (Topi)
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 2cbe730ac53a8510d0decde20a42f1acd51a93a9
+Author: Anuj Phogat <anuj.phogat@gmail.com>
+Date:   Tue Apr 14 22:06:47 2015 -0700
+
+    i965: Choose tiling in brw_miptree_layout() function
+    
+    This refactoring is required by later patches in this series.
+    
+    Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 4f2f5c8d81673473dce8bee3d66b524b4908a823
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Mon Dec 22 19:29:24 2014 -0800
+
+    i965: Disallow saturation for MACH operations.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 922c0c9fd526ce19b87bc74a3159dec7705c1de1
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jun 5 14:45:18 2015 +0100
+
+    i965: Export format comparison for blitting between miptrees
+    
+    Since the introduction of
+    
+    commit 536003c11e4cb1172c540932ce3cce06f03bf44e
+    Author: Boyan Ding <boyan.j.ding@gmail.com>
+    Date:   Wed Mar 25 19:36:54 2015 +0800
+    
+        i965: Add XRGB8888 format to intel_screen_make_configs
+    
+    winsys buffers no longer have an alpha channel. This causes
+    _mesa_format_matches_format_and_type() to reject previously working BGRA
+    uploads from using the BLT fast path. Instead of using the generic
+    routine for matching formats exactly, export the slightly more relaxed
+    check from intel_miptree_blit() which importantly allows the blitter
+    routine to apply a small number of format conversions.
+    
+    References: https://bugs.freedesktop.org/show_bug.cgi?id=90839
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Jason Ekstrand <jason@jlekstrand.net>
+    Cc: Alexander Monakov <amonakov@gmail.com>
+    Cc: Kristian Høgsberg <krh@bitplanet.net>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit c2d0606827412b710dcaed80268fc665de8c9c5d
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jun 5 14:33:36 2015 +0100
+
+    i915: Blit RGBX<->RGBA drawpixels
+    
+    The blitter already has code to accommodate filling in the alpha channel
+    for BGRX destination formats, so expand this to also allow filling the
+    alpha channel in RGBX formats.
+    
+    More importantly for the next patch is moving the test into its own
+    function for the purpose of exporting the check to the callers.
+    
+    v2: Fix alpha expansion as spotted by Alexander with the fix suggested by
+    Kenneth
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Jason Ekstrand <jason@jlekstrand.net>
+    Cc: Alexander Monakov <amonakov@gmail.com>
+    Cc: Kristian Høgsberg <krh@bitplanet.net>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 8da79b8378ae87474d8c47ad955e4833edf98359
+Author: Chris Wilson <chris@chris-wilson.co.uk>
+Date:   Fri Jun 5 13:49:08 2015 +0100
+
+    i965: Fix HW blitter pitch limits
+    
+    The BLT pitch is specified in bytes for linear surfaces and in dwords
+    for tiled surfaces. In both cases the programmable limit is 32,767, so
+    adjust the check to compensate for the effect of tiling.
+    
+    v2: Tweak whitespace for functions (Kenneth)
+    
+    Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+    Cc: Kristian Høgsberg <krh@bitplanet.net>
+    Cc: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: mesa-stable@lists.freedesktop.org
+
+commit 8614b9e489e65bb672ab16053d30ce8708856214
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Fri Jun 5 15:19:01 2015 +0300
+
+    softpipe/query: force parenthesis around a logical not
+    
+    This makes GCC5 happy.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 184e4de3a126fa21945fe59f68b8a29977919fc4
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Fri Jun 5 15:03:19 2015 +0300
+
+    main/version: make sure all the output variables get set in get_gl_override
+    
+    This fixes 2 warnings in gcc 5.1.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 56e38edc960bf08213cdb0282838ccec3e5ea10e
+Author: Michel Dänzer <michel.daenzer@amd.com>
+Date:   Tue May 26 16:27:15 2015 +0900
+
+    radeonsi: Add CIK SDMA support
+    
+    Based on the corresponding SI support. Same as that, this is currently
+    only enabled for one-dimensional buffer copies due to issues with
+    multi-dimensional SDMA copies.
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 79f2acb8f89704c609dd87d969353a506e03b05e
+Author: Michel Dänzer <michel.daenzer@amd.com>
+Date:   Wed Nov 19 15:31:24 2014 +0900
+
+    r600g,radeonsi: Assert that there's enough space after flushing
+    
+    Reviewed-by: Marek Olšák <marek.olsak@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+
+commit 9538902c4f0e94e57228f939489d31676c43a778
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jun 7 13:44:37 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.5.7
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit f7db7fe6ea3d6044b4ceda0c2c477642302e3997
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jun 7 11:45:25 2015 +0100
+
+    docs: Add sha256sums for the 10.5.7 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit eb3a704bb0008c1d046abae31dcb0b2b980c66b1)
+
+commit 56efe81ab163a0c7af15fc53821ac56c0d7641d8
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun Jun 7 11:13:19 2015 +0100
+
+    Add release notes for the 10.5.7 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 495bcbc48cf4e7cee0f2de11c1166a1fd6eb3969)
+
+commit 7b8f20ec5505a25958bcd98aabe73a7ca2b6cbba
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu Jun 4 17:00:17 2015 -0700
+
+    prog_to_nir: Fix fragment depth writes.
+    
+    In the ARB_fragment_program specification, the result.depth output
+    variable is treated as a vec4, where the fragment depth is stored in the
+    .z component, and the other three components are undefined.
+    
+    This is different than GLSL, which uses a scalar value (gl_FragDepth).
+    
+    To make this consistent for driver backends, this patch makes
+    prog_to_nir use a scalar output variable for FRAG_RESULT_DEPTH,
+    moving result.depth.z into the first component.
+    
+    Fixes Glean's fragProg1 "Z-write test" subtest.
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90000
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 52e5ad7bf8c731280ca4506b7d38e8c7a8e734b9
+Author: Chris Forbes <chrisf@ijw.co.nz>
+Date:   Wed Jun 3 12:11:27 2015 +1200
+
+    i965: Set max texture buffer size to hardware limit
+    
+    Previously we were leaving this at the default of 64K, which meets the
+    spec but is too small for some real uses. The hardware can handle up to
+    128M.
+    
+    User was complaining about this on freenode ##OpenGL today.
+    
+    Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit b639ed2f1b170d1184c6d94c88c826c51ffc8726
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu Jun 4 23:59:23 2015 -0700
+
+    i965: Add gen8 fast clear perf debug
+    
+    In an ideal world I would just implement this instead of adding the perf debug.
+    There are some errata involved which lead me to believe it won't be so simple as
+    flipping a few bits.
+    
+    There is room to add a thing for Gen9's flexibility, but since I am actively
+    working on that I have opted to ignore it.
+    
+    Example:
+    Multi-LOD fast clear - giving up (256x128x8).
+    
+    v2: Use braces for if statements because they are multiple lines (Ken)
+    
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 77a44512d9ed56be5e53ebf09e917b5aeeba0189
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Thu Jun 4 22:05:13 2015 -0700
+
+    i965: Add buffer sizes to perf debug of fast clears
+    
+    When we cannot do the optimized fast clear it's important to know the buffer
+    size since a small buffer will have much less performance impact.
+    
+    A follow-on patch could restrict printing the message to only certain sizes.
+    
+    Example:
+    Failed to fast clear 1400x1056 depth because of scissors.  Possible 5% performance win if avoided.
+    
+    Recommended-by: Kenneth Graunke <kenneth@whitecape.org>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 6acb61fc9c2c5f81569d17d90a480abc48ec6055
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 22:53:00 2015 +0200
+
+    clover: clarify and fix the EGL interop error case
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+
+commit a1cb407b049bb431b0f6f21e6e376d11f136af67
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 11 22:18:04 2015 +0200
+
+    egl: expose EGL 1.5 if all requirements are met
+    
+    There's no driver support yet, because EGL_KHR_gl_colorspace isn't
+    implemented.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 51c8c66e1d81d03f0db6aee0a510aa85c277053e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 21:41:32 2015 +0200
+
+    egl: return correct invalid-type error from eglCreateSync
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 820a4d402ad3891ec460882feab3801fbc646a65
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 21:06:41 2015 +0200
+
+    egl: add new platform functions (v2)
+    
+    These are just wrappers around the existing extension functions.
+    
+    v2: return BAD_ALLOC if _eglConvertAttribsToInt fails
+    
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 515f04ed6fe0c914b2cd22c7ea65db6e34c362e3
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 20:42:05 2015 +0200
+
+    egl: add eglCreateImage (v2)
+    
+    v2: - use calloc
+        - return BAD_ALLOC if calloc fails
+    
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 1e79e054e7dff0c45538fff1257e1f81a206d7c9
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 18:14:31 2015 +0200
+
+    egl: add eglGetSyncAttrib (v2)
+    
+    v2: - don't modify "value" in eglGetSyncAttribKHR after an error
+        - rename _egl_api::GetSyncAttribKHR -> GetSyncAttrib
+        - rename GetSyncAttribKHR_t -> GetSyncAttrib_t
+        - rename _eglGetSyncAttribKHR to _eglGetSyncAttrib
+    
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 7524592da6305d52e95d718691d5a6665738aade
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 18:13:31 2015 +0200
+
+    egl: add eglWaitSync
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 2885ba0e4cea102d77832e2af4b212d00ab5edd4
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 20:54:22 2015 +0200
+
+    egl: add EGL 1.5 functions that don't need any changes from extensions
+    
+    Declare the functions without the suffix, so that the core names are exported.
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit d333d30632516b1fc5b60181c2c237653e55a8e1
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 17:34:57 2015 +0200
+
+    egl: use EGL 1.5 types without suffixes
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 706466f4619b76f2475120f187c34d01ab5aa727
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 00:44:56 2015 +0200
+
+    egl: add context attribs from EGL 1.5
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit f9f894447e4e7442d5dfa489bb43f2823e2fc71d
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 00:44:20 2015 +0200
+
+    egl: fix setting context flags
+    
+    Cc: 10.6 10.5 10.4 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 0e4b564ef288159f16f7a6886b6cfc0110411af8
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 12 16:40:29 2015 +0200
+
+    egl: combine VersionMajor and VersionMinor into one variable
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit efda9c56491f5cb90e77f5fe7979477fc9b2b529
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 11 22:16:52 2015 +0200
+
+    egl: set the EGL version in common code
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 3a83adeb7c6340104e9417beefc086f7d33183bc
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 11 21:38:55 2015 +0200
+
+    egl: remove unused _egl_global::ClientExtensions
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 20249d355989668bfdcfed61708a6959794b9710
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri May 15 19:59:59 2015 +0200
+
+    egl: import platform headers from registry (v2)
+    
+    v2: don't remove local Mesa changes
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 6b31f22338656c154e028b4bc2cbd14ab733a957
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri May 15 19:58:51 2015 +0200
+
+    egl: import eglext.h from registry and cleanup eglmesaext.h (v2)
+    
+    v2: include mesa and chromium extensions in eglext.h so as not to break
+        existing users
+    v3: keep PFNEGLSWAPBUFFERSREGIONNOK because piglit uses it
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit 49ae822183aa4daf6c6df9ef33e0b9a148d1a0d1
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Fri May 15 19:55:24 2015 +0200
+
+    egl: import egl.h from registry (v2)
+    
+    v2: split the commit into 3 patches
+    
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Reviewed-by: Chad Versace <chad.versace@intel.com>
+
+commit f52e8572ae1d91bcb6aef9fd3aac02ede62dee4e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 16:13:37 2015 +0200
+
+    mesa: remove unused gl_config::colorIndexMode
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 4312b4f5704ddd88e27b3bf2c17eaf054567f067
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 18:14:29 2015 +0200
+
+    mesa: use GL_GEOMETRY_PROGRAM_NV instead of MESA_GEOMETRY_PROGRAM
+    
+    There's no reason to use our own definition.
+    Tessellation will use the NV definitions too.
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 3b2721ce1145cc60bf35e41e9f50a92849142a06
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 18:10:08 2015 +0200
+
+    mesa: use _mesa_has_geometry_shader in get_programiv
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit b7ef7903b8f582438172ef1bdc72788be3aa0860
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 16:28:39 2015 +0200
+
+    mesa: remove useless gl_compute_program_state::Current
+    
+    This is for user assembly shaders only (not GLSL). We won't support those.
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit e8b040477e271324a88d35c003775337848a500c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Thu May 28 16:09:23 2015 +0200
+
+    mesa: remove unused geometry shader variables
+    
+    These states are for GS assembly shaders only. We don't support those.
+    
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 3d16b5af1dca889ccc3716470f38c1fa84713f26
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 19:07:35 2015 +0200
+
+    tgsi/ureg: fix a coverity defect in emit_decls
+    
+    Reported by Ilia Mirkin.
+
+commit 6aff87bb01d2bd583ac629d02ebf56ecbf86ec2c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 19:07:35 2015 +0200
+
+    r600g: fix a coverity defect in streamout code
+    
+    Reported by Ilia Mirkin.
+
+commit 6bf3729a3fa32a779e9593ffdf1c14e7a4f6dbad
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed May 20 11:43:55 2015 +0200
+
+    glsl_to_tgsi: use TGSI array declarations for VS,GS arrays of outputs (v2)
+    
+    v2: don't use PIPE_MAX_SHADER_ARRAYS
+
+commit 9b1921100ef5f265403b278fab26ae404db719da
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 13:03:27 2015 +0200
+
+    glsl_to_tgsi: use TGSI array declarations for GS,FS arrays of inputs (v2)
+    
+    v2: don't use PIPE_MAX_SHADER_ARRAYS
+
+commit 26c8a49bc40839298d5cd986181d4bee31a48936
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat May 9 23:54:35 2015 +0200
+
+    glsl_to_tgsi: remove some emit functions by using C++ default values
+
+commit 85cd1cf4b88aff9bd2667359e36e6fbb7be92122
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat May 9 23:38:52 2015 +0200
+
+    glsl_to_tgsi: rename emit -> emit_asm
+    
+    My editor thinks "emit" is a keyword, which breaks code indexing.
+
+commit 30b74c02cd57463591588274ad638ca80b34cb57
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 15:18:27 2015 +0200
+
+    glsl_to_tgsi: remove memset after calloc
+
+commit 6ae3bc256927b583690729b8940a4418e75b0596
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 15:09:20 2015 +0200
+
+    glsl_to_tgsi: don't use a static array size for st_translate::arrays
+
+commit 57c98e22db3397efe42268ba0750f319cea3b0fd
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 15:09:20 2015 +0200
+
+    glsl_to_tgsi: don't use a static array size for "array_sizes"
+
+commit b6ebe7eabf54936a02acc0968e718e0c264a73f5
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 25 19:30:44 2015 +0200
+
+    tgsi/ureg: don't emit in/out arrays if drivers don't support ranged declarations
+    
+    Softpipe, llvmpipe, r300g, and radeonsi pass tests. Other drivers need testing.
+    
+    Freedreno and nv30 are definitely broken. Other drivers seem to be alright.
+
+commit a015b3952f568ad3da1ddfe42ff7ce6568f52780
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed May 20 11:11:43 2015 +0200
+
+    tgsi/ureg: add support for output array declarations
+
+commit 1fa6c99e24890359e9cee2a9da02f21ea77b9f15
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 16:17:35 2015 +0200
+
+    tgsi/ureg: add support for GS input array declarations
+
+commit d3fbc659868ecdbfe14600a75eafe28174f7d99e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 16:51:32 2015 +0200
+
+    tgsi/ureg: merge input and fs_input arrays
+
+commit 3b1d15775190945b1a639dd9b2581b4032cd2ac6
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 13:10:03 2015 +0200
+
+    tgsi/ureg: rename and simplify ureg_DECL_gs_input
+    
+    There is nothing special about it and it's used for tessellation shaders
+    too.
+
+commit 918ca4031f670066f054cdebcfe68ad75c963ac6
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 12:52:02 2015 +0200
+
+    tgsi/ureg: add support for FS input array declarations
+
+commit cf2c9265a3977d43beb9a9894a5b934af74df7d7
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 17:41:26 2015 +0200
+
+    tgsi/scan: get more information about arrays and handle arrays correctly (v2)
+    
+    v2: use less memory for the information
+
+commit 78395dbf9ff429d98523f8b4a340f7188d8b4db0
+Author: Tapani <tapani.palli@intel.com>
+Date:   Fri Jun 5 08:22:07 2015 +0300
+
+    mesa: fix program resource queries for builtin variables
+    
+    Patch fixes special cases with gl_VertexID and sets all builtin
+    variables locations as '-1' as specified by the extension spec.
+    
+    Fixes ES 3.1 conformance test failure:
+    	ES31-CTS.program_interface_query.input-built-in
+    
+    v2: comments + use is_gl_identifier() (Martin)
+    
+    Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
+    Reviewed-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit cb277cde6f2a210b0515cd04269964fd409307e9
+Author: Alan Coopersmith <alan.coopersmith@oracle.com>
+Date:   Sat May 23 00:03:53 2015 -0700
+
+    glsl_compiler: Remove unused extra argument to printf in usage_fail
+    
+    Flagged by Oracle's parfait static analyzer:
+    
+    Error: Format string argument mismatch (CWE 628)
+       In call to printf with format string "usage: %s [options] <file.vert | file.geom | file.frag>\n\nPossible options are:\n"
+          Too many arguments for format string (got more than 1 arguments)
+            at line 285 of src/glsl/main.cpp in function 'usage_fail'.
+    
+    Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 00d8733120276fc5bdd3ecb7fea6e04b7940d71b
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Fri Jun 5 02:25:03 2015 +0200
+
+    docs: add note about llvmpipe supporting GL_ARB_shader_stencil_export
+
+commit 6e5970ffee0129fb94d8b7f0ebd4fac3992e7dce
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Thu Jun 4 14:35:59 2015 +0200
+
+    draw: (trivial) fix NULL pointer dereference
+    
+    This probably got broken when the samplers were converted to be indexed
+    by shader type.
+    Seen when looking at bug 89819 though I'm not sure if that really was what
+    the bug was about...
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit c820407ef0aac87546d1a778e169cfa1a915a219
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Tue Jun 2 20:40:54 2015 -0700
+
+    i965/fs: Print mlen in dump_instructions() output.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 15a12795c6f3edef0e1cbab39b6da3d5b8f64fc3
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu Jun 4 01:24:07 2015 -0700
+
+    prog_to_nir: Make RSQ properly take the absolute value of its argument.
+    
+    I just botched this when writing the original code.
+    
+    From the ARB_vertex_program specification:
+    "The RSQ instruction approximates the reciprocal of the square root of
+     the absolute value of the scalar operand and replicates it to all four
+     components of the result vector."
+    
+    Fixes a Glean vertProg1 subtest:
+    RSQ test 2 (reciprocal square root of negative value)
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90547
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 71e94578779e4344066d434004fd85ca493de552
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Thu Jun 4 14:09:31 2015 +0300
+
+    main: fix a regression in uniform handling introduced by 87a4bc5
+    
+    The comment was accurate but the condition was reversed...
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 87a4bc511811327a00f9bbc1b6870b7fa46675f7
+Author: Martin Peres <martin.peres@linux.intel.com>
+Date:   Thu May 21 15:51:09 2015 +0300
+
+    mesa: reference built-in uniforms into gl_uniform_storage
+    
+    This change introduces a new field in gl_uniform_storage to
+    explicitly say that a uniform is built-in. In the case where it is,
+    no storage is defined to make it clear that it is read-only from the
+    mesa side. I fixed all the places in the code that made use of the
+    structure that I changed. Any place making a wrong assumption and using
+    the storage straight away will just crash.
+    
+    This patch seems to implement the path of least resistance towards
+    listing built-in uniforms in GL_ACTIVE_UNIFORM (and other APIs).
+    
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+    Signed-off-by: Martin Peres <martin.peres@linux.intel.com>
+
+commit 4fd42a7c2798d03476c84b79cb855984a15c222c
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Wed Jun 3 01:34:06 2015 +0200
+
+    llvmpipe: Implement stencil export
+    
+    Pretty trivial, fixes the issue that we're expected to be able to blit
+    stencil surfaces (as the blit just relies on util blitter code which needs
+    stencil export to do it).
+    2 piglits skip->pass, 11 fail->pass
+    
+    v2: prettify, keep different stencil ref value handling out of depth/stencil
+    test itself.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+    Reviewed-by: Dave Airlie <airlied@redhat.com>
+
+commit d46d04529b9c1e55b4c3b65a7078bbbd7ab1a810
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue Jun 2 17:46:38 2015 -0700
+
+    i965: Use UW-typed immediate in multiply inst.
+    
+    Some hardware reads only the low 16-bits even if the type is UD, but
+    other hardware like Cherryview can't handle this.
+    
+    Fixes spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple on
+    Cherryview.
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90830
+    Reviewed-by: Neil Roberts <neil@linux.intel.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 54a70a8ef20a9a875f0828acb42332cf69217ff5
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 1 16:46:29 2015 -0700
+
+    program: Replace gl_inst_opcode with enum prog_opcode.
+    
+    Both were introduced at the same time. I'm not sure why we needed two.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit fb011d31578ada40c2755314db783522477d0ad4
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 1 16:27:46 2015 -0700
+
+    program: Remove dead Aux field from prog_instruction.
+    
+    Appears to have been last used by the i965 driver (removed by commit
+    098acf6c).
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit ef3f89e53e76332ddb300b08f4698347e17d1633
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 1 16:22:55 2015 -0700
+
+    program: Shrink and rename SaturateMode field to Saturate.
+    
+    It was 2 bits to accommodate SATURATE_PLUS_MINUS_ONE (removed by commit
+    09b566e1). A similar change was made to TGSI recently in commit
+    e1c4e8aa.
+    
+    Reducing the size from 2 bits to 1 reduces the size of the bit fields
+    from 17 bits to 16, which is a much nicer number.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 56b2b3d385170ab33934ec71fd9d0a6e0e1af9a8
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 15 12:12:04 2015 -0600
+
+    mesa: move no-change glDepthFunc check earlier
+    
+    If the incoming func matches the current state it must be a legal
+    value so we can do this before the switch statement.
+    
+    Signed-off-by: Brian Paul <brianp@vmware.com>
+
+commit 4dd72fe70d6800525302c734d161be411ed9f7e0
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 15 12:09:54 2015 -0600
+
+    mesa: restore GL_EXT_depth_bounds_test state in glPopAttrib()
+    
+    Spotted by inspection.  Untested (no piglit test).
+    
+    Signed-off-by: Brian Paul <brianp@vmware.com>
+
+commit 6139195606d97b43a739500627c906baf804fab0
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 15 11:22:25 2015 -0600
+
+    mesa: fix glPushAttrib(0) / glPopAttrib() error
+    
+    If the glPushAttrib() mask value was zero we didn't actually push
+    anything onto the attribute stack.  A subsequent glPopAttrib() call
+    would generate a GL_STACK_UNDERFLOW error.  Now push a dummy attribute
+    in that case to prevent the error.
+    
+    Mesa now matches nvidia's behavior.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 86a74e9b6b8953a55de234f185a14defd646f489
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Tue Jun 2 19:26:42 2015 +1000
+
+    nir: use src for ssa helper
+    
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+
+commit 5f7b8fa4811ae0acb49de5d0ef44ae1573eb5ccc
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Mon Jun 1 08:00:14 2015 +1000
+
+    nir: remove extra semicolon
+    
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+
+commit 5da809d70fb50eb4b290ee7cbe1b8f09e9286f4e
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 1 12:22:54 2015 -0700
+
+    prog_to_nir: Remove OPCODE_MOV special case.
+    
+    OPCODE_MOV is in the op_trans[] array.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 576f7241b6ce0ae22aa52a3e91fb1ac913d4b7b2
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon Jun 1 12:19:30 2015 -0700
+
+    prog_to_nir: Remove from op_trans[] opcodes handled in the switch.
+    
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 5b226a12420993a0f4aae2295b33aaa305242a3d
+Author: Eduardo Lima Mitev <elima@igalia.com>
+Date:   Tue Jun 2 13:42:46 2015 +0200
+
+    nir: prevent use-after-free condition in should_lower_phi()
+    
+    lower_phis_to_scalar() pass recurses the instruction dependence graph to
+    determine if all the sources of a given instruction are scalarizable.
+    To prevent cycles, it temporarily marks the phi instruction before recursing in,
+    then updates the entry with the resulting value. However, it does not consider
+    that the entry value may have changed after a recursion pass, hence causing
+    a use-after-free situation and a crash.
+    
+    This patch fixes this by reloading the entry corresponding to the 'phi'
+    after recursing and before updating its value.
+    
+    The crash can be reproduced ~20% of times with the dEQP test:
+    
+    dEQP-GLES3.functional.shaders.loops.while_constant_iterations.nested_sequence_fragment
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 762395736be3adcc810274e1e96acd4bdceb10c6
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat May 30 11:19:28 2015 -0700
+
+    i965: Add Gen8+ VS dispatch_mode assertion.
+    
+    Suggested by Ben Widawsky.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit a2655e0dd422599c07c572472855abd98d20d21a
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Fri May 29 23:02:56 2015 -0700
+
+    i965: Drop LOAD_PAYLOAD workaround in fs_visitor::emit_urb_writes().
+    
+    Now that Jason's LOAD_PAYLOAD improvements have landed, we don't need
+    this.  Passing 1 for the number of header registers already takes care
+    of setting force_writemask_all on the header copy.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+
+commit 386bf336c400104fbc80bf8a21f745eca5771ec1
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed May 13 14:45:45 2015 -0700
+
+    i965: Use proper pitch for scalar GS pull constants and UBOs.
+    
+    See the corresponding code in brw_vs_surface_state.c.
+    
+    v2: const more things (requested by Topi Pohjolainen)
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 0f8ec779ddff4126837a7d4216ecf1d4b97e93d2
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Wed Mar 11 21:18:42 2015 -0700
+
+    i965: Create a shader_dispatch_mode enum to replace VS/GS fields.
+    
+    We used to store the GS dispatch mode in brw_gs_prog_data while
+    separately storing the VS dispatch mode in brw_vue_prog_data::simd8.
+    
+    This patch introduces an enum to represent all possible dispatch modes,
+    and stores it in brw_vue_prog_data::dispatch_mode, unifying the two.
+    
+    Based on a suggestion by Matt Turner.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 9945573d65f4f66d127df7cbb62648889d09a7ed
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sun May 10 23:33:30 2015 -0700
+
+    i965: Drop "Vector Mask Enable" bit from 3DSTATE_GS on Gen8+.
+    
+    The documentation makes it pretty clear that we shouldn't use this:
+    
+       "Under normal conditions SW shall specify DMask, as the GS stage
+        will provide a Dispatch Mask appropriate to SIMD4x2 or SIMD8 thread
+        execution (as a function of dispatch mode).  E.g., for SIMD4x2
+        execution, the GS stage will generate a Dispatch Mask that is equal
+        to what the EU would use as the Vector Mask.  For SIMD8 execution
+        there is no known usage model for use of Vector Mask (as there is
+        for PS shaders)."
+    
+    I also managed to find descriptions of DMask and VMask, in the "State
+    Register" (sr0.2/3) field descriptions:
+    
+       "Dispatch Mask (DMask).  This 32-bit field specifies which channels
+        are active at Dispatch time."
+    
+       "Vector Mask (VMask).  This 32-bit field contains, for each 4-bit
+        group, the OR of the corresponding 4-bit group in the dispatch
+        mask."
+    
+    SIMD4x2 shaders process one or two vec4 values, with each 4-bit group
+    corresponding to xyzw channel enables (either all on, or all off).
+    Thus, DMask = VMask in SIMD4x2 mode.  But in SIMD8 mode, 4-bit groups
+    are meaningless, so it just messes up your values.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit f97166e550f17f69f8de7e51775e745a5218d3e7
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon Jun 1 07:40:34 2015 -0600
+
+    docs: update GL_ARB_copy_image, GL_ARB_clear_texture gallium status
+    
+    VMware is working on these.
+    
+    Signed-off-by: Brian Paul <brianp@vmware.com>
+
+commit 51d08d55f46655715cb4a4ef5d14fb7d051b989a
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 29 16:02:44 2015 -0600
+
+    gallium/util: silence unused var warnings for non-debug build
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 54070a9d1db332853609a31e5da76126d8f9445a
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 29 16:01:37 2015 -0600
+
+    egl/dri2: silence uninitialized variable warnings
+    
+    And update assertions to be more informative.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 87813c504a8a72198a6a4e8de9e5905fd751ac2c
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 29 16:00:52 2015 -0600
+
+    gallivm: silence unused var warnings for non-debug build
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 71afc13eda53f36827f19e96404ff782561b523b
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 29 16:00:14 2015 -0600
+
+    pipebuffer: silence unused var warnings for non-debug build
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 875918587156e139a82ac9ece9c1290b8019d007
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 29 15:59:50 2015 -0600
+
+    st/mesa: silence unused var warnings for non-debug build
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit ae5d6db924d304a6b4af4f802e4ca1e1e2f25489
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 29 15:59:27 2015 -0600
+
+    draw: silence unused var warnings for non-debug build
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 512117ce0e1b32b4e3086a638bf50b966a4724bc
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Fri May 29 14:33:18 2015 +0100
+
+    gallivm: Remove stub disassemblerSymbolLookupCB.
+    
+    It's incomplete -- it wasn't filling ReferenceType so it was causing
+    garbage on the disassembly.  Furthermore it seems impossible to get the
+    jump information through this interface.
+    
+    The solution for function size problem is to effectively book-keep the
+    machine code start and end address while JIT'ing.
+
+commit 7f62fdae1629d75dd581d1c57b28c2f099c5ef6b
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Fri May 29 13:41:48 2015 +0100
+
+    i965: Don't add base_binding_table_index if it's zero
+    
+    When calculating the binding table index for non-constant sampler
+    array indexing it needs to add the base binding table index which is a
+    constant within the generated code. Often this base is zero so we can
+    avoid a redundant instruction in that case.
+    
+    It looks like nothing in shader-db is doing non-constant sampler array
+    indexing so this patch doesn't make any difference but it might be
+    worth having anyway.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Acked-by: Ben Widawsky <ben@bwidawsk.net>
+
+commit 6c846dc57b1d6f3e015a604dba1976f96c4be9e9
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Thu May 28 15:27:31 2015 +0100
+
+    i965: Don't use a temporary when generating an indirect sample
+    
+    Previously when generating the send instruction for a sample
+    instruction with an indirect sampler it would use the destination
+    register as a temporary store. This breaks when used in combination
+    with the opt_sampler_eot optimisation because that forces the
+    destination to be null. This patch fixes that by avoiding the temp
+    register altogether.
+    
+    The reason the temporary register was needed was because it was trying
+    to ensure the binding table index doesn't overflow a byte by and'ing
+    it with 0xff. The result is then or'd with sampler_index<<8. This patch
+    instead just and's the whole thing by 0xfff. This will ensure that a
+    bogus sampler index won't overflow into the rest of the message
+    descriptor but unlike the previous code it won't ensure that the
+    binding table index doesn't overflow into the sampler index. It
+    doesn't seem like that should matter very much though because if the
+    shader is generating a bogus sampler index then it's going to just get
+    garbage out either way.
+    
+    Instead of doing sampler_index<<8|(sampler_index+base_table_index) the
+    new code avoids one operation by doing
+    sampler_index*0x101+base_table_index which should be equivalent.
+    However if we wanted to avoid the multiply for some reason we could do
+    this by adding an extra or instruction still without needing the
+    temporary register.
+    
+    This fixes a number of Piglit tests on Skylake that were using
+    indirect samplers such as:
+    
+     spec@arb_gpu_shader5@execution@sampler_array_indexing@fs-simple
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+    Acked-by: Ben Widawsky <ben@bwidawsk.net>
+    Tested-by: Anuj Phogat <anuj.phogat@gmail.com>
+
+commit ec1c72d38ea4c709a39c6be9e0ff96bc2a90940f
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri May 29 21:27:53 2015 -0700
+
+    vc4: Don't bother with safe list traversal in CSE.
+    
+    We don't remove or move instructions.
+
+commit 78c773bb3646295e4a4f1fe7d6d10f05758ee48b
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri May 29 18:19:42 2015 -0700
+
+    vc4: Convert from simple_list.h to list.h
+    
+    list.h is a nicer and more familiar set of list functions/macros.
+
+commit 21a22a61c02a1d1807ff03df8eb8fa16ebdd1b74
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri May 29 18:06:32 2015 -0700
+
+    vc4: Make sure we allocate idle BOs from the cache.
+    
+    We were returning the most recently freed BO, without checking if it
+    was idle yet.  This meant that we generally stalled immediately on the
+    previous frame when generating a new one.  Instead, allocate new BOs
+    when the *oldest* BO is still busy, so that the cache scales with how
+    much is needed to keep some frames outstanding, as originally
+    intended.
+    
+    Note that if you don't have some throttling happening, this means that
+    you can accidentally run the system out of memory.  The kernel is now
+    applying some throttling on all execs, to hopefully avoid this.
+
+commit c821ccf0e3a051e5e867792898ae9b8f08e4601a
+Author: Eric Anholt <eric@anholt.net>
+Date:   Fri May 29 17:21:15 2015 -0700
+
+    vc4: Fix return value handling for BO waits.
+    
+    If the wait ever returned -ETIME, we'd abort because the errno was
+    stored in errno and not drmIoctl()'s return value.
+
+commit fcc79af9e25d5770b8de1f4102901cbf97857a34
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Fri May 29 21:47:53 2015 +1000
+
+    mesa: remove unused function declaration
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 82305f7b003879a3d08e1445f8ac4b1c6bee6330
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 29 11:28:58 2015 -0600
+
+    dri_util: make version var unsigned to silence warnings
+    
+    _mesa_override_gl_version_contextless() takes an unsigned version
+    parameter.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit b307921c3ff3b36607752f881a180272366a79cf
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Wed May 27 17:55:02 2015 -0700
+
+    i965: Disable compaction for EOT send messages
+    
+    AFAICT, there is no real way to make sure a send message with EOT is properly
+    ignored from compact, nor can I see a way to actually encode EOT while
+    compacting. Before the single send optimization we'd always bail because we hit
+    the is_immediate && !is_compactable_immediate case. However, with single send,
+    is_immediate is not true, and so we end up trying to compact the un-compactable.
+    
+    Without this, any compacting single send instruction will hang because the EOT
+    isn't there. I am not sure how I didn't hit this when I originally enabled the
+    optimization.  I didn't check if some surrounding code changed.
+    
+    I know Neil and Matt were both looking into this. I did a quick search and
+    didn't see any patches out there to handle this. Please ignore if this has
+    already been sent by someone. (Direct me to it and I will review it).
+    
+    Reported-by: Neil Roberts <neil@linux.intel.com>
+    Reported-by: Mark Janes <mark.a.janes@intel.com>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit c0d2b83f0bb15c1a10e53ef85c167febf699921a
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Fri May 29 18:17:24 2015 +0200
+
+    gallivm: make sampling more robust when the sampler setup is bogus
+    
+    Pure integer formats cannot be sampled with linear tex / mip filters. In GL
+    such a setup would make the texture incomplete.
+    We shouldn't rely on the state tracker though to filter that out, just return
+    all zeros instead of dying in the lerp.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit 0ad15e55bfbca3d6b829b985f9e7ea7e3e69bc61
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Fri May 29 12:13:36 2015 +0100
+
+    configure.ac: Link mcdisassembler component.
+    
+    gallivm now depends on it. And depending on particular LLVM version /
+    configure options, the build can fail without this change due to
+    undefined reference to `LLVM*Disasm*' symbols.
+    
+    Trivial.
+
+commit 9119cd7d2c959e437c40c86f214d08dc198bfa69
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Fri May 29 11:58:58 2015 +0100
+
+    configure.ac: Don't bother checking whether LLVM's MCJIT component is available.
+    
+    Now that we require LLVM 3.3, MCJIT is guaranteed to be available.
+    
+    Trivial.
+
+commit 0db4ef9df152da1d0f3601bbccc68ac1c94d4a3b
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Thu May 28 16:55:10 2015 +0100
+
+    gallivm: Use the LLVM's C disassembly interface.
+    
+    It doesn't do everything we want.  In particular it doesn't allow to
+    detect jumps or return opcodes.  Currently we detect the x86's RET
+    opcode.
+    
+    Even though it's worse for LLVM 3.3, it's an improvement for LLVM 3.7,
+    which was totally busted.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit 29203e77388f11e36db3190834809c3196ee47b5
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Thu May 28 15:35:14 2015 +0100
+
+    gallivm: Disable frame pointer omission on LLVM 3.7.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit dd048543e944d95b4471572454cfa902392e3f61
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 12:47:03 2015 +0200
+
+    configure.ac: enable building GLES1 and GLES2 by default
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 25e9ae2b79f32631e7255807a242e5fc4e39984c
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 19:32:36 2015 +0200
+
+    st/dri: fix postprocessing crash when there's no depth buffer
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89131
+    
+    Cc: 10.6 10.5 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 7116250b7a3aa8863f11d18032a3fbd24e2eee73
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Wed May 27 00:15:16 2015 +0200
+
+    radeon/llvm: reset temps_count on deallocation
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 7afc992c20a94883b876fe53e155b9fec6e5fb27
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Tue May 26 15:35:10 2015 +0200
+
+    radeon/llvm: don't use a static array size for radeon_llvm_context::arrays (v2)
+    
+    v2: - don't use realloc (tgsi_shader_info provides the size)
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 065978d36b8a8ba5aa23248c6bcd0f0e4d6e86de
+Author: Dave Airlie <airlied@gmail.com>
+Date:   Wed May 27 11:11:06 2015 +1000
+
+    softpipe: fix offset wrapping calculations (v2)
+    
+    Roland pointed out my previous attempt was lacking, so I enhanced the
+    texwrap piglit test, and tested them. This fixes the offset calculations
+    in a number of areas by adding the offset first, it also fixes the fastpaths,
+    which I forgot to address in the previous commit.
+    
+    v2: try and avoid divides in most paths, the repeat mirror path
+    really was ugly no matter which way I went, so I left it having
+    the divide.
+    Also fix the gather lod calculation bug.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit b95ec49e57f81bdd75795dc93022533704efe509
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed May 20 12:03:33 2015 -0700
+
+    i965/vs: Rework the logic for generating NIR from ARB vertex programs
+    
+    Whether or not to use NIR is now equivalent to brw->scalar_vs.  We can
+    simplify the logic and make it far less confusing.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 78644ffc4d341deb431145108f0b2d377e59b61e
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed May 20 10:35:34 2015 -0700
+
+    i965/fs: Remove the ir_visitor code
+    
+    Now that everything is running through NIR, this is all dead.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 66a03a4c4bb416a30b65e0334b248660a268c6a8
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed May 20 10:39:03 2015 -0700
+
+    i965: Remove the old fragment program code
+    
+    Now that everything is running through NIR, this is all dead.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 114497afff4e49139b8c7d61f11a7872b81398bf
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed May 20 10:03:50 2015 -0700
+
+    i965: Make NIR non-optional for scalar shaders
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 8b9ecfff360711cffc41a0a062de5ad810f9cf2b
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed May 20 09:45:47 2015 -0700
+
+    i965: Make fs/vec4_visitor inherit from ir_visitor directly
+    
+    This is using multiple inheritance in C++.  However, ir_visitor is really
+    just an interface with no data so it shouldn't be so bad.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 99cb4233205edcfa1a1e2967eef7bb16ff19bec4
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Wed May 20 09:44:01 2015 -0700
+
+    i965: Rename backend_visitor to backend_shader
+    
+    The backend_shader class really is a representation of a shader.  The fact
+    that it inherits from ir_visitor is somewhat immaterial.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 1ca60de4c00e864bffbee8265f631b2267c8ea29
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:56:45 2015 -0700
+
+    mesa: Enable ARB_direct_state_access by default for core profile
+    
+    And core profile only.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit ef4dd0fc3e6b5ffbad6bd286ef9c6c25d0b25bae
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Wed May 20 20:19:07 2015 -0700
+
+    dispatch_sanity: Validate the compatibility profile dispatch table too
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Suggested-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 49ab670f52947dda04585cc5156e55b89f0c1c4a
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Wed May 20 20:17:19 2015 -0700
+
+    dispatch_sanity: Split list of GL 3.1 functions into core and common
+    
+    The next patch will add a test for compatibility profile dispatch, and
+    it seems to make more sense to share the lists.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit a6fa74e6bb65f852ad1608f43dd0731e854ea42f
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Wed May 20 20:13:12 2015 -0700
+
+    mesa: Don't install glVertexAttribL* functions in compatibility profile
+    
+    GL_ARB_vertex_attrib_64bit is exclusive to core profile, and none of the
+    other functions added by the extension are advertised in other profiles.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: Dave Airlie <airlied@redhat.com>
+    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 4e5efa9e7ddb6d5273996cf9b09677d918759d17
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Tue May 19 11:48:11 2015 -0700
+
+    glapi: Make GL_ARB_direct_state_access functions exclusive to core profile
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Cc: Dave Airlie <airlied@redhat.com>
+    Cc: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: Dylan Baker <baker.dylan.c@gmail.com>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit f20899b7276b73e1b60c3ed8d8abdf959e787c0c
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Tue May 19 11:24:26 2015 -0700
+
+    glapi: Store exec table version info outside the XML
+    
+    Currently only the functions that are exclusive to core-profile are
+    implemented.  The remainder continue to live in the XML.  Additional
+    functions can be moved later.
+    
+    The functions for GL_ARB_draw_indirect and GL_ARB_multi_draw_indirect
+    are put in the dispatch table inside the VBO module, so they do not need
+    to be moved over.
+    
+    The diff of src/mesa/main/api_exec.c before and after this patch is as
+    expected.  All of the functions listed in apiexec.py moved out of a 'if
+    (_mesa_is_desktop(ctx))' block into a new 'if (ctx->API ==
+    API_OPENGL_CORE)' block.
+    
+    v2: Remove stray shebang line in apiexec.py.  Suggested by Ilia.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: Dave Airlie <airlied@redhat.com>
+    Cc: Dylan Baker <baker.dylan.c@gmail.com>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 5c4aab58ee79a8bfa3d96f3ec442f37da587ff45
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:55:04 2015 -0700
+
+    Revert "mesa: Add an extension flag for ARB_direct_state_access"
+    
+    This reverts commit 30dcaaec356cc117d7227c6680620cd50ff534e7.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 832ea2345a96388950bb39ce8a2e4ca8bfdb4fe5
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Wed May 20 17:19:29 2015 -0700
+
+    mesa: Use the profile instead of an extension bit to validate GL_TEXTURE_CUBE_MAP
+    
+    The extension on which this depends will always be enabled in core
+    profile, and the extension bit is about to be removed.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 90e98ea215906bb7e9ecadc4d30d2718ba2186ad
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:55 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in XFB functions"
+    
+    This reverts commit 7d212765a470972f4712e42caf6406b257220369.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit cab233f277936f4cdc49aa0bbfc7ed1a85c925f1
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:39 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in buffer object functions"
+    
+    This reverts commit 339ed0984d4f54fca91235a1df2ce3a850f6123f.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 8bcd14fab9a86276980a8859740999a1db4c55d5
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:35 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in FBO functions"
+    
+    This reverts commit 6ad0b7e07a0445e9e0f368e079c4f7b8a6757bb3.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit f3e8596a371c3708e9d9d68a021c39982c676cf1
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:29 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in renderbuffer functions"
+    
+    This reverts commit cb49940766b581c6656473d89c221653c69fa0f9.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 1ac6a8f1d1952a20d54df3e513c253d7988402ac
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:25 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in texture functions"
+    
+    This reverts commit 8940957238e8584ce27295791cee4cc3d6f7cf1e.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 92e362191e6c1c15e3944464fbf6bbda9e7d9892
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:21 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in VAO functions"
+    
+    This reverts commit 36b05793372b86b914d9b95d0188f5f387e01d68.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit ae5457754492b594c55911433a9b3675216c46c1
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:18 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in sampler object functions"
+    
+    This reverts commit 9e7149c8986348bf9567f049444783ef52775f4e.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit a9dcf45cd88b6e4d7816f45756d0b96d9c77cffe
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:15 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in program pipeline functions"
+    
+    This reverts commit bebf3c6ab314bde05ac5a3b4d3e63fd36243c58e.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit a9f678a8f4d4f9806dc4e931477fad300c61b4a1
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:11 2015 -0700
+
+    Revert "mesa: Add ARB_direct_state_access checks in query object functions"
+    
+    This reverts commit d3368e0c9e27ced6059eb2ecdf2aa999a00e90b0.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit f1fcf79e3c8e4f3594dc1b6d268430f8e8d4eb97
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:54:08 2015 -0700
+
+    Revert "i915: Enable ARB_direct_state_access"
+    
+    This reverts commit 121030eed8fc41789d2f4f7517bbc0dd6199667b.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 4bc00b1a4b34abc3e6e26f126686608ccfa33f52
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:53:58 2015 -0700
+
+    Revert "i965: Enable ARB_direct_state_access"
+    
+    This reverts commit a57feba0a35de35728269aeb26b039e4f2393d69.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 73cf10e6236fbf119c8262e69cd24f55557116f1
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Mon May 18 13:53:49 2015 -0700
+
+    Revert "st/mesa: Enable ARB_direct_state_access"
+    
+    This reverts commit 357bf80caade9e0be20dcc88ec38884e34abc986.
+    
+    Acked-by: Fredrik Höglund <fredrik@kde.org>
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 9b5e92f4ccc6ee1cb9caea947f6efaad2b391cf1
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Wed Apr 29 16:12:40 2015 -0700
+
+    mesa: Allow overriding the version of ES2+ contexts
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 03fd6704db9f1d0f203bf8da18bd587c7e35ce60
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Tue May 26 12:07:13 2015 -0700
+
+    mesa: Add support for a new override string MESA_GLES_VERSION_OVERRIDE
+    
+    The string is only applied when the context is API_OPENGLES2.
+    
+    The bulk of the change is to prevent overriding the context to
+    API_OPENGL_CORE based on the requested version.  If the context is
+    API_OPENGL_ES2, don't change it.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 464c56d3d5ca2c9d6e437e756950f0fa2996d8da
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Tue May 26 12:14:39 2015 -0700
+
+    dri_util: Use _mesa_override_gl_version_contextless
+    
+    Remove _mesa_get_gl_version_override.  We don't need two functions that
+    do basically the same thing.  This change seemed easier (esp. with the
+    next patch) than going the other way.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 1fe243938b11be740417cf016d8c50cd69228628
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Tue Apr 28 13:13:47 2015 -0700
+
+    mesa/es3.1: Enable ES 3.1 API and shading language version
+    
+    This is a bit of a hack for now.  Several of the extensions required for
+    OpenGL ES 3.1 have no support, at all, in Mesa.  However, with this
+    patch and a patch to allow MESA_GL_VERSION_OVERRIDE to work with ES
+    contexts, people can begin testing the ES "version" of the functionality
+    that is supported.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 366ceacf72258a4a81d9c6b412dd565a4c611b17
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Tue Apr 28 18:00:43 2015 -0700
+
+    gles/es3.1: Enable dispatch of almost all new GLES 3.1 functions
+    
+    A couple functions are missing because there are no implementations of
+    them yet.  These are:
+    
+          glFramebufferParameteri (from GL_ARB_framebuffer_no_attachments)
+          glGetFramebufferParameteriv (from GL_ARB_framebuffer_no_attachments)
+          glMemoryBarrierByRegion
+    
+    v2: Rebase on updated dispatch_sanity.cpp test.
+    
+    v3: Add support for glDraw{Arrays,Elements}Indirect in vbo_exec_array.c.
+    The updated dispatch_sanity.cpp test discovered this omission.
+    
+    v4: Rebase on glapi changes.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 8bbe7fa7a853d8ebf69e5d2d0fdc4343a20b638f
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon May 25 09:31:55 2015 -0700
+
+    i965/fs: Properly handle explicit depth in SIMD16 with dual-source blend
+    
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90629
+    Tested-by: Markus Wick <markus@selfnet.de>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit e354cc9b791cf025d26de7e19c58d499b83a3570
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed May 27 12:19:07 2015 -0700
+
+    i965: Silence warning in 3-src type-setting.
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 0596134410a0decc2f6bba77bfedb82d308aabbe
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Wed May 27 10:44:45 2015 -0700
+
+    i965/fs: Fix lowering of integer multiplication with cmod.
+    
+    If the multiplication's result is unused, except by a conditional_mod,
+    the destination will be null. Since the final instruction in the lowered
+    sequence is a partial-write, we can't put the conditional mod on it and
+    we have to store the full result to a register and do a MOV with a
+    conditional mod.
+    
+    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90580
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 2231cf0ba3a79d9abb08065e0f72811c5eea807f
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Thu May 28 09:06:33 2015 +0200
+
+    nir: Fix output swizzle in get_mul_for_src
+    
+    When we compute the output swizzle we want to consider the number of
+    components in the add operation. So far we were using the writemask
+    of the multiplication for this instead, which is not correct.
+    
+    Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
+
+commit 09d6243aed016eed4518435c9885275dbb6d2aa9
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Thu May 28 10:11:36 2015 +0100
+
+    gallivm: Workaround LLVM PR23628.
+    
+    Temporarily undefine DEBUG macro while including LLVM C++ headers,
+    leveraging the push/pop_macro pragmas, which are supported both by GCC
+    and MSVC.
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=90621
+    
+    Trivial.
+
+commit 10aacf5ae8f3e90e2f0967fbdcf96df93e346e20
+Author: Eric Anholt <eric@anholt.net>
+Date:   Tue Apr 14 22:42:02 2015 -0700
+
+    vc4: Just stream out fallback IB contents.
+    
+    The idea I had when I wrote the original shadow code was that you'd see a
+    set_index_buffer to the IB, then a bunch of draws out of it.  What's
+    actually happening in openarena is that set_index_buffer occurs at every
+    draw, so we end up making a new shadow BO every time, and converting more
+    of the BO than is actually used in the draw.
+    
+    While I could maybe come up with a better caching scheme, for now just
+    do the simple thing that doesn't result in a new shadow IB allocation
+    per draw.
+    
+    Improves performance of isosurf in drawelements mode by 58.7967% +/-
+    3.86152% (n=8).
+
+commit f8de6277bfa1a7db9a8c0f0baaa441276264a982
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed May 27 16:20:28 2015 -0700
+
+    vc4: Don't try to put our dmabuf-exported BOs into the BO cache.
+    
+    We'd sometimes try to reallocate something that X was using as a new
+    pipe_resource, and potentially conflict in our rendering.  But even
+    worse, if we reallocated the BO as a shader, the kernel would reject
+    rendering using the shader.
+
+commit b0edc19a521853371a63e9ffbc519424c8f82942
+Author: Eric Anholt <eric@anholt.net>
+Date:   Wed May 27 16:01:00 2015 -0700
+
+    vc4: Don't forget to make our raster shadow textures non-raster.
+    
+    Not sure what happened in my testing that made the previous shadow
+    code fix glxgears swapbuffering, but this also fixes lots of CopyArea
+    in X (like dragging xlogo around in metacity).
+
+commit 41630c0653578db0c237296aaeec0a85a4e7f4ad
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Fri May 22 12:45:43 2015 +0200
+
+    vc4: make vc4_begin_query() return a boolean
+    
+    I forgot to make the change in 96f164f6f047833091eb98a73aa80c31dc94f962.
+    This fixes a warning with GCC and probably an error with Clang.
+    
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Eric Anholt <eric@anholt.net>
+
+commit e2d84d99f5a66738e8f584bdfea66182f36fe46c
+Author: Ben Widawsky <benjamin.widawsky@intel.com>
+Date:   Wed May 20 19:20:14 2015 -0700
+
+    i965: Emit 3DSTATE_MULTISAMPLE before WM_HZ_OP (gen8+)
+    
+    Starting with GEN8, there is documentation that the multisample state command
+    must be emitted before the 3DSTATE_WM_HZ_OP command any time the multisample
+    count changes. The 3DSTATE_WM_HZ_OP packet gets emitted as a result of an
+    intel_hiz_exec(), which is called upon a fast clear and/or a resolve. This can
+    happen before the state atoms are checked, and so the multisample state must be
+    put directly in the function.
+    
+    v1:
+    - In v0, I was always emitting the command, but Ken came up with the condition to
+    determine whether or not the sample count actually changed.
+    - Ken's recommendation was to set brw->num_multisamples after emitting
+    3DSTATE_MULTISAMPLE. This doesn't work. I put my best guess as to why in the XXX
+    (it was causing 7 regressions on BDW).
+    
+    v2:
+    Flag NEW_MULTISAMPLE state. As Ken found, in state upload we check for the
+    multisample change to determine whether or not to emit certain packets. Since
+    the hiz code doesn't actually care about the number of multisamples, set the
+    flag and let the later code take care of it.
+    
+    Jenkins results:
+    http://otc-mesa-ci.jf.intel.com/view/dev/job/bwidawsk/136/
+    
+    Fixes around 200 piglit tests on SKL. I'm somewhat surprised that it seems to
+    have no impact on BDW as the restriction is needed there as well.
+    
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
+    Reviewed-by: Neil Roberts <neil@linux.intel.com> (v0)
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v2)
+
+commit 147ffd48166d851341cadd12de98895f32ec25a2
+Author: Vinson Lee <vlee@freedesktop.org>
+Date:   Tue May 26 22:18:28 2015 -0700
+
+    gallivm: Do not use NoFramePointerElim with LLVM 3.7.
+    
+    TargetOptions::NoFramePointerElim was removed in llvm-3.7.0svn r238244
+    "Remove NoFramePointerElim and NoFramePointerElimOverride from
+    TargetOptions and remove ExecutionEngine's dependence on CodeGen. NFC."
+    
+    Signed-off-by: Vinson Lee <vlee@freedesktop.org>
+    Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
+
+commit 70c6f2323e602d115b21db8f2bf212223fdef921
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Thu May 21 07:53:09 2015 -0700
+
+    i965: Remove _NEW_MULTISAMPLE dirty bit from 3DSTATE_PS_EXTRA.
+    
+    BRW_NEW_NUM_SAMPLES is sufficient.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit bb18df008e31dd93a364289d003697587d7d78fa
+Author: Kenneth Graunke <kenneth@whitecape.org>
+Date:   Sat May 23 14:27:40 2015 -0700
+
+    i965: Delete GS scratch space workaround warning.
+    
+    This workaround is documented in the 3DSTATE_GS documentation.  It
+    appears to only apply to early steppings of Broadwell and Skylake.
+    
+    I don't think it ever affected production hardware, so at this point it
+    probably makes sense to delete it.
+    
+    Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 40665362fd660a8d58f9edbdfec79a33d44b1534
+Author: EdB <edb+mesa@sigluy.net>
+Date:   Mon May 11 17:45:08 2015 +0200
+
+    clover: Log build options when dumping clc source.
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
+
+commit 2b8c51834bcc34a70dec9b470a28c0ef972d6993
+Author: Ian Romanick <ian.d.romanick@intel.com>
+Date:   Tue May 26 11:41:44 2015 -0700
+
+    glapi: Encapsulate nop table knowledge in new _mesa_new_nop_table function
+    
+    Encapsulate the knowledge about how to build the nop table in a new
+    _mesa_new_nop_table function.  This makes it easier for dispatch_sanity
+    to keep working now and in the future.
+    
+    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Tested-by: Mark Janes <mark.a.janes@intel.com>
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+
+commit 8d813d14e1f5c690c6737c6cd6fc01937a7d4246
+Author: Thomas Helland <thomashelland90@gmail.com>
+Date:   Tue May 26 12:14:00 2015 -0600
+
+    docs: Fix some typos in the developer notes
+    
+    Found when double-checking my review on Brian's series.
+    
+    Signed-off-by: Thomas Helland <thomashelland90@gmail.com>
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit be71bbfaa2ad201b570b56847a13328fc359d0ee
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 15 09:08:14 2015 -0600
+
+    mesa: do not use _glapi_new_nop_table() for DRI builds
+    
+    Commit 4bdbb588a9d38 introduced new _glapi_new_nop_table() and
+    _glapi_set_nop_handler() functions in the glapi dispatcher (which
+    live in libGL.so).  The calls to those functions from context.c
+    would be undefined (i.e. an ABI break) if the libGL used at runtime
+    was older.
+    
+    For the time being, use the old single generic_nop() function for
+    non-Windows builds to avoid this problem.  At some point in the future
+    it should be safe to remove this work-around.  See comments for more
+    details.
+    
+    v2: Incorporate feedback from Emil.  Use _WIN32 instead of
+    GLX_DIRECT_RENDERING to control behavior, move comments.
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Reviewed-and-tested-by: Ian Romanick <ian.d.romanick@intel.com>
+
+commit 2ab0ca36c155cc77e3d5c950270c70a24efee3d3
+Author: Brian Paul <brianp@vmware.com>
+Date:   Tue May 26 11:30:22 2015 -0600
+
+    docs: add information about reviewing patches
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit c6184f84b7227e1548947e42bca3ff3ddb7e379c
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon May 25 10:18:35 2015 -0600
+
+    docs: update the coding style information
+    
+    This hasn't been updated in a long time and from recent discussion on
+    the mailing list, it's not always clear what's expected.  Hopefully,
+    this will help a bit.
+    
+    v2: document function brace placement, per Thomas Helland.
+    
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit d959885b9109878acc427b9321e46d8c6d133d1a
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon May 25 09:42:04 2015 -0600
+
+    docs: update documentation about patch formatting, testing, etc
+    
+    v2: correctly escape < and > chars.
+    
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit 98f2f47f7a1d893bb482d508a690c417c2453c6e
+Author: Brian Paul <brianp@vmware.com>
+Date:   Mon May 25 09:13:09 2015 -0600
+
+    docs: reorganize devnotes.html file
+    
+    Move "Adding Extensions" to the end.  Add a simple table of contents
+    at the top.
+    
+    Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
+
+commit eec904d29c0d996fb05f24771a2fdd33e152f519
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 22 13:39:03 2015 -0700
+
+    xlib: fix X_GLXCreateContextAtrribs/Attribs typo
+    
+    In case the glproto.h file isn't up to date, we provide the #define
+    for X_GLXCreateContextAttribsARB.
+    
+    v2: fix other occurrences, improve #ifndef test, per Jose.
+    
+    Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
+
+commit dce53a7d2453c0b2b69a345340455866e75f0a8d
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 22 17:26:12 2015 -0700
+
+    mesa: add some comments in copyimage.c
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 0b76541ce0cc34020ef1057a17149cbf9cb3dbe1
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 22 17:22:47 2015 -0700
+
+    mesa: move decls, add const qualifiers in copyimage.c
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 8369675a55ab300a84b3a82632042a33883ca255
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 22 17:20:57 2015 -0700
+
+    mesa: code clean-ups in textureview.[ch]
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 3ddd1cf7d128018639de7e4c8bc17896233ebdb7
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 22 16:42:21 2015 -0700
+
+    mesa: const qualify, return bool for _mesa_texture_view_compatible_format()
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit 09eabf5be68b901999ef15733a22dfcb82dfec5f
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 22 16:39:32 2015 -0700
+
+    mesa: add const qualifier on _mesa_is_compressed_format()
+    
+    Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+
+commit b787f48ed2a7e1855100afd943ae6b407abb401f
+Author: Jose Fonseca <jfonseca@vmware.com>
+Date:   Tue May 26 11:01:57 2015 +0100
+
+    glapi: Avoid argparse type argument for API XML input files.
+    
+    argparse type is a nice type saver for simple data types, but it doesn't
+    look like a good fit for the input XML file:
+    
+    - Certain implementations of argparse (particularly python 2.7.3's)
+      invoke the type constructor for the default argument even when an
+      option is passed in the command line.  Causing `No such file or
+      directory: 'gl_API.xml'` when the current dir is not
+      src/mapi/glapi/gen.
+    
+    - The parser takes multiple arguments.  This is currently worked around
+      using lambdas, but that is unnecessarily complex and hard to read.
+      Furthermore it's odd to have a side-effect as heavy as parsing XML
+      happening deep inside the argument parsing.
+    
+    https://bugs.freedesktop.org/show_bug.cgi?id=90600
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+
+commit 224a77cc60cc0e7f8a14e35ebca6e42544af39b1
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 15:38:21 2015 +0100
+
+    radeonsi: use a switch statement in si_delete_shader_selector
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 0c5a309cee868cd6e3870f439f560f5f32eb7c40
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun Feb 22 15:21:59 2015 +0100
+
+    radeonsi: use a switch statement in si_shader_selector_key
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit fa7f606e89dc4447f07fec0b84d396a4ff25ee7e
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 14:56:34 2015 +0200
+
+    radeonsi: fix scratch buffer setup for geometry shaders
+    
+    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit f41517242aaad3cb67fb7f6c9a03dc34198ca500
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 10 18:03:47 2015 +0200
+
+    radeonsi: remove unused cases from si_shader_io_get_unique_index
+    
+    These can't occur between VS and GS, because GS is only supported
+    in the core profile.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit af4b9c7c2e119d373f9684119b833a8b62cbb756
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 14:50:19 2015 +0200
+
+    radeonsi: don't count special outputs for the VS export count
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit e4339bc9886a26d75b924ad045c3ddd003f802c3
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sat May 9 19:36:17 2015 +0200
+
+    radeonsi: add support for PIPE_CAP_TGSI_TEXCOORD
+    
+    Without it, texcoords are mapped to GENERIC[0..7], PointCoord is mapped to
+    GENERIC[8], and user-defined varyings start from GENERIC[9]. Since texcoords
+    can only be used between VS and PS, and PointCoord is PS-only, it's silly to
+    always start from GENERIC[9] in all other shaders (such as LS, HS, ES, GS).
+    
+    This adds support for TEXCOORD and PCOORD semantics. As a result, st/mesa
+    will use GENERIC[0] as a base for user-defined varyings, which should make
+    linking ES and GS as well as tessellation shaders at runtime easier.
+    
+    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
+
+commit 3d35027fdc383c2bd009f3690b2b160e3b39d58b
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 02:23:04 2015 +0200
+
+    tgsi/ureg: enable creating tessellation shaders with ureg_create_shader
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit c1266f28d6af7788e19634f0d36257e78d1139be
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 02:21:47 2015 +0200
+
+    tgsi/text: enable parsing tessellation shaders
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 0d84b6cf84971f3378bb95c85f7d39e0c6680b8f
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 13:52:30 2015 +0200
+
+    gallium: rename TGSI tessellation processor types to match pipe shader names
+    
+    I forgot to do this when pushing the interface changes.
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 92c31bb0dd8149d3e5db48b8dec62b242be80d28
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Mon May 18 12:34:44 2015 +0200
+
+    gallium: use const in set_tess_state
+    
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 967825d053f71c5f5fc3ba31eabc0c6004fde4f1
+Author: Koop Mast <kwm@rainbow-runner.nl>
+Date:   Tue May 26 10:24:40 2015 +0200
+
+    clover: Build fix for FreeBSD.
+    
+    Cc: 10.6 10.5 <mesa-stable@lists.freedesktop.org>
+
+commit 5ae6c7bfce5c9fb91ab6cef2ea74a39af091d5f6
+Author: Neil Roberts <neil@linux.intel.com>
+Date:   Wed May 20 19:26:02 2015 +0100
+
+    i965/skl: Add a message header for the TXF_MCS instruction in vec4vs
+    
+    When using SIMD4x2 on Skylake, the sampler instructions need a message
+    header to select the correct mode. This was added for most sample
+    instructions in 0ac4c2727 but the TXF_MCS instruction is emitted
+    separately and it was missed.
+    
+    This fixes a bunch of Piglit tests which test texelFetch in a geometry
+    shader, for example:
+    
+     spec/arb_texture_multisample/texelfetch/2-gs-sampler2dms
+    
+    Cc: mesa-stable@lists.freedesktop.org
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+
+commit 3ec18152858fd9aadb398d78d5ad2d2b938507c1
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon May 25 17:46:45 2015 -0400
+
+    nv30: falling back to draw path for edgeflag does no good
+    
+    The problem is that the EDGEFLAG has to be toggled at vertex submission
+    time. This can be done from either the draw or the regular paths. Avoid
+    falling back to draw just because there's an edgeflag.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 25be70462dbb7ee994e69ffccc3de94e4114e667
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat May 23 22:11:38 2015 -0400
+
+    nv30/draw: switch varying hookup logic to know about texcoords
+    
+    Commit 8acaf862dfe switched things over to use TEXCOORD instead of
+    GENERIC, but did not update the nv30 swtnl draw paths. This teaches the
+    draw logic about TEXCOORD.
+    
+    Among other things, this fixes a crash in demos/arbocclude when using
+    swtnl. Curiously enough, the point-sprite piglit works without this.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit c3d36a2e1a87a4aded662db7a5d320ee7ac3a8b5
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon May 25 21:14:13 2015 -0400
+
+    nv30/draw: allocate vertex buffers in gart
+    
+    These are only used once per draw, so it makes sense to keep them in
+    GART. Also take this opportunity to modernize the buffer mapping API
+    usage.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit fdad7dfbdae07b9273fc8f57e63258dbe542c9b5
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon May 25 21:12:46 2015 -0400
+
+    nv30/draw: only use the DMA1 object (GART) if the bo is not in VRAM
+    
+    Instead of always having it in the data, let the bo placement decide it.
+    This fixes glxgears with swtnl forced on.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 3600439897c79d37c3c654546867ddfa0c420743
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon May 25 20:15:09 2015 -0400
+
+    nv30/draw: fix indexed draws with swtnl path and a resource index buffer
+    
+    The map = assignment was missing.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 5646f0f18a620292524eebcd77353ff3d3687eb2
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 17 17:56:44 2015 -0400
+
+    glsl: avoid leaking linked gl_shader when there's a late linker error
+    
+    This makes piglit mixing-clip-distance-and-clip-vertex-disallowed have 0
+    definitely lost blocks with valgrind. (Same non-0 number of possibly
+    lost blocks though.)
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 6a111e54d7578abee6bce4a75ce1399ed369ab5f
+Author: Roland Scheidegger <sroland@vmware.com>
+Date:   Mon May 25 22:24:05 2015 +0200
+
+    llvmpipe: (trivial) add parentheses in (!x == y) expression
+    
+    Apparently some compilers think we probably wanted to do !(x == y) instead
+    and issue a warning, so just shut it up... No functional change, obviously.
+    
+    Cc: <mesa-stable@lists.freedesktop.org>
+
+commit bb973723a5e1f27817b6be2c2fa4fb3ea28e733c
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 17 17:32:24 2015 -0400
+
+    st/mesa: don't leak glsl_to_tgsi object on link failure
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 147816375d22a653176ab28ed650fa811ceea83f
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Mon May 25 14:06:01 2015 -0400
+
+    nv30/draw: draw expects constbuf size in bytes, not vec4 units
+    
+    This fixes glxgears with NV30_SWTNL=1 forced on. Probably fixes a bunch
+    of other situations where we fall back to the swtnl path.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 89585edf3c01c94b62d163adf0209568efa68568
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 24 11:56:21 2015 -0400
+
+    nv30/draw: avoid leaving stale pointers in draw state
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit cc3d2755577dab8c930f0bccff2756cb92aef8bc
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Mon May 25 09:27:08 2015 -0700
+
+    Fix an unused variable warning
+    
+    Trivial.  Deleted the 2 unneeded lines.
+
+commit 843ff4ba2af0b19a377a3bf1c9ae7b2b1f2c0e4c
+Author: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
+Date:   Mon May 25 15:57:09 2015 +0200
+
+    docs: Mark ARB_cull_distance as in progress
+    
+    Signed-off-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
+    Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+
+commit 3dec892d9b873f6c8a2a963a7646af90ada361bc
+Author: Iago Toral Quiroga <itoral@igalia.com>
+Date:   Mon May 25 09:40:01 2015 +0200
+
+    docs: Mark ARB_shader_storage_buffer_object as in progress
+    
+    Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
+
+commit 7518fc3c66e9b5703b987bccca7970a344deadfa
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat May 23 20:58:53 2015 -0400
+
+    nv30: fix clip plane uploads and enable changes
+    
+    nv30_validate_clip depends on the rasterizer state. Also we should
+    upload all the new clip planes on change since next time the plane data
+    won't have changed, but the enables might.
+    
+    This fixes fixed-clip-enables and vs-clip-vertex-enables shader tests.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit aba3392541f38f82e3ebde251fdcca78e90adbf3
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Thu Mar 5 12:10:15 2015 -0500
+
+    nv30: avoid doing extra work on clear and hitting unexpected states
+    
+    Clearing can happen at a time when various state objects are incoherent
+    and not ready for a draw. Some of the validation functions don't handle
+    this well, so only flush the framebuffer state. This has the advantage
+    of also not doing extra work.
+    
+    This works around some crashes that can happen when clearing.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
+
+commit 207ae2b0efcdb48a39fd91b05181c7e6d81e5002
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun May 24 10:47:48 2015 +0100
+
+    docs: add news item and link release notes for mesa 10.5.6
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+
+commit 81d5d78573f821fb0983523bbd698d6691c4bb34
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sun May 24 10:43:31 2015 +0100
+
+    docs: Add sha256sums for the 10.5.6 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit 8cb28bc49d7799d5accb1feb7e355ec48518e20b)
+
+commit 3ab4556b84a944278dbddc21fba40b328a77c2e9
+Author: Emil Velikov <emil.l.velikov@gmail.com>
+Date:   Sat May 23 09:02:41 2015 +0100
+
+    Add release notes for the 10.5.6 release
+    
+    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    (cherry picked from commit b1cf9cfb1618f0b73e673745d3c8612aea61723d)
+
+commit 9870ed05dd333a20662479b9b1e3a8db542924c4
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 24 02:23:16 2015 -0400
+
+    nv30: avoid leaking render state and draw shaders
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit 605ce36d7f4a90c4062d6940bea82ab483bbe3b2
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 24 01:31:11 2015 -0400
+
+    nv30: don't leak fragprog consts
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit fa7f9f123b70f313d3c073b52c9c16b4b8df28f8
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat May 23 01:57:41 2015 -0400
+
+    nv50/ir: avoid messing up arg1 of PFETCH
+    
+    There can be scenarios where the "indirect" arg of a PFETCH becomes
+    known, and so the code will attempt to propagate it. Use this
+    opportunity to just fold it into the first argument, and prevent the
+    load propagation pass from touching PFETCH further.
+    
+    This fixes gs-input-array-vec4-index-rd.shader_test and
+    vs-output-array-vec4-index-wr-before-gs.shader_test on nvc0 at least.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+
+commit f972b223c4cb4ec58a9451cbac5d120ac9deb336
+Author: Grigori Goronzy <greg@chown.ath.cx>
+Date:   Tue May 19 09:28:30 2015 +0200
+
+    clover: try userptr for CL_MEM_USE_HOST_PTR
+    
+    According to spec, CL_MEM_USE_HOST_PTR should directly use host memory,
+    if possible. This is just what userptr is for, so use it.
+    
+    In case the memory cannot be mapped, a fallback similar to
+    CL_MEM_COPY_HOST_PTR is used.
+    
+    v2: constify, drop unneeded cast
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit f3e8596a371c3708e9d9d68a021c39982c676cf1)
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
 
-commit 83007290c6340a8bc9a940f99525d1fa0e211522
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:25 2015 -0700
+commit 5c495e86388b55af067677e8608eb124a5d70d29
+Author: Grigori Goronzy <greg@chown.ath.cx>
+Date:   Tue May 12 02:22:12 2015 +0200
 
-    Revert "mesa: Add ARB_direct_state_access checks in texture functions"
+    clover: implement CL_MEM_ALLOC_HOST_PTR
     
-    This reverts commit 8940957238e8584ce27295791cee4cc3d6f7cf1e.
+    This flag is typically used to request pinned host memory, to avoid
+    any copies between GPU and CPU.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 1ac6a8f1d1952a20d54df3e513c253d7988402ac)
+    This improves throughput with an older OpenCL app which I unfortunately
+    can't publish due to its licensing.
+    
+    Reviewed-by: Francisco Jerez <currojerez@riseup.net>
 
-commit 38fb22ceced0c180db5d0c505855e00be3de42be
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:21 2015 -0700
+commit c922758685932e86d935972980df3be22d7b2fdf
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat May 23 19:07:48 2015 -0400
 
-    Revert "mesa: Add ARB_direct_state_access checks in VAO functions"
-    
-    This reverts commit 36b05793372b86b914d9b95d0188f5f387e01d68.
+    nv30: check nouveau_bo_map output of notify bo
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 92e362191e6c1c15e3944464fbf6bbda9e7d9892)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 1deda22b880c9672650ca8e6efbda37c331bf096
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:18 2015 -0700
+commit 921917c8d8e707dd854e7be05fba7a3e55bc71bf
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sat May 23 17:35:42 2015 -0400
 
-    Revert "mesa: Add ARB_direct_state_access checks in sampler object functions"
+    nvc0: a geometry shader can have up to 1024 vertices output
     
-    This reverts commit 9e7149c8986348bf9567f049444783ef52775f4e.
+    The 1024 is already reported everywhere, not sure where this 0x1ff came
+    from.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit ae5457754492b594c55911433a9b3675216c46c1)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
 
-commit ef6670ca4315c5ad20ea5e9e869997feda637546
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:15 2015 -0700
+commit 6ca67f62e885f0e42c0cef2db5c0ae837adfe646
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Tue May 19 17:35:29 2015 -0700
 
-    Revert "mesa: Add ARB_direct_state_access checks in program pipeline functions"
+    i965/fs: Fix implied_mrf_writes for scratch writes
     
-    This reverts commit bebf3c6ab314bde05ac5a3b4d3e63fd36243c58e.
+    We build the entire message in the generator so all the MRF writes are
+    implied.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit a9dcf45cd88b6e4d7816f45756d0b96d9c77cffe)
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
 
-commit b8c030d9cf2bf593f07a265aa6883c3435083bee
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:11 2015 -0700
+commit 58aed1031d40e62c9f41f7c512b3165dd5913d1e
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Tue May 19 16:25:02 2015 -0700
 
-    Revert "mesa: Add ARB_direct_state_access checks in query object functions"
+    prog_to_nir: Use a variable for uniform data
     
-    This reverts commit d3368e0c9e27ced6059eb2ecdf2aa999a00e90b0.
+    Previously, the prog_to_nir pass was directly generating uniform load/store
+    intrinsics.  This converts it to use a single giant "parameters" variable
+    and we now depend on lowering to get the uniform load/store intrinsics.
+    One advantage of this is that we now have one code-path after we do the
+    initial conversion into NIR.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit a9f678a8f4d4f9806dc4e931477fad300c61b4a1)
+    No shader-db changes.
+    
+    Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
 
-commit 9c04f375dbd838cae7c70799686efda21c69fe6c
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:54:08 2015 -0700
+commit c783fd476c61fae41bddead4e47740e23d0cf2eb
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Sat May 23 18:50:25 2015 +0200
 
-    Revert "i915: Enable ARB_direct_state_access"
+    nv50: fix PIPE_QUERY_TIMESTAMP_DISJOINT, based on nvc0
     
-    This reverts commit 121030eed8fc41789d2f4f7517bbc0dd6199667b.
+    PIPE_QUERY_TIMESTAMP_DISJOINT could not work because q->ready was always
+    set to FALSE. To fix this issue, add more different states for queries
+    according to nvc0.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit f1fcf79e3c8e4f3594dc1b6d268430f8e8d4eb97)
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 944bf20c1704b780bd1fd98d9d20c7654441a335
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:53:58 2015 -0700
+commit 217301843aea0299ab245e260b20af7ad250e9d8
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri May 22 19:03:58 2015 -0400
 
-    Revert "i965: Enable ARB_direct_state_access"
+    nvc0/ir: LOAD's can't be used for shader inputs
     
-    This reverts commit a57feba0a35de35728269aeb26b039e4f2393d69.
+    We forgot to convert to VFETCH in case of indirect access. Fix that.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 4bc00b1a4b34abc3e6e26f126686608ccfa33f52)
+    This avoids crashes on the new gs-input-array-vec4-index-rd and
+    vs-output-array-vec4-index-wr-before-gs but they still fail.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
 
-commit b4da1d9ebd8e3c4b5c3b5833c13e9e216b084bb0
-Author: Ian Romanick <ian.d.romanick@intel.com>
-Date:   Mon May 18 13:53:49 2015 -0700
+commit 0bab3962f5f313ea829c95920c02f32afb23715d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri May 22 19:02:41 2015 -0400
 
-    Revert "st/mesa: Enable ARB_direct_state_access"
+    nv50/ir: guess that the constant offset is the starting slot of array
     
-    This reverts commit 357bf80caade9e0be20dcc88ec38884e34abc986.
+    When we get something like IN[ADDR[0].x+5], we will now guess that we
+    should look at IN[5] for the "base" information.
     
-    Acked-by: Fredrik Höglund <fredrik@kde.org>
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 73cf10e6236fbf119c8262e69cd24f55557116f1)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
 
-commit 75691166bef11f00af05d0504bc3a113eaa92656
-Author: Jason Ekstrand <jason.ekstrand@intel.com>
-Date:   Mon May 25 09:31:55 2015 -0700
+commit d1eea18a595a468dbc2267a8d14197a3b1a5a4b6
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri May 22 16:40:08 2015 -0400
 
-    i965/fs: Properly handle explicit depth in SIMD16 with dual-source blend
+    nvc0/ir: set ftz when sources are floats, not just destinations
     
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90629
-    Tested-by: Markus Wick <markus@selfnet.de>
-    Reviewed-by: Matt Turner <mattst88@gmail.com>
-    (cherry picked from commit 8bbe7fa7a853d8ebf69e5d2d0fdc4343a20b638f)
+    In the case of a compare, the destination might be a predicate, but we
+    still want to flush denorms.
+    
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
 
-commit 8c57dc26a749f8759b30efbc689e3d99ffcba785
-Author: Ben Widawsky <benjamin.widawsky@intel.com>
-Date:   Wed May 20 19:20:14 2015 -0700
+commit a85aba190dfab02ffccf744bad5ad10357394de0
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri May 8 23:46:53 2015 -0400
 
-    i965: Emit 3DSTATE_MULTISAMPLE before WM_HZ_OP (gen8+)
-    
-    Starting with GEN8, there is documentation that the multisample state command
-    must be emitted before the 3DSTATE_WM_HZ_OP command any time the multisample
-    count changes. The 3DSTATE_WM_HZ_OP packet gets emitted as a result of a
-    intel_hix_exec(), which is called upon a fast clear and/or a resolve. This can
-    happen before the state atoms are checked, and so the multisample state must be
-    put directly in the function.
+    nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg
     
-    v1:
-    - In v0, I was always emitting the command, but Ken came up with the condition to
-    determine whether or not the sample count actually changed.
-    - Ken's recommendation was to set brw->num_multisamples after emitting
-    3DSTATE_MULTISAMPLE. This doesn't work. I put my best guess as to why in the XXX
-    (it was causing 7 regressions on BDW).
+    This covers the pattern where a KILL_IF is used, which triggers a
+    comparison of -x to 0. This can usually be folded into the comparison whose
+    result is being compared to 0, however it may, itself, have already been
+    combined with another comparison. That shouldn't impact the logic of
+    this pass however. With this and the & 1.0 change, code like
     
-    v2:
-    Flag NEW_MULTISAMPLE state. As Ken found, in state upload we check for the
-    multisample change to determine whether or not to emit certain packets. Since
-    the hiz code doesn't actually care about the number of multisamples, set the
-    flag and let the later code take care of it.
+    00000020: 001c0001 80081df4     set b32 $r0 lt f32 $r0 0x3e800000
+    00000028: 001c0000 201fc000     and b32 $r0 $r0 0x3f800000
+    00000030: 7f9c001e dd885c00     set $p0 0x1 lt f32 neg $r0 0x0
+    00000038: 0000003c 19800000     $p0 discard
     
-    Jenkins results:
-    http://otc-mesa-ci.jf.intel.com/view/dev/job/bwidawsk/136/
+    becomes
     
-    Fixes around 200 piglit tests on SKL. I'm somewhat surprised that it seems to
-    have no impact on BDW as the restriction is needed there as well.
+    00000020: 001c001d b5881df4     set $p0 0x1 lt f32 $r0 0x3e800000
+    00000028: 0000003c 19800000     $p0 discard
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
-    Reviewed-by: Neil Roberts <neil@linux.intel.com> (v0)
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v2)
-    (cherry picked from commit e2d84d99f5a66738e8f584bdfea66182f36fe46c)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 230891cc9c24744cbe59cc40a748a69a6b89edf6
-Author: Matt Turner <mattst88@gmail.com>
-Date:   Wed May 27 10:44:45 2015 -0700
+commit d2a474e8d4b03f10aec57c7f7740addad1e1ea9d
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 3 22:15:16 2015 -0400
 
-    i965/fs: Fix lowering of integer multiplication with cmod.
+    nvc0/ir: optimize set & 1.0 to produce boolean-float sets
     
-    If the multiplication's result is unused, except by a conditional_mod,
-    the destination will be null. Since the final instruction in the lowered
-    sequence is a partial-write, we can't put the conditional mod on it and
-    we have to store the full result to a register and do a MOV with a
-    conditional mod.
+    This has started to happen more now that the backend is producing
+    KILL_IF more often.
     
-    Cc: "10.6" <mesa-stable@lists.freedesktop.org>
-    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90580
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    (cherry picked from commit 0596134410a0decc2f6bba77bfedb82d308aabbe)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
 
-commit ffd133bdbe4560f3a70aa76b7a6cbedc23e554ed
-Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Wed May 27 12:39:19 2015 +0100
+commit e5ad19a46e87ed22943d7f6ad046f974fd5977e1
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Fri May 8 23:00:05 2015 -0400
 
-    Increment version to 10.6.0-rc2
+    nvc0/ir: allow iset to produce a boolean float
     
-    Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 77b116f1d3c2f0df1a1bb040bad592d2ec2749d9
-Author: Brian Paul <brianp@vmware.com>
-Date:   Fri May 15 09:08:14 2015 -0600
+commit 0ec6b8ea8ce0929ecacf6edc8db198b7b9604f18
+Author: Ilia Mirkin <imirkin@alum.mit.edu>
+Date:   Sun May 3 18:38:52 2015 -0400
 
-    mesa: do not use _glapi_new_nop_table() for DRI builds
+    nvc0/ir: avoid jumping to a sched instruction
     
-    Commit 4bdbb588a9d38 introduced new _glapi_new_nop_table() and
-    _glapi_set_nop_handler() functions in the glapi dispatcher (which
-    live in libGL.so).  The calls to those functions from context.c
-    would be undefined (i.e. an ABI break) if the libGL used at runtime
-    was older.
-    
-    For the time being, use the old single generic_nop() function for
-    non-Windows builds to avoid this problem.  At some point in the future
-    it should be safe to remove this work-around.  See comments for more
-    details.
-    
-    v2: Incorporate feedback from Emil.  Use _WIN32 instead of
-    GLX_DIRECT_RENDERING to control behavior, move comments.
-    
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Reviewed-and-tested-by: Ian Romanick <ian.d.romanick@intel.com>
-    (cherry picked from commit be71bbfaa2ad201b570b56847a13328fc359d0ee)
+    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
+
+commit 491adb61d25eef8afe2615e0fd842dda20b17004
+Author: Brian Paul <brianp@vmware.com>
+Date:   Fri May 22 13:18:54 2015 -0700
+
+    glx: fix Scons build
     
-    Squashed with commit
+    Replace -h with --header-tag as was done for the Makefile build.
     
-    glapi: Encapsulate nop table knowledge in new _mesa_new_nop_table function
+    Reviewed-by: Dylan Baker <baker.dylan.c@gmail.com>
+
+commit 3f823cc55a15bc0b54d09e2c3fb5944a645b09e4
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed May 20 15:51:40 2015 -0700
+
+    glapi: glX_proto_size.py: use a main function
     
-    Encapsulate the knowledge about how to build the nop table in a new
-    _mesa_new_nop_table function.  This makes it easier for dispatch_sanity
-    to keep working now and in the future.
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 9ace0b542241c77ae82a0835ac8a09e2a7510eaf
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed May 20 15:49:11 2015 -0700
+
+    glapi: glX_proto_size.py: use argparse instead of getopt
+    
+    This is roughly equivalent to the original getopt, except that it
+    removes the '-h' short option, which argparse reserves for
+    auto-generated help messages. It does retain the long option specified
+    by the getopt version, and changes the makefile to use that.
     
-    Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
-    Reviewed-by: Brian Paul <brianp@vmware.com>
-    Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    Tested-by: Mark Janes <mark.a.janes@intel.com>
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 2b8c51834bcc34a70dec9b470a28c0ef972d6993)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 1eef92e336346b677ddf341331503f815e0dfbd5
-Author: Marek Olšák <marek.olsak@amd.com>
-Date:   Mon May 18 14:56:34 2015 +0200
+commit 1c7cc67778073fd802773390da55980702637547
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed May 20 15:20:09 2015 -0700
 
-    radeonsi: fix scratch buffer setup for geometry shaders
+    glapi: glX_proto_recv.py: Use a main function
     
-    Cc: 10.6 <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
-    (cherry picked from commit fa7f606e89dc4447f07fec0b84d396a4ff25ee7e)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 317fa3e7eff892bf5406eda1a285df35d2757a3b
-Author: Koop Mast <kwm@rainbow-runner.nl>
-Date:   Tue May 26 10:24:40 2015 +0200
+commit d986cb7c70db3b512f6ee0bbc95ba2565606c222
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed May 20 15:19:28 2015 -0700
 
-    clover: Build fix for FreeBSD.
+    glapi: glX_proto_recv.py: use argparse instead of getopt
     
-    Cc: 10.6 10.5 <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 967825d053f71c5f5fc3ba31eabc0c6004fde4f1)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 580351d3d31c588fb151e887ca33d9caeb357287
-Author: Neil Roberts <neil@linux.intel.com>
-Date:   Wed May 20 19:26:02 2015 +0100
+commit 67d3ec0bb8f6a8c918b371ed03ef21814899f07d
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed May 20 11:49:40 2015 -0700
 
-    i965/skl: Add a message header for the TXF_MCS instruction in vec4vs
+    glapi: gl_genexec.py: use a main function
     
-    When using SIMD4x2 on Skylake, the sampler instructions need a message
-    header to select the correct mode. This was added for most sample
-    instructions in 0ac4c2727 but the TXF_MCS instruction is emitted
-    separately and it was missed.
-    
-    This fixes a bunch of Piglit tests which test texelFetch in a geometry
-    shader, for example:
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 79c4e595bce563d6075fed176c2256bf2e7e99a5
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed May 20 11:49:10 2015 -0700
+
+    glapi: gl_genexec.py: use argparse instead of getopt
     
-     spec/arb_texture_multisample/texelfetch/2-gs-sampler2dms
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 9097a4a103f2f7abf5af3e1056467c21051405ca
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Tue Feb 10 15:20:57 2015 -0800
+
+    glapi: glX_proto_send.py: use a main function.
     
-    Cc: mesa-stable@lists.freedesktop.org
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    (cherry picked from commit 5ae6c7bfce5c9fb91ab6cef2ea74a39af091d5f6)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 534f5e8d802c5bac7349152128931228c78bf1ef
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon May 25 17:46:45 2015 -0400
+commit 9eed4e6232b9ca936ad8e87aa21f97ffb81981ce
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Tue Feb 10 15:20:18 2015 -0800
 
-    nv30: falling back to draw path for edgeflag does no good
+    glapi: glX_proto_send.py: use argparse instead of getopt
     
-    The problem is that the EDGEFLAG has to be toggled at vertex submission
-    time. This can be done from either the draw or the regular paths. Avoid
-    falling back to draw just because there's an edgeflag.
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit dddac8cac3ab883b6beeb4af9ca27bb2f3b4ebec
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Fri Feb 13 16:41:03 2015 -0800
+
+    glapi: glX_server_table.py: use argparse instead of getopt
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 3ec18152858fd9aadb398d78d5ad2d2b938507c1)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 74e2db8a9202acc7ed38a31fe0a3453ec3a7a1dd
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat May 23 22:11:38 2015 -0400
+commit 952bd305c6862113c60d3b62402fc5a32dbb65ca
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Fri Feb 13 15:54:17 2015 -0800
 
-    nv30/draw: switch varying hookup logic to know about texcoords
+    glapi: gl_SPARC_asm.py: use main function
     
-    Commit 8acaf862dfe switched things over to use TEXCOORD instead of
-    GENERIC, but did not update the nv30 swtnl draw paths. This teaches the
-    draw logic about TEXCOORD.
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 86c9fb526ed9b0a68eb7bb29d661b7f61415d3f0
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Fri Feb 13 15:49:16 2015 -0800
+
+    glapi: gl_SPARC_asm.py: use argparse instead of getopt
     
-    Among other things, this fixes a crash in demos/arbocclude when using
-    swtnl. Curiously enough, the point-sprite piglit works without this.
+    Also drop -m switch, which only accepted a single value or raised an
+    error, and was unused in the makefile.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 25be70462dbb7ee994e69ffccc3de94e4114e667)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit c288bf3b89dfd510dec7b1481dfb5ae339301ae3
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon May 25 21:14:13 2015 -0400
+commit f2e78bd697e168c4f8cb1fd7f939713f8319eb78
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Thu Feb 12 14:05:46 2015 -0800
+
+    glapi: gl_x86-64_asm.py: Use a main function
+    
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-    nv30/draw: allocate vertex buffers in gart
+commit 2e3da443f16e479997cdc5a2a137b9823f8617df
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Thu Feb 12 14:04:03 2015 -0800
+
+    glapi: gl_x86-64_asm.py: Use argparse instead of getopt
     
-    These are only used once per draw, so it makes sense to keep them in
-    GART. Also take this opportunity to modernize the buffer mapping API
-    usage.
+    Also removes the redundant -m argument, which could only be set to
+    'generic', or it would raise an exception. This option wasn't used in
+    the makefile.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit c3d36a2e1a87a4aded662db7a5d320ee7ac3a8b5)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 18e05588dfe8d56fc78aa4e8bbae6dab95fe1faa
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon May 25 21:12:46 2015 -0400
+commit 48924567994c43e734f97a4b9150e87fa72b6c11
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed Feb 11 18:05:35 2015 -0800
 
-    nv30/draw: only use the DMA1 object (GART) if the bo is not in VRAM
+    glapi: gl_x86_asm.py: use a main function
     
-    Instead of always having it in the data, let the bo placement decide it.
-    This fixes glxgears with swtnl forced on.
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit fc96122fb6450dd9b8c90dc5efb6bb0ab235fe0e
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Wed Feb 11 18:04:22 2015 -0800
+
+    glapi: gl_x86_asm.py: use argparse instead of getopt
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit fdad7dfbdae07b9273fc8f57e63258dbe542c9b5)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 407e20d45d09b5e170aced7e78d61914cecca5cd
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon May 25 20:15:09 2015 -0400
+commit 5998d32f09777b9bbcd422dfbab9261f1b0e07b8
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Tue Feb 10 14:33:12 2015 -0800
 
-    nv30/draw: fix indexed draws with swtnl path and a resource index buffer
+    glapi: gl_gentable.py: use a main function
     
-    The map = assignment was missing.
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit d36fa4472ea408eb2a1ecadc44268bce4bab92ea
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Tue Feb 10 14:32:40 2015 -0800
+
+    glapi: gl_gentable.py: Replace getopt with argparse
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 3600439897c79d37c3c654546867ddfa0c420743)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 5eef18390d540d679aaeb664fdb2deb7d56e74ae
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sun May 17 17:56:44 2015 -0400
+commit 3317cea0488075f291744ebc4eaa48fc73d293de
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Tue Feb 10 10:57:08 2015 -0800
 
-    glsl: avoid leaking linked gl_shader when there's a late linker error
+    glapi: gl_apitemp.py: Use a main function
     
-    This makes piglit mixing-clip-distance-and-clip-vertex-disallowed have 0
-    definitely lost blocks with valgrind. (Same non-0 number of possibly
-    lost blocks though.)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit 24ec03bd05153bf0c8b1063d4e6a68b073e57840
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Tue Feb 10 10:55:45 2015 -0800
+
+    glapi: gl_apitemp.py: Convert to argparse instead of getopt
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 5646f0f18a620292524eebcd77353ff3d3687eb2)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 34ff020aeac214976bdc152dc5009be9534c6ed7
-Author: Roland Scheidegger <sroland@vmware.com>
-Date:   Mon May 25 22:24:05 2015 +0200
+commit 6c4dcef6dc704156115b8d3ad19c3020663c7ffc
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Mon Feb 9 14:19:23 2015 -0800
 
-    llvmpipe: (trivial) add parantheses in (!x == y) expression
+    glapi: gl_enums.py: use main() function for if __name__ == "__main__"
     
-    Apparently some compilers think we probably wanted to do !(x == y) instead
-    and issue a warning, so just shut it up... No functional change, obviously.
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit fd5f1dd6c72e34a8f0522c4706cd5eec624e80cf
+Author: Dylan Baker <baker.dylan.c@gmail.com>
+Date:   Mon Feb 9 14:18:30 2015 -0800
+
+    glapi: gl_enums.py: use argparse instead of getopt.
     
-    Cc: <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 6a111e54d7578abee6bce4a75ce1399ed369ab5f)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 8fc109160efa8ff018081f3331dd4215a6e66a9f
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sun May 17 17:32:24 2015 -0400
+commit e51530ba1665af7120be852653bbff930fa1ca33
+Author: Dylan Baker <dylanx.c.baker@intel.com>
+Date:   Mon Nov 24 14:14:12 2014 -0800
 
-    st/mesa: don't leak glsl_to_tgsi object on link failure
+    glapi: gl_procs.py: Use argparse rather than getopt
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit bb973723a5e1f27817b6be2c2fa4fb3ea28e733c)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit cb0c057a3135b4e6c0b6921df7bc7b29dea3315d
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Mon May 25 14:06:01 2015 -0400
+commit 28ecdd6be7e6f58eabfc9aa0461fb8db7dd8133d
+Author: Dylan Baker <dylanx.c.baker@intel.com>
+Date:   Thu Nov 20 17:07:48 2014 -0800
 
-    nv30/draw: draw expects constbuf size in bytes, not vec4 units
+    glapi: gl_procs.py: Fix a few low hanging style things
     
-    This fixes glxgears with NV30_SWTNL=1 forced on. Probably fixes a bunch
-    of other situations where we fall back to the swtnl path.
+    Shuts up analysis tools to make them return actual problems.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 147816375d22a653176ab28ed650fa811ceea83f)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 60294f8c39191374631b574b97ebe46b6afe2a44
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sun May 24 11:56:21 2015 -0400
+commit 622fee43c8aa339e6b642fc8a90c759dcf28c6e7
+Author: Dylan Baker <dylanx.c.baker@intel.com>
+Date:   Thu Nov 20 14:07:15 2014 -0800
 
-    nv30/draw: avoid leaving stale pointers in draw state
+    glapi: remap_helper.py: use argparse instead of optparse
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 89585edf3c01c94b62d163adf0209568efa68568)
+    Make the code simpler, cleaner, and easier to work with.
+    
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 6319fd51fe56a17fbae78bfd64c7875c69c12231
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat May 23 20:58:53 2015 -0400
+commit bdae3bc1ffb14b705a0c6fef3e90380dfd0eed97
+Author: Dylan Baker <dylanx.c.baker@intel.com>
+Date:   Thu Nov 20 14:01:40 2014 -0800
 
-    nv30: fix clip plane uploads and enable changes
+    glapi: remap_helper.py: Fix some low hanging style issues
     
-    nv30_validate_clip depends on the rasterizer state. Also we should
-    upload all the new clip planes on change since next time the plane data
-    won't have changed, but the enables might.
+    This makes the tools shut up about a bunch of problems, making them more
+    useful for catching actual problems.
     
-    This fixes fixed-clip-enables and vs-clip-vertex-enables shader tests.
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
+
+commit cf718cc964f86dc49c1fc9ed5e39aa5bd87ad931
+Author: Dylan Baker <dylanx.c.baker@intel.com>
+Date:   Wed Nov 19 13:36:35 2014 -0800
+
+    glapi: gl_table.py: replace getopt with argparse.
+    
+    This results in slightly less code, but code that is much more readable.
+    It has the advantage of putting everything together in one place, all of
+    the code is self documenting, help messages are auto-generated, choices
+    are automatically enforced, and the syntax is much less C like, taking
+    advantage of python features and idioms.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 7518fc3c66e9b5703b987bccca7970a344deadfa)
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit 08baacb6db3b38d241f7abc7c853c219cf5d876d
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sun May 24 02:23:16 2015 -0400
+commit b6298c7a7143eafea3c1be6e98af1d0239fdf5b7
+Author: Dylan Baker <dylanx.c.baker@intel.com>
+Date:   Wed Nov 19 13:17:48 2014 -0800
 
-    nv30: avoid leaking render state and draw shaders
+    glapi: gl_table.py: Fix some low hanging style issues
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 9870ed05dd333a20662479b9b1e3a8db542924c4)
+    Making the tools shut up about worthless errors so you can see real ones
+    is very useful
+    
+    Signed-off-by: Dylan Baker <dylanx.c.baker@intel.com>
+    Acked-by: Matt Turner <mattst88@gmail.com>
 
-commit c23bbfc007ef2bda14bbcbd99f69fc7b42547062
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sun May 24 01:31:11 2015 -0400
+commit a1c070c1a7c6b37a36f591bd8caf4619e4457eae
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon May 18 15:23:28 2015 -0700
 
-    nv30: don't leak fragprog consts
+    i965/disasm: Skip swizzle disassembly when using 3-src repctrl.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 605ce36d7f4a90c4062d6940bea82ab483bbe3b2)
+    ... since it's always .x, and also always print the subreg offset when
+    using repctrl.
 
-commit aa326e4e223f060c0e96a40bb633d955dc010c7c
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat May 23 01:57:41 2015 -0400
+commit 5614bcc416cf2ff1d816d52198e644565ca23bcd
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Mon May 18 14:59:13 2015 -0700
 
-    nv50/ir: avoid messing up arg1 of PFETCH
+    nir: Remove sRGB colorspace conversion round-trip.
     
-    There can be scenarios where the "indirect" arg of a PFETCH becomes
-    known, and so the code will attempt to propagate it. Use this
-    opportunity to just fold it into the first argument, and prevent the
-    load propagation pass from touching PFETCH further.
+    Some shaders in Civilization V and Beyond Earth do
     
-    This fixes gs-input-array-vec4-index-rd.shader_test and
-    vs-output-array-vec4-index-wr-before-gs.shader_test on nvc0 at least.
+       pow(pow(x, 2.2), 0.454545)
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit fa7f9f123b70f313d3c073b52c9c16b4b8df28f8)
-
-commit 1595955974976a68eda47d56bdde0110c21f2252
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Sat May 23 17:35:42 2015 -0400
-
-    nvc0: a geometry shader can have up to 1024 vertices output
+    which is converting to and from sRGB colorspace.
     
-    The 1024 is already reported everywhere, not sure where this 0x1ff came
-    from.
+    A more general rule that replaces pow(pow(a, b), c) with pow(a, b * c)
+    actually regresses two shaders in Sun Temple in which the result of the
+    inner pow is used twice, once by another pow and once by another
+    instruction. Also, since 2.2 * 0.454545 isn't exactly one, the more
+    general pattern would have still left us with a pow, and I'm 2.2 *
+    0.454545 percent sure that's not what they want.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 921917c8d8e707dd854e7be05fba7a3e55bc71bf)
+    instructions in affected programs:     934 -> 886 (-5.14%)
+    helped:                                16
 
-commit a760db21ecd78051a37429d9d67e9301047d3435
-Author: Jason Ekstrand <jason.ekstrand@intel.com>
-Date:   Tue May 19 17:35:29 2015 -0700
+commit a21d23e191696ca130fd63617b8d177055b73dda
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Thu May 21 20:14:44 2015 +0200
 
-    i965/fs: Fix implied_mrf_writes for scratch writes
+    nv50: fix PIPELINE_STATISTICS with HUD, based on nvc0
     
-    We build the entire message in the generator so all the MRF writes are
-    implied.
+    Tested on NVA8. No regression for ARB_pipeline_statistics piglit tests.
     
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-    (cherry picked from commit 6ca67f62e885f0e42c0cef2db5c0ae837adfe646)
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 2cf0e748c3558a2013a39004f2d2f2d19b6b50cb
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri May 22 19:03:58 2015 -0400
+commit 867fd2b5f586085c137af264e3eb640094e88526
+Author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+Date:   Thu May 21 20:06:19 2015 +0200
 
-    nvc0/ir: LOAD's can't be used for shader inputs
-    
-    We forgot to convert to VFETCH in case of indirect access. Fix that.
+    nv50: fix 64-bit queries with HUD, based on nvc0
     
-    This avoids crashes on the new gs-input-array-vec4-index-rd and
-    vs-output-array-vec4-index-wr-before-gs but they still fail.
+    A sequence number is written for 32-bit queries to make sure they are
+    ready, but not for 64-bit queries. Instead, we have to use a fence in
+    order to fix the HUD because it doesn't wait until the result is ready.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 217301843aea0299ab245e260b20af7ad250e9d8)
+    Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
+    Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
 
-commit 564c56de12e9c140082da5afed7169525a5b2950
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri May 22 19:02:41 2015 -0400
+commit 6921ea42a17c715c4b5b2d0092f9b9f4df42b10c
+Author: Christian König <christian.koenig@amd.com>
+Date:   Thu Mar 26 10:00:09 2015 +0100
 
-    nv50/ir: guess that the constant offset is the starting slot of array
+    radeon/vce: adapt new firmware interface changes
     
-    When we get something like IN[ADDR[0].x+5], we will now guess that we
-    should look at IN[5] for the "base" information.
+    v2: make this also compatible with original released firmware
+    v3 (chk): switch to original idea of separate files for fw versions
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit 0bab3962f5f313ea829c95920c02f32afb23715d)
+    Signed-off-by: Leo Liu <leo.liu@amd.com>
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v2)
 
-commit 45986bd391e6ebc1bdb72c0a81a10ec363369ad0
-Author: Ilia Mirkin <imirkin@alum.mit.edu>
-Date:   Fri May 22 16:40:08 2015 -0400
+commit 2b40c306d238e2e738d8901e10f351a109b02687
+Author: Christian König <christian.koenig@amd.com>
+Date:   Thu Mar 26 09:52:37 2015 +0100
 
-    nvc0/ir: set ftz when sources are floats, not just destinations
+    radeon/vce: move CPB handling function into common code
     
-    In the case of a compare, the destination might be a predicate, but we
-    still want to flush denorms.
+    They are not firmware version dependent.
     
-    Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
-    (cherry picked from commit d1eea18a595a468dbc2267a8d14197a3b1a5a4b6)
+    Signed-off-by: Christian König <christian.koenig@amd.com>
 
-commit 90644f9217b66539b52635475eb8c1367f551c05
+commit 7c1a00174b2bec102030b19b6094ebcab23fe04d
 Author: Dave Airlie <airlied@redhat.com>
 Date:   Thu May 21 11:23:06 2015 +1000
 
@@ -3398,9 +27354,32 @@
     Reviewed-by: Brian Paul <brianp@vmware.com>
     Cc: "10.6" mesa-stable@lists.freedesktop.org
     Signed-off-by: Dave Airlie <airlied@redhat.com>
-    (cherry picked from commit 7c1a00174b2bec102030b19b6094ebcab23fe04d)
 
-commit 61c6819d1aeb24589ffdf671ace1547388c09394
+commit d67515b7be1ebd9482970ac1867ee4e9bbbf96d5
+Author: Timothy Arceri <t_arceri@yahoo.com.au>
+Date:   Thu Apr 30 20:45:54 2015 +1000
+
+    glsl: remove element_type() helper
+    
+    We now have is_array() and without_array() that make the
+    code much clearer and remove the need for this.
+    
+    For all remaining calls to this we already knew that
+    the type was an array so returning a null wasn't adding any value.
+    
+    v2: use without_array() in _mesa_ast_array_index_to_hir() and don't use
+     without_array() in lower_clip_distance_visitor() as we want to make sure the
+     array is 2D.
+    
+    Reviewed-by: Matt Turner <mattst88@gmail.com>
+
+commit 51ccdb63467b1e848db025670f126eccb051f8f2
+Author: Matt Turner <mattst88@gmail.com>
+Date:   Tue May 19 07:38:40 2015 -0700
+
+    glsl: Use AM_V_GEN/AM_V_at in NIR rules.
+
+commit 6cdb29d52fc51e3d904b50bb7003c9fa38bb7896
 Author: Ilia Mirkin <imirkin@alum.mit.edu>
 Date:   Wed May 20 04:00:16 2015 -0400
 
@@ -3411,9 +27390,8 @@
     
     Cc: "10.6" <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    (cherry picked from commit 6cdb29d52fc51e3d904b50bb7003c9fa38bb7896)
 
-commit bf33fc653dca329ba6bc72ce6d76c6bc554e1837
+commit 3e7bc6728520b469ed53a2588ead28287f8b88f0
 Author: Ilia Mirkin <imirkin@alum.mit.edu>
 Date:   Mon Apr 27 23:47:40 2015 -0400
 
@@ -3424,9 +27402,8 @@
     
     Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
     Cc: mesa-stable@lists.freedesktop.org
-    (cherry picked from commit 3e7bc6728520b469ed53a2588ead28287f8b88f0)
 
-commit e4f74121dbe481f7df54d7703e8a260807df1b8f
+commit 36438f0db6c7c696df73ced12684f4df9d2b47e5
 Author: Emil Velikov <emil.l.velikov@gmail.com>
 Date:   Wed May 20 21:51:52 2015 +0100
 
@@ -3437,9 +27414,8 @@
     
     Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
-    (cherry picked from commit 36438f0db6c7c696df73ced12684f4df9d2b47e5)
 
-commit 62fda88080f157215a9ff1c006969a501a183a96
+commit 06ff751f97fbeb62a23936cd8f9c54733920d082
 Author: Jeremy Huddleston Sequoia <jeremyhu@apple.com>
 Date:   Wed Feb 11 02:32:33 2015 -0800
 
@@ -3456,10 +27432,8 @@
     Signed-off-by: Jeremy Huddleston Sequoia <jeremyhu@apple.com>
     [Emil Velikov: Tweak the commit message.]
     Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
-    
-    (cherry picked from commit 06ff751f97fbeb62a23936cd8f9c54733920d082)
 
-commit 4c83138e5f67cee2016dbfbc9b6daf79260b279b
+commit 31cd2d75dc3844e40143f649fe383de17c152a13
 Author: Alan Coopersmith <alan.coopersmith@oracle.com>
 Date:   Fri May 15 19:05:45 2015 -0700
 
@@ -3470,9 +27444,37 @@
     Cc: "10.5 10.6" <mesa-stable@lists.freedesktop.org>
     Signed-off-by: Alan Coopersmith <alan.coopersmith@oracle.com>
     Reviewed-by: Jeremy Huddleston Sequoia <jeremyhu@apple.com>
-    (cherry picked from commit 31cd2d75dc3844e40143f649fe383de17c152a13)
 
-commit f02f25dcf7b146808daf9892580bc918f7c44624
+commit 2126c68e5cba79709e228f12eb3062a9be634a0e
+Author: Jason Ekstrand <jason.ekstrand@intel.com>
+Date:   Tue May 19 16:57:43 2015 -0700
+
+    nir: Get rid of the array elements parameter on load/store intrinsics
+    
+    Previously, we used intrinsic->const_index[1] to represent "the number of
+    array elements to load" for load/store intrinsics.  However, this set to 1
+    by every pass that ever creates a load/store intrinsic.  Also, while it
+    might make some sense for registers, it makes no sense whatsoever in SSA.
+    On top of that, the i965 backend was the only backend to ever support it;
+    freedreno and vc4 just assert that it's always 1.  Let's just delete it.
+    
+    Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com>
+    Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
+    Reviewed-by: Rob Clark <robclark@freedesktop.org>
+
+commit e1c4e8aaaafddd0e04cf2a16e28ef8f1e09d8b44
+Author: Marek Olšák <marek.olsak@amd.com>
+Date:   Sun May 17 16:35:14 2015 +0200
+
+    gallium: remove TGSI_SAT_MINUS_PLUS_ONE
+    
+    It's a remnant of some old NV extension. Unused.
+    
+    I also have a patch that removes predicates if anyone is interested.
+    
+    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
+
+commit e4201bb618f02a279fda59a1c528d7218e6900a5
 Author: Marek Olšák <marek.olsak@amd.com>
 Date:   Mon May 18 11:26:10 2015 +0200
 
@@ -3485,13 +27487,126 @@
     Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
     Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
     Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
-    (cherry picked from commit e4201bb618f02a279fda59a1c528d7218e6900a5)
 
-commit 7c5cca5ee2c4076db45f89658e3a38098d6f828d
+commit 4ee69a97bb0af0cc216539c48b246ea2abf8f208
+Author: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+Date:   Wed May 13 12:18:31 2015 +0200
+
+    mesa/main: validate name syntax for array variables only
+    
+    From ARB_program_interface_query:
+    
+     "Note that if an interface enumerates a single active resource list
+     entry for an array variable (e.g., "a[0]"), a <name> identifying
+     any array element other than the first (e.g., "a[1]") is not
+     considered to match."
+    
+    It doesn't apply to arrays of interface blocks but just to array
+    variables.
+    
+    Signed-off-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
+    Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
+
+commit 1b052906763a36465e384366b875235b962ac143
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Wed May 20 12:36:14 2015 +1000
+
+    GL3.txt: update softpipe ARB_gpu_shader5 status
+    
+    texture gather and it already supported the new instructions.
+    
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 55a7b5165d40b831fd303079f8f80962d195d6ee
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue May 19 16:29:39 2015 +1000
+
+    softpipe: start adding gather support (v2)
+    
+    This adds both ARB_texture_gather and the enhanced gather
+    for ARB_gpu_shader5.
+    
+    This passes all the piglit tests, it relies on the GLSL
+    lowering pass to make textureGatherOffsets work.
+    
+    v2: use inline to get gather component (Brian)
+    fix function name, add asserts (Brian)
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 0108eae2911d2fc8f2ae0ef0fc6fc503fbfc600d
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue May 19 16:22:35 2015 +1000
+
+    softpipe: use arrays to make gather easier
+    
+    This is a prep change for gather, and it makes more sense
+    to use an array in these cases.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit a6861ecfc91973ba97989def97dd571e0e096888
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue May 19 16:16:07 2015 +1000
+
+    tgsi: handle TG4 opcode in tgsi exec
+    
+    This just adds a new modifier interface for drivers to implement.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 3f5c67d6510fe0210079ddecc0d30227a6cc4111
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue May 19 16:12:45 2015 +1000
+
+    softpipe: add textureOffset support.
+    
+    This was an oversight when GLSL1.30 was enabled, I think my
+    misunderstanding.
+    
+    This fixes a bunch of tex-miplevel-selection tests under softpipe,
+    and is required for textureGather support.
+    
+    I'm not sure this won't make sampling slowering, but its softpipe,
+    correctness first and all that.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 8bec83a30761d52088fa5cd2301b469b7aacf755
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue May 19 15:39:47 2015 +1000
+
+    softpipe: move control into a filter args struct
+    
+    more stuff for offsets and gather will go in here later.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit 99e583120cde8820aae94eb0f8beb723509398fc
+Author: Dave Airlie <airlied@redhat.com>
+Date:   Tue May 19 15:31:06 2015 +1000
+
+    softpipe: move some image filter parameters into a struct
+    
+    This moves some of the image filter args into a struct,
+    and passes that instead, this is prep work for adding texture
+    gather support which needs new arguments.
+    
+    review: make filter args const.
+    
+    Reviewed-by: Brian Paul <brianp@vmware.com>
+    Signed-off-by: Dave Airlie <airlied@redhat.com>
+
+commit b9b516248e0441a5aa06bdeb58525b4ef8dd0001
 Author: Emil Velikov <emil.l.velikov@gmail.com>
-Date:   Tue May 19 12:02:04 2015 +0100
+Date:   Tue May 19 11:59:50 2015 +0100
 
-    Increment version to 10.6.0-rc1
+    Post-branch version bump to 10.7.0-devel, add release notes template
     
     Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/CleanSpec.mk mesa-11.0.0~git20150916+11.0.c4bae579/CleanSpec.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/CleanSpec.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/CleanSpec.mk	2015-09-16 14:36:08.000000000 +0000
@@ -13,3 +13,4 @@
 $(call add-clean-step, rm -rf $(HOST_OUT_release)/*/EXECUTABLES/mesa_*_intermediates)
 $(call add-clean-step, rm -rf $(HOST_OUT_release)/*/EXECUTABLES/glsl_compiler_intermediates)
 $(call add-clean-step, rm -rf $(HOST_OUT_release)/*/STATIC_LIBRARIES/libmesa_*_intermediates)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/SHARED_LIBRARIES/*_dri_intermediates)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/configure.ac mesa-11.0.0~git20150916+11.0.c4bae579/configure.ac
--- mesa-10.6.5~git20150829+10.6.fa342251/configure.ac	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/configure.ac	2015-09-16 14:37:00.000000000 +0000
@@ -43,7 +43,7 @@
 AC_CONFIG_AUX_DIR([bin])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE([foreign tar-ustar dist-xz])
+AM_INIT_AUTOMAKE([foreign tar-ustar dist-xz subdir-objects])
 
 dnl We only support native Windows builds (MinGW/MSVC) through SCons.
 case "$host_os" in
@@ -67,12 +67,13 @@
 AC_SUBST([OPENCL_VERSION])
 
 dnl Versions for external dependencies
-LIBDRM_REQUIRED=2.4.38
+LIBDRM_REQUIRED=2.4.60
 LIBDRM_RADEON_REQUIRED=2.4.56
-LIBDRM_INTEL_REQUIRED=2.4.60
+LIBDRM_AMDGPU_REQUIRED=2.4.63
+LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
-LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
-LIBDRM_FREEDRENO_REQUIRED=2.4.57
+LIBDRM_NOUVEAU_REQUIRED=2.4.62
+LIBDRM_FREEDRENO_REQUIRED=2.4.64
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
 PRESENTPROTO_REQUIRED=1.0
@@ -80,7 +81,7 @@
 GLPROTO_REQUIRED=1.4.14
 LIBOMXIL_BELLAGIO_REQUIRED=0.0
 LIBVA_REQUIRED=0.35.0
-VDPAU_REQUIRED=0.4.1
+VDPAU_REQUIRED=1.1
 WAYLAND_REQUIRED=1.2.0
 XCB_REQUIRED=1.9.3
 XCBDRI2_REQUIRED=1.8
@@ -206,11 +207,14 @@
 AX_GCC_BUILTIN([__builtin_popcountll])
 AX_GCC_BUILTIN([__builtin_unreachable])
 
+AX_GCC_FUNC_ATTRIBUTE([const])
 AX_GCC_FUNC_ATTRIBUTE([flatten])
 AX_GCC_FUNC_ATTRIBUTE([format])
 AX_GCC_FUNC_ATTRIBUTE([malloc])
 AX_GCC_FUNC_ATTRIBUTE([packed])
+AX_GCC_FUNC_ATTRIBUTE([pure])
 AX_GCC_FUNC_ATTRIBUTE([unused])
+AX_GCC_FUNC_ATTRIBUTE([warn_unused_result])
 
 AM_CONDITIONAL([GEN_ASM_OFFSETS], test "x$GEN_ASM_OFFSETS" = xyes)
 
@@ -231,7 +235,7 @@
 _SAVE_CPPFLAGS="$CPPFLAGS"
 
 dnl Compiler macros
-DEFINES=""
+DEFINES="-D__STDC_LIMIT_MACROS"
 AC_SUBST([DEFINES])
 case "$host_os" in
 linux*|*-gnu*|gnu*)
@@ -282,6 +286,9 @@
     # Work around aliasing bugs - developers should comment this out
     CFLAGS="$CFLAGS -fno-strict-aliasing"
 
+    # We don't want floating-point math functions to set errno or trap
+    CFLAGS="$CFLAGS -fno-math-errno -fno-trapping-math"
+
     # gcc's builtin memcmp is slower than glibc's
     # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
     CFLAGS="$CFLAGS -fno-builtin-memcmp"
@@ -652,6 +659,7 @@
 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
 AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
+AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
 
 dnl Check to see if dlopen is in default libraries (like Solaris, which
 dnl has it in libc), or if libdl is needed to get it.
@@ -715,15 +723,15 @@
     [enable_opengl="$enableval"],
     [enable_opengl=yes])
 AC_ARG_ENABLE([gles1],
-    [AS_HELP_STRING([--enable-gles1],
-        [enable support for OpenGL ES 1.x API @<:@default=disabled@:>@])],
+    [AS_HELP_STRING([--disable-gles1],
+        [disable support for OpenGL ES 1.x API @<:@default=enabled@:>@])],
     [enable_gles1="$enableval"],
-    [enable_gles1=no])
+    [enable_gles1=yes])
 AC_ARG_ENABLE([gles2],
-    [AS_HELP_STRING([--enable-gles2],
-        [enable support for OpenGL ES 2.x API @<:@default=disabled@:>@])],
+    [AS_HELP_STRING([--disable-gles2],
+        [disable support for OpenGL ES 2.x API @<:@default=enabled@:>@])],
     [enable_gles2="$enableval"],
-    [enable_gles2=no])
+    [enable_gles2=yes])
 
 AC_ARG_ENABLE([dri],
     [AS_HELP_STRING([--enable-dri],
@@ -949,12 +957,6 @@
     ;;
 esac
 
-# Building Xlib-GLX requires shared glapi to be disabled.
-if test "x$enable_shared_glapi$enable_xlib_glx" = xyesyes; then
-    AC_MSG_NOTICE([Shared GLAPI should not used with Xlib-GLX, disabling])
-    enable_shared_glapi=no
-fi
-
 AM_CONDITIONAL(HAVE_SHARED_GLAPI, test "x$enable_shared_glapi" = xyes)
 
 # Build the pipe-drivers as separate libraries/modules.
@@ -966,11 +968,9 @@
 dnl Driver specific build directories
 dnl
 
-case "x$enable_glx$enable_xlib_glx" in
-xyesyes)
+if test -n "$with_gallium_drivers" -a "x$enable_glx$enable_xlib_glx" = xyesyes; then
     NEED_WINSYS_XLIB="yes"
-    ;;
-esac
+fi
 
 if test "x$enable_dri" = xyes; then
     enable_gallium_loader="$enable_shared_pipe_drivers"
@@ -1222,7 +1222,7 @@
         fi
         ;;
     darwin*)
-        DEFINES="$DEFINES -DGLX_ALIAS_UNSUPPORTED"
+        DEFINES="$DEFINES -DGLX_ALIAS_UNSUPPORTED -DBUILDING_MESA"
         if test "x$with_dri_drivers" = "xyes"; then
             with_dri_drivers="swrast"
         fi
@@ -1243,26 +1243,6 @@
                      [AC_MSG_ERROR([Expat library required for DRI not found])])
          EXPAT_LIBS="-lexpat"])
 
-    DRICOMMON_NEED_LIBDRM=no
-    # If we are building any DRI driver other than swrast.
-    if test -n "$with_dri_drivers"; then
-        if test "x$with_dri_drivers" != xswrast; then
-            # ... libdrm is required
-            if test "x$have_libdrm" != xyes; then
-                AC_MSG_ERROR([DRI drivers requires libdrm >= $LIBDRM_REQUIRED])
-            fi
-            DRICOMMON_NEED_LIBDRM=yes
-        fi
-    fi
-
-    # If we're building any gallium DRI driver other than swrast
-    if test -n "$with_gallium_drivers" -a "x$DRICOMMON_NEED_LIBDRM" = xno; then
-        if test "x$with_gallium_drivers" != xswrast; then
-            # ... build a libdrm aware dricommon
-            DRICOMMON_NEED_LIBDRM=yes
-        fi
-    fi
-
     # put all the necessary libs together
     DRI_LIB_DEPS="$DRI_LIB_DEPS $SELINUX_LIBS $LIBDRM_LIBS $EXPAT_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
 fi
@@ -1290,7 +1270,7 @@
             ;;
         xnouveau)
             HAVE_NOUVEAU_DRI=yes;
-            PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NVVIEUX_REQUIRED])
+            PKG_CHECK_MODULES([NVVIEUX], [libdrm_nouveau >= $LIBDRM_NVVIEUX_REQUIRED])
             ;;
         xradeon)
             HAVE_RADEON_DRI=yes;
@@ -1376,7 +1356,6 @@
     fi
 
     if test "x$enable_dri" = xyes; then
-        GBM_BACKEND_DIRS="$GBM_BACKEND_DIRS dri"
         if test "x$enable_shared_glapi" = xno; then
             AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi])
         fi
@@ -1515,6 +1494,10 @@
     if test "x$with_gallium_drivers" = xswrast; then
         AC_MSG_ERROR([nine requires at least one non-swrast gallium driver])
     fi
+    if test $GCC_VERSION_MAJOR -lt 4 -o $GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 6; then
+        AC_MSG_ERROR([gcc >= 4.6 is required to build nine])
+    fi
+
     if test "x$enable_dri3" = xno; then
         AC_MSG_WARN([using nine together with wine requires DRI3 enabled system])
     fi
@@ -1650,7 +1633,9 @@
 			AC_MSG_ERROR([EGL platform drm requires libdrm >= $LIBDRM_REQUIRED])
 		;;
 
-	android|gdi|null)
+	surfaceless)
+		test "x$have_libdrm" != xyes &&
+			AC_MSG_ERROR([EGL platform surfaceless requires libdrm >= $LIBDRM_REQUIRED])
 		;;
 
 	*)
@@ -1673,12 +1658,10 @@
     EGL_NATIVE_PLATFORM="_EGL_INVALID_PLATFORM"
 fi
 
-if echo "$egl_platforms" | grep -q 'x11'; then
-    NEED_WINSYS_XLIB=yes
-fi
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_X11, echo "$egl_platforms" | grep -q 'x11')
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_WAYLAND, echo "$egl_platforms" | grep -q 'wayland')
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep -q 'drm')
+AM_CONDITIONAL(HAVE_EGL_PLATFORM_SURFACELESS, echo "$egl_platforms" | grep -q 'surfaceless')
 AM_CONDITIONAL(HAVE_EGL_PLATFORM_NULL, echo "$egl_platforms" | grep -q 'null')
 
 AM_CONDITIONAL(HAVE_EGL_DRIVER_DRI2, test "x$HAVE_EGL_DRIVER_DRI2" != "x")
@@ -1794,10 +1777,7 @@
             AC_MSG_ERROR([LLVM $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is required])
         fi
 
-        LLVM_COMPONENTS="engine bitwriter"
-        if $LLVM_CONFIG --components | grep -qw 'mcjit'; then
-            LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
-        fi
+        LLVM_COMPONENTS="engine bitwriter mcjit mcdisassembler"
 
         if test "x$enable_opencl" = xyes; then
             llvm_check_version_for "3" "5" "0" "opencl"
@@ -1805,7 +1785,7 @@
             LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker instrumentation"
             LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option objcarcopts profiledata"
         fi
-        DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DLLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
+        DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
         MESA_LLVM=1
 
         dnl Check for Clang internal headers
@@ -1924,16 +1904,19 @@
 }
 
 radeon_llvm_check() {
+    if test ${LLVM_VERSION_INT} -lt 307; then
+        amdgpu_llvm_target_name='r600'
+    else
+        amdgpu_llvm_target_name='amdgpu'
+    fi
     if test "x$enable_gallium_llvm" != "xyes"; then
         AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
     fi
     llvm_check_version_for "3" "4" "2" $1 
-    if test true && $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then
-        AC_MSG_ERROR([LLVM R600 Target not enabled.  You can enable it when building the LLVM
-                      sources with the --enable-experimental-targets=R600
-                      configure flag])
+    if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
+        AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
     fi
-    LLVM_COMPONENTS="${LLVM_COMPONENTS} r600 bitreader ipo"
+    LLVM_COMPONENTS="${LLVM_COMPONENTS} $amdgpu_llvm_target_name bitreader ipo"
     NEED_RADEON_LLVM=yes
     if test "x$have_libelf" != xyes; then
        AC_MSG_ERROR([$1 requires libelf when using llvm])
@@ -1988,6 +1971,7 @@
         xradeonsi)
             HAVE_GALLIUM_RADEONSI=yes
             PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
+            PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
             gallium_require_drm "radeonsi"
             gallium_require_drm_loader
             radeon_llvm_check "radeonsi"
@@ -2098,31 +2082,15 @@
 #       use by XA tracker in particular, but could be used in any case
 #       where communication with xserver is not desired).
 if test "x$enable_gallium_loader" = xyes; then
-    if test "x$NEED_WINSYS_XLIB" = xyes; then
-        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_XLIB"
-    fi
-
     if test "x$enable_dri" = xyes; then
         GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"
     fi
 
     if test "x$enable_gallium_drm_loader" = xyes; then
         GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRM"
-        PKG_CHECK_MODULES([GALLIUM_PIPE_LOADER_XCB], [xcb xcb-dri2],
-                          pipe_loader_have_xcb=yes, pipe_loader_have_xcb=no)
-        if test "x$pipe_loader_have_xcb" = xyes; then
-            GALLIUM_PIPE_LOADER_CLIENT_DEFINES="$GALLIUM_PIPE_LOADER_CLIENT_DEFINES -DHAVE_PIPE_LOADER_XCB"
-            GALLIUM_PIPE_LOADER_CLIENT_LIBS="$GALLIUM_PIPE_LOADER_CLIENT_LIBS $GALLIUM_PIPE_LOADER_XCB_LIBS $LIBDRM_LIBS"
-        fi
     fi
 
-    GALLIUM_PIPE_LOADER_CLIENT_DEFINES="$GALLIUM_PIPE_LOADER_CLIENT_DEFINES $GALLIUM_PIPE_LOADER_DEFINES"
-    GALLIUM_PIPE_LOADER_CLIENT_LIBS="$GALLIUM_PIPE_LOADER_CLIENT_LIBS $GALLIUM_PIPE_LOADER_LIBS"
-
     AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
-    AC_SUBST([GALLIUM_PIPE_LOADER_LIBS])
-    AC_SUBST([GALLIUM_PIPE_LOADER_CLIENT_DEFINES])
-    AC_SUBST([GALLIUM_PIPE_LOADER_CLIENT_LIBS])
 fi
 
 AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
@@ -2149,7 +2117,6 @@
 
 AC_SUBST([ELF_LIB])
 
-AM_CONDITIONAL(DRICOMMON_NEED_LIBDRM, test "x$DRICOMMON_NEED_LIBDRM" = xyes)
 AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
 AM_CONDITIONAL(HAVE_X11_DRIVER, test "x$enable_xlib_glx" = xyes)
 AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
@@ -2202,8 +2169,7 @@
 dnl Substitute the config
 AC_CONFIG_FILES([Makefile
 		src/Makefile
-		src/egl/drivers/dri2/Makefile
-		src/egl/main/Makefile
+		src/egl/Makefile
 		src/egl/main/egl.pc
 		src/egl/wayland/wayland-drm/Makefile
 		src/egl/wayland/wayland-egl/Makefile
@@ -2226,7 +2192,6 @@
 		src/gallium/drivers/svga/Makefile
 		src/gallium/drivers/trace/Makefile
 		src/gallium/drivers/vc4/Makefile
-		src/gallium/drivers/vc4/kernel/Makefile
 		src/gallium/state_trackers/clover/Makefile
 		src/gallium/state_trackers/dri/Makefile
 		src/gallium/state_trackers/glx/xlib/Makefile
@@ -2259,6 +2224,7 @@
 		src/gallium/winsys/intel/drm/Makefile
 		src/gallium/winsys/nouveau/drm/Makefile
 		src/gallium/winsys/radeon/drm/Makefile
+		src/gallium/winsys/amdgpu/drm/Makefile
 		src/gallium/winsys/svga/drm/Makefile
 		src/gallium/winsys/sw/dri/Makefile
 		src/gallium/winsys/sw/kms-dri/Makefile
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/changelog mesa-11.0.0~git20150916+11.0.c4bae579/debian/changelog
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/changelog	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/changelog	2015-09-16 15:35:23.000000000 +0000
@@ -1,24 +1,105 @@
-mesa (10.6.5~git20150829+10.6.fa342251-0ubuntu0ricotz~trusty) trusty; urgency=medium
+mesa (11.0.0~git20150916+11.0.c4bae579-0ubuntu0sarvatt~trusty) trusty; urgency=high
 
-  * Checkout from git 20150829 (10.6 branch) up to commit
-    fa34225167396008e75e93f23696666caba8a7bf
-  * Only added debian/ tree from origin/ubuntu
-  * hook: Disable MIR support.
-  * hook: Relax symbols check.
-  * hook: Drop egl-platform-mir.patch (no-mir)
-  * hook: Drop 07_gallium-fix-build-failure-on-powerpcspe.diff
-    (upstream)
-  * hook: update symbols.
+  * Checkout from git 20150916 (11.0 branch) up to commit
+    c4bae5792bb5515da42e23f166f5ba5d68f79615
+  * Only added debian/ tree from origin/ubuntu+1
+  * Drop mir support.
 
- -- Rico Tzschichholz <ricotz@ubuntu.com>  Sat, 29 Aug 2015 16:01:21 +0200
+ -- Robert Hooker <sarvatt@ubuntu.com>  Wed, 16 Sep 2015 10:36:58 -0400
 
-mesa (10.6.3-1ubuntu1) wily; urgency=medium
+mesa (11.0.0-1ubuntu1) UNRELEASED; urgency=medium
+
+  * Merge from Debian. (LP: #1484279)
+  * egl-platform-mir.patch: Updated.
+  * i965-remove-early-release-of-dri2-miptree.patch: Add a workaround to
+    fix crashes in brw_meta_fast_clear. (LP: #1492037)
+  * control, rules: Default to llvm-3.6 again, because 3.7 won't be in
+    main for wily.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Mon, 07 Sep 2015 16:27:37 +0300
+
+mesa (11.0.0-1) experimental; urgency=medium
+
+  * New upstream release.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Mon, 14 Sep 2015 14:23:13 +0300
+
+mesa (11.0.0~rc3-1) experimental; urgency=medium
+
+  [ Andreas Boll ]
+  * Use https for Vcs-* fields.
+
+  [ Timo Aaltonen ]
+  * New upstream release candidate.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Mon, 07 Sep 2015 15:04:32 +0300
+
+mesa (11.0.0~rc2-1) experimental; urgency=medium
+
+  * New upstream release candidate.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Wed, 02 Sep 2015 11:38:28 +0300
+
+mesa (11.0.0~rc1-1) experimental; urgency=medium
+
+  [ Andreas Boll ]
+  * New upstream release candidate.
+  * control: Drop unneeded libomxil-bellagio-dev build-dep.
+  * rules: Explicitly disable vaapi (Closes: #789100).
+  * control: Update upstream url.
+  * control: Update Vcs-* fields.
+  * Drop libgl1-mesa-swx11* packages.
+  * control: Update package description.
+
+  [ Timo Aaltonen ]
+  * control: Delete commented out libgl1-mesa-glx-i686 from the file.
+  * control: Bump llvm/libclang build-deps to match versions where
+    amdgpu is enabled.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Mon, 24 Aug 2015 11:15:16 +0300
+
+mesa (11.0.0~git20150817-1) experimental; urgency=medium
+
+  * New upstream snapshot
+  * control: Bump libdrm build-dep to 2.4.63.
+  * control: Add libomxil-bellagio-dev to build-deps.
+  * rules: Disable gles1 & 2 for swx11 builds.
+  * libegl1-mesa.symbols, libgles2-mesa.symbols: Updated
+  * control, rules: Migrate to llvm 3.7.
+  * rules: Enable llvmpipe on armhf again.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Tue, 18 Aug 2015 07:53:45 +0300
+
+mesa (10.6.7-1) unstable; urgency=medium
+
+  * New upstream release.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Mon, 14 Sep 2015 11:15:23 +0300
+
+mesa (10.6.5-1) unstable; urgency=medium
+
+  [ Andreas Boll ]
+  * New upstream release.
+
+  [ Julien Cristau ]
+  * Break libopengl-perl (<< 0.6704+dfsg-2), thanks to Niko Tyni
+    (closes: #796918)
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Wed, 02 Sep 2015 12:26:37 +0300
+
+mesa (10.6.4-1) unstable; urgency=medium
+
+  * New upstream release.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Wed, 19 Aug 2015 09:11:47 +0300
+
+mesa (10.6.3-0ubuntu1) UNRELEASED; urgency=medium
 
   * Merge from debian-experimental git.
   * Drop skl-*, i965-* patches, upstream.
   * egl-platform-mir.patch: Updated.
 
- -- Timo Aaltonen <tjaalton@debian.org>  Tue, 11 Aug 2015 08:00:09 +0300
+ -- Timo Aaltonen <tjaalton@debian.org>  Thu, 18 Jun 2015 15:31:27 +0300
 
 mesa (10.6.3-1) unstable; urgency=medium
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/control mesa-11.0.0~git20150916+11.0.c4bae579/debian/control
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/control	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/control	2015-09-16 15:35:23.000000000 +0000
@@ -8,7 +8,7 @@
  debhelper (>= 9),
  quilt (>= 0.40),
  pkg-config,
- libdrm-dev (>= 2.4.56) [!hurd-any],
+ libdrm-dev (>= 2.4.63) [!hurd-any],
  libx11-dev,
  x11proto-gl-dev (>= 1.4.14),
  libxxf86vm-dev,
@@ -39,12 +39,16 @@
  libudev-dev [linux-any],
  flex,
  bison,
- llvm-3.6-dev (>= 1:3.5-1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
+ llvm-3.6-dev (>= 1:3.6) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
  libelf-dev [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf],
  libwayland-dev (>= 1.2.0) [linux-any],
-Vcs-Git: git://git.debian.org/git/pkg-xorg/lib/mesa
-Vcs-Browser: http://git.debian.org/?p=pkg-xorg/lib/mesa.git
-Homepage: http://mesa3d.sourceforge.net/
+# libclang-3.7-dev (>= 1:3.7~+rc2) [amd64 i386 armhf],
+# libclc-dev [amd64 i386 armhf],
+# mir-client-platform-mesa-dev [!arm64 !powerpc !ppc64 !ppc64el],
+# libmirclient-dev [!arm64 !powerpc !ppc64 !ppc64el],
+Vcs-Git: https://anonscm.debian.org/pkg-xorg/lib/mesa
+Vcs-Browser: https://anonscm.debian.org/cgit/pkg-xorg/lib/mesa.git
+Homepage: http://mesa3d.org/
 
 Package: libxatracker2
 Section: libs
@@ -200,6 +204,7 @@
  libxcb-sync-dev,
  libxshmfence-dev,
  libx11-xcb-dev,
+# libmirclient-dev [!arm64 !powerpc !ppc64 !ppc64el],
  libwayland-dev (>= 1.2.0) [linux-any],
  ${misc:Depends},
 Multi-Arch: same
@@ -410,13 +415,14 @@
  libgl1-nvidia-alternatives (<= 275.09.07-1),
  fglrx-glx (<< 1:11-6-1),
  glx-diversions (<< 0.4),
+ libopengl-perl (<< 0.6704+dfsg-2),
 Pre-Depends: ${misc:Pre-Depends}
 Multi-Arch: same
 Description: free implementation of the OpenGL API -- GLX runtime
  Mesa is a 3-D graphics library with an API which is very similar to
  that of OpenGL.  To the extent that Mesa utilizes the OpenGL command
  syntax or state machine, it is being used with authorization from
- Silicon Graphics, Inc.  However, the author makes no claim that Mesa
+ Silicon Graphics, Inc.  However, the authors make no claim that Mesa
  is in any way a compatible replacement for OpenGL or associated with
  Silicon Graphics, Inc.
  .
@@ -449,27 +455,6 @@
  This package contains debugging symbols for the GL library with GLX and DRI
  capabilities.
 
-#Package: libgl1-mesa-glx-i686
-#Section: libs
-#Priority: extra
-#Architecture: any-i386
-#Pre-Depends: libgl1-mesa-glx
-#Description: A free implementation of the OpenGL API -- GLX runtime [i686 optimized]
-# This version of Mesa provides GLX and DRI capabilities: it is capable of
-# both direct and indirect rendering.  For direct rendering, it can use DRI
-# modules from the libgl1-mesa-dri package to accelerate drawing.
-# .
-# This package does not include the modules themselves: these can be found
-# in the libgl1-mesa-dri package.
-# .
-# For a complete description of Mesa, please look at the
-# libgl1-mesa-glx package.
-# .
-# This set of libraries is optimized for i686 machines and will only be used if
-# you are running a 2.6 kernel on an i686 class CPU. This includes Pentium Pro,
-# Pentium II/II/IV, Celeron CPU's and similar class CPU's (including clones
-# such as AMD Athlon/Opteron, VIA C3 Nehemiah, but not VIA C3 Ezla).
-
 Package: libgl1-mesa-dri
 Section: libs
 Priority: optional
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/libegl1-mesa.symbols mesa-11.0.0~git20150916+11.0.c4bae579/debian/libegl1-mesa.symbols
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/libegl1-mesa.symbols	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/libegl1-mesa.symbols	2015-09-16 15:35:23.000000000 +0000
@@ -2,14 +2,21 @@
  eglBindAPI@Base 7.8.1
  eglBindTexImage@Base 7.8.1
  eglChooseConfig@Base 7.8.1
+ eglClientWaitSync@Base 10.7.0~
  eglCopyBuffers@Base 7.8.1
  eglCreateContext@Base 7.8.1
+ eglCreateImage@Base 10.7.0~
  eglCreatePbufferFromClientBuffer@Base 7.8.1
  eglCreatePbufferSurface@Base 7.8.1
  eglCreatePixmapSurface@Base 7.8.1
+ eglCreatePlatformPixmapSurface@Base 10.7.0~
+ eglCreatePlatformWindowSurface@Base 10.7.0~
+ eglCreateSync@Base 10.7.0~
  eglCreateWindowSurface@Base 7.8.1
  eglDestroyContext@Base 7.8.1
+ eglDestroyImage@Base 10.7.0~
  eglDestroySurface@Base 7.8.1
+ eglDestroySync@Base 10.7.0~
  eglGetConfigAttrib@Base 7.8.1
  eglGetConfigs@Base 7.8.1
  eglGetCurrentContext@Base 7.8.1
@@ -17,7 +24,9 @@
  eglGetCurrentSurface@Base 7.8.1
  eglGetDisplay@Base 7.8.1
  eglGetError@Base 7.8.1
+ eglGetPlatformDisplay@Base 10.7.0~
  eglGetProcAddress@Base 7.8.1
+ eglGetSyncAttrib@Base 10.7.0~
  eglInitialize@Base 7.8.1
  eglMakeCurrent@Base 7.8.1
  eglQueryAPI@Base 7.8.1
@@ -33,4 +42,5 @@
  eglWaitClient@Base 7.8.1
  eglWaitGL@Base 7.8.1
  eglWaitNative@Base 7.8.1
+ eglWaitSync@Base 10.7.0~
  (arch=linux-any)wl_drm_interface@Base 8.0-2~
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/libgl1-mesa-glx-i686.install.in mesa-11.0.0~git20150916+11.0.c4bae579/debian/libgl1-mesa-glx-i686.install.in
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/libgl1-mesa-glx-i686.install.in	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/libgl1-mesa-glx-i686.install.in	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-dri/usr/lib/${DEB_HOST_MULTIARCH}/i686/cmov/libGL.so.* usr/lib/${DEB_HOST_MULTIARCH}/i686/cmov
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/libgles2-mesa.symbols mesa-11.0.0~git20150916+11.0.c4bae579/debian/libgles2-mesa.symbols
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/libgles2-mesa.symbols	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/libgles2-mesa.symbols	2015-09-16 15:35:23.000000000 +0000
@@ -8,11 +8,13 @@
  glBindBufferBase@Base 9.1~
  glBindBufferRange@Base 9.1~
  glBindFramebuffer@Base 7.8.1
+ glBindImageTexture@Base 10.7.0~
  glBindRenderbuffer@Base 7.8.1
  glBindSampler@Base 9.1~
  glBindTexture@Base 7.8.1
  glBindTransformFeedback@Base 9.1~
  glBindVertexArray@Base 9.1~
+ glBindVertexBuffer@Base 10.7.0~
  glBlendColor@Base 7.8.1
  glBlendEquation@Base 7.8.1
  glBlendEquationSeparate@Base 7.8.1
@@ -61,10 +63,14 @@
  glDetachShader@Base 7.8.1
  glDisable@Base 7.8.1
  glDisableVertexAttribArray@Base 7.8.1
+ glDispatchCompute@Base 10.7.0~
+ glDispatchComputeIndirect@Base 10.7.0~
  glDrawArrays@Base 7.8.1
+ glDrawArraysIndirect@Base 10.7.0~
  glDrawArraysInstanced@Base 9.1~
  glDrawBuffers@Base 9.1~
  glDrawElements@Base 7.8.1
+ glDrawElementsIndirect@Base 10.7.0~
  glDrawElementsInstanced@Base 9.1~
  glDrawRangeElements@Base 9.1~
  glEnable@Base 7.8.1
@@ -95,6 +101,7 @@
  glGetActiveUniformsiv@Base 9.1~
  glGetAttachedShaders@Base 7.8.1
  glGetAttribLocation@Base 7.8.1
+ glGetBooleani_v@Base 10.7.0~
  glGetBooleanv@Base 7.8.1
  glGetBufferParameteri64v@Base 9.1~
  glGetBufferParameteriv@Base 7.8.1
@@ -108,6 +115,7 @@
  glGetIntegeri_v@Base 9.1~
  glGetIntegerv@Base 7.8.1
  glGetInternalformativ@Base 9.1~
+ glGetMultisamplefv@Base 10.7.0~
  glGetProgramBinary@Base 9.1~
  glGetProgramInfoLog@Base 7.8.1
  glGetProgramiv@Base 7.8.1
@@ -123,6 +131,8 @@
  glGetString@Base 7.8.1
  glGetStringi@Base 9.1~
  glGetSynciv@Base 9.1~
+ glGetTexLevelParameterfv@Base 10.7.0~
+ glGetTexLevelParameteriv@Base 10.7.0~
  glGetTexParameterfv@Base 7.8.1
  glGetTexParameteriv@Base 7.8.1
  glGetTransformFeedbackVarying@Base 9.1~
@@ -155,6 +165,7 @@
  glLineWidth@Base 7.8.1
  glLinkProgram@Base 7.8.1
  glMapBufferRange@Base 9.1~
+ glMemoryBarrier@Base 10.7.0~
  glMultiDrawArraysEXT@Base 7.8.1
  glMultiDrawElementsEXT@Base 7.8.1
  glPauseTransformFeedback@Base 9.1~
@@ -169,6 +180,7 @@
  glRenderbufferStorageMultisample@Base 9.1~
  glResumeTransformFeedback@Base 9.1~
  glSampleCoverage@Base 7.8.1
+ glSampleMaski@Base 10.7.0~
  glSamplerParameterf@Base 9.1~
  glSamplerParameterfv@Base 9.1~
  glSamplerParameteri@Base 9.1~
@@ -189,6 +201,7 @@
  glTexParameteri@Base 7.8.1
  glTexParameteriv@Base 7.8.1
  glTexStorage2D@Base 9.1~
+ glTexStorage2DMultisample@Base 10.7.0~
  glTexStorage3D@Base 9.1~
  glTexSubImage2D@Base 7.8.1
  glTexSubImage3D@Base 9.1~
@@ -238,12 +251,16 @@
  glVertexAttrib3fv@Base 7.8.1
  glVertexAttrib4f@Base 7.8.1
  glVertexAttrib4fv@Base 7.8.1
+ glVertexAttribBinding@Base 10.7.0~
  glVertexAttribDivisor@Base 9.1~
+ glVertexAttribFormat@Base 10.7.0~
  glVertexAttribI4i@Base 9.1~
  glVertexAttribI4iv@Base 9.1~
  glVertexAttribI4ui@Base 9.1~
  glVertexAttribI4uiv@Base 9.1~
+ glVertexAttribIFormat@Base 10.7.0~
  glVertexAttribIPointer@Base 9.1~
  glVertexAttribPointer@Base 7.8.1
+ glVertexBindingDivisor@Base 10.7.0~
  glViewport@Base 7.8.1
  glWaitSync@Base 9.1~
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/not-installed mesa-11.0.0~git20150916+11.0.c4bae579/debian/not-installed
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/not-installed	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/not-installed	2015-09-16 15:35:23.000000000 +0000
@@ -2,4 +2,4 @@
 
 # Common list:
 NOT_INSTALLED := \
-	dri/usr/include/GL/wglext.h \
+	dri/usr/include/GL/wglext.h
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/patches/egl-platform-mir.patch mesa-11.0.0~git20150916+11.0.c4bae579/debian/patches/egl-platform-mir.patch
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/patches/egl-platform-mir.patch	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/patches/egl-platform-mir.patch	2015-09-16 15:35:23.000000000 +0000
@@ -1,8 +1,8 @@
 --- a/configure.ac
 +++ b/configure.ac
-@@ -1779,7 +1779,9 @@ for plat in $egl_platforms; do
- 
- 	android|gdi|null)
+@@ -1772,7 +1772,9 @@ for plat in $egl_platforms; do
+ 		test "x$have_libdrm" != xyes &&
+ 			AC_MSG_ERROR([EGL platform surfaceless requires libdrm >= $LIBDRM_REQUIRED])
  		;;
 -
 +        mir)
@@ -11,9 +11,9 @@
  	*)
  		AC_MSG_ERROR([EGL platform '$plat' does not exist])
  		;;
-@@ -1807,6 +1809,7 @@ AM_CONDITIONAL(HAVE_EGL_PLATFORM_X11, ec
- AM_CONDITIONAL(HAVE_EGL_PLATFORM_WAYLAND, echo "$egl_platforms" | grep -q 'wayland')
+@@ -1798,6 +1800,7 @@ AM_CONDITIONAL(HAVE_EGL_PLATFORM_WAYLAND
  AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep -q 'drm')
+ AM_CONDITIONAL(HAVE_EGL_PLATFORM_SURFACELESS, echo "$egl_platforms" | grep -q 'surfaceless')
  AM_CONDITIONAL(HAVE_EGL_PLATFORM_NULL, echo "$egl_platforms" | grep -q 'null')
 +AM_CONDITIONAL(HAVE_EGL_PLATFORM_MIR, echo "$egl_platforms" | grep -q 'mir')
  
@@ -21,9 +21,9 @@
  
 --- a/include/EGL/eglplatform.h
 +++ b/include/EGL/eglplatform.h
-@@ -104,6 +104,13 @@ typedef struct ANativeWindow        *EGL
- typedef struct egl_native_pixmap_t  *EGLNativePixmapType;
- typedef void                        *EGLNativeDisplayType;
+@@ -105,6 +105,13 @@ typedef struct ANativeWindow*
+ typedef struct egl_native_pixmap_t*     EGLNativePixmapType;
+ typedef void*                           EGLNativeDisplayType;
  
 +#elif defined(MIR_EGL_PLATFORM)
 +
@@ -32,12 +32,12 @@
 +typedef void                   *EGLNativePixmapType;
 +typedef MirEGLNativeWindowType  EGLNativeWindowType;
 +
- #elif defined(__unix__)
+ #elif defined(__unix__) || defined(__APPLE__)
  
  #if defined(MESA_EGL_NO_X11_HEADERS)
 --- a/include/GL/internal/dri_interface.h
 +++ b/include/GL/internal/dri_interface.h
-@@ -893,10 +893,12 @@ struct __DRIbufferRec {
+@@ -886,10 +886,12 @@ struct __DRIbufferRec {
      unsigned int pitch;
      unsigned int cpp;
      unsigned int flags;
@@ -51,22 +51,9 @@
  struct __DRIdri2LoaderExtensionRec {
      __DRIextension base;
  
---- a/src/egl/drivers/dri2/Makefile.am
-+++ b/src/egl/drivers/dri2/Makefile.am
-@@ -65,4 +65,10 @@ libegl_dri2_la_SOURCES += platform_drm.c
- AM_CFLAGS += -DHAVE_DRM_PLATFORM
- endif
- 
-+if HAVE_EGL_PLATFORM_MIR
-+libegl_dri2_la_SOURCES += platform_mir.c
-+AM_CFLAGS += -DHAVE_MIR_PLATFORM
-+AM_CFLAGS += $(MIR_CFLAGS)
-+endif
-+
- EXTRA_DIST = SConscript
 --- a/src/egl/drivers/dri2/egl_dri2.c
 +++ b/src/egl/drivers/dri2/egl_dri2.c
-@@ -677,6 +677,12 @@ dri2_initialize(_EGLDriver *drv, _EGLDis
+@@ -757,6 +757,12 @@ dri2_initialize(_EGLDriver *drv, _EGLDis
           return EGL_TRUE;
        return dri2_initialize_wayland(drv, disp);
  #endif
@@ -79,7 +66,7 @@
  #ifdef HAVE_ANDROID_PLATFORM
     case _EGL_PLATFORM_ANDROID:
        if (disp->Options.TestOnly)
-@@ -726,6 +732,13 @@ dri2_terminate(_EGLDriver *drv, _EGLDisp
+@@ -806,6 +812,13 @@ dri2_terminate(_EGLDriver *drv, _EGLDisp
        }
        break;
  #endif
@@ -93,7 +80,7 @@
  #ifdef HAVE_WAYLAND_PLATFORM
     case _EGL_PLATFORM_WAYLAND:
        if (dri2_dpy->wl_drm)
-@@ -747,7 +760,8 @@ dri2_terminate(_EGLDriver *drv, _EGLDisp
+@@ -827,7 +840,8 @@ dri2_terminate(_EGLDriver *drv, _EGLDisp
      * the ones from the gbm device. As such the gbm itself is responsible
      * for the cleanup.
      */
@@ -116,7 +103,7 @@
  #include "eglconfig.h"
  #include "eglcontext.h"
  #include "egldisplay.h"
-@@ -204,6 +208,10 @@ struct dri2_egl_display
+@@ -205,6 +209,10 @@ struct dri2_egl_display
     int			     is_render_node;
     int			     is_different_gpu;
  #endif
@@ -127,7 +114,7 @@
  };
  
  struct dri2_egl_context
-@@ -250,7 +258,7 @@ struct dri2_egl_surface
+@@ -251,7 +259,7 @@ struct dri2_egl_surface
     struct gbm_dri_surface *gbm_surf;
  #endif
  
@@ -136,7 +123,7 @@
     __DRIbuffer           *dri_buffers[__DRI_BUFFER_COUNT];
     struct {
  #ifdef HAVE_WAYLAND_PLATFORM
-@@ -262,9 +270,12 @@ struct dri2_egl_surface
+@@ -263,9 +271,12 @@ struct dri2_egl_surface
        void *data;
        int data_size;
  #endif
@@ -150,7 +137,7 @@
        int                 locked;
        int                 age;
     } color_buffers[4], *back, *current;
-@@ -277,6 +288,10 @@ struct dri2_egl_surface
+@@ -278,6 +289,10 @@ struct dri2_egl_surface
     /* EGL-owned buffers */
     __DRIbuffer           *local_buffers[__DRI_BUFFER_COUNT];
  #endif
@@ -161,9 +148,9 @@
  };
  
  
-@@ -354,4 +369,7 @@ dri2_initialize_android(_EGLDriver *drv,
- void
- dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw);
+@@ -364,4 +379,7 @@ const __DRIconfig *
+ dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
+                     EGLenum colorspace);
  
 +EGLBoolean
 +dri2_initialize_mir(_EGLDriver *drv, _EGLDisplay *disp);
@@ -171,7 +158,7 @@
  #endif /* EGL_DRI2_INCLUDED */
 --- /dev/null
 +++ b/src/egl/drivers/dri2/platform_mir.c
-@@ -0,0 +1,619 @@
+@@ -0,0 +1,617 @@
 +/*
 + * Copyright © 2012 Canonical, Inc
 + *
@@ -780,8 +767,6 @@
 +   disp->Extensions.EXT_swap_buffers_with_damage = EGL_TRUE;
 +   disp->Extensions.KHR_image_pixmap = EGL_TRUE;
 +
-+   disp->VersionMajor = 1;
-+   disp->VersionMinor = 4;
 +   dri2_dpy->vtbl = &dri2_mir_display_vtbl;
 +
 +   return EGL_TRUE;
@@ -791,20 +776,6 @@
 +
 +   return EGL_FALSE;
 +}
---- a/src/egl/main/Makefile.am
-+++ b/src/egl/main/Makefile.am
-@@ -68,6 +68,11 @@ if HAVE_EGL_PLATFORM_NULL
- AM_CFLAGS += -DHAVE_NULL_PLATFORM
- endif
- 
-+if HAVE_EGL_PLATFORM_MIR
-+AM_CFLAGS += -DHAVE_MIR_PLATFORM
-+AM_CFLAGS += $(MIR_CFLAGS)
-+endif
-+
- if HAVE_EGL_DRIVER_DRI2
- AM_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2
- AM_CFLAGS += -DHAVE_XCB_DRI2
 --- a/src/egl/main/egldisplay.c
 +++ b/src/egl/main/egldisplay.c
 @@ -56,7 +56,10 @@
@@ -819,17 +790,15 @@
  
  /**
   * Map --with-egl-platforms names to platform types.
-@@ -71,7 +74,8 @@ static const struct {
-    { _EGL_PLATFORM_DRM, "drm" },
-    { _EGL_PLATFORM_NULL, "null" },
+@@ -71,6 +74,7 @@ static const struct {
     { _EGL_PLATFORM_ANDROID, "android" },
--   { _EGL_PLATFORM_HAIKU, "haiku" }
-+   { _EGL_PLATFORM_HAIKU, "haiku" },
+    { _EGL_PLATFORM_HAIKU, "haiku" },
+    { _EGL_PLATFORM_SURFACELESS, "surfaceless" },
 +   { _EGL_PLATFORM_MIR, "mir" }
  };
  
  
-@@ -131,6 +135,47 @@ _eglPointerIsDereferencable(void *p)
+@@ -130,6 +134,47 @@ _eglPointerIsDereferencable(void *p)
  #endif
  }
  
@@ -877,7 +846,7 @@
  
  /**
   * Try detecting native platform with the help of native display characteristcs.
-@@ -141,6 +186,11 @@ _eglNativePlatformDetectNativeDisplay(vo
+@@ -140,6 +185,11 @@ _eglNativePlatformDetectNativeDisplay(vo
     if (nativeDisplay == EGL_DEFAULT_DISPLAY)
        return _EGL_INVALID_PLATFORM;
  
@@ -889,7 +858,7 @@
     if (_eglPointerIsDereferencable(nativeDisplay)) {
        void *first_pointer = *(void **) nativeDisplay;
  
-@@ -179,7 +229,7 @@ _eglNativePlatformDetectNativeDisplay(vo
+@@ -178,7 +228,7 @@ _eglNativePlatformDetectNativeDisplay(vo
  _EGLPlatformType
  _eglGetNativePlatform(void *nativeDisplay)
  {
@@ -900,10 +869,10 @@
     if (native_platform == _EGL_INVALID_PLATFORM) {
 --- a/src/egl/main/egldisplay.h
 +++ b/src/egl/main/egldisplay.h
-@@ -51,6 +51,7 @@ enum _egl_platform_type {
-    _EGL_PLATFORM_NULL,
+@@ -50,6 +50,7 @@ enum _egl_platform_type {
     _EGL_PLATFORM_ANDROID,
     _EGL_PLATFORM_HAIKU,
+    _EGL_PLATFORM_SURFACELESS,
 +   _EGL_PLATFORM_MIR,
  
     _EGL_NUM_PLATFORMS,
@@ -946,7 +915,7 @@
  
     .getBuffers              = dri_get_buffers,
     .flushFrontBuffer        = dri_flush_front_buffer,
-@@ -528,9 +528,11 @@ gbm_dri_is_format_supported(struct gbm_d
+@@ -536,9 +536,11 @@ gbm_dri_is_format_supported(struct gbm_d
     switch (format) {
     case GBM_BO_FORMAT_XRGB8888:
     case GBM_FORMAT_XRGB8888:
@@ -958,7 +927,7 @@
        if (usage & GBM_BO_USE_SCANOUT)
           return 0;
        break;
-@@ -612,6 +614,9 @@ gbm_dri_to_gbm_format(uint32_t dri_forma
+@@ -620,6 +622,9 @@ gbm_dri_to_gbm_format(uint32_t dri_forma
     case __DRI_IMAGE_FORMAT_ABGR8888:
        ret = GBM_FORMAT_ABGR8888;
        break;
@@ -968,7 +937,7 @@
     default:
        ret = 0;
        break;
-@@ -620,6 +625,41 @@ gbm_dri_to_gbm_format(uint32_t dri_forma
+@@ -628,6 +633,41 @@ gbm_dri_to_gbm_format(uint32_t dri_forma
     return ret;
  }
  
@@ -1010,7 +979,7 @@
  static struct gbm_bo *
  gbm_dri_bo_import(struct gbm_device *gbm,
                    uint32_t type, void *buffer, uint32_t usage)
-@@ -828,31 +868,7 @@ gbm_dri_bo_create(struct gbm_device *gbm
+@@ -836,31 +876,7 @@ gbm_dri_bo_create(struct gbm_device *gbm
     bo->base.base.height = height;
     bo->base.base.format = format;
  
@@ -1043,3 +1012,18 @@
  
     if (usage & GBM_BO_USE_SCANOUT)
        dri_use |= __DRI_IMAGE_USE_SCANOUT;
+--- a/src/egl/Makefile.am
++++ b/src/egl/Makefile.am
+@@ -75,6 +75,12 @@ AM_CFLAGS += -DHAVE_SURFACELESS_PLATFORM
+ dri2_backend_FILES += drivers/dri2/platform_surfaceless.c
+ endif
+ 
++if HAVE_EGL_PLATFORM_MIR
++AM_CFLAGS += -DHAVE_MIR_PLATFORM
++AM_CFLAGS += $(MIR_CFLAGS)
++dri2_backend_FILES += drivers/dri2/platform_mir.c
++endif
++
+ if HAVE_EGL_DRIVER_DRI2
+ AM_CFLAGS += \
+ 	-I$(top_srcdir)/src/loader \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/patches/i965-remove-early-release-of-dri2-miptree.patch mesa-11.0.0~git20150916+11.0.c4bae579/debian/patches/i965-remove-early-release-of-dri2-miptree.patch
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/patches/i965-remove-early-release-of-dri2-miptree.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/patches/i965-remove-early-release-of-dri2-miptree.patch	2015-09-16 15:35:23.000000000 +0000
@@ -0,0 +1,25 @@
+From e2a696a4cd93c2dbe445243de48ed478fbdb8009 Mon Sep 17 00:00:00 2001
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Fri, 10 Jul 2015 10:41:35 +0100
+Subject: i965: Remove early release of DRI2 miptree
+
+intel_update_winsys_renderbuffer_miptree() will release the existing
+miptree when wrapping a new DRI2 buffer, so we can remove the early
+release and so prevent a NULL mt dereference should importing the new
+DRI2 name fail for any reason. (Reusing the old DRI2 name will result
+in the rendering going astray, to a stale buffer, and not shown on the
+screen, but it allows us to issue a warning and not crash much later in
+innocent code.)
+
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+
+--- a/src/mesa/drivers/dri/i965/brw_context.c
++++ b/src/mesa/drivers/dri/i965/brw_context.c
+@@ -1412,7 +1412,6 @@ intel_process_dri2_buffer(struct brw_con
+               buffer->cpp, buffer->pitch);
+    }
+ 
+-   intel_miptree_release(&rb->mt);
+    bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
+                                           buffer->name);
+    if (!bo) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/patches/series mesa-11.0.0~git20150916+11.0.c4bae579/debian/patches/series
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/patches/series	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/patches/series	2015-09-16 15:35:23.000000000 +0000
@@ -1,5 +1,6 @@
-#07_gallium-fix-build-failure-on-powerpcspe.diff
+07_gallium-fix-build-failure-on-powerpcspe.diff
 
 # Ubuntu patches.
 #egl-platform-mir.patch
 i915-dont-default-to-2.1.patch
+i965-remove-early-release-of-dri2-miptree.patch
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/debian/rules mesa-11.0.0~git20150916+11.0.c4bae579/debian/rules
--- mesa-10.6.5~git20150829+10.6.fa342251/debian/rules	2015-09-16 15:35:22.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/debian/rules	2015-09-16 15:35:23.000000000 +0000
@@ -32,9 +32,6 @@
 	$(shell DEB_CFLAGS_MAINT_APPEND="-Wall -fno-optimize-sibling-calls" DEB_CXXFLAGS_MAINT_APPEND="-Wall -fno-optimize-sibling-calls" dpkg-buildflags --export=configure)
 endif
 
-buildflags-i686 = \
-	$(shell DEB_CFLAGS_MAINT_APPEND="-Wall -march=i686" DEB_CXXFLAGS_MAINT_APPEND="-Wall -march=i686" dpkg-buildflags --export=configure)
-
 # keep a list of files we don't install (yet), but since it's a bit
 # large, use an external file:
 include debian/not-installed
@@ -76,6 +73,11 @@
 # Non-Linux ports also lack *_CLOEXEC and epoll, so wayland isn't ready yet:
 	EGL_DISPLAYS += wayland
 
+# Mir isn't built on AArch64 and PPC
+#    ifeq (,$(filter $(DEB_HOST_ARCH),arm64 powerpc ppc64 ppc64el))
+#	EGL_DISPLAYS += mir
+#    endif
+
     ifeq (,$(filter $(DEB_HOST_ARCH), s390 s390x))
 	DRI_DRIVERS += nouveau
     endif
@@ -88,13 +90,6 @@
 	DRI_DRIVERS += i915 i965
   endif
 
-  # Do not build llvmpipe for armhf, it regresses compared to the classic swrast.
-  ifneq (,$(filter $(DEB_HOST_ARCH),amd64 i386 kfreebsd-amd64 kfreebsd-i386))
-	GALLIUM_DRIVERS += swrast
-  else
-	DRI_DRIVERS += swrast
-  endif
-
   ifeq (,$(filter $(DEB_HOST_ARCH), s390 s390x))
     DRI_DRIVERS += r200 radeon
     GALLIUM_DRIVERS += r600 r300
@@ -105,6 +100,9 @@
 	GALLIUM_DRIVERS += radeonsi
 	confflags_GALLIUM += --enable-gallium-llvm
 	confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.6
+	GALLIUM_DRIVERS += swrast
+  else
+	DRI_DRIVERS += swrast
   endif
 
 	confflags_DIRECT_RENDERING = --enable-driglx-direct
@@ -278,10 +276,10 @@
 	# proprietary alternatives can't make more difficult…
 	set -e ; for PACKAGE in \
 		libegl1-mesa libgles1-mesa libgles2-mesa; do \
-			dh_makeshlibs -p$$PACKAGE -- -c0 \
+			dh_makeshlibs -p$$PACKAGE -- -c4 \
 			-edebian/$$PACKAGE/usr/lib/$(DEB_HOST_MULTIARCH)/mesa-egl/\* \
 		; done
-	dh_makeshlibs -s --remaining-packages -- -c0
+	dh_makeshlibs -s --remaining-packages -- -c4
 
 	dh_installdeb -s
 	dh_shlibdeps -s -l/usr/lib/$(DEB_HOST_MULTIARCH)/mesa:\
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/devinfo.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/devinfo.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/devinfo.html	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/devinfo.html	2015-09-16 14:36:08.000000000 +0000
@@ -17,159 +17,241 @@
 <h1>Development Notes</h1>
 
 
-<h2>Adding Extensions</h2>
-
-<p>
-To add a new GL extension to Mesa you have to do at least the following.
-
 <ul>
-<li>
-   If glext.h doesn't define the extension, edit include/GL/gl.h and add
-   code like this:
-   <pre>
-     #ifndef GL_EXT_the_extension_name
-     #define GL_EXT_the_extension_name 1
-     /* declare the new enum tokens */
-     /* prototype the new functions */
-     /* TYPEDEFS for the new functions */
-     #endif
-   </pre>
-</li>
-<li>
-   In the src/mapi/glapi/gen/ directory, add the new extension functions and
-   enums to the gl_API.xml file.
-   Then, a bunch of source files must be regenerated by executing the
-   corresponding Python scripts.
-</li>
-<li>
-   Add a new entry to the <code>gl_extensions</code> struct in mtypes.h
-</li>
-<li>
-   Update the <code>extensions.c</code> file.
-</li>
-<li>
-   From this point, the best way to proceed is to find another extension,
-   similar to the new one, that's already implemented in Mesa and use it
-   as an example.
-</li>
-<li>
-   If the new extension adds new GL state, the functions in get.c, enable.c
-   and attrib.c will most likely require new code.
-</li>
-<li>
-   The dispatch tests check_table.cpp and dispatch_sanity.cpp
-   should be updated with details about the new extensions functions. These
-   tests are run using 'make check'
-</li>
+<li><a href="#style">Coding Style</a>
+<li><a href="#submitting">Submitting Patches</a>
+<li><a href="#release">Making a New Mesa Release</a>
+<li><a href="#extensions">Adding Extensions</a>
 </ul>
 
 
-
-<h2>Coding Style</h2>
-
-<p>
-Mesa's code style has changed over the years.  Here's the latest.
-</p>
+<h2 id="style">Coding Style</h2>
 
 <p>
-Comment your code!  It's extremely important that open-source code be
-well documented.  Also, strive to write clean, easily understandable code.
+Mesa is over 20 years old and the coding style has evolved over time.
+Some old parts use a style that's a bit out of date.
+If the guidelines below don't cover something, try following the format of
+existing, neighboring code.
 </p>
 
 <p>
-3-space indentation
+Basic formatting guidelines
 </p>
 
-<p>
-If you use tabs, set them to 8 columns
-</p>
-
-<p>
-Line width: the preferred width to fill comments and code in Mesa is 78
-columns.  Exceptions are sometimes made for clarity (e.g. tabular data is
-sometimes filled to a much larger width so that extraneous carriage returns
-don't obscure the table).
-</p>
-
-<p>
-Brace example:
-</p>
+<ul>
+<li>3-space indentation, no tabs.
+<li>Limit lines to 78 or fewer characters.  The idea is to prevent line
+wrapping in 80-column editors and terminals.  There are exceptions, such
+as if you're defining a large, static table of information.
+<li>Opening braces go on the same line as the if/for/while statement.
+For example:
 <pre>
-	if (condition) {
-	   foo;
-	}
-	else {
-	   bar;
-	}
-
-	switch (condition) {
-	case 0:
-	   foo();
-	   break;
+   if (condition) {
+      foo;
+   } else {
+      bar;
+   }
+</pre>
 
-	case 1: {
-	   ...
-	   break;
-	}
+<li>Put a space before/after operators.  For example, <tt>a = b + c;</tt>
+and not <tt>a=b+c;</tt>
 
-	default:
-	   ...
-	   break;
-	}
+<li>This GNU indent command generally does the right thing for formatting:
+<pre>
+   indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
 </pre>
 
-<p>
-Here's the GNU indent command which will best approximate my preferred style:
-(Note that it won't format switch statements in the preferred way)
-</p>
+<li>Use comments wherever you think it would be helpful for other developers.
+Several specific cases and style examples follow.  Note that we roughly
+follow <a href="http://www.stack.nl/~dimitri/doxygen/">Doxygen</a> conventions.
+<br>
+<br>
+Single-line comments:
 <pre>
-	indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
+   /* null-out pointer to prevent dangling reference below */
+   bufferObj = NULL;
 </pre>
+Or,
+<pre>
+   bufferObj = NULL;  /* prevent dangling reference below */
+</pre>
+Multi-line comment:
+<pre>
+   /* If this is a new buffer object id, or one which was generated but
+    * never used before, allocate a buffer object now.
+    */
+</pre>
+We try to quote the OpenGL specification where prudent:
+<pre>
+   /* Page 38 of the PDF of the OpenGL ES 3.0 spec says:
+    *
+    *     "An INVALID_OPERATION error is generated for any of the following
+    *     conditions:
+    *
+    *     * <length> is zero."
+    *
+    * Additionally, page 94 of the PDF of the OpenGL 4.5 core spec
+    * (30.10.2014) also says this, so it's no longer allowed for desktop GL,
+    * either.
+    */
+</pre>
+Function comment example:
+<pre>
+   /**
+    * Create and initialize a new buffer object.  Called via the
+    * ctx->Driver.CreateObject() driver callback function.
+    * \param  name  integer name of the object
+    * \param  type  one of GL_FOO, GL_BAR, etc.
+    * \return  pointer to new object or NULL if error
+    */
+   struct gl_object *
+   _mesa_create_object(GLuint name, GLenum type)
+   {
+      /* function body */
+   }
+</pre>
+
+<li>Put the function return type and qualifiers on one line and the function
+name and parameters on the next, as seen above.  This makes it easy to use
+<code>grep ^function_name dir/*</code> to find function definitions.  Also,
+the opening brace goes on the next line by itself (see above.)
+
+<li>Function names follow various conventions depending on the type of function:
+<pre>
+   glFooBar()       - a public GL entry point (in glapi_dispatch.c)
+   _mesa_FooBar()   - the internal immediate mode function
+   save_FooBar()    - retained mode (display list) function in dlist.c
+   foo_bar()        - a static (private) function
+   _mesa_foo_bar()  - an internal non-static Mesa function
+</pre>
+
+<li>Constants, macros and enumerant names are ALL_UPPERCASE, with _ between
+words.
+<li>Mesa usually uses camel case for local variables (Ex: "localVarname")
+while gallium typically uses underscores (Ex: "local_var_name").
+<li>Global variables are almost never used because Mesa should be thread-safe.
 
+<li>Booleans.  Places that are not directly visible to the GL API
+should prefer the use of <tt>bool</tt>, <tt>true</tt>, and
+<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
+<tt>GL_FALSE</tt>.  In C code, this may mean that
+<tt>#include &lt;stdbool.h&gt;</tt> needs to be added.  The
+<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
+src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.
 
-<p>
-Local variable name example:  localVarName (no underscores)
-</p>
+</ul>
 
-<p>
-Constants and macros are ALL_UPPERCASE, with _ between words
-</p>
+
+<h2 id="submitting">Submitting Patches</h2>
 
 <p>
-Global variables are not allowed.
+The basic guidelines for submitting patches are:
 </p>
 
+<ul>
+<li>Patches should be sufficiently tested before submitting.
+<li>Code patches should follow Mesa coding conventions.
+<li>Whenever possible, patches should only affect individual Mesa/Gallium
+components.
+<li>Patches should never introduce build breaks and should be bisectable (see
+<code>git bisect</code>.)
+<li>Patches should be properly formatted (see below).
+<li>Patches should be submitted to mesa-dev for review using
+<code>git send-email</code>.
+<li>Patches should not mix code changes with code formatting changes (except,
+perhaps, in very trivial cases.)
+</ul>
+
+<h3>Patch formatting</h3>
+
 <p>
-Function name examples:
+The basic rules for patch formatting are:
 </p>
+
+<ul>
+<li>Lines should be limited to 75 characters or less so that git logs
+displayed in 80-column terminals avoid line wrapping.  Note that git
+log uses 4 spaces of indentation (4 + 75 &lt; 80).
+<li>The first line should be a short, concise summary of the change prefixed
+with a module name.  Examples:
+<pre>
+    mesa: Add support for querying GL_VERTEX_ATTRIB_ARRAY_LONG
+
+    gallium: add PIPE_CAP_DEVICE_RESET_STATUS_QUERY
+
+    i965: Fix missing type in local variable declaration.
+</pre>
+<li>Subsequent patch comments should describe the change in more detail,
+if needed.  For example:
+<pre>
+    i965: Remove end-of-thread SEND alignment code.
+    
+    This was present in Eric's initial implementation of the compaction code
+    for Sandybridge (commit 077d01b6). There is no documentation saying this
+    is necessary, and removing it causes no regressions in piglit on any
+    platform.
+</pre>
+<li>A "Signed-off-by:" line is not required, but not discouraged either.
+<li>If a patch addresses a bugzilla issue, that should be noted in the
+patch comment.  For example:
+<pre>
+   Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
+</pre>
+<li>If there have been several revisions to a patch during the review
+process, they should be noted such as in this example:
+<pre>
+    st/mesa: add ARB_texture_stencil8 support (v4)
+    
+    if we support stencil texturing, enable texture_stencil8
+    there is no requirement to support native S8 for this,
+    the texture can be converted to x24s8 fine.
+    
+    v2: fold fixes from Marek in:
+       a) put S8 last in the list
+       b) fix renderable to always test for d/s renderable
+        fixup the texture case to use a stencil only format
+        for picking the format for the texture view.
+    v3: hit fallback for getteximage
+    v4: put s8 back in front, it shouldn't get picked now (Ilia)
+</pre>
+<li>If someone tested your patch, document it with a line like this:
+<pre>
+    Tested-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<li>If the patch was reviewed (usually the case) or acked by someone,
+that should be documented with:
 <pre>
-	glFooBar()       - a public GL entry point (in glapi_dispatch.c)
-	_mesa_FooBar()   - the internal immediate mode function
-	save_FooBar()    - retained mode (display list) function in dlist.c
-	foo_bar()        - a static (private) function
-	_mesa_foo_bar()  - an internal non-static Mesa function
+    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
 </pre>
+</ul>
+
+
+
+<h3>Testing Patches</h3>
 
 <p>
-Places that are not directly visible to the GL API should prefer the use
-of <tt>bool</tt>, <tt>true</tt>, and
-<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
-<tt>GL_FALSE</tt>.  In C code, this may mean that
-<tt>#include &lt;stdbool.h&gt;</tt> needs to be added.  The
-<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
-src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.
+It should go without saying that patches must be tested.  In general,
+do whatever testing is prudent.
 </p>
 
-<h2>Submitting patches</h2>
-
 <p>
-You should always run the Mesa Testsuite before submitting patches.
-The Testsuite can be run using the 'make check' command. All tests
+You should always run the Mesa test suite before submitting patches.
+The test suite can be run using the 'make check' command. All tests
 must pass before patches will be accepted, this may mean you have
 to update the tests themselves.
 </p>
 
 <p>
+Whenever possible and applicable, test the patch with
+<a href="http://piglit.freedesktop.org">Piglit</a> to
+check for regressions.
+</p>
+
+
+<h3>Mailing Patches</h3>
+
+<p>
 Patches should be sent to the Mesa mailing list for review.
 When submitting a patch make sure to use git send-email rather than attaching
 patches to emails. Sending patches as attachments prevents people from being
@@ -184,7 +266,38 @@
 it harder for reviewers to accidentally review old patches.
 </p>
 
-<h2>Marking a commit as a candidate for a stable branch</h2>
+<p>
+When submitting follow-up patches you should also log in to
+<a href="https://patchwork.freedesktop.org">patchwork</a> and change the
+state of your old patches to Superseded.
+</p>
+
+<h3>Reviewing Patches</h3>
+
+<p>
+When you've reviewed a patch on the mailing list, please be unambiguous
+about your review.  That is, state either
+<pre>
+    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+or
+<pre>
+    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+rather than saying just "LGTM" or "Seems OK".
+</p>
+
+<p>
+If small changes are suggested, it's OK to say something like:
+<pre>
+   With the above fixes, Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+which tells the patch author that the patch can be committed, as long
+as the issues are resolved first.
+</p>
+
+
+<h3>Marking a commit as a candidate for a stable branch</h3>
 
 <p>
 If you want a commit to be applied to a stable branch,
@@ -221,7 +334,7 @@
 <a href="http://cworth.org/~cworth/mesa-stable-queue/">Mesa Stable Queue</a>
 page.
 
-<h2>Criteria for accepting patches to the stable branch</h2>
+<h3>Criteria for accepting patches to the stable branch</h3>
 
 Mesa has a designated release manager for each stable branch, and the release
 manager is the only developer that should be pushing changes to these
@@ -306,7 +419,8 @@
   regression that is unaacceptable for the stable branch.</li>
 </ul>
 
-<h2>Making a New Mesa Release</h2>
+
+<h2 id="release">Making a New Mesa Release</h2>
 
 <p>
 These are the instructions for making a new Mesa release.
@@ -456,7 +570,7 @@
 tarballs" in the previous step. Commit this change.
 </p>
 
-<h3>Push all commits and the tag creates above</h3>
+<h3>Push all commits and the tag created above</h3>
 
 <p>
 This is the first step that cannot easily be undone. The release is going
@@ -483,7 +597,7 @@
 	mv ~/MesaLib-X.Y.Z* .
 </pre>
 
-<h3>Back on mesa master, andd the new release notes into the tree</h3>
+<h3>Back on mesa master, add the new release notes into the tree</h3>
 
 <p>
 Something like the following steps will do the trick:
@@ -543,6 +657,56 @@
 </pre>
 </p>
 
+
+<h2 id="extensions">Adding Extensions</h2>
+
+<p>
+To add a new GL extension to Mesa you have to do at least the following.
+
+<ul>
+<li>
+   If glext.h doesn't define the extension, edit include/GL/gl.h and add
+   code like this:
+   <pre>
+     #ifndef GL_EXT_the_extension_name
+     #define GL_EXT_the_extension_name 1
+     /* declare the new enum tokens */
+     /* prototype the new functions */
+     /* TYPEDEFS for the new functions */
+     #endif
+   </pre>
+</li>
+<li>
+   In the src/mapi/glapi/gen/ directory, add the new extension functions and
+   enums to the gl_API.xml file.
+   Then, a bunch of source files must be regenerated by executing the
+   corresponding Python scripts.
+</li>
+<li>
+   Add a new entry to the <code>gl_extensions</code> struct in mtypes.h
+</li>
+<li>
+   Update the <code>extensions.c</code> file.
+</li>
+<li>
+   From this point, the best way to proceed is to find another extension,
+   similar to the new one, that's already implemented in Mesa and use it
+   as an example.
+</li>
+<li>
+   If the new extension adds new GL state, the functions in get.c, enable.c
+   and attrib.c will most likely require new code.
+</li>
+<li>
+   The dispatch tests check_table.cpp and dispatch_sanity.cpp
+   should be updated with details about the new extensions functions. These
+   tests are run using 'make check'
+</li>
+</ul>
+
+
+
+
 </div>
 </body>
 </html>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/egl.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/egl.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/egl.html	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/egl.html	2015-09-16 14:36:08.000000000 +0000
@@ -88,10 +88,10 @@
 <code>EGLNativeWindowType</code> defined for.</p>
 
 <p>The available platforms are <code>x11</code>, <code>drm</code>,
-<code>wayland</code>, <code>null</code>, <code>android</code>,
-<code>haiku</code>, and <code>gdi</code>.  The <code>android</code> platform
+<code>wayland</code>, <code>surfaceless</code>, <code>android</code>,
+and <code>haiku</code>.  The <code>android</code> platform
 can only be built as a system component, part of AOSP, while the
-<code>haiku</code> and <code>gdi</code> platforms can only be built with SCons.
+<code>haiku</code> platform can only be built with SCons.
 Unless for special needs, the build system should
 select the right platforms automatically.</p>
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/GL3.txt mesa-11.0.0~git20150916+11.0.c4bae579/docs/GL3.txt
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/GL3.txt	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/GL3.txt	2015-09-16 14:36:08.000000000 +0000
@@ -92,43 +92,43 @@
   GL_ARB_vertex_type_2_10_10_10_rev                     DONE ()
 
 
-GL 4.0, GLSL 4.00:
+GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
 
-  GL_ARB_draw_buffers_blend                            DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_gpu_shader5                                   DONE (i965, nvc0)
+  GL_ARB_draw_buffers_blend                            DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_draw_indirect                                 DONE (i965, r600, llvmpipe, softpipe)
+  GL_ARB_gpu_shader5                                   DONE (i965)
   - 'precise' qualifier                                DONE
-  - Dynamically uniform sampler array indices          DONE (r600)
+  - Dynamically uniform sampler array indices          DONE (r600, softpipe)
   - Dynamically uniform UBO array indices              DONE (r600)
   - Implicit signed -> unsigned conversions            DONE
   - Fused multiply-add                                 DONE ()
-  - Packing/bitfield/conversion functions              DONE (r600, radeonsi)
-  - Enhanced textureGather                             DONE (r600, radeonsi)
-  - Geometry shader instancing                         DONE (r600)
+  - Packing/bitfield/conversion functions              DONE (r600, softpipe)
+  - Enhanced textureGather                             DONE (r600, softpipe)
+  - Geometry shader instancing                         DONE (r600, llvmpipe, softpipe)
   - Geometry shader multiple streams                   DONE ()
-  - Enhanced per-sample shading                        DONE (r600, radeonsi)
+  - Enhanced per-sample shading                        DONE (r600)
   - Interpolation functions                            DONE (r600)
   - New overload resolution rules                      DONE
-  GL_ARB_gpu_shader_fp64                               DONE (nvc0, softpipe)
-  GL_ARB_sample_shading                                DONE (i965, nv50, nvc0, r600, radeonsi)
-  GL_ARB_shader_subroutine                             started (Dave)
-  GL_ARB_tessellation_shader                           started (Chris, Ilia)
-  GL_ARB_texture_buffer_object_rgb32                   DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_texture_cube_map_array                        DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_texture_gather                                DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
-  GL_ARB_texture_query_lod                             DONE (i965, nv50, nvc0, r600, radeonsi)
-  GL_ARB_transform_feedback2                           DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_transform_feedback3                           DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                               DONE (llvmpipe, softpipe)
+  GL_ARB_sample_shading                                DONE (i965, nv50, r600)
+  GL_ARB_shader_subroutine                             DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_tessellation_shader                           DONE ()
+  GL_ARB_texture_buffer_object_rgb32                   DONE (i965, r600, llvmpipe, softpipe)
+  GL_ARB_texture_cube_map_array                        DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_texture_gather                                DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600)
+  GL_ARB_transform_feedback2                           DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_transform_feedback3                           DONE (i965, nv50, r600, llvmpipe, softpipe)
 
 
-GL 4.1, GLSL 4.10:
+GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
 
-  GL_ARB_ES2_compatibility                             DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
+  GL_ARB_ES2_compatibility                             DONE (i965, nv50, r600, llvmpipe, softpipe)
   GL_ARB_get_program_binary                            DONE (0 binary formats)
   GL_ARB_separate_shader_objects                       DONE (all drivers)
-  GL_ARB_shader_precision                              started (Micah)
-  GL_ARB_vertex_attrib_64bit                           DONE (nvc0, softpipe)
-  GL_ARB_viewport_array                                DONE (i965, nv50, nvc0, r600, llvmpipe)
+  GL_ARB_shader_precision                              DONE (all drivers that support GLSL 4.10)
+  GL_ARB_vertex_attrib_64bit                           DONE (llvmpipe, softpipe)
+  GL_ARB_viewport_array                                DONE (i965, nv50, r600, llvmpipe)
 
 
 GL 4.2, GLSL 4.20:
@@ -139,7 +139,7 @@
   GL_ARB_texture_storage                               DONE (all drivers)
   GL_ARB_transform_feedback_instanced                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_base_instance                                 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_shader_image_load_store                       in progress (curro)
+  GL_ARB_shader_image_load_store                       DONE (i965)
   GL_ARB_conservative_depth                            DONE (all drivers that support GLSL 1.30)
   GL_ARB_shading_language_420pack                      DONE (all drivers that support GLSL 1.30)
   GL_ARB_shading_language_packing                      DONE (all drivers)
@@ -153,18 +153,18 @@
   GL_ARB_ES3_compatibility                             DONE (all drivers that support GLSL 3.30)
   GL_ARB_clear_buffer_object                           DONE (all drivers)
   GL_ARB_compute_shader                                in progress (jljusten)
-  GL_ARB_copy_image                                    DONE (i965)
+  GL_ARB_copy_image                                    DONE (i965) (gallium - in progress, VMware)
   GL_KHR_debug                                         DONE (all drivers)
   GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
-  GL_ARB_fragment_layer_viewport                       DONE (nv50, nvc0, r600, llvmpipe)
-  GL_ARB_framebuffer_no_attachments                    not started
+  GL_ARB_fragment_layer_viewport                       DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
+  GL_ARB_framebuffer_no_attachments                    DONE (i965)
   GL_ARB_internalformat_query2                         not started
   GL_ARB_invalidate_subdata                            DONE (all drivers)
   GL_ARB_multi_draw_indirect                           DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_program_interface_query                       DONE (all drivers)
   GL_ARB_robust_buffer_access_behavior                 not started
-  GL_ARB_shader_image_size                             in progress (Martin Peres)
-  GL_ARB_shader_storage_buffer_object                  not started
+  GL_ARB_shader_image_size                             DONE (i965)
+  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
   GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_texture_buffer_range                          DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
   GL_ARB_texture_query_levels                          DONE (all drivers that support GLSL 1.30)
@@ -177,7 +177,7 @@
 
   GL_MAX_VERTEX_ATTRIB_STRIDE                          DONE (all drivers)
   GL_ARB_buffer_storage                                DONE (i965, nv50, nvc0, r600, radeonsi)
-  GL_ARB_clear_texture                                 DONE (i965)
+  GL_ARB_clear_texture                                 DONE (i965) (gallium - in progress, VMware)
   GL_ARB_enhanced_layouts                              not started
   GL_ARB_multi_bind                                    DONE (all drivers)
   GL_ARB_query_buffer_object                           not started
@@ -189,20 +189,11 @@
 
   GL_ARB_ES3_1_compatibility                           not started
   GL_ARB_clip_control                                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
-  GL_ARB_conditional_render_inverted                   DONE (i965, nv50, nvc0, llvmpipe, softpipe)
-  GL_ARB_cull_distance                                 not started
-  GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600)
+  GL_ARB_conditional_render_inverted                   DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
+  GL_ARB_cull_distance                                 in progress (Tobias)
+  GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_direct_state_access                           DONE (all drivers)
-  - Transform Feedback object                          DONE
-  - Buffer object                                      DONE
-  - Framebuffer object                                 DONE
-  - Renderbuffer object                                DONE
-  - Texture object                                     DONE
-  - Vertex array object                                DONE
-  - Sampler object                                     DONE
-  - Program Pipeline object                            DONE
-  - Query object                                       DONE (will require changes when GL_ARB_query_buffer_object lands)
-  GL_ARB_get_texture_sub_image                         started (Brian Paul)
+  GL_ARB_get_texture_sub_image                         DONE (all drivers)
   GL_ARB_shader_texture_image_samples                  not started
   GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
   GL_KHR_context_flush_control                         DONE (all - but needs GLX/EXT extension to be useful)
@@ -216,12 +207,12 @@
   GL_ARB_compute_shader                                in progress (jljusten)
   GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
-  GL_ARB_framebuffer_no_attachments                    not started
+  GL_ARB_framebuffer_no_attachments                    DONE (i965)
   GL_ARB_program_interface_query                       DONE (all drivers)
   GL_ARB_shader_atomic_counters                        DONE (i965)
-  GL_ARB_shader_image_load_store                       in progress (curro)
-  GL_ARB_shader_image_size                             in progress (Martin Peres)
-  GL_ARB_shader_storage_buffer_object                  not started
+  GL_ARB_shader_image_load_store                       DONE (i965)
+  GL_ARB_shader_image_size                             DONE (i965)
+  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
   GL_ARB_shading_language_packing                      DONE (all drivers)
   GL_ARB_separate_shader_objects                       DONE (all drivers)
   GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/index.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/index.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/index.html	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/index.html	2015-09-16 14:36:08.000000000 +0000
@@ -16,6 +16,71 @@
 
 <h1>News</h1>
 
+<h2>August 22 2015</h2>
+<p>
+<a href="relnotes/10.6.5.html">Mesa 10.6.5</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>August 11 2015</h2>
+<p>
+<a href="relnotes/10.6.4.html">Mesa 10.6.4</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>July 26 2015</h2>
+<p>
+<a href="relnotes/10.6.3.html">Mesa 10.6.3</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>July 11 2015</h2>
+<p>
+<a href="relnotes/10.6.2.html">Mesa 10.6.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>July 04, 2015</h2>
+<p>
+<a href="relnotes/10.5.9.html">Mesa 10.5.9</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 10.5.9 will be the final release in the 10.5
+series. Users of 10.5 are encouraged to migrate to the 10.6 series in order
+to obtain future fixes.
+</p>
+
+<h2>June 29, 2015</h2>
+<p>
+<a href="relnotes/10.6.1.html">Mesa 10.6.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>June 20, 2015</h2>
+<p>
+<a href="relnotes/10.5.8.html">Mesa 10.5.8</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>June 14, 2015</h2>
+<p>
+<a href="relnotes/10.6.0.html">Mesa 10.6.0</a> is released.  This is a new
+development release.  See the release notes for more information about
+the release.
+</p>
+
+<h2>June 07, 2015</h2>
+<p>
+<a href="relnotes/10.5.7.html">Mesa 10.5.7</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>May 23, 2015</h2>
+<p>
+<a href="relnotes/10.5.6.html">Mesa 10.5.6</a> is released.
+This is a bug-fix release.
+</p>
+
 <h2>May 11, 2015</h2>
 <p>
 <a href="relnotes/10.5.5.html">Mesa 10.5.5</a> is released.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.6.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.6.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.6.html	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.6.html	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,147 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.6 Release Notes / May 23, 2015</h1>
+
+<p>
+Mesa 10.5.6 is a bug fix release which fixes bugs found since the 10.5.5 release.
+</p>
+<p>
+Mesa 10.5.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+89ff9cb08d0f6e3f34154864c3071253057cd21020759457c8ae27e0f70985d3  mesa-10.5.6.tar.gz
+66017853bde5f7a6647db3eede30512a091a3491daa1708e0ad8027c328ba595  mesa-10.5.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86792">Bug 86792</a> - [NVC0] Portal 2 Crashes in Wine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90147">Bug 90147</a> - swrast: build error undeclared _SC_PHYS_PAGES on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90350">Bug 90350</a> - [G96] Portal's portal are incorrectly rendered</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90363">Bug 90363</a> - [nv50] HW state is not reset correctly when using a new GL context</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: add new bonaire pci id</li>
+</ul>
+
+<p>Axel Davy (2):</p>
+<ul>
+  <li>egl/wayland: properly destroy wayland objects</li>
+  <li>glx/dri3: Add additional check for gpu offloading case</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: Add sha256 sums for the 10.5.5 release</li>
+  <li>egl/main: fix EGL_KHR_get_all_proc_addresses</li>
+  <li>targets/osmesa: drop the -module tag from LDFLAGS</li>
+  <li>Update version to 10.5.6</li>
+</ul>
+
+<p>Francisco Jerez (4):</p>
+<ul>
+  <li>clover: Refactor event::trigger and ::abort to prevent deadlock and reentrancy issues.</li>
+  <li>clover: Wrap event::_status in a method to prevent unlocked access.</li>
+  <li>clover: Implement locking of the wait_count, _chain and _status members of event.</li>
+  <li>i965: Fix PBO cache coherency issue after _mesa_meta_pbo_GetTexSubImage().</li>
+</ul>
+
+<p>Fredrik Höglund (2):</p>
+<ul>
+  <li>main: Require that the texture exists in framebuffer_texture</li>
+  <li>mesa: Generate GL_INVALID_VALUE in framebuffer_texture when layer &lt; 0</li>
+</ul>
+
+<p>Ilia Mirkin (7):</p>
+<ul>
+  <li>nv50/ir: only propagate saturate up if some actual folding took place</li>
+  <li>nv50: keep track of PGRAPH state in nv50_screen</li>
+  <li>nvc0: keep track of PGRAPH state in nvc0_screen</li>
+  <li>nvc0: reset the instanced elements state when doing blit using 3d engine</li>
+  <li>nv50/ir: only enable mul saturate on G200+</li>
+  <li>st/mesa: make sure to create a "clean" bool when doing i2b</li>
+  <li>nvc0: switch mechanism for shader eviction to be a while loop</li>
+</ul>
+
+<p>Jeremy Huddleston Sequoia (2):</p>
+<ul>
+  <li>swrast: Build fix for darwin</li>
+  <li>darwin: Fix install name of libOSMesa</li>
+</ul>
+
+<p>Laura Ekstrand (2):</p>
+<ul>
+  <li>main: Fix an error generated by FramebufferTexture</li>
+  <li>main: Complete error conditions for glInvalidate*Framebuffer.</li>
+</ul>
+
+<p>Marta Lofstedt (1):</p>
+<ul>
+  <li>main: glGetIntegeri_v fails for GL_VERTEX_BINDING_STRIDE</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>freedreno: enable a306</li>
+  <li>freedreno: fix bug in tile/slot calculation</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>draw: (trivial) fix out-of-bounds vector initialization</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>mesa: fix shininess check for ffvertex_prog v2</li>
+</ul>
+
+<p>Tom Stellard (2):</p>
+<ul>
+  <li>clover: Add a mutex to guard queue::queued_events</li>
+  <li>clover: Fix a bug with multi-threaded events v2</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.7.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.7.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.7.html	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.7.html	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,103 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.7 Release Notes / June 07, 2015</h1>
+
+<p>
+Mesa 10.5.7 is a bug fix release which fixes bugs found since the 10.5.6 release.
+</p>
+<p>
+Mesa 10.5.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+8f865ce497435fdf25d4e35f3b5551b2bcd5f9bc6570561183be82af20d18b82  mesa-10.5.7.tar.gz
+04d06890cd69af8089d6ca76f40e46dcf9cacfe4a9788b32be620574d4638818  mesa-10.5.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965: Emit 3DSTATE_MULTISAMPLE before WM_HZ_OP (gen8+)</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: Add sha256sums for the 10.5.6 release</li>
+  <li>get-pick-list.sh: Require explicit "10.5" for nominating stable patches</li>
+  <li>cherry-ignore: add clover build fix not applicable for 10.5</li>
+  <li>Update version to 10.5.7</li>
+</ul>
+
+<p>Ilia Mirkin (18):</p>
+<ul>
+  <li>nvc0/ir: set ftz when sources are floats, not just destinations</li>
+  <li>nv50/ir: guess that the constant offset is the starting slot of array</li>
+  <li>nvc0/ir: LOAD's can't be used for shader inputs</li>
+  <li>nvc0: a geometry shader can have up to 1024 vertices output</li>
+  <li>nv50/ir: avoid messing up arg1 of PFETCH</li>
+  <li>nv30: don't leak fragprog consts</li>
+  <li>nv30: avoid leaking render state and draw shaders</li>
+  <li>nv30: fix clip plane uploads and enable changes</li>
+  <li>nv30/draw: avoid leaving stale pointers in draw state</li>
+  <li>nv30/draw: draw expects constbuf size in bytes, not vec4 units</li>
+  <li>st/mesa: don't leak glsl_to_tgsi object on link failure</li>
+  <li>glsl: avoid leaking linked gl_shader when there's a late linker error</li>
+  <li>nv30/draw: fix indexed draws with swtnl path and a resource index buffer</li>
+  <li>nv30/draw: only use the DMA1 object (GART) if the bo is not in VRAM</li>
+  <li>nv30/draw: allocate vertex buffers in gart</li>
+  <li>nv30/draw: switch varying hookup logic to know about texcoords</li>
+  <li>nv30: falling back to draw path for edgeflag does no good</li>
+  <li>nv30: avoid doing extra work on clear and hitting unexpected states</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Fix implied_mrf_writes for scratch writes</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/dri: fix postprocessing crash when there's no depth buffer</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.8.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.8.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.8.html	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.8.html	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,112 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.8 Release Notes / June 20, 2015</h1>
+
+<p>
+Mesa 10.5.8 is a bug fix release which fixes bugs found since the 10.5.7 release.
+</p>
+<p>
+Mesa 10.5.8 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+611ddcfa3c1bf13f7e6ccac785c8749c3b74c9a78452bac70f8372cf6b209aa0  mesa-10.5.8.tar.gz
+2866b855c5299a4aed066338c77ff6467c389b2c30ada7647be8758663da2b54  mesa-10.5.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965: Disable compaction for EOT send messages</li>
+</ul>
+
+<p>Boyan Ding (1):</p>
+<ul>
+  <li>egl/x11: Set version of swrastLoader to 2</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: Add sha256sums for the 10.5.7 release</li>
+  <li>Update version to 10.5.8</li>
+</ul>
+
+<p>Erik Faye-Lund (1):</p>
+<ul>
+  <li>mesa: build xmlconfig to a separate static library</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>i965: Don't compact instructions with unmapped bits.</li>
+</ul>
+
+<p>Ilia Mirkin (3):</p>
+<ul>
+  <li>nvc0/ir: fix collection of first uses for texture barrier insertion</li>
+  <li>nv50,nvc0: clamp uniform size to 64k</li>
+  <li>nvc0/ir: can't have a join on a load with an indirect source</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Don't let the EOT send message interfere with the MRF hack</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>egl: fix setting context flags</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>draw: (trivial) fix NULL pointer dereference</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.9.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.9.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/10.5.9.html	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/10.5.9.html	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,140 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.9 Release Notes / July 04, 2015</h1>
+
+<p>
+Mesa 10.5.9 is a bug fix release which fixes bugs found since the 10.5.8 release.
+</p>
+<p>
+Mesa 10.5.9 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+0c081b59572ee9732e7438d34adc3817fe8cc8d4b58abc0e71fd4b4c904945cb  mesa-10.5.9.tar.gz
+71c69f31d3dbc35cfa79950e58a01d27030378d8c7ef1259a0b31d4d0487f4ec  mesa-10.5.9.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=88999">Bug 88999</a> - [SKL] Compiz crashes after opening unity dash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89118">Bug 89118</a> - [SKL Bisected]many Ogles3conform cases core dumped</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (2):</p>
+<ul>
+  <li>i965/gen9: Implement Push Constant Buffer workaround</li>
+  <li>i965/skl: Use 1 register for uniform pull constant payload</li>
+</ul>
+
+<p>Boyan Ding (1):</p>
+<ul>
+  <li>egl/x11: Remove duplicate call to dri2_x11_add_configs_for_visuals</li>
+</ul>
+
+<p>Chris Wilson (3):</p>
+<ul>
+  <li>i965: Fix HW blitter pitch limits</li>
+  <li>i915: Blit RGBX&lt;-&gt;RGBA drawpixels</li>
+  <li>i965: Export format comparison for blitting between miptrees</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: Add sha256sums for the 10.5.8 release</li>
+  <li>configure: warn about shared_glapi &amp; xlib-glx only when both are set</li>
+  <li>configure: error out when building backend-less libEGL</li>
+  <li>configure: error out when building libEGL without shared-glapi</li>
+  <li>gbm: do not (over)link against libglapi.so</li>
+  <li>Update version to 10.5.9</li>
+</ul>
+
+<p>Frank Henigman (1):</p>
+<ul>
+  <li>gbm: dlopen libglapi so gbm_create_device works</li>
+</ul>
+
+<p>Ilia Mirkin (8):</p>
+<ul>
+  <li>glsl: add version checks to conditionals for builtin variable enablement</li>
+  <li>mesa: add GL_PROGRAM_PIPELINE support in KHR_debug calls</li>
+  <li>glsl: binding point is a texture unit, which is a combined space</li>
+  <li>nvc0: always put all tfb bufs into bufctx</li>
+  <li>nv50,nvc0: make sure to pushbuf_refn before putting bo into pushbuf_data</li>
+  <li>nv50/ir: propagate modifier to right arg when const-folding mad</li>
+  <li>nv50/ir: fix emission of address reg in 3rd source</li>
+  <li>nv50/ir: copy joinAt when splitting both before and after</li>
+</ul>
+
+<p>Mario Kleiner (2):</p>
+<ul>
+  <li>nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.</li>
+  <li>winsys/radeon: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>winsys/radeon: Unmap GPU VM address range when destroying BO</li>
+</ul>
+
+<p>Tapani Pälli (6):</p>
+<ul>
+  <li>glsl: Allow dynamic sampler array indexing with GLSL ES &lt; 3.00</li>
+  <li>mesa/glsl: new compiler option EmitNoIndirectSampler</li>
+  <li>i915: use EmitNoIndirectSampler</li>
+  <li>mesa/st: use EmitNoIndirectSampler if !ARB_gpu_shader5</li>
+  <li>i965: use EmitNoIndirectSampler for gen &lt; 7</li>
+  <li>glsl: validate sampler array indexing for 'constant-index-expression'</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/11.0.0.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/11.0.0.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes/11.0.0.html	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes/11.0.0.html	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,259 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>
+
+<p>
+Mesa 11.0.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 11.0.1.
+</p>
+<p>
+Mesa 11.0.0 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3  mesa-11.0.0.tar.gz
+e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32  mesa-11.0.0.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>New hardware support for AMD GCN 1.2 GPUs: Tonga, Iceland, Carrizo, Fiji</li>
+<li>OpenGL 4.1 on radeonsi, nvc0</li>
+<li>OpenGL ES 3.0 on freedreno (a3xx, a4xx)</li>
+<li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
+<li>GL_ARB_conditional_render_inverted on r600, radeonsi</li>
+<li>GL_ARB_depth_buffer_float on a4xx</li>
+<li>GL_ARB_derivative_control on radeonsi</li>
+<li>GL_ARB_draw_buffers, GL_ARB_draw_buffers_blend on a4xx</li>
+<li>GL_ARB_fragment_layer_viewport on radeonsi</li>
+<li>GL_ARB_framebuffer_no_attachments on i965</li>
+<li>GL_ARB_get_texture_sub_image for all drivers</li>
+<li>GL_ARB_gpu_shader5 on radeonsi</li>
+<li>GL_ARB_gpu_shader_fp64 on llvmpipe, radeonsi</li>
+<li>GL_ARB_shader_image_load_store on i965</li>
+<li>GL_ARB_shader_precision on radeonsi, nvc0</li>
+<li>GL_ARB_shader_image_size on i965</li>
+<li>GL_ARB_shader_stencil_export on llvmpipe</li>
+<li>GL_ARB_shader_subroutine on core profile all drivers</li>
+<li>GL_ARB_tessellation_shader on nvc0, radeonsi</li>
+<li>GL_ARB_transform_feedback2, GL_ARB_transform_feedback_instanced, GL_EXT_transform_feedback on a3xx, a4xx</li>
+<li>GL_ARB_vertex_attrib_64bit on llvmpipe, radeonsi</li>
+<li>GL_ARB_viewport_array on radeonsi</li>
+<li>GL_EXT_depth_bounds_test on radeonsi, nv30, nv50, nvc0</li>
+<li>GL_EXT_texture_compression_s3tc on freedreno (a3xx)</li>
+<li>GL_NV_read_depth (GLES) on all drivers</li>
+<li>GL_NV_read_depth_stencil (GLES) on all drivers</li>
+<li>GL_NV_read_stencil (GLES) on all drivers</li>
+<li>GL_OES_texture_float on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe</li>
+<li>GL_OES_texture_half_float on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe</li>
+<li>GL_OES_texture_float_linear on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe</li>
+<li>GL_OES_texture_half_float_linear on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe</li>
+<li>GL_EXT_draw_buffers2 on a4xx</li>
+<li>GLX_ARB_create_context_robustness on r600, radeonsi</li>
+<li>EGL_EXT_create_context_robustness on r600, radeonsi</li>
+<li>EGL_KHR_gl_colorspace on r600, radeonsi, nv50, nvc0</li>
+<li>EGL_KHR_gl_texture_3D_image on r600, radeonsi, nv50, nvc0</li>
+<li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
+</ul>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error undefined reference to `nir_metadata_preserve'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean&#64;vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings &gt;max_varying_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50&#64;execution&#64;geometry&#64;max-input-components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regression) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAddress(&quot;OSMesaPixelStore&quot;) returns nil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error &quot;Unsupported or undefined LONG_BIT&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image &amp; DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<ul><li>Removed the EGL loader from the Linux SCons build.</li></ul>
+
+</div>
+</body>
+</html>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes.html mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes.html
--- mesa-10.6.5~git20150829+10.6.fa342251/docs/relnotes.html	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/docs/relnotes.html	2015-09-16 14:36:08.000000000 +0000
@@ -21,6 +21,16 @@
 </p>
 
 <ul>
+<li><a href="relnotes/10.6.5.html">10.6.5 release notes</a>
+<li><a href="relnotes/10.6.4.html">10.6.4 release notes</a>
+<li><a href="relnotes/10.6.3.html">10.6.3 release notes</a>
+<li><a href="relnotes/10.6.2.html">10.6.2 release notes</a>
+<li><a href="relnotes/10.5.9.html">10.5.9 release notes</a>
+<li><a href="relnotes/10.6.1.html">10.6.1 release notes</a>
+<li><a href="relnotes/10.5.8.html">10.5.8 release notes</a>
+<li><a href="relnotes/10.6.0.html">10.6.0 release notes</a>
+<li><a href="relnotes/10.5.7.html">10.5.7 release notes</a>
+<li><a href="relnotes/10.5.6.html">10.5.6 release notes</a>
 <li><a href="relnotes/10.5.5.html">10.5.5 release notes</a>
 <li><a href="relnotes/10.5.4.html">10.5.4 release notes</a>
 <li><a href="relnotes/10.5.3.html">10.5.3 release notes</a>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/doxygen/.gitignore mesa-11.0.0~git20150916+11.0.c4bae579/doxygen/.gitignore
--- mesa-10.6.5~git20150829+10.6.fa342251/doxygen/.gitignore	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/doxygen/.gitignore	2015-09-16 14:36:08.000000000 +0000
@@ -1,3 +1,4 @@
+*.db
 *.tag
 *.tmp
 agpgart
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/doxygen/Makefile mesa-11.0.0~git20150916+11.0.c4bae579/doxygen/Makefile
--- mesa-10.6.5~git20150829+10.6.fa342251/doxygen/Makefile	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/doxygen/Makefile	2015-09-16 14:36:08.000000000 +0000
@@ -33,3 +33,4 @@
 clean:
 	-rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=)
 	-rm -rf *.tag
+	-rm -rf *.db
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/c99_math.h mesa-11.0.0~git20150916+11.0.c4bae579/include/c99_math.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/c99_math.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/c99_math.h	2015-09-16 14:36:08.000000000 +0000
@@ -140,6 +140,18 @@
    return rounded;
 }
 
+static inline float
+exp2f(float f)
+{
+   return powf(2.0f, f);
+}
+
+static inline double
+exp2(double d)
+{
+   return pow(2.0, d);
+}
+
 #endif /* C99 */
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/eglext.h mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/eglext.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/eglext.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/eglext.h	2015-09-16 14:36:08.000000000 +0000
@@ -6,7 +6,7 @@
 #endif
 
 /*
-** Copyright (c) 2013 The Khronos Group Inc.
+** Copyright (c) 2013-2014 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -33,12 +33,12 @@
 ** used to make the header, and the header can be found at
 **   http://www.opengl.org/registry/
 **
-** Khronos $Revision: 24567 $ on $Date: 2013-12-18 09:50:17 -0800 (Wed, 18 Dec 2013) $
+** Khronos $Revision$ on $Date$
 */
 
 #include <EGL/eglplatform.h>
 
-#define EGL_EGLEXT_VERSION 20131218
+#define EGL_EGLEXT_VERSION 20150508
 
 /* Generated C header for:
  * API: egl
@@ -94,12 +94,28 @@
 #define EGL_OPENGL_ES3_BIT_KHR            0x00000040
 #endif /* EGL_KHR_create_context */
 
+#ifndef EGL_KHR_create_context_no_error
+#define EGL_KHR_create_context_no_error 1
+#define EGL_CONTEXT_OPENGL_NO_ERROR_KHR   0x31B3
+#endif /* EGL_KHR_create_context_no_error */
+
 #ifndef EGL_KHR_fence_sync
 #define EGL_KHR_fence_sync 1
+typedef khronos_utime_nanoseconds_t EGLTimeKHR;
 #ifdef KHRONOS_SUPPORT_INT64
 #define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0
 #define EGL_SYNC_CONDITION_KHR            0x30F8
 #define EGL_SYNC_FENCE_KHR                0x30F9
+typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync);
+typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync);
+EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value);
+#endif
 #endif /* KHRONOS_SUPPORT_INT64 */
 #endif /* EGL_KHR_fence_sync */
 
@@ -207,9 +223,38 @@
 #endif
 #endif /* EGL_KHR_lock_surface3 */
 
+#ifndef EGL_KHR_partial_update
+#define EGL_KHR_partial_update 1
+#define EGL_BUFFER_AGE_KHR                0x313D
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSETDAMAGEREGIONKHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSetDamageRegionKHR (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
+#endif
+#endif /* EGL_KHR_partial_update */
+
+#ifndef EGL_KHR_platform_android
+#define EGL_KHR_platform_android 1
+#define EGL_PLATFORM_ANDROID_KHR          0x3141
+#endif /* EGL_KHR_platform_android */
+
+#ifndef EGL_KHR_platform_gbm
+#define EGL_KHR_platform_gbm 1
+#define EGL_PLATFORM_GBM_KHR              0x31D7
+#endif /* EGL_KHR_platform_gbm */
+
+#ifndef EGL_KHR_platform_wayland
+#define EGL_KHR_platform_wayland 1
+#define EGL_PLATFORM_WAYLAND_KHR          0x31D8
+#endif /* EGL_KHR_platform_wayland */
+
+#ifndef EGL_KHR_platform_x11
+#define EGL_KHR_platform_x11 1
+#define EGL_PLATFORM_X11_KHR              0x31D5
+#define EGL_PLATFORM_X11_SCREEN_KHR       0x31D6
+#endif /* EGL_KHR_platform_x11 */
+
 #ifndef EGL_KHR_reusable_sync
 #define EGL_KHR_reusable_sync 1
-typedef khronos_utime_nanoseconds_t EGLTimeKHR;
 #ifdef KHRONOS_SUPPORT_INT64
 #define EGL_SYNC_STATUS_KHR               0x30F1
 #define EGL_SIGNALED_KHR                  0x30F2
@@ -221,17 +266,9 @@
 #define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR   0x0001
 #define EGL_FOREVER_KHR                   0xFFFFFFFFFFFFFFFFull
 #define EGL_NO_SYNC_KHR                   ((EGLSyncKHR)0)
-typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list);
-typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync);
-typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
 typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode);
-typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value);
 #ifdef EGL_EGLEXT_PROTOTYPES
-EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list);
-EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync);
-EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
 EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode);
-EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value);
 #endif
 #endif /* KHRONOS_SUPPORT_INT64 */
 #endif /* EGL_KHR_reusable_sync */
@@ -333,6 +370,14 @@
 #define EGL_KHR_surfaceless_context 1
 #endif /* EGL_KHR_surfaceless_context */
 
+#ifndef EGL_KHR_swap_buffers_with_damage
+#define EGL_KHR_swap_buffers_with_damage 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEKHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageKHR (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
+#endif
+#endif /* EGL_KHR_swap_buffers_with_damage */
+
 #ifndef EGL_KHR_vg_parent_image
 #define EGL_KHR_vg_parent_image 1
 #define EGL_VG_PARENT_IMAGE_KHR           0x30BA
@@ -389,6 +434,12 @@
 #define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200
 #endif /* EGL_ANGLE_d3d_share_handle_client_buffer */
 
+#ifndef EGL_ANGLE_device_d3d
+#define EGL_ANGLE_device_d3d 1
+#define EGL_D3D9_DEVICE_ANGLE             0x33A0
+#define EGL_D3D11_DEVICE_ANGLE            0x33A1
+#endif /* EGL_ANGLE_device_d3d */
+
 #ifndef EGL_ANGLE_query_surface_pointer
 #define EGL_ANGLE_query_surface_pointer 1
 typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value);
@@ -401,6 +452,11 @@
 #define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1
 #endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */
 
+#ifndef EGL_ANGLE_window_fixed_size
+#define EGL_ANGLE_window_fixed_size 1
+#define EGL_FIXED_SIZE_ANGLE              0x3201
+#endif /* EGL_ANGLE_window_fixed_size */
+
 #ifndef EGL_ARM_pixmap_multisample_discard
 #define EGL_ARM_pixmap_multisample_discard 1
 #define EGL_DISCARD_SAMPLES_ARM           0x3286
@@ -423,6 +479,42 @@
 #define EGL_LOSE_CONTEXT_ON_RESET_EXT     0x31BF
 #endif /* EGL_EXT_create_context_robustness */
 
+#ifndef EGL_EXT_device_base
+#define EGL_EXT_device_base 1
+typedef void *EGLDeviceEXT;
+#define EGL_NO_DEVICE_EXT                 ((EGLDeviceEXT)(0))
+#define EGL_BAD_DEVICE_EXT                0x322B
+#define EGL_DEVICE_EXT                    0x322C
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICEATTRIBEXTPROC) (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value);
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYDEVICESTRINGEXTPROC) (EGLDeviceEXT device, EGLint name);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICESEXTPROC) (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBEXTPROC) (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDeviceAttribEXT (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value);
+EGLAPI const char *EGLAPIENTRY eglQueryDeviceStringEXT (EGLDeviceEXT device, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDevicesEXT (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribEXT (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+#endif
+#endif /* EGL_EXT_device_base */
+
+#ifndef EGL_EXT_device_drm
+#define EGL_EXT_device_drm 1
+#define EGL_DRM_DEVICE_FILE_EXT           0x3233
+#endif /* EGL_EXT_device_drm */
+
+#ifndef EGL_EXT_device_enumeration
+#define EGL_EXT_device_enumeration 1
+#endif /* EGL_EXT_device_enumeration */
+
+#ifndef EGL_EXT_device_openwf
+#define EGL_EXT_device_openwf 1
+#define EGL_OPENWF_DEVICE_ID_EXT          0x3237
+#endif /* EGL_EXT_device_openwf */
+
+#ifndef EGL_EXT_device_query
+#define EGL_EXT_device_query 1
+#endif /* EGL_EXT_device_query */
+
 #ifndef EGL_EXT_image_dma_buf_import
 #define EGL_EXT_image_dma_buf_import 1
 #define EGL_LINUX_DMA_BUF_EXT             0x3270
@@ -454,6 +546,48 @@
 #define EGL_MULTIVIEW_VIEW_COUNT_EXT      0x3134
 #endif /* EGL_EXT_multiview_window */
 
+#ifndef EGL_EXT_output_base
+#define EGL_EXT_output_base 1
+typedef void *EGLOutputLayerEXT;
+typedef void *EGLOutputPortEXT;
+#define EGL_NO_OUTPUT_LAYER_EXT           ((EGLOutputLayerEXT)0)
+#define EGL_NO_OUTPUT_PORT_EXT            ((EGLOutputPortEXT)0)
+#define EGL_BAD_OUTPUT_LAYER_EXT          0x322D
+#define EGL_BAD_OUTPUT_PORT_EXT           0x322E
+#define EGL_SWAP_INTERVAL_EXT             0x322F
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTLAYERSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTPORTSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint *num_ports);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value);
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value);
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputLayersEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputPortsEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint *num_ports);
+EGLAPI EGLBoolean EGLAPIENTRY eglOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value);
+EGLAPI const char *EGLAPIENTRY eglQueryOutputLayerStringEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value);
+EGLAPI const char *EGLAPIENTRY eglQueryOutputPortStringEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name);
+#endif
+#endif /* EGL_EXT_output_base */
+
+#ifndef EGL_EXT_output_drm
+#define EGL_EXT_output_drm 1
+#define EGL_DRM_CRTC_EXT                  0x3234
+#define EGL_DRM_PLANE_EXT                 0x3235
+#define EGL_DRM_CONNECTOR_EXT             0x3236
+#endif /* EGL_EXT_output_drm */
+
+#ifndef EGL_EXT_output_openwf
+#define EGL_EXT_output_openwf 1
+#define EGL_OPENWF_PIPELINE_ID_EXT        0x3238
+#define EGL_OPENWF_PORT_ID_EXT            0x3239
+#endif /* EGL_EXT_output_openwf */
+
 #ifndef EGL_EXT_platform_base
 #define EGL_EXT_platform_base 1
 typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list);
@@ -466,6 +600,11 @@
 #endif
 #endif /* EGL_EXT_platform_base */
 
+#ifndef EGL_EXT_platform_device
+#define EGL_EXT_platform_device 1
+#define EGL_PLATFORM_DEVICE_EXT           0x313F
+#endif /* EGL_EXT_platform_device */
+
 #ifndef EGL_EXT_platform_wayland
 #define EGL_EXT_platform_wayland 1
 #define EGL_PLATFORM_WAYLAND_EXT          0x31D8
@@ -477,6 +616,19 @@
 #define EGL_PLATFORM_X11_SCREEN_EXT       0x31D6
 #endif /* EGL_EXT_platform_x11 */
 
+#ifndef EGL_EXT_protected_surface
+#define EGL_EXT_protected_surface 1
+#define EGL_PROTECTED_CONTENT_EXT         0x32C0
+#endif /* EGL_EXT_protected_surface */
+
+#ifndef EGL_EXT_stream_consumer_egloutput
+#define EGL_EXT_stream_consumer_egloutput 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMEROUTPUTEXTPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerOutputEXT (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer);
+#endif
+#endif /* EGL_EXT_stream_consumer_egloutput */
+
 #ifndef EGL_EXT_swap_buffers_with_damage
 #define EGL_EXT_swap_buffers_with_damage 1
 typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
@@ -485,6 +637,35 @@
 #endif
 #endif /* EGL_EXT_swap_buffers_with_damage */
 
+#ifndef EGL_EXT_yuv_surface
+#define EGL_EXT_yuv_surface 1
+#define EGL_YUV_ORDER_EXT                 0x3301
+#define EGL_YUV_NUMBER_OF_PLANES_EXT      0x3311
+#define EGL_YUV_SUBSAMPLE_EXT             0x3312
+#define EGL_YUV_DEPTH_RANGE_EXT           0x3317
+#define EGL_YUV_CSC_STANDARD_EXT          0x330A
+#define EGL_YUV_PLANE_BPP_EXT             0x331A
+#define EGL_YUV_BUFFER_EXT                0x3300
+#define EGL_YUV_ORDER_YUV_EXT             0x3302
+#define EGL_YUV_ORDER_YVU_EXT             0x3303
+#define EGL_YUV_ORDER_YUYV_EXT            0x3304
+#define EGL_YUV_ORDER_UYVY_EXT            0x3305
+#define EGL_YUV_ORDER_YVYU_EXT            0x3306
+#define EGL_YUV_ORDER_VYUY_EXT            0x3307
+#define EGL_YUV_ORDER_AYUV_EXT            0x3308
+#define EGL_YUV_SUBSAMPLE_4_2_0_EXT       0x3313
+#define EGL_YUV_SUBSAMPLE_4_2_2_EXT       0x3314
+#define EGL_YUV_SUBSAMPLE_4_4_4_EXT       0x3315
+#define EGL_YUV_DEPTH_RANGE_LIMITED_EXT   0x3318
+#define EGL_YUV_DEPTH_RANGE_FULL_EXT      0x3319
+#define EGL_YUV_CSC_STANDARD_601_EXT      0x330B
+#define EGL_YUV_CSC_STANDARD_709_EXT      0x330C
+#define EGL_YUV_CSC_STANDARD_2020_EXT     0x330D
+#define EGL_YUV_PLANE_BPP_0_EXT           0x331B
+#define EGL_YUV_PLANE_BPP_8_EXT           0x331C
+#define EGL_YUV_PLANE_BPP_10_EXT          0x331D
+#endif /* EGL_EXT_yuv_surface */
+
 #ifndef EGL_HI_clientpixmap
 #define EGL_HI_clientpixmap 1
 struct EGLClientPixmapHI {
@@ -533,11 +714,42 @@
 #endif
 #endif /* EGL_MESA_drm_image */
 
+#ifndef EGL_MESA_image_dma_buf_export
+#define EGL_MESA_image_dma_buf_export 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC) (EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes, EGLuint64KHR *modifiers);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageQueryMESA (EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes, EGLuint64KHR *modifiers);
+EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets);
+#endif
+#endif /* EGL_MESA_image_dma_buf_export */
+
 #ifndef EGL_MESA_platform_gbm
 #define EGL_MESA_platform_gbm 1
 #define EGL_PLATFORM_GBM_MESA             0x31D7
 #endif /* EGL_MESA_platform_gbm */
 
+#ifndef EGL_NOK_swap_region
+#define EGL_NOK_swap_region 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#endif
+#endif /* EGL_NOK_swap_region */
+
+#ifndef EGL_NOK_swap_region2
+#define EGL_NOK_swap_region2 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGION2NOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegion2NOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#endif
+#endif /* EGL_NOK_swap_region2 */
+
+#ifndef EGL_NOK_texture_from_pixmap
+#define EGL_NOK_texture_from_pixmap 1
+#define EGL_Y_INVERTED_NOK                0x307F
+#endif /* EGL_NOK_texture_from_pixmap */
+
 #ifndef EGL_NV_3dvision_surface
 #define EGL_NV_3dvision_surface 1
 #define EGL_AUTO_STEREO_NV                0x3136
@@ -556,6 +768,13 @@
 #define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133
 #endif /* EGL_NV_coverage_sample_resolve */
 
+#ifndef EGL_NV_cuda_event
+#define EGL_NV_cuda_event 1
+#define EGL_CUDA_EVENT_HANDLE_NV          0x323B
+#define EGL_SYNC_CUDA_EVENT_NV            0x323C
+#define EGL_SYNC_CUDA_EVENT_COMPLETE_NV   0x323D
+#endif /* EGL_NV_cuda_event */
+
 #ifndef EGL_NV_depth_nonlinear
 #define EGL_NV_depth_nonlinear 1
 #define EGL_DEPTH_ENCODING_NV             0x30E2
@@ -563,6 +782,11 @@
 #define EGL_DEPTH_ENCODING_NONLINEAR_NV   0x30E3
 #endif /* EGL_NV_depth_nonlinear */
 
+#ifndef EGL_NV_device_cuda
+#define EGL_NV_device_cuda 1
+#define EGL_CUDA_DEVICE_NV                0x323A
+#endif /* EGL_NV_device_cuda */
+
 #ifndef EGL_NV_native_query
 #define EGL_NV_native_query 1
 typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id);
@@ -645,6 +869,16 @@
 #endif /* KHRONOS_SUPPORT_INT64 */
 #endif /* EGL_NV_system_time */
 
+#ifndef EGL_TIZEN_image_native_buffer
+#define EGL_TIZEN_image_native_buffer 1
+#define EGL_NATIVE_BUFFER_TIZEN           0x32A0
+#endif /* EGL_TIZEN_image_native_buffer */
+
+#ifndef EGL_TIZEN_image_native_surface
+#define EGL_TIZEN_image_native_surface 1
+#define EGL_NATIVE_SURFACE_TIZEN          0x32A1
+#endif /* EGL_TIZEN_image_native_surface */
+
 #include <EGL/eglmesaext.h>
 #include <EGL/eglextchromium.h>
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/egl.h mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/egl.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/egl.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/egl.h	2015-09-16 14:36:08.000000000 +0000
@@ -1,11 +1,12 @@
-/* -*- mode: c; tab-width: 8; -*- */
-/* vi: set sw=4 ts=8: */
-/* Reference version of egl.h for EGL 1.4.
- * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $
- */
+#ifndef __egl_h_
+#define __egl_h_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 /*
-** Copyright (c) 2007-2009 The Khronos Group Inc.
+** Copyright (c) 2013-2014 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -26,304 +27,277 @@
 ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 ** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 */
+/*
+** This header is generated from the Khronos OpenGL / OpenGL ES XML
+** API Registry. The current version of the Registry, generator scripts
+** used to make the header, and the header can be found at
+**   http://www.opengl.org/registry/
+**
+** Khronos $Revision: 31039 $ on $Date: 2015-05-04 17:01:57 -0700 (Mon, 04 May 2015) $
+*/
 
-#ifndef __egl_h_
-#define __egl_h_
-
-/* All platform-dependent types and macro boilerplate (such as EGLAPI
- * and EGLAPIENTRY) should go in eglplatform.h.
- */
 #include <EGL/eglplatform.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+/* Generated on date 20150504 */
+
+/* Generated C header for:
+ * API: egl
+ * Versions considered: .*
+ * Versions emitted: .*
+ * Default extensions included: None
+ * Additional extensions included: _nomatch_^
+ * Extensions removed: _nomatch_^
+ */
 
-/* EGL Types */
-/* EGLint is defined in eglplatform.h */
+#ifndef EGL_VERSION_1_0
+#define EGL_VERSION_1_0 1
 typedef unsigned int EGLBoolean;
-typedef unsigned int EGLenum;
-typedef void *EGLConfig;
-typedef void *EGLContext;
 typedef void *EGLDisplay;
+#include <KHR/khrplatform.h>
+#include <EGL/eglplatform.h>
+typedef void *EGLConfig;
 typedef void *EGLSurface;
-typedef void *EGLClientBuffer;
-
-/* EGL Versioning */
-#define EGL_VERSION_1_0			1
-#define EGL_VERSION_1_1			1
-#define EGL_VERSION_1_2			1
-#define EGL_VERSION_1_3			1
-#define EGL_VERSION_1_4			1
-
-/* EGL Enumerants. Bitmasks and other exceptional cases aside, most
- * enums are assigned unique values starting at 0x3000.
- */
-
-/* EGL aliases */
-#define EGL_FALSE			0
-#define EGL_TRUE			1
-
-/* Out-of-band handle values */
-#define EGL_DEFAULT_DISPLAY		((EGLNativeDisplayType)0)
-#define EGL_NO_CONTEXT			((EGLContext)0)
-#define EGL_NO_DISPLAY			((EGLDisplay)0)
-#define EGL_NO_SURFACE			((EGLSurface)0)
-
-/* Out-of-band attribute value */
-#define EGL_DONT_CARE			((EGLint)-1)
-
-/* Errors / GetError return values */
-#define EGL_SUCCESS			0x3000
-#define EGL_NOT_INITIALIZED		0x3001
-#define EGL_BAD_ACCESS			0x3002
-#define EGL_BAD_ALLOC			0x3003
-#define EGL_BAD_ATTRIBUTE		0x3004
-#define EGL_BAD_CONFIG			0x3005
-#define EGL_BAD_CONTEXT			0x3006
-#define EGL_BAD_CURRENT_SURFACE		0x3007
-#define EGL_BAD_DISPLAY			0x3008
-#define EGL_BAD_MATCH			0x3009
-#define EGL_BAD_NATIVE_PIXMAP		0x300A
-#define EGL_BAD_NATIVE_WINDOW		0x300B
-#define EGL_BAD_PARAMETER		0x300C
-#define EGL_BAD_SURFACE			0x300D
-#define EGL_CONTEXT_LOST		0x300E	/* EGL 1.1 - IMG_power_management */
-
-/* Reserved 0x300F-0x301F for additional errors */
-
-/* Config attributes */
-#define EGL_BUFFER_SIZE			0x3020
-#define EGL_ALPHA_SIZE			0x3021
-#define EGL_BLUE_SIZE			0x3022
-#define EGL_GREEN_SIZE			0x3023
-#define EGL_RED_SIZE			0x3024
-#define EGL_DEPTH_SIZE			0x3025
-#define EGL_STENCIL_SIZE		0x3026
-#define EGL_CONFIG_CAVEAT		0x3027
-#define EGL_CONFIG_ID			0x3028
-#define EGL_LEVEL			0x3029
-#define EGL_MAX_PBUFFER_HEIGHT		0x302A
-#define EGL_MAX_PBUFFER_PIXELS		0x302B
-#define EGL_MAX_PBUFFER_WIDTH		0x302C
-#define EGL_NATIVE_RENDERABLE		0x302D
-#define EGL_NATIVE_VISUAL_ID		0x302E
-#define EGL_NATIVE_VISUAL_TYPE		0x302F
-#define EGL_SAMPLES			0x3031
-#define EGL_SAMPLE_BUFFERS		0x3032
-#define EGL_SURFACE_TYPE		0x3033
-#define EGL_TRANSPARENT_TYPE		0x3034
-#define EGL_TRANSPARENT_BLUE_VALUE	0x3035
-#define EGL_TRANSPARENT_GREEN_VALUE	0x3036
-#define EGL_TRANSPARENT_RED_VALUE	0x3037
-#define EGL_NONE			0x3038	/* Attrib list terminator */
-#define EGL_BIND_TO_TEXTURE_RGB		0x3039
-#define EGL_BIND_TO_TEXTURE_RGBA	0x303A
-#define EGL_MIN_SWAP_INTERVAL		0x303B
-#define EGL_MAX_SWAP_INTERVAL		0x303C
-#define EGL_LUMINANCE_SIZE		0x303D
-#define EGL_ALPHA_MASK_SIZE		0x303E
-#define EGL_COLOR_BUFFER_TYPE		0x303F
-#define EGL_RENDERABLE_TYPE		0x3040
-#define EGL_MATCH_NATIVE_PIXMAP		0x3041	/* Pseudo-attribute (not queryable) */
-#define EGL_CONFORMANT			0x3042
-
-/* Reserved 0x3041-0x304F for additional config attributes */
-
-/* Config attribute values */
-#define EGL_SLOW_CONFIG			0x3050	/* EGL_CONFIG_CAVEAT value */
-#define EGL_NON_CONFORMANT_CONFIG	0x3051	/* EGL_CONFIG_CAVEAT value */
-#define EGL_TRANSPARENT_RGB		0x3052	/* EGL_TRANSPARENT_TYPE value */
-#define EGL_RGB_BUFFER			0x308E	/* EGL_COLOR_BUFFER_TYPE value */
-#define EGL_LUMINANCE_BUFFER		0x308F	/* EGL_COLOR_BUFFER_TYPE value */
-
-/* More config attribute values, for EGL_TEXTURE_FORMAT */
-#define EGL_NO_TEXTURE			0x305C
-#define EGL_TEXTURE_RGB			0x305D
-#define EGL_TEXTURE_RGBA		0x305E
-#define EGL_TEXTURE_2D			0x305F
-
-/* Config attribute mask bits */
-#define EGL_PBUFFER_BIT			0x0001	/* EGL_SURFACE_TYPE mask bits */
-#define EGL_PIXMAP_BIT			0x0002	/* EGL_SURFACE_TYPE mask bits */
-#define EGL_WINDOW_BIT			0x0004	/* EGL_SURFACE_TYPE mask bits */
-#define EGL_VG_COLORSPACE_LINEAR_BIT	0x0020	/* EGL_SURFACE_TYPE mask bits */
-#define EGL_VG_ALPHA_FORMAT_PRE_BIT	0x0040	/* EGL_SURFACE_TYPE mask bits */
-#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200	/* EGL_SURFACE_TYPE mask bits */
-#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400	/* EGL_SURFACE_TYPE mask bits */
-
-#define EGL_OPENGL_ES_BIT		0x0001	/* EGL_RENDERABLE_TYPE mask bits */
-#define EGL_OPENVG_BIT			0x0002	/* EGL_RENDERABLE_TYPE mask bits */
-#define EGL_OPENGL_ES2_BIT		0x0004	/* EGL_RENDERABLE_TYPE mask bits */
-#define EGL_OPENGL_BIT			0x0008	/* EGL_RENDERABLE_TYPE mask bits */
-
-/* QueryString targets */
-#define EGL_VENDOR			0x3053
-#define EGL_VERSION			0x3054
-#define EGL_EXTENSIONS			0x3055
-#define EGL_CLIENT_APIS			0x308D
-
-/* QuerySurface / SurfaceAttrib / CreatePbufferSurface targets */
-#define EGL_HEIGHT			0x3056
-#define EGL_WIDTH			0x3057
-#define EGL_LARGEST_PBUFFER		0x3058
-#define EGL_TEXTURE_FORMAT		0x3080
-#define EGL_TEXTURE_TARGET		0x3081
-#define EGL_MIPMAP_TEXTURE		0x3082
-#define EGL_MIPMAP_LEVEL		0x3083
-#define EGL_RENDER_BUFFER		0x3086
-#define EGL_VG_COLORSPACE		0x3087
-#define EGL_VG_ALPHA_FORMAT		0x3088
-#define EGL_HORIZONTAL_RESOLUTION	0x3090
-#define EGL_VERTICAL_RESOLUTION		0x3091
-#define EGL_PIXEL_ASPECT_RATIO		0x3092
-#define EGL_SWAP_BEHAVIOR		0x3093
-#define EGL_MULTISAMPLE_RESOLVE		0x3099
-
-/* EGL_RENDER_BUFFER values / BindTexImage / ReleaseTexImage buffer targets */
-#define EGL_BACK_BUFFER			0x3084
-#define EGL_SINGLE_BUFFER		0x3085
-
-/* OpenVG color spaces */
-#define EGL_VG_COLORSPACE_sRGB		0x3089	/* EGL_VG_COLORSPACE value */
-#define EGL_VG_COLORSPACE_LINEAR	0x308A	/* EGL_VG_COLORSPACE value */
-
-/* OpenVG alpha formats */
-#define EGL_VG_ALPHA_FORMAT_NONPRE	0x308B	/* EGL_ALPHA_FORMAT value */
-#define EGL_VG_ALPHA_FORMAT_PRE		0x308C	/* EGL_ALPHA_FORMAT value */
-
-/* Constant scale factor by which fractional display resolutions &
- * aspect ratio are scaled when queried as integer values.
- */
-#define EGL_DISPLAY_SCALING		10000
-
-/* Unknown display resolution/aspect ratio */
-#define EGL_UNKNOWN			((EGLint)-1)
-
-/* Back buffer swap behaviors */
-#define EGL_BUFFER_PRESERVED		0x3094	/* EGL_SWAP_BEHAVIOR value */
-#define EGL_BUFFER_DESTROYED		0x3095	/* EGL_SWAP_BEHAVIOR value */
-
-/* CreatePbufferFromClientBuffer buffer types */
-#define EGL_OPENVG_IMAGE		0x3096
-
-/* QueryContext targets */
-#define EGL_CONTEXT_CLIENT_TYPE		0x3097
-
-/* CreateContext attributes */
-#define EGL_CONTEXT_CLIENT_VERSION	0x3098
-
-/* Multisample resolution behaviors */
-#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A	/* EGL_MULTISAMPLE_RESOLVE value */
-#define EGL_MULTISAMPLE_RESOLVE_BOX	0x309B	/* EGL_MULTISAMPLE_RESOLVE value */
-
-/* BindAPI/QueryAPI targets */
-#define EGL_OPENGL_ES_API		0x30A0
-#define EGL_OPENVG_API			0x30A1
-#define EGL_OPENGL_API			0x30A2
-
-/* GetCurrentSurface targets */
-#define EGL_DRAW			0x3059
-#define EGL_READ			0x305A
-
-/* WaitNative engines */
-#define EGL_CORE_NATIVE_ENGINE		0x305B
-
-/* EGL 1.2 tokens renamed for consistency in EGL 1.3 */
-#define EGL_COLORSPACE			EGL_VG_COLORSPACE
-#define EGL_ALPHA_FORMAT		EGL_VG_ALPHA_FORMAT
-#define EGL_COLORSPACE_sRGB		EGL_VG_COLORSPACE_sRGB
-#define EGL_COLORSPACE_LINEAR		EGL_VG_COLORSPACE_LINEAR
-#define EGL_ALPHA_FORMAT_NONPRE		EGL_VG_ALPHA_FORMAT_NONPRE
-#define EGL_ALPHA_FORMAT_PRE		EGL_VG_ALPHA_FORMAT_PRE
-
-/* EGL extensions must request enum blocks from the Khronos
- * API Registrar, who maintains the enumerant registry. Submit
- * a bug in Khronos Bugzilla against task "Registry".
- */
-
-
-
-/* EGL Functions */
-
-EGLAPI EGLint EGLAPIENTRY eglGetError(void);
-
-EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType display_id);
-EGLAPI EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor);
-EGLAPI EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy);
-
-EGLAPI const char * EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name);
-
-EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs(EGLDisplay dpy, EGLConfig *configs,
-			 EGLint config_size, EGLint *num_config);
-EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list,
-			   EGLConfig *configs, EGLint config_size,
-			   EGLint *num_config);
-EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config,
-			      EGLint attribute, EGLint *value);
-
-EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config,
-				  EGLNativeWindowType win,
-				  const EGLint *attrib_list);
-EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config,
-				   const EGLint *attrib_list);
-EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config,
-				  EGLNativePixmapType pixmap,
-				  const EGLint *attrib_list);
-EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface(EGLDisplay dpy, EGLSurface surface);
-EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface(EGLDisplay dpy, EGLSurface surface,
-			   EGLint attribute, EGLint *value);
-
-EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI(EGLenum api);
-EGLAPI EGLenum EGLAPIENTRY eglQueryAPI(void);
-
-EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient(void);
-
-EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread(void);
-
-EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer(
-	      EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer,
-	      EGLConfig config, const EGLint *attrib_list);
-
-EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib(EGLDisplay dpy, EGLSurface surface,
-			    EGLint attribute, EGLint value);
-EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer);
-EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer);
-
-
-EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval(EGLDisplay dpy, EGLint interval);
-
-
-EGLAPI EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config,
-			    EGLContext share_context,
-			    const EGLint *attrib_list);
-EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx);
-EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw,
-			  EGLSurface read, EGLContext ctx);
-
-EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext(void);
-EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface(EGLint readdraw);
-EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay(void);
-EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext(EGLDisplay dpy, EGLContext ctx,
-			   EGLint attribute, EGLint *value);
-
-EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL(void);
-EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative(EGLint engine);
-EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers(EGLDisplay dpy, EGLSurface surface);
-EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers(EGLDisplay dpy, EGLSurface surface,
-			  EGLNativePixmapType target);
-
-/* This is a generic function pointer type, whose name indicates it must
- * be cast to the proper type *and calling convention* before use.
- */
+typedef void *EGLContext;
 typedef void (*__eglMustCastToProperFunctionPointerType)(void);
+#define EGL_ALPHA_SIZE                    0x3021
+#define EGL_BAD_ACCESS                    0x3002
+#define EGL_BAD_ALLOC                     0x3003
+#define EGL_BAD_ATTRIBUTE                 0x3004
+#define EGL_BAD_CONFIG                    0x3005
+#define EGL_BAD_CONTEXT                   0x3006
+#define EGL_BAD_CURRENT_SURFACE           0x3007
+#define EGL_BAD_DISPLAY                   0x3008
+#define EGL_BAD_MATCH                     0x3009
+#define EGL_BAD_NATIVE_PIXMAP             0x300A
+#define EGL_BAD_NATIVE_WINDOW             0x300B
+#define EGL_BAD_PARAMETER                 0x300C
+#define EGL_BAD_SURFACE                   0x300D
+#define EGL_BLUE_SIZE                     0x3022
+#define EGL_BUFFER_SIZE                   0x3020
+#define EGL_CONFIG_CAVEAT                 0x3027
+#define EGL_CONFIG_ID                     0x3028
+#define EGL_CORE_NATIVE_ENGINE            0x305B
+#define EGL_DEPTH_SIZE                    0x3025
+#define EGL_DONT_CARE                     ((EGLint)-1)
+#define EGL_DRAW                          0x3059
+#define EGL_EXTENSIONS                    0x3055
+#define EGL_FALSE                         0
+#define EGL_GREEN_SIZE                    0x3023
+#define EGL_HEIGHT                        0x3056
+#define EGL_LARGEST_PBUFFER               0x3058
+#define EGL_LEVEL                         0x3029
+#define EGL_MAX_PBUFFER_HEIGHT            0x302A
+#define EGL_MAX_PBUFFER_PIXELS            0x302B
+#define EGL_MAX_PBUFFER_WIDTH             0x302C
+#define EGL_NATIVE_RENDERABLE             0x302D
+#define EGL_NATIVE_VISUAL_ID              0x302E
+#define EGL_NATIVE_VISUAL_TYPE            0x302F
+#define EGL_NONE                          0x3038
+#define EGL_NON_CONFORMANT_CONFIG         0x3051
+#define EGL_NOT_INITIALIZED               0x3001
+#define EGL_NO_CONTEXT                    ((EGLContext)0)
+#define EGL_NO_DISPLAY                    ((EGLDisplay)0)
+#define EGL_NO_SURFACE                    ((EGLSurface)0)
+#define EGL_PBUFFER_BIT                   0x0001
+#define EGL_PIXMAP_BIT                    0x0002
+#define EGL_READ                          0x305A
+#define EGL_RED_SIZE                      0x3024
+#define EGL_SAMPLES                       0x3031
+#define EGL_SAMPLE_BUFFERS                0x3032
+#define EGL_SLOW_CONFIG                   0x3050
+#define EGL_STENCIL_SIZE                  0x3026
+#define EGL_SUCCESS                       0x3000
+#define EGL_SURFACE_TYPE                  0x3033
+#define EGL_TRANSPARENT_BLUE_VALUE        0x3035
+#define EGL_TRANSPARENT_GREEN_VALUE       0x3036
+#define EGL_TRANSPARENT_RED_VALUE         0x3037
+#define EGL_TRANSPARENT_RGB               0x3052
+#define EGL_TRANSPARENT_TYPE              0x3034
+#define EGL_TRUE                          1
+#define EGL_VENDOR                        0x3053
+#define EGL_VERSION                       0x3054
+#define EGL_WIDTH                         0x3057
+#define EGL_WINDOW_BIT                    0x0004
+EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config);
+EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target);
+EGLAPI EGLContext EGLAPIENTRY eglCreateContext (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext (EGLDisplay dpy, EGLContext ctx);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface (EGLDisplay dpy, EGLSurface surface);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config);
+EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay (void);
+EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface (EGLint readdraw);
+EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay (EGLNativeDisplayType display_id);
+EGLAPI EGLint EGLAPIENTRY eglGetError (void);
+EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress (const char *procname);
+EGLAPI EGLBoolean EGLAPIENTRY eglInitialize (EGLDisplay dpy, EGLint *major, EGLint *minor);
+EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value);
+EGLAPI const char *EGLAPIENTRY eglQueryString (EGLDisplay dpy, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value);
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers (EGLDisplay dpy, EGLSurface surface);
+EGLAPI EGLBoolean EGLAPIENTRY eglTerminate (EGLDisplay dpy);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL (void);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative (EGLint engine);
+#endif /* EGL_VERSION_1_0 */
+
+#ifndef EGL_VERSION_1_1
+#define EGL_VERSION_1_1 1
+#define EGL_BACK_BUFFER                   0x3084
+#define EGL_BIND_TO_TEXTURE_RGB           0x3039
+#define EGL_BIND_TO_TEXTURE_RGBA          0x303A
+#define EGL_CONTEXT_LOST                  0x300E
+#define EGL_MIN_SWAP_INTERVAL             0x303B
+#define EGL_MAX_SWAP_INTERVAL             0x303C
+#define EGL_MIPMAP_TEXTURE                0x3082
+#define EGL_MIPMAP_LEVEL                  0x3083
+#define EGL_NO_TEXTURE                    0x305C
+#define EGL_TEXTURE_2D                    0x305F
+#define EGL_TEXTURE_FORMAT                0x3080
+#define EGL_TEXTURE_RGB                   0x305D
+#define EGL_TEXTURE_RGBA                  0x305E
+#define EGL_TEXTURE_TARGET                0x3081
+EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value);
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval (EGLDisplay dpy, EGLint interval);
+#endif /* EGL_VERSION_1_1 */
 
-/* Now, define eglGetProcAddress using the generic function ptr. type */
-EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY
-       eglGetProcAddress(const char *procname);
+#ifndef EGL_VERSION_1_2
+#define EGL_VERSION_1_2 1
+typedef unsigned int EGLenum;
+typedef void *EGLClientBuffer;
+#define EGL_ALPHA_FORMAT                  0x3088
+#define EGL_ALPHA_FORMAT_NONPRE           0x308B
+#define EGL_ALPHA_FORMAT_PRE              0x308C
+#define EGL_ALPHA_MASK_SIZE               0x303E
+#define EGL_BUFFER_PRESERVED              0x3094
+#define EGL_BUFFER_DESTROYED              0x3095
+#define EGL_CLIENT_APIS                   0x308D
+#define EGL_COLORSPACE                    0x3087
+#define EGL_COLORSPACE_sRGB               0x3089
+#define EGL_COLORSPACE_LINEAR             0x308A
+#define EGL_COLOR_BUFFER_TYPE             0x303F
+#define EGL_CONTEXT_CLIENT_TYPE           0x3097
+#define EGL_DISPLAY_SCALING               10000
+#define EGL_HORIZONTAL_RESOLUTION         0x3090
+#define EGL_LUMINANCE_BUFFER              0x308F
+#define EGL_LUMINANCE_SIZE                0x303D
+#define EGL_OPENGL_ES_BIT                 0x0001
+#define EGL_OPENVG_BIT                    0x0002
+#define EGL_OPENGL_ES_API                 0x30A0
+#define EGL_OPENVG_API                    0x30A1
+#define EGL_OPENVG_IMAGE                  0x3096
+#define EGL_PIXEL_ASPECT_RATIO            0x3092
+#define EGL_RENDERABLE_TYPE               0x3040
+#define EGL_RENDER_BUFFER                 0x3086
+#define EGL_RGB_BUFFER                    0x308E
+#define EGL_SINGLE_BUFFER                 0x3085
+#define EGL_SWAP_BEHAVIOR                 0x3093
+#define EGL_UNKNOWN                       ((EGLint)-1)
+#define EGL_VERTICAL_RESOLUTION           0x3091
+EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI (EGLenum api);
+EGLAPI EGLenum EGLAPIENTRY eglQueryAPI (void);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread (void);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient (void);
+#endif /* EGL_VERSION_1_2 */
+
+#ifndef EGL_VERSION_1_3
+#define EGL_VERSION_1_3 1
+#define EGL_CONFORMANT                    0x3042
+#define EGL_CONTEXT_CLIENT_VERSION        0x3098
+#define EGL_MATCH_NATIVE_PIXMAP           0x3041
+#define EGL_OPENGL_ES2_BIT                0x0004
+#define EGL_VG_ALPHA_FORMAT               0x3088
+#define EGL_VG_ALPHA_FORMAT_NONPRE        0x308B
+#define EGL_VG_ALPHA_FORMAT_PRE           0x308C
+#define EGL_VG_ALPHA_FORMAT_PRE_BIT       0x0040
+#define EGL_VG_COLORSPACE                 0x3087
+#define EGL_VG_COLORSPACE_sRGB            0x3089
+#define EGL_VG_COLORSPACE_LINEAR          0x308A
+#define EGL_VG_COLORSPACE_LINEAR_BIT      0x0020
+#endif /* EGL_VERSION_1_3 */
+
+#ifndef EGL_VERSION_1_4
+#define EGL_VERSION_1_4 1
+#define EGL_DEFAULT_DISPLAY               ((EGLNativeDisplayType)0)
+#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT   0x0200
+#define EGL_MULTISAMPLE_RESOLVE           0x3099
+#define EGL_MULTISAMPLE_RESOLVE_DEFAULT   0x309A
+#define EGL_MULTISAMPLE_RESOLVE_BOX       0x309B
+#define EGL_OPENGL_API                    0x30A2
+#define EGL_OPENGL_BIT                    0x0008
+#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT   0x0400
+EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext (void);
+#endif /* EGL_VERSION_1_4 */
+
+#ifndef EGL_VERSION_1_5
+#define EGL_VERSION_1_5 1
+typedef void *EGLSync;
+typedef intptr_t EGLAttrib;
+typedef khronos_utime_nanoseconds_t EGLTime;
+typedef void *EGLImage;
+#define EGL_CONTEXT_MAJOR_VERSION         0x3098
+#define EGL_CONTEXT_MINOR_VERSION         0x30FB
+#define EGL_CONTEXT_OPENGL_PROFILE_MASK   0x30FD
+#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD
+#define EGL_NO_RESET_NOTIFICATION         0x31BE
+#define EGL_LOSE_CONTEXT_ON_RESET         0x31BF
+#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001
+#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002
+#define EGL_CONTEXT_OPENGL_DEBUG          0x31B0
+#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1
+#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS  0x31B2
+#define EGL_OPENGL_ES3_BIT                0x00000040
+#define EGL_CL_EVENT_HANDLE               0x309C
+#define EGL_SYNC_CL_EVENT                 0x30FE
+#define EGL_SYNC_CL_EVENT_COMPLETE        0x30FF
+#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE  0x30F0
+#define EGL_SYNC_TYPE                     0x30F7
+#define EGL_SYNC_STATUS                   0x30F1
+#define EGL_SYNC_CONDITION                0x30F8
+#define EGL_SIGNALED                      0x30F2
+#define EGL_UNSIGNALED                    0x30F3
+#define EGL_SYNC_FLUSH_COMMANDS_BIT       0x0001
+#define EGL_FOREVER                       0xFFFFFFFFFFFFFFFFull
+#define EGL_TIMEOUT_EXPIRED               0x30F5
+#define EGL_CONDITION_SATISFIED           0x30F6
+#define EGL_NO_SYNC                       ((EGLSync)0)
+#define EGL_SYNC_FENCE                    0x30F9
+#define EGL_GL_COLORSPACE                 0x309D
+#define EGL_GL_COLORSPACE_SRGB            0x3089
+#define EGL_GL_COLORSPACE_LINEAR          0x308A
+#define EGL_GL_RENDERBUFFER               0x30B9
+#define EGL_GL_TEXTURE_2D                 0x30B1
+#define EGL_GL_TEXTURE_LEVEL              0x30BC
+#define EGL_GL_TEXTURE_3D                 0x30B2
+#define EGL_GL_TEXTURE_ZOFFSET            0x30BD
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8
+#define EGL_IMAGE_PRESERVED               0x30D2
+#define EGL_NO_IMAGE                      ((EGLImage)0)
+EGLAPI EGLSync EGLAPIENTRY eglCreateSync (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroySync (EGLDisplay dpy, EGLSync sync);
+EGLAPI EGLint EGLAPIENTRY eglClientWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttrib (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value);
+EGLAPI EGLImage EGLAPIENTRY eglCreateImage (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImage (EGLDisplay dpy, EGLImage image);
+EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplay (EGLenum platform, void *native_display, const EGLAttrib *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurface (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurface (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags);
+#endif /* EGL_VERSION_1_5 */
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif /* __egl_h_ */
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/eglmesaext.h mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/eglmesaext.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/eglmesaext.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/eglmesaext.h	2015-09-16 14:36:08.000000000 +0000
@@ -87,45 +87,14 @@
 
 #endif
 
-#ifndef EGL_NOK_swap_region
-#define EGL_NOK_swap_region 1
-
-#ifdef EGL_EGLEXT_PROTOTYPES
-EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint* rects);
-#endif
-
+/* Remnant of EGL_NOK_swap_region, kept only for compatibility: the typedef below has a non-standard name (it lacks the usual PROC suffix). */
 typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOK) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint* rects);
-#endif
-
-#ifndef EGL_NOK_texture_from_pixmap
-#define EGL_NOK_texture_from_pixmap 1
-
-#define EGL_Y_INVERTED_NOK			0x307F
-#endif /* EGL_NOK_texture_from_pixmap */
-
-#ifndef EGL_ANDROID_image_native_buffer
-#define EGL_ANDROID_image_native_buffer 1
-#define EGL_NATIVE_BUFFER_ANDROID       0x3140  /* eglCreateImageKHR target */
-#endif
 
 #ifndef EGL_MESA_configless_context
 #define EGL_MESA_configless_context 1
 #define EGL_NO_CONFIG_MESA			((EGLConfig)0)
 #endif
 
-#if KHRONOS_SUPPORT_INT64
-#ifndef EGL_MESA_image_dma_buf_export
-#define EGL_MESA_image_dma_buf_export 1
-#ifdef EGL_EGLEXT_PROTOTYPES
-EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageQueryMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers);
-EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets);
-#endif
-#endif
-
-typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEQUERYMESA) (EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers);
-typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEMESA) (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets);
-
-#endif
 #ifdef __cplusplus
 }
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/eglplatform.h mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/eglplatform.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/EGL/eglplatform.h	2015-02-25 15:10:41.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/EGL/eglplatform.h	2015-09-16 14:37:00.000000000 +0000
@@ -2,7 +2,7 @@
 #define __eglplatform_h_
 
 /*
-** Copyright (c) 2007-2009 The Khronos Group Inc.
+** Copyright (c) 2007-2013 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -25,7 +25,7 @@
 */
 
 /* Platform-specific types and definitions for egl.h
- * $Revision: 12306 $ on $Date: 2010-08-25 09:51:28 -0700 (Wed, 25 Aug 2010) $
+ * $Revision: 30994 $ on $Date: 2015-04-30 13:36:48 -0700 (Thu, 30 Apr 2015) $
  *
  * Adopters may modify khrplatform.h and this file to suit their platform.
  * You are encouraged to submit all modifications to the Khronos group so that
@@ -95,16 +95,17 @@
 typedef struct gbm_bo      *EGLNativePixmapType;
 typedef void               *EGLNativeWindowType;
 
-#elif defined(ANDROID) /* Android */
+#elif defined(__ANDROID__) || defined(ANDROID)
+
+#include <android/native_window.h>
 
-struct ANativeWindow;
 struct egl_native_pixmap_t;
 
-typedef struct ANativeWindow        *EGLNativeWindowType;
-typedef struct egl_native_pixmap_t  *EGLNativePixmapType;
-typedef void                        *EGLNativeDisplayType;
+typedef struct ANativeWindow*           EGLNativeWindowType;
+typedef struct egl_native_pixmap_t*     EGLNativePixmapType;
+typedef void*                           EGLNativeDisplayType;
 
-#elif defined(__unix__)
+#elif defined(__unix__) || defined(__APPLE__)
 
 #if defined(MESA_EGL_NO_X11_HEADERS)
 
@@ -131,9 +132,7 @@
 typedef khronos_uintptr_t	 EGLNativeWindowType;
 
 #else
-
 #error "Platform not recognized"
-
 #endif
 
 /* EGL 1.2 types, renamed for consistency in EGL 1.3 */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/GL/glext.h mesa-11.0.0~git20150916+11.0.c4bae579/include/GL/glext.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/GL/glext.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/GL/glext.h	2015-09-16 14:36:08.000000000 +0000
@@ -6,7 +6,7 @@
 #endif
 
 /*
-** Copyright (c) 2013-2014 The Khronos Group Inc.
+** Copyright (c) 2013-2015 The Khronos Group Inc.
 **
 ** Permission is hereby granted, free of charge, to any person obtaining a
 ** copy of this software and/or associated documentation files (the
@@ -33,7 +33,7 @@
 ** used to make the header, and the header can be found at
 **   http://www.opengl.org/registry/
 **
-** Khronos $Revision: 29735 $ on $Date: 2015-02-02 19:00:01 -0800 (Mon, 02 Feb 2015) $
+** Khronos $Revision: 31811 $ on $Date: 2015-08-10 17:01:11 +1000 (Mon, 10 Aug 2015) $
 */
 
 #if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__)
@@ -53,7 +53,7 @@
 #define GLAPI extern
 #endif
 
-#define GL_GLEXT_VERSION 20150202
+#define GL_GLEXT_VERSION 20150809
 
 /* Generated C header for:
  * API: gl
@@ -1041,6 +1041,22 @@
 #define GL_COLOR_ATTACHMENT13             0x8CED
 #define GL_COLOR_ATTACHMENT14             0x8CEE
 #define GL_COLOR_ATTACHMENT15             0x8CEF
+#define GL_COLOR_ATTACHMENT16             0x8CF0
+#define GL_COLOR_ATTACHMENT17             0x8CF1
+#define GL_COLOR_ATTACHMENT18             0x8CF2
+#define GL_COLOR_ATTACHMENT19             0x8CF3
+#define GL_COLOR_ATTACHMENT20             0x8CF4
+#define GL_COLOR_ATTACHMENT21             0x8CF5
+#define GL_COLOR_ATTACHMENT22             0x8CF6
+#define GL_COLOR_ATTACHMENT23             0x8CF7
+#define GL_COLOR_ATTACHMENT24             0x8CF8
+#define GL_COLOR_ATTACHMENT25             0x8CF9
+#define GL_COLOR_ATTACHMENT26             0x8CFA
+#define GL_COLOR_ATTACHMENT27             0x8CFB
+#define GL_COLOR_ATTACHMENT28             0x8CFC
+#define GL_COLOR_ATTACHMENT29             0x8CFD
+#define GL_COLOR_ATTACHMENT30             0x8CFE
+#define GL_COLOR_ATTACHMENT31             0x8CFF
 #define GL_DEPTH_ATTACHMENT               0x8D00
 #define GL_STENCIL_ATTACHMENT             0x8D20
 #define GL_FRAMEBUFFER                    0x8D40
@@ -2859,6 +2875,17 @@
 #define GL_ARB_ES3_1_compatibility 1
 #endif /* GL_ARB_ES3_1_compatibility */
 
+#ifndef GL_ARB_ES3_2_compatibility
+#define GL_ARB_ES3_2_compatibility 1
+#define GL_PRIMITIVE_BOUNDING_BOX_ARB     0x92BE
+#define GL_MULTISAMPLE_LINE_WIDTH_RANGE_ARB 0x9381
+#define GL_MULTISAMPLE_LINE_WIDTH_GRANULARITY_ARB 0x9382
+typedef void (APIENTRYP PFNGLPRIMITIVEBOUNDINGBOXARBPROC) (GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY, GLfloat maxZ, GLfloat maxW);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glPrimitiveBoundingBoxARB (GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY, GLfloat maxZ, GLfloat maxW);
+#endif
+#endif /* GL_ARB_ES3_2_compatibility */
+
 #ifndef GL_ARB_ES3_compatibility
 #define GL_ARB_ES3_compatibility 1
 #endif /* GL_ARB_ES3_compatibility */
@@ -3272,6 +3299,10 @@
 #define GL_FRAGMENT_SHADER_DERIVATIVE_HINT_ARB 0x8B8B
 #endif /* GL_ARB_fragment_shader */
 
+#ifndef GL_ARB_fragment_shader_interlock
+#define GL_ARB_fragment_shader_interlock 1
+#endif /* GL_ARB_fragment_shader_interlock */
+
 #ifndef GL_ARB_framebuffer_no_attachments
 #define GL_ARB_framebuffer_no_attachments 1
 #endif /* GL_ARB_framebuffer_no_attachments */
@@ -3332,6 +3363,91 @@
 #define GL_ARB_gpu_shader_fp64 1
 #endif /* GL_ARB_gpu_shader_fp64 */
 
+#ifndef GL_ARB_gpu_shader_int64
+#define GL_ARB_gpu_shader_int64 1
+#define GL_INT64_ARB                      0x140E
+#define GL_INT64_VEC2_ARB                 0x8FE9
+#define GL_INT64_VEC3_ARB                 0x8FEA
+#define GL_INT64_VEC4_ARB                 0x8FEB
+#define GL_UNSIGNED_INT64_VEC2_ARB        0x8FF5
+#define GL_UNSIGNED_INT64_VEC3_ARB        0x8FF6
+#define GL_UNSIGNED_INT64_VEC4_ARB        0x8FF7
+typedef void (APIENTRYP PFNGLUNIFORM1I64ARBPROC) (GLint location, GLint64 x);
+typedef void (APIENTRYP PFNGLUNIFORM2I64ARBPROC) (GLint location, GLint64 x, GLint64 y);
+typedef void (APIENTRYP PFNGLUNIFORM3I64ARBPROC) (GLint location, GLint64 x, GLint64 y, GLint64 z);
+typedef void (APIENTRYP PFNGLUNIFORM4I64ARBPROC) (GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
+typedef void (APIENTRYP PFNGLUNIFORM1I64VARBPROC) (GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLUNIFORM2I64VARBPROC) (GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLUNIFORM3I64VARBPROC) (GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLUNIFORM4I64VARBPROC) (GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLUNIFORM1UI64ARBPROC) (GLint location, GLuint64 x);
+typedef void (APIENTRYP PFNGLUNIFORM2UI64ARBPROC) (GLint location, GLuint64 x, GLuint64 y);
+typedef void (APIENTRYP PFNGLUNIFORM3UI64ARBPROC) (GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
+typedef void (APIENTRYP PFNGLUNIFORM4UI64ARBPROC) (GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
+typedef void (APIENTRYP PFNGLUNIFORM1UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value);
+typedef void (APIENTRYP PFNGLUNIFORM2UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value);
+typedef void (APIENTRYP PFNGLUNIFORM3UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value);
+typedef void (APIENTRYP PFNGLUNIFORM4UI64VARBPROC) (GLint location, GLsizei count, const GLuint64 *value);
+typedef void (APIENTRYP PFNGLGETUNIFORMI64VARBPROC) (GLuint program, GLint location, GLint64 *params);
+typedef void (APIENTRYP PFNGLGETUNIFORMUI64VARBPROC) (GLuint program, GLint location, GLuint64 *params);
+typedef void (APIENTRYP PFNGLGETNUNIFORMI64VARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLint64 *params);
+typedef void (APIENTRYP PFNGLGETNUNIFORMUI64VARBPROC) (GLuint program, GLint location, GLsizei bufSize, GLuint64 *params);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1I64ARBPROC) (GLuint program, GLint location, GLint64 x);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2I64ARBPROC) (GLuint program, GLint location, GLint64 x, GLint64 y);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3I64ARBPROC) (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4I64ARBPROC) (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4I64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UI64ARBPROC) (GLuint program, GLint location, GLuint64 x);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2UI64ARBPROC) (GLuint program, GLint location, GLuint64 x, GLuint64 y);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UI64ARBPROC) (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4UI64ARBPROC) (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM1UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM2UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM3UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+typedef void (APIENTRYP PFNGLPROGRAMUNIFORM4UI64VARBPROC) (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glUniform1i64ARB (GLint location, GLint64 x);
+GLAPI void APIENTRY glUniform2i64ARB (GLint location, GLint64 x, GLint64 y);
+GLAPI void APIENTRY glUniform3i64ARB (GLint location, GLint64 x, GLint64 y, GLint64 z);
+GLAPI void APIENTRY glUniform4i64ARB (GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
+GLAPI void APIENTRY glUniform1i64vARB (GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glUniform2i64vARB (GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glUniform3i64vARB (GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glUniform4i64vARB (GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glUniform1ui64ARB (GLint location, GLuint64 x);
+GLAPI void APIENTRY glUniform2ui64ARB (GLint location, GLuint64 x, GLuint64 y);
+GLAPI void APIENTRY glUniform3ui64ARB (GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
+GLAPI void APIENTRY glUniform4ui64ARB (GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
+GLAPI void APIENTRY glUniform1ui64vARB (GLint location, GLsizei count, const GLuint64 *value);
+GLAPI void APIENTRY glUniform2ui64vARB (GLint location, GLsizei count, const GLuint64 *value);
+GLAPI void APIENTRY glUniform3ui64vARB (GLint location, GLsizei count, const GLuint64 *value);
+GLAPI void APIENTRY glUniform4ui64vARB (GLint location, GLsizei count, const GLuint64 *value);
+GLAPI void APIENTRY glGetUniformi64vARB (GLuint program, GLint location, GLint64 *params);
+GLAPI void APIENTRY glGetUniformui64vARB (GLuint program, GLint location, GLuint64 *params);
+GLAPI void APIENTRY glGetnUniformi64vARB (GLuint program, GLint location, GLsizei bufSize, GLint64 *params);
+GLAPI void APIENTRY glGetnUniformui64vARB (GLuint program, GLint location, GLsizei bufSize, GLuint64 *params);
+GLAPI void APIENTRY glProgramUniform1i64ARB (GLuint program, GLint location, GLint64 x);
+GLAPI void APIENTRY glProgramUniform2i64ARB (GLuint program, GLint location, GLint64 x, GLint64 y);
+GLAPI void APIENTRY glProgramUniform3i64ARB (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z);
+GLAPI void APIENTRY glProgramUniform4i64ARB (GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
+GLAPI void APIENTRY glProgramUniform1i64vARB (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glProgramUniform2i64vARB (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glProgramUniform3i64vARB (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glProgramUniform4i64vARB (GLuint program, GLint location, GLsizei count, const GLint64 *value);
+GLAPI void APIENTRY glProgramUniform1ui64ARB (GLuint program, GLint location, GLuint64 x);
+GLAPI void APIENTRY glProgramUniform2ui64ARB (GLuint program, GLint location, GLuint64 x, GLuint64 y);
+GLAPI void APIENTRY glProgramUniform3ui64ARB (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
+GLAPI void APIENTRY glProgramUniform4ui64ARB (GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
+GLAPI void APIENTRY glProgramUniform1ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+GLAPI void APIENTRY glProgramUniform2ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+GLAPI void APIENTRY glProgramUniform3ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+GLAPI void APIENTRY glProgramUniform4ui64vARB (GLuint program, GLint location, GLsizei count, const GLuint64 *value);
+#endif
+#endif /* GL_ARB_gpu_shader_int64 */
+
 #ifndef GL_ARB_half_float_pixel
 #define GL_ARB_half_float_pixel 1
 typedef unsigned short GLhalfARB;
@@ -3711,6 +3827,16 @@
 #define GL_ARB_occlusion_query2 1
 #endif /* GL_ARB_occlusion_query2 */
 
+#ifndef GL_ARB_parallel_shader_compile
+#define GL_ARB_parallel_shader_compile 1
+#define GL_MAX_SHADER_COMPILER_THREADS_ARB 0x91B0
+#define GL_COMPLETION_STATUS_ARB          0x91B1
+typedef void (APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSARBPROC) (GLuint count);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glMaxShaderCompilerThreadsARB (GLuint count);
+#endif
+#endif /* GL_ARB_parallel_shader_compile */
+
 #ifndef GL_ARB_pipeline_statistics_query
 #define GL_ARB_pipeline_statistics_query 1
 #define GL_VERTICES_SUBMITTED_ARB         0x82EE
@@ -3753,6 +3879,10 @@
 #define GL_COORD_REPLACE_ARB              0x8862
 #endif /* GL_ARB_point_sprite */
 
+#ifndef GL_ARB_post_depth_coverage
+#define GL_ARB_post_depth_coverage 1
+#endif /* GL_ARB_post_depth_coverage */
+
 #ifndef GL_ARB_program_interface_query
 #define GL_ARB_program_interface_query 1
 #endif /* GL_ARB_program_interface_query */
@@ -3826,6 +3956,26 @@
 #define GL_ARB_robustness_isolation 1
 #endif /* GL_ARB_robustness_isolation */
 
+#ifndef GL_ARB_sample_locations
+#define GL_ARB_sample_locations 1
+#define GL_SAMPLE_LOCATION_SUBPIXEL_BITS_ARB 0x933D
+#define GL_SAMPLE_LOCATION_PIXEL_GRID_WIDTH_ARB 0x933E
+#define GL_SAMPLE_LOCATION_PIXEL_GRID_HEIGHT_ARB 0x933F
+#define GL_PROGRAMMABLE_SAMPLE_LOCATION_TABLE_SIZE_ARB 0x9340
+#define GL_SAMPLE_LOCATION_ARB            0x8E50
+#define GL_PROGRAMMABLE_SAMPLE_LOCATION_ARB 0x9341
+#define GL_FRAMEBUFFER_PROGRAMMABLE_SAMPLE_LOCATIONS_ARB 0x9342
+#define GL_FRAMEBUFFER_SAMPLE_LOCATION_PIXEL_GRID_ARB 0x9343
+typedef void (APIENTRYP PFNGLFRAMEBUFFERSAMPLELOCATIONSFVARBPROC) (GLenum target, GLuint start, GLsizei count, const GLfloat *v);
+typedef void (APIENTRYP PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVARBPROC) (GLuint framebuffer, GLuint start, GLsizei count, const GLfloat *v);
+typedef void (APIENTRYP PFNGLEVALUATEDEPTHVALUESARBPROC) (void);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFramebufferSampleLocationsfvARB (GLenum target, GLuint start, GLsizei count, const GLfloat *v);
+GLAPI void APIENTRY glNamedFramebufferSampleLocationsfvARB (GLuint framebuffer, GLuint start, GLsizei count, const GLfloat *v);
+GLAPI void APIENTRY glEvaluateDepthValuesARB (void);
+#endif
+#endif /* GL_ARB_sample_locations */
+
 #ifndef GL_ARB_sample_shading
 #define GL_ARB_sample_shading 1
 #define GL_SAMPLE_SHADING_ARB             0x8C36
@@ -3852,14 +4002,26 @@
 #define GL_ARB_separate_shader_objects 1
 #endif /* GL_ARB_separate_shader_objects */
 
+#ifndef GL_ARB_shader_atomic_counter_ops
+#define GL_ARB_shader_atomic_counter_ops 1
+#endif /* GL_ARB_shader_atomic_counter_ops */
+
 #ifndef GL_ARB_shader_atomic_counters
 #define GL_ARB_shader_atomic_counters 1
 #endif /* GL_ARB_shader_atomic_counters */
 
+#ifndef GL_ARB_shader_ballot
+#define GL_ARB_shader_ballot 1
+#endif /* GL_ARB_shader_ballot */
+
 #ifndef GL_ARB_shader_bit_encoding
 #define GL_ARB_shader_bit_encoding 1
 #endif /* GL_ARB_shader_bit_encoding */
 
+#ifndef GL_ARB_shader_clock
+#define GL_ARB_shader_clock 1
+#endif /* GL_ARB_shader_clock */
+
 #ifndef GL_ARB_shader_draw_parameters
 #define GL_ARB_shader_draw_parameters 1
 #endif /* GL_ARB_shader_draw_parameters */
@@ -3879,7 +4041,12 @@
 #ifndef GL_ARB_shader_objects
 #define GL_ARB_shader_objects 1
 #ifdef __APPLE__
+#ifdef BUILDING_MESA
+/* Use an integer handle type when building Mesa to avoid integer <-> void* conversion warnings */
+typedef unsigned long GLhandleARB;
+#else
 typedef void *GLhandleARB;
+#endif
 #else
 typedef unsigned int GLhandleARB;
 #endif
@@ -4024,6 +4191,10 @@
 #define GL_ARB_shader_texture_lod 1
 #endif /* GL_ARB_shader_texture_lod */
 
+#ifndef GL_ARB_shader_viewport_layer_array
+#define GL_ARB_shader_viewport_layer_array 1
+#endif /* GL_ARB_shader_viewport_layer_array */
+
 #ifndef GL_ARB_shading_language_100
 #define GL_ARB_shading_language_100 1
 #define GL_SHADING_LANGUAGE_VERSION_ARB   0x8B8C
@@ -4097,12 +4268,20 @@
 #define GL_MAX_SPARSE_3D_TEXTURE_SIZE_ARB 0x9199
 #define GL_MAX_SPARSE_ARRAY_TEXTURE_LAYERS_ARB 0x919A
 #define GL_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS_ARB 0x91A9
-typedef void (APIENTRYP PFNGLTEXPAGECOMMITMENTARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident);
+typedef void (APIENTRYP PFNGLTEXPAGECOMMITMENTARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
 #ifdef GL_GLEXT_PROTOTYPES
-GLAPI void APIENTRY glTexPageCommitmentARB (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident);
+GLAPI void APIENTRY glTexPageCommitmentARB (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
 #endif
 #endif /* GL_ARB_sparse_texture */
 
+#ifndef GL_ARB_sparse_texture2
+#define GL_ARB_sparse_texture2 1
+#endif /* GL_ARB_sparse_texture2 */
+
+#ifndef GL_ARB_sparse_texture_clamp
+#define GL_ARB_sparse_texture_clamp 1
+#endif /* GL_ARB_sparse_texture_clamp */
+
 #ifndef GL_ARB_stencil_texturing
 #define GL_ARB_stencil_texturing 1
 #endif /* GL_ARB_stencil_texturing */
@@ -4255,6 +4434,12 @@
 #define GL_DOT3_RGBA_ARB                  0x86AF
 #endif /* GL_ARB_texture_env_dot3 */
 
+#ifndef GL_ARB_texture_filter_minmax
+#define GL_ARB_texture_filter_minmax 1
+#define GL_TEXTURE_REDUCTION_MODE_ARB     0x9366
+#define GL_WEIGHTED_AVERAGE_ARB           0x9367
+#endif /* GL_ARB_texture_filter_minmax */
+
 #ifndef GL_ARB_texture_float
 #define GL_ARB_texture_float 1
 #define GL_TEXTURE_RED_TYPE_ARB           0x8C10
@@ -4749,6 +4934,11 @@
 #define GL_KHR_debug 1
 #endif /* GL_KHR_debug */
 
+#ifndef GL_KHR_no_error
+#define GL_KHR_no_error 1
+#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR  0x00000008
+#endif /* GL_KHR_no_error */
+
 #ifndef GL_KHR_robust_buffer_access_behavior
 #define GL_KHR_robust_buffer_access_behavior 1
 #endif /* GL_KHR_robust_buffer_access_behavior */
@@ -4891,7 +5081,6 @@
 typedef void (APIENTRYP PFNGLPOINTSIZEXOESPROC) (GLfixed size);
 typedef void (APIENTRYP PFNGLPOLYGONOFFSETXOESPROC) (GLfixed factor, GLfixed units);
 typedef void (APIENTRYP PFNGLROTATEXOESPROC) (GLfixed angle, GLfixed x, GLfixed y, GLfixed z);
-typedef void (APIENTRYP PFNGLSAMPLECOVERAGEOESPROC) (GLfixed value, GLboolean invert);
 typedef void (APIENTRYP PFNGLSCALEXOESPROC) (GLfixed x, GLfixed y, GLfixed z);
 typedef void (APIENTRYP PFNGLTEXENVXOESPROC) (GLenum target, GLenum pname, GLfixed param);
 typedef void (APIENTRYP PFNGLTEXENVXVOESPROC) (GLenum target, GLenum pname, const GLfixed *params);
@@ -4996,7 +5185,6 @@
 GLAPI void APIENTRY glPointSizexOES (GLfixed size);
 GLAPI void APIENTRY glPolygonOffsetxOES (GLfixed factor, GLfixed units);
 GLAPI void APIENTRY glRotatexOES (GLfixed angle, GLfixed x, GLfixed y, GLfixed z);
-GLAPI void APIENTRY glSampleCoverageOES (GLfixed value, GLboolean invert);
 GLAPI void APIENTRY glScalexOES (GLfixed x, GLfixed y, GLfixed z);
 GLAPI void APIENTRY glTexEnvxOES (GLenum target, GLenum pname, GLfixed param);
 GLAPI void APIENTRY glTexEnvxvOES (GLenum target, GLenum pname, const GLfixed *params);
@@ -6710,7 +6898,7 @@
 typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBBINDINGEXTPROC) (GLuint vaobj, GLuint attribindex, GLuint bindingindex);
 typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXBINDINGDIVISOREXTPROC) (GLuint vaobj, GLuint bindingindex, GLuint divisor);
 typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBLOFFSETEXTPROC) (GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type, GLsizei stride, GLintptr offset);
-typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident);
+typedef void (APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
 typedef void (APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBDIVISOREXTPROC) (GLuint vaobj, GLuint index, GLuint divisor);
 #ifdef GL_GLEXT_PROTOTYPES
 GLAPI void APIENTRY glMatrixLoadfEXT (GLenum mode, const GLfloat *m);
@@ -6966,7 +7154,7 @@
 GLAPI void APIENTRY glVertexArrayVertexAttribBindingEXT (GLuint vaobj, GLuint attribindex, GLuint bindingindex);
 GLAPI void APIENTRY glVertexArrayVertexBindingDivisorEXT (GLuint vaobj, GLuint bindingindex, GLuint divisor);
 GLAPI void APIENTRY glVertexArrayVertexAttribLOffsetEXT (GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type, GLsizei stride, GLintptr offset);
-GLAPI void APIENTRY glTexturePageCommitmentEXT (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean resident);
+GLAPI void APIENTRY glTexturePageCommitmentEXT (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
 GLAPI void APIENTRY glVertexArrayVertexAttribDivisorEXT (GLuint vaobj, GLuint index, GLuint divisor);
 #endif
 #endif /* GL_EXT_direct_state_access */
@@ -8630,6 +8818,14 @@
 #define GL_INTEL_fragment_shader_ordering 1
 #endif /* GL_INTEL_fragment_shader_ordering */
 
+#ifndef GL_INTEL_framebuffer_CMAA
+#define GL_INTEL_framebuffer_CMAA 1
+typedef void (APIENTRYP PFNGLAPPLYFRAMEBUFFERATTACHMENTCMAAINTELPROC) (void);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glApplyFramebufferAttachmentCMAAINTEL (void);
+#endif
+#endif /* GL_INTEL_framebuffer_CMAA */
+
 #ifndef GL_INTEL_map_texture
 #define GL_INTEL_map_texture 1
 #define GL_TEXTURE_MEMORY_LAYOUT_INTEL    0x83FF
@@ -8934,6 +9130,65 @@
 #define GL_NV_blend_square 1
 #endif /* GL_NV_blend_square */
 
+#ifndef GL_NV_command_list
+#define GL_NV_command_list 1
+#define GL_TERMINATE_SEQUENCE_COMMAND_NV  0x0000
+#define GL_NOP_COMMAND_NV                 0x0001
+#define GL_DRAW_ELEMENTS_COMMAND_NV       0x0002
+#define GL_DRAW_ARRAYS_COMMAND_NV         0x0003
+#define GL_DRAW_ELEMENTS_STRIP_COMMAND_NV 0x0004
+#define GL_DRAW_ARRAYS_STRIP_COMMAND_NV   0x0005
+#define GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV 0x0006
+#define GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV 0x0007
+#define GL_ELEMENT_ADDRESS_COMMAND_NV     0x0008
+#define GL_ATTRIBUTE_ADDRESS_COMMAND_NV   0x0009
+#define GL_UNIFORM_ADDRESS_COMMAND_NV     0x000A
+#define GL_BLEND_COLOR_COMMAND_NV         0x000B
+#define GL_STENCIL_REF_COMMAND_NV         0x000C
+#define GL_LINE_WIDTH_COMMAND_NV          0x000D
+#define GL_POLYGON_OFFSET_COMMAND_NV      0x000E
+#define GL_ALPHA_REF_COMMAND_NV           0x000F
+#define GL_VIEWPORT_COMMAND_NV            0x0010
+#define GL_SCISSOR_COMMAND_NV             0x0011
+#define GL_FRONT_FACE_COMMAND_NV          0x0012
+typedef void (APIENTRYP PFNGLCREATESTATESNVPROC) (GLsizei n, GLuint *states);
+typedef void (APIENTRYP PFNGLDELETESTATESNVPROC) (GLsizei n, const GLuint *states);
+typedef GLboolean (APIENTRYP PFNGLISSTATENVPROC) (GLuint state);
+typedef void (APIENTRYP PFNGLSTATECAPTURENVPROC) (GLuint state, GLenum mode);
+typedef GLuint (APIENTRYP PFNGLGETCOMMANDHEADERNVPROC) (GLenum tokenID, GLuint size);
+typedef GLushort (APIENTRYP PFNGLGETSTAGEINDEXNVPROC) (GLenum shadertype);
+typedef void (APIENTRYP PFNGLDRAWCOMMANDSNVPROC) (GLenum primitiveMode, GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, GLuint count);
+typedef void (APIENTRYP PFNGLDRAWCOMMANDSADDRESSNVPROC) (GLenum primitiveMode, const GLuint64 *indirects, const GLsizei *sizes, GLuint count);
+typedef void (APIENTRYP PFNGLDRAWCOMMANDSSTATESNVPROC) (GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count);
+typedef void (APIENTRYP PFNGLDRAWCOMMANDSSTATESADDRESSNVPROC) (const GLuint64 *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count);
+typedef void (APIENTRYP PFNGLCREATECOMMANDLISTSNVPROC) (GLsizei n, GLuint *lists);
+typedef void (APIENTRYP PFNGLDELETECOMMANDLISTSNVPROC) (GLsizei n, const GLuint *lists);
+typedef GLboolean (APIENTRYP PFNGLISCOMMANDLISTNVPROC) (GLuint list);
+typedef void (APIENTRYP PFNGLLISTDRAWCOMMANDSSTATESCLIENTNVPROC) (GLuint list, GLuint segment, const void **indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count);
+typedef void (APIENTRYP PFNGLCOMMANDLISTSEGMENTSNVPROC) (GLuint list, GLuint segments);
+typedef void (APIENTRYP PFNGLCOMPILECOMMANDLISTNVPROC) (GLuint list);
+typedef void (APIENTRYP PFNGLCALLCOMMANDLISTNVPROC) (GLuint list);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glCreateStatesNV (GLsizei n, GLuint *states);
+GLAPI void APIENTRY glDeleteStatesNV (GLsizei n, const GLuint *states);
+GLAPI GLboolean APIENTRY glIsStateNV (GLuint state);
+GLAPI void APIENTRY glStateCaptureNV (GLuint state, GLenum mode);
+GLAPI GLuint APIENTRY glGetCommandHeaderNV (GLenum tokenID, GLuint size);
+GLAPI GLushort APIENTRY glGetStageIndexNV (GLenum shadertype);
+GLAPI void APIENTRY glDrawCommandsNV (GLenum primitiveMode, GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, GLuint count);
+GLAPI void APIENTRY glDrawCommandsAddressNV (GLenum primitiveMode, const GLuint64 *indirects, const GLsizei *sizes, GLuint count);
+GLAPI void APIENTRY glDrawCommandsStatesNV (GLuint buffer, const GLintptr *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count);
+GLAPI void APIENTRY glDrawCommandsStatesAddressNV (const GLuint64 *indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count);
+GLAPI void APIENTRY glCreateCommandListsNV (GLsizei n, GLuint *lists);
+GLAPI void APIENTRY glDeleteCommandListsNV (GLsizei n, const GLuint *lists);
+GLAPI GLboolean APIENTRY glIsCommandListNV (GLuint list);
+GLAPI void APIENTRY glListDrawCommandsStatesClientNV (GLuint list, GLuint segment, const void **indirects, const GLsizei *sizes, const GLuint *states, const GLuint *fbos, GLuint count);
+GLAPI void APIENTRY glCommandListSegmentsNV (GLuint list, GLuint segments);
+GLAPI void APIENTRY glCompileCommandListNV (GLuint list);
+GLAPI void APIENTRY glCallCommandListNV (GLuint list);
+#endif
+#endif /* GL_NV_command_list */
+
 #ifndef GL_NV_compute_program5
 #define GL_NV_compute_program5 1
 #define GL_COMPUTE_PROGRAM_NV             0x90FB
@@ -8966,6 +9221,17 @@
 #endif
 #endif /* GL_NV_conservative_raster */
 
+#ifndef GL_NV_conservative_raster_dilate
+#define GL_NV_conservative_raster_dilate 1
+#define GL_CONSERVATIVE_RASTER_DILATE_NV  0x9379
+#define GL_CONSERVATIVE_RASTER_DILATE_RANGE_NV 0x937A
+#define GL_CONSERVATIVE_RASTER_DILATE_GRANULARITY_NV 0x937B
+typedef void (APIENTRYP PFNGLCONSERVATIVERASTERPARAMETERFNVPROC) (GLenum pname, GLfloat value);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glConservativeRasterParameterfNV (GLenum pname, GLfloat value);
+#endif
+#endif /* GL_NV_conservative_raster_dilate */
+
 #ifndef GL_NV_copy_depth_to_color
 #define GL_NV_copy_depth_to_color 1
 #define GL_DEPTH_STENCIL_TO_RGBA_NV       0x886E
@@ -10845,6 +11111,21 @@
 #define GL_FORMAT_SUBSAMPLE_244_244_OML   0x8983
 #endif /* GL_OML_subsample */
 
+#ifndef GL_OVR_multiview
+#define GL_OVR_multiview 1
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_NUM_VIEWS_OVR 0x9630
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_BASE_VIEW_INDEX_OVR 0x9632
+#define GL_MAX_VIEWS_OVR                  0x9631
+typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint baseViewIndex, GLsizei numViews);
+#ifdef GL_GLEXT_PROTOTYPES
+GLAPI void APIENTRY glFramebufferTextureMultiviewOVR (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint baseViewIndex, GLsizei numViews);
+#endif
+#endif /* GL_OVR_multiview */
+
+#ifndef GL_OVR_multiview2
+#define GL_OVR_multiview2 1
+#endif /* GL_OVR_multiview2 */
+
 #ifndef GL_PGI_misc_hints
 #define GL_PGI_misc_hints 1
 #define GL_PREFER_DOUBLEBUFFER_HINT_PGI   0x1A1F8
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/GL/glx.h mesa-11.0.0~git20150916+11.0.c4bae579/include/GL/glx.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/GL/glx.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/GL/glx.h	2015-09-16 14:36:08.000000000 +0000
@@ -369,18 +369,6 @@
 
 
 /*
- * Remove this when glxext.h is updated.
- */
-#ifndef GLX_NV_float_buffer
-#define GLX_NV_float_buffer 1
-
-#define GLX_FLOAT_COMPONENTS_NV         0x20B0
-
-#endif /* GLX_NV_float_buffer */
-
-
-
-/*
  * #?. GLX_MESA_swap_frame_usage
  */
 #ifndef GLX_MESA_swap_frame_usage
@@ -415,86 +403,6 @@
 #endif /* GLX_MESA_swap_control */
 
 
-
-/*
- * #?. GLX_EXT_texture_from_pixmap
- * XXX not finished?
- */
-#ifndef GLX_EXT_texture_from_pixmap
-#define GLX_EXT_texture_from_pixmap 1
-
-#define GLX_BIND_TO_TEXTURE_RGB_EXT        0x20D0
-#define GLX_BIND_TO_TEXTURE_RGBA_EXT       0x20D1
-#define GLX_BIND_TO_MIPMAP_TEXTURE_EXT     0x20D2
-#define GLX_BIND_TO_TEXTURE_TARGETS_EXT    0x20D3
-#define GLX_Y_INVERTED_EXT                 0x20D4
-
-#define GLX_TEXTURE_FORMAT_EXT             0x20D5
-#define GLX_TEXTURE_TARGET_EXT             0x20D6
-#define GLX_MIPMAP_TEXTURE_EXT             0x20D7
-
-#define GLX_TEXTURE_FORMAT_NONE_EXT        0x20D8
-#define GLX_TEXTURE_FORMAT_RGB_EXT         0x20D9
-#define GLX_TEXTURE_FORMAT_RGBA_EXT        0x20DA
-
-#define GLX_TEXTURE_1D_BIT_EXT             0x00000001
-#define GLX_TEXTURE_2D_BIT_EXT             0x00000002
-#define GLX_TEXTURE_RECTANGLE_BIT_EXT      0x00000004
-
-#define GLX_TEXTURE_1D_EXT                 0x20DB
-#define GLX_TEXTURE_2D_EXT                 0x20DC
-#define GLX_TEXTURE_RECTANGLE_EXT          0x20DD
-
-#define GLX_FRONT_LEFT_EXT                 0x20DE
-#define GLX_FRONT_RIGHT_EXT                0x20DF
-#define GLX_BACK_LEFT_EXT                  0x20E0
-#define GLX_BACK_RIGHT_EXT                 0x20E1
-#define GLX_FRONT_EXT                      GLX_FRONT_LEFT_EXT
-#define GLX_BACK_EXT                       GLX_BACK_LEFT_EXT
-#define GLX_AUX0_EXT                       0x20E2
-#define GLX_AUX1_EXT                       0x20E3 
-#define GLX_AUX2_EXT                       0x20E4 
-#define GLX_AUX3_EXT                       0x20E5 
-#define GLX_AUX4_EXT                       0x20E6 
-#define GLX_AUX5_EXT                       0x20E7 
-#define GLX_AUX6_EXT                       0x20E8
-#define GLX_AUX7_EXT                       0x20E9 
-#define GLX_AUX8_EXT                       0x20EA 
-#define GLX_AUX9_EXT                       0x20EB
-
-extern void glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list);
-extern void glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer);
-
-#endif /* GLX_EXT_texture_from_pixmap */
-
-
-#ifndef GLX_MESA_query_renderer
-#define GLX_MESA_query_renderer 1
-
-#define GLX_RENDERER_VENDOR_ID_MESA                      0x8183
-#define GLX_RENDERER_DEVICE_ID_MESA                      0x8184
-#define GLX_RENDERER_VERSION_MESA                        0x8185
-#define GLX_RENDERER_ACCELERATED_MESA                    0x8186
-#define GLX_RENDERER_VIDEO_MEMORY_MESA                   0x8187
-#define GLX_RENDERER_UNIFIED_MEMORY_ARCHITECTURE_MESA    0x8188
-#define GLX_RENDERER_PREFERRED_PROFILE_MESA              0x8189
-#define GLX_RENDERER_OPENGL_CORE_PROFILE_VERSION_MESA    0x818A
-#define GLX_RENDERER_OPENGL_COMPATIBILITY_PROFILE_VERSION_MESA    0x818B
-#define GLX_RENDERER_OPENGL_ES_PROFILE_VERSION_MESA      0x818C
-#define GLX_RENDERER_OPENGL_ES2_PROFILE_VERSION_MESA     0x818D
-#define GLX_RENDERER_ID_MESA                             0x818E
-
-Bool glXQueryRendererIntegerMESA(Display *dpy, int screen, int renderer, int attribute, unsigned int *value);
-Bool glXQueryCurrentRendererIntegerMESA(int attribute, unsigned int *value);
-const char *glXQueryRendererStringMESA(Display *dpy, int screen, int renderer, int attribute);
-const char *glXQueryCurrentRendererStringMESA(int attribute);
-
-typedef Bool (*PFNGLXQUERYRENDERERINTEGERMESAPROC) (Display *dpy, int screen, int renderer, int attribute, unsigned int *value);
-typedef Bool (*PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC) (int attribute, unsigned int *value);
-typedef const char *(*PFNGLXQUERYRENDERERSTRINGMESAPROC) (Display *dpy, int screen, int renderer, int attribute);
-typedef const char *(*PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC) (int attribute);
-#endif /* GLX_MESA_query_renderer */
-
 /*** Should these go here, or in another header? */
 /*
 ** GLX Events
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/GL/internal/dri_interface.h mesa-11.0.0~git20150916+11.0.c4bae579/include/GL/internal/dri_interface.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/GL/internal/dri_interface.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/GL/internal/dri_interface.h	2015-09-16 14:37:00.000000000 +0000
@@ -40,14 +40,7 @@
 #ifndef DRI_INTERFACE_H
 #define DRI_INTERFACE_H
 
-/* For archs with no drm.h */
-#if defined(__APPLE__) || defined(__CYGWIN__) || defined(__GNU__)
-#ifndef __NOT_HAVE_DRM_H
-#define __NOT_HAVE_DRM_H
-#endif
-#endif
-
-#ifndef __NOT_HAVE_DRM_H
+#ifdef HAVE_LIBDRM
 #include <drm.h>
 #else
 typedef unsigned int drm_context_t;
@@ -1101,12 +1094,15 @@
 
 
 /**
- * Four CC formats that matches with WL_DRM_FORMAT_* from wayland_drm.h
- * and GBM_FORMAT_* from gbm.h, used with createImageFromNames.
+ * FourCC formats that match WL_DRM_FORMAT_* from wayland_drm.h,
+ * GBM_FORMAT_* from gbm.h, and DRM_FORMAT_* from drm_fourcc.h. Used with
+ * createImageFromNames.
  *
  * \since 5
  */
 
+#define __DRI_IMAGE_FOURCC_R8		0x20203852
+#define __DRI_IMAGE_FOURCC_GR88		0x38385247
 #define __DRI_IMAGE_FOURCC_RGB565	0x36314752
 #define __DRI_IMAGE_FOURCC_ARGB8888	0x34325241
 #define __DRI_IMAGE_FOURCC_XRGB8888	0x34325258
@@ -1141,6 +1137,8 @@
 #define __DRI_IMAGE_COMPONENTS_Y_U_V	0x3003
 #define __DRI_IMAGE_COMPONENTS_Y_UV	0x3004
 #define __DRI_IMAGE_COMPONENTS_Y_XUXV	0x3005
+#define __DRI_IMAGE_COMPONENTS_R	0x3006
+#define __DRI_IMAGE_COMPONENTS_RG	0x3007
 
 
 /**
@@ -1448,6 +1446,11 @@
 #define __DRI2_RENDERER_OPENGL_COMPATIBILITY_PROFILE_VERSION  0x0008
 #define __DRI2_RENDERER_OPENGL_ES_PROFILE_VERSION             0x0009
 #define __DRI2_RENDERER_OPENGL_ES2_PROFILE_VERSION            0x000a
+#define __DRI2_RENDERER_HAS_TEXTURE_3D                        0x000b
+/* Whether there is sRGB format support for every supported 32-bit UNORM
+ * color format.
+ */
+#define __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB                  0x000c
 
 typedef struct __DRI2rendererQueryExtensionRec __DRI2rendererQueryExtension;
 struct __DRI2rendererQueryExtensionRec {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/KHR/khrplatform.h mesa-11.0.0~git20150916+11.0.c4bae579/include/KHR/khrplatform.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/KHR/khrplatform.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/KHR/khrplatform.h	2015-09-16 14:36:08.000000000 +0000
@@ -26,7 +26,7 @@
 
 /* Khronos platform-specific types and definitions.
  *
- * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $
+ * $Revision: 23298 $ on $Date: 2013-09-30 17:07:13 -0700 (Mon, 30 Sep 2013) $
  *
  * Adopters may modify this file to suit their platform. Adopters are
  * encouraged to submit platform specific modifications to the Khronos
@@ -106,9 +106,9 @@
 #elif defined (__SYMBIAN32__)
 #   define KHRONOS_APICALL IMPORT_C
 #elif (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \
-	|| (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+       || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
 /* KHRONOS_APIATTRIBUTES is not used by the client API headers yet */
-#  define KHRONOS_APICALL __attribute__((visibility("default")))
+#   define KHRONOS_APICALL __attribute__((visibility("default")))
 #else
 #   define KHRONOS_APICALL
 #endif
@@ -229,10 +229,23 @@
 typedef unsigned char          khronos_uint8_t;
 typedef signed   short int     khronos_int16_t;
 typedef unsigned short int     khronos_uint16_t;
+
+/*
+ * Types that differ between LLP64 and LP64 architectures - in LLP64, 
+ * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
+ * to be the only LLP64 architecture in current use.
+ */
+#ifdef _WIN64
+typedef signed   long long int khronos_intptr_t;
+typedef unsigned long long int khronos_uintptr_t;
+typedef signed   long long int khronos_ssize_t;
+typedef unsigned long long int khronos_usize_t;
+#else
 typedef signed   long  int     khronos_intptr_t;
 typedef unsigned long  int     khronos_uintptr_t;
 typedef signed   long  int     khronos_ssize_t;
 typedef unsigned long  int     khronos_usize_t;
+#endif
 
 #if KHRONOS_SUPPORT_FLOAT
 /*
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/pci_ids/i965_pci_ids.h mesa-11.0.0~git20150916+11.0.c4bae579/include/pci_ids/i965_pci_ids.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/pci_ids/i965_pci_ids.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/pci_ids/i965_pci_ids.h	2015-09-16 14:36:08.000000000 +0000
@@ -128,3 +128,6 @@
 CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics (Broxton)")
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/include/pci_ids/radeonsi_pci_ids.h mesa-11.0.0~git20150916+11.0.c4bae579/include/pci_ids/radeonsi_pci_ids.h
--- mesa-10.6.5~git20150829+10.6.fa342251/include/pci_ids/radeonsi_pci_ids.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/include/pci_ids/radeonsi_pci_ids.h	2015-09-16 14:36:08.000000000 +0000
@@ -157,3 +157,27 @@
 CHIPSET(0x67B9, HAWAII_67B9, HAWAII)
 CHIPSET(0x67BA, HAWAII_67BA, HAWAII)
 CHIPSET(0x67BE, HAWAII_67BE, HAWAII)
+
+CHIPSET(0x6900, ICELAND_, ICELAND)
+CHIPSET(0x6901, ICELAND_, ICELAND)
+CHIPSET(0x6902, ICELAND_, ICELAND)
+CHIPSET(0x6903, ICELAND_, ICELAND)
+CHIPSET(0x6907, ICELAND_, ICELAND)
+
+CHIPSET(0x6920, TONGA_, TONGA)
+CHIPSET(0x6921, TONGA_, TONGA)
+CHIPSET(0x6928, TONGA_, TONGA)
+CHIPSET(0x6929, TONGA_, TONGA)
+CHIPSET(0x692B, TONGA_, TONGA)
+CHIPSET(0x692F, TONGA_, TONGA)
+CHIPSET(0x6930, TONGA_, TONGA)
+CHIPSET(0x6938, TONGA_, TONGA)
+CHIPSET(0x6939, TONGA_, TONGA)
+
+CHIPSET(0x9870, CARRIZO_, CARRIZO)
+CHIPSET(0x9874, CARRIZO_, CARRIZO)
+CHIPSET(0x9875, CARRIZO_, CARRIZO)
+CHIPSET(0x9876, CARRIZO_, CARRIZO)
+CHIPSET(0x9877, CARRIZO_, CARRIZO)
+
+CHIPSET(0x7300, FIJI_, FIJI)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/.lastcommit mesa-11.0.0~git20150916+11.0.c4bae579/.lastcommit
--- mesa-10.6.5~git20150829+10.6.fa342251/.lastcommit	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/.lastcommit	2015-09-16 14:36:10.000000000 +0000
@@ -1 +1 @@
-commit fa34225167396008e75e93f23696666caba8a7bf
+commit c4bae5792bb5515da42e23f166f5ba5d68f79615
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/Makefile.am	2015-09-16 14:36:08.000000000 +0000
@@ -32,7 +32,9 @@
 	--enable-vdpau \
 	--enable-xa \
 	--enable-xvmc \
-	--with-egl-platforms=x11,wayland,drm
+	--with-egl-platforms=x11,wayland,drm \
+	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
+	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast
 
 ACLOCAL_AMFLAGS = -I m4
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/scons/gallium.py mesa-11.0.0~git20150916+11.0.c4bae579/scons/gallium.py
--- mesa-10.6.5~git20150829+10.6.fa342251/scons/gallium.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/scons/gallium.py	2015-09-16 14:36:08.000000000 +0000
@@ -300,6 +300,7 @@
 
     # C preprocessor options
     cppdefines = []
+    cppdefines += ['__STDC_LIMIT_MACROS']
     if env['build'] in ('debug', 'checked'):
         cppdefines += ['DEBUG']
     else:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/scons/llvm.py mesa-11.0.0~git20150916+11.0.c4bae579/scons/llvm.py
--- mesa-10.6.5~git20150829+10.6.fa342251/scons/llvm.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/scons/llvm.py	2015-09-16 14:36:08.000000000 +0000
@@ -120,6 +120,7 @@
             ])
         elif llvm_version >= distutils.version.LooseVersion('3.5'):
             env.Prepend(LIBS = [
+                'LLVMMCDisassembler',
                 'LLVMBitWriter', 'LLVMMCJIT', 'LLVMRuntimeDyld',
                 'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen',
                 'LLVMSelectionDAG', 'LLVMAsmPrinter', 'LLVMX86Desc',
@@ -132,6 +133,7 @@
             ])
         else:
             env.Prepend(LIBS = [
+                'LLVMMCDisassembler',
                 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
                 'LLVMX86CodeGen', 'LLVMX86Desc', 'LLVMSelectionDAG',
                 'LLVMAsmPrinter', 'LLVMMCParser', 'LLVMX86AsmPrinter',
@@ -189,7 +191,7 @@
             if '-fno-rtti' in cxxflags:
                 env.Append(CXXFLAGS = ['-fno-rtti'])
 
-            components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter']
+            components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 'mcdisassembler']
 
             env.ParseConfig('llvm-config --libs ' + ' '.join(components))
             env.ParseConfig('llvm-config --ldflags')
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/Android.mk	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/Android.mk	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,97 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2010-2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# Android.mk for libGLES_mesa
+
+LOCAL_PATH := $(call my-dir)
+
+include $(LOCAL_PATH)/Makefile.sources
+
+# ---------------------------------------
+# Build libGLES_mesa
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(LIBEGL_C_FILES) \
+	$(dri2_backend_core_FILES) \
+	drivers/dri2/platform_android.c
+
+LOCAL_CFLAGS := \
+	-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_ANDROID \
+	-D_EGL_BUILT_IN_DRIVER_DRI2 \
+	-DHAVE_ANDROID_PLATFORM
+
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+# Select by bitness, not arch name, so every ABI (arm64, mips, ...) gets a path.
+LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
+else
+LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+endif
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/egl/main \
+	$(MESA_TOP)/src/egl/drivers/dri2 \
+
+LOCAL_STATIC_LIBRARIES := \
+	libmesa_loader
+
+LOCAL_SHARED_LIBRARIES := \
+	libdl \
+	libhardware \
+	liblog \
+	libcutils \
+	libgralloc_drm \
+
+ifeq ($(shell echo "$(MESA_ANDROID_VERSION) >= 4.2" | bc),1)
+LOCAL_SHARED_LIBRARIES += libsync
+endif
+
+# add libdrm if there are hardware drivers
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
+LOCAL_SHARED_LIBRARIES += libdrm
+endif
+
+ifeq ($(strip $(MESA_BUILD_CLASSIC)),true)
+# require i915_dri and/or i965_dri
+LOCAL_REQUIRED_MODULES += \
+	$(addsuffix _dri, $(filter i915 i965, $(MESA_GPU_DRIVERS)))
+endif # MESA_BUILD_CLASSIC
+
+ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
+LOCAL_REQUIRED_MODULES += gallium_dri
+endif # MESA_BUILD_GALLIUM
+
+
+LOCAL_MODULE := libGLES_mesa
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+LOCAL_MODULE_RELATIVE_PATH := egl
+else
+LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
+endif
+
+include $(MESA_COMMON_MK)
+include $(BUILD_SHARED_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/Android.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/Android.mk	1970-01-01 00:00:00.000000000 +0000
@@ -1,64 +0,0 @@
-# Mesa 3-D graphics library
-#
-# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
-# Copyright (C) 2010-2011 LunarG Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# Android.mk for egl_dri2
-
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
-	egl_dri2.c \
-	platform_android.c
-
-LOCAL_CFLAGS := \
-	-DHAVE_SHARED_GLAPI \
-	-DHAVE_ANDROID_PLATFORM
-
-ifeq ($(MESA_LOLLIPOP_BUILD),true)
-LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
-else
-LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-endif
-
-LOCAL_C_INCLUDES := \
-	$(MESA_TOP)/src/mapi \
-	$(MESA_TOP)/src/egl/main \
-	$(MESA_TOP)/src/loader \
-	$(DRM_GRALLOC_TOP)
-
-LOCAL_STATIC_LIBRARIES := \
-	libmesa_loader
-
-LOCAL_SHARED_LIBRARIES := libdrm
-
-ifeq ($(shell echo "$(MESA_ANDROID_VERSION) >= 4.2" | bc),1)
-LOCAL_SHARED_LIBRARIES += \
-	libsync
-endif
-
-LOCAL_MODULE := libmesa_egl_dri2
-
-include $(MESA_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/egl_dri2.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/egl_dri2.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/egl_dri2.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/egl_dri2.c	2015-09-16 14:37:00.000000000 +0000
@@ -28,6 +28,7 @@
 #define WL_HIDE_DEPRECATED
 
 #include <stdint.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -51,7 +52,23 @@
 #endif
 
 #include "egl_dri2.h"
-#include "../util/u_atomic.h"
+#include "util/u_atomic.h"
+
+/* The kernel header drm_fourcc.h defines the DRM formats below.  We duplicate
+ * some of the definitions here so that building Mesa won't require
+ * bleeding-edge kernel headers.
+ */
+#ifndef DRM_FORMAT_R8
+#define DRM_FORMAT_R8            fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
+#endif
+
+#ifndef DRM_FORMAT_RG88
+#define DRM_FORMAT_RG88          fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */
+#endif
+
+#ifndef DRM_FORMAT_GR88
+#define DRM_FORMAT_GR88          fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */
+#endif
 
 const __DRIuseInvalidateExtension use_invalidate = {
    .base = { __DRI_USE_INVALIDATE, 1 }
@@ -109,6 +126,18 @@
    0,				/* __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE */
 };
 
+const __DRIconfig *
+dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
+                    EGLenum colorspace)
+{
+   if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR)
+      return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config :
+                                              conf->dri_srgb_single_config;
+   else
+      return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config :
+                                              conf->dri_single_config;
+}
+
 static EGLBoolean
 dri2_match_config(const _EGLConfig *conf, const _EGLConfig *criteria)
 {
@@ -130,6 +159,7 @@
    struct dri2_egl_display *dri2_dpy;
    _EGLConfig base;
    unsigned int attrib, value, double_buffer;
+   bool srgb = false;
    EGLint key, bind_to_texture_rgb, bind_to_texture_rgba;
    unsigned int dri_masks[4] = { 0, 0, 0, 0 };
    _EGLConfig *matching_config;
@@ -139,7 +169,7 @@
 
    dri2_dpy = disp->DriverData;
    _eglInitConfig(&base, disp, id);
-   
+
    i = 0;
    double_buffer = 0;
    bind_to_texture_rgb = 0;
@@ -155,7 +185,7 @@
 	 else
 	    return NULL;
 	 _eglSetConfigKey(&base, EGL_COLOR_BUFFER_TYPE, value);
-	 break;	 
+	 break;
 
       case __DRI_ATTRIB_CONFIG_CAVEAT:
          if (value & __DRI_ATTRIB_NON_CONFORMANT_CONFIG)
@@ -204,6 +234,10 @@
             return NULL;
          break;
 
+      case __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE:
+         srgb = value != 0;
+         break;
+
       default:
 	 key = dri2_to_egl_attribute_map[attrib];
 	 if (key != 0)
@@ -249,28 +283,35 @@
    if (num_configs == 1) {
       conf = (struct dri2_egl_config *) matching_config;
 
-      if (double_buffer && !conf->dri_double_config)
+      if (double_buffer && srgb && !conf->dri_srgb_double_config)
+         conf->dri_srgb_double_config = dri_config;
+      else if (double_buffer && !srgb && !conf->dri_double_config)
          conf->dri_double_config = dri_config;
-      else if (!double_buffer && !conf->dri_single_config)
+      else if (!double_buffer && srgb && !conf->dri_srgb_single_config)
+         conf->dri_srgb_single_config = dri_config;
+      else if (!double_buffer && !srgb && !conf->dri_single_config)
          conf->dri_single_config = dri_config;
       else
          /* a similar config type is already added (unlikely) => discard */
          return NULL;
    }
    else if (num_configs == 0) {
-      conf = malloc(sizeof *conf);
+      conf = calloc(1, sizeof *conf);
       if (conf == NULL)
          return NULL;
 
       memcpy(&conf->base, &base, sizeof base);
       if (double_buffer) {
-         conf->dri_double_config = dri_config;
-         conf->dri_single_config = NULL;
+         if (srgb)
+            conf->dri_srgb_double_config = dri_config;
+         else
+            conf->dri_double_config = dri_config;
       } else {
-         conf->dri_single_config = dri_config;
-         conf->dri_double_config = NULL;
+         if (srgb)
+            conf->dri_srgb_single_config = dri_config;
+         else
+            conf->dri_single_config = dri_config;
       }
-      conf->base.SurfaceType = 0;
       conf->base.ConfigID = config_id;
 
       _eglLinkConfig(&conf->base);
@@ -365,7 +406,7 @@
 	 }
       }
    }
-   
+
    for (j = 0; matches[j].name; j++) {
       field = ((char *) dri2_dpy + matches[j].offset);
       if (*(const __DRIextension **) field == NULL) {
@@ -397,7 +438,7 @@
 
    dri2_dpy->driver = NULL;
    end = search_paths + strlen(search_paths);
-   for (p = search_paths; p < end && dri2_dpy->driver == NULL; p = next + 1) {
+   for (p = search_paths; p < end; p = next + 1) {
       int len;
       next = strchr(p, ':');
       if (next == NULL)
@@ -419,6 +460,15 @@
       /* not need continue to loop all paths once the driver is found */
       if (dri2_dpy->driver != NULL)
          break;
+
+#ifdef ANDROID
+      snprintf(path, sizeof path, "%.*s/gallium_dri.so", len, p);
+      dri2_dpy->driver = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
+      if (dri2_dpy->driver == NULL)
+         _eglLog(_EGL_DEBUG, "failed to open %s: %s\n", path, dlerror());
+      else
+         break;
+#endif
    }
 
    if (dri2_dpy->driver == NULL) {
@@ -491,6 +541,19 @@
    return EGL_TRUE;
 }
 
+static unsigned
+dri2_renderer_query_integer(struct dri2_egl_display *dri2_dpy, int param)
+{
+   const __DRI2rendererQueryExtension *rendererQuery = dri2_dpy->rendererQuery;
+   unsigned int value = 0;
+
+   if (!rendererQuery ||
+       rendererQuery->queryInteger(dri2_dpy->dri_screen, param, &value) == -1)
+      return 0;
+
+   return value;
+}
+
 void
 dri2_setup_screen(_EGLDisplay *disp)
 {
@@ -521,6 +584,10 @@
    disp->Extensions.KHR_surfaceless_context = EGL_TRUE;
    disp->Extensions.MESA_configless_context = EGL_TRUE;
 
+   if (dri2_renderer_query_integer(dri2_dpy,
+                                   __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
+      disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
+
    if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
       disp->Extensions.KHR_create_context = EGL_TRUE;
 
@@ -558,6 +625,9 @@
          disp->Extensions.KHR_gl_texture_2D_image = EGL_TRUE;
          disp->Extensions.KHR_gl_texture_cubemap_image = EGL_TRUE;
       }
+      if (dri2_renderer_query_integer(dri2_dpy,
+                                      __DRI2_RENDERER_HAS_TEXTURE_3D))
+         disp->Extensions.KHR_gl_texture_3D_image = EGL_TRUE;
 #ifdef HAVE_LIBDRM
       if (dri2_dpy->image->base.version >= 8 &&
           dri2_dpy->image->createImageFromDmaBufs) {
@@ -615,7 +685,7 @@
    dri2_dpy->own_dri_screen = 1;
 
    extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen);
-   
+
    if (dri2_dpy->dri2) {
       if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions))
          goto cleanup_dri_screen;
@@ -635,6 +705,9 @@
       if (strcmp(extensions[i]->name, __DRI2_FENCE) == 0) {
          dri2_dpy->fence = (__DRI2fenceExtension *) extensions[i];
       }
+      if (strcmp(extensions[i]->name, __DRI2_RENDERER_QUERY) == 0) {
+         dri2_dpy->rendererQuery = (__DRI2rendererQueryExtension *) extensions[i];
+      }
    }
 
    dri2_setup_screen(disp);
@@ -658,6 +731,13 @@
       return EGL_FALSE;
 
    switch (disp->Platform) {
+#ifdef HAVE_SURFACELESS_PLATFORM
+   case _EGL_PLATFORM_SURFACELESS:
+      if (disp->Options.TestOnly)
+         return EGL_TRUE;
+      return dri2_initialize_surfaceless(drv, disp);
+#endif
+
 #ifdef HAVE_X11_PLATFORM
    case _EGL_PLATFORM_X11:
       if (disp->Options.TestOnly)
@@ -1256,7 +1336,8 @@
       format = __DRI_TEXTURE_FORMAT_RGBA;
       break;
    default:
-      assert(0);
+      assert(!"Unexpected texture format in dri2_bind_tex_image()");
+      format = __DRI_TEXTURE_FORMAT_RGBA;
    }
 
    switch (dri2_surf->base.TextureTarget) {
@@ -1264,7 +1345,8 @@
       target = GL_TEXTURE_2D;
       break;
    default:
-      assert(0);
+      target = GL_TEXTURE_2D;
+      assert(!"Unexpected texture target in dri2_bind_tex_image()");
    }
 
    (*dri2_dpy->tex_buffer->setTexBuffer2)(dri2_ctx->dri_context,
@@ -1366,53 +1448,6 @@
    return dri2_create_image_from_dri(disp, dri_image);
 }
 
-#ifdef HAVE_LIBDRM
-static _EGLImage *
-dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
-				  EGLClientBuffer buffer, const EGLint *attr_list)
-{
-   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   EGLint format, name, pitch, err;
-   _EGLImageAttribs attrs;
-   __DRIimage *dri_image;
-
-   name = (EGLint) (uintptr_t) buffer;
-
-   err = _eglParseImageAttribList(&attrs, disp, attr_list);
-   if (err != EGL_SUCCESS)
-      return NULL;
-
-   if (attrs.Width <= 0 || attrs.Height <= 0 ||
-       attrs.DRMBufferStrideMESA <= 0) {
-      _eglError(EGL_BAD_PARAMETER,
-		"bad width, height or stride");
-      return NULL;
-   }
-
-   switch (attrs.DRMBufferFormatMESA) {
-   case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
-      format = __DRI_IMAGE_FORMAT_ARGB8888;
-      pitch = attrs.DRMBufferStrideMESA;
-      break;
-   default:
-      _eglError(EGL_BAD_PARAMETER,
-		"dri2_create_image_khr: unsupported pixmap depth");
-      return NULL;
-   }
-
-   dri_image =
-      dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
-					   attrs.Width,
-					   attrs.Height,
-					   format,
-					   name,
-					   pitch,
-					   NULL);
-
-   return dri2_create_image_from_dri(disp, dri_image);
-}
-#endif
-
 #ifdef HAVE_WAYLAND_PLATFORM
 
 /* This structure describes how a wl_buffer maps to one or more
@@ -1552,9 +1587,15 @@
       gl_target = GL_TEXTURE_2D;
       break;
    case EGL_GL_TEXTURE_3D_KHR:
-      depth = attrs.GLTextureZOffset;
-      gl_target = GL_TEXTURE_3D;
-      break;
+      if (disp->Extensions.KHR_gl_texture_3D_image) {
+         depth = attrs.GLTextureZOffset;
+         gl_target = GL_TEXTURE_3D;
+         break;
+      }
+      else {
+         _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
+         return EGL_NO_IMAGE_KHR;
+      }
    case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR:
    case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR:
    case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR:
@@ -1607,6 +1648,51 @@
 }
 
 #ifdef HAVE_LIBDRM
+static _EGLImage *
+dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
+				  EGLClientBuffer buffer, const EGLint *attr_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   EGLint format, name, pitch, err;
+   _EGLImageAttribs attrs;
+   __DRIimage *dri_image;
+
+   name = (EGLint) (uintptr_t) buffer;
+
+   err = _eglParseImageAttribList(&attrs, disp, attr_list);
+   if (err != EGL_SUCCESS)
+      return NULL;
+
+   if (attrs.Width <= 0 || attrs.Height <= 0 ||
+       attrs.DRMBufferStrideMESA <= 0) {
+      _eglError(EGL_BAD_PARAMETER,
+		"bad width, height or stride");
+      return NULL;
+   }
+
+   switch (attrs.DRMBufferFormatMESA) {
+   case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
+      format = __DRI_IMAGE_FORMAT_ARGB8888;
+      pitch = attrs.DRMBufferStrideMESA;
+      break;
+   default:
+      _eglError(EGL_BAD_PARAMETER,
+		"dri2_create_image_khr: unsupported pixmap depth");
+      return NULL;
+   }
+
+   dri_image =
+      dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
+					   attrs.Width,
+					   attrs.Height,
+					   format,
+					   name,
+					   pitch,
+					   NULL);
+
+   return dri2_create_image_from_dri(disp, dri_image);
+}
+
 static EGLBoolean
 dri2_check_dma_buf_attribs(const _EGLImageAttribs *attrs)
 {
@@ -1659,6 +1745,9 @@
    unsigned i, plane_n;
 
    switch (attrs->DMABufFourCC.Value) {
+   case DRM_FORMAT_R8:
+   case DRM_FORMAT_RG88:
+   case DRM_FORMAT_GR88:
    case DRM_FORMAT_RGB332:
    case DRM_FORMAT_BGR233:
    case DRM_FORMAT_XRGB4444:
@@ -1836,59 +1925,6 @@
 
    return res;
 }
-#endif
-
-_EGLImage *
-dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
-		      _EGLContext *ctx, EGLenum target,
-		      EGLClientBuffer buffer, const EGLint *attr_list)
-{
-   (void) drv;
-
-   switch (target) {
-   case EGL_GL_TEXTURE_2D_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR:
-   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR:
-      return dri2_create_image_khr_texture(disp, ctx, target, buffer, attr_list);
-   case EGL_GL_RENDERBUFFER_KHR:
-      return dri2_create_image_khr_renderbuffer(disp, ctx, buffer, attr_list);
-#ifdef HAVE_LIBDRM
-   case EGL_DRM_BUFFER_MESA:
-      return dri2_create_image_mesa_drm_buffer(disp, ctx, buffer, attr_list);
-#endif
-#ifdef HAVE_WAYLAND_PLATFORM
-   case EGL_WAYLAND_BUFFER_WL:
-      return dri2_create_image_wayland_wl_buffer(disp, ctx, buffer, attr_list);
-#endif
-#ifdef HAVE_LIBDRM
-   case EGL_LINUX_DMA_BUF_EXT:
-      return dri2_create_image_dma_buf(disp, ctx, buffer, attr_list);
-#endif
-   default:
-      _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
-      return EGL_NO_IMAGE_KHR;
-   }
-}
-
-static EGLBoolean
-dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image)
-{
-   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_image *dri2_img = dri2_egl_image(image);
-
-   (void) drv;
-
-   dri2_dpy->image->destroyImage(dri2_img->dri_image);
-   free(dri2_img);
-
-   return EGL_TRUE;
-}
-
-#ifdef HAVE_LIBDRM
 static _EGLImage *
 dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp,
 			   const EGLint *attr_list)
@@ -1956,7 +1992,7 @@
    if (attrs.DRMBufferUseMESA & EGL_DRM_BUFFER_USE_CURSOR_MESA)
       dri_use |= __DRI_IMAGE_USE_CURSOR;
 
-   dri2_img->dri_image = 
+   dri2_img->dri_image =
       dri2_dpy->image->createImage(dri2_dpy->dri_screen,
 				   attrs.Width, attrs.Height,
                                    format, dri_use, dri2_img);
@@ -2048,8 +2084,65 @@
 
    return EGL_TRUE;
 }
+
 #endif
 
+/* Route image creation to the helper for target; unknown targets, and 3D
+ * textures without KHR_gl_texture_3D_image, fail with EGL_BAD_PARAMETER. */
+_EGLImage *
+dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
+		      _EGLContext *ctx, EGLenum target,
+		      EGLClientBuffer buffer, const EGLint *attr_list)
+{
+   (void) drv;
+
+   switch (target) {
+   case EGL_GL_TEXTURE_3D_KHR:
+      if (!disp->Extensions.KHR_gl_texture_3D_image) {
+         _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
+         return EGL_NO_IMAGE_KHR;
+      }
+      /* fall through */
+   case EGL_GL_TEXTURE_2D_KHR:
+   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR:
+   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR:
+   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR:
+   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR:
+   case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR:
+   case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR:
+      return dri2_create_image_khr_texture(disp, ctx, target, buffer, attr_list);
+   case EGL_GL_RENDERBUFFER_KHR:
+      return dri2_create_image_khr_renderbuffer(disp, ctx, buffer, attr_list);
+#ifdef HAVE_LIBDRM
+   case EGL_DRM_BUFFER_MESA:
+      return dri2_create_image_mesa_drm_buffer(disp, ctx, buffer, attr_list);
+   case EGL_LINUX_DMA_BUF_EXT:
+      return dri2_create_image_dma_buf(disp, ctx, buffer, attr_list);
+#endif
+#ifdef HAVE_WAYLAND_PLATFORM
+   case EGL_WAYLAND_BUFFER_WL:
+      return dri2_create_image_wayland_wl_buffer(disp, ctx, buffer, attr_list);
+#endif
+   default:
+      _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
+      return EGL_NO_IMAGE_KHR;
+   }
+}
+
+/* Destroy the DRI image wrapped by image, then free the wrapper itself. */
+static EGLBoolean
+dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image)
+{
+   struct dri2_egl_display *dpy_priv = dri2_egl_display(disp);
+   struct dri2_egl_image *img = dri2_egl_image(image);
+
+   (void) drv;
+   dpy_priv->image->destroyImage(img->dri_image);
+   free(img);
+
+   return EGL_TRUE;
+}
+
 #ifdef HAVE_WAYLAND_PLATFORM
 
 static void
@@ -2216,7 +2309,7 @@
 static _EGLSync *
 dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy,
                  EGLenum type, const EGLint *attrib_list,
-                 const EGLAttribKHR *attrib_list64)
+                 const EGLAttrib *attrib_list64)
 {
    _EGLContext *ctx = _eglGetCurrentContext();
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
@@ -2282,7 +2375,7 @@
 
 static EGLint
 dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
-                      EGLint flags, EGLTimeKHR timeout)
+                      EGLint flags, EGLTime timeout)
 {
    _EGLContext *ctx = _eglGetCurrentContext();
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
@@ -2335,19 +2428,13 @@
 dri2_load(_EGLDriver *drv)
 {
    struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
-#ifdef HAVE_SHARED_GLAPI
 #ifdef HAVE_ANDROID_PLATFORM
    const char *libname = "libglapi.so";
+#elif defined(__APPLE__)
+   const char *libname = "libglapi.0.dylib";
 #else
    const char *libname = "libglapi.so.0";
 #endif
-#else
-   /*
-    * Both libGL.so and libglapi.so are glapi providers.  There is no way to
-    * tell which one to load.
-    */
-   const char *libname = NULL;
-#endif
    void *handle;
 
    /* RTLD_GLOBAL to make sure glapi symbols are visible to DRI drivers */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/egl_dri2.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/egl_dri2.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/egl_dri2.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/egl_dri2.h	2015-09-16 14:37:00.000000000 +0000
@@ -120,9 +120,9 @@
    EGLBoolean (*swap_buffers)(_EGLDriver *drv, _EGLDisplay *dpy,
                               _EGLSurface *surf);
 
-   EGLBoolean (*swap_buffers_with_damage)(_EGLDriver *drv, _EGLDisplay *dpy,     
-                                          _EGLSurface *surface,                  
-                                          const EGLint *rects, EGLint n_rects);  
+   EGLBoolean (*swap_buffers_with_damage)(_EGLDriver *drv, _EGLDisplay *dpy,
+                                          _EGLSurface *surface,
+                                          const EGLint *rects, EGLint n_rects);
 
    EGLBoolean (*swap_buffers_region)(_EGLDriver *drv, _EGLDisplay *dpy,
                                      _EGLSurface *surf, EGLint numRects,
@@ -166,6 +166,7 @@
    const __DRIrobustnessExtension *robustness;
    const __DRI2configQueryExtension *config;
    const __DRI2fenceExtension *fence;
+   const __DRI2rendererQueryExtension *rendererQuery;
    int                       fd;
 
    int                       own_device;
@@ -285,6 +286,8 @@
    _EGLConfig         base;
    const __DRIconfig *dri_single_config;
    const __DRIconfig *dri_double_config;
+   const __DRIconfig *dri_srgb_single_config;
+   const __DRIconfig *dri_srgb_double_config;
 };
 
 struct dri2_egl_image
@@ -351,7 +354,14 @@
 EGLBoolean
 dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp);
 
+EGLBoolean
+dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp);
+
 void
 dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw);
 
+const __DRIconfig *
+dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
+                    EGLenum colorspace);
+
 #endif /* EGL_DRI2_INCLUDED */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/Makefile.am	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/Makefile.am	1970-01-01 00:00:00.000000000 +0000
@@ -1,68 +0,0 @@
-# Copyright © 2012 Intel Corporation
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-AM_CFLAGS = \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/src/egl/main \
-	-I$(top_srcdir)/src/loader \
-	-I$(top_srcdir)/src/gbm/main \
-	-I$(top_srcdir)/src/gbm/backends/dri \
-	-I$(top_srcdir)/src/egl/wayland/wayland-egl \
-	-I$(top_srcdir)/src/egl/wayland/wayland-drm \
-	-I$(top_builddir)/src/egl/wayland/wayland-drm \
-	$(DEFINES) \
-	$(VISIBILITY_CFLAGS) \
-	$(LIBDRM_CFLAGS) \
-	-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\"
-
-noinst_LTLIBRARIES = libegl_dri2.la
-
-libegl_dri2_la_SOURCES = \
-	egl_dri2.c \
-	egl_dri2.h \
-	egl_dri2_fallbacks.h
-
-libegl_dri2_la_LIBADD = \
-	$(top_builddir)/src/loader/libloader.la \
-	$(EGL_LIB_DEPS)
-
-if HAVE_SHARED_GLAPI
-AM_CFLAGS += -DHAVE_SHARED_GLAPI
-endif
-
-if HAVE_EGL_PLATFORM_X11
-libegl_dri2_la_SOURCES += platform_x11.c
-AM_CFLAGS += -DHAVE_X11_PLATFORM
-AM_CFLAGS += $(XCB_DRI2_CFLAGS)
-endif
-
-if HAVE_EGL_PLATFORM_WAYLAND
-libegl_dri2_la_SOURCES += platform_wayland.c
-AM_CFLAGS += -DHAVE_WAYLAND_PLATFORM
-AM_CFLAGS += $(WAYLAND_CFLAGS)
-endif
-
-if HAVE_EGL_PLATFORM_DRM
-libegl_dri2_la_SOURCES += platform_drm.c
-AM_CFLAGS += -DHAVE_DRM_PLATFORM
-endif
-
-EXTRA_DIST = SConscript
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_android.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_android.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_android.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_android.c	2015-09-16 14:36:08.000000000 +0000
@@ -199,6 +199,7 @@
    struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
    struct dri2_egl_surface *dri2_surf;
    struct ANativeWindow *window = native_window;
+   const __DRIconfig *config;
 
    dri2_surf = calloc(1, sizeof *dri2_surf);
    if (!dri2_surf) {
@@ -230,9 +231,11 @@
       window->query(window, NATIVE_WINDOW_HEIGHT, &dri2_surf->base.Height);
    }
 
+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);
+
    dri2_surf->dri_drawable =
-      (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen,
-					   dri2_conf->dri_double_config,
+      (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
                                            dri2_surf);
    if (dri2_surf->dri_drawable == NULL) {
       _eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable");
@@ -707,10 +710,6 @@
    dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
    dpy->Extensions.KHR_image_base = EGL_TRUE;
 
-   /* we're supporting EGL 1.4 */
-   dpy->VersionMajor = 1;
-   dpy->VersionMinor = 4;
-
    /* Fill vtbl last to prevent accidentally calling virtual function during
     * initialization.
     */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_drm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_drm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_drm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_drm.c	2015-09-16 14:36:08.000000000 +0000
@@ -68,7 +68,7 @@
 {
    struct gbm_dri_surface *surf = (struct gbm_dri_surface *) _surf;
    struct dri2_egl_surface *dri2_surf = surf->dri_private;
-   int i;
+   unsigned i;
 
    for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
       if (dri2_surf->color_buffers[i].bo == bo) {
@@ -82,7 +82,7 @@
 {
    struct gbm_dri_surface *surf = (struct gbm_dri_surface *) _surf;
    struct dri2_egl_surface *dri2_surf = surf->dri_private;
-   int i;
+   unsigned i;
 
    for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++)
       if (!dri2_surf->color_buffers[i].locked)
@@ -115,8 +115,11 @@
 
    switch (type) {
    case EGL_WINDOW_BIT:
-      if (!window)
-         return NULL;
+      if (!window) {
+         _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+         goto cleanup_surf;
+      }
+
       surf = gbm_dri_surface(window);
       dri2_surf->gbm_surf = surf;
       dri2_surf->base.Width =  surf->base.width;
@@ -128,10 +131,13 @@
    }
 
    if (dri2_dpy->dri2) {
+      const __DRIconfig *config =
+         dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                             dri2_surf->base.GLColorspace);
+
       dri2_surf->dri_drawable =
-         (*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen,
-                                               dri2_conf->dri_double_config,
-                                               dri2_surf->gbm_surf);
+         (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                              dri2_surf->gbm_surf);
 
    } else {
       assert(dri2_dpy->swrast != NULL);
@@ -183,7 +189,7 @@
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
-   int i;
+   unsigned i;
 
    if (!_eglPutSurface(surf))
       return EGL_TRUE;
@@ -212,7 +218,7 @@
    struct dri2_egl_display *dri2_dpy =
       dri2_egl_display(dri2_surf->base.Resource.Display);
    struct gbm_dri_surface *surf = dri2_surf->gbm_surf;
-   int i;
+   unsigned i;
 
    if (dri2_surf->back == NULL) {
       for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
@@ -408,7 +414,7 @@
 {
    struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
    struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
-   int i;
+   unsigned i;
 
    if (dri2_dpy->swrast) {
       (*dri2_dpy->core->swapBuffers)(dri2_surf->dri_drawable);
@@ -611,9 +617,9 @@
       char buf[64];
       int n = snprintf(buf, sizeof(buf), DRM_DEV_NAME, DRM_DIR_NAME, 0);
       if (n != -1 && n < sizeof(buf))
-         fd = open(buf, O_RDWR);
+         fd = loader_open_device(buf);
       if (fd < 0)
-         fd = open("/dev/dri/card0", O_RDWR);
+         fd = loader_open_device("/dev/dri/card0");
       dri2_dpy->own_device = 1;
       gbm = gbm_create_device(fd);
       if (gbm == NULL)
@@ -632,7 +638,7 @@
    }
 
    if (fd < 0) {
-      fd = dup(gbm_device_get_fd(gbm));
+      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
       if (fd < 0) {
          free(dri2_dpy);
          return EGL_FALSE;
@@ -715,10 +721,6 @@
    }
 #endif
 
-   /* we're supporting EGL 1.4 */
-   disp->VersionMajor = 1;
-   disp->VersionMinor = 4;
-
    /* Fill vtbl last to prevent accidentally calling virtual function during
     * initialization.
     */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_surfaceless.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_surfaceless.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_surfaceless.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_surfaceless.c	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,162 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (c) 2014 The Chromium OS Authors.
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <xf86drm.h>
+#include <dlfcn.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "egl_dri2.h"
+#include "egl_dri2_fallbacks.h"
+#include "loader.h"
+
+static struct dri2_egl_display_vtbl dri2_surfaceless_display_vtbl = {
+   .create_pixmap_surface = dri2_fallback_create_pixmap_surface,
+   .create_image = dri2_create_image_khr, /* only non-fallback hook listed */
+   .swap_interval = dri2_fallback_swap_interval,
+   .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
+   .swap_buffers_region = dri2_fallback_swap_buffers_region,
+   .post_sub_buffer = dri2_fallback_post_sub_buffer,
+   .copy_buffers = dri2_fallback_copy_buffers,
+   .query_buffer_age = dri2_fallback_query_buffer_age,
+   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
+   .get_sync_values = dri2_fallback_get_sync_values,
+}; /* members not named above are zero-initialised (NULL) */
+/* flushFrontBuffer hook of the DRI2 loader extension; the body is empty. */
+static void
+surfaceless_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
+{
+}
+
+/* getBuffersWithFormat hook: reports one buffer plus the surface's size. */
+static __DRIbuffer *
+surfaceless_get_buffers_with_format(__DRIdrawable * driDrawable,
+                             int *width, int *height, unsigned int *attachments,
+                             int count, int *out_count, void *loaderPrivate)
+{
+   struct dri2_egl_surface *dri2_surf = loaderPrivate;
+
+   dri2_surf->buffer_count = 1;
+   if (width)
+      *width = dri2_surf->base.Width;
+   if (height)
+      *height = dri2_surf->base.Height;
+   *out_count = dri2_surf->buffer_count;
+   return dri2_surf->buffers;
+}
+
+#define DRM_RENDER_DEV_NAME  "%s/renderD%d"
+
+/*
+ * Set up disp for the surfaceless platform: open the first render node
+ * (renderD128..renderD191) whose driver loads, create the screen and add its
+ * configs.  On failure, release what was acquired and report via _eglError().
+ */
+EGLBoolean
+dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp)
+{
+   struct dri2_egl_display *dri2_dpy;
+   const int base = 128, limit = 64;   /* probe renderD128..renderD191 */
+   const char* err;
+   int i, driver_loaded = 0;
+
+   loader_set_logger(_eglLog);
+
+   dri2_dpy = calloc(1, sizeof *dri2_dpy);
+   if (!dri2_dpy)
+      return _eglError(EGL_BAD_ALLOC, "eglInitialize");
+
+   disp->DriverData = (void *) dri2_dpy;
+
+   for (i = 0; i < limit; ++i) {
+      char *card_path;
+      if (asprintf(&card_path, DRM_RENDER_DEV_NAME, DRM_DIR_NAME, base + i) < 0)
+         continue;
+
+      dri2_dpy->fd = loader_open_device(card_path);
+      free(card_path);
+      if (dri2_dpy->fd < 0)
+         continue;
+
+      dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
+      if (dri2_dpy->driver_name) {
+         if (dri2_load_driver(disp)) {
+            driver_loaded = 1;
+            break;
+         }
+         free(dri2_dpy->driver_name);
+      }
+      close(dri2_dpy->fd);
+   }
+
+   if (!driver_loaded) {
+      err = "DRI2: failed to load driver";
+      goto cleanup_display;
+   }
+
+   dri2_dpy->dri2_loader_extension.base.name = __DRI_DRI2_LOADER;
+   dri2_dpy->dri2_loader_extension.base.version = 3;
+   dri2_dpy->dri2_loader_extension.getBuffers = NULL;
+   dri2_dpy->dri2_loader_extension.flushFrontBuffer =
+      surfaceless_flush_front_buffer;
+   dri2_dpy->dri2_loader_extension.getBuffersWithFormat =
+      surfaceless_get_buffers_with_format;
+
+   dri2_dpy->extensions[0] = &dri2_dpy->dri2_loader_extension.base;
+   dri2_dpy->extensions[1] = &image_lookup_extension.base;
+   dri2_dpy->extensions[2] = &use_invalidate.base;
+   dri2_dpy->extensions[3] = NULL;
+
+   if (!dri2_create_screen(disp)) {
+      err = "DRI2: failed to create screen";
+      goto cleanup_driver;
+   }
+
+   for (i = 0; dri2_dpy->driver_configs[i]; i++)
+      dri2_add_config(disp, dri2_dpy->driver_configs[i],
+                      i + 1, EGL_WINDOW_BIT, NULL, NULL);
+
+   disp->Extensions.KHR_image_base = EGL_TRUE;
+
+   /* Fill vtbl last to prevent accidentally calling virtual function during
+    * initialization. */
+   dri2_dpy->vtbl = &dri2_surfaceless_display_vtbl;
+
+   return EGL_TRUE;
+
+cleanup_driver:
+   dlclose(dri2_dpy->driver);
+   free(dri2_dpy->driver_name);
+   close(dri2_dpy->fd);
+cleanup_display:
+   disp->DriverData = NULL;   /* do not leave a pointer to freed memory */
+   free(dri2_dpy);
+   return _eglError(EGL_NOT_INITIALIZED, err);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_wayland.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_wayland.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_wayland.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_wayland.c	2015-09-16 14:36:08.000000000 +0000
@@ -65,7 +65,7 @@
 }
 
 static const struct wl_callback_listener sync_listener = {
-   sync_callback
+   .done = sync_callback
 };
 
 static int
@@ -104,8 +104,8 @@
    dri2_surf->color_buffers[i].locked = 0;
 }
 
-static struct wl_buffer_listener wl_buffer_listener = {
-   wl_buffer_release
+static const struct wl_buffer_listener wl_buffer_listener = {
+   .release = wl_buffer_release
 };
 
 static void
@@ -130,6 +130,7 @@
    struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
    struct wl_egl_window *window = native_window;
    struct dri2_egl_surface *dri2_surf;
+   const __DRIconfig *config;
 
    (void) drv;
 
@@ -138,7 +139,7 @@
       _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
       return NULL;
    }
-   
+
    if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list))
       goto cleanup_surf;
 
@@ -149,6 +150,11 @@
    else
       dri2_surf->format = WL_DRM_FORMAT_ARGB8888;
 
+   if (!window) {
+      _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+      goto cleanup_surf;
+   }
+
    dri2_surf->wl_win = window;
 
    dri2_surf->wl_win->private = dri2_surf;
@@ -157,19 +163,19 @@
    dri2_surf->base.Width =  -1;
    dri2_surf->base.Height = -1;
 
+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);
+
    dri2_surf->dri_drawable = 
-      (*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen,
-					    dri2_conf->dri_double_config,
-					    dri2_surf);
+      (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                           dri2_surf);
    if (dri2_surf->dri_drawable == NULL) {
       _eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable");
-      goto cleanup_dri_drawable;
+      goto cleanup_surf;
    }
 
    return &dri2_surf->base;
 
- cleanup_dri_drawable:
-   dri2_dpy->core->destroyDrawable(dri2_surf->dri_drawable);
  cleanup_surf:
    free(dri2_surf);
 
@@ -361,7 +367,7 @@
    }
 
    if (dri2_surf->back->dri_image == NULL) {
-      dri2_surf->back->dri_image = 
+      dri2_surf->back->dri_image =
          dri2_dpy->image->createImage(dri2_dpy->dri_screen,
                                       dri2_surf->base.Width,
                                       dri2_surf->base.Height,
@@ -595,7 +601,7 @@
 }
 
 static const struct wl_callback_listener throttle_listener = {
-   wayland_throttle_callback
+   .done = wayland_throttle_callback
 };
 
 static void
@@ -839,22 +845,6 @@
    return NULL;
 }
 
-static char
-is_fd_render_node(int fd)
-{
-   struct stat render;
-
-   if (fstat(fd, &render))
-      return 0;
-
-   if (!S_ISCHR(render.st_mode))
-      return 0;
-
-   if (render.st_rdev & 0x80)
-      return 1;
-   return 0;
-}
-
 static int
 dri2_wl_authenticate(_EGLDisplay *disp, uint32_t id)
 {
@@ -891,23 +881,14 @@
    if (!dri2_dpy->device_name)
       return;
 
-#ifdef O_CLOEXEC
-   dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR | O_CLOEXEC);
-   if (dri2_dpy->fd == -1 && errno == EINVAL)
-#endif
-   {
-      dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR);
-      if (dri2_dpy->fd != -1)
-         fcntl(dri2_dpy->fd, F_SETFD, fcntl(dri2_dpy->fd, F_GETFD) |
-            FD_CLOEXEC);
-   }
+   dri2_dpy->fd = loader_open_device(dri2_dpy->device_name);
    if (dri2_dpy->fd == -1) {
       _eglLog(_EGL_WARNING, "wayland-egl: could not open %s (%s)",
 	      dri2_dpy->device_name, strerror(errno));
       return;
    }
 
-   if (is_fd_render_node(dri2_dpy->fd)) {
+   if (drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER) {
       dri2_dpy->authenticated = 1;
    } else {
       drmGetMagic(dri2_dpy->fd, &magic);
@@ -950,10 +931,10 @@
 }
 
 static const struct wl_drm_listener drm_listener = {
-	drm_handle_device,
-	drm_handle_format,
-	drm_handle_authenticated,
-	drm_handle_capabilities
+   .device = drm_handle_device,
+   .format = drm_handle_format,
+   .authenticated = drm_handle_authenticated,
+   .capabilities = drm_handle_capabilities
 };
 
 static void
@@ -978,8 +959,8 @@
 }
 
 static const struct wl_registry_listener registry_listener_drm = {
-   registry_handle_global_drm,
-   registry_handle_global_remove
+   .global = registry_handle_global_drm,
+   .global_remove = registry_handle_global_remove
 };
 
 static EGLBoolean
@@ -1117,7 +1098,7 @@
     * will return a render-node when the requested gpu is different
     * to the server, but also if the client asks for the same gpu than
     * the server by requesting its pci-id */
-   dri2_dpy->is_render_node = is_fd_render_node(dri2_dpy->fd);
+   dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER;
 
    dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
    if (dri2_dpy->driver_name == NULL) {
@@ -1206,10 +1187,6 @@
 
    disp->Extensions.EXT_swap_buffers_with_damage = EGL_TRUE;
 
-   /* we're supporting EGL 1.4 */
-   disp->VersionMajor = 1;
-   disp->VersionMinor = 4;
-
    /* Fill vtbl last to prevent accidentally calling virtual function during
     * initialization.
     */
@@ -1233,7 +1210,7 @@
    wl_event_queue_destroy(dri2_dpy->wl_queue);
  cleanup_dpy:
    free(dri2_dpy);
-   
+
    return EGL_FALSE;
 }
 
@@ -1250,6 +1227,8 @@
  * Taken from weston shared/os-compatibility.c
  */
 
+#ifndef HAVE_MKOSTEMP
+
 static int
 set_cloexec_or_close(int fd)
 {
@@ -1272,6 +1251,8 @@
    return -1;
 }
 
+#endif
+
 /*
  * Taken from weston shared/os-compatibility.c
  */
@@ -1739,7 +1720,7 @@
 }
 
 static const struct wl_shm_listener shm_listener = {
-   shm_handle_format
+   .format = shm_handle_format
 };
 
 static void
@@ -1756,8 +1737,8 @@
 }
 
 static const struct wl_registry_listener registry_listener_swrast = {
-   registry_handle_global_swrast,
-   registry_handle_global_remove
+   .global = registry_handle_global_swrast,
+   .global_remove = registry_handle_global_remove
 };
 
 static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = {
@@ -1853,10 +1834,6 @@
         dri2_add_config(disp, config, i + 1, types, NULL, rgb565_masks);
    }
 
-   /* we're supporting EGL 1.4 */
-   disp->VersionMajor = 1;
-   disp->VersionMinor = 4;
-
    /* Fill vtbl last to prevent accidentally calling virtual function during
     * initialization.
     */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_x11.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_x11.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/platform_x11.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/platform_x11.c	2015-09-16 14:36:08.000000000 +0000
@@ -43,6 +43,7 @@
 
 #include "egl_dri2.h"
 #include "egl_dri2_fallbacks.h"
+#include "loader.h"
 
 static EGLBoolean
 dri2_x11_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
@@ -55,7 +56,7 @@
    uint32_t           mask;
    const uint32_t     function = GXcopy;
    uint32_t           valgc[2];
-   
+
    /* create GC's */
    dri2_surf->gc = xcb_generate_id(dri2_dpy->conn);
    mask = XCB_GC_FUNCTION;
@@ -225,7 +226,7 @@
       s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
       screen = get_xcb_screen(s, dri2_dpy->screen);
       if (!screen) {
-         _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+         _eglError(EGL_BAD_ALLOC, "failed to get xcb screen");
          goto cleanup_surf;
       }
 
@@ -234,16 +235,23 @@
                        dri2_surf->drawable, screen->root,
 			dri2_surf->base.Width, dri2_surf->base.Height);
    } else {
+      if (!drawable) {
+         if (type == EGL_WINDOW_BIT)
+            _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+         else
+            _eglError(EGL_BAD_NATIVE_PIXMAP, "dri2_create_surface");
+         goto cleanup_surf;
+      }
       dri2_surf->drawable = drawable;
    }
 
    if (dri2_dpy->dri2) {
-      dri2_surf->dri_drawable = 
-	 (*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen,
-					       type == EGL_WINDOW_BIT ?
-					       dri2_conf->dri_double_config : 
-					       dri2_conf->dri_single_config,
-					       dri2_surf);
+      const __DRIconfig *config =
+         dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace);
+
+      dri2_surf->dri_drawable =
+	 (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
+					      dri2_surf);
    } else {
       assert(dri2_dpy->swrast);
       dri2_surf->dri_drawable = 
@@ -260,10 +268,18 @@
    if (type != EGL_PBUFFER_BIT) {
       cookie = xcb_get_geometry (dri2_dpy->conn, dri2_surf->drawable);
       reply = xcb_get_geometry_reply (dri2_dpy->conn, cookie, &error);
-      if (reply == NULL || error != NULL) {
-	 _eglError(EGL_BAD_ALLOC, "xcb_get_geometry");
-	 free(error);
-	 goto cleanup_dri_drawable;
+      if (error != NULL) {
+         if (error->error_code == BadAlloc)
+            _eglError(EGL_BAD_ALLOC, "xcb_get_geometry");
+         else if (type == EGL_WINDOW_BIT)
+            _eglError(EGL_BAD_NATIVE_WINDOW, "xcb_get_geometry");
+         else
+            _eglError(EGL_BAD_NATIVE_PIXMAP, "xcb_get_geometry");
+         free(error);
+         goto cleanup_dri_drawable;
+      } else if (reply == NULL) {
+         _eglError(EGL_BAD_ALLOC, "xcb_get_geometry");
+         goto cleanup_dri_drawable;
       }
 
       dri2_surf->base.Width = reply->width;
@@ -532,7 +548,7 @@
    xcb_generic_error_t *error;
    xcb_screen_iterator_t s;
    xcb_screen_t *screen;
-   char *driver_name, *device_name;
+   char *driver_name, *loader_driver_name, *device_name;
    const xcb_query_extension_reply_t *extension;
 
    xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_xfixes_id);
@@ -557,7 +573,7 @@
    s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
    screen = get_xcb_screen(s, dri2_dpy->screen);
    if (!screen) {
-      _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_x11_connect");
+      _eglLog(_EGL_WARNING, "DRI2: failed to get xcb screen");
       return EGL_FALSE;
    }
    connect_cookie = xcb_dri2_connect_unchecked(dri2_dpy->conn, screen->root,
@@ -592,18 +608,38 @@
       return EGL_FALSE;
    }
 
-   driver_name = xcb_dri2_connect_driver_name (connect);
-   dri2_dpy->driver_name =
-      strndup(driver_name,
-              xcb_dri2_connect_driver_name_length(connect));
-
    device_name = xcb_dri2_connect_device_name (connect);
 
    dri2_dpy->device_name =
       strndup(device_name,
               xcb_dri2_connect_device_name_length(connect));
 
+   dri2_dpy->fd = loader_open_device(dri2_dpy->device_name);
+   if (dri2_dpy->fd == -1) {
+      _eglLog(_EGL_WARNING,
+              "DRI2: could not open %s (%s)", dri2_dpy->device_name,
+              strerror(errno));
+      free(dri2_dpy->device_name);
+      free(connect);
+      return EGL_FALSE;
+   }
+
+   driver_name = xcb_dri2_connect_driver_name (connect);
+
+   /* If Mesa knows about the appropriate driver for this fd, then trust it.
+    * Otherwise, default to the server's value.
+    */
+   loader_driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
+   if (loader_driver_name) {
+      dri2_dpy->driver_name = loader_driver_name;
+   } else {
+      dri2_dpy->driver_name =
+         strndup(driver_name,
+                 xcb_dri2_connect_driver_name_length(connect));
+   }
+
    if (dri2_dpy->device_name == NULL || dri2_dpy->driver_name == NULL) {
+      close(dri2_dpy->fd);
       free(dri2_dpy->device_name);
       free(dri2_dpy->driver_name);
       free(connect);
@@ -628,7 +664,7 @@
 
    screen = get_xcb_screen(s, dri2_dpy->screen);
    if (!screen) {
-      _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_x11_authenticate");
+      _eglLog(_EGL_WARNING, "DRI2: failed to get xcb screen");
       return -1;
    }
 
@@ -1116,7 +1152,7 @@
       dri2_dpy->screen = DefaultScreen(dpy);
    }
 
-   if (xcb_connection_has_error(dri2_dpy->conn)) {
+   if (!dri2_dpy->conn || xcb_connection_has_error(dri2_dpy->conn)) {
       _eglLog(_EGL_WARNING, "DRI2: xcb_connect failed");
       goto cleanup_dpy;
    }
@@ -1142,14 +1178,8 @@
    if (!dri2_create_screen(disp))
       goto cleanup_driver;
 
-   if (dri2_dpy->conn) {
-      if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
-         goto cleanup_configs;
-   }
-
-   /* we're supporting EGL 1.4 */
-   disp->VersionMajor = 1;
-   disp->VersionMinor = 4;
+   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+      goto cleanup_configs;
 
    /* Fill vtbl last to prevent accidentally calling virtual function during
     * initialization.
@@ -1239,40 +1269,19 @@
       dri2_dpy->screen = DefaultScreen(dpy);
    }
 
-   if (xcb_connection_has_error(dri2_dpy->conn)) {
+   if (!dri2_dpy->conn || xcb_connection_has_error(dri2_dpy->conn)) {
       _eglLog(_EGL_WARNING, "DRI2: xcb_connect failed");
       goto cleanup_dpy;
    }
 
-   if (dri2_dpy->conn) {
-      if (!dri2_x11_connect(dri2_dpy))
-	 goto cleanup_conn;
-   }
-
-   if (!dri2_load_driver(disp))
+   if (!dri2_x11_connect(dri2_dpy))
       goto cleanup_conn;
 
-#ifdef O_CLOEXEC
-   dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR | O_CLOEXEC);
-   if (dri2_dpy->fd == -1 && errno == EINVAL)
-#endif
-   {
-      dri2_dpy->fd = open(dri2_dpy->device_name, O_RDWR);
-      if (dri2_dpy->fd != -1)
-         fcntl(dri2_dpy->fd, F_SETFD, fcntl(dri2_dpy->fd, F_GETFD) |
-            FD_CLOEXEC);
-   }
-   if (dri2_dpy->fd == -1) {
-      _eglLog(_EGL_WARNING,
-	      "DRI2: could not open %s (%s)", dri2_dpy->device_name,
-              strerror(errno));
-      goto cleanup_driver;
-   }
+   if (!dri2_x11_local_authenticate(disp))
+      goto cleanup_fd;
 
-   if (dri2_dpy->conn) {
-      if (!dri2_x11_local_authenticate(disp))
-	 goto cleanup_fd;
-   }
+   if (!dri2_load_driver(disp))
+      goto cleanup_fd;
 
    if (dri2_dpy->dri2_minor >= 1) {
       dri2_dpy->dri2_loader_extension.base.name = __DRI_DRI2_LOADER;
@@ -1297,7 +1306,7 @@
    dri2_dpy->invalidate_available = (dri2_dpy->dri2_minor >= 3);
 
    if (!dri2_create_screen(disp))
-      goto cleanup_fd;
+      goto cleanup_driver;
 
    dri2_x11_setup_swap_interval(dri2_dpy);
 
@@ -1311,14 +1320,8 @@
    disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
 #endif
 
-   if (dri2_dpy->conn) {
-      if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
-	 goto cleanup_configs;
-   }
-
-   /* we're supporting EGL 1.4 */
-   disp->VersionMajor = 1;
-   disp->VersionMinor = 4;
+   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+      goto cleanup_configs;
 
    /* Fill vtbl last to prevent accidentally calling virtual function during
     * initialization.
@@ -1330,10 +1333,10 @@
  cleanup_configs:
    _eglCleanupDisplay(disp);
    dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
- cleanup_fd:
-   close(dri2_dpy->fd);
  cleanup_driver:
    dlclose(dri2_dpy->driver);
+ cleanup_fd:
+   close(dri2_dpy->fd);
  cleanup_conn:
    if (disp->PlatformDisplay == NULL)
       xcb_disconnect(dri2_dpy->conn);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/dri2/SConscript	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/dri2/SConscript	1970-01-01 00:00:00.000000000 +0000
@@ -1,40 +0,0 @@
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPDEFINES = [
-	'DEFAULT_DRIVER_DIR=\\"\\"'
-])
-
-env.Append(CPPPATH = [
-	'#/include',
-	'#/src/egl/main',
-	'#/src/loader',
-])
-
-sources = [
-	'egl_dri2.c',
-]
-
-if env['x11']:
-	sources.append('platform_x11.c')
-	env.Append(CPPDEFINES = [
-		'HAVE_X11_PLATFORM',
-	])
-	#env.Append(CPPPATH = [
-	#	'XCB_DRI2_CFLAGS',
-	#])
-
-if env['drm']:
-	env.PkgUseModules('DRM')
-
-env.Prepend(LIBS = [
-	libloader,
-])
-
-egl_dri2 = env.ConvenienceLibrary(
-	target = 'egl_dri2',
-	source = sources,
-)
-
-Export('egl_dri2')
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/haiku/egl_haiku.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/haiku/egl_haiku.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/haiku/egl_haiku.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/haiku/egl_haiku.cpp	2015-09-16 14:36:08.000000000 +0000
@@ -27,7 +27,6 @@
 #include <stdint.h>
 #include <stdio.h>
 
-#include "loader.h"
 #include "eglconfig.h"
 #include "eglcontext.h"
 #include "egldisplay.h"
@@ -42,7 +41,14 @@
 #include <OpenGLKit.h>
 
 
-#define CALLOC_STRUCT(T)   (struct T *) calloc(1, sizeof(struct T))
+#ifdef DEBUG
+#	define TRACE(x...) printf("egl_haiku: " x)
+#	define CALLED() TRACE("CALLED: %s\n", __PRETTY_FUNCTION__)
+#else
+#	define TRACE(x...)
+#	define CALLED()
+#endif
+#define ERROR(x...) printf("egl_haiku: " x)
 
 
 _EGL_DRIVER_STANDARD_TYPECASTS(haiku_egl)
@@ -51,10 +57,6 @@
 struct haiku_egl_driver
 {
 	_EGLDriver base;
-
-	void *handle;
-	_EGLProc (*get_proc_address)(const char *procname);
-	void (*glFlush)(void);
 };
 
 struct haiku_egl_config
@@ -74,81 +76,6 @@
 };
 
 
-/*
-static void
-swrastCreateDrawable(struct dri2_egl_display * dri2_dpy,
-	struct dri2_egl_surface * dri2_surf, int depth)
-{
-
-}
-
-
-static void
-swrastDestroyDrawable(struct dri2_egl_display * dri2_dpy,
-	struct dri2_egl_surface * dri2_surf)
-{
-
-}
-
-
-static void
-swrastGetDrawableInfo(__DRIdrawable * draw, int *x, int *y,
-	int *w, int *h, void *loaderPrivate)
-{
-
-}
-
-
-static void
-swrastPutImage(__DRIdrawable * draw, int op, int x, int y,
-	int w, int h, char *data, void *loaderPrivate)
-{
-
-}
-
-
-static void
-swrastGetImage(__DRIdrawable * read, int x, int y,
-	int w, int h, char *data, void *loaderPrivate)
-{
-
-}
-*/
-
-
-static void
-haiku_log(EGLint level, const char *msg)
-{
-	switch (level) {
-		case _EGL_DEBUG:
-			fprintf(stderr,"%s", msg);
-			break;
-		case _EGL_INFO:
-			fprintf(stderr,"%s", msg);
-			break;
-		case _EGL_WARNING:
-			fprintf(stderr,"%s", msg);
-			break;
-		case _EGL_FATAL:
-			fprintf(stderr,"%s", msg);
-			break;
-		default:
-			break;
-	}
-}
-
-
-/**
- * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
- */
-static _EGLSurface *
-haiku_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
-	_EGLConfig *conf, void *native_surface, const EGLint *attrib_list)
-{
-	return NULL;
-}
-
-
 /**
  * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
  */
@@ -156,23 +83,34 @@
 haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
 	_EGLConfig *conf, void *native_window, const EGLint *attrib_list)
 {
+	CALLED();
+
 	struct haiku_egl_surface* surface;
-	surface = (struct haiku_egl_surface*)calloc(1,sizeof (*surface));
+	surface = (struct haiku_egl_surface*) calloc(1, sizeof (*surface));
+	if (!surface) {
+		_eglError(EGL_BAD_ALLOC, "haiku_create_window_surface");
+		return NULL;
+	}
+
+	if (!_eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT,
+		conf, attrib_list)) {
+		free(surface);
+		return NULL;
+	}
 
-	_eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT, conf, attrib_list);
 	(&surface->surf)->SwapInterval = 1;
 
-	_eglLog(_EGL_DEBUG, "Creating window");
+	TRACE("Creating window\n");
 	BWindow* win = (BWindow*)native_window;
 
-	_eglLog(_EGL_DEBUG, "Creating GL view");
+	TRACE("Creating GL view\n");
 	surface->gl = new BGLView(win->Bounds(), "OpenGL", B_FOLLOW_ALL_SIDES, 0,
 		BGL_RGB | BGL_DOUBLE | BGL_ALPHA);
 
-	_eglLog(_EGL_DEBUG, "Adding GL");
+	TRACE("Adding GL\n");
 	win->AddChild(surface->gl);
 
-	_eglLog(_EGL_DEBUG, "Showing window");
+	TRACE("Showing window\n");
 	win->Show();
 	return &surface->surf;
 }
@@ -197,6 +135,10 @@
 static EGLBoolean
 haiku_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
 {
+	if (_eglPutSurface(surf)) { // NOTE(review): presumably true only when the last reference is dropped -- confirm
+		// XXX: detach haiku_egl_surface::gl from the native window and destroy it
+		free(surf); // NOTE(review): assumes surf sits at offset 0 of the calloc'ed haiku_egl_surface -- confirm
+	}
 	return EGL_TRUE;
 }
 
@@ -204,20 +146,25 @@
 static EGLBoolean
 haiku_add_configs_for_visuals(_EGLDisplay *dpy)
 {
-	printf("Adding configs\n");
+	CALLED();
 
 	struct haiku_egl_config* conf;
-	conf = CALLOC_STRUCT(haiku_egl_config);
+	conf = (struct haiku_egl_config*) calloc(1, sizeof (*conf));
+	if (!conf) {
+		_eglError(EGL_BAD_ALLOC, "haiku_add_configs_for_visuals");
+		return EGL_FALSE;
+	}
 
 	_eglInitConfig(&conf->base, dpy, 1);
-	_eglLog(_EGL_DEBUG,"Config inited\n");
+	TRACE("Config inited\n");
+
 	_eglSetConfigKey(&conf->base, EGL_RED_SIZE, 8);
 	_eglSetConfigKey(&conf->base, EGL_BLUE_SIZE, 8);
 	_eglSetConfigKey(&conf->base, EGL_GREEN_SIZE, 8);
 	_eglSetConfigKey(&conf->base, EGL_LUMINANCE_SIZE, 0);
 	_eglSetConfigKey(&conf->base, EGL_ALPHA_SIZE, 8);
 	_eglSetConfigKey(&conf->base, EGL_COLOR_BUFFER_TYPE, EGL_RGB_BUFFER);
-	EGLint r = (_eglGetConfigKey(&conf->base, EGL_RED_SIZE) 
+	EGLint r = (_eglGetConfigKey(&conf->base, EGL_RED_SIZE)
 		+ _eglGetConfigKey(&conf->base, EGL_GREEN_SIZE)
 		+ _eglGetConfigKey(&conf->base, EGL_BLUE_SIZE)
 		+ _eglGetConfigKey(&conf->base, EGL_ALPHA_SIZE));
@@ -241,76 +188,41 @@
 	_eglSetConfigKey(&conf->base, EGL_MAX_PBUFFER_PIXELS, 0); // TODO: How to get the right value ?
 	_eglSetConfigKey(&conf->base, EGL_SURFACE_TYPE, EGL_WINDOW_BIT /*| EGL_PIXMAP_BIT | EGL_PBUFFER_BIT*/);
 
-	printf("Config configuated\n");
+	TRACE("Config configuated\n");
 	if (!_eglValidateConfig(&conf->base, EGL_FALSE)) {
-		_eglLog(_EGL_DEBUG, "Haiku failed to validate config");
-		return EGL_FALSE;
+		_eglLog(_EGL_DEBUG, "Haiku: failed to validate config");
+		goto cleanup;
 	}
-	printf("Validated config\n");
-   
+	TRACE("Validated config\n");
+
 	_eglLinkConfig(&conf->base);
 	if (!_eglGetArraySize(dpy->Configs)) {
 		_eglLog(_EGL_WARNING, "Haiku: failed to create any config");
-		return EGL_FALSE;
+		goto cleanup;
 	}
-	printf("Config successful!\n");
-   
+	TRACE("Config successfull\n");
+
 	return EGL_TRUE;
+
+cleanup:
+	free(conf);
+	return EGL_FALSE;
 }
 
+
 extern "C"
 EGLBoolean
 init_haiku(_EGLDriver *drv, _EGLDisplay *dpy)
 {
-	_eglLog(_EGL_DEBUG,"\nInitializing Haiku EGL\n");
-	//_EGLDisplay* egl_dpy;
+	CALLED();
 
-	printf("Initializing Haiku EGL\n");
-	_eglSetLogProc(haiku_log);
-
-	loader_set_logger(_eglLog);
+	TRACE("Add configs\n");
+	if (!haiku_add_configs_for_visuals(dpy))
+		return EGL_FALSE;
 
-	/*egl_dpy = (_EGLDisplay*) calloc(1, sizeof(_EGLDisplay));
-	if (!egl_dpy)
-		return _eglError(EGL_BAD_ALLOC, "eglInitialize");
-
-	dpy->DriverData=(void*) egl_dpy;
-	if (!dpy->PlatformDisplay) {
-		// OPEN DEVICE 
-		//dri2_dpy->bwindow = (void*)haiku_create_window();
-		//dri2_dpy->own_device = true;
-	} else {
-		//dri2_dpy->bwindow = (BWindow*)dpy->PlatformDisplay;
-	}*/
-	
-	//dri2_dpy->driver_name = strdup("swrast");
-	//if (!dri2_load_driver_swrast(dpy))
-	//   goto cleanup_conn;
-
-	/*dri2_dpy->swrast_loader_extension.base.name = __DRI_SWRAST_LOADER;
-	dri2_dpy->swrast_loader_extension.base.version = __DRI_SWRAST_LOADER_VERSION;
-	dri2_dpy->swrast_loader_extension.getDrawableInfo = swrastGetDrawableInfo;
-	dri2_dpy->swrast_loader_extension.putImage = swrastPutImage;
-	dri2_dpy->swrast_loader_extension.getImage = swrastGetImage;
-
-	dri2_dpy->extensions[0] = &dri2_dpy->swrast_loader_extension.base;
-	dri2_dpy->extensions[1] = NULL;
-	dri2_dpy->extensions[2] = NULL;*/
-
-	/*if (dri2_dpy->bwindow) {
-		if (!dri2_haiku_add_configs_for_visuals(dri2_dpy, dpy))
-			goto cleanup_configs;
-	}*/
-	_eglLog(_EGL_DEBUG,"Add configs");
-    haiku_add_configs_for_visuals(dpy);
-
-	dpy->VersionMajor=1;
-	dpy->VersionMinor=4;
-   
-   //dpy->Extensions.KHR_create_context = true;
+	dpy->Version = 14;
 
-	//dri2_dpy->vtbl = &dri2_haiku_display_vtbl;
-	_eglLog(_EGL_DEBUG, "Initialization finished");
+	TRACE("Initialization finished\n");
 
 	return EGL_TRUE;
 }
@@ -329,13 +241,24 @@
 haiku_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
 	_EGLContext *share_list, const EGLint *attrib_list)
 {
-	_eglLog(_EGL_DEBUG,"Creating context");
+	CALLED();
+
 	struct haiku_egl_context* context;
-	context=(struct haiku_egl_context*)calloc(1,sizeof (*context));
-	if(!_eglInitContext(&context->ctx, disp, conf, attrib_list))
-		printf("ERROR creating context");
-	_eglLog(_EGL_DEBUG, "Context created");
+	context = (struct haiku_egl_context*) calloc(1, sizeof (*context));
+	if (!context) {
+		_eglError(EGL_BAD_ALLOC, "haiku_create_context");
+		return NULL;
+	}
+
+	if (!_eglInitContext(&context->ctx, disp, conf, attrib_list))
+		goto cleanup;
+
+	TRACE("Context created\n");
 	return &context->ctx;
+
+cleanup:
+	free(context);
+	return NULL;
 }
 
 
@@ -343,7 +266,13 @@
 EGLBoolean
 haiku_destroy_context(_EGLDriver* drv, _EGLDisplay *disp, _EGLContext* ctx)
 {
-	ctx=NULL;
+	struct haiku_egl_context* context = haiku_egl_context(ctx);
+
+	if (_eglPutContext(ctx)) {
+		// XXX: teardown the context ?
+		free(context);
+		ctx = NULL;
+	}
 	return EGL_TRUE;
 }
 
@@ -351,13 +280,18 @@
 extern "C"
 EGLBoolean
 haiku_make_current(_EGLDriver* drv, _EGLDisplay* dpy, _EGLSurface *dsurf,
-		  _EGLSurface *rsurf, _EGLContext *ctx)
+	_EGLSurface *rsurf, _EGLContext *ctx)
 {
-	struct haiku_egl_context* cont=haiku_egl_context(ctx);
-	struct haiku_egl_surface* surf=haiku_egl_surface(dsurf);
+	CALLED();
+
+	struct haiku_egl_context* cont = haiku_egl_context(ctx);
+	struct haiku_egl_surface* surf = haiku_egl_surface(dsurf);
 	_EGLContext *old_ctx;
-    _EGLSurface *old_dsurf, *old_rsurf;
-	_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf);
+	_EGLSurface *old_dsurf, *old_rsurf;
+
+	if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf))
+		return EGL_FALSE;
+
 	//cont->ctx.DrawSurface=&surf->surf;
 	surf->gl->LockGL();
 	return EGL_TRUE;
@@ -368,7 +302,8 @@
 EGLBoolean
 haiku_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
 {
-	struct haiku_egl_surface* surface=haiku_egl_surface(surf);
+	struct haiku_egl_surface* surface = haiku_egl_surface(surf);
+
 	surface->gl->SwapBuffers();
 	//gl->Render();
 	return EGL_TRUE;
@@ -379,7 +314,7 @@
 void
 haiku_unload(_EGLDriver* drv)
 {
-	
+
 }
 
 
@@ -391,9 +326,15 @@
 _EGLDriver*
 _eglBuiltInDriverHaiku(const char *args)
 {
-	_eglLog(_EGL_DEBUG,"Driver loaded");
+	CALLED();
+
 	struct haiku_egl_driver* driver;
-	driver=(struct haiku_egl_driver*)calloc(1,sizeof(*driver));
+	driver = (struct haiku_egl_driver*) calloc(1, sizeof(*driver));
+	if (!driver) {
+		_eglError(EGL_BAD_ALLOC, "_eglBuiltInDriverHaiku");
+		return NULL;
+	}
+
 	_eglInitDriverFallbacks(&driver->base);
 	driver->base.API.Initialize = init_haiku;
 	driver->base.API.Terminate = haiku_terminate;
@@ -404,32 +345,13 @@
 	driver->base.API.CreatePixmapSurface = haiku_create_pixmap_surface;
 	driver->base.API.CreatePbufferSurface = haiku_create_pbuffer_surface;
 	driver->base.API.DestroySurface = haiku_destroy_surface;
-	/*
-	driver->API.GetProcAddress = dri2_get_proc_address;
-	driver->API.WaitClient = dri2_wait_client;
-	driver->API.WaitNative = dri2_wait_native;
-	driver->API.BindTexImage = dri2_bind_tex_image;
-	driver->API.ReleaseTexImage = dri2_release_tex_image;
-	driver->API.SwapInterval = dri2_swap_interval;
-	*/
 
 	driver->base.API.SwapBuffers = haiku_swap_buffers;
-	/*
-	driver->API.SwapBuffersWithDamageEXT = dri2_swap_buffers_with_damage;
-	driver->API.SwapBuffersRegionNOK = dri2_swap_buffers_region;
-	driver->API.PostSubBufferNV = dri2_post_sub_buffer;
-	driver->API.CopyBuffers = dri2_copy_buffers,
-	driver->API.QueryBufferAge = dri2_query_buffer_age;
-	driver->API.CreateImageKHR = dri2_create_image;
-	driver->API.DestroyImageKHR = dri2_destroy_image_khr;
-	driver->API.CreateWaylandBufferFromImageWL = dri2_create_wayland_buffer_from_image;
-	driver->API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium;
-	*/
 
 	driver->base.Name = "Haiku";
 	driver->base.Unload = haiku_unload;
 
-	_eglLog(_EGL_DEBUG, "API Calls defined");
-	
+	TRACE("API Calls defined\n");
+
 	return &driver->base;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/haiku/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/haiku/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/drivers/haiku/SConscript	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/drivers/haiku/SConscript	1970-01-01 00:00:00.000000000 +0000
@@ -1,34 +0,0 @@
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPDEFINES = [
-	'DEFAULT_DRIVER_DIR=\\"\\"',
-])
-
-env.Append(CPPPATH = [
-	'#/include',
-	'#/src/egl/main',
-	'#/src/loader',
-])
-
-sources = [
-	'egl_haiku.cpp'
-]
-
-if env['platform'] == 'haiku':
-	env.Append(CPPDEFINES = [
-		'HAVE_HAIKU_PLATFORM',
-		'_EGL_NATIVE_PLATFORM=haiku',
-	])
-
-env.Prepend(LIBS = [
-	libloader,
-])
-
-egl_haiku = env.ConvenienceLibrary(
-	target = 'egl_haiku',
-	source = sources,
-)
-
-Export('egl_haiku')
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/Android.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/Android.mk	1970-01-01 00:00:00.000000000 +0000
@@ -1,164 +0,0 @@
-# Mesa 3-D graphics library
-#
-# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
-# Copyright (C) 2010-2011 LunarG Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# Android.mk for libGLES_mesa
-
-LOCAL_PATH := $(call my-dir)
-
-include $(LOCAL_PATH)/Makefile.sources
-
-SOURCES := \
-	${LIBEGL_C_FILES}
-
-# ---------------------------------------
-# Build libGLES_mesa
-# ---------------------------------------
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := $(SOURCES)
-
-LOCAL_CFLAGS := \
-	-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_ANDROID \
-	-D_EGL_DRIVER_SEARCH_DIR=\"/system/lib/egl\" \
-	-D_EGL_OS_UNIX=1
-
-LOCAL_STATIC_LIBRARIES :=
-
-LOCAL_SHARED_LIBRARIES := \
-	libglapi \
-	libdl \
-	libhardware \
-	liblog \
-	libcutils \
-	libgralloc_drm \
-
-ifeq ($(shell echo "$(MESA_ANDROID_VERSION) >= 4.2" | bc),1)
-LOCAL_SHARED_LIBRARIES += libsync
-endif
-
-# add libdrm if there are hardware drivers
-ifneq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_SHARED_LIBRARIES += libdrm
-endif
-
-ifeq ($(strip $(MESA_BUILD_CLASSIC)),true)
-LOCAL_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2
-LOCAL_STATIC_LIBRARIES += libmesa_egl_dri2
-
-# require i915_dri and/or i965_dri
-LOCAL_REQUIRED_MODULES += \
-	$(addsuffix _dri, $(filter i915 i965, $(MESA_GPU_DRIVERS)))
-endif # MESA_BUILD_CLASSIC
-
-ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
-
-gallium_DRIVERS :=
-
-# swrast
-gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_android
-
-# freedreno
-ifneq ($(filter freedreno, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_winsys_freedreno libmesa_pipe_freedreno
-LOCAL_SHARED_LIBRARIES += libdrm_freedreno
-endif
-
-# i915g
-ifneq ($(filter i915g, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_winsys_i915 libmesa_pipe_i915
-LOCAL_SHARED_LIBRARIES += libdrm_intel
-endif
-
-# ilo
-ifneq ($(filter ilo, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_winsys_intel libmesa_pipe_ilo
-LOCAL_SHARED_LIBRARIES += libdrm_intel
-endif
-
-# nouveau
-ifneq ($(filter nouveau, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS +=  libmesa_winsys_nouveau libmesa_pipe_nouveau
-LOCAL_SHARED_LIBRARIES += libdrm_nouveau
-LOCAL_SHARED_LIBRARIES += libstlport
-endif
-
-# r300g/r600g/radeonsi
-ifneq ($(filter r300g r600g radeonsi, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_winsys_radeon
-LOCAL_SHARED_LIBRARIES += libdrm_radeon
-ifneq ($(filter r300g, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_pipe_r300
-endif # r300g
-ifneq ($(filter r600g radeonsi, $(MESA_GPU_DRIVERS)),)
-ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_pipe_r600
-LOCAL_SHARED_LIBRARIES += libstlport
-endif # r600g
-ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_pipe_radeonsi
-endif # radeonsi
-gallium_DRIVERS += libmesa_pipe_radeon
-endif # r600g || radeonsi
-endif # r300g || r600g || radeonsi
-
-# vmwgfx
-ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga
-endif
-
-#
-# Notes about the order here:
-#
-#  * libmesa_st_egl depends on libmesa_winsys_sw_android in $(gallium_DRIVERS)
-#  * libmesa_pipe_r300 in $(gallium_DRIVERS) depends on libmesa_st_mesa and
-#    libmesa_glsl
-#  * libmesa_st_mesa depends on libmesa_glsl
-#  * libmesa_glsl depends on libmesa_glsl_utils
-#
-LOCAL_STATIC_LIBRARIES := \
-	libmesa_egl_gallium \
-	libmesa_st_egl \
-	$(gallium_DRIVERS) \
-	libmesa_st_mesa \
-	libmesa_util \
-	libmesa_glsl \
-	libmesa_glsl_utils \
-	libmesa_gallium \
-	$(LOCAL_STATIC_LIBRARIES)
-
-endif # MESA_BUILD_GALLIUM
-
-LOCAL_STATIC_LIBRARIES := \
-	$(LOCAL_STATIC_LIBRARIES) \
-	libmesa_loader
-
-LOCAL_MODULE := libGLES_mesa
-ifeq ($(MESA_LOLLIPOP_BUILD),true)
-LOCAL_MODULE_RELATIVE_PATH := egl
-else
-LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl
-endif
-
-include $(MESA_COMMON_MK)
-include $(BUILD_SHARED_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglapi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglapi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglapi.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglapi.c	2015-09-16 14:36:08.000000000 +0000
@@ -100,7 +100,6 @@
 #include "eglconfig.h"
 #include "eglimage.h"
 #include "eglsync.h"
-#include "eglstring.h"
 
 
 /**
@@ -251,6 +250,31 @@
 }
 
 
+static EGLint *
+_eglConvertAttribsToInt(const EGLAttrib *attr_list)
+{
+   EGLint *int_attribs = NULL; /* result; stays NULL when attr_list is NULL */
+
+   /* Copy the EGL_NONE-terminated EGLAttrib[] into a calloc'ed EGLint[] */
+   if (attr_list) {
+      int i, size = 0;
+
+      while (attr_list[size] != EGL_NONE) /* only every other entry is tested */
+         size += 2;
+
+      size += 1; /* add space for EGL_NONE */
+
+      int_attribs = calloc(size, sizeof(int_attribs[0]));
+      if (!int_attribs)
+         return NULL; /* allocation failure, reported as NULL */
+
+      for (i = 0; i < size; i++)
+         int_attribs[i] = attr_list[i]; /* implicit EGLAttrib -> EGLint */
+   }
+   return int_attribs; /* heap memory: the caller is expected to free() it */
+}
+
+
 /**
  * This is typically the first EGL function that an application calls.
  * It associates a private _EGLDisplay object to the native display.
@@ -301,6 +325,21 @@
    return _eglGetDisplayHandle(dpy);
 }
 
+EGLDisplay EGLAPIENTRY
+eglGetPlatformDisplay(EGLenum platform, void *native_display,
+                      const EGLAttrib *attrib_list)
+{ /* wraps eglGetPlatformDisplayEXT() for EGLAttrib attribute lists */
+   EGLDisplay display;
+   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
+
+   if (attrib_list && !int_attribs) /* non-NULL list but no copy: calloc failed */
+      RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, NULL);
+
+   display = eglGetPlatformDisplayEXT(platform, native_display, int_attribs);
+   free(int_attribs); /* the converted copy is only needed for the call above */
+   return display;
+}
+
 /**
  * Copy the extension into the string and update the string pointer.
  */
@@ -341,48 +380,47 @@
 
    char *exts = dpy->ExtensionsString;
 
-   _EGL_CHECK_EXTENSION(MESA_drm_display);
-   _EGL_CHECK_EXTENSION(MESA_drm_image);
-   _EGL_CHECK_EXTENSION(MESA_configless_context);
+   /* Please keep these sorted alphabetically. */
+   _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer);
 
-   _EGL_CHECK_EXTENSION(WL_bind_wayland_display);
-   _EGL_CHECK_EXTENSION(WL_create_wayland_buffer_from_image);
+   _EGL_CHECK_EXTENSION(CHROMIUM_sync_control);
 
-   _EGL_CHECK_EXTENSION(KHR_image_base);
-   _EGL_CHECK_EXTENSION(KHR_image_pixmap);
-   if (dpy->Extensions.KHR_image_base && dpy->Extensions.KHR_image_pixmap)
-      _eglAppendExtension(&exts, "EGL_KHR_image");
+   _EGL_CHECK_EXTENSION(EXT_buffer_age);
+   _EGL_CHECK_EXTENSION(EXT_create_context_robustness);
+   _EGL_CHECK_EXTENSION(EXT_image_dma_buf_import);
+   _EGL_CHECK_EXTENSION(EXT_swap_buffers_with_damage);
 
-   _EGL_CHECK_EXTENSION(KHR_vg_parent_image);
+   _EGL_CHECK_EXTENSION(KHR_cl_event2);
+   _EGL_CHECK_EXTENSION(KHR_create_context);
+   _EGL_CHECK_EXTENSION(KHR_fence_sync);
    _EGL_CHECK_EXTENSION(KHR_get_all_proc_addresses);
+   _EGL_CHECK_EXTENSION(KHR_gl_colorspace);
+   _EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image);
    _EGL_CHECK_EXTENSION(KHR_gl_texture_2D_image);
-   _EGL_CHECK_EXTENSION(KHR_gl_texture_cubemap_image);
    _EGL_CHECK_EXTENSION(KHR_gl_texture_3D_image);
-   _EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image);
-
+   _EGL_CHECK_EXTENSION(KHR_gl_texture_cubemap_image);
+   if (dpy->Extensions.KHR_image_base && dpy->Extensions.KHR_image_pixmap)
+      _eglAppendExtension(&exts, "EGL_KHR_image");
+   _EGL_CHECK_EXTENSION(KHR_image_base);
+   _EGL_CHECK_EXTENSION(KHR_image_pixmap);
    _EGL_CHECK_EXTENSION(KHR_reusable_sync);
-   _EGL_CHECK_EXTENSION(KHR_fence_sync);
+   _EGL_CHECK_EXTENSION(KHR_surfaceless_context);
+   _EGL_CHECK_EXTENSION(KHR_vg_parent_image);
    _EGL_CHECK_EXTENSION(KHR_wait_sync);
-   _EGL_CHECK_EXTENSION(KHR_cl_event2);
 
-   _EGL_CHECK_EXTENSION(KHR_surfaceless_context);
-   _EGL_CHECK_EXTENSION(KHR_create_context);
+   _EGL_CHECK_EXTENSION(MESA_configless_context);
+   _EGL_CHECK_EXTENSION(MESA_drm_display);
+   _EGL_CHECK_EXTENSION(MESA_drm_image);
+   _EGL_CHECK_EXTENSION(MESA_image_dma_buf_export);
 
    _EGL_CHECK_EXTENSION(NOK_swap_region);
    _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap);
 
-   _EGL_CHECK_EXTENSION(ANDROID_image_native_buffer);
-
-   _EGL_CHECK_EXTENSION(CHROMIUM_sync_control);
-
-   _EGL_CHECK_EXTENSION(EXT_create_context_robustness);
-   _EGL_CHECK_EXTENSION(EXT_buffer_age);
-   _EGL_CHECK_EXTENSION(EXT_swap_buffers_with_damage);
-   _EGL_CHECK_EXTENSION(EXT_image_dma_buf_import);
-
    _EGL_CHECK_EXTENSION(NV_post_sub_buffer);
 
-   _EGL_CHECK_EXTENSION(MESA_image_dma_buf_export);
+   _EGL_CHECK_EXTENSION(WL_bind_wayland_display);
+   _EGL_CHECK_EXTENSION(WL_create_wayland_buffer_from_image);
+
 #undef _EGL_CHECK_EXTENSION
 }
 
@@ -407,6 +445,26 @@
    assert(strlen(dpy->ClientAPIsString) < sizeof(dpy->ClientAPIsString));
 }
 
+static void
+_eglComputeVersion(_EGLDisplay *disp)
+{ /* sets disp->Version to 14, or to 15 when every flag tested below is set */
+   disp->Version = 14; /* eglInitialize() reads this as Version / 10 . Version % 10 */
+
+   if (disp->Extensions.KHR_fence_sync &&
+       disp->Extensions.KHR_cl_event2 &&
+       disp->Extensions.KHR_wait_sync &&
+       disp->Extensions.KHR_image_base &&
+       disp->Extensions.KHR_gl_texture_2D_image &&
+       disp->Extensions.KHR_gl_texture_3D_image &&
+       disp->Extensions.KHR_gl_texture_cubemap_image &&
+       disp->Extensions.KHR_gl_renderbuffer_image &&
+       disp->Extensions.KHR_create_context &&
+       disp->Extensions.EXT_create_context_robustness &&
+       disp->Extensions.KHR_get_all_proc_addresses &&
+       disp->Extensions.KHR_gl_colorspace &&
+       disp->Extensions.KHR_surfaceless_context)
+      disp->Version = 15; /* reached only if all thirteen flags above are non-zero */
+}
 
 /**
  * This is typically the second EGL function that an application calls.
@@ -444,17 +502,18 @@
        */
       disp->Extensions.KHR_get_all_proc_addresses = EGL_TRUE;
 
+      _eglComputeVersion(disp);
       _eglCreateExtensionsString(disp);
       _eglCreateAPIsString(disp);
-      _eglsnprintf(disp->VersionString, sizeof(disp->VersionString),
-              "%d.%d (%s)", disp->VersionMajor, disp->VersionMinor,
+      snprintf(disp->VersionString, sizeof(disp->VersionString),
+              "%d.%d (%s)", disp->Version / 10, disp->Version % 10,
               disp->Driver->Name);
    }
 
    /* Update applications version of major and minor if not NULL */
    if ((major != NULL) && (minor != NULL)) {
-      *major = disp->VersionMajor;
-      *minor = disp->VersionMinor;
+      *major = disp->Version / 10;
+      *minor = disp->Version % 10;
    }
 
    RETURN_EGL_SUCCESS(disp, EGL_TRUE);
@@ -721,6 +780,24 @@
 }
 
 
+EGLSurface EGLAPIENTRY
+eglCreatePlatformWindowSurface(EGLDisplay dpy, EGLConfig config,
+                               void *native_window,
+                               const EGLAttrib *attrib_list)
+{ /* wraps eglCreatePlatformWindowSurfaceEXT() for EGLAttrib attribute lists */
+   EGLSurface surface;
+   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
+
+   if (attrib_list && !int_attribs) /* non-NULL list but no copy: calloc failed */
+      RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_NO_SURFACE);
+
+   surface = eglCreatePlatformWindowSurfaceEXT(dpy, config, native_window,
+                                               int_attribs);
+   free(int_attribs); /* the converted copy is only needed for the call above */
+   return surface;
+}
+
+
 static EGLSurface
 _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
                               void *native_pixmap, const EGLint *attrib_list)
@@ -775,6 +852,24 @@
 
 
 EGLSurface EGLAPIENTRY
+eglCreatePlatformPixmapSurface(EGLDisplay dpy, EGLConfig config,
+                               void *native_pixmap,
+                               const EGLAttrib *attrib_list)
+{ /* wraps eglCreatePlatformPixmapSurfaceEXT() for EGLAttrib attribute lists */
+   EGLSurface surface;
+   EGLint *int_attribs = _eglConvertAttribsToInt(attrib_list);
+
+   if (attrib_list && !int_attribs) /* non-NULL list but no copy: calloc failed */
+      RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_NO_SURFACE);
+
+   surface = eglCreatePlatformPixmapSurfaceEXT(dpy, config, native_pixmap,
+                                               int_attribs);
+   free(int_attribs); /* the converted copy is only needed for the call above */
+   return surface;
+}
+
+
+EGLSurface EGLAPIENTRY
 eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config,
                         const EGLint *attrib_list)
 {
@@ -918,8 +1013,6 @@
 }
 
 
-#ifdef EGL_EXT_swap_buffers_with_damage
-
 static EGLBoolean EGLAPIENTRY
 eglSwapBuffersWithDamageEXT(EGLDisplay dpy, EGLSurface surface,
                             EGLint *rects, EGLint n_rects)
@@ -945,8 +1038,6 @@
    RETURN_EGL_EVAL(disp, ret);
 }
 
-#endif /* EGL_EXT_swap_buffers_with_damage */
-
 EGLBoolean EGLAPIENTRY
 eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target)
 {
@@ -1107,8 +1198,6 @@
 }
 
 
-#ifdef EGL_MESA_drm_display
-
 static EGLDisplay EGLAPIENTRY
 eglGetDRMDisplayMESA(int fd)
 {
@@ -1116,8 +1205,6 @@
    return _eglGetDisplayHandle(dpy);
 }
 
-#endif /* EGL_MESA_drm_display */
-
 /**
  ** EGL 1.2
  **/
@@ -1220,7 +1307,7 @@
 }
 
 
-static EGLImageKHR EGLAPIENTRY
+static EGLImage EGLAPIENTRY
 eglCreateImageKHR(EGLDisplay dpy, EGLContext ctx, EGLenum target,
                   EGLClientBuffer buffer, const EGLint *attr_list)
 {
@@ -1228,7 +1315,7 @@
    _EGLContext *context = _eglLookupContext(ctx, disp);
    _EGLDriver *drv;
    _EGLImage *img;
-   EGLImageKHR ret;
+   EGLImage ret;
 
    _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
    if (!disp->Extensions.KHR_image_base)
@@ -1249,8 +1336,24 @@
 }
 
 
-static EGLBoolean EGLAPIENTRY
-eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image)
+/* EGLAttrib variant of eglCreateImageKHR: convert attr_list, then forward. */
+EGLImage EGLAPIENTRY
+eglCreateImage(EGLDisplay dpy, EGLContext ctx, EGLenum target,
+               EGLClientBuffer buffer, const EGLAttrib *attr_list)
+{
+   EGLint *converted = _eglConvertAttribsToInt(attr_list);
+   EGLImage result;
+
+   if (converted == NULL && attr_list != NULL)
+      RETURN_EGL_ERROR(NULL, EGL_BAD_ALLOC, EGL_NO_IMAGE);
+   result = eglCreateImageKHR(dpy, ctx, target, buffer, converted);
+   free(converted);
+   return result;
+}
+
+
+EGLBoolean EGLAPIENTRY
+eglDestroyImage(EGLDisplay dpy, EGLImage image)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLImage *img = _eglLookupImage(image, disp);
@@ -1270,15 +1373,16 @@
 }
 
 
-static EGLSyncKHR
+static EGLSync
 _eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list,
-               const EGLAttribKHR *attrib_list64, EGLBoolean is64)
+               const EGLAttrib *attrib_list64, EGLBoolean is64,
+               EGLenum invalid_type_error)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLContext *ctx = _eglGetCurrentContext();
    _EGLDriver *drv;
    _EGLSync *sync;
-   EGLSyncKHR ret;
+   EGLSync ret;
 
    _EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv);
 
@@ -1293,18 +1397,18 @@
    switch (type) {
    case EGL_SYNC_FENCE_KHR:
       if (!disp->Extensions.KHR_fence_sync)
-         RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR);
+         RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR);
       break;
    case EGL_SYNC_REUSABLE_KHR:
       if (!disp->Extensions.KHR_reusable_sync)
-         RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR);
+         RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR);
       break;
    case EGL_SYNC_CL_EVENT_KHR:
       if (!disp->Extensions.KHR_cl_event2)
-         RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR);
+         RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR);
       break;
    default:
-      RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR);
+      RETURN_EGL_ERROR(disp, invalid_type_error, EGL_NO_SYNC_KHR);
    }
 
    sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list, attrib_list64);
@@ -1314,22 +1418,32 @@
 }
 
 
-static EGLSyncKHR EGLAPIENTRY
+static EGLSync EGLAPIENTRY
 eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list)
 {
-   return _eglCreateSync(dpy, type, attrib_list, NULL, EGL_FALSE);
+   return _eglCreateSync(dpy, type, attrib_list, NULL, EGL_FALSE,
+                         EGL_BAD_ATTRIBUTE);
 }
 
 
-static EGLSyncKHR EGLAPIENTRY
-eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list)
+static EGLSync EGLAPIENTRY
+eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list)
 {
-   return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE);
+   return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE,
+                         EGL_BAD_ATTRIBUTE);
 }
 
 
-static EGLBoolean EGLAPIENTRY
-eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync)
+EGLSync EGLAPIENTRY
+eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list)
+{
+   return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE, /* is64 */
+                         EGL_BAD_PARAMETER); /* error for a rejected <type> */
+}
+
+
+EGLBoolean EGLAPIENTRY
+eglDestroySync(EGLDisplay dpy, EGLSync sync)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLSync *s = _eglLookupSync(sync, disp);
@@ -1347,8 +1461,8 @@
 }
 
 
-static EGLint EGLAPIENTRY
-eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout)
+EGLint EGLAPIENTRY
+eglClientWaitSync(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLSync *s = _eglLookupSync(sync, disp);
@@ -1369,7 +1483,7 @@
 
 
 static EGLint EGLAPIENTRY
-eglWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags)
+eglWaitSyncKHR(EGLDisplay dpy, EGLSync sync, EGLint flags)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLSync *s = _eglLookupSync(sync, disp);
@@ -1394,8 +1508,19 @@
 }
 
 
+EGLBoolean EGLAPIENTRY
+eglWaitSync(EGLDisplay dpy, EGLSync sync, EGLint flags)
+{
+   /* eglWaitSyncKHR is typed EGLint and this core entry point EGLBoolean,
+    * but both only ever yield EGL_FALSE or EGL_TRUE, so convert directly.
+    */
+   const EGLint khr_result = eglWaitSyncKHR(dpy, sync, flags);
+   return (EGLBoolean) khr_result;
+}
+
+
 static EGLBoolean EGLAPIENTRY
-eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode)
+eglSignalSyncKHR(EGLDisplay dpy, EGLSync sync, EGLenum mode)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLSync *s = _eglLookupSync(sync, disp);
@@ -1410,8 +1535,8 @@
 }
 
 
-static EGLBoolean EGLAPIENTRY
-eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value)
+EGLBoolean EGLAPIENTRY
+eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLSync *s = _eglLookupSync(sync, disp);
@@ -1421,13 +1546,29 @@
    _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv);
    assert(disp->Extensions.KHR_reusable_sync ||
           disp->Extensions.KHR_fence_sync);
-   ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value);
+   ret = drv->API.GetSyncAttrib(drv, disp, s, attribute, value);
 
    RETURN_EGL_EVAL(disp, ret);
 }
 
 
-#ifdef EGL_NOK_swap_region
+/* EGLint front end for eglGetSyncAttrib; NULL <value> -> EGL_BAD_PARAMETER. */
+static EGLBoolean EGLAPIENTRY
+eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value)
+{
+   EGLAttrib attrib;
+   EGLBoolean result;
+
+   if (!value)
+      RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, EGL_FALSE);
+   /* KHR_fence_sync: on error <*value> must stay unmodified. */
+   attrib = *value;
+   result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
+   if (result != EGL_FALSE)
+      *value = attrib;
+   return result;
+}
+
 
 static EGLBoolean EGLAPIENTRY
 eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,
@@ -1454,18 +1595,14 @@
    RETURN_EGL_EVAL(disp, ret);
 }
 
-#endif /* EGL_NOK_swap_region */
-
-
-#ifdef EGL_MESA_drm_image
 
-static EGLImageKHR EGLAPIENTRY
+static EGLImage EGLAPIENTRY
 eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLDriver *drv;
    _EGLImage *img;
-   EGLImageKHR ret;
+   EGLImage ret;
 
    _EGL_CHECK_DISPLAY(disp, EGL_NO_IMAGE_KHR, drv);
    if (!disp->Extensions.MESA_drm_image)
@@ -1478,7 +1615,7 @@
 }
 
 static EGLBoolean EGLAPIENTRY
-eglExportDRMImageMESA(EGLDisplay dpy, EGLImageKHR image,
+eglExportDRMImageMESA(EGLDisplay dpy, EGLImage image,
 		      EGLint *name, EGLint *handle, EGLint *stride)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
@@ -1497,9 +1634,7 @@
    RETURN_EGL_EVAL(disp, ret);
 }
 
-#endif
 
-#ifdef EGL_WL_bind_wayland_display
 struct wl_display;
 
 static EGLBoolean EGLAPIENTRY
@@ -1556,11 +1691,10 @@
 
    RETURN_EGL_EVAL(disp, ret);
 }
-#endif
 
-#ifdef EGL_WL_create_wayland_buffer_from_image
+
 static struct wl_buffer * EGLAPIENTRY
-eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImageKHR image)
+eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImage image)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
    _EGLImage *img;
@@ -1579,7 +1713,6 @@
 
    RETURN_EGL_EVAL(disp, ret);
 }
-#endif
 
 static EGLBoolean EGLAPIENTRY
 eglPostSubBufferNV(EGLDisplay dpy, EGLSurface surface,
@@ -1622,9 +1755,8 @@
    RETURN_EGL_EVAL(disp, ret);
 }
 
-#ifdef EGL_MESA_image_dma_buf_export
 static EGLBoolean EGLAPIENTRY
-eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImageKHR image,
+eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImage image,
                               EGLint *fourcc, EGLint *nplanes,
                               EGLuint64KHR *modifiers)
 {
@@ -1646,7 +1778,7 @@
 }
 
 static EGLBoolean EGLAPIENTRY
-eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImageKHR image,
+eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImage image,
                          int *fds, EGLint *strides, EGLint *offsets)
 {
    _EGLDisplay *disp = _eglLockDisplay(dpy);
@@ -1664,7 +1796,6 @@
 
    RETURN_EGL_EVAL(disp, ret);
 }
-#endif
 
 __eglMustCastToProperFunctionPointerType EGLAPIENTRY
 eglGetProcAddress(const char *procname)
@@ -1711,45 +1842,41 @@
       { "eglWaitClient", (_EGLProc) eglWaitClient },
       { "eglWaitGL", (_EGLProc) eglWaitGL },
       { "eglWaitNative", (_EGLProc) eglWaitNative },
-#ifdef EGL_MESA_drm_display
+      { "eglCreateSync", (_EGLProc) eglCreateSync },
+      { "eglDestroySync", (_EGLProc) eglDestroySync },
+      { "eglClientWaitSync", (_EGLProc) eglClientWaitSync },
+      { "eglGetSyncAttrib", (_EGLProc) eglGetSyncAttrib },
+      { "eglWaitSync", (_EGLProc) eglWaitSync },
+      { "eglCreateImage", (_EGLProc) eglCreateImage },
+      { "eglDestroyImage", (_EGLProc) eglDestroyImage },
+      { "eglGetPlatformDisplay", (_EGLProc) eglGetPlatformDisplay },
+      { "eglCreatePlatformWindowSurface", (_EGLProc) eglCreatePlatformWindowSurface },
+      { "eglCreatePlatformPixmapSurface", (_EGLProc) eglCreatePlatformPixmapSurface },
       { "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA },
-#endif
       { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR },
-      { "eglDestroyImageKHR", (_EGLProc) eglDestroyImageKHR },
+      { "eglDestroyImageKHR", (_EGLProc) eglDestroyImage },
       { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR },
       { "eglCreateSync64KHR", (_EGLProc) eglCreateSync64KHR },
-      { "eglDestroySyncKHR", (_EGLProc) eglDestroySyncKHR },
-      { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSyncKHR },
+      { "eglDestroySyncKHR", (_EGLProc) eglDestroySync },
+      { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSync },
       { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR },
       { "eglSignalSyncKHR", (_EGLProc) eglSignalSyncKHR },
       { "eglGetSyncAttribKHR", (_EGLProc) eglGetSyncAttribKHR },
-#ifdef EGL_NOK_swap_region
       { "eglSwapBuffersRegionNOK", (_EGLProc) eglSwapBuffersRegionNOK },
-#endif
-#ifdef EGL_MESA_drm_image
       { "eglCreateDRMImageMESA", (_EGLProc) eglCreateDRMImageMESA },
       { "eglExportDRMImageMESA", (_EGLProc) eglExportDRMImageMESA },
-#endif
-#ifdef EGL_WL_bind_wayland_display
       { "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL },
       { "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL },
       { "eglQueryWaylandBufferWL", (_EGLProc) eglQueryWaylandBufferWL },
-#endif
-#ifdef EGL_WL_create_wayland_buffer_from_image
       { "eglCreateWaylandBufferFromImageWL", (_EGLProc) eglCreateWaylandBufferFromImageWL },
-#endif
       { "eglPostSubBufferNV", (_EGLProc) eglPostSubBufferNV },
-#ifdef EGL_EXT_swap_buffers_with_damage
       { "eglSwapBuffersWithDamageEXT", (_EGLProc) eglSwapBuffersWithDamageEXT },
-#endif
       { "eglGetPlatformDisplayEXT", (_EGLProc) eglGetPlatformDisplayEXT },
       { "eglCreatePlatformWindowSurfaceEXT", (_EGLProc) eglCreatePlatformWindowSurfaceEXT },
       { "eglCreatePlatformPixmapSurfaceEXT", (_EGLProc) eglCreatePlatformPixmapSurfaceEXT },
       { "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM },
-#ifdef EGL_MESA_image_dma_buf_export
       { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA },
       { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA },
-#endif
       { NULL, NULL }
    };
    EGLint i;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglapi.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglapi.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglapi.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglapi.h	2015-09-16 14:36:08.000000000 +0000
@@ -91,49 +91,37 @@
 typedef EGLBoolean (*DestroyImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *image);
 
 
-typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list, const EGLAttribKHR *attrib_list64);
+typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list, const EGLAttrib *attrib_list64);
 typedef EGLBoolean (*DestroySyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync);
-typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTimeKHR timeout);
+typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTime timeout);
 typedef EGLint (*WaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync);
 typedef EGLBoolean (*SignalSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLenum mode);
-typedef EGLBoolean (*GetSyncAttribKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLint *value);
+typedef EGLBoolean (*GetSyncAttrib_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLAttrib *value);
 
 
-#ifdef EGL_NOK_swap_region
 typedef EGLBoolean (*SwapBuffersRegionNOK_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLint numRects, const EGLint *rects);
-#endif
 
-#ifdef EGL_MESA_drm_image
 typedef _EGLImage *(*CreateDRMImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, const EGLint *attr_list);
 typedef EGLBoolean (*ExportDRMImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *name, EGLint *handle, EGLint *stride);
-#endif
 
-#ifdef EGL_WL_bind_wayland_display
 struct wl_display;
 typedef EGLBoolean (*BindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *display);
 typedef EGLBoolean (*UnbindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *display);
 typedef EGLBoolean (*QueryWaylandBufferWL_t)(_EGLDriver *drv, _EGLDisplay *displ, struct wl_resource *buffer, EGLint attribute, EGLint *value);
-#endif
 
-#ifdef EGL_WL_create_wayland_buffer_from_image
 typedef struct wl_buffer * (*CreateWaylandBufferFromImageWL_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img);
-#endif
 
 typedef EGLBoolean (*PostSubBufferNV_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surface, EGLint x, EGLint y, EGLint width, EGLint height);
 
 typedef EGLint (*QueryBufferAge_t)(_EGLDriver *drv,
                                    _EGLDisplay *dpy, _EGLSurface *surface);
 
-#ifdef EGL_EXT_swap_buffers_with_damage
 typedef EGLBoolean (*SwapBuffersWithDamageEXT_t) (_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, const EGLint *rects, EGLint n_rects);
-#endif
 
 typedef EGLBoolean (*GetSyncValuesCHROMIUM_t) (_EGLDisplay *dpy, _EGLSurface *surface, EGLuint64KHR *ust, EGLuint64KHR *msc, EGLuint64KHR *sbc);
 
-#ifdef EGL_MESA_image_dma_buf_export
 typedef EGLBoolean (*ExportDMABUFImageQueryMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers);
 typedef EGLBoolean (*ExportDMABUFImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fds, EGLint *strides, EGLint *offsets);
-#endif
 
 /**
  * The API dispatcher jumps through these functions
@@ -178,40 +166,28 @@
    ClientWaitSyncKHR_t ClientWaitSyncKHR;
    WaitSyncKHR_t WaitSyncKHR;
    SignalSyncKHR_t SignalSyncKHR;
-   GetSyncAttribKHR_t GetSyncAttribKHR;
+   GetSyncAttrib_t GetSyncAttrib;
 
-#ifdef EGL_NOK_swap_region
    SwapBuffersRegionNOK_t SwapBuffersRegionNOK;
-#endif
 
-#ifdef EGL_MESA_drm_image
    CreateDRMImageMESA_t CreateDRMImageMESA;
    ExportDRMImageMESA_t ExportDRMImageMESA;
-#endif
 
-#ifdef EGL_WL_bind_wayland_display
    BindWaylandDisplayWL_t BindWaylandDisplayWL;
    UnbindWaylandDisplayWL_t UnbindWaylandDisplayWL;
    QueryWaylandBufferWL_t QueryWaylandBufferWL;
-#endif
 
-#ifdef EGL_WL_create_wayland_buffer_from_image
    CreateWaylandBufferFromImageWL_t CreateWaylandBufferFromImageWL;
-#endif
 
-#ifdef EGL_EXT_swap_buffers_with_damage
    SwapBuffersWithDamageEXT_t SwapBuffersWithDamageEXT;
-#endif /* EGL_EXT_swap_buffers_with_damage */
 
    PostSubBufferNV_t PostSubBufferNV;
 
    QueryBufferAge_t QueryBufferAge;
    GetSyncValuesCHROMIUM_t GetSyncValuesCHROMIUM;
 
-#ifdef EGL_MESA_image_dma_buf_export
    ExportDMABUFImageQueryMESA_t ExportDMABUFImageQueryMESA;
    ExportDMABUFImageMESA_t ExportDMABUFImageMESA;
-#endif
 };
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglarray.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglarray.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglarray.c	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglarray.c	2015-09-16 14:36:08.000000000 +0000
@@ -197,6 +197,9 @@
 
    count = array->Size;
    if (buffer) {
+      /* clamp size to 0 */
+      if (size < 0)
+         size = 0;
       /* do not exceed buffer size */
       if (count > size)
          count = size;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglconfig.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglconfig.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglconfig.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglconfig.c	2015-09-16 14:36:08.000000000 +0000
@@ -83,7 +83,8 @@
    _EGLDisplay *dpy = conf->Display;
 
    /* sanity check */
-   assert(dpy && conf->ConfigID > 0);
+   assert(dpy);
+   assert(conf->ConfigID > 0);
 
    if (!dpy->Configs) {
       dpy->Configs = _eglCreateArray("Config", 16);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglcontext.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglcontext.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglcontext.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglcontext.c	2015-09-16 14:36:08.000000000 +0000
@@ -101,11 +101,42 @@
 
       switch (attr) {
       case EGL_CONTEXT_CLIENT_VERSION:
+         /* The EGL 1.4 spec says:
+          *
+          *     "attribute EGL_CONTEXT_CLIENT_VERSION is only valid when the
+          *      current rendering API is EGL_OPENGL_ES_API"
+          *
+          * The EGL_KHR_create_context spec says:
+          *
+          *     "EGL_CONTEXT_MAJOR_VERSION_KHR           0x3098
+          *      (this token is an alias for EGL_CONTEXT_CLIENT_VERSION)"
+          *
+          *     "The values for attributes EGL_CONTEXT_MAJOR_VERSION_KHR and
+          *      EGL_CONTEXT_MINOR_VERSION_KHR specify the requested client API
+          *      version. They are only meaningful for OpenGL and OpenGL ES
+          *      contexts, and specifying them for other types of contexts will
+          *      generate an error."
+          */
+         if ((api != EGL_OPENGL_ES_API &&
+             (!dpy->Extensions.KHR_create_context || api != EGL_OPENGL_API))) {
+               err = EGL_BAD_ATTRIBUTE;
+               break;
+         }
+
          ctx->ClientMajorVersion = val;
          break;
 
       case EGL_CONTEXT_MINOR_VERSION_KHR:
-         if (!dpy->Extensions.KHR_create_context) {
+         /* The EGL_KHR_create_context spec says:
+          *
+          *     "The values for attributes EGL_CONTEXT_MAJOR_VERSION_KHR and
+          *      EGL_CONTEXT_MINOR_VERSION_KHR specify the requested client API
+          *      version. They are only meaningful for OpenGL and OpenGL ES
+          *      contexts, and specifying them for other types of contexts will
+          *      generate an error."
+          */
+         if (!dpy->Extensions.KHR_create_context ||
+             (api != EGL_OPENGL_ES_API && api != EGL_OPENGL_API)) {
             err = EGL_BAD_ATTRIBUTE;
             break;
          }
@@ -198,6 +229,36 @@
             ctx->Flags |= EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR;
          break;
 
+      case EGL_CONTEXT_OPENGL_ROBUST_ACCESS:
+         if (dpy->Version < 15) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         if (val == EGL_TRUE)
+            ctx->Flags |= EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR;
+         break;
+
+      case EGL_CONTEXT_OPENGL_DEBUG:
+         if (dpy->Version < 15) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         if (val == EGL_TRUE)
+            ctx->Flags |= EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR;
+         break;
+
+      case EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE:
+         if (dpy->Version < 15) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         if (val == EGL_TRUE)
+            ctx->Flags |= EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR;
+         break;
+
       default:
          err = EGL_BAD_ATTRIBUTE;
          break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egldisplay.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egldisplay.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egldisplay.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egldisplay.c	2015-09-16 14:37:00.000000000 +0000
@@ -65,13 +65,12 @@
    _EGLPlatformType platform;
    const char *name;
 } egl_platforms[_EGL_NUM_PLATFORMS] = {
-   { _EGL_PLATFORM_WINDOWS, "gdi" },
    { _EGL_PLATFORM_X11, "x11" },
    { _EGL_PLATFORM_WAYLAND, "wayland" },
    { _EGL_PLATFORM_DRM, "drm" },
-   { _EGL_PLATFORM_NULL, "null" },
    { _EGL_PLATFORM_ANDROID, "android" },
-   { _EGL_PLATFORM_HAIKU, "haiku" }
+   { _EGL_PLATFORM_HAIKU, "haiku" },
+   { _EGL_PLATFORM_SURFACELESS, "surfaceless" },
 };
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egldisplay.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egldisplay.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egldisplay.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egldisplay.h	2015-09-16 14:37:00.000000000 +0000
@@ -44,13 +44,12 @@
 #endif
 
 enum _egl_platform_type {
-   _EGL_PLATFORM_WINDOWS,
    _EGL_PLATFORM_X11,
    _EGL_PLATFORM_WAYLAND,
    _EGL_PLATFORM_DRM,
-   _EGL_PLATFORM_NULL,
    _EGL_PLATFORM_ANDROID,
    _EGL_PLATFORM_HAIKU,
+   _EGL_PLATFORM_SURFACELESS,
 
    _EGL_NUM_PLATFORMS,
    _EGL_INVALID_PLATFORM = -1
@@ -90,45 +89,44 @@
  */
 struct _egl_extensions
 {
-   EGLBoolean MESA_drm_display;
-   EGLBoolean MESA_drm_image;
-   EGLBoolean MESA_configless_context;
+   /* Please keep these sorted alphabetically. */
+   EGLBoolean ANDROID_image_native_buffer;
 
-   EGLBoolean WL_bind_wayland_display;
-   EGLBoolean WL_create_wayland_buffer_from_image;
+   EGLBoolean CHROMIUM_sync_control;
 
-   EGLBoolean KHR_image_base;
-   EGLBoolean KHR_image_pixmap;
-   EGLBoolean KHR_vg_parent_image;
+   EGLBoolean EXT_buffer_age;
+   EGLBoolean EXT_create_context_robustness;
+   EGLBoolean EXT_image_dma_buf_import;
+   EGLBoolean EXT_swap_buffers_with_damage;
+
+   EGLBoolean KHR_cl_event2;
+   EGLBoolean KHR_create_context;
+   EGLBoolean KHR_fence_sync;
    EGLBoolean KHR_get_all_proc_addresses;
+   EGLBoolean KHR_gl_colorspace;
+   EGLBoolean KHR_gl_renderbuffer_image;
    EGLBoolean KHR_gl_texture_2D_image;
-   EGLBoolean KHR_gl_texture_cubemap_image;
    EGLBoolean KHR_gl_texture_3D_image;
-   EGLBoolean KHR_gl_renderbuffer_image;
-
+   EGLBoolean KHR_gl_texture_cubemap_image;
+   EGLBoolean KHR_image_base;
+   EGLBoolean KHR_image_pixmap;
    EGLBoolean KHR_reusable_sync;
-   EGLBoolean KHR_fence_sync;
+   EGLBoolean KHR_surfaceless_context;
+   EGLBoolean KHR_vg_parent_image;
    EGLBoolean KHR_wait_sync;
-   EGLBoolean KHR_cl_event2;
 
-   EGLBoolean KHR_surfaceless_context;
-   EGLBoolean KHR_create_context;
+   EGLBoolean MESA_configless_context;
+   EGLBoolean MESA_drm_display;
+   EGLBoolean MESA_drm_image;
+   EGLBoolean MESA_image_dma_buf_export;
 
    EGLBoolean NOK_swap_region;
    EGLBoolean NOK_texture_from_pixmap;
 
-   EGLBoolean ANDROID_image_native_buffer;
-
-   EGLBoolean CHROMIUM_sync_control;
-
    EGLBoolean NV_post_sub_buffer;
 
-   EGLBoolean EXT_create_context_robustness;
-   EGLBoolean EXT_buffer_age;
-   EGLBoolean EXT_swap_buffers_with_damage;
-   EGLBoolean EXT_image_dma_buf_import;
-
-   EGLBoolean MESA_image_dma_buf_export;
+   EGLBoolean WL_bind_wayland_display;
+   EGLBoolean WL_create_wayland_buffer_from_image;
 };
 
 
@@ -153,8 +151,7 @@
 
    /* these fields are set by the driver during init */
    void *DriverData;          /**< Driver private data */
-   EGLint VersionMajor;       /**< EGL major version */
-   EGLint VersionMinor;       /**< EGL minor version */
+   EGLint Version;            /**< EGL version major*10+minor */
    EGLint ClientAPIs;         /**< Bitmask of APIs supported (EGL_xxx_BIT) */
    _EGLExtensions Extensions; /**< Extensions supported */
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egldriver.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egldriver.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egldriver.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egldriver.c	2015-09-16 14:36:08.000000000 +0000
@@ -39,7 +39,6 @@
 #include <stdlib.h>
 #include "c11/threads.h"
 
-#include "eglstring.h"
 #include "egldefines.h"
 #include "egldisplay.h"
 #include "egldriver.h"
@@ -97,15 +96,10 @@
 static void
 _eglUnloadModule(_EGLModule *mod)
 {
-#if defined(_EGL_OS_UNIX)
    /* destroy the driver */
    if (mod->Driver && mod->Driver->Unload)
       mod->Driver->Unload(mod->Driver);
 
-#elif defined(_EGL_OS_WINDOWS)
-   /* XXX Windows unloads DLLs before atexit */
-#endif
-
    mod->Driver = NULL;
 }
 
@@ -135,7 +129,7 @@
    /* allocate a new one */
    mod = calloc(1, sizeof(*mod));
    if (mod) {
-      mod->Name = _eglstrdup(name);
+      mod->Name = strdup(name);
       if (!mod->Name) {
          free(mod);
          mod = NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglfallbacks.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglfallbacks.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglfallbacks.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglfallbacks.c	2015-09-16 14:36:08.000000000 +0000
@@ -91,19 +91,13 @@
    drv->API.ClientWaitSyncKHR = NULL;
    drv->API.WaitSyncKHR = NULL;
    drv->API.SignalSyncKHR = NULL;
-   drv->API.GetSyncAttribKHR = _eglGetSyncAttribKHR;
+   drv->API.GetSyncAttrib = _eglGetSyncAttrib;
 
-#ifdef EGL_MESA_drm_image
    drv->API.CreateDRMImageMESA = NULL;
    drv->API.ExportDRMImageMESA = NULL;
-#endif
 
-#ifdef EGL_NOK_swap_region
    drv->API.SwapBuffersRegionNOK = NULL;
-#endif
 
-#ifdef EGL_MESA_image_dma_buf_export
    drv->API.ExportDMABUFImageQueryMESA = NULL;
    drv->API.ExportDMABUFImageMESA = NULL;
-#endif
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglglobals.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglglobals.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglglobals.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglglobals.c	2015-09-16 14:36:08.000000000 +0000
@@ -50,23 +50,13 @@
       _eglFiniDisplay
    },
 
-   /* ClientExtensions */
-   {
-      true, /* EGL_EXT_client_extensions */
-      true, /* EGL_EXT_platform_base */
-      true, /* EGL_EXT_platform_x11 */
-      true, /* EGL_EXT_platform_wayland */
-      true, /* EGL_MESA_platform_gbm */
-      true, /* EGL_KHR_client_get_all_proc_addresses */
-   },
-
    /* ClientExtensionsString */
    "EGL_EXT_client_extensions"
    " EGL_EXT_platform_base"
-   " EGL_EXT_platform_x11"
    " EGL_EXT_platform_wayland"
-   " EGL_MESA_platform_gbm"
+   " EGL_EXT_platform_x11"
    " EGL_KHR_client_get_all_proc_addresses"
+   " EGL_MESA_platform_gbm"
 };
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglglobals.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglglobals.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglglobals.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglglobals.h	2015-09-16 14:36:08.000000000 +0000
@@ -50,15 +50,6 @@
    EGLint NumAtExitCalls;
    void (*AtExitCalls[10])(void);
 
-   struct _egl_client_extensions {
-      bool EXT_client_extensions;
-      bool EXT_platform_base;
-      bool EXT_platform_x11;
-      bool EXT_platform_wayland;
-      bool MESA_platform_gbm;
-      bool KHR_get_all_proc_addresses;
-   } ClientExtensions;
-
    const char *ClientExtensionString;
 };
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglimage.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglimage.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglimage.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglimage.h	2015-09-16 14:36:08.000000000 +0000
@@ -121,11 +121,11 @@
  * Link an image to its display and return the handle of the link.
  * The handle can be passed to client directly.
  */
-static inline EGLImageKHR
+static inline EGLImage
 _eglLinkImage(_EGLImage *img)
 {
    _eglLinkResource(&img->Resource, _EGL_RESOURCE_IMAGE);
-   return (EGLImageKHR) img;
+   return (EGLImage) img;
 }
 
 
@@ -145,7 +145,7 @@
  * Return NULL if the handle has no corresponding linked image.
  */
 static inline _EGLImage *
-_eglLookupImage(EGLImageKHR image, _EGLDisplay *dpy)
+_eglLookupImage(EGLImage image, _EGLDisplay *dpy)
 {
    _EGLImage *img = (_EGLImage *) image;
    if (!dpy || !_eglCheckResource((void *) img, _EGL_RESOURCE_IMAGE, dpy))
@@ -157,12 +157,12 @@
 /**
  * Return the handle of a linked image, or EGL_NO_IMAGE_KHR.
  */
-static inline EGLImageKHR
+static inline EGLImage
 _eglGetImageHandle(_EGLImage *img)
 {
    _EGLResource *res = (_EGLResource *) img;
    return (res && _eglIsResourceLinked(res)) ?
-      (EGLImageKHR) img : EGL_NO_IMAGE_KHR;
+      (EGLImage) img : EGL_NO_IMAGE_KHR;
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egllog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egllog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/egllog.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/egllog.c	2015-09-16 14:36:08.000000000 +0000
@@ -38,10 +38,11 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+#include <strings.h>
 #include "c11/threads.h"
 
 #include "egllog.h"
-#include "eglstring.h"
 
 #define MAXSTRING 1000
 #define FALLBACK_LOG_LEVEL _EGL_WARNING
@@ -146,7 +147,7 @@
    log_env = getenv("EGL_LOG_LEVEL");
    if (log_env) {
       for (i = 0; level_strings[i]; i++) {
-         if (_eglstrcasecmp(log_env, level_strings[i]) == 0) {
+         if (strcasecmp(log_env, level_strings[i]) == 0) {
             level = i;
             break;
          }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglstring.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglstring.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglstring.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglstring.c	1970-01-01 00:00:00.000000000 +0000
@@ -1,54 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 VMware, Inc.
- * Copyright 2009-2010 Chia-I Wu <olvaffe@gmail.com>
- * Copyright 2010-2011 LunarG, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * String utils.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include "eglstring.h"
-
-
-char *
-_eglstrdup(const char *s)
-{
-   if (s) {
-      size_t l = strlen(s);
-      char *s2 = malloc(l + 1);
-      if (s2)
-         strcpy(s2, s);
-      return s2;
-   }
-   return NULL;
-}
-
-
-
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglstring.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglstring.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglstring.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglstring.h	1970-01-01 00:00:00.000000000 +0000
@@ -1,50 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 VMware, Inc.
- * Copyright 2009-2010 Chia-I Wu <olvaffe@gmail.com>
- * Copyright 2010-2011 LunarG, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef EGLSTRING_INCLUDED
-#define EGLSTRING_INCLUDED
-
-#include <string.h>
-#include <stdio.h>
-
-#ifdef _EGL_OS_WINDOWS
-#define _eglstrcasecmp _stricmp
-#define _eglsnprintf _snprintf
-#else
-#include <strings.h> // for strcasecmp
-#define _eglstrcasecmp strcasecmp
-#define _eglsnprintf snprintf
-#endif
-
-extern char *
-_eglstrdup(const char *s);
-
-
-#endif /* EGLSTRING_INCLUDED */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsurface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsurface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsurface.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsurface.c	2015-09-16 14:36:08.000000000 +0000
@@ -84,6 +84,22 @@
 
       switch (attr) {
       /* common attributes */
+      case EGL_GL_COLORSPACE_KHR:
+         if (!dpy->Extensions.KHR_gl_colorspace) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+         switch (val) {
+         case EGL_GL_COLORSPACE_SRGB_KHR:
+         case EGL_GL_COLORSPACE_LINEAR_KHR:
+            break;
+         default:
+            err = EGL_BAD_ATTRIBUTE;
+         }
+         if (err != EGL_SUCCESS)
+            break;
+         surf->GLColorspace = val;
+         break;
       case EGL_VG_COLORSPACE:
          switch (val) {
          case EGL_VG_COLORSPACE_sRGB:
@@ -272,6 +288,7 @@
    surf->RenderBuffer = renderBuffer;
    surf->VGAlphaFormat = EGL_VG_ALPHA_FORMAT_NONPRE;
    surf->VGColorspace = EGL_VG_COLORSPACE_sRGB;
+   surf->GLColorspace = EGL_GL_COLORSPACE_LINEAR_KHR;
 
    surf->MipmapLevel = 0;
    surf->MultisampleResolve = EGL_MULTISAMPLE_RESOLVE_DEFAULT;
@@ -353,6 +370,13 @@
    case EGL_VG_COLORSPACE:
       *value = surface->VGColorspace;
       break;
+   case EGL_GL_COLORSPACE_KHR:
+      if (!dpy->Extensions.KHR_gl_colorspace) {
+         _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");
+         return EGL_FALSE;
+      }
+      *value = surface->GLColorspace;
+      break;
    case EGL_POST_SUB_BUFFER_SUPPORTED_NV:
       *value = surface->PostSubBufferSupportedNV;
       break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsurface.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsurface.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsurface.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsurface.h	2015-09-16 14:36:08.000000000 +0000
@@ -65,6 +65,7 @@
    EGLenum RenderBuffer;
    EGLenum VGAlphaFormat;
    EGLenum VGColorspace;
+   EGLenum GLColorspace;
 
    /* attributes set by eglSurfaceAttrib */
    EGLint MipmapLevel;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsync.c mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsync.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsync.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsync.c	2015-09-16 14:36:08.000000000 +0000
@@ -67,7 +67,7 @@
 
 
 static EGLint
-_eglParseSyncAttribList64(_EGLSync *sync, const EGLAttribKHR *attrib_list)
+_eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list)
 {
    EGLint i, err = EGL_SUCCESS;
 
@@ -103,7 +103,7 @@
 
 EGLBoolean
 _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type,
-             const EGLint *attrib_list, const EGLAttribKHR *attrib_list64)
+             const EGLint *attrib_list, const EGLAttrib *attrib_list64)
 {
    EGLint err;
 
@@ -141,8 +141,8 @@
 
 
 EGLBoolean
-_eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
-                     EGLint attribute, EGLint *value)
+_eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+                  EGLint attribute, EGLAttrib *value)
 {
    if (!value)
       return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR");
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsync.h mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsync.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/eglsync.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/eglsync.h	2015-09-16 14:36:08.000000000 +0000
@@ -47,18 +47,18 @@
    EGLenum Type;
    EGLenum SyncStatus;
    EGLenum SyncCondition;
-   EGLAttribKHR CLEvent;
+   EGLAttrib CLEvent;
 };
 
 
 extern EGLBoolean
 _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type,
-             const EGLint *attrib_list, const EGLAttribKHR *attrib_list64);
+             const EGLint *attrib_list, const EGLAttrib *attrib_list64);
 
 
 extern EGLBoolean
-_eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
-                     EGLint attribute, EGLint *value);
+_eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
+                  EGLint attribute, EGLAttrib *value);
 
 
 /**
@@ -87,11 +87,11 @@
  * Link a sync to its display and return the handle of the link.
  * The handle can be passed to client directly.
  */
-static inline EGLSyncKHR
+static inline EGLSync
 _eglLinkSync(_EGLSync *sync)
 {
    _eglLinkResource(&sync->Resource, _EGL_RESOURCE_SYNC);
-   return (EGLSyncKHR) sync;
+   return (EGLSync) sync;
 }
 
 
@@ -110,7 +110,7 @@
  * Return NULL if the handle has no corresponding linked sync.
  */
 static inline _EGLSync *
-_eglLookupSync(EGLSyncKHR handle, _EGLDisplay *dpy)
+_eglLookupSync(EGLSync handle, _EGLDisplay *dpy)
 {
    _EGLSync *sync = (_EGLSync *) handle;
    if (!dpy || !_eglCheckResource((void *) sync, _EGL_RESOURCE_SYNC, dpy))
@@ -122,12 +122,12 @@
 /**
  * Return the handle of a linked sync, or EGL_NO_SYNC_KHR.
  */
-static inline EGLSyncKHR
+static inline EGLSync
 _eglGetSyncHandle(_EGLSync *sync)
 {
    _EGLResource *res = (_EGLResource *) sync;
    return (res && _eglIsResourceLinked(res)) ?
-      (EGLSyncKHR) sync : EGL_NO_SYNC_KHR;
+      (EGLSync) sync : EGL_NO_SYNC_KHR;
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/Makefile.am	1970-01-01 00:00:00.000000000 +0000
@@ -1,98 +0,0 @@
-# Copyright © 2012 Intel Corporation
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include Makefile.sources
-
-AM_CFLAGS = \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/src/gbm/main \
-	$(DEFINES) \
-	$(VISIBILITY_CFLAGS) \
-	$(EGL_CFLAGS) \
-	-D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \
-	-D_EGL_DRIVER_SEARCH_DIR=\"$(libdir)/egl\" \
-	-D_EGL_OS_UNIX=1
-
-lib_LTLIBRARIES = libEGL.la
-
-libEGL_la_SOURCES = \
-	${LIBEGL_C_FILES}
-
-libEGL_la_LIBADD = \
-	$(EGL_LIB_DEPS)
-libEGL_la_LDFLAGS = \
-	-no-undefined \
-	-version-number 1:0 \
-	$(BSYMBOLIC) \
-	$(GC_SECTIONS) \
-	$(LD_NO_UNDEFINED)
-
-if HAVE_EGL_PLATFORM_X11
-AM_CFLAGS += -DHAVE_X11_PLATFORM
-AM_CFLAGS += $(XCB_DRI2_CFLAGS)
-libEGL_la_LIBADD += $(XCB_DRI2_LIBS)
-endif
-
-if HAVE_EGL_PLATFORM_WAYLAND
-AM_CFLAGS += -DHAVE_WAYLAND_PLATFORM
-AM_CFLAGS += $(WAYLAND_CFLAGS)
-libEGL_la_LIBADD += $(WAYLAND_LIBS)
-libEGL_la_LIBADD += $(LIBDRM_LIBS)
-libEGL_la_LIBADD += ../wayland/wayland-drm/libwayland-drm.la
-endif
-
-if HAVE_EGL_PLATFORM_DRM
-AM_CFLAGS += -DHAVE_DRM_PLATFORM
-libEGL_la_LIBADD += ../../gbm/libgbm.la
-endif
-
-if HAVE_EGL_PLATFORM_NULL
-AM_CFLAGS += -DHAVE_NULL_PLATFORM
-endif
-
-if HAVE_EGL_DRIVER_DRI2
-AM_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2
-AM_CFLAGS += -DHAVE_XCB_DRI2
-libEGL_la_LIBADD += ../drivers/dri2/libegl_dri2.la
-libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS)
-endif
-
-include $(top_srcdir)/install-lib-links.mk
-
-pkgconfigdir = $(libdir)/pkgconfig
-
-pkgconfig_DATA = egl.pc
-
-khrdir = $(includedir)/KHR
-khr_HEADERS = $(top_srcdir)/include/KHR/khrplatform.h
-
-egldir = $(includedir)/EGL
-egl_HEADERS = \
-	$(top_srcdir)/include/EGL/eglext.h \
-	$(top_srcdir)/include/EGL/egl.h \
-	$(top_srcdir)/include/EGL/eglextchromium.h \
-	$(top_srcdir)/include/EGL/eglmesaext.h \
-	$(top_srcdir)/include/EGL/eglplatform.h
-
-EXTRA_DIST = \
-	egl.def \
-	README.txt \
-	SConscript
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/Makefile.sources	1970-01-01 00:00:00.000000000 +0000
@@ -1,31 +0,0 @@
-LIBEGL_C_FILES := \
-	eglapi.c \
-	eglapi.h \
-	eglarray.c \
-	eglarray.h \
-	eglcompiler.h \
-	eglconfig.c \
-	eglconfig.h \
-	eglcontext.c \
-	eglcontext.h \
-	eglcurrent.c \
-	eglcurrent.h \
-	egldefines.h \
-	egldisplay.c \
-	egldisplay.h \
-	egldriver.c \
-	egldriver.h \
-	eglfallbacks.c \
-	eglglobals.c \
-	eglglobals.h \
-	eglimage.c \
-	eglimage.h \
-	egllog.c \
-	egllog.h \
-	eglstring.c \
-	eglstring.h \
-	eglsurface.c \
-	eglsurface.h \
-	eglsync.c \
-	eglsync.h \
-	egltypedefs.h
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/main/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/main/SConscript	1970-01-01 00:00:00.000000000 +0000
@@ -1,52 +0,0 @@
-#######################################################################
-# SConscript for EGL
-
-
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPDEFINES = [
-    '_EGL_DRIVER_SEARCH_DIR=\\"\\"',
-])
-
-if env['platform'] == 'haiku':
-    env.Append(CPPDEFINES = [
-        '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
-        '_EGL_OS_UNIX',
-        '_EGL_BUILT_IN_DRIVER_HAIKU',
-    ])
-    env.Prepend(LIBS = [
-        egl_haiku,
-        libloader,
-    ])
-else:
-    env.Append(CPPDEFINES = [
-        '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_X11',
-        '_EGL_OS_UNIX',
-    ])
-    if env['dri']:
-        env.Prepend(LIBS = [
-            egl_dri2,
-            libloader,
-        ])
-    # Disallow undefined symbols
-    if env['platform'] != 'darwin':
-        env.Append(SHLINKFLAGS = ['-Wl,-z,defs'])
-
-env.Append(CPPPATH = [
-    '#/include',
-])
-
-
-# parse Makefile.sources
-egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
-
-egl = env.SharedLibrary(
-    target = 'EGL',
-    source = egl_sources,
-)
-
-egl = env.InstallSharedLibrary(egl, version=(1, 0, 0))
-
-env.Alias('egl', egl)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/Makefile.am	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/Makefile.am	2015-09-16 14:37:00.000000000 +0000
@@ -0,0 +1,119 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+include Makefile.sources
+
+AM_CFLAGS = \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/src/egl/main \
+	-I$(top_srcdir)/src/gbm/main \
+	-I$(top_srcdir)/src \
+	$(DEFINES) \
+	$(VISIBILITY_CFLAGS) \
+	$(LIBDRM_CFLAGS) \
+	$(EGL_CFLAGS) \
+	-D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM)
+
+lib_LTLIBRARIES = libEGL.la
+
+libEGL_la_SOURCES = \
+	$(LIBEGL_C_FILES)
+
+libEGL_la_LIBADD = \
+	$(EGL_LIB_DEPS)
+libEGL_la_LDFLAGS = \
+	-no-undefined \
+	-version-number 1:0 \
+	$(BSYMBOLIC) \
+	$(GC_SECTIONS) \
+	$(LD_NO_UNDEFINED)
+# DRI2 platform backends: each enabled EGL platform below appends its source file.
+dri2_backend_FILES =
+
+if HAVE_EGL_PLATFORM_X11
+AM_CFLAGS += -DHAVE_X11_PLATFORM
+AM_CFLAGS += $(XCB_DRI2_CFLAGS)
+libEGL_la_LIBADD += $(XCB_DRI2_LIBS)
+dri2_backend_FILES += drivers/dri2/platform_x11.c
+endif
+
+if HAVE_EGL_PLATFORM_WAYLAND
+AM_CFLAGS += -DHAVE_WAYLAND_PLATFORM
+AM_CFLAGS += $(WAYLAND_CFLAGS)
+libEGL_la_LIBADD += $(WAYLAND_LIBS)
+libEGL_la_LIBADD += $(LIBDRM_LIBS)
+libEGL_la_LIBADD += $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la
+dri2_backend_FILES += drivers/dri2/platform_wayland.c
+endif
+
+if HAVE_EGL_PLATFORM_DRM
+AM_CFLAGS += -DHAVE_DRM_PLATFORM
+libEGL_la_LIBADD += $(top_builddir)/src/gbm/libgbm.la
+dri2_backend_FILES += drivers/dri2/platform_drm.c
+endif
+
+if HAVE_EGL_PLATFORM_SURFACELESS
+AM_CFLAGS += -DHAVE_SURFACELESS_PLATFORM
+dri2_backend_FILES += drivers/dri2/platform_surfaceless.c
+endif
+# Built-in DRI2 driver: adds its core sources and the platform files gathered above.
+if HAVE_EGL_DRIVER_DRI2
+AM_CFLAGS += \
+	-I$(top_srcdir)/src/loader \
+	-I$(top_srcdir)/src/egl/drivers/dri2 \
+	-I$(top_srcdir)/src/gbm/backends/dri \
+	-I$(top_srcdir)/src/egl/wayland/wayland-egl \
+	-I$(top_srcdir)/src/egl/wayland/wayland-drm \
+	-I$(top_builddir)/src/egl/wayland/wayland-drm \
+	-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
+	-D_EGL_BUILT_IN_DRIVER_DRI2
+
+libEGL_la_SOURCES += \
+	$(dri2_backend_core_FILES) \
+	$(dri2_backend_FILES)
+
+libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader.la
+libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS)
+endif
+
+include $(top_srcdir)/install-lib-links.mk
+
+pkgconfigdir = $(libdir)/pkgconfig
+
+pkgconfig_DATA = main/egl.pc
+
+khrdir = $(includedir)/KHR
+khr_HEADERS = $(top_srcdir)/include/KHR/khrplatform.h
+
+egldir = $(includedir)/EGL
+egl_HEADERS = \
+	$(top_srcdir)/include/EGL/eglext.h \
+	$(top_srcdir)/include/EGL/egl.h \
+	$(top_srcdir)/include/EGL/eglextchromium.h \
+	$(top_srcdir)/include/EGL/eglmesaext.h \
+	$(top_srcdir)/include/EGL/eglplatform.h
+# Additional files and directories shipped in the distribution tarball.
+EXTRA_DIST = \
+	SConscript \
+	drivers/haiku \
+	docs \
+	main/egl.def \
+	main/README.txt
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/Makefile.sources	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/Makefile.sources	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,34 @@
+LIBEGL_C_FILES := \
+	main/eglapi.c \
+	main/eglapi.h \
+	main/eglarray.c \
+	main/eglarray.h \
+	main/eglcompiler.h \
+	main/eglconfig.c \
+	main/eglconfig.h \
+	main/eglcontext.c \
+	main/eglcontext.h \
+	main/eglcurrent.c \
+	main/eglcurrent.h \
+	main/egldefines.h \
+	main/egldisplay.c \
+	main/egldisplay.h \
+	main/egldriver.c \
+	main/egldriver.h \
+	main/eglfallbacks.c \
+	main/eglglobals.c \
+	main/eglglobals.h \
+	main/eglimage.c \
+	main/eglimage.h \
+	main/egllog.c \
+	main/egllog.h \
+	main/eglsurface.c \
+	main/eglsurface.h \
+	main/eglsync.c \
+	main/eglsync.h \
+	main/egltypedefs.h
+
+dri2_backend_core_FILES := \
+	drivers/dri2/egl_dri2.c \
+	drivers/dri2/egl_dri2.h \
+	drivers/dri2/egl_dri2_fallbacks.h
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/egl/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/egl/SConscript	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/egl/SConscript	2015-09-16 14:36:08.000000000 +0000
@@ -0,0 +1,33 @@
+#######################################################################
+# SConscript for EGL: builds the EGL shared library with the built-in Haiku driver
+
+# Import everything the calling SConscript exported (presumably including 'env').
+Import('*')
+
+env = env.Clone()
+
+env.Append(CPPPATH = [
+    '#/include',
+    '#/src/egl/main',
+    '#/src',
+])
+
+
+# Core EGL sources: the LIBEGL_C_FILES list parsed out of Makefile.sources
+egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
+# Haiku is selected unconditionally: platform defines plus the Haiku driver source.
+env.Append(CPPDEFINES = [
+    '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
+    '_EGL_BUILT_IN_DRIVER_HAIKU',
+    'HAVE_HAIKU_PLATFORM',
+])
+egl_sources.append('drivers/haiku/egl_haiku.cpp')
+
+egl = env.SharedLibrary(
+    target = 'EGL',
+    source = egl_sources,
+)
+
+egl = env.InstallSharedLibrary(egl, version=(1, 0, 0))
+
+env.Alias('egl', egl)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Android.common.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Android.common.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Android.common.mk	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Android.common.mk	2015-09-16 14:36:08.000000000 +0000
@@ -29,4 +29,12 @@
 	$(GALLIUM_TOP)/winsys \
 	$(GALLIUM_TOP)/drivers
 
+ifeq ($(MESA_ENABLE_LLVM),true)
+LOCAL_C_INCLUDES += \
+	external/llvm/include \
+	external/llvm/device/include \
+	external/libcxx/include \
+	external/elfutils/$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),0.153/)libelf
+endif
+
 include $(MESA_COMMON_MK)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Android.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Android.mk	2015-09-16 14:36:08.000000000 +0000
@@ -33,7 +33,9 @@
 #
 
 # swrast
-SUBDIRS += winsys/sw/android drivers/softpipe
+ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),)
+SUBDIRS += winsys/sw/dri drivers/softpipe
+endif
 
 # freedreno
 ifneq ($(filter freedreno, $(MESA_GPU_DRIVERS)),)
@@ -70,8 +72,14 @@
 endif
 ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),)
 SUBDIRS += drivers/radeonsi
+SUBDIRS += winsys/amdgpu/drm
+endif
 endif
 endif
+
+# vc4
+ifneq ($(filter vc4, $(MESA_GPU_DRIVERS)),)
+SUBDIRS += winsys/vc4/drm drivers/vc4
 endif
 
 # vmwgfx
@@ -79,5 +87,7 @@
 SUBDIRS += winsys/svga/drm drivers/svga
 endif
 
-mkfiles := $(patsubst %,$(GALLIUM_TOP)/%/Android.mk,$(SUBDIRS))
-include $(mkfiles)
+# Gallium state trackers and target for dri
+SUBDIRS += state_trackers/dri targets/dri
+
+include $(call all-named-subdir-makefiles,$(SUBDIRS))
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Automake.inc mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Automake.inc
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Automake.inc	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Automake.inc	2015-09-16 14:36:08.000000000 +0000
@@ -67,10 +67,3 @@
 GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
 	$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
 endif
-
-if NEED_WINSYS_XLIB
-GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
-	$(top_builddir)/src/gallium/winsys/sw/xlib/libws_xlib.la \
-	-lX11 -lXext -lXfixes \
-	$(LIBDRM_LIBS)
-endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/Android.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/Android.mk	2015-09-16 14:36:08.000000000 +0000
@@ -30,12 +30,23 @@
 
 LOCAL_SRC_FILES := \
 	$(C_SOURCES) \
+	$(NIR_SOURCES) \
 	$(VL_STUB_SOURCES)
 
 LOCAL_C_INCLUDES := \
 	$(GALLIUM_TOP)/auxiliary/util
 
+ifeq ($(MESA_ENABLE_LLVM),true)
+LOCAL_SRC_FILES += \
+	$(GALLIVM_SOURCES) \
+	$(GALLIVM_CPP_SOURCES)
+
+LOCAL_CPPFLAGS := -std=c++11
+endif
+
+# We need libmesa_glsl to get NIR's generated include directories.
 LOCAL_MODULE := libmesa_gallium
+LOCAL_STATIC_LIBRARIES += libmesa_glsl
 
 # generate sources
 LOCAL_MODULE_CLASS := STATIC_LIBRARIES
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/cso_cache/cso_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/cso_cache/cso_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/cso_cache/cso_cache.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/cso_cache/cso_cache.c	2015-09-16 14:36:08.000000000 +0000
@@ -80,7 +80,7 @@
    return hash_key((item), item_size);
 }
 
-static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
+static inline struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
 {
    struct cso_hash *hash;
    hash = sc->hashes[type];
@@ -127,7 +127,7 @@
    FREE(state);
 }
 
-static INLINE void delete_cso(void *state, enum cso_cache_type type)
+static inline void delete_cso(void *state, enum cso_cache_type type)
 {
    switch (type) {
    case CSO_BLEND:
@@ -152,7 +152,7 @@
 }
 
 
-static INLINE void sanitize_hash(struct cso_cache *sc,
+static inline void sanitize_hash(struct cso_cache *sc,
                                  struct cso_hash *hash,
                                  enum cso_cache_type type,
                                  int max_size)
@@ -162,7 +162,7 @@
 }
 
 
-static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
+static inline void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
                                int max_size, void *user_data)
 {
    /* if we're approach the maximum size, remove fourth of the entries
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/cso_cache/cso_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/cso_cache/cso_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/cso_cache/cso_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/cso_cache/cso_context.c	2015-09-16 14:36:08.000000000 +0000
@@ -56,22 +56,8 @@
  */
 struct sampler_info
 {
-   struct {
-      void *samplers[PIPE_MAX_SAMPLERS];
-      unsigned nr_samplers;
-   } hw;
-
    void *samplers[PIPE_MAX_SAMPLERS];
    unsigned nr_samplers;
-
-   void *samplers_saved[PIPE_MAX_SAMPLERS];
-   unsigned nr_samplers_saved;
-
-   struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   unsigned nr_views;
-
-   struct pipe_sampler_view *views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   unsigned nr_views_saved;
 };
 
 
@@ -85,6 +71,15 @@
    boolean has_tessellation;
    boolean has_streamout;
 
+   struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_fragment_views;
+
+   struct pipe_sampler_view *fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_fragment_views_saved;
+
+   void *fragment_samplers_saved[PIPE_MAX_SAMPLERS];
+   unsigned nr_fragment_samplers_saved;
+
    struct sampler_info samplers[PIPE_SHADER_TYPES];
 
    struct pipe_vertex_buffer aux_vertex_buffer_current;
@@ -116,9 +111,6 @@
    uint render_condition_mode, render_condition_mode_saved;
    boolean render_condition_cond, render_condition_cond_saved;
 
-   struct pipe_clip_state clip;
-   struct pipe_clip_state clip_saved;
-
    struct pipe_framebuffer_state fb, fb_saved;
    struct pipe_viewport_state vp, vp_saved;
    struct pipe_blend_color blend_color;
@@ -192,7 +184,7 @@
 }
 
 
-static INLINE boolean delete_cso(struct cso_context *ctx,
+static inline boolean delete_cso(struct cso_context *ctx,
                                  void *state, enum cso_cache_type type)
 {
    switch (type) {
@@ -213,7 +205,7 @@
    return FALSE;
 }
 
-static INLINE void
+static inline void
 sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
               int max_size, void *user_data)
 {
@@ -297,7 +289,7 @@
  */
 void cso_destroy_context( struct cso_context *ctx )
 {
-   unsigned i, shader;
+   unsigned i;
 
    if (ctx->pipe) {
       ctx->pipe->set_index_buffer(ctx->pipe, NULL);
@@ -347,13 +339,9 @@
          ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL);
    }
 
-   /* free sampler views for each shader stage */
-   for (shader = 0; shader < Elements(ctx->samplers); shader++) {
-      struct sampler_info *info = &ctx->samplers[shader];
-      for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
-         pipe_sampler_view_reference(&info->views[i], NULL);
-         pipe_sampler_view_reference(&info->views_saved[i], NULL);
-      }
+   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+      pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
+      pipe_sampler_view_reference(&ctx->fragment_views_saved[i], NULL);
    }
 
    util_unreference_framebuffer_state(&ctx->fb);
@@ -919,47 +907,6 @@
    ctx->tesseval_shader_saved = NULL;
 }
 
-/* clip state */
-
-static INLINE void
-clip_state_cpy(struct pipe_clip_state *dst,
-               const struct pipe_clip_state *src)
-{
-   memcpy(dst->ucp, src->ucp, sizeof(dst->ucp));
-}
-
-static INLINE int
-clip_state_cmp(const struct pipe_clip_state *a,
-               const struct pipe_clip_state *b)
-{
-   return memcmp(a->ucp, b->ucp, sizeof(a->ucp));
-}
-
-void
-cso_set_clip(struct cso_context *ctx,
-             const struct pipe_clip_state *clip)
-{
-   if (clip_state_cmp(&ctx->clip, clip)) {
-      clip_state_cpy(&ctx->clip, clip);
-      ctx->pipe->set_clip_state(ctx->pipe, clip);
-   }
-}
-
-void
-cso_save_clip(struct cso_context *ctx)
-{
-   clip_state_cpy(&ctx->clip_saved, &ctx->clip);
-}
-
-void
-cso_restore_clip(struct cso_context *ctx)
-{
-   if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) {
-      clip_state_cpy(&ctx->clip, &ctx->clip_saved);
-      ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
-   }
-}
-
 enum pipe_error
 cso_set_vertex_elements(struct cso_context *ctx,
                         unsigned count,
@@ -1122,11 +1069,9 @@
 
 /**************** fragment/vertex sampler view state *************************/
 
-static enum pipe_error
-single_sampler(struct cso_context *ctx,
-               struct sampler_info *info,
-               unsigned idx,
-               const struct pipe_sampler_state *templ)
+enum pipe_error
+cso_single_sampler(struct cso_context *ctx, unsigned shader_stage,
+                   unsigned idx, const struct pipe_sampler_state *templ)
 {
    void *handle = NULL;
 
@@ -1162,24 +1107,13 @@
       }
    }
 
-   info->samplers[idx] = handle;
-
+   ctx->samplers[shader_stage].samplers[idx] = handle;
    return PIPE_OK;
 }
 
-enum pipe_error
-cso_single_sampler(struct cso_context *ctx,
-                   unsigned shader_stage,
-                   unsigned idx,
-                   const struct pipe_sampler_state *templ)
-{
-   return single_sampler(ctx, &ctx->samplers[shader_stage], idx, templ);
-}
-
 
-
-static void
-single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
+void
+cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
 {
    struct sampler_info *info = &ctx->samplers[shader_stage];
    unsigned i;
@@ -1191,33 +1125,8 @@
    }
 
    info->nr_samplers = i;
-
-   if (info->hw.nr_samplers != info->nr_samplers ||
-       memcmp(info->hw.samplers,
-              info->samplers,
-              info->nr_samplers * sizeof(void *)) != 0)
-   {
-      memcpy(info->hw.samplers,
-             info->samplers,
-             info->nr_samplers * sizeof(void *));
-
-      /* set remaining slots/pointers to null */
-      for (i = info->nr_samplers; i < info->hw.nr_samplers; i++)
-         info->samplers[i] = NULL;
-
-      ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0,
-                                     MAX2(info->nr_samplers,
-                                          info->hw.nr_samplers),
-                                     info->samplers);
-
-      info->hw.nr_samplers = info->nr_samplers;
-   }
-}
-
-void
-cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
-{
-   single_sampler_done(ctx, shader_stage);
+   ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0, i,
+                                  info->samplers);
 }
 
 
@@ -1240,38 +1149,42 @@
     */
 
    for (i = 0; i < nr; i++) {
-      temp = single_sampler(ctx, info, i, templates[i]);
+      temp = cso_single_sampler(ctx, shader_stage, i, templates[i]);
       if (temp != PIPE_OK)
          error = temp;
    }
 
    for ( ; i < info->nr_samplers; i++) {
-      temp = single_sampler(ctx, info, i, NULL);
+      temp = cso_single_sampler(ctx, shader_stage, i, NULL);
       if (temp != PIPE_OK)
          error = temp;
    }
 
-   single_sampler_done(ctx, shader_stage);
+   cso_single_sampler_done(ctx, shader_stage);
 
    return error;
 }
 
 void
-cso_save_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_samplers(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   info->nr_samplers_saved = info->nr_samplers;
-   memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers));
+   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
+
+   ctx->nr_fragment_samplers_saved = info->nr_samplers;
+   memcpy(ctx->fragment_samplers_saved, info->samplers,
+          sizeof(info->samplers));
 }
 
 
 void
-cso_restore_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_restore_fragment_samplers(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   info->nr_samplers = info->nr_samplers_saved;
-   memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers));
-   single_sampler_done(ctx, shader_stage);
+   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
+
+   info->nr_samplers = ctx->nr_fragment_samplers_saved;
+   memcpy(info->samplers, ctx->fragment_samplers_saved,
+          sizeof(info->samplers));
+   cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
 }
 
 
@@ -1281,71 +1194,74 @@
                       unsigned count,
                       struct pipe_sampler_view **views)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   unsigned i;
-   boolean any_change = FALSE;
+   if (shader_stage == PIPE_SHADER_FRAGMENT) {
+      unsigned i;
+      boolean any_change = FALSE;
+
+      /* reference new views */
+      for (i = 0; i < count; i++) {
+         any_change |= ctx->fragment_views[i] != views[i];
+         pipe_sampler_view_reference(&ctx->fragment_views[i], views[i]);
+      }
+      /* unref extra old views, if any */
+      for (; i < ctx->nr_fragment_views; i++) {
+         any_change |= ctx->fragment_views[i] != NULL;
+         pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
+      }
 
-   /* reference new views */
-   for (i = 0; i < count; i++) {
-      any_change |= info->views[i] != views[i];
-      pipe_sampler_view_reference(&info->views[i], views[i]);
-   }
-   /* unref extra old views, if any */
-   for (; i < info->nr_views; i++) {
-      any_change |= info->views[i] != NULL;
-      pipe_sampler_view_reference(&info->views[i], NULL);
-   }
+      /* bind the new sampler views */
+      if (any_change) {
+         ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0,
+                                      MAX2(ctx->nr_fragment_views, count),
+                                      ctx->fragment_views);
+      }
 
-   /* bind the new sampler views */
-   if (any_change) {
-      ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0,
-                                   MAX2(info->nr_views, count),
-                                   info->views);
+      ctx->nr_fragment_views = count;
    }
-
-   info->nr_views = count;
+   else
+      ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0, count, views);
 }
 
 
 void
-cso_save_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_sampler_views(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
    unsigned i;
 
-   info->nr_views_saved = info->nr_views;
+   ctx->nr_fragment_views_saved = ctx->nr_fragment_views;
 
-   for (i = 0; i < info->nr_views; i++) {
-      assert(!info->views_saved[i]);
-      pipe_sampler_view_reference(&info->views_saved[i], info->views[i]);
+   for (i = 0; i < ctx->nr_fragment_views; i++) {
+      assert(!ctx->fragment_views_saved[i]);
+      pipe_sampler_view_reference(&ctx->fragment_views_saved[i],
+                                  ctx->fragment_views[i]);
    }
 }
 
 
 void
-cso_restore_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+cso_restore_fragment_sampler_views(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   unsigned i, nr_saved = info->nr_views_saved;
+   unsigned i, nr_saved = ctx->nr_fragment_views_saved;
    unsigned num;
 
    for (i = 0; i < nr_saved; i++) {
-      pipe_sampler_view_reference(&info->views[i], NULL);
+      pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
       /* move the reference from one pointer to another */
-      info->views[i] = info->views_saved[i];
-      info->views_saved[i] = NULL;
+      ctx->fragment_views[i] = ctx->fragment_views_saved[i];
+      ctx->fragment_views_saved[i] = NULL;
    }
-   for (; i < info->nr_views; i++) {
-      pipe_sampler_view_reference(&info->views[i], NULL);
+   for (; i < ctx->nr_fragment_views; i++) {
+      pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
    }
 
-   num = MAX2(info->nr_views, nr_saved);
+   num = MAX2(ctx->nr_fragment_views, nr_saved);
 
    /* bind the old/saved sampler views */
-   ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0, num, info->views);
+   ctx->pipe->set_sampler_views(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, num,
+                                ctx->fragment_views);
 
-   info->nr_views = nr_saved;
-   info->nr_views_saved = 0;
+   ctx->nr_fragment_views = nr_saved;
+   ctx->nr_fragment_views_saved = 0;
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/cso_cache/cso_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/cso_cache/cso_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/cso_cache/cso_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/cso_cache/cso_context.h	2015-09-16 14:36:08.000000000 +0000
@@ -72,19 +72,17 @@
                  const struct pipe_sampler_state **states);
 
 void
-cso_save_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_save_fragment_samplers(struct cso_context *cso);
 
 void
-cso_restore_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_restore_fragment_samplers(struct cso_context *cso);
 
 /* Alternate interface to support state trackers that like to modify
  * samplers one at a time:
  */
 enum pipe_error
-cso_single_sampler(struct cso_context *cso,
-                   unsigned shader_stage,
-                   unsigned count,
-                   const struct pipe_sampler_state *states);
+cso_single_sampler(struct cso_context *cso, unsigned shader_stage,
+                   unsigned idx, const struct pipe_sampler_state *states);
 
 void
 cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
@@ -188,19 +186,6 @@
 void cso_restore_render_condition(struct cso_context *cso);
 
 
-/* clip state */
-
-void
-cso_set_clip(struct cso_context *cso,
-             const struct pipe_clip_state *clip);
-
-void
-cso_save_clip(struct cso_context *cso);
-
-void
-cso_restore_clip(struct cso_context *cso);
-
-
 /* sampler view state */
 
 void
@@ -210,10 +195,10 @@
                       struct pipe_sampler_view **views);
 
 void
-cso_save_sampler_views(struct cso_context *cso, unsigned shader_stage);
+cso_save_fragment_sampler_views(struct cso_context *ctx);
 
 void
-cso_restore_sampler_views(struct cso_context *cso, unsigned shader_stage);
+cso_restore_fragment_sampler_views(struct cso_context *ctx);
 
 
 /* constant buffers */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_gs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_gs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_gs.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_gs.c	2015-09-16 14:36:08.000000000 +0000
@@ -45,7 +45,7 @@
 /* fixme: move it from here */
 #define MAX_PRIMITIVES 64
 
-static INLINE int
+static inline int
 draw_gs_get_input_index(int semantic, int index,
                         const struct tgsi_shader_info *input_info)
 {
@@ -66,7 +66,7 @@
  * the number of elements in the SOA vector. This ensures that the
  * throughput is optimized for the given vector instruction set.
  */
-static INLINE boolean
+static inline boolean
 draw_gs_should_flush(struct draw_geometry_shader *shader)
 {
    return (shader->fetched_prim_count == shader->vector_length);
@@ -190,9 +190,15 @@
                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
 {
    struct tgsi_exec_machine *machine = shader->machine;
-
+   int j;
    tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
                                   constants, constants_size);
+
+   if (shader->info.uses_invocationid) {
+      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INVOCATIONID];
+      for (j = 0; j < TGSI_QUAD_SIZE; j++)
+         machine->SystemValue[i].i[j] = shader->invocation_id;
+   }
 }
 
 static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
@@ -385,7 +391,8 @@
       (struct vertex_header*)input,
       input_primitives,
       shader->draw->instance_id,
-      shader->llvm_prim_ids);
+      shader->llvm_prim_ids,
+      shader->invocation_id);
 
    return ret;
 }
@@ -555,7 +562,7 @@
     * overflown vertices into some area where they won't harm anyone */
    unsigned total_verts_per_buffer = shader->primitive_boundary *
       num_in_primitives;
-
+   unsigned invocation;
    //Assume at least one primitive
    max_out_prims = MAX2(max_out_prims, 1);
 
@@ -564,7 +571,7 @@
    output_verts->stride = output_verts->vertex_size;
    output_verts->verts =
       (struct vertex_header *)MALLOC(output_verts->vertex_size *
-                                     total_verts_per_buffer);
+                                     total_verts_per_buffer * shader->num_invocations);
    debug_assert(output_verts->verts);
 
 #if 0
@@ -592,7 +599,7 @@
    shader->input = input;
    shader->input_info = input_info;
    FREE(shader->primitive_lengths);
-   shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
+   shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned) * shader->num_invocations);
 
 
 #ifdef HAVE_LLVM
@@ -622,24 +629,27 @@
    }
 #endif
 
-   shader->prepare(shader, constants, constants_size);
+   for (invocation = 0; invocation < shader->num_invocations; invocation++) {
+      shader->invocation_id = invocation;
 
-   if (input_prim->linear)
-      gs_run(shader, input_prim, input_verts,
-             output_prims, output_verts);
-   else
-      gs_run_elts(shader, input_prim, input_verts,
-                  output_prims, output_verts);
+      shader->prepare(shader, constants, constants_size);
 
-   /* Flush the remaining primitives. Will happen if
-    * num_input_primitives % 4 != 0
-    */
-   if (shader->fetched_prim_count > 0) {
-      gs_flush(shader);
+      if (input_prim->linear)
+         gs_run(shader, input_prim, input_verts,
+                output_prims, output_verts);
+      else
+         gs_run_elts(shader, input_prim, input_verts,
+                     output_prims, output_verts);
+
+      /* Flush the remaining primitives. Will happen if
+       * num_input_primitives % 4 != 0
+       */
+      if (shader->fetched_prim_count > 0) {
+         gs_flush(shader);
+      }
+      debug_assert(shader->fetched_prim_count == 0);
    }
 
-   debug_assert(shader->fetched_prim_count == 0);
-
    /* Update prim_info:
     */
    output_prims->linear = TRUE;
@@ -771,6 +781,8 @@
          gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
    gs->max_output_vertices =
          gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+   gs->num_invocations =
+      gs->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
    if (!gs->max_output_vertices)
       gs->max_output_vertices = 32;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_gs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_gs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_gs.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_gs.h	2015-09-16 14:36:08.000000000 +0000
@@ -90,6 +90,8 @@
    unsigned vector_length;
    unsigned max_out_prims;
 
+   unsigned num_invocations;
+   unsigned invocation_id;
 #ifdef HAVE_LLVM
    struct draw_gs_inputs *gs_input;
    struct draw_gs_jit_context *jit_context;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_llvm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_llvm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_llvm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_llvm.c	2015-09-16 14:36:08.000000000 +0000
@@ -72,7 +72,7 @@
    LLVMValueRef input;
 };
 
-static INLINE const struct draw_gs_llvm_iface *
+static inline const struct draw_gs_llvm_iface *
 draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface)
 {
    return (const struct draw_gs_llvm_iface *)iface;
@@ -97,6 +97,7 @@
    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                            Elements(elem_types), 0);
 
+   (void) target; /* silence unused var warning for non-debug build */
    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
                           target, dvbuffer_type,
                           DRAW_JIT_DVBUFFER_MAP);
@@ -133,6 +134,7 @@
    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
 
+   (void) target; /* silence unused var warning for non-debug build */
    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
                           target, texture_type,
                           DRAW_JIT_TEXTURE_WIDTH);
@@ -290,6 +292,7 @@
    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                           Elements(elem_types), 0);
 
+   (void) target; /* silence unused var warning for non-debug build */
    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
                           target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants,
@@ -353,6 +356,7 @@
    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                      Elements(elem_types), 0);
 
+   (void) target; /* silence unused var warning for non-debug build */
    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
                           target, vb_type, 0);
    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
@@ -2065,7 +2069,7 @@
    struct gallivm_state *gallivm = variant->gallivm;
    LLVMContextRef context = gallivm->context;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
-   LLVMTypeRef arg_types[6];
+   LLVMTypeRef arg_types[7];
    LLVMTypeRef func_type;
    LLVMValueRef variant_func;
    LLVMValueRef context_ptr;
@@ -2101,6 +2105,7 @@
    arg_types[4] = int32_type;                          /* instance_id */
    arg_types[5] = LLVMPointerType(
       LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
+   arg_types[6] = int32_type;
 
    func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
 
@@ -2121,6 +2126,7 @@
    num_prims                 = LLVMGetParam(variant_func, 3);
    system_values.instance_id = LLVMGetParam(variant_func, 4);
    prim_id_ptr               = LLVMGetParam(variant_func, 5);
+   system_values.invocation_id = LLVMGetParam(variant_func, 6);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(input_array, "input");
@@ -2128,6 +2134,7 @@
    lp_build_name(num_prims, "num_prims");
    lp_build_name(system_values.instance_id, "instance_id");
    lp_build_name(prim_id_ptr, "prim_id_ptr");
+   lp_build_name(system_values.invocation_id, "invocation_id");
 
    variant->context_ptr = context_ptr;
    variant->io_ptr = io_ptr;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_llvm.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_llvm.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_llvm.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_llvm.h	2015-09-16 14:36:08.000000000 +0000
@@ -298,7 +298,8 @@
                     struct vertex_header *output,
                     unsigned num_prims,
                     unsigned instance_id,
-                    int *prim_ids);
+                    int *prim_ids,
+                    unsigned invocation_id);
 
 struct draw_llvm_variant_key
 {
@@ -349,7 +350,7 @@
     PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state))
 
 
-static INLINE size_t
+static inline size_t
 draw_llvm_variant_key_size(unsigned nr_vertex_elements,
                            unsigned nr_samplers)
 {
@@ -359,7 +360,7 @@
 }
 
 
-static INLINE size_t
+static inline size_t
 draw_gs_llvm_variant_key_size(unsigned nr_samplers)
 {
    return (sizeof(struct draw_gs_llvm_variant_key) +
@@ -367,7 +368,7 @@
 }
 
 
-static INLINE struct draw_sampler_static_state *
+static inline struct draw_sampler_static_state *
 draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
 {
    return (struct draw_sampler_static_state *)
@@ -475,13 +476,13 @@
 };
 
 
-static INLINE struct llvm_vertex_shader *
+static inline struct llvm_vertex_shader *
 llvm_vertex_shader(struct draw_vertex_shader *vs)
 {
    return (struct llvm_vertex_shader *)vs;
 }
 
-static INLINE struct llvm_geometry_shader *
+static inline struct llvm_geometry_shader *
 llvm_geometry_shader(struct draw_geometry_shader *gs)
 {
    return (struct llvm_geometry_shader *)gs;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_aaline.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_aaline.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_aaline.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_aaline.c	2015-09-16 14:36:08.000000000 +0000
@@ -51,7 +51,7 @@
 
 
 /** Approx number of new tokens for instructions in aa_transform_inst() */
-#define NUM_NEW_TOKENS 50
+#define NUM_NEW_TOKENS 53
 
 
 /**
@@ -137,6 +137,7 @@
    uint tempsUsed;  /**< bitmask */
    int colorOutput; /**< which output is the primary color */
    uint samplersUsed;  /**< bitfield of samplers used */
+   bool hasSview;
    int freeSampler;  /** an available sampler for the pstipple */
    int maxInput, maxGeneric;  /**< max input index found */
    int colorTemp, texTemp;  /**< temp registers */
@@ -165,6 +166,9 @@
          aactx->samplersUsed |= 1 << i;
       }
    }
+   else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
+      aactx->hasSview = true;
+   }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
       if ((int) decl->Range.Last > aactx->maxInput)
          aactx->maxInput = decl->Range.Last;
@@ -232,6 +236,17 @@
    /* declare new sampler */
    tgsi_transform_sampler_decl(ctx, aactx->freeSampler);
 
+   /* if the src shader has SVIEW decl's for each SAMP decl, we
+    * need to continue the trend and ensure there is a matching
+    * SVIEW for the new SAMP we just created
+    */
+   if (aactx->hasSview) {
+      tgsi_transform_sampler_view_decl(ctx,
+                                       aactx->freeSampler,
+                                       TGSI_TEXTURE_2D,
+                                       TGSI_RETURN_TYPE_FLOAT);
+   }
+
    /* declare new temp regs */
    tgsi_transform_temp_decl(ctx, aactx->texTemp);
    tgsi_transform_temp_decl(ctx, aactx->colorTemp);
@@ -496,7 +511,7 @@
 
 
 
-static INLINE struct aaline_stage *
+static inline struct aaline_stage *
 aaline_stage( struct draw_stage *stage )
 {
    return (struct aaline_stage *) stage;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_aapoint.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_aapoint.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_aapoint.c	2015-09-16 14:36:08.000000000 +0000
@@ -427,7 +427,7 @@
 
 
 
-static INLINE struct aapoint_stage *
+static inline struct aapoint_stage *
 aapoint_stage( struct draw_stage *stage )
 {
    return (struct aapoint_stage *) stage;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_clip.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_clip.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_clip.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_clip.c	2015-09-16 14:36:08.000000000 +0000
@@ -70,12 +70,12 @@
 
 
 /** Cast wrapper */
-static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
+static inline struct clip_stage *clip_stage( struct draw_stage *stage )
 {
    return (struct clip_stage *)stage;
 }
 
-static INLINE unsigned
+static inline unsigned
 draw_viewport_index(struct draw_context *draw,
                     const struct vertex_header *leading_vertex)
 {
@@ -210,7 +210,7 @@
  * true, otherwise returns false.
  * Triangle is considered null/empty if it's area is qual to zero.
  */
-static INLINE boolean
+static inline boolean
 is_tri_null(struct draw_context *draw, const struct prim_header *header)
 {
    const unsigned pos_attr = draw_current_shader_position_output(draw);
@@ -322,7 +322,7 @@
 }
 
 
-static INLINE float
+static inline float
 dot4(const float *a, const float *b)
 {
    return (a[0] * b[0] +
@@ -336,7 +336,7 @@
  * it first checks if the shader provided a clip distance, otherwise
  * it works out the value using the clipvertex
  */
-static INLINE float getclipdist(const struct clip_stage *clipper,
+static inline float getclipdist(const struct clip_stage *clipper,
                                 struct vertex_header *vert,
                                 int plane_idx)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_cull.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_cull.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_cull.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_cull.c	2015-09-16 14:36:08.000000000 +0000
@@ -46,12 +46,12 @@
 };
 
 
-static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
+static inline struct cull_stage *cull_stage( struct draw_stage *stage )
 {
    return (struct cull_stage *)stage;
 }
 
-static INLINE boolean
+static inline boolean
 cull_distance_is_out(float dist)
 {
    return (dist < 0.0f) || util_is_inf_or_nan(dist);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_flatshade.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_flatshade.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_flatshade.c	2015-09-16 14:36:08.000000000 +0000
@@ -47,7 +47,7 @@
 };
 
 
-static INLINE struct flat_stage *
+static inline struct flat_stage *
 flat_stage(struct draw_stage *stage)
 {
    return (struct flat_stage *) stage;
@@ -55,7 +55,7 @@
 
 
 /** Copy all the constant attributes from 'src' vertex to 'dst' vertex */
-static INLINE void copy_flats( struct draw_stage *stage,
+static inline void copy_flats( struct draw_stage *stage,
                                struct vertex_header *dst,
                                const struct vertex_header *src )
 {
@@ -70,7 +70,7 @@
 
 
 /** Copy all the color attributes from src vertex to dst0 & dst1 vertices */
-static INLINE void copy_flats2( struct draw_stage *stage,
+static inline void copy_flats2( struct draw_stage *stage,
                                 struct vertex_header *dst0,
                                 struct vertex_header *dst1,
                                 const struct vertex_header *src )
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe.h	2015-09-16 14:36:08.000000000 +0000
@@ -115,7 +115,7 @@
  * \param idx  index into stage's tmp[] array to put the copy (dest)
  * \return  pointer to the copied vertex
  */
-static INLINE struct vertex_header *
+static inline struct vertex_header *
 dup_vert( struct draw_stage *stage,
 	  const struct vertex_header *vert,
 	  unsigned idx )
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_offset.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_offset.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_offset.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_offset.c	2015-09-16 14:36:08.000000000 +0000
@@ -49,7 +49,7 @@
 
 
 
-static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
+static inline struct offset_stage *offset_stage( struct draw_stage *stage )
 {
    return (struct offset_stage *) stage;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_pstipple.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_pstipple.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_pstipple.c	2015-09-16 14:36:08.000000000 +0000
@@ -53,7 +53,7 @@
 
 
 /** Approx number of new tokens for instructions in pstip_transform_inst() */
-#define NUM_NEW_TOKENS 50
+#define NUM_NEW_TOKENS 53
 
 
 /**
@@ -126,6 +126,7 @@
    int wincoordInput;
    int maxInput;
    uint samplersUsed;  /**< bitfield of samplers used */
+   bool hasSview;
    int freeSampler;  /** an available sampler for the pstipple */
    int texTemp;  /**< temp registers */
    int numImmed;
@@ -149,6 +150,9 @@
          pctx->samplersUsed |= 1 << i;
       }
    }
+   else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
+      pctx->hasSview = true;
+   }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
       pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
       if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
@@ -232,6 +236,17 @@
    /* declare new sampler */
    tgsi_transform_sampler_decl(ctx, pctx->freeSampler);
 
+   /* if the src shader has SVIEW decl's for each SAMP decl, we
+    * need to continue the trend and ensure there is a matching
+    * SVIEW for the new SAMP we just created
+    */
+   if (pctx->hasSview) {
+      tgsi_transform_sampler_view_decl(ctx,
+                                       pctx->freeSampler,
+                                       TGSI_TEXTURE_2D,
+                                       TGSI_RETURN_TYPE_FLOAT);
+   }
+
    /* declare new temp regs */
    tgsi_transform_temp_decl(ctx, pctx->texTemp);
 
@@ -447,7 +462,7 @@
 }
 
 
-static INLINE struct pstip_stage *
+static inline struct pstip_stage *
 pstip_stage( struct draw_stage *stage )
 {
    return (struct pstip_stage *) stage;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_stipple.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_stipple.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_stipple.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_stipple.c	2015-09-16 14:36:08.000000000 +0000
@@ -53,7 +53,7 @@
 };
 
 
-static INLINE struct stipple_stage *
+static inline struct stipple_stage *
 stipple_stage(struct draw_stage *stage)
 {
    return (struct stipple_stage *) stage;
@@ -108,7 +108,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 stipple_test(int counter, ushort pattern, int factor)
 {
    int b = (counter / factor) & 0xf;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_twoside.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_twoside.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_twoside.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_twoside.c	2015-09-16 14:36:08.000000000 +0000
@@ -43,7 +43,7 @@
 };
 
 
-static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
+static inline struct twoside_stage *twoside_stage( struct draw_stage *stage )
 {
    return (struct twoside_stage *)stage;
 }
@@ -51,7 +51,7 @@
 /**
  * Copy back color(s) to front color(s).
  */
-static INLINE struct vertex_header *
+static inline struct vertex_header *
 copy_bfc( struct twoside_stage *twoside, 
           const struct vertex_header *v,
           unsigned idx )
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_unfilled.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_unfilled.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_unfilled.c	2015-09-16 14:36:08.000000000 +0000
@@ -53,7 +53,7 @@
 };
 
 
-static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
+static inline struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
 {
    return (struct unfilled_stage *)stage;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_vbuf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_vbuf.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_vbuf.c	2015-09-16 14:36:08.000000000 +0000
@@ -85,7 +85,7 @@
 /**
  * Basically a cast wrapper.
  */
-static INLINE struct vbuf_stage *
+static inline struct vbuf_stage *
 vbuf_stage( struct draw_stage *stage )
 {
    assert(stage);
@@ -97,7 +97,7 @@
 static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
 
 
-static INLINE boolean 
+static inline boolean 
 overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
 {
    unsigned long used = (unsigned long) ((char *)ptr - (char *)map);
@@ -105,7 +105,7 @@
 }
 
 
-static INLINE void 
+static inline void 
 check_space( struct vbuf_stage *vbuf, unsigned nr )
 {
    if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
@@ -126,7 +126,7 @@
  * have a couple of slots at the beginning (1-dword header, 4-dword
  * clip pos) that we ignore here.  We only use the vertex->data[] fields.
  */
-static INLINE ushort 
+static inline ushort 
 emit_vertex( struct vbuf_stage *vbuf,
              struct vertex_header *vertex )
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_wide_line.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_wide_line.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_wide_line.c	2015-09-16 14:36:08.000000000 +0000
@@ -45,7 +45,7 @@
 
 
 
-static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
+static inline struct wideline_stage *wideline_stage( struct draw_stage *stage )
 {
    return (struct wideline_stage *)stage;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_wide_point.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pipe_wide_point.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pipe_wide_point.c	2015-09-16 14:36:08.000000000 +0000
@@ -83,7 +83,7 @@
 
 
 
-static INLINE struct widepoint_stage *
+static inline struct widepoint_stage *
 widepoint_stage( struct draw_stage *stage )
 {
    return (struct widepoint_stage *)stage;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_private.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_private.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_private.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_private.h	2015-09-16 14:36:08.000000000 +0000
@@ -494,7 +494,7 @@
  * Return index of the given viewport clamping it
  * to be between 0 <= and < PIPE_MAX_VIEWPORTS
  */
-static INLINE unsigned
+static inline unsigned
 draw_clamp_viewport_idx(int idx)
 {
    return ((PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0);
@@ -505,7 +505,7 @@
  * overflows then it returns the value from
  * the overflow_value variable.
  */
-static INLINE unsigned
+static inline unsigned
 draw_overflow_uadd(unsigned a, unsigned b,
                    unsigned overflow_value)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c	2015-09-16 14:36:08.000000000 +0000
@@ -54,7 +54,7 @@
 
 
 /** cast wrapper */
-static INLINE struct fetch_pipeline_middle_end *
+static inline struct fetch_pipeline_middle_end *
 fetch_pipeline_middle_end(struct draw_pt_middle_end *middle)
 {
    return (struct fetch_pipeline_middle_end *) middle;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c	2015-09-16 14:36:08.000000000 +0000
@@ -60,7 +60,7 @@
 
 
 /** cast wrapper */
-static INLINE struct llvm_middle_end *
+static inline struct llvm_middle_end *
 llvm_middle_end(struct draw_pt_middle_end *middle)
 {
    return (struct llvm_middle_end *) middle;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_post_vs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_post_vs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_post_vs.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_post_vs.c	2015-09-16 14:36:08.000000000 +0000
@@ -53,7 +53,7 @@
                    const struct draw_prim_info *prim_info );
 };
 
-static INLINE void
+static inline void
 initialize_vertex_header(struct vertex_header *header)
 {
    header->clipmask = 0;
@@ -62,7 +62,7 @@
    header->vertex_id = UNDEFINED_VERTEX_ID;
 }
 
-static INLINE float
+static inline float
 dot4(const float *a, const float *b)
 {
    return (a[0]*b[0] +
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_so_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_so_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_so_emit.c	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_so_emit.c	2015-09-16 14:36:08.000000000 +0000
@@ -65,7 +65,7 @@
    return state;
 }
 
-static INLINE boolean
+static inline boolean
 draw_has_so(const struct draw_context *draw)
 {
    const struct pipe_stream_output_info *state = draw_so_info(draw);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_vsplit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_vsplit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_vsplit.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_vsplit.c	2015-09-16 14:36:08.000000000 +0000
@@ -84,7 +84,7 @@
 /**
  * Add a fetch element and add it to the draw elements.
  */
-static INLINE void
+static inline void
 vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias)
 {
    unsigned hash;
@@ -111,7 +111,7 @@
  * The value is checked for overflows (both integer overflows
  * and the elements array overflow).
  */
-static INLINE unsigned
+static inline unsigned
 vsplit_get_base_idx(struct vsplit_frontend *vsplit,
                     unsigned start, unsigned fetch, unsigned *ofbit)
 {
@@ -137,7 +137,7 @@
  * index, plus the element bias, clamped to maximum elememt
  * index if that addition overflows.
  */
-static INLINE unsigned
+static inline unsigned
 vsplit_get_bias_idx(struct vsplit_frontend *vsplit,
                     int idx, int bias, unsigned *ofbias)
 {
@@ -170,7 +170,7 @@
    elt_idx = vsplit_get_base_idx(vsplit, start, fetch, &ofbit);          \
    elt_idx = vsplit_get_bias_idx(vsplit, ofbit ? 0 : DRAW_GET_IDX(elts, elt_idx), elt_bias, &ofbias)
 
-static INLINE void
+static inline void
 vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
                        unsigned start, unsigned fetch, int elt_bias)
 {
@@ -179,7 +179,7 @@
    vsplit_add_cache(vsplit, elt_idx, ofbias);
 }
 
-static INLINE void
+static inline void
 vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts,
                        unsigned start, unsigned fetch, int elt_bias)
 {
@@ -193,7 +193,7 @@
  * Add a fetch element and add it to the draw elements.  The fetch element is
  * in full range (uint).
  */
-static INLINE void
+static inline void
 vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const uint *elts,
                       unsigned start, unsigned fetch, int elt_bias)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h	2015-09-16 14:36:08.000000000 +0000
@@ -129,7 +129,7 @@
  * When spoken is TRUE, ispoken replaces istart;  When close is TRUE, iclose is
  * appended.
  */
-static INLINE void
+static inline void
 CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                         unsigned flags,
                                         unsigned istart, unsigned icount,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_vertex.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_vertex.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_vertex.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_vertex.h	2015-09-16 14:36:08.000000000 +0000
@@ -91,13 +91,13 @@
    } attrib[PIPE_MAX_SHADER_OUTPUTS];
 };
 
-static INLINE size_t
+static inline size_t
 draw_vinfo_size( const struct vertex_info *a )
 {
    return offsetof(const struct vertex_info, attrib[a->num_attribs]);
 }
 
-static INLINE int
+static inline int
 draw_vinfo_compare( const struct vertex_info *a,
                     const struct vertex_info *b )
 {
@@ -105,7 +105,7 @@
    return memcmp( a, b, sizea );
 }
 
-static INLINE void
+static inline void
 draw_vinfo_copy( struct vertex_info *dst,
                  const struct vertex_info *src )
 {
@@ -121,7 +121,7 @@
  *                   corresponds to this attribute.
  * \return slot in which the attribute was added
  */
-static INLINE uint
+static inline uint
 draw_emit_vertex_attr(struct vertex_info *vinfo,
                       enum attrib_emit emit, 
                       enum interp_mode interp, /* only used by softpipe??? */
@@ -150,7 +150,7 @@
                               const uint8_t *data);
 
 
-static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit)
+static inline enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit)
 {
    switch (emit) {
    case EMIT_OMIT:
@@ -174,7 +174,7 @@
    }
 }
 
-static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit)
+static inline unsigned draw_translate_vinfo_size(enum attrib_emit emit)
 {
    switch (emit) {
    case EMIT_OMIT:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_vs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_vs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/draw/draw_vs.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/draw/draw_vs.h	2015-09-16 14:36:08.000000000 +0000
@@ -191,12 +191,12 @@
 
 
 
-static INLINE int draw_vs_variant_keysize( const struct draw_vs_variant_key *key )
+static inline int draw_vs_variant_keysize( const struct draw_vs_variant_key *key )
 {
    return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_variant_element);
 }
 
-static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a,
+static inline int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a,
                                          const struct draw_vs_variant_key *b )
 {
    int keysize = draw_vs_variant_keysize(a);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_arit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_arit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_arit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_arit.c	2015-09-16 14:36:08.000000000 +0000
@@ -1135,7 +1135,7 @@
  *
  * @sa http://www.stereopsis.com/doubleblend.html
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_lerp_simple(struct lp_build_context *bld,
                      LLVMValueRef x,
                      LLVMValueRef v0,
@@ -1674,7 +1674,7 @@
  * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the
  * result is the even value.  That is, rounding 2.5 will be 2.0, and not 3.0.
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_round_sse41(struct lp_build_context *bld,
                      LLVMValueRef a,
                      enum lp_build_round_mode mode)
@@ -1717,7 +1717,7 @@
       args[2] = LLVMConstInt(i32t, mode, 0);
 
       res = lp_build_intrinsic(builder, intrinsic,
-                               vec_type, args, Elements(args));
+                               vec_type, args, Elements(args), 0);
 
       res = LLVMBuildExtractElement(builder, res, index0, "");
    }
@@ -1761,7 +1761,7 @@
 }
 
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_iround_nearest_sse2(struct lp_build_context *bld,
                              LLVMValueRef a)
 {
@@ -1817,7 +1817,7 @@
 
 /*
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_round_altivec(struct lp_build_context *bld,
                        LLVMValueRef a,
                        enum lp_build_round_mode mode)
@@ -1851,7 +1851,7 @@
    return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
 }
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_round_arch(struct lp_build_context *bld,
                     LLVMValueRef a,
                     enum lp_build_round_mode mode)
@@ -1997,6 +1997,12 @@
       LLVMTypeRef int_vec_type = bld->int_vec_type;
       LLVMTypeRef vec_type = bld->vec_type;
 
+      if (type.width != 32) {
+         char intrinsic[32];
+         util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", type.length, type.width);
+         return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+      }
+
       assert(type.width == 32); /* might want to handle doubles at some point */
 
       inttype = type;
@@ -2066,6 +2072,12 @@
       LLVMTypeRef int_vec_type = bld->int_vec_type;
       LLVMTypeRef vec_type = bld->vec_type;
 
+      if (type.width != 32) {
+         char intrinsic[32];
+         util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", type.length, type.width);
+         return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+      }
+
       assert(type.width == 32); /* might want to handle doubles at some point */
 
       inttype = type;
@@ -2427,7 +2439,7 @@
  * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
  * - http://softwarecommunity.intel.com/articles/eng/1818.htm
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_rcp_refine(struct lp_build_context *bld,
                     LLVMValueRef a,
                     LLVMValueRef rcp_a)
@@ -2512,7 +2524,7 @@
  *
  * See also Intel 64 and IA-32 Architectures Optimization Manual.
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_rsqrt_refine(struct lp_build_context *bld,
                       LLVMValueRef a,
                       LLVMValueRef rsqrt_a)
@@ -3535,7 +3547,7 @@
       lp_build_intrinsic(builder,
                          "llvm.x86.sse.stmxcsr",
                          LLVMVoidTypeInContext(gallivm->context),
-                         &mxcsr_ptr8, 1);
+                         &mxcsr_ptr8, 1, 0);
       return mxcsr_ptr;
    }
    return 0;
@@ -3582,6 +3594,6 @@
       lp_build_intrinsic(builder,
                          "llvm.x86.sse.ldmxcsr",
                          LLVMVoidTypeInContext(gallivm->context),
-                         &mxcsr_ptr, 1);
+                         &mxcsr_ptr, 1, 0);
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_const.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_const.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_const.c	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_const.c	2015-09-16 14:36:08.000000000 +0000
@@ -311,7 +311,7 @@
    else {
       double dscale = lp_const_scale(type);
 
-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
    }
 
    return elem;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_const.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_const.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_const.h	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_const.h	2015-09-16 14:36:08.000000000 +0000
@@ -120,14 +120,14 @@
                                  const unsigned char *swizzle);
 
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_const_int32(struct gallivm_state *gallivm, int i)
 {
    return LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), i, 0);
 }
 
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_const_float(struct gallivm_state *gallivm, float x)
 {
    return LLVMConstReal(LLVMFloatTypeInContext(gallivm->context), x);
@@ -135,7 +135,7 @@
 
 
 /** Return constant-valued pointer to int */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr)
 {
    LLVMTypeRef int_type;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp	2015-09-16 14:36:08.000000000 +0000
@@ -28,40 +28,12 @@
 #include <stddef.h>
 
 #include <llvm-c/Core.h>
-#include <llvm/Target/TargetMachine.h>
-#include <llvm/Target/TargetInstrInfo.h>
+#include <llvm-c/Disassembler.h>
 #include <llvm/Support/raw_ostream.h>
 #include <llvm/Support/Format.h>
-
-#if HAVE_LLVM >= 0x0306
-#include <llvm/Target/TargetSubtargetInfo.h>
-#else
-#include <llvm/Support/MemoryObject.h>
-#endif
-
-#include <llvm/Support/TargetRegistry.h>
-#include <llvm/MC/MCSubtargetInfo.h>
-
 #include <llvm/Support/Host.h>
-
 #include <llvm/IR/Module.h>
 
-#include <llvm/MC/MCDisassembler.h>
-#include <llvm/MC/MCAsmInfo.h>
-#include <llvm/MC/MCInst.h>
-#include <llvm/MC/MCInstPrinter.h>
-#include <llvm/MC/MCRegisterInfo.h>
-
-#if HAVE_LLVM >= 0x0305
-#define OwningPtr std::unique_ptr
-#else
-#include <llvm/ADT/OwningPtr.h>
-#endif
-
-#if HAVE_LLVM >= 0x0305
-#include <llvm/MC/MCContext.h>
-#endif
-
 #include "util/u_math.h"
 #include "util/u_debug.h"
 
@@ -89,6 +61,7 @@
    return ((uintptr_t)ptr & (alignment - 1)) == 0;
 }
 
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBEDDED)
 
 class raw_debug_ostream :
    public llvm::raw_ostream
@@ -119,6 +92,7 @@
    }
 }
 
+#endif
 
 extern "C" const char *
 lp_get_module_id(LLVMModuleRef module)
@@ -133,7 +107,7 @@
 extern "C" void
 lp_debug_dump_value(LLVMValueRef value)
 {
-#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED)
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBEDDED)
    raw_debug_ostream os;
    llvm::unwrap(value)->print(os);
    os.flush();
@@ -143,46 +117,6 @@
 }
 
 
-#if HAVE_LLVM < 0x0306
-
-/*
- * MemoryObject wrapper around a buffer of memory, to be used by MC
- * disassembler.
- */
-class BufferMemoryObject:
-   public llvm::MemoryObject
-{
-private:
-   const uint8_t *Bytes;
-   uint64_t Length;
-public:
-   BufferMemoryObject(const uint8_t *bytes, uint64_t length) :
-      Bytes(bytes), Length(length)
-   {
-   }
-
-   uint64_t getBase() const
-   {
-      return 0;
-   }
-
-   uint64_t getExtent() const
-   {
-      return Length;
-   }
-
-   int readByte(uint64_t addr, uint8_t *byte) const
-   {
-      if (addr > getExtent())
-         return -1;
-      *byte = Bytes[addr];
-      return 0;
-   }
-};
-
-#endif /* HAVE_LLVM < 0x0306 */
-
-
 /*
  * Disassemble a function, using the LLVM MC disassembler.
  *
@@ -191,10 +125,8 @@
  * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
  */
 static size_t
-disassemble(const void* func, llvm::raw_ostream & Out)
+disassemble(const void* func)
 {
-   using namespace llvm;
-
    const uint8_t *bytes = (const uint8_t *)func;
 
    /*
@@ -202,112 +134,39 @@
     */
    const uint64_t extent = 96 * 1024;
 
-   uint64_t max_pc = 0;
-
    /*
     * Initialize all used objects.
     */
 
-   std::string Triple = sys::getDefaultTargetTriple();
-
-   std::string Error;
-   const Target *T = TargetRegistry::lookupTarget(Triple, Error);
-
-#if HAVE_LLVM >= 0x0304
-   OwningPtr<const MCAsmInfo> AsmInfo(T->createMCAsmInfo(*T->createMCRegInfo(Triple), Triple));
-#else
-   OwningPtr<const MCAsmInfo> AsmInfo(T->createMCAsmInfo(Triple));
-#endif
-
-   if (!AsmInfo) {
-      Out << "error: no assembly info for target " << Triple << "\n";
-      Out.flush();
+   std::string Triple = llvm::sys::getProcessTriple();
+   LLVMDisasmContextRef D = LLVMCreateDisasm(Triple.c_str(), NULL, 0, NULL, NULL);
+   char outline[1024];
+
+   if (!D) {
+      _debug_printf("error: couldn't create disassembler for triple %s\n",
+                    Triple.c_str());
       return 0;
    }
 
-   unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
-
-   OwningPtr<const MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
-   if (!MRI) {
-      Out << "error: no register info for target " << Triple.c_str() << "\n";
-      Out.flush();
-      return 0;
-   }
-
-   OwningPtr<const MCInstrInfo> MII(T->createMCInstrInfo());
-   if (!MII) {
-      Out << "error: no instruction info for target " << Triple.c_str() << "\n";
-      Out.flush();
-      return 0;
-   }
-
-#if HAVE_LLVM >= 0x0305
-   OwningPtr<const MCSubtargetInfo> STI(T->createMCSubtargetInfo(Triple, sys::getHostCPUName(), ""));
-   OwningPtr<MCContext> MCCtx(new MCContext(AsmInfo.get(), MRI.get(), 0));
-   OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler(*STI, *MCCtx));
-#else
-   OwningPtr<const MCSubtargetInfo> STI(T->createMCSubtargetInfo(Triple, sys::getHostCPUName(), ""));
-   OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler(*STI));
-#endif
-   if (!DisAsm) {
-      Out << "error: no disassembler for target " << Triple << "\n";
-      Out.flush();
-      return 0;
-   }
-
-
-#if HAVE_LLVM >= 0x0307
-   OwningPtr<MCInstPrinter> Printer(
-         T->createMCInstPrinter(llvm::Triple(Triple), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
-#else
-   OwningPtr<MCInstPrinter> Printer(
-         T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
-#endif
-   if (!Printer) {
-      Out << "error: no instruction printer for target " << Triple.c_str() << "\n";
-      Out.flush();
-      return 0;
-   }
-
-   TargetOptions options;
-#if defined(DEBUG) && HAVE_LLVM < 0x0307
-   options.JITEmitDebugInfo = true;
-#endif
-#if defined(PIPE_ARCH_X86)
-   options.StackAlignmentOverride = 4;
-#endif
-#if defined(DEBUG) || defined(PROFILE)
-   options.NoFramePointerElim = true;
-#endif
-   OwningPtr<TargetMachine> TM(T->createTargetMachine(Triple, sys::getHostCPUName(), "", options));
-
-   /*
-    * Wrap the data in a MemoryObject
-    */
-#if HAVE_LLVM >= 0x0306
-   ArrayRef<uint8_t> memoryObject((const uint8_t *)bytes, extent);
-#else
-   BufferMemoryObject memoryObject((const uint8_t *)bytes, extent);
-#endif
-
    uint64_t pc;
    pc = 0;
-   while (true) {
-      MCInst Inst;
-      uint64_t Size;
+   while (pc < extent) {
+      size_t Size;
 
       /*
        * Print address.  We use addresses relative to the start of the function,
        * so that between runs.
        */
 
-      Out << llvm::format("%6lu:\t", (unsigned long)pc);
+      _debug_printf("%6lu:\t", (unsigned long)pc);
 
-      if (!DisAsm->getInstruction(Inst, Size, memoryObject,
-                                 pc,
-				  nulls(), nulls())) {
-         Out << "invalid";
+      Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline,
+                                   sizeof outline);
+
+      if (!Size) {
+         _debug_printf("invalid\n");
          pc += 1;
+         break;
       }
 
       /*
@@ -317,100 +176,47 @@
       if (0) {
          unsigned i;
          for (i = 0; i < Size; ++i) {
-            Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]);
+            _debug_printf("%02x ", bytes[pc + i]);
          }
          for (; i < 16; ++i) {
-            Out << "   ";
+            _debug_printf("   ");
          }
       }
 
       /*
        * Print the instruction.
        */
-#if HAVE_LLVM >= 0x0307
-      Printer->printInst(&Inst, Out, "", *STI);
-#else
-      Printer->printInst(&Inst, Out, "");
-#endif
 
-      /*
-       * Advance.
-       */
-
-      pc += Size;
+      _debug_printf("%*s", (int)Size, outline); /* '*' width must be int, not size_t */
 
-      const MCInstrDesc &TID = MII->get(Inst.getOpcode());
+      _debug_printf("\n");
 
       /*
-       * Keep track of forward jumps to a nearby address.
+       * Stop disassembling on return statements, if there is no record of a
+       * jump to a successive address.
+       *
+       * XXX: This currently assumes x86
        */
 
-      if (TID.isBranch()) {
-         for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
-            const MCOperand &operand = Inst.getOperand(i);
-            if (operand.isImm()) {
-               uint64_t jump;
-
-               /*
-                * FIXME: Handle both relative and absolute addresses correctly.
-                * EDInstInfo actually has this info, but operandTypes and
-                * operandFlags enums are not exposed in the public interface.
-                */
-
-               if (1) {
-                  /*
-                   * PC relative addr.
-                   */
-
-                  jump = pc + operand.getImm();
-               } else {
-                  /*
-                   * Absolute addr.
-                   */
-
-                  jump = (uint64_t)operand.getImm();
-               }
-
-               /*
-                * Output the address relative to the function start, given
-                * that MC will print the addresses relative the current pc.
-                */
-               Out << "\t\t; " << jump;
-
-               /*
-                * Ignore far jumps given it could be actually a tail return to
-                * a random address.
-                */
-
-               if (jump > max_pc &&
-                   jump < extent) {
-                  max_pc = jump;
-               }
-            }
-         }
+      if (Size == 1 && bytes[pc] == 0xc3) {
+         break;
       }
 
-      Out << "\n";
-
       /*
-       * Stop disassembling on return statements, if there is no record of a
-       * jump to a successive address.
+       * Advance.
        */
 
-      if (TID.isReturn()) {
-         if (pc > max_pc) {
-            break;
-         }
-      }
+      pc += Size;
 
       if (pc >= extent) {
-         Out << "disassembly larger than " << extent << "bytes, aborting\n";
+         _debug_printf("disassembly larger than %lu bytes, aborting\n", (unsigned long)extent); /* was "%ull": %u plus literal "ll" */
          break;
       }
    }
 
-   Out << "\n";
-   Out.flush();
+   _debug_printf("\n");
+
+   LLVMDisasmDispose(D);
 
    /*
     * Print GDB command, useful to verify output.
@@ -425,9 +231,8 @@
 
 extern "C" void
 lp_disassemble(LLVMValueRef func, const void *code) {
-   raw_debug_ostream Out;
-   Out << LLVMGetValueName(func) << ":\n";
-   disassemble(code, Out);
+   _debug_printf("%s:\n", LLVMGetValueName(func));
+   disassemble(code);
 }
 
 
@@ -442,7 +247,7 @@
 extern "C" void
 lp_profile(LLVMValueRef func, const void *code)
 {
-#if defined(__linux__) && (defined(DEBUG) || defined(PROFILE))
+#if defined(__linux__) && defined(PROFILE)
    static boolean first_time = TRUE;
    static FILE *perf_map_file = NULL;
    static int perf_asm_fd = -1;
@@ -469,7 +274,7 @@
       unsigned long addr = (uintptr_t)code;
       llvm::raw_fd_ostream Out(perf_asm_fd, false);
       Out << symbol << ":\n";
-      unsigned long size = disassemble(code, Out);
+      unsigned long size = disassemble(code);
       fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
       fflush(perf_map_file);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_debug.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_debug.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_debug.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_debug.h	2015-09-16 14:36:08.000000000 +0000
@@ -59,7 +59,7 @@
 #endif
 
 
-static INLINE void
+static inline void
 lp_build_name(LLVMValueRef val, const char *format, ...)
 {
 #ifdef DEBUG
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c	2015-09-16 14:36:08.000000000 +0000
@@ -95,7 +95,7 @@
 /**
  * Whether the format matches the vector type, apart of swizzles.
  */
-static INLINE boolean
+static inline boolean
 format_matches_type(const struct util_format_description *desc,
                     struct lp_type type)
 {
@@ -146,7 +146,7 @@
  *
  * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                                const struct util_format_description *desc,
                                LLVMValueRef packed)
@@ -405,6 +405,7 @@
                                aligned, base_ptr, offset, TRUE);
 
       assert(format_desc->block.bits <= vec_len);
+      (void) vec_len; /* silence unused var warning for non-debug build */
 
       packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
       return lp_build_format_swizzle_aos(format_desc, &bld, packed);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c	2015-09-16 14:36:08.000000000 +0000
@@ -212,7 +212,7 @@
 }
 
 
-static INLINE void
+static inline void
 yuv_to_rgb_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_init.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_init.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_init.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_init.c	2015-09-16 14:36:08.000000000 +0000
@@ -106,7 +106,6 @@
 static boolean
 create_pass_manager(struct gallivm_state *gallivm)
 {
-   char *td_str;
    assert(!gallivm->passmgr);
    assert(gallivm->target);
 
@@ -122,10 +121,29 @@
    // Old versions of LLVM get the DataLayout from the pass manager.
    LLVMAddTargetData(gallivm->target, gallivm->passmgr);
 
-   // New ones from the Module.
-   td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
-   LLVMSetDataLayout(gallivm->module, td_str);
-   free(td_str);
+   /* Setting the module's DataLayout to an empty string will cause the
+    * ExecutionEngine to copy the DataLayout string from its target
+    * machine to the module.  As of LLVM 3.8 the module and the execution
+    * engine are required to have the same DataLayout.
+    *
+    * TODO: This is just a temporary work-around.  The correct solution is
+    * for gallivm_init_state() to create a TargetMachine and pull the
+    * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
+    * is being implicitly created by the EngineBuilder in
+    * lp_build_create_jit_compiler_for_module()
+    */
+
+#if HAVE_LLVM < 0x0308
+   {
+      char *td_str;
+      // New ones from the Module.
+      td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
+      LLVMSetDataLayout(gallivm->module, td_str);
+      free(td_str);
+   }
+#else
+   LLVMSetDataLayout(gallivm->module, "");
+#endif
 
    if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
       /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
@@ -533,6 +551,16 @@
       if (0) {
          debug_printf("optimizing func %s...\n", LLVMGetValueName(func));
       }
+
+      /* Disable frame pointer omission on debug/profile builds */
+      /* XXX: And work around http://llvm.org/PR21435 */
+#if HAVE_LLVM >= 0x0307 && \
+    (defined(DEBUG) || defined(PROFILE) || \
+     defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+      LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim", "true");
+      LLVMAddTargetDependentFunctionAttr(func, "no-frame-pointer-elim-non-leaf", "true");
+#endif
+
       LLVMRunFunctionPassManager(gallivm->passmgr, func);
       func = LLVMGetNextFunction(func);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_intr.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_intr.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_intr.c	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_intr.c	2015-09-16 14:36:08.000000000 +0000
@@ -81,7 +81,8 @@
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
-                   unsigned num_args)
+                   unsigned num_args,
+                   LLVMAttribute attr)
 {
    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
    LLVMValueRef function;
@@ -99,6 +100,9 @@
       }
 
       function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
+
+      if (attr)
+          LLVMAddFunctionAttr(function, attr);
    }
 
    return LLVMBuildCall(builder, function, args, num_args, "");
@@ -111,7 +115,7 @@
                          LLVMTypeRef ret_type,
                          LLVMValueRef a)
 {
-   return lp_build_intrinsic(builder, name, ret_type, &a, 1);
+   return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
 }
 
 
@@ -127,7 +131,7 @@
    args[0] = a;
    args[1] = b;
 
-   return lp_build_intrinsic(builder, name, ret_type, args, 2);
+   return lp_build_intrinsic(builder, name, ret_type, args, 2, 0);
 }
 
 
@@ -242,7 +246,7 @@
       LLVMValueRef res_elem;
       for(j = 0; j < num_args; ++j)
          arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, "");
-      res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args);
+      res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args, 0);
       res = LLVMBuildInsertElement(builder, res, res_elem, index, "");
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_intr.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_intr.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_intr.h	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_intr.h	2015-09-16 14:36:08.000000000 +0000
@@ -59,7 +59,8 @@
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
-                   unsigned num_args);
+                   unsigned num_args,
+                   LLVMAttribute attr);
 
 
 LLVMValueRef
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_limits.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_limits.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_limits.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_limits.h	2015-09-16 14:36:08.000000000 +0000
@@ -51,8 +51,12 @@
 
 #define LP_MAX_TGSI_PREDS 16
 
+#define LP_MAX_TGSI_CONSTS 4096
+
 #define LP_MAX_TGSI_CONST_BUFFERS 16
 
+#define LP_MAX_TGSI_CONST_BUFFER_SIZE (LP_MAX_TGSI_CONSTS * sizeof(float[4]))
+
 /*
  * For quick access we cache registers in statically
  * allocated arrays. Here we define the maximum size
@@ -84,7 +88,7 @@
  * actually try to allocate the maximum and run out of memory and crash.  So
  * stick with something reasonable here.
  */
-static INLINE int
+static inline int
 gallivm_get_shader_param(enum pipe_shader_cap param)
 {
    switch(param) {
@@ -96,11 +100,11 @@
    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
       return LP_MAX_TGSI_NESTING;
    case PIPE_SHADER_CAP_MAX_INPUTS:
-      return PIPE_MAX_SHADER_INPUTS;
+      return 32;
    case PIPE_SHADER_CAP_MAX_OUTPUTS:
       return 32;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
-      return sizeof(float[4]) * 4096;
+      return LP_MAX_TGSI_CONST_BUFFER_SIZE;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
       return PIPE_MAX_CONSTANT_BUFFERS;
    case PIPE_SHADER_CAP_MAX_TEMPS:
@@ -125,8 +129,10 @@
    case PIPE_SHADER_CAP_PREFERRED_IR:
       return PIPE_SHADER_IR_TGSI;
    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       return 1;
    case PIPE_SHADER_CAP_DOUBLES:
+      return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_logic.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_logic.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_logic.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_logic.c	2015-09-16 14:36:08.000000000 +0000
@@ -395,7 +395,7 @@
       args[2] = mask;
 
       res = lp_build_intrinsic(builder, intrinsic,
-                               arg_type, args, Elements(args));
+                               arg_type, args, Elements(args), 0);
 
       if (arg_type != bld->vec_type) {
          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp	2015-09-16 14:36:08.000000000 +0000
@@ -50,6 +50,12 @@
 
 #include <stddef.h>
 
+// Workaround http://llvm.org/PR23628
+#if HAVE_LLVM >= 0x0307
+#  pragma push_macro("DEBUG")
+#  undef DEBUG
+#endif
+
 #include <llvm-c/Core.h>
 #include <llvm-c/ExecutionEngine.h>
 #include <llvm/Target/TargetOptions.h>
@@ -70,6 +76,11 @@
 #include <llvm/IR/Module.h>
 #include <llvm/Support/CBindingWrapping.h>
 
+// Workaround http://llvm.org/PR23628
+#if HAVE_LLVM >= 0x0307
+#  pragma pop_macro("DEBUG")
+#endif
+
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
@@ -439,8 +450,10 @@
 #if HAVE_LLVM < 0x0304
    options.NoFramePointerElimNonLeaf = true;
 #endif
+#if HAVE_LLVM < 0x0307
    options.NoFramePointerElim = true;
 #endif
+#endif
 
    builder.setEngineKind(EngineKind::JIT)
           .setErrorStr(&Error)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_sample.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_sample.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_sample.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_sample.h	2015-09-16 14:36:08.000000000 +0000
@@ -371,7 +371,7 @@
  * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at
  * this time.  Return whether the given mode is supported by that function.
  */
-static INLINE boolean
+static inline boolean
 lp_is_simple_wrap_mode(unsigned mode)
 {
    switch (mode) {
@@ -384,7 +384,7 @@
 }
 
 
-static INLINE void
+static inline void
 apply_sampler_swizzle(struct lp_build_sample_context *bld,
                       LLVMValueRef *texel)
 {
@@ -402,7 +402,7 @@
  * not really dimension as such, this indicates the amount of
  * "normal" texture coords subject to minification, wrapping etc.
  */
-static INLINE unsigned
+static inline unsigned
 texture_dims(enum pipe_texture_target tex)
 {
    switch (tex) {
@@ -424,7 +424,7 @@
    }
 }
 
-static INLINE boolean
+static inline boolean
 has_layer_coord(enum pipe_texture_target tex)
 {
    switch (tex) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c	2015-09-16 14:36:08.000000000 +0000
@@ -2501,7 +2501,7 @@
        * all zero as mandated by d3d10 in this case.
        */
       unsigned chan;
-      LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
+      LLVMValueRef zero = lp_build_zero(gallivm, type);
       for (chan = 0; chan < 4; chan++) {
          texel_out[chan] = zero;
       }
@@ -2748,11 +2748,37 @@
    else {
       LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
       LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
-      boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
-                        op_is_tex &&
-                        /* not sure this is strictly needed or simply impossible */
-                        derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
-                        lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
+      boolean use_aos;
+
+      if (util_format_is_pure_integer(static_texture_state->format) &&
+          !util_format_has_depth(bld.format_desc) &&
+          (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
+           static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
+           static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
+         /*
+          * Bail if impossible filtering is specified (the awkward additional
+          * depth check is because it is legal in gallium to have things like S8Z24
+          * here which would say it's pure int even though such formats should sample
+          * the depth component).
+          * In GL such filters make the texture incomplete, this makes it robust
+          * against state trackers which set this up regardless (we'd crash in the
+          * lerp later (except for gather)).
+          * Must do this after fetch_texel code since with GL state tracker we'll
+          * get some junk sampler for buffer textures.
+          */
+         unsigned chan;
+         LLVMValueRef zero = lp_build_zero(gallivm, type);
+         for (chan = 0; chan < 4; chan++) {
+            texel_out[chan] = zero;
+         }
+         return;
+      }
+
+      use_aos = util_format_fits_8unorm(bld.format_desc) &&
+                op_is_tex &&
+                /* not sure this is strictly needed or simply impossible */
+                derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
+                lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
 
       use_aos &= bld.num_lods <= num_quads ||
                  derived_sampler_state.min_img_filter ==
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c	2015-09-16 14:36:08.000000000 +0000
@@ -894,6 +894,125 @@
    xpd_emit	 /* emit */
 };
 
+/* TGSI_OPCODE_D2F */
+static void
+d2f_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                       bld_base->base.vec_type, "");
+}
+
+/* TGSI_OPCODE_D2I */
+static void
+d2i_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPToSI(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.int_vec_type, "");
+}
+
+/* TGSI_OPCODE_D2U */
+static void
+d2u_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPToUI(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.int_vec_type, "");
+}
+
+/* TGSI_OPCODE_F2D */
+static void
+f2d_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPExt(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_U2D */
+static void
+u2d_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_I2D */
+static void
+i2d_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildSIToFP(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_DMAD */
+static void
+dmad_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef tmp;
+   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
+                                   emit_data->args[0],
+                                   emit_data->args[1]);
+   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
+                                       TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
+}
+
+/* TGSI_OPCODE_DRCP */
+static void drcp_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef one;
+   one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
+   emit_data->output[emit_data->chan] = LLVMBuildFDiv(
+      bld_base->base.gallivm->builder,
+      one, emit_data->args[0], "");
+}
+
+/* TGSI_OPCODE_DFRAC */
+static void dfrac_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef tmp;
+   tmp = lp_build_floor(&bld_base->dbl_bld,
+			emit_data->args[0]);
+   emit_data->output[emit_data->chan] =  LLVMBuildFSub(bld_base->base.gallivm->builder,
+                                                       emit_data->args[0], tmp, "");
+}
+
 void
 lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
 {
@@ -948,6 +1067,25 @@
 
    bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
    bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
+   bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
+   bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
+   bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
+   bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
+   bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
+   bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
+   bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
+   bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
+
 }
 
 /* CPU Only default actions */
@@ -1792,6 +1930,107 @@
                                                      emit_data->args[1]);
 }
 
+/* TGSI_OPCODE_DABS (CPU Only) */
+static void
+dabs_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
+                                                       emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_DNEG (CPU Only) */
+static void
+dneg_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
+                                                     bld_base->dbl_bld.zero,
+                                                     emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_DSET Helper (CPU Only) */
+static void
+dset_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data,
+   unsigned pipe_func)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
+                                    emit_data->args[0], emit_data->args[1]);
+   /* arguments were 64 bit but store as 32 bit */
+   cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
+   emit_data->output[emit_data->chan] = cond;
+}
+
+/* TGSI_OPCODE_DSEQ (CPU Only) */
+static void
+dseq_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
+}
+
+/* TGSI_OPCODE_DSGE (CPU Only) */
+static void
+dsge_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
+}
+
+/* TGSI_OPCODE_DSLT (CPU Only) */
+static void
+dslt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
+}
+
+/* TGSI_OPCODE_DSNE (CPU Only) */
+static void
+dsne_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
+}
+
+/* TGSI_OPCODE_DRSQ: double reciprocal square root (CPU Only) */
+static void
+drecip_sqrt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
+                                                         emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_DSQRT: double square root (CPU Only) */
+static void
+dsqrt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
+                                                      emit_data->args[0]);
+}
+
 void
 lp_set_default_actions_cpu(
    struct lp_build_tgsi_context * bld_base)
@@ -1864,4 +2103,14 @@
 
    bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
 
+   bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
+
+   bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
+
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h	2015-09-16 14:36:08.000000000 +0000
@@ -71,6 +71,11 @@
     */
    unsigned chan;
 
+   /**
+    * This is used to specify the src channel to read from for doubles.
+    */
+   unsigned src_chan;
+
    /** The lp_build_tgsi_action::emit 'executes' the opcode and writes the
     * results to this array.
     */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c	2015-09-16 14:36:08.000000000 +0000
@@ -232,23 +232,9 @@
    /*
     * Saturate the value
     */
-
-   switch (inst->Instruction.Saturate) {
-   case TGSI_SAT_NONE:
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
+   if (inst->Instruction.Saturate) {
       value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
       value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
-      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
-      break;
-
-   default:
-      assert(0);
    }
 
    /*
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c	2015-09-16 14:36:08.000000000 +0000
@@ -104,7 +104,7 @@
    struct lp_build_context * base = &bld_base->base;
    emit_data->output[emit_data->chan] = lp_build_intrinsic(
                base->gallivm->builder, action->intr_name,
-               emit_data->dst_type, emit_data->args, emit_data->arg_count);
+               emit_data->dst_type, emit_data->args, emit_data->arg_count, 0);
 }
 
 LLVMValueRef
@@ -175,13 +175,52 @@
    unsigned src;
    for (src = 0; src < emit_data->info->num_src; src++) {
       emit_data->args[src] = lp_build_emit_fetch(bld_base, emit_data->inst, src,
-                                               emit_data->chan);
+                                                 emit_data->src_chan);
    }
    emit_data->arg_count = emit_data->info->num_src;
    lp_build_action_set_dst_type(emit_data, bld_base,
 		emit_data->inst->Instruction.Opcode);
 }
 
+/**
+ * With doubles, src and dst channels aren't 1:1.
+ * Check the src/dst types for the opcode:
+ * 1. if neither is double then src == dst;
+ * 2. if dst is double
+ *     - don't store to y or w (returns -1);
+ *     - if src is double then src == dst;
+ *     - else (e.g. f2d) d.xy = s.x,
+ *     - and d.zw = s.y;
+ * 3. if dst is single, src is double
+ *    - map dst x,z to src xy;
+ *    - map dst y,w to src zw.
+ */
+static int get_src_chan_idx(unsigned opcode,
+                            int dst_chan_index)
+{
+   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode);
+   enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode);
+
+   if (dtype != TGSI_TYPE_DOUBLE && stype != TGSI_TYPE_DOUBLE)
+      return dst_chan_index;
+   if (dtype == TGSI_TYPE_DOUBLE) {
+      if (dst_chan_index == 1 || dst_chan_index == 3)
+         return -1;
+      if (stype == TGSI_TYPE_DOUBLE)
+         return dst_chan_index;
+      if (dst_chan_index == 0)
+         return 0;
+      if (dst_chan_index == 2)
+         return 1;
+   } else {
+      if (dst_chan_index == 0 || dst_chan_index == 2)
+         return 0;
+      if (dst_chan_index == 1 || dst_chan_index == 3)
+         return 2;
+   }
+   return -1;
+}
+
 /* XXX: COMMENT
  * It should be assumed that this function ignores writemasks
  */
@@ -197,7 +236,6 @@
    struct lp_build_emit_data emit_data;
    unsigned chan_index;
    LLVMValueRef val;
-
    bld_base->pc++;
 
    if (bld_base->emit_debug) {
@@ -240,7 +278,12 @@
    /* Emit the instructions */
    if (info->output_mode == TGSI_OUTPUT_COMPONENTWISE && bld_base->soa) {
       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+         int src_index = get_src_chan_idx(inst->Instruction.Opcode, chan_index);
+         /* ignore channels 1/3 in double dst */
+         if (src_index == -1)
+            continue;
          emit_data.chan = chan_index;
+         emit_data.src_chan = src_index;
          if (!action->fetch_args) {
             lp_build_fetch_args(bld_base, &emit_data);
          } else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h	2015-09-16 14:36:08.000000000 +0000
@@ -165,6 +165,7 @@
    LLVMValueRef vertex_id_nobase;
    LLVMValueRef prim_id;
    LLVMValueRef basevertex;
+   LLVMValueRef invocation_id;
 };
 
 
@@ -337,6 +338,7 @@
    struct lp_build_context uint_bld;
    struct lp_build_context int_bld;
 
+   struct lp_build_context dbl_bld;
    /** This array stores functions that are used to transform TGSI opcodes to
      * LLVM instructions.
      */
@@ -348,6 +350,9 @@
 
    struct lp_build_tgsi_action sqrt_action;
 
+   struct lp_build_tgsi_action drsq_action;
+
+   struct lp_build_tgsi_action dsqrt_action;
    const struct tgsi_shader_info *info;
 
    lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
@@ -557,13 +562,13 @@
 
 };
 
-static INLINE struct lp_build_tgsi_soa_context *
+static inline struct lp_build_tgsi_soa_context *
 lp_soa_context(struct lp_build_tgsi_context *bld_base)
 {
    return (struct lp_build_tgsi_soa_context *)bld_base;
 }
 
-static INLINE struct lp_build_tgsi_aos_context *
+static inline struct lp_build_tgsi_aos_context *
 lp_aos_context(struct lp_build_tgsi_context *bld_base)
 {
    return (struct lp_build_tgsi_aos_context *)bld_base;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c	2015-09-16 14:36:08.000000000 +0000
@@ -462,7 +462,7 @@
 }
 
 
-static INLINE void
+static inline void
 dump_info(const struct tgsi_token *tokens,
           struct lp_tgsi_info *info)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c	2015-09-16 14:36:08.000000000 +0000
@@ -106,7 +106,7 @@
  * Return the context for the current function.
  * (always 'main', if shader doesn't do any function calls)
  */
-static INLINE struct function_ctx *
+static inline struct function_ctx *
 func_ctx(struct lp_exec_mask *mask)
 {
    assert(mask->function_stack_size > 0);
@@ -120,7 +120,7 @@
  * no loop inside the current function, but we were inside
  * a loop inside another function, from which this one was called.
  */
-static INLINE boolean
+static inline boolean
 mask_has_loop(struct lp_exec_mask *mask)
 {
    int i;
@@ -138,7 +138,7 @@
  * no switch in the current function, but we were inside
  * a switch inside another function, from which this one was called.
  */
-static INLINE boolean
+static inline boolean
 mask_has_switch(struct lp_exec_mask *mask)
 {
    int i;
@@ -156,7 +156,7 @@
  * no conditional in the current function, but we were inside
  * a conditional inside another function, from which this one was called.
  */
-static INLINE boolean
+static inline boolean
 mask_has_cond(struct lp_exec_mask *mask)
 {
    int i;
@@ -947,15 +947,20 @@
 build_gather(struct lp_build_tgsi_context *bld_base,
              LLVMValueRef base_ptr,
              LLVMValueRef indexes,
-             LLVMValueRef overflow_mask)
+             LLVMValueRef overflow_mask,
+             LLVMValueRef indexes2)
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    struct lp_build_context *bld = &bld_base->base;
-   LLVMValueRef res = bld->undef;
+   LLVMValueRef res;
    unsigned i;
 
+   if (indexes2)
+      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
+   else
+      res = bld->undef;
    /*
     * overflow_mask is a vector telling us which channels
     * in the vector overflowed. We use the overflow behavior for
@@ -976,26 +981,47 @@
        * control flow.
        */
       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
+      if (indexes2)
+         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
    }
 
    /*
     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
     */
-   for (i = 0; i < bld->type.length; i++) {
-      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
-      LLVMValueRef index = LLVMBuildExtractElement(builder,
-                                                   indexes, ii, "");
+   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
+      LLVMValueRef si, di;
+      LLVMValueRef index;
       LLVMValueRef scalar_ptr, scalar;
 
+      di = lp_build_const_int32(bld->gallivm, i);
+      if (indexes2)
+         si = lp_build_const_int32(bld->gallivm, i >> 1);
+      else
+         si = di;
+
+      if (indexes2 && (i & 1)) {
+         index = LLVMBuildExtractElement(builder,
+                                         indexes2, si, "");
+      } else {
+         index = LLVMBuildExtractElement(builder,
+                                         indexes, si, "");
+      }
       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                 &index, 1, "gather_ptr");
       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
 
-      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
+      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
    }
 
    if (overflow_mask) {
-      res = lp_build_select(bld, overflow_mask, bld->zero, res);
+      if (indexes2) {
+         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
+         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
+                                       bld_base->dbl_bld.int_vec_type, "");
+         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
+                               bld_base->dbl_bld.zero, res);
+      } else
+         res = lp_build_select(bld, overflow_mask, bld->zero, res);
    }
 
    return res;
@@ -1139,8 +1165,10 @@
    case TGSI_TYPE_SIGNED:
       bld_fetch = &bld_base->int_bld;
       break;
-   case TGSI_TYPE_VOID:
    case TGSI_TYPE_DOUBLE:
+      bld_fetch = &bld_base->dbl_bld;
+      break;
+   case TGSI_TYPE_VOID:
    default:
       assert(0);
       bld_fetch = NULL;
@@ -1216,6 +1244,7 @@
          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
       LLVMValueRef index_vec;  /* index into the const buffer */
       LLVMValueRef overflow_mask;
+      LLVMValueRef index_vec2 = NULL;
 
       indirect_index = get_indirect_index(bld,
                                           reg->Register.File,
@@ -1235,22 +1264,33 @@
       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
 
+      if (stype == TGSI_TYPE_DOUBLE) {
+         LLVMValueRef swizzle_vec2;
+         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
+         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
+         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
+      }
       /* Gather values from the constant buffer */
-      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask);
+      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
    }
    else {
       LLVMValueRef index;  /* index into the const buffer */
       LLVMValueRef scalar, scalar_ptr;
-
+      struct lp_build_context *bld_broad = &bld_base->base;
       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
 
       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                 &index, 1, "");
+      if (stype == TGSI_TYPE_DOUBLE) {
+         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
+         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
+         bld_broad = &bld_base->dbl_bld;
+      }
       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
-      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
+      res = lp_build_broadcast_scalar(bld_broad, scalar);
    }
 
-   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
    }
@@ -1258,6 +1298,39 @@
    return res;
 }
 
+/**
+ * Fetch double values from two separate channels.
+ * Doubles are stored split across two channels, like xy and zw.
+ * Builds a shuffle mask of 2 * type.length entries (at most 16) that
+ * alternates lanes of input and input2 (assuming type.length lanes each),
+ * then bitcasts the shuffled vector to the fetch vec_type for stype.
+ */
+static LLVMValueRef
+emit_fetch_double(
+   struct lp_build_tgsi_context * bld_base,
+   enum tgsi_opcode_type stype,
+   LLVMValueRef input,
+   LLVMValueRef input2)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef res;
+   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
+   int i;
+   LLVMValueRef shuffles[16]; /* shuffle mask; capacity guarded by the assert */
+   int len = bld_base->base.type.length * 2; /* number of mask entries */
+   assert(len <= 16);
+
+   for (i = 0; i < bld_base->base.type.length * 2; i+=2) { /* two entries per pass */
+      shuffles[i] = lp_build_const_int32(gallivm, i / 2); /* index into input */
+      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+   }
+   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
+
+   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
+}
+
 static LLVMValueRef
 emit_fetch_immediate(
    struct lp_build_tgsi_context * bld_base,
@@ -1281,7 +1354,7 @@
       if (reg->Register.Indirect) {
          LLVMValueRef indirect_index;
          LLVMValueRef index_vec;  /* index into the immediate register array */
-
+         LLVMValueRef index_vec2 = NULL;
          indirect_index = get_indirect_index(bld,
                                              reg->Register.File,
                                              reg->Register.Index,
@@ -1296,25 +1369,46 @@
                                            indirect_index,
                                            swizzle,
                                            FALSE);
-
+         if (stype == TGSI_TYPE_DOUBLE)
+            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+                                              indirect_index,
+                                              swizzle + 1,
+                                              FALSE);
          /* Gather values from the immediate register array */
-         res = build_gather(bld_base, imms_array, index_vec, NULL);
+         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
       } else {
          LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                         reg->Register.Index * 4 + swizzle);
          LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
                                                 bld->imms_array, &lindex, 1, "");
          res = LLVMBuildLoad(builder, imms_ptr, "");
+
+         if (stype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef lindex1;
+            LLVMValueRef imms_ptr2;
+            LLVMValueRef res2;
+
+            lindex1 = lp_build_const_int32(gallivm,
+                                           reg->Register.Index * 4 + swizzle + 1);
+            imms_ptr2 = LLVMBuildGEP(builder,
+                                      bld->imms_array, &lindex1, 1, "");
+            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
+            res = emit_fetch_double(bld_base, stype, res, res2);
+         }
       }
    }
    else {
       res = bld->immediates[reg->Register.Index][swizzle];
+      if (stype == TGSI_TYPE_DOUBLE)
+         res = emit_fetch_double(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
    }
 
    if (stype == TGSI_TYPE_UNSIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    } else if (stype == TGSI_TYPE_SIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_DOUBLE) {
+      res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
    }
    return res;
 }
@@ -1334,6 +1428,7 @@
    if (reg->Register.Indirect) {
       LLVMValueRef indirect_index;
       LLVMValueRef index_vec;  /* index into the input reg array */
+      LLVMValueRef index_vec2 = NULL;
       LLVMValueRef inputs_array;
       LLVMTypeRef fptr_type;
 
@@ -1346,23 +1441,43 @@
                                         indirect_index,
                                         swizzle,
                                         TRUE);
-
+      if (stype == TGSI_TYPE_DOUBLE) {
+         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+                                           indirect_index,
+                                           swizzle + 1,
+                                           TRUE);
+      }
       /* cast inputs_array pointer to float* */
       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
 
       /* Gather values from the input register array */
-      res = build_gather(bld_base, inputs_array, index_vec, NULL);
+      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
    } else {
       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
          LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                         reg->Register.Index * 4 + swizzle);
-         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
-                                                bld->inputs_array, &lindex, 1, "");
+         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
+                                               bld->inputs_array, &lindex, 1, "");
+
          res = LLVMBuildLoad(builder, input_ptr, "");
+         if (stype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef lindex1;
+            LLVMValueRef input_ptr2;
+            LLVMValueRef res2;
+
+            lindex1 = lp_build_const_int32(gallivm,
+                                           reg->Register.Index * 4 + swizzle + 1);
+            input_ptr2 = LLVMBuildGEP(builder,
+                                      bld->inputs_array, &lindex1, 1, "");
+            res2 = LLVMBuildLoad(builder, input_ptr2, "");
+            res = emit_fetch_double(bld_base, stype, res, res2);
+         }
       }
       else {
          res = bld->inputs[reg->Register.Index][swizzle];
+         if (stype == TGSI_TYPE_DOUBLE)
+            res = emit_fetch_double(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
       }
    }
 
@@ -1372,6 +1487,8 @@
       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    } else if (stype == TGSI_TYPE_SIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_DOUBLE) {
+      res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
    }
 
    return res;
@@ -1413,7 +1530,7 @@
    } else {
       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
    }
-   
+
    if (reg->Dimension.Indirect) {
       vertex_index = get_indirect_index(bld,
                                         reg->Register.File,
@@ -1436,6 +1553,8 @@
       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    } else if (stype == TGSI_TYPE_SIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_DOUBLE) {
+      res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
    }
 
    return res;
@@ -1455,7 +1574,7 @@
 
    if (reg->Register.Indirect) {
       LLVMValueRef indirect_index;
-      LLVMValueRef index_vec;  /* index into the temp reg array */
+      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
       LLVMValueRef temps_array;
       LLVMTypeRef fptr_type;
 
@@ -1468,21 +1587,35 @@
                                         indirect_index,
                                         swizzle,
                                         TRUE);
+      if (stype == TGSI_TYPE_DOUBLE) {
+               index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+                                                  indirect_index,
+                                                  swizzle + 1,
+                                                  TRUE);
+      }
 
       /* cast temps_array pointer to float* */
       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
 
       /* Gather values from the temporary register array */
-      res = build_gather(bld_base, temps_array, index_vec, NULL);
+      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
    }
    else {
       LLVMValueRef temp_ptr;
       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
       res = LLVMBuildLoad(builder, temp_ptr, "");
+
+      if (stype == TGSI_TYPE_DOUBLE) {
+         LLVMValueRef temp_ptr2, res2;
+
+         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
+         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
+         res = emit_fetch_double(bld_base, stype, res, res2);
+      }
    }
 
-   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
    }
@@ -1532,6 +1665,11 @@
       atype = TGSI_TYPE_UNSIGNED;
       break;
 
+   case TGSI_SEMANTIC_INVOCATIONID:
+      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
+      atype = TGSI_TYPE_UNSIGNED;
+      break;
+
    default:
       assert(!"unexpected semantic in emit_fetch_system_value");
       res = bld_base->base.zero;
@@ -1643,6 +1781,50 @@
    }
 }
 
+/**
+ * Store a vector of doubles into two separate float channels.
+ * value is read as interleaved 32-bit pieces: d0.x, d0.y, d1.x, d1.y, ...
+ * (NOTE(review): presumably a float vector of 2 * type.length; confirm).
+ * The pieces are split with two shuffles and stored under mask and pred:
+ * chan_ptr = d0.x, d1.x, d2.x, d3.x
+ * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
+ * dtype is not used by this function.
+ */
+static void
+emit_store_double_chan(struct lp_build_tgsi_context *bld_base,
+                       int dtype,
+                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
+                       LLVMValueRef pred,
+                       LLVMValueRef value)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *float_bld = &bld_base->base;
+   int i;
+   LLVMValueRef temp, temp2;
+   LLVMValueRef shuffles[8], shuffles2[8];
+   assert(bld_base->base.type.length <= 8); /* capacity of the shuffle masks */
+
+   for (i = 0; i < bld_base->base.type.length; i++) {
+      shuffles[i] = lp_build_const_int32(gallivm, i * 2); /* even lanes */
+      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1); /* odd lanes */
+   }
+
+   temp = LLVMBuildShuffleVector(builder, value,
+                                 LLVMGetUndef(LLVMTypeOf(value)),
+                                 LLVMConstVector(shuffles,
+                                                 bld_base->base.type.length),
+                                 "");
+   temp2 = LLVMBuildShuffleVector(builder, value,
+                                  LLVMGetUndef(LLVMTypeOf(value)),
+                                  LLVMConstVector(shuffles2,
+                                                  bld_base->base.type.length),
+                                  "");
+
+   lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr);
+   lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2);
+}
 
 /**
  * Register store.
@@ -1670,33 +1852,19 @@
     *
     * It is always assumed to be float.
     */
-   switch( inst->Instruction.Saturate ) {
-   case TGSI_SAT_NONE:
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
+   if (inst->Instruction.Saturate) {
       assert(dtype == TGSI_TYPE_FLOAT ||
              dtype == TGSI_TYPE_UNTYPED);
       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      assert(dtype == TGSI_TYPE_FLOAT ||
-             dtype == TGSI_TYPE_UNTYPED);
-      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      /* This will give -1.0 for NaN which is probably not what we want. */
-      value = lp_build_max_ext(float_bld, value,
-                               lp_build_const_vec(gallivm, float_bld->type, -1.0),
-                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
-      value = lp_build_min(float_bld, value, float_bld->one);
-      break;
-
-   default:
-      assert(0);
    }
 
    if (reg->Register.Indirect) {
+      /*
+       * Currently the mesa/st doesn't generate indirect stores
+       * to doubles, it normally uses MOV to do indirect stores.
+       */
+      assert(dtype != TGSI_TYPE_DOUBLE);
       indirect_index = get_indirect_index(bld,
                                           reg->Register.File,
                                           reg->Register.Index,
@@ -1735,13 +1903,23 @@
       else {
          LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                   chan_index);
-         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
+
+         if (dtype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
+                                                      chan_index + 1);
+            emit_store_double_chan(bld_base, dtype, out_ptr, out_ptr2,
+                                   pred, value);
+         } else
+            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
       }
       break;
 
    case TGSI_FILE_TEMPORARY:
       /* Temporaries are always stored as floats */
-      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+      if (dtype != TGSI_TYPE_DOUBLE)
+         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+      else
+         value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
 
       if (reg->Register.Indirect) {
          LLVMValueRef index_vec;  /* indexes into the temp registers */
@@ -1763,7 +1941,16 @@
       else {
          LLVMValueRef temp_ptr;
          temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
-         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
+
+         if (dtype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
+                                                         reg->Register.Index,
+                                                         chan_index + 1);
+            emit_store_double_chan(bld_base, dtype, temp_ptr, temp_ptr2,
+                                   pred, value);
+         }
+         else
+            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
       }
       break;
 
@@ -1832,13 +2019,16 @@
 {
    unsigned chan_index;
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
-
+   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
    if(info->num_dst) {
       LLVMValueRef pred[TGSI_NUM_CHANNELS];
 
       emit_fetch_predicate( bld, inst, pred );
 
       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+
+         if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+             continue;
          emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
       }
    }
@@ -2837,6 +3027,7 @@
                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
 
       break;
+   case TGSI_IMM_FLOAT64:
    case TGSI_IMM_UINT32:
       for( i = 0; i < size; ++i ) {
          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
@@ -2871,8 +3062,7 @@
    } else {
       /* simply copy the immediate values into the next immediates[] slot */
       unsigned i;
-      const uint size = imm->Immediate.NrTokens - 1;
-      assert(size <= 4);
+      assert(imm->Immediate.NrTokens - 1 <= 4);
       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
 
       for(i = 0; i < 4; ++i )
@@ -3688,6 +3878,12 @@
    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
+   {
+      struct lp_type dbl_type;
+      dbl_type = type;
+      dbl_type.width *= 2;
+      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
+   }
    bld.mask = mask;
    bld.inputs = inputs;
    bld.outputs = outputs;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_type.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_type.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/gallivm/lp_bld_type.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/gallivm/lp_bld_type.h	2015-09-16 14:36:08.000000000 +0000
@@ -173,7 +173,7 @@
  *
  * e.g. With PIPE_FORMAT_R32G32B32A32_FLOAT returns an lp_type with float[4]
  */
-static INLINE void
+static inline void
 lp_type_from_format_desc(struct lp_type* type, const struct util_format_description *format_desc)
 {
    assert(format_desc->is_array);
@@ -189,14 +189,14 @@
 }
 
 
-static INLINE void
+static inline void
 lp_type_from_format(struct lp_type* type, enum pipe_format format)
 {
    lp_type_from_format_desc(type, util_format_description(format));
 }
 
 
-static INLINE unsigned
+static inline unsigned
 lp_type_width(struct lp_type type)
 {
    return type.width * type.length;
@@ -204,7 +204,7 @@
 
 
 /** Create scalar float type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_float(unsigned width)
 {
    struct lp_type res_type;
@@ -220,7 +220,7 @@
 
 
 /** Create vector of float type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_float_vec(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -236,7 +236,7 @@
 
 
 /** Create scalar int type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_int(unsigned width)
 {
    struct lp_type res_type;
@@ -251,7 +251,7 @@
 
 
 /** Create vector int type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_int_vec(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -266,7 +266,7 @@
 
 
 /** Create scalar uint type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_uint(unsigned width)
 {
    struct lp_type res_type;
@@ -280,7 +280,7 @@
 
 
 /** Create vector uint type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_uint_vec(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -293,7 +293,7 @@
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_unorm(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -307,7 +307,7 @@
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_fixed(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -322,7 +322,7 @@
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_ufixed(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -364,7 +364,7 @@
 lp_build_int_vec_type(struct gallivm_state *gallivm, struct lp_type type);
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_float32_vec4_type(void)
 {
    struct lp_type type;
@@ -380,7 +380,7 @@
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_int32_vec4_type(void)
 {
    struct lp_type type;
@@ -396,7 +396,7 @@
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_unorm8_vec4_type(void)
 {
    struct lp_type type;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/hud/hud_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/hud/hud_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/hud/hud_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/hud/hud_context.c	2015-09-16 14:36:08.000000000 +0000
@@ -231,18 +231,53 @@
 }
 
 static void
-number_to_human_readable(uint64_t num, boolean is_in_bytes, char *out)
+number_to_human_readable(uint64_t num, uint64_t max_value,
+                         enum pipe_driver_query_type type, char *out)
 {
    static const char *byte_units[] =
-      {"", " KB", " MB", " GB", " TB", " PB", " EB"};
+      {" B", " KB", " MB", " GB", " TB", " PB", " EB"};
    static const char *metric_units[] =
       {"", " k", " M", " G", " T", " P", " E"};
-   const char **units = is_in_bytes ? byte_units : metric_units;
-   double divisor = is_in_bytes ? 1024 : 1000;
-   int unit = 0;
+   static const char *time_units[] =
+      {" us", " ms", " s"};  /* based on microseconds */
+   static const char *hz_units[] =
+      {" Hz", " KHz", " MHz", " GHz"};
+   static const char *percent_units[] = {"%"};
+
+   const char **units;
+   unsigned max_unit;
+   double divisor = (type == PIPE_DRIVER_QUERY_TYPE_BYTES) ? 1024 : 1000;
+   unsigned unit = 0;
    double d = num;
 
-   while (d > divisor) {
+   switch (type) {
+   case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+      max_unit = ARRAY_SIZE(time_units)-1;
+      units = time_units;
+      break;
+   case PIPE_DRIVER_QUERY_TYPE_PERCENTAGE:
+      max_unit = ARRAY_SIZE(percent_units)-1;
+      units = percent_units;
+      break;
+   case PIPE_DRIVER_QUERY_TYPE_BYTES:
+      max_unit = ARRAY_SIZE(byte_units)-1;
+      units = byte_units;
+      break;
+   case PIPE_DRIVER_QUERY_TYPE_HZ:
+      max_unit = ARRAY_SIZE(hz_units)-1;
+      units = hz_units;
+      break;
+   default:
+      if (max_value == 100) {
+         max_unit = ARRAY_SIZE(percent_units)-1;
+         units = percent_units;
+      } else {
+         max_unit = ARRAY_SIZE(metric_units)-1;
+         units = metric_units;
+      }
+   }
+
+   while (d > divisor && unit < max_unit) {
       d /= divisor;
       unit++;
    }
@@ -300,9 +335,9 @@
       unsigned y = pane->inner_y1 + pane->inner_height * (5 - i) / 5 -
                    hud->font.glyph_height / 2;
 
-      number_to_human_readable(pane->max_value * i / 5,
-                               pane->uses_byte_units, str);
-      hud_draw_string(hud, x, y, str);
+      number_to_human_readable(pane->max_value * i / 5, pane->max_value,
+                               pane->type, str);
+      hud_draw_string(hud, x, y, "%s", str);
    }
 
    /* draw info below the pane */
@@ -311,8 +346,8 @@
       unsigned x = pane->x1 + 2;
       unsigned y = pane->y2 + 2 + i*hud->font.glyph_height;
 
-      number_to_human_readable(gr->current_value,
-                               pane->uses_byte_units, str);
+      number_to_human_readable(gr->current_value, pane->max_value,
+                               pane->type, str);
       hud_draw_string(hud, x, y, "  %s: %s", gr->name, str);
       i++;
    }
@@ -417,8 +452,8 @@
    cso_save_blend(cso);
    cso_save_depth_stencil_alpha(cso);
    cso_save_fragment_shader(cso);
-   cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_sampler_views(cso);
+   cso_save_fragment_samplers(cso);
    cso_save_rasterizer(cso);
    cso_save_viewport(cso);
    cso_save_stream_outputs(cso);
@@ -547,8 +582,8 @@
    cso_restore_blend(cso);
    cso_restore_depth_stencil_alpha(cso);
    cso_restore_fragment_shader(cso);
-   cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_sampler_views(cso);
+   cso_restore_fragment_samplers(cso);
    cso_restore_rasterizer(cso);
    cso_restore_viewport(cso);
    cso_restore_stream_outputs(cso);
@@ -869,12 +904,16 @@
       else if (strcmp(name, "samples-passed") == 0 &&
                has_occlusion_query(hud->pipe->screen)) {
          hud_pipe_query_install(pane, hud->pipe, "samples-passed",
-                                PIPE_QUERY_OCCLUSION_COUNTER, 0, 0, FALSE);
+                                PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
+                                PIPE_DRIVER_QUERY_TYPE_UINT64,
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
       }
       else if (strcmp(name, "primitives-generated") == 0 &&
                has_streamout(hud->pipe->screen)) {
          hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
-                                PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0, FALSE);
+                                PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
+                                PIPE_DRIVER_QUERY_TYPE_UINT64,
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
       }
       else {
          boolean processed = FALSE;
@@ -901,7 +940,8 @@
             if (i < Elements(pipeline_statistics_names)) {
                hud_pipe_query_install(pane, hud->pipe, name,
                                       PIPE_QUERY_PIPELINE_STATISTICS, i,
-                                      0, FALSE);
+                                      0, PIPE_DRIVER_QUERY_TYPE_UINT64,
+                                      PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
                processed = TRUE;
             }
          }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/hud/hud_driver_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/hud/hud_driver_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/hud/hud_driver_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/hud/hud_driver_query.c	2015-09-16 14:36:08.000000000 +0000
@@ -43,6 +43,7 @@
    struct pipe_context *pipe;
    unsigned query_type;
    unsigned result_index; /* unit depends on query_type */
+   enum pipe_driver_query_result_type result_type;
 
    /* Ring of queries. If a query is busy, we use another slot. */
    struct pipe_query *query[NUM_QUERIES];
@@ -62,7 +63,8 @@
    uint64_t now = os_time_get();
 
    if (info->last_time) {
-      pipe->end_query(pipe, info->query[info->head]);
+      if (info->query[info->head])
+         pipe->end_query(pipe, info->query[info->head]);
 
       /* read query results */
       while (1) {
@@ -70,7 +72,7 @@
          union pipe_query_result result;
          uint64_t *res64 = (uint64_t *)&result;
 
-         if (pipe->get_query_result(pipe, query, FALSE, &result)) {
+         if (query && pipe->get_query_result(pipe, query, FALSE, &result)) {
             info->results_cumulative += res64[info->result_index];
             info->num_results++;
 
@@ -88,7 +90,8 @@
                        "gallium_hud: all queries are busy after %i frames, "
                        "can't add another query\n",
                        NUM_QUERIES);
-               pipe->destroy_query(pipe, info->query[info->head]);
+               if (info->query[info->head])
+                  pipe->destroy_query(pipe, info->query[info->head]);
                info->query[info->head] =
                      pipe->create_query(pipe, info->query_type, 0);
             }
@@ -106,22 +109,33 @@
       }
 
       if (info->num_results && info->last_time + gr->pane->period <= now) {
-         /* compute the average value across all frames */
-         hud_graph_add_value(gr, info->results_cumulative / info->num_results);
+         uint64_t value;
+
+         switch (info->result_type) {
+         default:
+         case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
+            value = info->results_cumulative / info->num_results;
+            break;
+         case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
+            value = info->results_cumulative;
+            break;
+         }
+
+         hud_graph_add_value(gr, value);
 
          info->last_time = now;
          info->results_cumulative = 0;
          info->num_results = 0;
       }
-
-      pipe->begin_query(pipe, info->query[info->head]);
    }
    else {
       /* initialize */
       info->last_time = now;
       info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
-      pipe->begin_query(pipe, info->query[info->head]);
    }
+
+   if (info->query[info->head])
+      pipe->begin_query(pipe, info->query[info->head]);
 }
 
 static void
@@ -148,7 +162,8 @@
 hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
                        const char *name, unsigned query_type,
                        unsigned result_index,
-                       uint64_t max_value, boolean uses_byte_units)
+                       uint64_t max_value, enum pipe_driver_query_type type,
+                       enum pipe_driver_query_result_type result_type)
 {
    struct hud_graph *gr;
    struct query_info *info;
@@ -172,12 +187,12 @@
    info->pipe = pipe;
    info->query_type = query_type;
    info->result_index = result_index;
+   info->result_type = result_type;
 
    hud_pane_add_graph(pane, gr);
    if (pane->max_value < max_value)
       hud_pane_set_max_value(pane, max_value);
-   if (uses_byte_units)
-      pane->uses_byte_units = TRUE;
+   pane->type = type;
 }
 
 boolean
@@ -187,7 +202,6 @@
    struct pipe_screen *screen = pipe->screen;
    struct pipe_driver_query_info query;
    unsigned num_queries, i;
-   boolean uses_byte_units;
    boolean found = FALSE;
 
    if (!screen->get_driver_query_info)
@@ -206,9 +220,8 @@
    if (!found)
       return FALSE;
 
-   uses_byte_units = query.type == PIPE_DRIVER_QUERY_TYPE_BYTES;
    hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0,
-                          query.max_value.u64, uses_byte_units);
+                          query.max_value.u64, query.type, query.result_type);
 
    return TRUE;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/hud/hud_private.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/hud/hud_private.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/hud/hud_private.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/hud/hud_private.h	2015-09-16 14:36:08.000000000 +0000
@@ -66,7 +66,7 @@
    uint64_t ceiling;
    unsigned dyn_ceil_last_ran;
    boolean dyn_ceiling;
-   boolean uses_byte_units;
+   enum pipe_driver_query_type type;
    uint64_t period; /* in microseconds */
 
    struct list_head graph_list;
@@ -89,7 +89,9 @@
 void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
                             const char *name, unsigned query_type,
                             unsigned result_index,
-                            uint64_t max_value, boolean uses_byte_units);
+                            uint64_t max_value,
+                            enum pipe_driver_query_type type,
+                            enum pipe_driver_query_result_type result_type);
 boolean hud_driver_query_install(struct hud_pane *pane,
                                  struct pipe_context *pipe, const char *name);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/Makefile.am	2015-09-16 14:36:08.000000000 +0000
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 if HAVE_LOADER_GALLIUM
 SUBDIRS := pipe-loader
 endif
@@ -10,6 +8,7 @@
 noinst_LTLIBRARIES = libgallium.la
 
 AM_CFLAGS = \
+	-I$(top_srcdir)/src/loader \
 	-I$(top_builddir)/src/glsl/nir \
 	-I$(top_srcdir)/src/gallium/auxiliary/util \
 	$(GALLIUM_CFLAGS) \
@@ -39,18 +38,23 @@
 
 endif
 
-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
-
-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
-
-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-	$(AM_V_at)$(MKDIR_P) util
-	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN =  $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 
+indices/u_indices_gen.c: indices/u_indices_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@
+
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@
+
+util/u_format_table.c: util/u_format_table.py \
+                       util/u_format_pack.py \
+                       util/u_format_parse.py \
+                       util/u_format.csv
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
 
 noinst_LTLIBRARIES += libgalliumvl_stub.la
 libgalliumvl_stub_la_SOURCES = \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/Makefile.sources	2015-09-16 14:36:08.000000000 +0000
@@ -274,7 +274,6 @@
 	util/u_simple_shaders.h \
 	util/u_slab.c \
 	util/u_slab.h \
-	util/u_snprintf.c \
 	util/u_split_prim.h \
 	util/u_sse.h \
 	util/u_staging.c \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/nir/tgsi_to_nir.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/nir/tgsi_to_nir.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/nir/tgsi_to_nir.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/nir/tgsi_to_nir.c	2015-09-16 14:36:08.000000000 +0000
@@ -58,6 +58,9 @@
    struct ttn_reg_info *temp_regs;
    nir_ssa_def **imm_defs;
 
+   unsigned num_samp_types;
+   nir_alu_type *samp_types;
+
    nir_register *addr_reg;
 
    /**
@@ -156,8 +159,33 @@
       /* Nothing to record for system values. */
    } else if (file == TGSI_FILE_SAMPLER) {
       /* Nothing to record for samplers. */
+   } else if (file == TGSI_FILE_SAMPLER_VIEW) {
+      struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
+      nir_alu_type type;
+
+      assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
+             (sview->ReturnTypeX == sview->ReturnTypeZ) &&
+             (sview->ReturnTypeX == sview->ReturnTypeW));
+
+      switch (sview->ReturnTypeX) {
+      case TGSI_RETURN_TYPE_SINT:
+         type = nir_type_int;
+         break;
+      case TGSI_RETURN_TYPE_UINT:
+         type = nir_type_unsigned;
+         break;
+      case TGSI_RETURN_TYPE_FLOAT:
+      default:
+         type = nir_type_float;
+         break;
+      }
+
+      for (i = 0; i < array_size; i++) {
+         c->samp_types[decl->Range.First + i] = type;
+      }
    } else {
-      nir_variable *var;
+      bool is_array = (array_size > 1);
+
       assert(file == TGSI_FILE_INPUT ||
              file == TGSI_FILE_OUTPUT ||
              file == TGSI_FILE_CONSTANT);
@@ -166,76 +194,99 @@
       if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension)
          return;
 
-      var = rzalloc(b->shader, nir_variable);
-      var->data.driver_location = decl->Range.First;
+      if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
+         is_array = (is_array && decl->Declaration.Array &&
+                     (decl->Array.ArrayID != 0));
+      }
 
-      var->type = glsl_vec4_type();
-      if (array_size > 1)
-         var->type = glsl_array_type(var->type, array_size);
+      for (i = 0; i < array_size; i++) {
+         unsigned idx = decl->Range.First + i;
+         nir_variable *var = rzalloc(b->shader, nir_variable);
 
-      switch (file) {
-      case TGSI_FILE_INPUT:
-         var->data.read_only = true;
-         var->data.mode = nir_var_shader_in;
-         var->name = ralloc_asprintf(var, "in_%d", decl->Range.First);
-
-         /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
-          * instead, but nothing in NIR core is looking at the value
-          * currently, and this is less change to drivers.
-          */
-         var->data.location = decl->Semantic.Name;
-         var->data.index = decl->Semantic.Index;
+         var->data.driver_location = idx;
 
-         /* We definitely need to translate the interpolation field, because
-          * nir_print will decode it.
-          */
-         switch (decl->Interp.Interpolate) {
-         case TGSI_INTERPOLATE_CONSTANT:
-            var->data.interpolation = INTERP_QUALIFIER_FLAT;
-            break;
-         case TGSI_INTERPOLATE_LINEAR:
-            var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
-            break;
-         case TGSI_INTERPOLATE_PERSPECTIVE:
-            var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
-            break;
-         }
+         var->type = glsl_vec4_type();
+         if (is_array)
+            var->type = glsl_array_type(var->type, array_size);
+
+         switch (file) {
+         case TGSI_FILE_INPUT:
+            var->data.read_only = true;
+            var->data.mode = nir_var_shader_in;
+            var->name = ralloc_asprintf(var, "in_%d", idx);
+
+            /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
+             * instead, but nothing in NIR core is looking at the value
+             * currently, and this is less change to drivers.
+             */
+            var->data.location = decl->Semantic.Name;
+            var->data.index = decl->Semantic.Index;
 
-         exec_list_push_tail(&b->shader->inputs, &var->node);
-         break;
-      case TGSI_FILE_OUTPUT: {
-         /* Since we can't load from outputs in the IR, we make temporaries
-          * for the outputs and emit stores to the real outputs at the end of
-          * the shader.
-          */
-         nir_register *reg = nir_local_reg_create(b->impl);
-         reg->num_components = 4;
-         if (array_size > 1)
-            reg->num_array_elems = array_size;
+            /* We definitely need to translate the interpolation field, because
+             * nir_print will decode it.
+             */
+            switch (decl->Interp.Interpolate) {
+            case TGSI_INTERPOLATE_CONSTANT:
+               var->data.interpolation = INTERP_QUALIFIER_FLAT;
+               break;
+            case TGSI_INTERPOLATE_LINEAR:
+               var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+               break;
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+               break;
+            }
 
-         var->data.mode = nir_var_shader_out;
-         var->name = ralloc_asprintf(var, "out_%d", decl->Range.First);
+            exec_list_push_tail(&b->shader->inputs, &var->node);
+            break;
+         case TGSI_FILE_OUTPUT: {
+            /* Since we can't load from outputs in the IR, we make temporaries
+             * for the outputs and emit stores to the real outputs at the end of
+             * the shader.
+             */
+            nir_register *reg = nir_local_reg_create(b->impl);
+            reg->num_components = 4;
+            if (is_array)
+               reg->num_array_elems = array_size;
 
-         var->data.location = decl->Semantic.Name;
-         var->data.index = decl->Semantic.Index;
+            var->data.mode = nir_var_shader_out;
+            var->name = ralloc_asprintf(var, "out_%d", idx);
 
-         for (i = 0; i < array_size; i++) {
-            c->output_regs[decl->Range.First + i].offset = i;
-            c->output_regs[decl->Range.First + i].reg = reg;
+            var->data.location = decl->Semantic.Name;
+            if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+                decl->Semantic.Index == 0 &&
+                c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+               var->data.index = -1;
+            else
+               var->data.index = decl->Semantic.Index;
+
+            if (is_array) {
+               unsigned j;
+               for (j = 0; j < array_size; j++) {
+                  c->output_regs[idx + j].offset = i + j;
+                  c->output_regs[idx + j].reg = reg;
+               }
+            } else {
+               c->output_regs[idx].offset = i;
+               c->output_regs[idx].reg = reg;
+            }
+
+            exec_list_push_tail(&b->shader->outputs, &var->node);
          }
+            break;
+         case TGSI_FILE_CONSTANT:
+            var->data.mode = nir_var_uniform;
+            var->name = ralloc_asprintf(var, "uniform_%d", idx);
 
-         exec_list_push_tail(&b->shader->outputs, &var->node);
-      }
-         break;
-      case TGSI_FILE_CONSTANT:
-         var->data.mode = nir_var_uniform;
-         var->name = ralloc_asprintf(var, "uniform_%d", decl->Range.First);
+            exec_list_push_tail(&b->shader->uniforms, &var->node);
+            break;
+         default:
+            unreachable("bad declaration file");
+            return;
+         }
 
-         exec_list_push_tail(&b->shader->uniforms, &var->node);
-         break;
-      default:
-         unreachable("bad declaration file");
-         return;
+         if (is_array)
+            break;
       }
 
    }
@@ -401,7 +452,6 @@
 
       load->num_components = 4;
       load->const_index[0] = index;
-      load->const_index[1] = 1;
       if (dim) {
          if (dimind) {
             load->src[srcn] =
@@ -949,6 +999,9 @@
 setup_texture_info(nir_tex_instr *instr, unsigned texture)
 {
    switch (texture) {
+   case TGSI_TEXTURE_BUFFER:
+      instr->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+      break;
    case TGSI_TEXTURE_1D:
       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
       break;
@@ -1027,7 +1080,7 @@
    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
    nir_tex_instr *instr;
    nir_texop op;
-   unsigned num_srcs, samp = 1, i;
+   unsigned num_srcs, samp = 1, sview, i;
 
    switch (tgsi_inst->Instruction.Opcode) {
    case TGSI_OPCODE_TEX:
@@ -1042,6 +1095,11 @@
       op = nir_texop_txb;
       num_srcs = 2;
       break;
+   case TGSI_OPCODE_TXB2:
+      op = nir_texop_txb;
+      num_srcs = 2;
+      samp = 2;
+      break;
    case TGSI_OPCODE_TXL:
       op = nir_texop_txl;
       num_srcs = 2;
@@ -1052,7 +1110,12 @@
       samp = 2;
       break;
    case TGSI_OPCODE_TXF:
-      op = nir_texop_txf;
+      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+         op = nir_texop_txf_ms;
+      } else {
+         op = nir_texop_txf;
+      }
       num_srcs = 2;
       break;
    case TGSI_OPCODE_TXD:
@@ -1106,6 +1169,18 @@
    assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);
    instr->sampler_index = tgsi_inst->Src[samp].Register.Index;
 
+   /* TODO: if we supported any opcodes which take an explicit SVIEW
+    * src, we would use that here instead.  But for the "legacy"
+    * texture opcodes the SVIEW index is the same as the SAMP index:
+    */
+   sview = instr->sampler_index;
+
+   if (sview < c->num_samp_types) {
+      instr->dest_type = c->samp_types[sview];
+   } else {
+      instr->dest_type = nir_type_float;
+   }
+
    unsigned src_number = 0;
 
    instr->src[src_number].src =
@@ -1126,6 +1201,12 @@
       src_number++;
    }
 
+   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
+      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
+      instr->src[src_number].src_type = nir_tex_src_bias;
+      src_number++;
+   }
+
    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
       instr->src[src_number].src_type = nir_tex_src_lod;
@@ -1140,7 +1221,10 @@
 
    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
-      instr->src[src_number].src_type = nir_tex_src_lod;
+      if (op == nir_texop_txf_ms)
+         instr->src[src_number].src_type = nir_tex_src_ms_index;
+      else
+         instr->src[src_number].src_type = nir_tex_src_lod;
       src_number++;
    }
 
@@ -1286,6 +1370,7 @@
    [TGSI_OPCODE_SEQ] = nir_op_seq,
    [TGSI_OPCODE_SGT] = 0,
    [TGSI_OPCODE_SIN] = nir_op_fsin,
+   [TGSI_OPCODE_SNE] = nir_op_sne,
    [TGSI_OPCODE_SLE] = 0,
    [TGSI_OPCODE_TEX] = 0,
    [TGSI_OPCODE_TXD] = 0,
@@ -1433,7 +1518,7 @@
       return;
 
    nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
-   for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) {
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
       src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
    }
    nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
@@ -1625,7 +1710,6 @@
    }
 
    if (tgsi_inst->Instruction.Saturate) {
-      assert(tgsi_inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
       assert(!dest.dest.is_ssa);
       ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
    }
@@ -1670,10 +1754,11 @@
       for (i = 0; i < array_len; i++) {
          nir_intrinsic_instr *store =
             nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+         unsigned loc = var->data.driver_location + i;
          store->num_components = 4;
-         store->const_index[0] = var->data.driver_location + i;
-         store->const_index[1] = 1;
-         store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
+         store->const_index[0] = loc;
+         store->src[0].reg.reg = c->output_regs[loc].reg;
+         store->src[0].reg.base_offset = c->output_regs[loc].offset;
          nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
       }
    }
@@ -1713,6 +1798,9 @@
    c->imm_defs = rzalloc_array(c, nir_ssa_def *,
                                scan.file_max[TGSI_FILE_IMMEDIATE] + 1);
 
+   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);
+
    c->if_stack = rzalloc_array(c, struct exec_list *,
                                (scan.opcode_count[TGSI_OPCODE_IF] +
                                 scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_memory_aligned.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_memory_aligned.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_memory_aligned.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_memory_aligned.h	2015-09-16 14:36:08.000000000 +0000
@@ -55,7 +55,7 @@
 /**
  * Return memory on given byte alignment
  */
-static INLINE void *
+static inline void *
 os_malloc_aligned(size_t size, size_t alignment)
 {
    char *ptr, *buf;
@@ -87,7 +87,7 @@
 /**
  * Free memory returned by align_malloc().
  */
-static INLINE void
+static inline void
 os_free_aligned(void *ptr)
 {
    if (ptr) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_memory_stdc.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_memory_stdc.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_memory_stdc.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_memory_stdc.h	2015-09-16 14:36:08.000000000 +0000
@@ -50,7 +50,7 @@
 
 #if defined(HAVE_POSIX_MEMALIGN)
 
-static INLINE void *
+static inline void *
 os_malloc_aligned(size_t size, size_t alignment)
 {
    void *ptr;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_mman.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_mman.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_mman.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_mman.h	2015-09-16 14:36:08.000000000 +0000
@@ -58,7 +58,7 @@
 
 extern void *__mmap2(void *, size_t, int, int, int, size_t);
 
-static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags,
+static inline void *os_mmap(void *addr, size_t length, int prot, int flags,
                             int fd, loff_t offset)
 {
    /* offset must be aligned to 4096 (not necessarily the page size) */
@@ -78,7 +78,7 @@
 #  define os_mmap(addr, length, prot, flags, fd, offset) \
              mmap(addr, length, prot, flags, fd, offset)
 
-static INLINE int os_munmap(void *addr, size_t length)
+static inline int os_munmap(void *addr, size_t length)
 {
    /* Copied from configure code generated by AC_SYS_LARGEFILE */
 #define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_thread.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_thread.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_thread.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_thread.h	2015-09-16 14:36:08.000000000 +0000
@@ -54,7 +54,7 @@
 #define PIPE_THREAD_ROUTINE( name, param ) \
    int name( void *param )
 
-static INLINE pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ), void *param )
+static inline pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ), void *param )
 {
    pipe_thread thread;
 #ifdef HAVE_PTHREAD
@@ -75,17 +75,17 @@
    return thread;
 }
 
-static INLINE int pipe_thread_wait( pipe_thread thread )
+static inline int pipe_thread_wait( pipe_thread thread )
 {
    return thrd_join( thread, NULL );
 }
 
-static INLINE int pipe_thread_destroy( pipe_thread thread )
+static inline int pipe_thread_destroy( pipe_thread thread )
 {
    return thrd_detach( thread );
 }
 
-static INLINE void pipe_thread_setname( const char *name )
+static inline void pipe_thread_setname( const char *name )
 {
 #if defined(HAVE_PTHREAD)
 #  if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
@@ -145,17 +145,17 @@
 
 typedef pthread_barrier_t pipe_barrier;
 
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
+static inline void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
 {
    pthread_barrier_init(barrier, NULL, count);
 }
 
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
+static inline void pipe_barrier_destroy(pipe_barrier *barrier)
 {
    pthread_barrier_destroy(barrier);
 }
 
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
+static inline void pipe_barrier_wait(pipe_barrier *barrier)
 {
    pthread_barrier_wait(barrier);
 }
@@ -171,7 +171,7 @@
    pipe_condvar condvar;
 } pipe_barrier;
 
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
+static inline void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
 {
    barrier->count = count;
    barrier->waiters = 0;
@@ -180,14 +180,14 @@
    pipe_condvar_init(barrier->condvar);
 }
 
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
+static inline void pipe_barrier_destroy(pipe_barrier *barrier)
 {
    assert(barrier->waiters == 0);
    pipe_mutex_destroy(barrier->mutex);
    pipe_condvar_destroy(barrier->condvar);
 }
 
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
+static inline void pipe_barrier_wait(pipe_barrier *barrier)
 {
    pipe_mutex_lock(barrier->mutex);
 
@@ -225,7 +225,7 @@
 } pipe_semaphore;
 
 
-static INLINE void
+static inline void
 pipe_semaphore_init(pipe_semaphore *sema, int init_val)
 {
    pipe_mutex_init(sema->mutex);
@@ -233,7 +233,7 @@
    sema->counter = init_val;
 }
 
-static INLINE void
+static inline void
 pipe_semaphore_destroy(pipe_semaphore *sema)
 {
    pipe_mutex_destroy(sema->mutex);
@@ -241,7 +241,7 @@
 }
 
 /** Signal/increment semaphore counter */
-static INLINE void
+static inline void
 pipe_semaphore_signal(pipe_semaphore *sema)
 {
    pipe_mutex_lock(sema->mutex);
@@ -251,7 +251,7 @@
 }
 
 /** Wait for semaphore counter to be greater than zero */
-static INLINE void
+static inline void
 pipe_semaphore_wait(pipe_semaphore *sema)
 {
    pipe_mutex_lock(sema->mutex);
@@ -277,7 +277,7 @@
 #define PIPE_TSD_INIT_MAGIC 0xff8adc98
 
 
-static INLINE void
+static inline void
 pipe_tsd_init(pipe_tsd *tsd)
 {
    if (tss_create(&tsd->key, NULL/*free*/) != 0) {
@@ -286,7 +286,7 @@
    tsd->initMagic = PIPE_TSD_INIT_MAGIC;
 }
 
-static INLINE void *
+static inline void *
 pipe_tsd_get(pipe_tsd *tsd)
 {
    if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
@@ -295,7 +295,7 @@
    return tss_get(tsd->key);
 }
 
-static INLINE void
+static inline void
 pipe_tsd_set(pipe_tsd *tsd, void *value)
 {
    if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_time.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_time.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_time.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_time.c	2015-09-16 14:36:08.000000000 +0000
@@ -33,11 +33,13 @@
  */
 
 
-#include "pipe/p_config.h"
+#include "pipe/p_defines.h"
+#include "util/u_atomic.h"
 
 #if defined(PIPE_OS_UNIX)
 #  include <time.h> /* timeval */
 #  include <sys/time.h> /* timeval */
+#  include <sched.h> /* sched_yield */
 #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
 #  include <windows.h>
 #else
@@ -92,3 +94,78 @@
 }
 
 #endif
+
+
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout)
+{
+   int64_t time, abs_timeout;
+
+   /* Also check for the type upper bound. */
+   if (timeout == PIPE_TIMEOUT_INFINITE || timeout > INT64_MAX)
+      return PIPE_TIMEOUT_INFINITE;
+
+   time = os_time_get_nano();
+   abs_timeout = time + (int64_t)timeout;
+
+   /* Check for overflow. */
+   if (abs_timeout < time)
+      return PIPE_TIMEOUT_INFINITE;
+
+   return abs_timeout;
+}
+
+
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout)
+{
+   if (!p_atomic_read(var))
+      return true;
+
+   if (!timeout)
+      return false;
+
+   if (timeout == PIPE_TIMEOUT_INFINITE) {
+      while (p_atomic_read(var)) {
+#if defined(PIPE_OS_UNIX)
+         sched_yield();
+#endif
+      }
+      return true;
+   }
+   else {
+      int64_t start_time = os_time_get_nano();
+      int64_t end_time = start_time + timeout;
+
+      while (p_atomic_read(var)) {
+         if (os_time_timeout(start_time, end_time, os_time_get_nano()))
+            return false;
+
+#if defined(PIPE_OS_UNIX)
+         sched_yield();
+#endif
+      }
+      return true;
+   }
+}
+
+
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout)
+{
+   if (!p_atomic_read(var))
+      return true;
+
+   if (timeout == PIPE_TIMEOUT_INFINITE)
+      return os_wait_until_zero(var, PIPE_TIMEOUT_INFINITE);
+
+   while (p_atomic_read(var)) {
+      if (os_time_get_nano() >= timeout)
+         return false;
+
+#if defined(PIPE_OS_UNIX)
+      sched_yield();
+#endif
+   }
+   return true;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_time.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_time.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/os/os_time.h	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/os/os_time.h	2015-09-16 14:36:08.000000000 +0000
@@ -45,7 +45,7 @@
 #include "pipe/p_compiler.h"
 
 
-#ifdef	__cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -60,9 +60,10 @@
 /*
  * Get the current time in microseconds from an unknown base.
  */
-static INLINE int64_t
-os_time_get(void) {
-    return os_time_get_nano() / 1000;
+static inline int64_t
+os_time_get(void)
+{
+   return os_time_get_nano() / 1000;
 }
 
 
@@ -82,19 +83,56 @@
  *
  * Returns true if the current time has elapsed beyond the specified interval.
  */
-static INLINE boolean
+static inline boolean
 os_time_timeout(int64_t start,
                 int64_t end,
                 int64_t curr)
 {
-   if(start <= end)
+   if (start <= end)
       return !(start <= curr && curr < end);
    else
       return !((start <= curr) || (curr < end));
 }
 
 
-#ifdef	__cplusplus
+/**
+ * Convert a relative timeout in nanoseconds into an absolute timeout,
+ * in other words, it returns current time + timeout.
+ * os_time_get_nano() must be monotonic.
+ * PIPE_TIMEOUT_INFINITE is passed through unchanged. If the calculation
+ * overflows, PIPE_TIMEOUT_INFINITE is returned.
+ */
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ *
+ * \param var           variable
+ * \param timeout       timeout in ns, can be anything from 0 (no wait) to
+ *                      PIPE_TIMEOUT_INFINITE (wait forever)
+ * \return     true if the variable is zero
+ */
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ * The timeout is the absolute time when the waiting should stop. If it is
+ * less than or equal to the current time, it only returns the status and
+ * doesn't wait. PIPE_TIMEOUT_INFINITE waits forever. This requires that
+ * os_time_get_nano is monotonic.
+ *
+ * \param var       variable
+ * \param timeout   the time in ns when the waiting should stop
+ * \return     true if the variable is zero
+ */
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout);
+
+#ifdef __cplusplus
 }
 #endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c	2015-09-16 14:36:09.000000000 +0000
@@ -149,7 +149,7 @@
 };
 
 
-static INLINE struct fenced_manager *
+static inline struct fenced_manager *
 fenced_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -157,7 +157,7 @@
 }
 
 
-static INLINE struct fenced_buffer *
+static inline struct fenced_buffer *
 fenced_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -240,7 +240,7 @@
 }
 
 
-static INLINE void
+static inline void
 fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
                              struct fenced_buffer *fenced_buf)
 {
@@ -265,7 +265,7 @@
  *
  * Reference count should be incremented before calling this function.
  */
-static INLINE void
+static inline void
 fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
                          struct fenced_buffer *fenced_buf)
 {
@@ -289,7 +289,7 @@
  *
  * Returns TRUE if the buffer was detroyed.
  */
-static INLINE boolean
+static inline boolean
 fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
                             struct fenced_buffer *fenced_buf)
 {
@@ -326,7 +326,7 @@
  * This function will release and re-acquire the mutex, so any copy of mutable
  * state must be discarded after calling it.
  */
-static INLINE enum pipe_error
+static inline enum pipe_error
 fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
                             struct fenced_buffer *fenced_buf)
 {
@@ -376,6 +376,7 @@
          /* TODO: remove consequents buffers with the same fence? */
 
          assert(!destroyed);
+         (void) destroyed; /* silence unused var warning for non-debug build */
 
          fenced_buf->flags &= ~PB_USAGE_GPU_READ_WRITE;
 
@@ -549,7 +550,7 @@
  * This function is a shorthand around pb_manager::create_buffer for
  * fenced_buffer_create_gpu_storage_locked()'s benefit.
  */
-static INLINE boolean
+static inline boolean
 fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
                                             struct fenced_buffer *fenced_buf)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_buffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_buffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_buffer.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_buffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -158,7 +158,7 @@
 
 /* Accessor functions for pb->vtbl:
  */
-static INLINE void *
+static inline void *
 pb_map(struct pb_buffer *buf, 
        unsigned flags, void *flush_ctx)
 {
@@ -170,7 +170,7 @@
 }
 
 
-static INLINE void 
+static inline void 
 pb_unmap(struct pb_buffer *buf)
 {
    assert(buf);
@@ -181,7 +181,7 @@
 }
 
 
-static INLINE void
+static inline void
 pb_get_base_buffer( struct pb_buffer *buf,
 		    struct pb_buffer **base_buf,
 		    pb_size *offset )
@@ -200,7 +200,7 @@
 }
 
 
-static INLINE enum pipe_error 
+static inline enum pipe_error 
 pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
 {
    assert(buf);
@@ -211,7 +211,7 @@
 }
 
 
-static INLINE void 
+static inline void 
 pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
 {
    assert(buf);
@@ -222,7 +222,7 @@
 }
 
 
-static INLINE void 
+static inline void 
 pb_destroy(struct pb_buffer *buf)
 {
    assert(buf);
@@ -232,7 +232,7 @@
    buf->vtbl->destroy(buf);
 }
 
-static INLINE void
+static inline void
 pb_reference(struct pb_buffer **dst,
              struct pb_buffer *src)
 {
@@ -248,7 +248,7 @@
  * Utility function to check whether the provided alignment is consistent with
  * the requested or not.
  */
-static INLINE boolean
+static inline boolean
 pb_check_alignment(pb_size requested, pb_size provided)
 {
    if(!requested)
@@ -265,7 +265,7 @@
  * Utility function to check whether the provided alignment is consistent with
  * the requested or not.
  */
-static INLINE boolean
+static inline boolean
 pb_check_usage(unsigned requested, unsigned provided)
 {
    return (requested & provided) == requested ? TRUE : FALSE;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c	2015-09-16 14:36:09.000000000 +0000
@@ -49,7 +49,7 @@
 
 extern const struct pb_vtbl malloc_buffer_vtbl;
 
-static INLINE struct malloc_buffer *
+static inline struct malloc_buffer *
 malloc_buffer(struct pb_buffer *buf)
 {
    assert(buf);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c	2015-09-16 14:36:09.000000000 +0000
@@ -50,7 +50,7 @@
 };
 
 
-static INLINE struct pb_alt_manager *
+static inline struct pb_alt_manager *
 pb_alt_manager(struct pb_manager *mgr)
 {
    assert(mgr);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c	2015-09-16 14:36:09.000000000 +0000
@@ -88,7 +88,7 @@
 };
 
 
-static INLINE struct pb_cache_buffer *
+static inline struct pb_cache_buffer *
 pb_cache_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -96,7 +96,7 @@
 }
 
 
-static INLINE struct pb_cache_manager *
+static inline struct pb_cache_manager *
 pb_cache_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -104,18 +104,42 @@
 }
 
 
+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{  /* Detach buf from its manager; pb_cache_manager_remove_buffer() calls this with mgr->mutex held. */
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (buf->head.next) {  /* NOTE(review): presumably non-NULL only while buf is linked in a list -- confirm */
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      mgr->cache_size -= buf->base.size;  /* keep the manager's size accounting in step with the unlink */
+   }
+   buf->mgr = NULL;  /* marks buf as detached; the destroy code checks buf->mgr for NULL */
+}
+
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{  /* Detach pb_buf from its cache manager; the buffer itself is not freed here. */
+   struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;  /* unchecked cast: assumes a cache-manager buffer -- TODO confirm callers */
+   struct pb_cache_manager *mgr = buf->mgr;  /* note: read before the mutex is taken */
+
+   if (!mgr)  /* no manager attached (e.g. already removed): nothing to do */
+      return;
+
+   pipe_mutex_lock(mgr->mutex);
+   _pb_cache_manager_remove_buffer_locked(buf);  /* unlinks buf if it is queued and clears buf->mgr */
+   pipe_mutex_unlock(mgr->mutex);
+}
+
 /**
  * Actually destroy the buffer.
  */
-static INLINE void
+static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
-   struct pb_cache_manager *mgr = buf->mgr;
-
-   LIST_DEL(&buf->head);
-   assert(mgr->numDelayed);
-   --mgr->numDelayed;
-   mgr->cache_size -= buf->base.size;
+   if (buf->mgr)
+      _pb_cache_manager_remove_buffer_locked(buf);
    assert(!pipe_is_referenced(&buf->base.reference));
    pb_reference(&buf->buffer, NULL);
    FREE(buf);
@@ -156,6 +180,12 @@
    struct pb_cache_buffer *buf = pb_cache_buffer(_buf);   
    struct pb_cache_manager *mgr = buf->mgr;
 
+   if (!mgr) {
+      pb_reference(&buf->buffer, NULL);
+      FREE(buf);
+      return;
+   }
+
    pipe_mutex_lock(mgr->mutex);
    assert(!pipe_is_referenced(&buf->base.reference));
    
@@ -235,7 +265,7 @@
 };
 
 
-static INLINE int
+static inline int
 pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,  
                           pb_size size,
                           const struct pb_desc *desc)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c	2015-09-16 14:36:09.000000000 +0000
@@ -99,7 +99,7 @@
 };
 
 
-static INLINE struct pb_debug_buffer *
+static inline struct pb_debug_buffer *
 pb_debug_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -107,7 +107,7 @@
 }
 
 
-static INLINE struct pb_debug_manager *
+static inline struct pb_debug_manager *
 pb_debug_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -123,7 +123,7 @@
 };
 
 
-static INLINE void 
+static inline void 
 fill_random_pattern(uint8_t *dst, pb_size size)
 {
    pb_size i = 0;
@@ -134,7 +134,7 @@
 }
 
 
-static INLINE boolean 
+static inline boolean 
 check_random_pattern(const uint8_t *dst, pb_size size, 
                      pb_size *min_ofs, pb_size *max_ofs) 
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h	2015-09-16 14:36:09.000000000 +0000
@@ -166,6 +166,11 @@
                         unsigned bypass_usage,
                         uint64_t maximum_cache_size);
 
+/** Remove a buffer from the cache, but keep it alive (nothing is freed).
+ *  \param buf  buffer to detach; presumably must have been created by a cache
+ *              manager (TODO confirm). No-op if it has no manager attached. */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);
 
 struct pb_fence_ops;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c	2015-09-16 14:36:09.000000000 +0000
@@ -65,7 +65,7 @@
 };
 
 
-static INLINE struct mm_pb_manager *
+static inline struct mm_pb_manager *
 mm_pb_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -83,7 +83,7 @@
 };
 
 
-static INLINE struct mm_buffer *
+static inline struct mm_buffer *
 mm_buffer(struct pb_buffer *buf)
 {
    assert(buf);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c	2015-09-16 14:36:09.000000000 +0000
@@ -70,7 +70,7 @@
 
 extern const struct pb_vtbl pb_ondemand_buffer_vtbl;
 
-static INLINE struct pb_ondemand_buffer *
+static inline struct pb_ondemand_buffer *
 pb_ondemand_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -80,7 +80,7 @@
    return (struct pb_ondemand_buffer *)buf;
 }
 
-static INLINE struct pb_ondemand_manager *
+static inline struct pb_ondemand_manager *
 pb_ondemand_manager(struct pb_manager *mgr)
 {
    assert(mgr);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c	2015-09-16 14:36:09.000000000 +0000
@@ -73,7 +73,7 @@
 };
 
 
-static INLINE struct pool_pb_manager *
+static inline struct pool_pb_manager *
 pool_pb_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -93,7 +93,7 @@
 };
 
 
-static INLINE struct pool_buffer *
+static inline struct pool_buffer *
 pool_buffer(struct pb_buffer *buf)
 {
    assert(buf);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c	2015-09-16 14:36:09.000000000 +0000
@@ -163,7 +163,7 @@
 };
 
 
-static INLINE struct pb_slab_buffer *
+static inline struct pb_slab_buffer *
 pb_slab_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -171,7 +171,7 @@
 }
 
 
-static INLINE struct pb_slab_manager *
+static inline struct pb_slab_manager *
 pb_slab_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -179,7 +179,7 @@
 }
 
 
-static INLINE struct pb_slab_range_manager *
+static inline struct pb_slab_range_manager *
 pb_slab_range_manager(struct pb_manager *mgr)
 {
    assert(mgr);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/Makefile.am	2015-09-16 14:36:08.000000000 +0000
@@ -1,37 +1,28 @@
 include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
 
-AM_CPPFLAGS = $(DEFINES) \
-	$(VISIBILITY_CFLAGS) \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/src \
+# XXX: check whether the -I$(top_srcdir)/src/gallium/winsys include path is still needed
+AM_CFLAGS = \
 	-I$(top_srcdir)/src/loader \
-	-I$(top_srcdir)/src/gallium/include \
-	-I$(top_srcdir)/src/gallium/auxiliary \
-	-I$(top_srcdir)/src/gallium/winsys
+	-I$(top_srcdir)/src/gallium/winsys \
+	$(GALLIUM_PIPE_LOADER_DEFINES) \
+	$(GALLIUM_CFLAGS) \
+	$(VISIBILITY_CFLAGS)
 
 noinst_LTLIBRARIES = libpipe_loader.la
-noinst_LTLIBRARIES += libpipe_loader_client.la
+
+libpipe_loader_la_SOURCES = \
+	$(COMMON_SOURCES)
 
 if HAVE_DRM_LOADER_GALLIUM
-AM_CFLAGS = $(LIBDRM_CFLAGS)
+AM_CFLAGS += \
+	$(LIBDRM_CFLAGS)
 
-COMMON_SOURCES += $(DRM_SOURCES)
+libpipe_loader_la_SOURCES += \
+	$(DRM_SOURCES)
 
-COMMON_LIBADD = \
+libpipe_loader_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la
 
 endif
 
-libpipe_loader_la_CFLAGS  = \
-	$(GALLIUM_PIPE_LOADER_DEFINES) \
-	$(AM_CFLAGS) $(AM_CPPFLAGS)
-libpipe_loader_la_SOURCES = $(COMMON_SOURCES)
-libpipe_loader_la_LIBADD  = $(COMMON_LIBADD) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
-
-libpipe_loader_client_la_CFLAGS  = \
-	$(GALLIUM_PIPE_LOADER_CLIENT_DEFINES) \
-	$(AM_CFLAGS) $(AM_CPPFLAGS)
-libpipe_loader_client_la_SOURCES = $(COMMON_SOURCES)
-libpipe_loader_client_la_LIBADD  = $(COMMON_LIBADD) \
-	$(GALLIUM_PIPE_LOADER_CLIENT_LIBS)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c	2015-09-16 14:36:08.000000000 +0000
@@ -35,12 +35,6 @@
 #include <xf86drm.h>
 #include <unistd.h>
 
-#ifdef HAVE_PIPE_LOADER_XCB
-
-#include <xcb/dri2.h>
-
-#endif
-
 #include "loader.h"
 #include "state_tracker/drm_driver.h"
 #include "pipe_loader_priv.h"
@@ -64,78 +58,8 @@
 
 static struct pipe_loader_ops pipe_loader_drm_ops;
 
-#ifdef HAVE_PIPE_LOADER_XCB
-
-static xcb_screen_t *
-get_xcb_screen(xcb_screen_iterator_t iter, int screen)
-{
-    for (; iter.rem; --screen, xcb_screen_next(&iter))
-        if (screen == 0)
-            return iter.data;
-
-    return NULL;
-}
-
-#endif
-
-static void
-pipe_loader_drm_x_auth(int fd)
-{
-#ifdef HAVE_PIPE_LOADER_XCB
-   /* Try authenticate with the X server to give us access to devices that X
-    * is running on. */
-   xcb_connection_t *xcb_conn;
-   const xcb_setup_t *xcb_setup;
-   xcb_screen_iterator_t s;
-   xcb_dri2_connect_cookie_t connect_cookie;
-   xcb_dri2_connect_reply_t *connect;
-   drm_magic_t magic;
-   xcb_dri2_authenticate_cookie_t authenticate_cookie;
-   xcb_dri2_authenticate_reply_t *authenticate;
-   int screen;
-
-   xcb_conn = xcb_connect(NULL, &screen);
-
-   if(!xcb_conn)
-      return;
-
-   xcb_setup = xcb_get_setup(xcb_conn);
-
-  if (!xcb_setup)
-    goto disconnect;
-
-   s = xcb_setup_roots_iterator(xcb_setup);
-   connect_cookie = xcb_dri2_connect_unchecked(xcb_conn,
-                                               get_xcb_screen(s, screen)->root,
-                                               XCB_DRI2_DRIVER_TYPE_DRI);
-   connect = xcb_dri2_connect_reply(xcb_conn, connect_cookie, NULL);
-
-   if (!connect || connect->driver_name_length
-                   + connect->device_name_length == 0) {
-
-      goto disconnect;
-   }
-
-   if (drmGetMagic(fd, &magic))
-      goto disconnect;
-
-   authenticate_cookie = xcb_dri2_authenticate_unchecked(xcb_conn,
-                                                         s.data->root,
-                                                         magic);
-   authenticate = xcb_dri2_authenticate_reply(xcb_conn,
-                                              authenticate_cookie,
-                                              NULL);
-   FREE(authenticate);
-
-disconnect:
-   xcb_disconnect(xcb_conn);
-
-#endif
-}
-
 bool
-pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
-                         boolean auth_x)
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
 {
    struct pipe_loader_drm_device *ddev = CALLOC_STRUCT(pipe_loader_drm_device);
    int vendor_id, chip_id;
@@ -153,9 +77,6 @@
    ddev->base.ops = &pipe_loader_drm_ops;
    ddev->fd = fd;
 
-   if (auth_x)
-      pipe_loader_drm_x_auth(fd);
-
    ddev->base.driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM);
    if (!ddev->base.driver_name)
       goto fail;
@@ -169,34 +90,19 @@
 }
 
 static int
-open_drm_minor(int minor)
-{
-   char path[PATH_MAX];
-   snprintf(path, sizeof(path), DRM_DEV_NAME, DRM_DIR_NAME, minor);
-   return open(path, O_RDWR, 0);
-}
-
-static int
 open_drm_render_node_minor(int minor)
 {
    char path[PATH_MAX];
    snprintf(path, sizeof(path), DRM_RENDER_NODE_DEV_NAME_FORMAT, DRM_DIR_NAME,
             minor);
-   return open(path, O_RDWR, 0);
+   return loader_open_device(path);
 }
 
 int
 pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
 {
-   int i, k, fd, num_render_node_devs;
-   int j = 0;
+   int i, j, fd;
 
-   struct {
-      unsigned vendor_id;
-      unsigned chip_id;
-   } render_node_devs[DRM_RENDER_NODE_MAX_NODES];
-
-   /* Look for render nodes first */
    for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0;
         i <= DRM_RENDER_NODE_MAX_MINOR; i++) {
       fd = open_drm_render_node_minor(i);
@@ -204,14 +110,11 @@
       if (fd < 0)
          continue;
 
-      if (!pipe_loader_drm_probe_fd(&dev, fd, false)) {
+      if (!pipe_loader_drm_probe_fd(&dev, fd)) {
          close(fd);
          continue;
       }
 
-      render_node_devs[j].vendor_id = dev->u.pci.vendor_id;
-      render_node_devs[j].chip_id = dev->u.pci.chip_id;
-
       if (j < ndev) {
          devs[j] = dev;
       } else {
@@ -221,46 +124,6 @@
       j++;
    }
 
-   num_render_node_devs = j;
-
-   /* Next look for drm devices. */
-   for (i = 0; i < DRM_MAX_MINOR; i++) {
-      struct pipe_loader_device *dev;
-      boolean duplicate = FALSE;
-      fd = open_drm_minor(i);
-      if (fd < 0)
-         continue;
-
-      if (!pipe_loader_drm_probe_fd(&dev, fd, true)) {
-         close(fd);
-         continue;
-      }
-
-      /* Check to make sure we aren't already accessing this device via
-       * render nodes.
-       */
-      for (k = 0; k < num_render_node_devs; k++) {
-         if (dev->u.pci.vendor_id == render_node_devs[k].vendor_id &&
-             dev->u.pci.chip_id == render_node_devs[k].chip_id) {
-            close(fd);
-            dev->ops->release(&dev);
-            duplicate = TRUE;
-            break;
-         }
-      }
-
-      if (duplicate)
-         continue;
-
-      if (j < ndev) {
-         devs[j] = dev;
-      } else {
-         dev->ops->release(&dev);
-      }
-
-      j++;
-   }
-
    return j;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/pipe_loader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/pipe_loader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/pipe_loader.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/pipe_loader.h	2015-09-16 14:36:08.000000000 +0000
@@ -36,10 +36,6 @@
 #include "pipe/p_compiler.h"
 #include "state_tracker/drm_driver.h"
 
-#ifdef HAVE_PIPE_LOADER_XLIB
-#include <X11/Xlib.h>
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -116,21 +112,6 @@
 void
 pipe_loader_release(struct pipe_loader_device **devs, int ndev);
 
-#ifdef HAVE_PIPE_LOADER_XLIB
-
-/**
- * Initialize Xlib for an associated display.
- *
- * This function is platform-specific.
- *
- * \sa pipe_loader_probe
- */
-bool
-pipe_loader_sw_probe_xlib(struct pipe_loader_device **devs, Display *display);
-
-#endif
-
-
 #ifdef HAVE_PIPE_LOADER_DRI
 
 /**
@@ -195,13 +176,9 @@
  * This function is platform-specific.
  *
  * \sa pipe_loader_probe
- *
- * \param auth_x If true, the pipe-loader will attempt to
- *               authenticate with the X server.
  */
 bool
-pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
-                         boolean auth_x);
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);
 
 #endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c	2015-09-16 14:36:08.000000000 +0000
@@ -32,10 +32,6 @@
 #include "sw/dri/dri_sw_winsys.h"
 #include "sw/null/null_sw_winsys.h"
 #include "sw/wrapper/wrapper_sw_winsys.h"
-#ifdef HAVE_PIPE_LOADER_XLIB
-/* Explicitly wrap the header to ease build without X11 headers */
-#include "sw/xlib/xlib_sw_winsys.h"
-#endif
 #include "target-helpers/inline_sw_helper.h"
 #include "state_tracker/drisw_api.h"
 
@@ -53,29 +49,6 @@
    null_sw_create
 };
 
-#ifdef HAVE_PIPE_LOADER_XLIB
-bool
-pipe_loader_sw_probe_xlib(struct pipe_loader_device **devs, Display *display)
-{
-   struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
-
-   if (!sdev)
-      return false;
-
-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
-   sdev->ws = xlib_create_sw_winsys(display);
-   if (!sdev->ws) {
-      FREE(sdev);
-      return false;
-   }
-   *devs = &sdev->base;
-
-   return true;
-}
-#endif
-
 #ifdef HAVE_PIPE_LOADER_DRI
 bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/postprocess/pp_colors.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/postprocess/pp_colors.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/postprocess/pp_colors.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/postprocess/pp_colors.c	2015-09-16 14:36:09.000000000 +0000
@@ -37,6 +37,7 @@
 {
 
    struct pp_program *p = ppq->p;
+   const struct pipe_sampler_state *samplers[] = {&p->sampler_point};
 
    pp_filter_setup_in(p, in);
    pp_filter_setup_out(p, out);
@@ -44,8 +45,7 @@
    pp_filter_set_fb(p);
    pp_filter_misc_state(p);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
 
    cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/postprocess/pp_mlaa.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/postprocess/pp_mlaa.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/postprocess/pp_mlaa.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/postprocess/pp_mlaa.c	2015-09-16 14:36:09.000000000 +0000
@@ -141,8 +141,10 @@
    p->pipe->clear(p->pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR0,
                   &p->clear_color, 0, 0);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] = {&p->sampler_point};
+      cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+   }
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
 
    cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]);    /* offsetvs */
@@ -168,10 +170,11 @@
 
    pp_filter_set_clear_fb(p);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->sampler_point);
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 2, &p->sampler);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] =
+         {&p->sampler_point, &p->sampler_point, &p->sampler};
+      cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 3, samplers);
+   }
 
    arr[0] = p->view;
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 3, arr);
@@ -199,9 +202,11 @@
    u_sampler_view_default_template(&v_tmp, in, in->format);
    arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->sampler_point);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] =
+         {&p->sampler_point, &p->sampler_point};
+      cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 2, samplers);
+   }
 
    arr[1] = p->view;
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 2, arr);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/postprocess/pp_run.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/postprocess/pp_run.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/postprocess/pp_run.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/postprocess/pp_run.c	2015-09-16 14:36:09.000000000 +0000
@@ -125,8 +125,8 @@
    cso_save_rasterizer(cso);
    cso_save_sample_mask(cso);
    cso_save_min_samples(cso);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(cso);
+   cso_save_fragment_sampler_views(cso);
    cso_save_stencil_ref(cso);
    cso_save_stream_outputs(cso);
    cso_save_vertex_elements(cso);
@@ -196,8 +196,8 @@
    cso_restore_rasterizer(cso);
    cso_restore_sample_mask(cso);
    cso_restore_min_samples(cso);
-   cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_samplers(cso);
+   cso_restore_fragment_sampler_views(cso);
    cso_restore_stencil_ref(cso);
    cso_restore_stream_outputs(cso);
    cso_restore_vertex_elements(cso);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/rtasm/rtasm_execmem.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/rtasm/rtasm_execmem.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/rtasm/rtasm_execmem.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/rtasm/rtasm_execmem.c	2015-09-16 14:36:09.000000000 +0000
@@ -49,7 +49,7 @@
 #include <windows.h>
 #endif
 
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_CYGWIN)
+#if defined(PIPE_OS_UNIX)
 
 
 /*
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/rtasm/rtasm_x86sse.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/rtasm/rtasm_x86sse.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/rtasm/rtasm_x86sse.c	2015-09-16 14:36:09.000000000 +0000
@@ -510,7 +510,7 @@
 /**
  * Immediate group 1 instructions.
  */
-static INLINE void 
+static inline void 
 x86_group1_imm( struct x86_function *p, 
                 unsigned op, struct x86_reg dst, int imm )
 {
@@ -2196,7 +2196,7 @@
 }
 
 
-static INLINE x86_func
+static inline x86_func
 voidptr_to_x86_func(void *v)
 {
    union {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/rtasm/rtasm_x86sse.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/rtasm/rtasm_x86sse.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/rtasm/rtasm_x86sse.h	2015-09-16 14:36:09.000000000 +0000
@@ -136,7 +136,7 @@
 };
 
 /* make this read a member of x86_function if target != host is desired */
-static INLINE enum x86_target x86_target( struct x86_function* p )
+static inline enum x86_target x86_target( struct x86_function* p )
 {
 #ifdef PIPE_ARCH_X86
    return X86_32;
@@ -147,7 +147,7 @@
 #endif
 }
 
-static INLINE unsigned x86_target_caps( struct x86_function* p )
+static inline unsigned x86_target_caps( struct x86_function* p )
 {
    return p->caps;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_debug_helper.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_debug_helper.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_debug_helper.h	2015-09-16 14:36:09.000000000 +0000
@@ -27,7 +27,7 @@
  * TODO: Audit the following *screen_create() - all of
  * them should return the original screen on failuire.
  */
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 debug_screen_wrap(struct pipe_screen *screen)
 {
 #if defined(GALLIUM_RBUG)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_drm_helper.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_drm_helper.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_drm_helper.h	2015-09-16 14:36:09.000000000 +0000
@@ -42,6 +42,7 @@
 #if GALLIUM_RADEONSI
 #include "radeon/radeon_winsys.h"
 #include "radeon/drm/radeon_drm_public.h"
+#include "amdgpu/drm/amdgpu_public.h"
 #include "radeonsi/si_public.h"
 #endif
 
@@ -228,7 +229,12 @@
 {
    struct radeon_winsys *rw;
 
-   rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+   /* First, try amdgpu. */
+   rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+   if (!rw)
+      rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
    return rw ? debug_screen_wrap(rw->screen) : NULL;
 }
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_sw_helper.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_sw_helper.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_sw_helper.h	2015-09-16 14:36:09.000000000 +0000
@@ -20,7 +20,7 @@
 #endif
 
 
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
 {
    struct pipe_screen *screen = NULL;
@@ -39,7 +39,7 @@
 }
 
 
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 sw_screen_create(struct sw_winsys *winsys)
 {
    const char *default_driver;
@@ -71,7 +71,7 @@
    return galliumsw_driver_extensions;
 }
 
-INLINE struct pipe_screen *
+inline struct pipe_screen *
 drisw_create_screen(struct drisw_loader_funcs *lf)
 {
    struct sw_winsys *winsys = NULL;
@@ -98,7 +98,7 @@
 
 extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen);
 
-INLINE struct pipe_screen *
+inline struct pipe_screen *
 ninesw_create_screen(struct pipe_screen *pscreen)
 {
    struct sw_winsys *winsys = NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h	2012-05-02 13:56:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h	2015-09-16 14:36:09.000000000 +0000
@@ -9,7 +9,7 @@
  * Try to wrap a hw screen with a software screen.
  * On failure will return given screen.
  */
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 sw_screen_wrap(struct pipe_screen *screen)
 {
 #if defined(GALLIUM_SOFTPIPE) || defined(GALLIUM_LLVMPIPE)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_build.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_build.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_build.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_build.c	2015-09-16 14:36:09.000000000 +0000
@@ -610,7 +610,7 @@
    instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
    instruction.NrTokens = 0;
    instruction.Opcode = TGSI_OPCODE_MOV;
-   instruction.Saturate = TGSI_SAT_NONE;
+   instruction.Saturate = 0;
    instruction.Predicate = 0;
    instruction.NumDstRegs = 1;
    instruction.NumSrcRegs = 1;
@@ -632,7 +632,7 @@
    struct tgsi_instruction instruction;
 
    assert (opcode <= TGSI_OPCODE_LAST);
-   assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE);
+   assert (saturate <= 1);
    assert (num_dst_regs <= 3);
    assert (num_src_regs <= 15);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_dump.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_dump.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_dump.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_dump.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,6 +48,7 @@
    int indent;
    
    uint indentation;
+   FILE *file;
 
    void (*dump_printf)(struct dump_ctx *ctx, const char *format, ...);
 };
@@ -58,7 +59,10 @@
    va_list ap;
    (void)ctx;
    va_start(ap, format);
-   _debug_vprintf(format, ap);
+   if (ctx->file)
+      vfprintf(ctx->file, format, ap);
+   else
+      _debug_vprintf(format, ap);
    va_end(ap);
 }
 
@@ -286,15 +290,15 @@
    if (decl->Declaration.File == TGSI_FILE_INPUT &&
        (iter->processor.Processor == TGSI_PROCESSOR_GEOMETRY ||
         (!patch &&
-         (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL ||
-          iter->processor.Processor == TGSI_PROCESSOR_TESSEVAL)))) {
+         (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL ||
+          iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL)))) {
       TXT("[]");
    }
 
    /* all non-patch tess ctrl shader outputs are two dimensional */
    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
        !patch &&
-       iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL) {
+       iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL) {
       TXT("[]");
    }
 
@@ -539,17 +543,8 @@
 
    TXT( info->mnemonic );
 
-   switch (inst->Instruction.Saturate) {
-   case TGSI_SAT_NONE:
-      break;
-   case TGSI_SAT_ZERO_ONE:
+   if (inst->Instruction.Saturate) {
       TXT( "_SAT" );
-      break;
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      TXT( "_SATNV" );
-      break;
-   default:
-      assert( 0 );
    }
 
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
@@ -668,9 +663,7 @@
 }
 
 void
-tgsi_dump(
-   const struct tgsi_token *tokens,
-   uint flags )
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
 {
    struct dump_ctx ctx;
 
@@ -686,10 +679,17 @@
    ctx.indent = 0;
    ctx.dump_printf = dump_ctx_printf;
    ctx.indentation = 0;
+   ctx.file = file;
 
    tgsi_iterate_shader( tokens, &ctx.iter );
 }
 
+void
+tgsi_dump(const struct tgsi_token *tokens, uint flags)
+{
+   tgsi_dump_to_file(tokens, flags, NULL);
+}
+
 struct str_dump_ctx
 {
    struct dump_ctx base;
@@ -742,6 +742,7 @@
    ctx.base.indent = 0;
    ctx.base.dump_printf = &str_dump_ctx_printf;
    ctx.base.indentation = 0;
+   ctx.base.file = NULL;
 
    ctx.str = str;
    ctx.str[0] = 0;
@@ -765,6 +766,7 @@
    ctx.base.indent = 0;
    ctx.base.dump_printf = &str_dump_ctx_printf;
    ctx.base.indentation = 0;
+   ctx.base.file = NULL;
 
    ctx.str = str;
    ctx.str[0] = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_dump.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_dump.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_dump.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_dump.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
+#include <stdio.h>
+
 #if defined __cplusplus
 extern "C" {
 #endif
@@ -44,6 +46,9 @@
    size_t size);
 
 void
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file);
+
+void
 tgsi_dump(
    const struct tgsi_token *tokens,
    uint flags );
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_exec.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_exec.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_exec.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_exec.c	2015-09-16 14:36:09.000000000 +0000
@@ -735,7 +735,7 @@
  * not lead to crashes, etc.  But when debugging, it's helpful to catch
  * them.
  */
-static INLINE void
+static inline void
 check_inf_or_nan(const union tgsi_exec_channel *chan)
 {
    assert(!util_is_inf_or_nan((chan)->f[0]));
@@ -1765,14 +1765,12 @@
    if (!dst)
       return;
 
-   switch (inst->Instruction.Saturate) {
-   case TGSI_SAT_NONE:
+   if (!inst->Instruction.Saturate) {
       for (i = 0; i < TGSI_QUAD_SIZE; i++)
          if (execmask & (1 << i))
             dst->i[i] = chan->i[i];
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
+   }
+   else {
       for (i = 0; i < TGSI_QUAD_SIZE; i++)
          if (execmask & (1 << i)) {
             if (chan->f[i] < 0.0f)
@@ -1782,22 +1780,6 @@
             else
                dst->i[i] = chan->i[i];
          }
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      for (i = 0; i < TGSI_QUAD_SIZE; i++)
-         if (execmask & (1 << i)) {
-            if (chan->f[i] < -1.0f)
-               dst->f[i] = -1.0f;
-            else if (chan->f[i] > 1.0f)
-               dst->f[i] = 1.0f;
-            else
-               dst->i[i] = chan->i[i];
-         }
-      break;
-
-   default:
-      assert( 0 );
    }
 }
 
@@ -1952,7 +1934,7 @@
 #define TEX_MODIFIER_LOD_BIAS       2
 #define TEX_MODIFIER_EXPLICIT_LOD   3
 #define TEX_MODIFIER_LEVEL_ZERO     4
-
+#define TEX_MODIFIER_GATHER         5
 
 /*
  * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
@@ -2006,6 +1988,35 @@
    derivs[1][3] = d.f[3];
 }
 
+static uint
+fetch_sampler_unit(struct tgsi_exec_machine *mach,
+                   const struct tgsi_full_instruction *inst,
+                   uint sampler)
+{
+   uint unit;
+
+   if (inst->Src[sampler].Register.Indirect) {
+      const struct tgsi_full_src_register *reg = &inst->Src[sampler];
+      union tgsi_exec_channel indir_index, index2;
+
+      index2.i[0] =
+      index2.i[1] =
+      index2.i[2] =
+      index2.i[3] = reg->Indirect.Index;
+
+      fetch_src_file_channel(mach,
+                             0,
+                             reg->Indirect.File,
+                             reg->Indirect.Swizzle,
+                             &index2,
+                             &ZeroVec,
+                             &indir_index);
+      unit = inst->Src[sampler].Register.Index + indir_index.i[0];
+   } else {
+      unit = inst->Src[sampler].Register.Index;
+   }
+   return unit;
+}
 
 /*
  * execute a texture instruction.
@@ -2019,14 +2030,15 @@
          const struct tgsi_full_instruction *inst,
          uint modifier, uint sampler)
 {
-   const uint unit = inst->Src[sampler].Register.Index;
    const union tgsi_exec_channel *args[5], *proj = NULL;
    union tgsi_exec_channel r[5];
    enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
    uint chan;
+   uint unit;
    int8_t offsets[3];
    int dim, shadow_ref, i;
 
+   unit = fetch_sampler_unit(mach, inst, sampler);
    /* always fetch all 3 offsets, overkill but keeps code simple */
    fetch_texel_offsets(mach, inst, offsets);
 
@@ -2069,6 +2081,8 @@
          control = tgsi_sampler_lod_explicit;
       else if (modifier == TEX_MODIFIER_LOD_BIAS)
          control = tgsi_sampler_lod_bias;
+      else if (modifier == TEX_MODIFIER_GATHER)
+         control = tgsi_sampler_gather;
    }
    else {
       for (i = dim; i < Elements(args); i++)
@@ -2123,12 +2137,13 @@
 exec_txd(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->Src[3].Register.Index;
    union tgsi_exec_channel r[4];
    float derivs[3][2][TGSI_QUAD_SIZE];
    uint chan;
+   uint unit;
    int8_t offsets[3];
 
+   unit = fetch_sampler_unit(mach, inst, 3);
    /* always fetch all 3 offsets, overkill but keeps code simple */
    fetch_texel_offsets(mach, inst, offsets);
 
@@ -2230,14 +2245,15 @@
 exec_txf(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->Src[1].Register.Index;
    union tgsi_exec_channel r[4];
    uint chan;
+   uint unit;
    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
    int j;
    int8_t offsets[3];
    unsigned target;
 
+   unit = fetch_sampler_unit(mach, inst, 1);
    /* always fetch all 3 offsets, overkill but keeps code simple */
    fetch_texel_offsets(mach, inst, offsets);
 
@@ -2312,12 +2328,14 @@
 exec_txq(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->Src[1].Register.Index;
    int result[4];
    union tgsi_exec_channel r[4], src;
    uint chan;
+   uint unit;
    int i,j;
 
+   unit = fetch_sampler_unit(mach, inst, 1);
+
    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
 
    /* XXX: This interface can't return per-pixel values */
@@ -3315,16 +3333,14 @@
    union tgsi_double_channel temp;
    const uint execmask = mach->ExecMask;
 
-   switch (inst->Instruction.Saturate) {
-   case TGSI_SAT_NONE:
+   if (!inst->Instruction.Saturate) {
       for (i = 0; i < TGSI_QUAD_SIZE; i++)
          if (execmask & (1 << i)) {
             dst[0].u[i] = chan->u[i][0];
             dst[1].u[i] = chan->u[i][1];
          }
-      break;
-
-   case TGSI_SAT_ZERO_ONE:
+   }
+   else {
       for (i = 0; i < TGSI_QUAD_SIZE; i++)
          if (execmask & (1 << i)) {
             if (chan->d[i] < 0.0)
@@ -3337,25 +3353,6 @@
             dst[0].u[i] = temp.u[i][0];
             dst[1].u[i] = temp.u[i][1];
          }
-      break;
-
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      for (i = 0; i < TGSI_QUAD_SIZE; i++)
-         if (execmask & (1 << i)) {
-            if (chan->d[i] < -1.0)
-               temp.d[i] = -1.0;
-            else if (chan->d[i] > 1.0)
-               temp.d[i] = 1.0;
-            else
-               temp.d[i] = chan->d[i];
-
-            dst[0].u[i] = temp.u[i][0];
-            dst[1].u[i] = temp.u[i][1];
-         }
-      break;
-
-   default:
-      assert( 0 );
    }
 
    store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT);
@@ -4374,6 +4371,13 @@
       exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
       break;
 
+   case TGSI_OPCODE_TG4:
+      /* src[0] = texcoord */
+      /* src[1] = component */
+      /* src[2] = sampler unit */
+      exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
+      break;
+
    case TGSI_OPCODE_UP2H:
       assert (0);
       break;
@@ -4431,8 +4435,12 @@
          mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
 
-         /* Finally, jump to the subroutine */
+         /* Finally, jump to the subroutine.  The label is a pointer
+          * (an instruction number) to the BGNSUB instruction.
+          */
          *pc = inst->Label.Label;
+         assert(mach->Instructions[*pc].Instruction.Opcode
+                == TGSI_OPCODE_BGNSUB);
       }
       break;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_exec.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_exec.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_exec.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_exec.h	2015-09-16 14:36:09.000000000 +0000
@@ -93,7 +93,8 @@
    tgsi_sampler_lod_bias,
    tgsi_sampler_lod_explicit,
    tgsi_sampler_lod_zero,
-   tgsi_sampler_derivs_explicit
+   tgsi_sampler_derivs_explicit,
+   tgsi_sampler_gather,
 };
 
 /**
@@ -212,7 +213,7 @@
  * input register files, this is the stride between two 1D
  * arrays.
  */
-#define TGSI_EXEC_MAX_INPUT_ATTRIBS PIPE_MAX_SHADER_INPUTS
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 32
 
 /* The maximum number of bytes per constant buffer.
  */
@@ -385,7 +386,7 @@
 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
 
 
-static INLINE void
+static inline void
 tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
 {
    mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
@@ -394,7 +395,7 @@
 
 
 /** Set execution mask values prior to executing the shader */
-static INLINE void
+static inline void
 tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
                    boolean ch0, boolean ch1, boolean ch2, boolean ch3)
 {
@@ -413,7 +414,7 @@
                                const unsigned *buf_sizes);
 
 
-static INLINE int
+static inline int
 tgsi_exec_get_shader_param(enum pipe_shader_cap param)
 {
    switch(param) {
@@ -457,6 +458,7 @@
       return 1;
    case PIPE_SHADER_CAP_DOUBLES:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_info.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_info.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_info.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_info.c	2015-09-16 14:36:09.000000000 +0000
@@ -302,9 +302,9 @@
       return "fragment shader";
    case TGSI_PROCESSOR_GEOMETRY:
       return "geometry shader";
-   case TGSI_PROCESSOR_TESSCTRL:
+   case TGSI_PROCESSOR_TESS_CTRL:
       return "tessellation control shader";
-   case TGSI_PROCESSOR_TESSEVAL:
+   case TGSI_PROCESSOR_TESS_EVAL:
       return "tessellation evaluation shader";
    default:
       return "unknown shader type!";
@@ -316,7 +316,7 @@
  *
  * MOV and UCMP is special so return VOID
  */
-static INLINE enum tgsi_opcode_type
+static inline enum tgsi_opcode_type
 tgsi_opcode_infer_type( uint opcode )
 {
    switch (opcode) {
@@ -374,7 +374,34 @@
    case TGSI_OPCODE_IMUL_HI:
    case TGSI_OPCODE_IBFE:
    case TGSI_OPCODE_IMSB:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSNE:
       return TGSI_TYPE_SIGNED;
+   case TGSI_OPCODE_DADD:
+   case TGSI_OPCODE_DABS:
+   case TGSI_OPCODE_DFMA:
+   case TGSI_OPCODE_DNEG:
+   case TGSI_OPCODE_DMUL:
+   case TGSI_OPCODE_DMAX:
+   case TGSI_OPCODE_DMIN:
+   case TGSI_OPCODE_DRCP:
+   case TGSI_OPCODE_DSQRT:
+   case TGSI_OPCODE_DMAD:
+   case TGSI_OPCODE_DLDEXP:
+   case TGSI_OPCODE_DFRACEXP:
+   case TGSI_OPCODE_DFRAC:
+   case TGSI_OPCODE_DRSQ:
+   case TGSI_OPCODE_DTRUNC:
+   case TGSI_OPCODE_DCEIL:
+   case TGSI_OPCODE_DFLR:
+   case TGSI_OPCODE_DROUND:
+   case TGSI_OPCODE_DSSG:
+   case TGSI_OPCODE_F2D:
+   case TGSI_OPCODE_I2D:
+   case TGSI_OPCODE_U2D:
+      return TGSI_TYPE_DOUBLE;
    default:
       return TGSI_TYPE_FLOAT;
    }
@@ -391,6 +418,7 @@
    case TGSI_OPCODE_TXF:
    case TGSI_OPCODE_BREAKC:
    case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_U2D:
    case TGSI_OPCODE_UADD:
    case TGSI_OPCODE_SWITCH:
    case TGSI_OPCODE_CASE:
@@ -400,10 +428,12 @@
       return TGSI_TYPE_UNSIGNED;
    case TGSI_OPCODE_IMUL_HI:
    case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_I2D:
       return TGSI_TYPE_SIGNED;
    case TGSI_OPCODE_ARL:
    case TGSI_OPCODE_ARR:
    case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_F2D:
    case TGSI_OPCODE_F2I:
    case TGSI_OPCODE_F2U:
    case TGSI_OPCODE_FSEQ:
@@ -412,6 +442,14 @@
    case TGSI_OPCODE_FSNE:
    case TGSI_OPCODE_UCMP:
       return TGSI_TYPE_FLOAT;
+   case TGSI_OPCODE_D2F:
+   case TGSI_OPCODE_D2U:
+   case TGSI_OPCODE_D2I:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSNE:
+      return TGSI_TYPE_DOUBLE;
    default:
       return tgsi_opcode_infer_type(opcode);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_lowering.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_lowering.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_lowering.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_lowering.c	2015-09-16 14:36:09.000000000 +0000
@@ -1133,8 +1133,7 @@
 
    /* MOV_SAT tmpA.<mask>, tmpA */
    if (mask) {
-      create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
-                 TGSI_SAT_ZERO_ONE);
+      create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
    }
 
    /* modify the texture samp instruction to take fixed up coord: */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_parse.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_parse.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_parse.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_parse.c	2015-09-16 14:36:09.000000000 +0000
@@ -36,7 +36,7 @@
    const struct tgsi_token *tokens )
 {
    ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[0];
-   if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
+   if (ctx->FullHeader.Header.HeaderSize >= 2) {
       ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[1];
    }
    else {
@@ -69,7 +69,7 @@
  * warnings.  The warnings seem harmless on x86 but on PPC they cause
  * real failures.
  */
-static INLINE void
+static inline void
 copy_token(void *dst, const void *src)
 {
    memcpy(dst, src, 4);
@@ -113,11 +113,11 @@
          next_token(ctx, &decl->Dim);
       }
 
-      if( decl->Declaration.Interpolate ) {
+      if (decl->Declaration.Interpolate) {
          next_token( ctx, &decl->Interp );
       }
 
-      if( decl->Declaration.Semantic ) {
+      if (decl->Declaration.Semantic) {
          next_token( ctx, &decl->Semantic );
       }
 
@@ -129,7 +129,7 @@
          next_token(ctx, &decl->SamplerView);
       }
 
-      if( decl->Declaration.Array ) {
+      if (decl->Declaration.Array) {
          next_token(ctx, &decl->Array);
       }
 
@@ -190,21 +190,21 @@
 
       if (inst->Instruction.Texture) {
          next_token( ctx, &inst->Texture);
-         for( i = 0; i < inst->Texture.NumOffsets; i++ ) {
+         for (i = 0; i < inst->Texture.NumOffsets; i++) {
             next_token( ctx, &inst->TexOffsets[i] );
          }
       }
 
       assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
 
-      for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+      for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
 
          next_token( ctx, &inst->Dst[i].Register );
 
-         if( inst->Dst[i].Register.Indirect )
+         if (inst->Dst[i].Register.Indirect)
             next_token( ctx, &inst->Dst[i].Indirect );
 
-         if( inst->Dst[i].Register.Dimension ) {
+         if (inst->Dst[i].Register.Dimension) {
             next_token( ctx, &inst->Dst[i].Dimension );
 
             /*
@@ -212,21 +212,21 @@
              */
             assert( !inst->Dst[i].Dimension.Dimension );
 
-            if( inst->Dst[i].Dimension.Indirect )
+            if (inst->Dst[i].Dimension.Indirect)
                next_token( ctx, &inst->Dst[i].DimIndirect );
          }
       }
 
       assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
 
-      for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+      for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
 
          next_token( ctx, &inst->Src[i].Register );
 
-         if( inst->Src[i].Register.Indirect )
+         if (inst->Src[i].Register.Indirect)
             next_token( ctx, &inst->Src[i].Indirect );
 
-         if( inst->Src[i].Register.Dimension ) {
+         if (inst->Src[i].Register.Dimension) {
             next_token( ctx, &inst->Src[i].Dimension );
 
             /*
@@ -234,7 +234,7 @@
              */
             assert( !inst->Src[i].Dimension.Dimension );
 
-            if( inst->Src[i].Dimension.Indirect )
+            if (inst->Src[i].Dimension.Indirect)
                next_token( ctx, &inst->Src[i].DimIndirect );
          }
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_parse.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_parse.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_parse.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_parse.h	2015-09-16 14:36:09.000000000 +0000
@@ -133,7 +133,7 @@
 tgsi_parse_token(
    struct tgsi_parse_context *ctx );
 
-static INLINE unsigned
+static inline unsigned
 tgsi_num_tokens(const struct tgsi_token *tokens)
 {
    struct tgsi_header header;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_sanity.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_sanity.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_sanity.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_sanity.c	2015-09-16 14:36:09.000000000 +0000
@@ -63,7 +63,7 @@
    boolean print;
 };
 
-static INLINE unsigned
+static inline unsigned
 scan_register_key(const scan_register *reg)
 {
    unsigned key = reg->file;
@@ -415,8 +415,8 @@
          decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER;
       if (file == TGSI_FILE_INPUT && !patch && (
                 processor == TGSI_PROCESSOR_GEOMETRY ||
-                processor == TGSI_PROCESSOR_TESSCTRL ||
-                processor == TGSI_PROCESSOR_TESSEVAL)) {
+                processor == TGSI_PROCESSOR_TESS_CTRL ||
+                processor == TGSI_PROCESSOR_TESS_EVAL)) {
          uint vert;
          for (vert = 0; vert < ctx->implied_array_size; ++vert) {
             scan_register *reg = MALLOC(sizeof(scan_register));
@@ -424,7 +424,7 @@
             check_and_declare(ctx, reg);
          }
       } else if (file == TGSI_FILE_OUTPUT && !patch &&
-                 processor == TGSI_PROCESSOR_TESSCTRL) {
+                 processor == TGSI_PROCESSOR_TESS_CTRL) {
          uint vert;
          for (vert = 0; vert < ctx->implied_out_array_size; ++vert) {
             scan_register *reg = MALLOC(sizeof(scan_register));
@@ -489,7 +489,7 @@
        prop->Property.PropertyName == TGSI_PROPERTY_GS_INPUT_PRIM) {
       ctx->implied_array_size = u_vertices_per_prim(prop->u[0].Data);
    }
-   if (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL &&
+   if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL &&
        prop->Property.PropertyName == TGSI_PROPERTY_TCS_VERTICES_OUT)
       ctx->implied_out_array_size = prop->u[0].Data;
    return TRUE;
@@ -499,8 +499,8 @@
 prolog(struct tgsi_iterate_context *iter)
 {
    struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter;
-   if (iter->processor.Processor == TGSI_PROCESSOR_TESSCTRL ||
-       iter->processor.Processor == TGSI_PROCESSOR_TESSEVAL)
+   if (iter->processor.Processor == TGSI_PROCESSOR_TESS_CTRL ||
+       iter->processor.Processor == TGSI_PROCESSOR_TESS_EVAL)
       ctx->implied_array_size = 32;
    return TRUE;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_scan.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_scan.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_scan.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_scan.c	2015-09-16 14:36:09.000000000 +0000
@@ -62,6 +62,7 @@
       info->file_max[i] = -1;
    for (i = 0; i < Elements(info->const_file_max); i++)
       info->const_file_max[i] = -1;
+   info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = 1;
 
    /**
     ** Setup to begin parsing input shader
@@ -74,8 +75,8 @@
    assert(procType == TGSI_PROCESSOR_FRAGMENT ||
           procType == TGSI_PROCESSOR_VERTEX ||
           procType == TGSI_PROCESSOR_GEOMETRY ||
-          procType == TGSI_PROCESSOR_TESSCTRL ||
-          procType == TGSI_PROCESSOR_TESSEVAL ||
+          procType == TGSI_PROCESSOR_TESS_CTRL ||
+          procType == TGSI_PROCESSOR_TESS_EVAL ||
           procType == TGSI_PROCESSOR_COMPUTE);
    info->processor = procType;
 
@@ -167,13 +168,31 @@
                = &parse.FullToken.FullDeclaration;
             const uint file = fulldecl->Declaration.File;
             uint reg;
-            if (fulldecl->Declaration.Array)
-               info->array_max[file] = MAX2(info->array_max[file], fulldecl->Array.ArrayID);
+
+            if (fulldecl->Declaration.Array) {
+               unsigned array_id = fulldecl->Array.ArrayID;
+
+               switch (file) {
+               case TGSI_FILE_INPUT:
+                  assert(array_id < ARRAY_SIZE(info->input_array_first));
+                  info->input_array_first[array_id] = fulldecl->Range.First;
+                  info->input_array_last[array_id] = fulldecl->Range.Last;
+                  break;
+               case TGSI_FILE_OUTPUT:
+                  assert(array_id < ARRAY_SIZE(info->output_array_first));
+                  info->output_array_first[array_id] = fulldecl->Range.First;
+                  info->output_array_last[array_id] = fulldecl->Range.Last;
+                  break;
+               }
+               info->array_max[file] = MAX2(info->array_max[file], array_id);
+            }
+
             for (reg = fulldecl->Range.First;
                  reg <= fulldecl->Range.Last;
                  reg++) {
                unsigned semName = fulldecl->Semantic.Name;
-               unsigned semIndex = fulldecl->Semantic.Index;
+               unsigned semIndex =
+                  fulldecl->Semantic.Index + (reg - fulldecl->Range.First);
 
                /* only first 32 regs will appear in this bitfield */
                info->file_mask[file] |= (1 << reg);
@@ -230,6 +249,8 @@
                   }
                   else if (semName == TGSI_SEMANTIC_PRIMID) {
                      info->uses_primid = TRUE;
+                  } else if (semName == TGSI_SEMANTIC_INVOCATIONID) {
+                     info->uses_invocationid = TRUE;
                   }
                }
                else if (file == TGSI_FILE_OUTPUT) {
@@ -239,8 +260,8 @@
 
                   if (procType == TGSI_PROCESSOR_VERTEX ||
                       procType == TGSI_PROCESSOR_GEOMETRY ||
-                      procType == TGSI_PROCESSOR_TESSCTRL ||
-                      procType == TGSI_PROCESSOR_TESSEVAL) {
+                      procType == TGSI_PROCESSOR_TESS_CTRL ||
+                      procType == TGSI_PROCESSOR_TESS_EVAL) {
                      if (semName == TGSI_SEMANTIC_CLIPDIST) {
                         info->num_written_clipdistance +=
                            util_bitcount(fulldecl->Declaration.UsageMask);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_scan.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_scan.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_scan.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_scan.h	2015-09-16 14:36:09.000000000 +0000
@@ -65,6 +65,10 @@
    int file_max[TGSI_FILE_COUNT];  /**< highest index of declared registers */
    int const_file_max[PIPE_MAX_CONSTANT_BUFFERS];
 
+   ubyte input_array_first[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_array_last[PIPE_MAX_SHADER_INPUTS];
+   ubyte output_array_first[PIPE_MAX_SHADER_OUTPUTS];
+   ubyte output_array_last[PIPE_MAX_SHADER_OUTPUTS];
    unsigned array_max[TGSI_FILE_COUNT];  /**< highest index array per register file */
 
    uint immediate_count; /**< number of immediates declared */
@@ -85,6 +89,7 @@
    boolean uses_basevertex;
    boolean uses_primid;
    boolean uses_frontface;
+   boolean uses_invocationid;
    boolean writes_psize;
    boolean writes_clipvertex;
    boolean writes_viewport_index;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_strings.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_strings.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_strings.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_strings.c	2015-09-16 14:36:09.000000000 +0000
@@ -203,7 +203,7 @@
 };
 
 
-static INLINE void
+static inline void
 tgsi_strings_check(void)
 {
    STATIC_ASSERT(Elements(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_text.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_text.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_text.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_text.c	2015-09-16 14:36:09.000000000 +0000
@@ -259,7 +259,7 @@
    struct tgsi_token *tokens_end;
    struct tgsi_header *header;
    unsigned processor : 4;
-   int implied_array_size : 5;
+   unsigned implied_array_size : 6;
    unsigned num_immediates;
 };
 
@@ -297,6 +297,10 @@
       processor = TGSI_PROCESSOR_VERTEX;
    else if (str_match_nocase_whole( &ctx->cur, "GEOM" ))
       processor = TGSI_PROCESSOR_GEOMETRY;
+   else if (str_match_nocase_whole( &ctx->cur, "TESS_CTRL" ))
+      processor = TGSI_PROCESSOR_TESS_CTRL;
+   else if (str_match_nocase_whole( &ctx->cur, "TESS_EVAL" ))
+      processor = TGSI_PROCESSOR_TESS_EVAL;
    else if (str_match_nocase_whole( &ctx->cur, "COMP" ))
       processor = TGSI_PROCESSOR_COMPUTE;
    else {
@@ -671,6 +675,9 @@
    eat_opt_white( &cur );
 
    if (cur[0] == '[') {
+      bool is_in = *file == TGSI_FILE_INPUT;
+      bool is_out = *file == TGSI_FILE_OUTPUT;
+
       ++cur;
       ctx->cur = cur;
       if (!parse_register_dcl_bracket( ctx, &brackets[1] ))
@@ -680,7 +687,11 @@
        * input primitive. so we want to declare just
        * the index relevant to the semantics which is in
        * the second bracket */
-      if (ctx->processor == TGSI_PROCESSOR_GEOMETRY && *file == TGSI_FILE_INPUT) {
+
+      /* tessellation has similar constraints to geometry shader */
+      if ((ctx->processor == TGSI_PROCESSOR_GEOMETRY && is_in) ||
+          (ctx->processor == TGSI_PROCESSOR_TESS_EVAL && is_in) ||
+          (ctx->processor == TGSI_PROCESSOR_TESS_CTRL && (is_in || is_out))) {
          brackets[0] = brackets[1];
          *num_brackets = 1;
       } else {
@@ -736,6 +747,14 @@
       dst->Dimension.Indirect = 0;
       dst->Dimension.Dimension = 0;
       dst->Dimension.Index = bracket[0].index;
+
+      if (bracket[0].ind_file != TGSI_FILE_NULL) {
+         dst->Dimension.Indirect = 1;
+         dst->DimIndirect.File = bracket[0].ind_file;
+         dst->DimIndirect.Index = bracket[0].ind_index;
+         dst->DimIndirect.Swizzle = bracket[0].ind_comp;
+         dst->DimIndirect.ArrayID = bracket[0].ind_array;
+      }
       bracket[0] = bracket[1];
    }
    dst->Register.Index = bracket[0].index;
@@ -903,7 +922,7 @@
    /* simple case: the whole string matches the instruction name */
    if (str_match_nocase_whole(&cur, info->mnemonic)) {
       *pcur = cur;
-      *saturate = TGSI_SAT_NONE;
+      *saturate = 0;
       return TRUE;
    }
 
@@ -911,13 +930,7 @@
       /* the instruction has a suffix, figure it out */
       if (str_match_nocase_whole(&cur, "_SAT")) {
          *pcur = cur;
-         *saturate = TGSI_SAT_ZERO_ONE;
-         return TRUE;
-      }
-
-      if (str_match_nocase_whole(&cur, "_SATNV")) {
-         *pcur = cur;
-         *saturate = TGSI_SAT_MINUS_PLUS_ONE;
+         *saturate = 1;
          return TRUE;
       }
    }
@@ -931,7 +944,7 @@
    boolean has_label )
 {
    uint i;
-   uint saturate = TGSI_SAT_NONE;
+   uint saturate = 0;
    const struct tgsi_opcode_info *info;
    struct tgsi_full_instruction inst;
    const char *cur;
@@ -1625,6 +1638,10 @@
    if (!parse_header( ctx ))
       return FALSE;
 
+   if (ctx->processor == TGSI_PROCESSOR_TESS_CTRL ||
+       ctx->processor == TGSI_PROCESSOR_TESS_EVAL)
+       ctx->implied_array_size = 32;
+
    while (*ctx->cur != '\0') {
       uint label_val = 0;
       if (!eat_white( &ctx->cur )) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_transform.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_transform.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_transform.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_transform.h	2015-09-16 14:36:09.000000000 +0000
@@ -94,7 +94,7 @@
 /**
  * Helper for emitting temporary register declarations.
  */
-static INLINE void
+static inline void
 tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
                          unsigned index)
 {
@@ -108,7 +108,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
                           unsigned index,
                           unsigned sem_name, unsigned sem_index,
@@ -130,7 +130,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
                             unsigned index)
 {
@@ -143,8 +143,29 @@
    ctx->emit_declaration(ctx, &decl);
 }
 
+/** Helper for emitting sampler view declarations. */
+static inline void
+tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
+                                 unsigned index,
+                                 unsigned target,
+                                 enum tgsi_return_type type)
+{
+   struct tgsi_full_declaration view = tgsi_default_full_declaration();
+
+   view.Declaration.File = TGSI_FILE_SAMPLER_VIEW;
+   view.Declaration.UsageMask = 0xf;
+   view.Range.First = index;
+   view.Range.Last = index;
+   view.SamplerView.Resource = target;
+   view.SamplerView.ReturnTypeX = type;
+   view.SamplerView.ReturnTypeY = type;
+   view.SamplerView.ReturnTypeZ = type;
+   view.SamplerView.ReturnTypeW = type;
+
+   ctx->emit_declaration(ctx, &view);
+}
 
-static INLINE void
+static inline void
 tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
                               float x, float y, float z, float w)
 {
@@ -165,7 +186,7 @@
 /**
  * Helper for emitting 1-operand instructions.
  */
-static INLINE void
+static inline void
 tgsi_transform_op1_inst(struct tgsi_transform_context *ctx,
                         unsigned opcode,
                         unsigned dst_file,
@@ -190,7 +211,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
                         unsigned opcode,
                         unsigned dst_file,
@@ -219,7 +240,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
                             unsigned opcode,
                             unsigned dst_file,
@@ -261,7 +282,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx,
                             unsigned opcode,
                             unsigned dst_file,
@@ -312,7 +333,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op3_swz_inst(struct tgsi_transform_context *ctx,
                             unsigned opcode,
                             unsigned dst_file,
@@ -374,7 +395,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
                          unsigned src_file,
                          unsigned src_index,
@@ -398,7 +419,7 @@
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
                            unsigned dst_file,
                            unsigned dst_index,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_ureg.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_ureg.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_ureg.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_ureg.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 
+#include "pipe/p_screen.h"
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_ureg.h"
@@ -96,7 +97,7 @@
 struct ureg_program
 {
    unsigned processor;
-   struct pipe_context *pipe;
+   bool supports_any_inout_decl_range;
 
    struct {
       unsigned semantic_name;
@@ -104,17 +105,13 @@
       unsigned interp;
       unsigned char cylindrical_wrap;
       unsigned interp_location;
-   } fs_input[UREG_MAX_INPUT];
-   unsigned nr_fs_inputs;
-
-   unsigned vs_inputs[UREG_MAX_INPUT/32];
+      unsigned first;
+      unsigned last;
+      unsigned array_id;
+   } input[UREG_MAX_INPUT];
+   unsigned nr_inputs, nr_input_regs;
 
-   struct {
-      unsigned index;
-      unsigned semantic_name;
-      unsigned semantic_index;
-   } gs_input[UREG_MAX_INPUT];
-   unsigned nr_gs_inputs;
+   unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];
 
    struct {
       unsigned index;
@@ -127,8 +124,11 @@
       unsigned semantic_name;
       unsigned semantic_index;
       unsigned usage_mask; /* = TGSI_WRITEMASK_* */
+      unsigned first;
+      unsigned last;
+      unsigned array_id;
    } output[UREG_MAX_OUTPUT];
-   unsigned nr_outputs;
+   unsigned nr_outputs, nr_output_regs;
 
    struct {
       union {
@@ -254,30 +254,42 @@
                        unsigned semantic_index,
                        unsigned interp_mode,
                        unsigned cylindrical_wrap,
-                       unsigned interp_location)
+                       unsigned interp_location,
+                       unsigned array_id,
+                       unsigned array_size)
 {
    unsigned i;
 
-   for (i = 0; i < ureg->nr_fs_inputs; i++) {
-      if (ureg->fs_input[i].semantic_name == semantic_name &&
-          ureg->fs_input[i].semantic_index == semantic_index) {
+   for (i = 0; i < ureg->nr_inputs; i++) {
+      if (ureg->input[i].semantic_name == semantic_name &&
+          ureg->input[i].semantic_index == semantic_index) {
+         assert(ureg->input[i].interp == interp_mode);
+         assert(ureg->input[i].cylindrical_wrap == cylindrical_wrap);
+         assert(ureg->input[i].interp_location == interp_location);
+         assert(ureg->input[i].array_id == array_id);
          goto out;
       }
    }
 
-   if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
-      ureg->fs_input[i].semantic_name = semantic_name;
-      ureg->fs_input[i].semantic_index = semantic_index;
-      ureg->fs_input[i].interp = interp_mode;
-      ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap;
-      ureg->fs_input[i].interp_location = interp_location;
-      ureg->nr_fs_inputs++;
+   if (ureg->nr_inputs < UREG_MAX_INPUT) {
+      assert(array_size >= 1);
+      ureg->input[i].semantic_name = semantic_name;
+      ureg->input[i].semantic_index = semantic_index;
+      ureg->input[i].interp = interp_mode;
+      ureg->input[i].cylindrical_wrap = cylindrical_wrap;
+      ureg->input[i].interp_location = interp_location;
+      ureg->input[i].first = ureg->nr_input_regs;
+      ureg->input[i].last = ureg->nr_input_regs + array_size - 1;
+      ureg->input[i].array_id = array_id;
+      ureg->nr_input_regs += array_size;
+      ureg->nr_inputs++;
    } else {
       set_bad(ureg);
    }
 
 out:
-   return ureg_src_register(TGSI_FILE_INPUT, i);
+   return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first,
+                                  array_id);
 }
 
 
@@ -286,29 +298,22 @@
                     unsigned index )
 {
    assert(ureg->processor == TGSI_PROCESSOR_VERTEX);
-   
+   assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs));
+
    ureg->vs_inputs[index/32] |= 1 << (index % 32);
    return ureg_src_register( TGSI_FILE_INPUT, index );
 }
 
 
 struct ureg_src
-ureg_DECL_gs_input(struct ureg_program *ureg,
-                   unsigned index,
-                   unsigned semantic_name,
-                   unsigned semantic_index)
+ureg_DECL_input(struct ureg_program *ureg,
+                unsigned semantic_name,
+                unsigned semantic_index,
+                unsigned array_id,
+                unsigned array_size)
 {
-   if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
-      ureg->gs_input[ureg->nr_gs_inputs].index = index;
-      ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name;
-      ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index;
-      ureg->nr_gs_inputs++;
-   } else {
-      set_bad(ureg);
-   }
-
-   /* XXX: Add suport for true 2D input registers. */
-   return ureg_src_register(TGSI_FILE_INPUT, index);
+   return ureg_DECL_fs_input_cyl_centroid(ureg, semantic_name, semantic_index,
+                                          0, 0, 0, array_id, array_size);
 }
 
 
@@ -332,10 +337,12 @@
 
 
 struct ureg_dst 
-ureg_DECL_output_masked( struct ureg_program *ureg,
-                         unsigned name,
-                         unsigned index,
-                         unsigned usage_mask )
+ureg_DECL_output_masked(struct ureg_program *ureg,
+                        unsigned name,
+                        unsigned index,
+                        unsigned usage_mask,
+                        unsigned array_id,
+                        unsigned array_size)
 {
    unsigned i;
 
@@ -343,7 +350,8 @@
 
    for (i = 0; i < ureg->nr_outputs; i++) {
       if (ureg->output[i].semantic_name == name &&
-          ureg->output[i].semantic_index == index) { 
+          ureg->output[i].semantic_index == index) {
+         assert(ureg->output[i].array_id == array_id);
          ureg->output[i].usage_mask |= usage_mask;
          goto out;
       }
@@ -353,6 +361,10 @@
       ureg->output[i].semantic_name = name;
       ureg->output[i].semantic_index = index;
       ureg->output[i].usage_mask = usage_mask;
+      ureg->output[i].first = ureg->nr_output_regs;
+      ureg->output[i].last = ureg->nr_output_regs + array_size - 1;
+      ureg->output[i].array_id = array_id;
+      ureg->nr_output_regs += array_size;
       ureg->nr_outputs++;
    }
    else {
@@ -360,16 +372,30 @@
    }
 
 out:
-   return ureg_dst_register( TGSI_FILE_OUTPUT, i );
+   return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first,
+                                  array_id);
 }
 
 
 struct ureg_dst 
-ureg_DECL_output( struct ureg_program *ureg,
-                  unsigned name,
-                  unsigned index )
+ureg_DECL_output(struct ureg_program *ureg,
+                 unsigned name,
+                 unsigned index)
 {
-   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW);
+   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
+                                  0, 1);
+}
+
+/** Declare an array output whose usage mask is TGSI_WRITEMASK_XYZW. */
+struct ureg_dst
+ureg_DECL_output_array(struct ureg_program *ureg,
+                       unsigned semantic_name,
+                       unsigned semantic_index,
+                       unsigned array_id,
+                       unsigned array_size)
+{
+   return ureg_DECL_output_masked(ureg, semantic_name, semantic_index,
+                                  TGSI_WRITEMASK_XYZW, array_id, array_size);
+}
 
 
@@ -882,7 +908,11 @@
       out[n].ind.File = src.IndirectFile;
       out[n].ind.Swizzle = src.IndirectSwizzle;
       out[n].ind.Index = src.IndirectIndex;
-      out[n].ind.ArrayID = src.ArrayID;
+      if (!ureg->supports_any_inout_decl_range &&
+          (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
+         out[n].ind.ArrayID = 0;
+      else
+         out[n].ind.ArrayID = src.ArrayID;
       n++;
    }
 
@@ -898,7 +928,11 @@
          out[n].ind.File = src.DimIndFile;
          out[n].ind.Swizzle = src.DimIndSwizzle;
          out[n].ind.Index = src.DimIndIndex;
-         out[n].ind.ArrayID = src.ArrayID;
+         if (!ureg->supports_any_inout_decl_range &&
+             (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
+            out[n].ind.ArrayID = 0;
+         else
+            out[n].ind.ArrayID = src.ArrayID;
       } else {
          out[n].dim.Indirect = 0;
          out[n].dim.Index = src.DimensionIndex;
@@ -940,7 +974,11 @@
       out[n].ind.File = dst.IndirectFile;
       out[n].ind.Swizzle = dst.IndirectSwizzle;
       out[n].ind.Index = dst.IndirectIndex;
-      out[n].ind.ArrayID = dst.ArrayID;
+      if (!ureg->supports_any_inout_decl_range &&
+          (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
+         out[n].ind.ArrayID = 0;
+      else
+         out[n].ind.ArrayID = dst.ArrayID;
       n++;
    }
 
@@ -956,7 +994,11 @@
          out[n].ind.File = dst.DimIndFile;
          out[n].ind.Swizzle = dst.DimIndSwizzle;
          out[n].ind.Index = dst.DimIndIndex;
-         out[n].ind.ArrayID = dst.ArrayID;
+         if (!ureg->supports_any_inout_decl_range &&
+             (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
+            out[n].ind.ArrayID = 0;
+         else
+            out[n].ind.ArrayID = dst.ArrayID;
       } else {
          out[n].dim.Indirect = 0;
          out[n].dim.Index = dst.DimensionIndex;
@@ -1027,6 +1069,12 @@
 }
 
 
+/**
+ * Emit a label token.
+ * \param label_token returns a token number indicating where the label
+ * needs to be patched later.  Later, this value should be passed to the
+ * ureg_fixup_label() function.
+ */
 void
 ureg_emit_label(struct ureg_program *ureg,
                 unsigned extended_token,
@@ -1254,12 +1302,14 @@
 static void
 emit_decl_semantic(struct ureg_program *ureg,
                    unsigned file,
-                   unsigned index,
+                   unsigned first,
+                   unsigned last,
                    unsigned semantic_name,
                    unsigned semantic_index,
-                   unsigned usage_mask)
+                   unsigned usage_mask,
+                   unsigned array_id)
 {
-   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
 
    out[0].value = 0;
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
@@ -1267,28 +1317,37 @@
    out[0].decl.File = file;
    out[0].decl.UsageMask = usage_mask;
    out[0].decl.Semantic = 1;
+   out[0].decl.Array = array_id != 0;
 
    out[1].value = 0;
-   out[1].decl_range.First = index;
-   out[1].decl_range.Last = index;
+   out[1].decl_range.First = first;
+   out[1].decl_range.Last = last;
 
    out[2].value = 0;
    out[2].decl_semantic.Name = semantic_name;
    out[2].decl_semantic.Index = semantic_index;
+
+   if (array_id) {
+      out[3].value = 0;
+      out[3].array.ArrayID = array_id;
+   }
 }
 
 
 static void
 emit_decl_fs(struct ureg_program *ureg,
              unsigned file,
-             unsigned index,
+             unsigned first,
+             unsigned last,
              unsigned semantic_name,
              unsigned semantic_index,
              unsigned interpolate,
              unsigned cylindrical_wrap,
-             unsigned interpolate_location)
+             unsigned interpolate_location,
+             unsigned array_id)
 {
-   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4);
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL,
+                                          array_id ? 5 : 4);
 
    out[0].value = 0;
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
@@ -1297,10 +1356,11 @@
    out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
    out[0].decl.Interpolate = 1;
    out[0].decl.Semantic = 1;
+   out[0].decl.Array = array_id != 0;
 
    out[1].value = 0;
-   out[1].decl_range.First = index;
-   out[1].decl_range.Last = index;
+   out[1].decl_range.First = first;
+   out[1].decl_range.Last = last;
 
    out[2].value = 0;
    out[2].decl_interp.Interpolate = interpolate;
@@ -1310,6 +1370,11 @@
    out[3].value = 0;
    out[3].decl_semantic.Name = semantic_name;
    out[3].decl_semantic.Index = semantic_index;
+
+   if (array_id) {
+      out[4].value = 0;
+      out[4].array.ArrayID = array_id;
+   }
 }
 
 static void
@@ -1448,37 +1513,73 @@
 
 static void emit_decls( struct ureg_program *ureg )
 {
-   unsigned i;
+   unsigned i,j;
 
    for (i = 0; i < Elements(ureg->properties); i++)
       if (ureg->properties[i] != ~0)
          emit_property(ureg, i, ureg->properties[i]);
 
    if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
-      for (i = 0; i < UREG_MAX_INPUT; i++) {
+      for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
          if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
             emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
          }
       }
    } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
-      for (i = 0; i < ureg->nr_fs_inputs; i++) {
-         emit_decl_fs(ureg,
-                      TGSI_FILE_INPUT,
-                      i,
-                      ureg->fs_input[i].semantic_name,
-                      ureg->fs_input[i].semantic_index,
-                      ureg->fs_input[i].interp,
-                      ureg->fs_input[i].cylindrical_wrap,
-                      ureg->fs_input[i].interp_location);
+      if (ureg->supports_any_inout_decl_range) {
+         for (i = 0; i < ureg->nr_inputs; i++) {
+            emit_decl_fs(ureg,
+                         TGSI_FILE_INPUT,
+                         ureg->input[i].first,
+                         ureg->input[i].last,
+                         ureg->input[i].semantic_name,
+                         ureg->input[i].semantic_index,
+                         ureg->input[i].interp,
+                         ureg->input[i].cylindrical_wrap,
+                         ureg->input[i].interp_location,
+                         ureg->input[i].array_id);
+         }
       }
-   } else {
-      for (i = 0; i < ureg->nr_gs_inputs; i++) {
-         emit_decl_semantic(ureg,
+      else {
+         for (i = 0; i < ureg->nr_inputs; i++) {
+            for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
+               emit_decl_fs(ureg,
                             TGSI_FILE_INPUT,
-                            ureg->gs_input[i].index,
-                            ureg->gs_input[i].semantic_name,
-                            ureg->gs_input[i].semantic_index,
-                            TGSI_WRITEMASK_XYZW);
+                            j, j,
+                            ureg->input[i].semantic_name,
+                            ureg->input[i].semantic_index +
+                            (j - ureg->input[i].first),
+                            ureg->input[i].interp,
+                            ureg->input[i].cylindrical_wrap,
+                            ureg->input[i].interp_location, 0);
+            }
+         }
+      }
+   } else {
+      if (ureg->supports_any_inout_decl_range) {
+         for (i = 0; i < ureg->nr_inputs; i++) {
+            emit_decl_semantic(ureg,
+                               TGSI_FILE_INPUT,
+                               ureg->input[i].first,
+                               ureg->input[i].last,
+                               ureg->input[i].semantic_name,
+                               ureg->input[i].semantic_index,
+                               TGSI_WRITEMASK_XYZW,
+                               ureg->input[i].array_id);
+         }
+      }
+      else {
+         for (i = 0; i < ureg->nr_inputs; i++) {
+            for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
+               emit_decl_semantic(ureg,
+                                  TGSI_FILE_INPUT,
+                                  j, j,
+                                  ureg->input[i].semantic_name,
+                                  ureg->input[i].semantic_index +
+                                  (j - ureg->input[i].first),
+                                  TGSI_WRITEMASK_XYZW, 0);
+            }
+         }
       }
    }
 
@@ -1486,18 +1587,36 @@
       emit_decl_semantic(ureg,
                          TGSI_FILE_SYSTEM_VALUE,
                          ureg->system_value[i].index,
+                         ureg->system_value[i].index,
                          ureg->system_value[i].semantic_name,
                          ureg->system_value[i].semantic_index,
-                         TGSI_WRITEMASK_XYZW);
+                         TGSI_WRITEMASK_XYZW, 0);
    }
 
-   for (i = 0; i < ureg->nr_outputs; i++) {
-      emit_decl_semantic(ureg,
-                         TGSI_FILE_OUTPUT,
-                         i,
-                         ureg->output[i].semantic_name,
-                         ureg->output[i].semantic_index,
-                         ureg->output[i].usage_mask);
+   if (ureg->supports_any_inout_decl_range) {
+      for (i = 0; i < ureg->nr_outputs; i++) {
+         emit_decl_semantic(ureg,
+                            TGSI_FILE_OUTPUT,
+                            ureg->output[i].first,
+                            ureg->output[i].last,
+                            ureg->output[i].semantic_name,
+                            ureg->output[i].semantic_index,
+                            ureg->output[i].usage_mask,
+                            ureg->output[i].array_id);
+      }
+   }
+   else {
+      for (i = 0; i < ureg->nr_outputs; i++) {
+         for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) {
+            emit_decl_semantic(ureg,
+                               TGSI_FILE_OUTPUT,
+                               j, j,
+                               ureg->output[i].semantic_name,
+                               ureg->output[i].semantic_index +
+                               (j - ureg->output[i].first),
+                               ureg->output[i].usage_mask, 0);
+         }
+      }
    }
 
    for (i = 0; i < ureg->nr_samplers; i++) {
@@ -1667,10 +1786,20 @@
    else
       memset(&state.stream_output, 0, sizeof(state.stream_output));
 
-   if (ureg->processor == TGSI_PROCESSOR_VERTEX)
-      return pipe->create_vs_state( pipe, &state );
-   else
-      return pipe->create_fs_state( pipe, &state );
+   switch (ureg->processor) {
+   case TGSI_PROCESSOR_VERTEX:
+      return pipe->create_vs_state(pipe, &state);
+   case TGSI_PROCESSOR_TESS_CTRL:
+      return pipe->create_tcs_state(pipe, &state);
+   case TGSI_PROCESSOR_TESS_EVAL:
+      return pipe->create_tes_state(pipe, &state);
+   case TGSI_PROCESSOR_GEOMETRY:
+      return pipe->create_gs_state(pipe, &state);
+   case TGSI_PROCESSOR_FRAGMENT:
+      return pipe->create_fs_state(pipe, &state);
+   default:
+      return NULL;
+   }
 }
 
 
@@ -1701,7 +1830,38 @@
 }
 
 
-struct ureg_program *ureg_create( unsigned processor )
+/** Translate a TGSI_PROCESSOR_* value into the matching PIPE_SHADER_*. */
+static inline unsigned
+pipe_shader_from_tgsi_processor(unsigned processor)
+{
+   if (processor == TGSI_PROCESSOR_VERTEX)
+      return PIPE_SHADER_VERTEX;
+   if (processor == TGSI_PROCESSOR_TESS_CTRL)
+      return PIPE_SHADER_TESS_CTRL;
+   if (processor == TGSI_PROCESSOR_TESS_EVAL)
+      return PIPE_SHADER_TESS_EVAL;
+   if (processor == TGSI_PROCESSOR_GEOMETRY)
+      return PIPE_SHADER_GEOMETRY;
+   if (processor == TGSI_PROCESSOR_FRAGMENT)
+      return PIPE_SHADER_FRAGMENT;
+   if (processor == TGSI_PROCESSOR_COMPUTE)
+      return PIPE_SHADER_COMPUTE;
+
+   /* unknown processor: trip the assert, then fall back to vertex */
+   assert(0);
+   return PIPE_SHADER_VERTEX;
+}
+
+/** Same as ureg_create_with_screen() with a NULL screen. */
+struct ureg_program *
+ureg_create(unsigned processor)
+{
+   return ureg_create_with_screen(processor, NULL);
+}
+
+
+struct ureg_program *
+ureg_create_with_screen(unsigned processor, struct pipe_screen *screen)
 {
    int i;
    struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
@@ -1709,6 +1869,11 @@
       goto no_ureg;
 
    ureg->processor = processor;
+   ureg->supports_any_inout_decl_range =
+      screen &&
+      screen->get_shader_param(screen,
+                               pipe_shader_from_tgsi_processor(processor),
+                               PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
 
    for (i = 0; i < Elements(ureg->properties); i++)
       ureg->properties[i] = ~0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_ureg.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_ureg.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/tgsi/tgsi_ureg.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/tgsi/tgsi_ureg.h	2015-09-16 14:36:09.000000000 +0000
@@ -36,6 +36,7 @@
 extern "C" {
 #endif
    
+struct pipe_screen;
 struct ureg_program;
 struct pipe_stream_output_info;
 
@@ -98,7 +99,10 @@
 struct pipe_context;
 
 struct ureg_program *
-ureg_create( unsigned processor );
+ureg_create(unsigned processor);
+
+struct ureg_program *
+ureg_create_with_screen(unsigned processor, struct pipe_screen *screen);
 
 const struct tgsi_token *
 ureg_finalize( struct ureg_program * );
@@ -136,7 +140,7 @@
 /***********************************************************************
  * Convenience routine:
  */
-static INLINE void *
+static inline void *
 ureg_create_shader_with_so_and_destroy( struct ureg_program *p,
 			struct pipe_context *pipe,
 			const struct pipe_stream_output_info *so )
@@ -146,7 +150,7 @@
    return result;
 }
 
-static INLINE void *
+static inline void *
 ureg_create_shader_and_destroy( struct ureg_program *p,
                                 struct pipe_context *pipe )
 {
@@ -172,9 +176,11 @@
                        unsigned semantic_index,
                        unsigned interp_mode,
                        unsigned cylindrical_wrap,
-                       unsigned interp_location);
+                       unsigned interp_location,
+                       unsigned array_id,
+                       unsigned array_size);
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
                        unsigned semantic_name,
                        unsigned semantic_index,
@@ -186,10 +192,10 @@
                                  semantic_index,
                                  interp_mode,
                                  cylindrical_wrap,
-                                 0);
+                                 0, 0, 1);
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_DECL_fs_input(struct ureg_program *ureg,
                    unsigned semantic_name,
                    unsigned semantic_index,
@@ -199,7 +205,7 @@
                                  semantic_name,
                                  semantic_index,
                                  interp_mode,
-                                 0, 0);
+                                 0, 0, 0, 1);
 }
 
 struct ureg_src
@@ -207,10 +213,11 @@
                     unsigned index );
 
 struct ureg_src
-ureg_DECL_gs_input(struct ureg_program *,
-                   unsigned index,
-                   unsigned semantic_name,
-                   unsigned semantic_index);
+ureg_DECL_input(struct ureg_program *,
+                unsigned semantic_name,
+                unsigned semantic_index,
+                unsigned array_id,
+                unsigned array_size);
 
 struct ureg_src
 ureg_DECL_system_value(struct ureg_program *,
@@ -219,15 +226,24 @@
                        unsigned semantic_index);
 
 struct ureg_dst
-ureg_DECL_output_masked( struct ureg_program *,
-                         unsigned semantic_name,
-                         unsigned semantic_index,
-                         unsigned usage_mask );
+ureg_DECL_output_masked(struct ureg_program *,
+                        unsigned semantic_name,
+                        unsigned semantic_index,
+                        unsigned usage_mask,
+                        unsigned array_id,
+                        unsigned array_size);
 
 struct ureg_dst
-ureg_DECL_output( struct ureg_program *,
-                  unsigned semantic_name,
-                  unsigned semantic_index );
+ureg_DECL_output(struct ureg_program *,
+                 unsigned semantic_name,
+                 unsigned semantic_index);
+
+struct ureg_dst
+ureg_DECL_output_array(struct ureg_program *ureg,
+                       unsigned semantic_name,
+                       unsigned semantic_index,
+                       unsigned array_id,
+                       unsigned array_size);
 
 struct ureg_src
 ureg_DECL_immediate( struct ureg_program *,
@@ -312,7 +328,7 @@
                        unsigned return_type_w );
 
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm4f( struct ureg_program *ureg,
                        float a, float b,
                        float c, float d)
@@ -325,7 +341,7 @@
    return ureg_DECL_immediate( ureg, v, 4 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm3f( struct ureg_program *ureg,
                        float a, float b,
                        float c)
@@ -337,7 +353,7 @@
    return ureg_DECL_immediate( ureg, v, 3 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm2f( struct ureg_program *ureg,
                        float a, float b)
 {
@@ -347,7 +363,7 @@
    return ureg_DECL_immediate( ureg, v, 2 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm1f( struct ureg_program *ureg,
                        float a)
 {
@@ -356,7 +372,7 @@
    return ureg_DECL_immediate( ureg, v, 1 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm4u( struct ureg_program *ureg,
             unsigned a, unsigned b,
             unsigned c, unsigned d)
@@ -369,7 +385,7 @@
    return ureg_DECL_immediate_uint( ureg, v, 4 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm3u( struct ureg_program *ureg,
             unsigned a, unsigned b,
             unsigned c)
@@ -381,7 +397,7 @@
    return ureg_DECL_immediate_uint( ureg, v, 3 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm2u( struct ureg_program *ureg,
             unsigned a, unsigned b)
 {
@@ -391,14 +407,14 @@
    return ureg_DECL_immediate_uint( ureg, v, 2 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm1u( struct ureg_program *ureg,
             unsigned a)
 {
    return ureg_DECL_immediate_uint( ureg, &a, 1 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm4i( struct ureg_program *ureg,
             int a, int b,
             int c, int d)
@@ -411,7 +427,7 @@
    return ureg_DECL_immediate_int( ureg, v, 4 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm3i( struct ureg_program *ureg,
             int a, int b,
             int c)
@@ -423,7 +439,7 @@
    return ureg_DECL_immediate_int( ureg, v, 3 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm2i( struct ureg_program *ureg,
             int a, int b)
 {
@@ -433,7 +449,7 @@
    return ureg_DECL_immediate_int( ureg, v, 2 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm1i( struct ureg_program *ureg,
             int a)
 {
@@ -443,7 +459,7 @@
 /* Where the destination register has a valid file, but an empty
  * writemask.
  */
-static INLINE boolean
+static inline boolean
 ureg_dst_is_empty( struct ureg_dst dst )
 {
    return dst.File != TGSI_FILE_NULL &&
@@ -557,7 +573,7 @@
 
 
 #define OP00( op )                                              \
-static INLINE void ureg_##op( struct ureg_program *ureg )       \
+static inline void ureg_##op( struct ureg_program *ureg )       \
 {                                                               \
    unsigned opcode = TGSI_OPCODE_##op;                          \
    struct ureg_emit_insn_result insn;                           \
@@ -576,7 +592,7 @@
 }
 
 #define OP01( op )                                              \
-static INLINE void ureg_##op( struct ureg_program *ureg,        \
+static inline void ureg_##op( struct ureg_program *ureg,        \
                               struct ureg_src src )             \
 {                                                               \
    unsigned opcode = TGSI_OPCODE_##op;                          \
@@ -597,7 +613,7 @@
 }
 
 #define OP00_LBL( op )                                          \
-static INLINE void ureg_##op( struct ureg_program *ureg,        \
+static inline void ureg_##op( struct ureg_program *ureg,        \
                               unsigned *label_token )           \
 {                                                               \
    unsigned opcode = TGSI_OPCODE_##op;                          \
@@ -618,7 +634,7 @@
 }
 
 #define OP01_LBL( op )                                          \
-static INLINE void ureg_##op( struct ureg_program *ureg,        \
+static inline void ureg_##op( struct ureg_program *ureg,        \
                               struct ureg_src src,              \
                               unsigned *label_token )          \
 {                                                               \
@@ -641,7 +657,7 @@
 }
 
 #define OP10( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst )                     \
 {                                                                       \
    unsigned opcode = TGSI_OPCODE_##op;                                  \
@@ -665,7 +681,7 @@
 
 
 #define OP11( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src )                     \
 {                                                                       \
@@ -690,7 +706,7 @@
 }
 
 #define OP12( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1 )                    \
@@ -717,7 +733,7 @@
 }
 
 #define OP12_TEX( op )                                                  \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               unsigned target,                          \
                               struct ureg_src src0,                     \
@@ -746,7 +762,7 @@
 }
 
 #define OP12_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1 )                    \
@@ -775,7 +791,7 @@
 }
 
 #define OP13( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -804,7 +820,7 @@
 }
 
 #define OP13_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -835,7 +851,7 @@
 }
 
 #define OP14_TEX( op )                                                  \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               unsigned target,                          \
                               struct ureg_src src0,                     \
@@ -868,7 +884,7 @@
 }
 
 #define OP14_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -902,7 +918,7 @@
 
 
 #define OP14( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -934,7 +950,7 @@
 
 
 #define OP15( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -967,7 +983,7 @@
 }
 
 #define OP15_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -1010,7 +1026,7 @@
 /***********************************************************************
  * Inline helpers for manipulating register structs:
  */
-static INLINE struct ureg_src 
+static inline struct ureg_src 
 ureg_negate( struct ureg_src reg )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1018,7 +1034,7 @@
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_abs( struct ureg_src reg )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1027,7 +1043,7 @@
    return reg;
 }
 
-static INLINE struct ureg_src 
+static inline struct ureg_src 
 ureg_swizzle( struct ureg_src reg, 
               int x, int y, int z, int w )
 {
@@ -1049,13 +1065,13 @@
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_scalar( struct ureg_src reg, int x )
 {
    return ureg_swizzle(reg, x, x, x, x);
 }
 
-static INLINE struct ureg_dst 
+static inline struct ureg_dst 
 ureg_writemask( struct ureg_dst reg,
                 unsigned writemask )
 {
@@ -1064,7 +1080,7 @@
    return reg;
 }
 
-static INLINE struct ureg_dst 
+static inline struct ureg_dst 
 ureg_saturate( struct ureg_dst reg )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1072,7 +1088,7 @@
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_predicate(struct ureg_dst reg,
                boolean negate,
                unsigned swizzle_x,
@@ -1090,7 +1106,7 @@
    return reg;
 }
 
-static INLINE struct ureg_dst 
+static inline struct ureg_dst 
 ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1102,7 +1118,7 @@
    return reg;
 }
 
-static INLINE struct ureg_src 
+static inline struct ureg_src 
 ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1114,7 +1130,7 @@
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_dimension( struct ureg_dst reg, int index )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1124,7 +1140,7 @@
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_dimension( struct ureg_src reg, int index )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1134,7 +1150,7 @@
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_dimension_indirect( struct ureg_dst reg, struct ureg_src addr,
                              int index )
 {
@@ -1148,7 +1164,7 @@
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr,
                              int index )
 {
@@ -1162,17 +1178,24 @@
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_src
+ureg_src_array_offset(struct ureg_src reg, int offset)
+{
+   reg.Index += offset;
+   return reg;
+}
+
+static inline struct ureg_dst
 ureg_dst_array_offset( struct ureg_dst reg, int offset )
 {
-   assert(reg.File == TGSI_FILE_TEMPORARY);
    reg.Index += offset;
    return reg;
 }
 
-static INLINE struct ureg_dst
-ureg_dst_register( unsigned file,
-                   unsigned index )
+static inline struct ureg_dst
+ureg_dst_array_register(unsigned file,
+                        unsigned index,
+                        unsigned array_id)
 {
    struct ureg_dst dst;
 
@@ -1196,12 +1219,19 @@
    dst.DimIndFile = TGSI_FILE_NULL;
    dst.DimIndIndex = 0;
    dst.DimIndSwizzle = 0;
-   dst.ArrayID = 0;
+   dst.ArrayID = array_id;
 
    return dst;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
+ureg_dst_register(unsigned file,
+                  unsigned index)
+{
+   return ureg_dst_array_register(file, index, 0);
+}
+
+static inline struct ureg_dst
 ureg_dst( struct ureg_src src )
 {
    struct ureg_dst dst;
@@ -1235,9 +1265,10 @@
    return dst;
 }
 
-static INLINE struct ureg_src
-ureg_src_register(unsigned file,
-                  unsigned index)
+static inline struct ureg_src
+ureg_src_array_register(unsigned file,
+                        unsigned index,
+                        unsigned array_id)
 {
    struct ureg_src src;
 
@@ -1259,12 +1290,19 @@
    src.DimIndFile = TGSI_FILE_NULL;
    src.DimIndIndex = 0;
    src.DimIndSwizzle = 0;
-   src.ArrayID = 0;
+   src.ArrayID = array_id;
 
    return src;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
+ureg_src_register(unsigned file,
+                  unsigned index)
+{
+   return ureg_src_array_register(file, index, 0);
+}
+
+static inline struct ureg_src
 ureg_src( struct ureg_dst dst )
 {
    struct ureg_src src;
@@ -1294,7 +1332,7 @@
 
 
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_undef( void )
 {
    struct ureg_dst dst;
@@ -1324,7 +1362,7 @@
    return dst;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_undef( void )
 {
    struct ureg_src src;
@@ -1352,13 +1390,13 @@
    return src;
 }
 
-static INLINE boolean
+static inline boolean
 ureg_src_is_undef( struct ureg_src src )
 {
    return src.File == TGSI_FILE_NULL;
 }
 
-static INLINE boolean
+static inline boolean
 ureg_dst_is_undef( struct ureg_dst dst )
 {
    return dst.File == TGSI_FILE_NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/translate/translate_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/translate/translate_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/translate/translate_cache.c	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/translate/translate_cache.c	2015-09-16 14:36:09.000000000 +0000
@@ -49,7 +49,7 @@
 }
 
 
-static INLINE void delete_translates(struct translate_cache *cache)
+static inline void delete_translates(struct translate_cache *cache)
 {
    struct cso_hash *hash = cache->hash;
    struct cso_hash_iter iter = cso_hash_first_node(hash);
@@ -70,14 +70,14 @@
 }
 
 
-static INLINE unsigned translate_hash_key_size(struct translate_key *key)
+static inline unsigned translate_hash_key_size(struct translate_key *key)
 {
    unsigned size = sizeof(struct translate_key) -
                    sizeof(struct translate_element) * (TRANSLATE_MAX_ATTRIBS - key->nr_elements);
    return size;
 }
 
-static INLINE unsigned create_key(struct translate_key *key)
+static inline unsigned create_key(struct translate_key *key)
 {
    unsigned hash_key;
    unsigned size = translate_hash_key_size(key);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/translate/translate.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/translate/translate.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/translate/translate.h	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/translate/translate.h	2015-09-16 14:36:09.000000000 +0000
@@ -130,12 +130,12 @@
 
 boolean translate_is_output_format_supported(enum pipe_format format);
 
-static INLINE int translate_keysize( const struct translate_key *key )
+static inline int translate_keysize( const struct translate_key *key )
 {
    return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
 }
 
-static INLINE int translate_key_compare( const struct translate_key *a,
+static inline int translate_key_compare( const struct translate_key *a,
                                          const struct translate_key *b )
 {
    int keysize_a = translate_keysize(a);
@@ -148,7 +148,7 @@
 }
 
 
-static INLINE void translate_key_sanitize( struct translate_key *a )
+static inline void translate_key_sanitize( struct translate_key *a )
 {
    int keysize = translate_keysize(a);
    char *ptr = (char *)a;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_bitmask.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_bitmask.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_bitmask.c	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_bitmask.c	2015-09-16 14:36:09.000000000 +0000
@@ -85,7 +85,7 @@
 /**
  * Resize the bitmask if necessary 
  */
-static INLINE boolean
+static inline boolean
 util_bitmask_resize(struct util_bitmask *bm,
                     unsigned minimum_index)
 {
@@ -131,7 +131,7 @@
 /**
  * Lazily update the filled.
  */
-static INLINE void
+static inline void
 util_bitmask_filled_set(struct util_bitmask *bm,
                         unsigned index)
 {
@@ -144,7 +144,7 @@
    }
 }
 
-static INLINE void
+static inline void
 util_bitmask_filled_unset(struct util_bitmask *bm,
                           unsigned index)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blend.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blend.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blend.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blend.h	2015-09-16 14:36:09.000000000 +0000
@@ -9,7 +9,7 @@
  * garbage that's there. Return a blend factor that will take that into
  * account.
  */
-static INLINE int
+static inline int
 util_blend_dst_alpha_to_one(int factor)
 {
    switch (factor) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -65,7 +65,7 @@
    struct pipe_vertex_element velem[2];
 
    void *vs;
-   void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1];
+   void *fs[PIPE_MAX_TEXTURE_TYPES][TGSI_WRITEMASK_XYZW + 1][3];
 
    struct pipe_resource *vbuf;  /**< quad vertices */
    unsigned vbuf_slot;
@@ -135,15 +135,17 @@
 util_destroy_blit(struct blit_state *ctx)
 {
    struct pipe_context *pipe = ctx->pipe;
-   unsigned i, j;
+   unsigned i, j, k;
 
    if (ctx->vs)
       pipe->delete_vs_state(pipe, ctx->vs);
 
    for (i = 0; i < Elements(ctx->fs); i++) {
       for (j = 0; j < Elements(ctx->fs[i]); j++) {
-         if (ctx->fs[i][j])
-            pipe->delete_fs_state(pipe, ctx->fs[i][j]);
+         for (k = 0; k < Elements(ctx->fs[i][j]); k++) {
+            if (ctx->fs[i][j][k])
+               pipe->delete_fs_state(pipe, ctx->fs[i][j][k]);
+         }
       }
    }
 
@@ -156,27 +158,43 @@
 /**
  * Helper function to set the fragment shaders.
  */
-static INLINE void
+static inline void
 set_fragment_shader(struct blit_state *ctx, uint writemask,
+                    enum pipe_format format,
                     enum pipe_texture_target pipe_tex)
 {
-   if (!ctx->fs[pipe_tex][writemask]) {
+   enum tgsi_return_type stype;
+   unsigned idx;
+
+   if (util_format_is_pure_uint(format)) {
+      stype = TGSI_RETURN_TYPE_UINT;
+      idx = 0;
+   } else if (util_format_is_pure_sint(format)) {
+      stype = TGSI_RETURN_TYPE_SINT;
+      idx = 1;
+   } else {
+      stype = TGSI_RETURN_TYPE_FLOAT;
+      idx = 2;
+   }
+
+   if (!ctx->fs[pipe_tex][writemask][idx]) {
       unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0);
 
-      ctx->fs[pipe_tex][writemask] =
+      ctx->fs[pipe_tex][writemask][idx] =
          util_make_fragment_tex_shader_writemask(ctx->pipe, tgsi_tex,
                                                  TGSI_INTERPOLATE_LINEAR,
-                                                 writemask);
+                                                 writemask,
+                                                 stype);
    }
 
-   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][writemask]);
+   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][writemask][idx]);
 }
 
 
 /**
  * Helper function to set the vertex shader.
  */
-static INLINE void
+static inline void
 set_vertex_shader(struct blit_state *ctx)
 {
    /* vertex shader - still required to provide the linkage between
@@ -528,8 +546,8 @@
    cso_save_rasterizer(ctx->cso);
    cso_save_sample_mask(ctx->cso);
    cso_save_min_samples(ctx->cso);
-   cso_save_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
-   cso_save_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(ctx->cso);
+   cso_save_fragment_sampler_views(ctx->cso);
    cso_save_stream_outputs(ctx->cso);
    cso_save_viewport(ctx->cso);
    cso_save_framebuffer(ctx->cso);
@@ -554,8 +572,10 @@
    ctx->sampler.normalized_coords = normalized;
    ctx->sampler.min_img_filter = filter;
    ctx->sampler.mag_img_filter = filter;
-   cso_single_sampler(ctx->cso, PIPE_SHADER_FRAGMENT, 0, &ctx->sampler);
-   cso_single_sampler_done(ctx->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] = {&ctx->sampler};
+      cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+   }
 
    /* viewport */
    ctx->viewport.scale[0] = 0.5f * dst->width;
@@ -571,6 +591,7 @@
 
    /* shaders */
    set_fragment_shader(ctx, TGSI_WRITEMASK_XYZW,
+                       src_sampler_view->format,
                        src_sampler_view->texture->target);
    set_vertex_shader(ctx);
    cso_set_tessctrl_shader_handle(ctx->cso, NULL);
@@ -609,8 +630,8 @@
    cso_restore_rasterizer(ctx->cso);
    cso_restore_sample_mask(ctx->cso);
    cso_restore_min_samples(ctx->cso);
-   cso_restore_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_samplers(ctx->cso);
+   cso_restore_fragment_sampler_views(ctx->cso);
    cso_restore_viewport(ctx->cso);
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blitter.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blitter.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blitter.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blitter.c	2015-09-16 14:36:09.000000000 +0000
@@ -81,6 +81,8 @@
    /* FS which outputs a color from a texture,
       where the index is PIPE_TEXTURE_* to be sampled. */
    void *fs_texfetch_col[PIPE_MAX_TEXTURE_TYPES];
+   void *fs_texfetch_col_uint[PIPE_MAX_TEXTURE_TYPES];
+   void *fs_texfetch_col_sint[PIPE_MAX_TEXTURE_TYPES];
 
    /* FS which outputs a depth from a texture,
       where the index is PIPE_TEXTURE_* to be sampled. */
@@ -90,6 +92,8 @@
 
    /* FS which outputs one sample from a multisample texture. */
    void *fs_texfetch_col_msaa[PIPE_MAX_TEXTURE_TYPES];
+   void *fs_texfetch_col_msaa_uint[PIPE_MAX_TEXTURE_TYPES];
+   void *fs_texfetch_col_msaa_sint[PIPE_MAX_TEXTURE_TYPES];
    void *fs_texfetch_depth_msaa[PIPE_MAX_TEXTURE_TYPES];
    void *fs_texfetch_depthstencil_msaa[PIPE_MAX_TEXTURE_TYPES];
    void *fs_texfetch_stencil_msaa[PIPE_MAX_TEXTURE_TYPES];
@@ -100,7 +104,7 @@
    void *fs_resolve_uint[PIPE_MAX_TEXTURE_TYPES][NUM_RESOLVE_FRAG_SHADERS][2];
 
    /* Blend state. */
-   void *blend[PIPE_MASK_RGBA+1]; /**< blend state with writemask */
+   void *blend[PIPE_MASK_RGBA+1][2]; /**< blend state with writemask */
    void *blend_clear[GET_CLEAR_BLEND_STATE_IDX(PIPE_CLEAR_COLOR)+1];
 
    /* Depth stencil alpha state. */
@@ -155,7 +159,7 @@
    struct pipe_rasterizer_state rs_state;
    struct pipe_sampler_state sampler_state;
    struct pipe_vertex_element velem[2];
-   unsigned i;
+   unsigned i, j;
 
    ctx = CALLOC_STRUCT(blitter_context_priv);
    if (!ctx)
@@ -204,8 +208,20 @@
    memset(&blend, 0, sizeof(blend));
 
    for (i = 0; i <= PIPE_MASK_RGBA; i++) {
-      blend.rt[0].colormask = i;
-      ctx->blend[i] = pipe->create_blend_state(pipe, &blend);
+      for (j = 0; j < 2; j++) {
+         memset(&blend.rt[0], 0, sizeof(blend.rt[0]));
+         blend.rt[0].colormask = i;
+         if (j) {
+            blend.rt[0].blend_enable = 1;
+            blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+            blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+            blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+            blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+            blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+            blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+         }
+         ctx->blend[i][j] = pipe->create_blend_state(pipe, &blend);
+      }
    }
 
    /* depth stencil alpha state objects */
@@ -405,9 +421,10 @@
    struct pipe_context *pipe = blitter->pipe;
    int i, j, f;
 
-   for (i = 0; i <= PIPE_MASK_RGBA; i++) {
-      pipe->delete_blend_state(pipe, ctx->blend[i]);
-   }
+   for (i = 0; i <= PIPE_MASK_RGBA; i++)
+      for (j = 0; j < 2; j++)
+         pipe->delete_blend_state(pipe, ctx->blend[i][j]);
+
    for (i = 0; i < Elements(ctx->blend_clear); i++) {
       if (ctx->blend_clear[i])
          pipe->delete_blend_state(pipe, ctx->blend_clear[i]);
@@ -438,6 +455,10 @@
    for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
       if (ctx->fs_texfetch_col[i])
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_col[i]);
+      if (ctx->fs_texfetch_col_sint[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_sint[i]);
+      if (ctx->fs_texfetch_col_uint[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_uint[i]);
       if (ctx->fs_texfetch_depth[i])
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
       if (ctx->fs_texfetch_depthstencil[i])
@@ -447,6 +468,10 @@
 
       if (ctx->fs_texfetch_col_msaa[i])
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_msaa[i]);
+      if (ctx->fs_texfetch_col_msaa_sint[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_msaa_sint[i]);
+      if (ctx->fs_texfetch_col_msaa_uint[i])
+         ctx->delete_fs_state(pipe, ctx->fs_texfetch_col_msaa_uint[i]);
       if (ctx->fs_texfetch_depth_msaa[i])
          ctx->delete_fs_state(pipe, ctx->fs_texfetch_depth_msaa[i]);
       if (ctx->fs_texfetch_depthstencil_msaa[i])
@@ -844,25 +869,29 @@
 {
    struct pipe_context *pipe = ctx->base.pipe;
    unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(target, src_nr_samples);
+   enum tgsi_return_type stype;
 
    assert(target < PIPE_MAX_TEXTURE_TYPES);
 
+   if (util_format_is_pure_uint(format))
+      stype = TGSI_RETURN_TYPE_UINT;
+   else if (util_format_is_pure_sint(format))
+      stype = TGSI_RETURN_TYPE_SINT;
+   else
+      stype = TGSI_RETURN_TYPE_FLOAT;
+
    if (src_nr_samples > 1) {
       void **shader;
 
       if (dst_nr_samples <= 1) {
          /* The destination has one sample, so we'll do color resolve. */
-         boolean is_uint, is_sint;
          unsigned index = GET_MSAA_RESOLVE_FS_IDX(src_nr_samples);
 
-         is_uint = util_format_is_pure_uint(format);
-         is_sint = util_format_is_pure_sint(format);
-
          assert(filter < 2);
 
-         if (is_uint)
+         if (stype == TGSI_RETURN_TYPE_UINT)
             shader = &ctx->fs_resolve_uint[target][index][filter];
-         else if (is_sint)
+         else if (stype == TGSI_RETURN_TYPE_SINT)
             shader = &ctx->fs_resolve_sint[target][index][filter];
          else
             shader = &ctx->fs_resolve[target][index][filter];
@@ -872,12 +901,12 @@
             if (filter == PIPE_TEX_FILTER_LINEAR) {
                *shader = util_make_fs_msaa_resolve_bilinear(pipe, tgsi_tex,
                                                    src_nr_samples,
-                                                   is_uint, is_sint);
+                                                   stype);
             }
             else {
                *shader = util_make_fs_msaa_resolve(pipe, tgsi_tex,
                                                    src_nr_samples,
-                                                   is_uint, is_sint);
+                                                   stype);
             }
          }
       }
@@ -885,31 +914,44 @@
          /* The destination has multiple samples, we'll do
           * an MSAA->MSAA copy.
           */
-         shader = &ctx->fs_texfetch_col_msaa[target];
+          if (stype == TGSI_RETURN_TYPE_UINT)
+             shader = &ctx->fs_texfetch_col_msaa_uint[target];
+          else if (stype == TGSI_RETURN_TYPE_SINT)
+             shader = &ctx->fs_texfetch_col_msaa_sint[target];
+          else
+             shader = &ctx->fs_texfetch_col_msaa[target];
 
          /* Create the fragment shader on-demand. */
          if (!*shader) {
             assert(!ctx->cached_all_shaders);
-            *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex);
+            *shader = util_make_fs_blit_msaa_color(pipe, tgsi_tex, stype);
          }
       }
 
       return *shader;
    } else {
-      void **shader = &ctx->fs_texfetch_col[target];
+      void **shader;
+
+      if (stype == TGSI_RETURN_TYPE_UINT)
+         shader = &ctx->fs_texfetch_col_uint[target];
+      else if (stype == TGSI_RETURN_TYPE_SINT)
+         shader = &ctx->fs_texfetch_col_sint[target];
+      else
+         shader = &ctx->fs_texfetch_col[target];
 
       /* Create the fragment shader on-demand. */
       if (!*shader) {
          assert(!ctx->cached_all_shaders);
          *shader = util_make_fragment_tex_shader(pipe, tgsi_tex,
-                                                 TGSI_INTERPOLATE_LINEAR);
+                                                 TGSI_INTERPOLATE_LINEAR,
+                                                 stype);
       }
 
       return *shader;
    }
 }
 
-static INLINE
+static inline
 void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
                                     enum pipe_texture_target target,
                                     unsigned nr_samples)
@@ -947,7 +989,7 @@
    }
 }
 
-static INLINE
+static inline
 void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
                                            enum pipe_texture_target target,
                                            unsigned nr_samples)
@@ -985,7 +1027,7 @@
    }
 }
 
-static INLINE
+static inline
 void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx,
                                       enum pipe_texture_target target,
                                       unsigned nr_samples)
@@ -1066,6 +1108,10 @@
           */
          blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_FLOAT, target,
                                      samples, samples, 0);
+         blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_UINT, target,
+                                     samples, samples, 0);
+         blitter_get_fs_texfetch_col(ctx, PIPE_FORMAT_R32_SINT, target,
+                                     samples, samples, 0);
          blitter_get_fs_texfetch_depth(ctx, target, samples);
          if (ctx->has_stencil_export) {
             blitter_get_fs_texfetch_depthstencil(ctx, target, samples);
@@ -1184,7 +1230,7 @@
 
    /* Return an existing blend state. */
    if (!clear_buffers)
-      return ctx->blend[0];
+      return ctx->blend[0][0];
 
    index = GET_CLEAR_BLEND_STATE_IDX(clear_buffers);
 
@@ -1450,7 +1496,8 @@
    /* Copy. */
    util_blitter_blit_generic(blitter, dst_view, &dstbox,
                              src_view, srcbox, src->width0, src->height0,
-                             PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+                             PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+                             FALSE);
 
    pipe_surface_reference(&dst_view, NULL);
    pipe_sampler_view_reference(&src_view, NULL);
@@ -1463,7 +1510,8 @@
                                const struct pipe_box *srcbox,
                                unsigned src_width0, unsigned src_height0,
                                unsigned mask, unsigned filter,
-                               const struct pipe_scissor_state *scissor)
+                               const struct pipe_scissor_state *scissor,
+                               boolean alpha_blend)
 {
    struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
    struct pipe_context *pipe = ctx->base.pipe;
@@ -1517,7 +1565,7 @@
    fb_state.zsbuf = NULL;
 
    if (blit_depth || blit_stencil) {
-      pipe->bind_blend_state(pipe, ctx->blend[0]);
+      pipe->bind_blend_state(pipe, ctx->blend[0][0]);
 
       if (blit_depth && blit_stencil) {
          pipe->bind_depth_stencil_alpha_state(pipe,
@@ -1540,7 +1588,9 @@
       }
 
    } else {
-      pipe->bind_blend_state(pipe, ctx->blend[mask & PIPE_MASK_RGBA]);
+      unsigned colormask = mask & PIPE_MASK_RGBA;
+
+      pipe->bind_blend_state(pipe, ctx->blend[colormask][alpha_blend]);
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
       ctx->bind_fs_state(pipe,
             blitter_get_fs_texfetch_col(ctx, src->format, src_target,
@@ -1753,7 +1803,8 @@
    util_blitter_blit_generic(blitter, dst_view, &info->dst.box,
                              src_view, &info->src.box, src->width0, src->height0,
                              info->mask, info->filter,
-                             info->scissor_enable ? &info->scissor : NULL);
+                             info->scissor_enable ? &info->scissor : NULL,
+                             info->alpha_blend);
 
    pipe_surface_reference(&dst_view, NULL);
    pipe_sampler_view_reference(&src_view, NULL);
@@ -1782,7 +1833,7 @@
    blitter_disable_render_cond(ctx);
 
    /* bind states */
-   pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
+   pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA][0]);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
    bind_fs_write_one_cbuf(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
@@ -1834,7 +1885,7 @@
    blitter_disable_render_cond(ctx);
 
    /* bind states */
-   pipe->bind_blend_state(pipe, ctx->blend[0]);
+   pipe->bind_blend_state(pipe, ctx->blend[0][0]);
    if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
       sr.ref_value[0] = stencil & 0xff;
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
@@ -1900,8 +1951,8 @@
    blitter_disable_render_cond(ctx);
 
    /* bind states */
-   pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
-                                         ctx->blend[0]);
+   pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA][0] :
+                                         ctx->blend[0][0]);
    pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
    if (cbsurf)
       bind_fs_write_one_cbuf(ctx);
@@ -2154,7 +2205,7 @@
 
    /* bind states */
    pipe->bind_blend_state(pipe, custom_blend ? custom_blend
-                                             : ctx->blend[PIPE_MASK_RGBA]);
+                                             : ctx->blend[PIPE_MASK_RGBA][0]);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
    bind_fs_write_one_cbuf(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blitter.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blitter.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_blitter.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_blitter.h	2015-09-16 14:36:09.000000000 +0000
@@ -143,7 +143,7 @@
 /**
  * Return the pipe context associated with a blitter context.
  */
-static INLINE
+static inline
 struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
 {
    return blitter->pipe;
@@ -246,7 +246,8 @@
                                const struct pipe_box *srcbox,
                                unsigned src_width0, unsigned src_height0,
                                unsigned mask, unsigned filter,
-                               const struct pipe_scissor_state *scissor);
+                               const struct pipe_scissor_state *scissor,
+                               boolean alpha_blend);
 
 void util_blitter_blit(struct blitter_context *blitter,
 		       const struct pipe_blit_info *info);
@@ -371,77 +372,77 @@
  *
  * States not listed here are not affected by util_blitter. */
 
-static INLINE
+static inline
 void util_blitter_save_blend(struct blitter_context *blitter,
                              void *state)
 {
    blitter->saved_blend_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
                                            void *state)
 {
    blitter->saved_dsa_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_vertex_elements(struct blitter_context *blitter,
                                        void *state)
 {
    blitter->saved_velem_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_stencil_ref(struct blitter_context *blitter,
                                    const struct pipe_stencil_ref *state)
 {
    blitter->saved_stencil_ref = *state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_rasterizer(struct blitter_context *blitter,
                                   void *state)
 {
    blitter->saved_rs_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_fragment_shader(struct blitter_context *blitter,
                                        void *fs)
 {
    blitter->saved_fs = fs;
 }
 
-static INLINE
+static inline
 void util_blitter_save_vertex_shader(struct blitter_context *blitter,
                                      void *vs)
 {
    blitter->saved_vs = vs;
 }
 
-static INLINE
+static inline
 void util_blitter_save_geometry_shader(struct blitter_context *blitter,
                                        void *gs)
 {
    blitter->saved_gs = gs;
 }
 
-static INLINE void
+static inline void
 util_blitter_save_tessctrl_shader(struct blitter_context *blitter,
                                   void *sh)
 {
    blitter->saved_tcs = sh;
 }
 
-static INLINE void
+static inline void
 util_blitter_save_tesseval_shader(struct blitter_context *blitter,
                                   void *sh)
 {
    blitter->saved_tes = sh;
 }
 
-static INLINE
+static inline
 void util_blitter_save_framebuffer(struct blitter_context *blitter,
                                    const struct pipe_framebuffer_state *state)
 {
@@ -449,21 +450,21 @@
    util_copy_framebuffer_state(&blitter->saved_fb_state, state);
 }
 
-static INLINE
+static inline
 void util_blitter_save_viewport(struct blitter_context *blitter,
                                 struct pipe_viewport_state *state)
 {
    blitter->saved_viewport = *state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_scissor(struct blitter_context *blitter,
                                struct pipe_scissor_state *state)
 {
    blitter->saved_scissor = *state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_fragment_sampler_states(
                   struct blitter_context *blitter,
                   unsigned num_sampler_states,
@@ -476,7 +477,7 @@
           num_sampler_states * sizeof(void *));
 }
 
-static INLINE void
+static inline void
 util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
                                          unsigned num_views,
                                          struct pipe_sampler_view **views)
@@ -490,7 +491,7 @@
                                   views[i]);
 }
 
-static INLINE void
+static inline void
 util_blitter_save_vertex_buffer_slot(struct blitter_context *blitter,
                                      struct pipe_vertex_buffer *vertex_buffers)
 {
@@ -500,7 +501,7 @@
           sizeof(struct pipe_vertex_buffer));
 }
 
-static INLINE void
+static inline void
 util_blitter_save_so_targets(struct blitter_context *blitter,
                              unsigned num_targets,
                              struct pipe_stream_output_target **targets)
@@ -514,7 +515,7 @@
                                targets[i]);
 }
 
-static INLINE void
+static inline void
 util_blitter_save_sample_mask(struct blitter_context *blitter,
                               unsigned sample_mask)
 {
@@ -522,7 +523,7 @@
    blitter->saved_sample_mask = sample_mask;
 }
 
-static INLINE void
+static inline void
 util_blitter_save_render_condition(struct blitter_context *blitter,
                                    struct pipe_query *query,
                                    boolean condition,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_box.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_box.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_box.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_box.h	2015-09-16 14:36:09.000000000 +0000
@@ -4,7 +4,7 @@
 #include "pipe/p_state.h"
 #include "util/u_math.h"
 
-static INLINE
+static inline
 void u_box_1d( unsigned x,
 	       unsigned w,
 	       struct pipe_box *box )
@@ -17,7 +17,7 @@
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_2d( unsigned x,
 	       unsigned y,
 	       unsigned w,
@@ -32,7 +32,7 @@
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_origin_2d( unsigned w,
 		      unsigned h,
 		      struct pipe_box *box )
@@ -45,7 +45,7 @@
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_2d_zslice( unsigned x,
 		      unsigned y,
 		      unsigned z,
@@ -61,7 +61,7 @@
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_3d( unsigned x,
 	       unsigned y,
 	       unsigned z,
@@ -86,7 +86,7 @@
  *          3 if both width and height have been reduced.
  * Aliasing permitted.
  */
-static INLINE int
+static inline int
 u_box_clip_2d(struct pipe_box *dst,
               const struct pipe_box *box, int w, int h)
 {
@@ -129,14 +129,14 @@
    return res;
 }
 
-static INLINE int64_t
+static inline int64_t
 u_box_volume_3d(const struct pipe_box *box)
 {
    return (int64_t)box->width * box->height * box->depth;
 }
 
 /* Aliasing of @dst permitted. */
-static INLINE void
+static inline void
 u_box_union_2d(struct pipe_box *dst,
                const struct pipe_box *a, const struct pipe_box *b)
 {
@@ -148,7 +148,7 @@
 }
 
 /* Aliasing of @dst permitted. */
-static INLINE void
+static inline void
 u_box_union_3d(struct pipe_box *dst,
                const struct pipe_box *a, const struct pipe_box *b)
 {
@@ -161,7 +161,7 @@
    dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - dst->z;
 }
 
-static INLINE boolean
+static inline boolean
 u_box_test_intersection_2d(const struct pipe_box *a,
                            const struct pipe_box *b)
 {
@@ -185,7 +185,7 @@
    return TRUE;
 }
 
-static INLINE void
+static inline void
 u_box_minify_2d(struct pipe_box *dst,
                 const struct pipe_box *src, unsigned l)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_cache.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_cache.c	2015-09-16 14:36:09.000000000 +0000
@@ -155,7 +155,7 @@
    return NULL;
 }
 
-static INLINE void
+static inline void
 util_cache_entry_destroy(struct util_cache *cache,
                          struct util_cache_entry *entry)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_clear.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_clear.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_clear.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_clear.h	2015-09-16 14:36:09.000000000 +0000
@@ -37,7 +37,7 @@
  * Clear the given buffers to the specified values.
  * No masking, no scissor (clear entire buffer).
  */
-static INLINE void
+static inline void
 util_clear(struct pipe_context *pipe,
            struct pipe_framebuffer_state *framebuffer, unsigned buffers,
            const union pipe_color_union *color, double depth, unsigned stencil)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_cpu_detect.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_cpu_detect.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_cpu_detect.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_cpu_detect.c	2015-09-16 14:36:09.000000000 +0000
@@ -179,7 +179,7 @@
  * @sa cpuid.h included in gcc-4.3 onwards.
  * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
  */
-static INLINE void
+static inline void
 cpuid(uint32_t ax, uint32_t *p)
 {
 #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
@@ -216,7 +216,7 @@
  * @sa cpuid.h included in gcc-4.4 onwards.
  * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
  */
-static INLINE void
+static inline void
 cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
 {
 #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
@@ -250,7 +250,7 @@
 }
 
 
-static INLINE uint64_t xgetbv(void)
+static inline uint64_t xgetbv(void)
 {
 #if defined(PIPE_CC_GCC)
    uint32_t eax, edx;
@@ -272,7 +272,7 @@
 
 
 #if defined(PIPE_ARCH_X86)
-PIPE_ALIGN_STACK static INLINE boolean sse2_has_daz(void)
+PIPE_ALIGN_STACK static inline boolean sse2_has_daz(void)
 {
    struct {
       uint32_t pad1[7];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,6 +41,7 @@
 #include "util/u_tile.h" 
 #include "util/u_prim.h"
 #include "util/u_surface.h"
+#include <inttypes.h>
 
 #include <stdio.h>
 #include <limits.h> /* CHAR_BIT */
@@ -256,12 +257,12 @@
    return FALSE;
 }
 
-unsigned long
+uint64_t
 debug_get_flags_option(const char *name, 
                        const struct debug_named_value *flags,
-                       unsigned long dfault)
+                       uint64_t dfault)
 {
-   unsigned long result;
+   uint64_t result;
    const char *str;
    const struct debug_named_value *orig = flags;
    unsigned namealign = 0;
@@ -275,8 +276,8 @@
       for (; flags->name; ++flags)
          namealign = MAX2(namealign, strlen(flags->name));
       for (flags = orig; flags->name; ++flags)
-         _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
-                      (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value,
+         _debug_printf("| %*s [0x%0*"PRIx64"]%s%s\n", namealign, flags->name,
+                      (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
                       flags->desc ? " " : "", flags->desc ? flags->desc : "");
    }
    else {
@@ -290,9 +291,9 @@
 
    if (debug_get_option_should_print()) {
       if (str) {
-         debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+         debug_printf("%s: %s = 0x%"PRIx64" (%s)\n", __FUNCTION__, name, result, str);
       } else {
-         debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+         debug_printf("%s: %s = 0x%"PRIx64"\n", __FUNCTION__, name, result);
       }
    }
 
@@ -758,7 +759,8 @@
       DEBUG_NAMED_VALUE(PIPE_BIND_CURSOR),
       DEBUG_NAMED_VALUE(PIPE_BIND_CUSTOM),
       DEBUG_NAMED_VALUE(PIPE_BIND_GLOBAL),
-      DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_RESOURCE),
+      DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_BUFFER),
+      DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_IMAGE),
       DEBUG_NAMED_VALUE(PIPE_BIND_COMPUTE_RESOURCE),
       DEBUG_NAMED_VALUE(PIPE_BIND_COMMAND_ARGS_BUFFER),
       DEBUG_NAMED_VALUE(PIPE_BIND_SCANOUT),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_describe.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_describe.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_describe.c	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_describe.c	2015-09-16 14:36:09.000000000 +0000
@@ -81,6 +81,15 @@
 }
 
 void
+debug_describe_image_view(char* buf, const struct pipe_image_view *ptr)
+{
+   char res[128];
+   debug_describe_resource(res, ptr->resource);
+   util_sprintf(buf, "pipe_image_view<%s,%s>", res,
+                util_format_short_name(ptr->format));
+}
+
+void
 debug_describe_so_target(char* buf,
                          const struct pipe_stream_output_target *ptr)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_describe.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_describe.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_describe.h	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_describe.h	2015-09-16 14:36:09.000000000 +0000
@@ -35,12 +35,14 @@
 struct pipe_resource;
 struct pipe_surface;
 struct pipe_sampler_view;
+struct pipe_image_view;
 
 /* a 256-byte buffer is necessary and sufficient */
 void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
 void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
 void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
 void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
+void debug_describe_image_view(char* buf, const struct pipe_image_view *ptr);
 void debug_describe_so_target(char* buf,
                               const struct pipe_stream_output_target *ptr);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug.h	2015-09-16 14:36:09.000000000 +0000
@@ -58,7 +58,7 @@
 void _debug_vprintf(const char *format, va_list ap);
    
 
-static INLINE void
+static inline void
 _debug_printf(const char *format, ...)
 {
    va_list ap;
@@ -78,10 +78,10 @@
  * that is guaranteed to be printed in all platforms)
  */
 #if !defined(PIPE_OS_HAIKU)
-static INLINE void
+static inline void
 debug_printf(const char *format, ...) _util_printf_format(1,2);
 
-static INLINE void
+static inline void
 debug_printf(const char *format, ...)
 {
 #ifdef DEBUG
@@ -269,7 +269,7 @@
 struct debug_named_value
 {
    const char *name;
-   unsigned long value;
+   uint64_t value;
    const char *desc;
 };
 
@@ -377,10 +377,10 @@
 long
 debug_get_num_option(const char *name, long dfault);
 
-unsigned long
+uint64_t
 debug_get_flags_option(const char *name, 
                        const struct debug_named_value *flags,
-                       unsigned long dfault);
+                       uint64_t dfault);
 
 #define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
 static boolean \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_memory.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_memory.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_memory.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_memory.c	2015-09-16 14:36:09.000000000 +0000
@@ -92,7 +92,7 @@
 static unsigned long last_no = 0;
 
 
-static INLINE struct debug_memory_header *
+static inline struct debug_memory_header *
 header_from_data(void *data)
 {
    if(data)
@@ -101,7 +101,7 @@
       return NULL;
 }
 
-static INLINE void *
+static inline void *
 data_from_header(struct debug_memory_header *hdr)
 {
    if(hdr)
@@ -110,7 +110,7 @@
       return NULL;
 }
 
-static INLINE struct debug_memory_footer *
+static inline struct debug_memory_footer *
 footer_from_header(struct debug_memory_header *hdr)
 {
    if(hdr)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_refcnt.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_refcnt.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_refcnt.h	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_refcnt.h	2015-09-16 14:36:09.000000000 +0000
@@ -42,7 +42,7 @@
 
 void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
 
-static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+static inline void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
 {
    if (debug_refcnt_state >= 0)
       debug_reference_slowpath(p, get_desc, change);
@@ -50,7 +50,7 @@
 
 #else
 
-static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+static inline void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
 {
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_symbol.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_symbol.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_debug_symbol.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_debug_symbol.c	2015-09-16 14:36:09.000000000 +0000
@@ -146,7 +146,7 @@
 #undef DBGHELP_DISPATCH
 
 
-static INLINE boolean
+static inline boolean
 debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size)
 {
    DWORD64 dwAddr = (DWORD64)(uintptr_t)addr;
@@ -227,7 +227,7 @@
  *
  * To fix this, post-process the output with tools/addr2line.sh
  */
-static INLINE boolean
+static inline boolean
 debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
 {
    char** syms = backtrace_symbols((void**)&addr, 1);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dirty_surfaces.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dirty_surfaces.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dirty_surfaces.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dirty_surfaces.h	2015-09-16 14:36:09.000000000 +0000
@@ -47,13 +47,13 @@
    struct list_head dirty_list;
 };
 
-static INLINE void
+static inline void
 util_dirty_surfaces_init(struct util_dirty_surfaces *ds)
 {
    LIST_INITHEAD(&ds->dirty_list);
 }
 
-static INLINE void
+static inline void
 util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, util_dirty_surface_flush_t flush)
 {
    struct list_head *p, *next;
@@ -66,7 +66,7 @@
    }
 }
 
-static INLINE void
+static inline void
 util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, unsigned first, unsigned last, util_dirty_surface_flush_t flush)
 {
    struct list_head *p, *next;
@@ -82,7 +82,7 @@
    }
 }
 
-static INLINE void
+static inline void
 util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util_dirty_surfaces *dss, struct pipe_sampler_view *psv, struct pipe_sampler_state *pss, util_dirty_surface_flush_t flush)
 {
    if(!LIST_IS_EMPTY(&dss->dirty_list))
@@ -90,26 +90,26 @@
 						  MIN2((unsigned)ceilf(pss->max_lod) + psv->u.tex.first_level, psv->u.tex.last_level), flush);
 }
 
-static INLINE void
+static inline void
 util_dirty_surface_init(struct util_dirty_surface *ds)
 {
    LIST_INITHEAD(&ds->dirty_list);
 }
 
-static INLINE boolean
+static inline boolean
 util_dirty_surface_is_dirty(struct util_dirty_surface *ds)
 {
    return !LIST_IS_EMPTY(&ds->dirty_list);
 }
 
-static INLINE void
+static inline void
 util_dirty_surface_set_dirty(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
 {
    if(LIST_IS_EMPTY(&ds->dirty_list))
       LIST_ADDTAIL(&ds->dirty_list, &dss->dirty_list);
 }
 
-static INLINE void
+static inline void
 util_dirty_surface_set_clean(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
 {
    if(!LIST_IS_EMPTY(&ds->dirty_list))
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_draw.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_draw.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_draw.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_draw.h	2015-09-16 14:36:09.000000000 +0000
@@ -39,7 +39,7 @@
 #endif
 
 
-static INLINE void
+static inline void
 util_draw_init_info(struct pipe_draw_info *info)
 {
    memset(info, 0, sizeof(*info));
@@ -48,7 +48,7 @@
 }
 
 
-static INLINE void
+static inline void
 util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
 {
    struct pipe_draw_info info;
@@ -63,7 +63,7 @@
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_elements(struct pipe_context *pipe, int index_bias,
                    uint mode, uint start, uint count)
 {
@@ -79,7 +79,7 @@
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_arrays_instanced(struct pipe_context *pipe,
                            uint mode, uint start, uint count,
                            uint start_instance,
@@ -99,7 +99,7 @@
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_elements_instanced(struct pipe_context *pipe,
                              int index_bias,
                              uint mode, uint start, uint count,
@@ -120,7 +120,7 @@
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_range_elements(struct pipe_context *pipe,
                          int index_bias,
                          uint min_index,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dual_blend.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dual_blend.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dual_blend.h	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dual_blend.h	2015-09-16 14:36:09.000000000 +0000
@@ -3,7 +3,7 @@
 
 #include "pipe/p_state.h"
 
-static INLINE boolean util_blend_factor_is_dual_src(int factor)
+static inline boolean util_blend_factor_is_dual_src(int factor)
 {
    return (factor == PIPE_BLENDFACTOR_SRC1_COLOR) ||
           (factor == PIPE_BLENDFACTOR_SRC1_ALPHA) ||
@@ -11,7 +11,7 @@
           (factor == PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
 }
 
-static INLINE boolean util_blend_state_is_dual(const struct pipe_blend_state *blend, 
+static inline boolean util_blend_state_is_dual(const struct pipe_blend_state *blend, 
 				  int index)
 {
    if (util_blend_factor_is_dual_src(blend->rt[index].rgb_src_factor) ||
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dump_defines.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dump_defines.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dump_defines.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dump_defines.c	2015-09-16 14:36:09.000000000 +0000
@@ -392,3 +392,44 @@
 };
 
 DEFINE_UTIL_DUMP_CONTINUOUS(query_type)
+
+
+static const char *
+util_dump_prim_mode_names[] = {
+   "PIPE_PRIM_POINTS",
+   "PIPE_PRIM_LINES",
+   "PIPE_PRIM_LINE_LOOP",
+   "PIPE_PRIM_LINE_STRIP",
+   "PIPE_PRIM_TRIANGLES",
+   "PIPE_PRIM_TRIANGLE_STRIP",
+   "PIPE_PRIM_TRIANGLE_FAN",
+   "PIPE_PRIM_QUADS",
+   "PIPE_PRIM_QUAD_STRIP",
+   "PIPE_PRIM_POLYGON",
+   "PIPE_PRIM_LINES_ADJACENCY",
+   "PIPE_PRIM_LINE_STRIP_ADJACENCY",
+   "PIPE_PRIM_TRIANGLES_ADJACENCY",
+   "PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY",
+   "PIPE_PRIM_PATCHES",
+};
+
+static const char *
+util_dump_prim_mode_short_names[] = {
+   "points",
+   "lines",
+   "line_loop",
+   "line_strip",
+   "triangles",
+   "triangle_strip",
+   "triangle_fan",
+   "quads",
+   "quad_strip",
+   "polygon",
+   "lines_adjacency",
+   "line_strip_adjacency",
+   "triangles_adjacency",
+   "triangle_strip_adjacency",
+   "patches",
+};
+
+DEFINE_UTIL_DUMP_CONTINUOUS(prim_mode)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dump.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dump.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dump.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dump.h	2015-09-16 14:36:09.000000000 +0000
@@ -88,14 +88,16 @@
 const char *
 util_dump_query_type(unsigned value, boolean shortened);
 
+const char *
+util_dump_prim_mode(unsigned value, boolean shortened);
+
 
 /*
  * p_state.h, through a FILE
  */
 
 void
-util_dump_template(FILE *stream,
-                   const struct pipe_resource *templat);
+util_dump_resource(FILE *stream, const struct pipe_resource *state);
 
 void
 util_dump_rasterizer_state(FILE *stream,
@@ -154,10 +156,23 @@
                   const struct pipe_surface *state);
 
 void
+util_dump_image_view(FILE *stream, const struct pipe_image_view *state);
+
+void
+util_dump_sampler_view(FILE *stream, const struct pipe_sampler_view *state);
+
+void
 util_dump_transfer(FILE *stream,
                    const struct pipe_transfer *state);
 
 void
+util_dump_constant_buffer(FILE *stream,
+                          const struct pipe_constant_buffer *state);
+
+void
+util_dump_index_buffer(FILE *stream, const struct pipe_index_buffer *state);
+
+void
 util_dump_vertex_buffer(FILE *stream,
                         const struct pipe_vertex_buffer *state);
 
@@ -166,6 +181,10 @@
                          const struct pipe_vertex_element *state);
 
 void
+util_dump_stream_output_target(FILE *stream,
+                               const struct pipe_stream_output_target *state);
+
+void
 util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state);
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dump_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dump_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dump_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dump_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -39,7 +39,7 @@
  * Dump primitives
  */
 
-static INLINE void
+static inline void
 util_stream_writef(FILE *stream, const char *format, ...)
 {
    static char buf[1024];
@@ -247,6 +247,42 @@
    util_dump_enum(stream, util_dump_func(value, TRUE));
 }
 
+static void
+util_dump_enum_prim_mode(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_prim_mode(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_target(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_target(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_filter(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_filter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_mipfilter(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_mipfilter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_wrap(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_wrap(value, TRUE));
+}
+
+static void
+util_dump_enum_stencil_op(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_stencil_op(value, TRUE));
+}
+
 
 /*
  * Public functions
@@ -254,38 +290,28 @@
 
 
 void
-util_dump_template(FILE *stream, const struct pipe_resource *templat)
+util_dump_resource(FILE *stream, const struct pipe_resource *state)
 {
-   if(!templat) {
+   if (!state) {
       util_dump_null(stream);
       return;
    }
 
    util_dump_struct_begin(stream, "pipe_resource");
 
-   util_dump_member(stream, int, templat, target);
-   util_dump_member(stream, format, templat, format);
-
-   util_dump_member_begin(stream, "width");
-   util_dump_uint(stream, templat->width0);
-   util_dump_member_end(stream);
-
-   util_dump_member_begin(stream, "height");
-   util_dump_uint(stream, templat->height0);
-   util_dump_member_end(stream);
-
-   util_dump_member_begin(stream, "depth");
-   util_dump_uint(stream, templat->depth0);
-   util_dump_member_end(stream);
+   util_dump_member(stream, enum_tex_target, state, target);
+   util_dump_member(stream, format, state, format);
 
-   util_dump_member_begin(stream, "array_size");
-   util_dump_uint(stream, templat->array_size);
-   util_dump_member_end(stream);
+   util_dump_member(stream, uint, state, width0);
+   util_dump_member(stream, uint, state, height0);
+   util_dump_member(stream, uint, state, depth0);
+   util_dump_member(stream, uint, state, array_size);
 
-   util_dump_member(stream, uint, templat, last_level);
-   util_dump_member(stream, uint, templat, usage);
-   util_dump_member(stream, uint, templat, bind);
-   util_dump_member(stream, uint, templat, flags);
+   util_dump_member(stream, uint, state, last_level);
+   util_dump_member(stream, uint, state, nr_samples);
+   util_dump_member(stream, uint, state, usage);
+   util_dump_member(stream, uint, state, bind);
+   util_dump_member(stream, uint, state, flags);
 
    util_dump_struct_end(stream);
 }
@@ -319,6 +345,7 @@
    util_dump_member(stream, uint, state, sprite_coord_enable);
    util_dump_member(stream, bool, state, sprite_coord_mode);
    util_dump_member(stream, bool, state, point_quad_rasterization);
+   util_dump_member(stream, bool, state, point_tri_clip);
    util_dump_member(stream, bool, state, point_size_per_vertex);
    util_dump_member(stream, bool, state, multisample);
    util_dump_member(stream, bool, state, line_smooth);
@@ -331,6 +358,7 @@
    util_dump_member(stream, bool, state, bottom_edge_rule);
    util_dump_member(stream, bool, state, rasterizer_discard);
    util_dump_member(stream, bool, state, depth_clip);
+   util_dump_member(stream, bool, state, clip_halfz);
    util_dump_member(stream, uint, state, clip_plane_enable);
 
    util_dump_member(stream, float, state, line_width);
@@ -426,7 +454,6 @@
 void
 util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state)
 {
-   char str[8192];
    unsigned i;
 
    if(!state) {
@@ -434,33 +461,35 @@
       return;
    }
 
-   tgsi_dump_str(state->tokens, 0, str, sizeof(str));
-
    util_dump_struct_begin(stream, "pipe_shader_state");
 
    util_dump_member_begin(stream, "tokens");
-   util_dump_string(stream, str);
-   util_dump_member_end(stream);
-
-   util_dump_member_begin(stream, "stream_output");
-   util_dump_struct_begin(stream, "pipe_stream_output_info");
-   util_dump_member(stream, uint, &state->stream_output, num_outputs);
-   util_dump_array(stream, uint, state->stream_output.stride,
-                   Elements(state->stream_output.stride));
-   util_dump_array_begin(stream);
-   for(i = 0; i < state->stream_output.num_outputs; ++i) {
-      util_dump_elem_begin(stream);
-      util_dump_struct_begin(stream, ""); /* anonymous */
-      util_dump_member(stream, uint, &state->stream_output.output[i], register_index);
-      util_dump_member(stream, uint, &state->stream_output.output[i], start_component);
-      util_dump_member(stream, uint, &state->stream_output.output[i], num_components);
-      util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer);
+   fprintf(stream, "\"\n");
+   tgsi_dump_to_file(state->tokens, 0, stream);
+   fprintf(stream, "\"");
+   util_dump_member_end(stream);
+
+   if (state->stream_output.num_outputs) {
+      util_dump_member_begin(stream, "stream_output");
+      util_dump_struct_begin(stream, "pipe_stream_output_info");
+      util_dump_member(stream, uint, &state->stream_output, num_outputs);
+      util_dump_array(stream, uint, state->stream_output.stride,
+                      Elements(state->stream_output.stride));
+      util_dump_array_begin(stream);
+      for(i = 0; i < state->stream_output.num_outputs; ++i) {
+         util_dump_elem_begin(stream);
+         util_dump_struct_begin(stream, ""); /* anonymous */
+         util_dump_member(stream, uint, &state->stream_output.output[i], register_index);
+         util_dump_member(stream, uint, &state->stream_output.output[i], start_component);
+         util_dump_member(stream, uint, &state->stream_output.output[i], num_components);
+         util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer);
+         util_dump_struct_end(stream);
+         util_dump_elem_end(stream);
+      }
+      util_dump_array_end(stream);
       util_dump_struct_end(stream);
-      util_dump_elem_end(stream);
+      util_dump_member_end(stream);
    }
-   util_dump_array_end(stream);
-   util_dump_struct_end(stream);
-   util_dump_member_end(stream);
 
    util_dump_struct_end(stream);
 }
@@ -496,9 +525,12 @@
       util_dump_member(stream, bool, &state->stencil[i], enabled);
       if (state->stencil[i].enabled) {
          util_dump_member(stream, enum_func, &state->stencil[i], func);
-         util_dump_member(stream, uint, &state->stencil[i], fail_op);
-         util_dump_member(stream, uint, &state->stencil[i], zpass_op);
-         util_dump_member(stream, uint, &state->stencil[i], zfail_op);
+         util_dump_member(stream, enum_stencil_op,
+                          &state->stencil[i], fail_op);
+         util_dump_member(stream, enum_stencil_op,
+                          &state->stencil[i], zpass_op);
+         util_dump_member(stream, enum_stencil_op,
+                          &state->stencil[i], zfail_op);
          util_dump_member(stream, uint, &state->stencil[i], valuemask);
          util_dump_member(stream, uint, &state->stencil[i], writemask);
       }
@@ -555,6 +587,8 @@
    util_dump_struct_begin(stream, "pipe_blend_state");
 
    util_dump_member(stream, bool, state, dither);
+   util_dump_member(stream, bool, state, alpha_to_coverage);
+   util_dump_member(stream, bool, state, alpha_to_one);
 
    util_dump_member(stream, bool, state, logicop_enable);
    if (state->logicop_enable) {
@@ -629,16 +663,17 @@
 
    util_dump_struct_begin(stream, "pipe_sampler_state");
 
-   util_dump_member(stream, uint, state, wrap_s);
-   util_dump_member(stream, uint, state, wrap_t);
-   util_dump_member(stream, uint, state, wrap_r);
-   util_dump_member(stream, uint, state, min_img_filter);
-   util_dump_member(stream, uint, state, min_mip_filter);
-   util_dump_member(stream, uint, state, mag_img_filter);
+   util_dump_member(stream, enum_tex_wrap, state, wrap_s);
+   util_dump_member(stream, enum_tex_wrap, state, wrap_t);
+   util_dump_member(stream, enum_tex_wrap, state, wrap_r);
+   util_dump_member(stream, enum_tex_filter, state, min_img_filter);
+   util_dump_member(stream, enum_tex_mipfilter, state, min_mip_filter);
+   util_dump_member(stream, enum_tex_filter, state, mag_img_filter);
    util_dump_member(stream, uint, state, compare_mode);
    util_dump_member(stream, enum_func, state, compare_func);
    util_dump_member(stream, bool, state, normalized_coords);
    util_dump_member(stream, uint, state, max_anisotropy);
+   util_dump_member(stream, bool, state, seamless_cube_map);
    util_dump_member(stream, float, state, lod_bias);
    util_dump_member(stream, float, state, min_lod);
    util_dump_member(stream, float, state, max_lod);
@@ -672,6 +707,67 @@
 
 
 void
+util_dump_image_view(FILE *stream, const struct pipe_image_view *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_image_view");
+
+   util_dump_member(stream, ptr, state, resource);
+   util_dump_member(stream, format, state, format);
+
+   if (state->resource->target == PIPE_BUFFER) {
+      util_dump_member(stream, uint, state, u.buf.first_element);
+      util_dump_member(stream, uint, state, u.buf.last_element);
+   }
+   else {
+      util_dump_member(stream, uint, state, u.tex.first_layer);
+      util_dump_member(stream, uint, state, u.tex.last_layer);
+      util_dump_member(stream, uint, state, u.tex.level);
+   }
+
+   util_dump_struct_end(stream);
+}
+
+
+void
+util_dump_sampler_view(FILE *stream, const struct pipe_sampler_view *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_sampler_view");
+
+   util_dump_member(stream, enum_tex_target, state, target);
+   util_dump_member(stream, format, state, format);
+   util_dump_member(stream, ptr, state, texture);
+   /* 'u' is a union: dump the element range for buffers, layers/levels otherwise. */
+   if (state->target == PIPE_BUFFER) {
+      util_dump_member(stream, uint, state, u.buf.first_element);
+      util_dump_member(stream, uint, state, u.buf.last_element);
+   }
+   else {
+      util_dump_member(stream, uint, state, u.tex.first_layer);
+      util_dump_member(stream, uint, state, u.tex.last_layer);
+      util_dump_member(stream, uint, state, u.tex.first_level);
+      util_dump_member(stream, uint, state, u.tex.last_level);
+   }
+
+   util_dump_member(stream, uint, state, swizzle_r);
+   util_dump_member(stream, uint, state, swizzle_g);
+   util_dump_member(stream, uint, state, swizzle_b);
+   util_dump_member(stream, uint, state, swizzle_a);
+
+   util_dump_struct_end(stream);
+}
+
+
+void
 util_dump_transfer(FILE *stream, const struct pipe_transfer *state)
 {
    if(!state) {
@@ -695,6 +791,45 @@
 
 
 void
+util_dump_constant_buffer(FILE *stream,
+                          const struct pipe_constant_buffer *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_constant_buffer");
+
+   util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, uint, state, buffer_offset);
+   util_dump_member(stream, uint, state, buffer_size);
+   util_dump_member(stream, ptr, state, user_buffer);
+
+   util_dump_struct_end(stream);
+}
+
+
+void
+util_dump_index_buffer(FILE *stream, const struct pipe_index_buffer *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_index_buffer");
+
+   util_dump_member(stream, uint, state, index_size);
+   util_dump_member(stream, uint, state, offset);
+   util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, ptr, state, user_buffer);
+
+   util_dump_struct_end(stream);
+}
+
+
+void
 util_dump_vertex_buffer(FILE *stream, const struct pipe_vertex_buffer *state)
 {
    if(!state) {
@@ -707,6 +842,7 @@
    util_dump_member(stream, uint, state, stride);
    util_dump_member(stream, uint, state, buffer_offset);
    util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, ptr, state, user_buffer);
 
    util_dump_struct_end(stream);
 }
@@ -732,6 +868,25 @@
 
 
 void
+util_dump_stream_output_target(FILE *stream,
+                               const struct pipe_stream_output_target *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_stream_output_target");
+
+   util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, uint, state, buffer_offset);
+   util_dump_member(stream, uint, state, buffer_size);
+
+   util_dump_struct_end(stream);
+}
+
+
+void
 util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state)
 {
    if(!state) {
@@ -743,7 +898,7 @@
 
    util_dump_member(stream, bool, state, indexed);
 
-   util_dump_member(stream, uint, state, mode);
+   util_dump_member(stream, enum_prim_mode, state, mode);
    util_dump_member(stream, uint, state, start);
    util_dump_member(stream, uint, state, count);
 
@@ -830,12 +985,14 @@
    util_dump_member_begin(stream, "mask");
    util_dump_string(stream, mask);
    util_dump_member_end(stream);
-   util_dump_member(stream, uint, info, filter);
+   util_dump_member(stream, enum_tex_filter, info, filter);
 
    util_dump_member(stream, bool, info, scissor_enable);
    util_dump_member_begin(stream, "scissor");
    util_dump_scissor_state(stream, &info->scissor);
    util_dump_member_end(stream);
 
+   util_dump_member(stream, bool, info, render_condition_enable);
+
    util_dump_struct_end(stream);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dynarray.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dynarray.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_dynarray.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_dynarray.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,13 +43,13 @@
    unsigned capacity;
 };
 
-static INLINE void
+static inline void
 util_dynarray_init(struct util_dynarray *buf)
 {
    memset(buf, 0, sizeof(*buf));
 }
 
-static INLINE void
+static inline void
 util_dynarray_fini(struct util_dynarray *buf)
 {
    if(buf->data)
@@ -60,7 +60,7 @@
 }
 
 /* use util_dynarray_trim to reduce the allocated storage */
-static INLINE void *
+static inline void *
 util_dynarray_resize(struct util_dynarray *buf, unsigned newsize)
 {
    char *p;
@@ -78,13 +78,13 @@
    return p;
 }
 
-static INLINE void *
+static inline void *
 util_dynarray_grow(struct util_dynarray *buf, int diff)
 {
    return util_dynarray_resize(buf, buf->size + diff);
 }
 
-static INLINE void
+static inline void
 util_dynarray_trim(struct util_dynarray *buf)
 {
    if (buf->size != buf->capacity) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_fifo.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_fifo.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_fifo.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_fifo.h	2015-09-16 14:36:09.000000000 +0000
@@ -36,7 +36,7 @@
    size_t size;
 };
 
-static INLINE struct util_fifo *
+static inline struct util_fifo *
 u_fifo_create(size_t size)
 {
    struct util_fifo *fifo;
@@ -50,7 +50,7 @@
    return fifo;
 }
 
-static INLINE boolean
+static inline boolean
 u_fifo_add(struct util_fifo *fifo, void *ptr)
 {
    void **array = (void**)&fifo[1];
@@ -67,7 +67,7 @@
    return TRUE;
 }
 
-static INLINE boolean
+static inline boolean
 u_fifo_pop(struct util_fifo *fifo, void **ptr)
 {
    void **array = (void**)&fifo[1];
@@ -85,7 +85,7 @@
    return TRUE;
 }
 
-static INLINE void
+static inline void
 u_fifo_destroy(struct util_fifo *fifo)
 {
    FREE(fifo);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_etc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_etc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_etc.c	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_etc.c	2015-09-16 14:36:09.000000000 +0000
@@ -65,11 +65,10 @@
 void
 util_format_etc1_rgb8_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j)
 {
-   const unsigned bw = 4, bh = 4;
    struct etc1_block block;
    uint8_t tmp[3];
 
-   assert(i < bw && j < bh);
+   assert(i < 4 && j < 4); /* check i, j against 4x4 block size */
 
    etc1_parse_block(&block, src);
    etc1_fetch_texel(&block, i, j, tmp);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format.h	2015-09-16 14:36:09.000000000 +0000
@@ -425,7 +425,7 @@
  * Format query functions.
  */
 
-static INLINE const char *
+static inline const char *
 util_format_name(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -438,7 +438,7 @@
    return desc->name;
 }
 
-static INLINE const char *
+static inline const char *
 util_format_short_name(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -454,7 +454,7 @@
 /**
  * Whether this format is plain, see UTIL_FORMAT_LAYOUT_PLAIN for more info.
  */
-static INLINE boolean
+static inline boolean
 util_format_is_plain(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -466,7 +466,7 @@
    return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ? TRUE : FALSE;
 }
 
-static INLINE boolean 
+static inline boolean 
 util_format_is_compressed(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -488,7 +488,7 @@
    }
 }
 
-static INLINE boolean 
+static inline boolean 
 util_format_is_s3tc(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -501,28 +501,28 @@
    return desc->layout == UTIL_FORMAT_LAYOUT_S3TC ? TRUE : FALSE;
 }
 
-static INLINE boolean 
+static inline boolean 
 util_format_is_srgb(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
    return desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
 }
 
-static INLINE boolean
+static inline boolean
 util_format_has_depth(const struct util_format_description *desc)
 {
    return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
           desc->swizzle[0] != UTIL_FORMAT_SWIZZLE_NONE;
 }
 
-static INLINE boolean
+static inline boolean
 util_format_has_stencil(const struct util_format_description *desc)
 {
    return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
           desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE;
 }
 
-static INLINE boolean
+static inline boolean
 util_format_is_depth_or_stencil(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -536,7 +536,7 @@
           util_format_has_stencil(desc);
 }
 
-static INLINE boolean
+static inline boolean
 util_format_is_depth_and_stencil(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -554,7 +554,7 @@
 /**
  * Calculates the depth format type based upon the incoming format description.
  */
-static INLINE unsigned
+static inline unsigned
 util_get_depth_format_type(const struct util_format_description *desc)
 {
    unsigned depth_channel = desc->swizzle[0];
@@ -581,7 +581,7 @@
  * Return whether this is an RGBA, Z, S, or combined ZS format.
  * Useful for initializing pipe_blit_info::mask.
  */
-static INLINE unsigned
+static inline unsigned
 util_format_get_mask(enum pipe_format format)
 {
    const struct util_format_description *desc =
@@ -611,7 +611,7 @@
  *
  * That is, the channels whose values are preserved.
  */
-static INLINE unsigned
+static inline unsigned
 util_format_colormask(const struct util_format_description *desc)
 {
    unsigned colormask;
@@ -643,7 +643,7 @@
  * @param desc       a format description to check colormask with
  * @param colormask  a bit mask for channels, matches format of PIPE_MASK_RGBA
  */
-static INLINE boolean
+static inline boolean
 util_format_colormask_full(const struct util_format_description *desc, unsigned colormask)
 {
    return (~colormask & util_format_colormask(desc)) == 0;
@@ -709,7 +709,7 @@
  *
  *   PIPE_FORMAT_?8?8?8?8_UNORM
  */
-static INLINE boolean
+static inline boolean
 util_format_is_rgba8_variant(const struct util_format_description *desc)
 {
    unsigned chan;
@@ -737,7 +737,7 @@
 /**
  * Return total bits needed for the pixel format per block.
  */
-static INLINE uint
+static inline uint
 util_format_get_blocksizebits(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -753,7 +753,7 @@
 /**
  * Return bytes per block (not pixel) for the given format.
  */
-static INLINE uint
+static inline uint
 util_format_get_blocksize(enum pipe_format format)
 {
    uint bits = util_format_get_blocksizebits(format);
@@ -768,7 +768,7 @@
    return bytes;
 }
 
-static INLINE uint
+static inline uint
 util_format_get_blockwidth(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -781,7 +781,7 @@
    return desc->block.width;
 }
 
-static INLINE uint
+static inline uint
 util_format_get_blockheight(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -794,7 +794,7 @@
    return desc->block.height;
 }
 
-static INLINE unsigned
+static inline unsigned
 util_format_get_nblocksx(enum pipe_format format,
                          unsigned x)
 {
@@ -802,7 +802,7 @@
    return (x + blockwidth - 1) / blockwidth;
 }
 
-static INLINE unsigned
+static inline unsigned
 util_format_get_nblocksy(enum pipe_format format,
                          unsigned y)
 {
@@ -810,7 +810,7 @@
    return (y + blockheight - 1) / blockheight;
 }
 
-static INLINE unsigned
+static inline unsigned
 util_format_get_nblocks(enum pipe_format format,
                         unsigned width,
                         unsigned height)
@@ -818,14 +818,14 @@
    return util_format_get_nblocksx(format, width) * util_format_get_nblocksy(format, height);
 }
 
-static INLINE size_t
+static inline size_t
 util_format_get_stride(enum pipe_format format,
                        unsigned width)
 {
    return util_format_get_nblocksx(format, width) * util_format_get_blocksize(format);
 }
 
-static INLINE size_t
+static inline size_t
 util_format_get_2d_size(enum pipe_format format,
                         size_t stride,
                         unsigned height)
@@ -833,7 +833,7 @@
    return util_format_get_nblocksy(format, height) * stride;
 }
 
-static INLINE uint
+static inline uint
 util_format_get_component_bits(enum pipe_format format,
                                enum util_format_colorspace colorspace,
                                uint component)
@@ -880,7 +880,7 @@
  * Given a linear RGB colorspace format, return the corresponding SRGB
  * format, or PIPE_FORMAT_NONE if none.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_srgb(enum pipe_format format)
 {
    if (util_format_is_srgb(format))
@@ -930,7 +930,7 @@
  * Given an sRGB format, return the corresponding linear colorspace format.
  * For non sRGB formats, return the format unchanged.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_linear(enum pipe_format format)
 {
    switch (format) {
@@ -977,7 +977,7 @@
  * Given a depth-stencil format, return the corresponding stencil-only format.
  * For stencil-only formats, return the format unchanged.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_stencil_only(enum pipe_format format)
 {
    switch (format) {
@@ -1006,7 +1006,7 @@
  * Converts PIPE_FORMAT_*I* to PIPE_FORMAT_*R*.
  * This is identity for non-intensity formats.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_intensity_to_red(enum pipe_format format)
 {
    switch (format) {
@@ -1044,7 +1044,7 @@
  * Converts PIPE_FORMAT_*L* to PIPE_FORMAT_*R*.
  * This is identity for non-luminance formats.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_luminance_to_red(enum pipe_format format)
 {
    switch (format) {
@@ -1122,7 +1122,7 @@
  * Return the number of components stored.
  * Formats with block size != 1x1 will always have 1 component (the block).
  */
-static INLINE unsigned
+static inline unsigned
 util_format_get_nr_components(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -1133,7 +1133,7 @@
  * Return the index of the first non-void channel
  * -1 if no non-void channels
  */
-static INLINE int
+static inline int
 util_format_get_first_non_void_channel(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_pack.py mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_pack.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_pack.py	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_pack.py	2015-09-16 14:36:09.000000000 +0000
@@ -616,7 +616,7 @@
 
     name = format.short_name()
 
-    print 'static INLINE void'
+    print 'static inline void'
     print 'util_format_%s_unpack_%s(%s *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, dst_suffix, dst_native_type)
     print '{'
 
@@ -645,7 +645,7 @@
 
     name = format.short_name()
 
-    print 'static INLINE void'
+    print 'static inline void'
     print 'util_format_%s_pack_%s(uint8_t *dst_row, unsigned dst_stride, const %s *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, src_suffix, src_native_type)
     print '{'
     
@@ -674,7 +674,7 @@
 
     name = format.short_name()
 
-    print 'static INLINE void'
+    print 'static inline void'
     print 'util_format_%s_fetch_%s(%s *dst, const uint8_t *src, unsigned i, unsigned j)' % (name, dst_suffix, dst_native_type)
     print '{'
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_r11g11b10f.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_r11g11b10f.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_r11g11b10f.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_r11g11b10f.h	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
 
 #define F32_INFINITY         0x7f800000
 
-static INLINE unsigned f32_to_uf11(float val)
+static inline unsigned f32_to_uf11(float val)
 {
    union {
       float f;
@@ -94,7 +94,7 @@
    return uf11;
 }
 
-static INLINE float uf11_to_f32(uint16_t val)
+static inline float uf11_to_f32(uint16_t val)
 {
    union {
       float f;
@@ -131,7 +131,7 @@
    return f32.f;
 }
 
-static INLINE unsigned f32_to_uf10(float val)
+static inline unsigned f32_to_uf10(float val)
 {
    union {
       float f;
@@ -180,7 +180,7 @@
    return uf10;
 }
 
-static INLINE float uf10_to_f32(uint16_t val)
+static inline float uf10_to_f32(uint16_t val)
 {
    union {
       float f;
@@ -217,14 +217,14 @@
    return f32.f;
 }
 
-static INLINE unsigned float3_to_r11g11b10f(const float rgb[3])
+static inline unsigned float3_to_r11g11b10f(const float rgb[3])
 {
    return ( f32_to_uf11(rgb[0]) & 0x7ff) |
           ((f32_to_uf11(rgb[1]) & 0x7ff) << 11) |
           ((f32_to_uf10(rgb[2]) & 0x3ff) << 22);
 }
 
-static INLINE void r11g11b10f_to_float3(unsigned rgb, float retval[3])
+static inline void r11g11b10f_to_float3(unsigned rgb, float retval[3])
 {
    retval[0] = uf11_to_f32( rgb        & 0x7ff);
    retval[1] = uf11_to_f32((rgb >> 11) & 0x7ff);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_rgb9e5.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_rgb9e5.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_rgb9e5.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_rgb9e5.h	2015-09-16 14:36:09.000000000 +0000
@@ -26,9 +26,10 @@
 #ifndef RGB9E5_H
 #define RGB9E5_H
 
-#include <math.h>
 #include <assert.h>
 
+#include "c99_math.h"
+
 #define RGB9E5_EXPONENT_BITS          5
 #define RGB9E5_MANTISSA_BITS          9
 #define RGB9E5_EXP_BIAS               15
@@ -73,9 +74,9 @@
    } field;
 } rgb9e5;
 
-static INLINE float rgb9e5_ClampRange(float x)
+static inline float rgb9e5_ClampRange(float x)
 {
-   if (x > 0.0) {
+   if (x > 0.0f) {
       if (x >= MAX_RGB9E5) {
          return MAX_RGB9E5;
       } else {
@@ -90,7 +91,7 @@
 /* Ok, FloorLog2 is not correct for the denorm and zero values, but we
    are going to do a max of this value with the minimum rgb9e5 exponent
    that will hide these problem cases. */
-static INLINE int rgb9e5_FloorLog2(float x)
+static inline int rgb9e5_FloorLog2(float x)
 {
    float754 f;
 
@@ -98,7 +99,7 @@
    return (f.field.biasedexponent - 127);
 }
 
-static INLINE unsigned float3_to_rgb9e5(const float rgb[3])
+static inline unsigned float3_to_rgb9e5(const float rgb[3])
 {
    rgb9e5 retval;
    float maxrgb;
@@ -115,8 +116,8 @@
    exp_shared = MAX2(-RGB9E5_EXP_BIAS-1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
    assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
    assert(exp_shared >= 0);
-   /* This pow function could be replaced by a table. */
-   denom = pow(2, exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
+   /* This exp2 function could be replaced by a table. */
+   denom = exp2(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
 
    maxm = (int) floor(maxrgb / denom + 0.5);
    if (maxm == MAX_RGB9E5_MANTISSA+1) {
@@ -146,7 +147,7 @@
    return retval.raw;
 }
 
-static INLINE void rgb9e5_to_float3(unsigned rgb, float retval[3])
+static inline void rgb9e5_to_float3(unsigned rgb, float retval[3])
 {
    rgb9e5 v;
    int exponent;
@@ -154,7 +155,7 @@
 
    v.raw = rgb;
    exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS;
-   scale = (float) pow(2, exponent);
+   scale = exp2f(exponent);
 
    retval[0] = v.field.r * scale;
    retval[1] = v.field.g * scale;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_s3tc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_s3tc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_s3tc.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_s3tc.c	2015-09-16 14:36:09.000000000 +0000
@@ -235,7 +235,7 @@
  * Block decompression.
  */
 
-static INLINE void
+static inline void
 util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
                                         const uint8_t *src_row, unsigned src_stride,
                                         unsigned width, unsigned height,
@@ -312,7 +312,7 @@
                                            16, FALSE);
 }
 
-static INLINE void
+static inline void
 util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
                                        const uint8_t *src_row, unsigned src_stride,
                                        unsigned width, unsigned height,
@@ -400,7 +400,7 @@
  * Block compression.
  */
 
-static INLINE void
+static inline void
 util_format_dxtn_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
                                   const uint8_t *src, unsigned src_stride,
                                   unsigned width, unsigned height,
@@ -478,7 +478,7 @@
                                      16, FALSE);
 }
 
-static INLINE void
+static inline void
 util_format_dxtn_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
                                  const float *src, unsigned src_stride,
                                  unsigned width, unsigned height,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_yuv.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_yuv.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_yuv.h	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_yuv.h	2015-09-16 14:36:09.000000000 +0000
@@ -54,7 +54,7 @@
  * precision in the coefficients.
  */
 
-static INLINE void
+static inline void
 util_format_rgb_float_to_yuv(float r, float g, float b,
                              uint8_t *y, uint8_t *u, uint8_t *v)
 {
@@ -74,7 +74,7 @@
 }
 
 
-static INLINE void
+static inline void
 util_format_yuv_to_rgb_float(uint8_t y, uint8_t u, uint8_t v,
                              float *r, float *g, float *b)
 {
@@ -92,7 +92,7 @@
 }
 
 
-static INLINE void
+static inline void
 util_format_rgb_8unorm_to_yuv(uint8_t r, uint8_t g, uint8_t b,
                 	      uint8_t *y, uint8_t *u, uint8_t *v)
 {
@@ -102,7 +102,7 @@
 }
 
 
-static INLINE void
+static inline void
 util_format_yuv_to_rgb_8unorm(uint8_t y, uint8_t u, uint8_t v,
                               uint8_t *r, uint8_t *g, uint8_t *b)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_zs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_zs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_format_zs.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_format_zs.c	2015-09-16 14:36:09.000000000 +0000
@@ -35,28 +35,28 @@
  * z32_unorm conversion functions
  */
 
-static INLINE uint16_t
+static inline uint16_t
 z32_unorm_to_z16_unorm(uint32_t z)
 {
    /* z * 0xffff / 0xffffffff */
    return z >> 16;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z16_unorm_to_z32_unorm(uint16_t z)
 {
    /* z * 0xffffffff / 0xffff */
    return (z << 16) | z;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z32_unorm_to_z24_unorm(uint32_t z)
 {
    /* z * 0xffffff / 0xffffffff */
    return z >> 8;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z24_unorm_to_z32_unorm(uint32_t z)
 {
    /* z * 0xffffffff / 0xffffff */
@@ -68,42 +68,42 @@
  * z32_float conversion functions
  */
 
-static INLINE uint16_t
+static inline uint16_t
 z32_float_to_z16_unorm(float z)
 {
    const float scale = 0xffff;
    return (uint16_t)(z * scale + 0.5f);
 }
 
-static INLINE float
+static inline float
 z16_unorm_to_z32_float(uint16_t z)
 {
    const float scale = 1.0 / 0xffff;
    return (float)(z * scale);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z32_float_to_z24_unorm(float z)
 {
    const double scale = 0xffffff;
    return (uint32_t)(z * scale) & 0xffffff;
 }
 
-static INLINE float
+static inline float
 z24_unorm_to_z32_float(uint32_t z)
 {
    const double scale = 1.0 / 0xffffff;
    return (float)(z * scale);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z32_float_to_z32_unorm(float z)
 {
    const double scale = 0xffffffff;
    return (uint32_t)(z * scale);
 }
 
-static INLINE float
+static inline float
 z32_unorm_to_z32_float(uint32_t z)
 {
    const double scale = 1.0 / 0xffffffff;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_half.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_half.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_half.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_half.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
  *  https://gist.github.com/2144712
  */
 
-static INLINE uint16_t
+static inline uint16_t
 util_float_to_half(float f)
 {
    uint32_t sign_mask  = 0x80000000;
@@ -96,7 +96,7 @@
    return f16;
 }
 
-static INLINE float
+static inline float
 util_half_to_float(uint16_t f16)
 {
    union fi infnan;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_handle_table.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_handle_table.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_handle_table.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_handle_table.c	2015-09-16 14:36:09.000000000 +0000
@@ -96,7 +96,7 @@
 /**
  * Resize the table if necessary 
  */
-static INLINE int
+static inline int
 handle_table_resize(struct handle_table *ht,
                     unsigned minimum_size)
 {
@@ -126,7 +126,7 @@
 }
 
 
-static INLINE void
+static inline void
 handle_table_clear(struct handle_table *ht, 
                    unsigned index)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_hash_table.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_hash_table.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_hash_table.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_hash_table.c	2015-09-16 14:36:09.000000000 +0000
@@ -68,7 +68,7 @@
 };
 
 
-static INLINE struct util_hash_table_item *
+static inline struct util_hash_table_item *
 util_hash_table_item(struct cso_hash_iter iter)
 {
    return (struct util_hash_table_item *)cso_hash_iter_data(iter);
@@ -98,7 +98,7 @@
 }
 
 
-static INLINE struct cso_hash_iter
+static inline struct cso_hash_iter
 util_hash_table_find_iter(struct util_hash_table *ht,
                           void *key,
                           unsigned key_hash)
@@ -118,7 +118,7 @@
 }
 
 
-static INLINE struct util_hash_table_item *
+static inline struct util_hash_table_item *
 util_hash_table_find_item(struct util_hash_table *ht,
                           void *key,
                           unsigned key_hash)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_inlines.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_inlines.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_inlines.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_inlines.h	2015-09-16 14:36:09.000000000 +0000
@@ -51,13 +51,13 @@
  */
 
 
-static INLINE void
+static inline void
 pipe_reference_init(struct pipe_reference *reference, unsigned count)
 {
    p_atomic_set(&reference->count, count);
 }
 
-static INLINE boolean
+static inline boolean
 pipe_is_referenced(struct pipe_reference *reference)
 {
    return p_atomic_read(&reference->count) != 0;
@@ -69,7 +69,7 @@
  * Both 'ptr' and 'reference' may be NULL.
  * \return TRUE if the object's refcount hits zero and should be destroyed.
  */
-static INLINE boolean
+static inline boolean
 pipe_reference_described(struct pipe_reference *ptr, 
                          struct pipe_reference *reference, 
                          debug_reference_descriptor get_desc)
@@ -96,14 +96,14 @@
    return destroy;
 }
 
-static INLINE boolean
+static inline boolean
 pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
 {
    return pipe_reference_described(ptr, reference, 
                                    (debug_reference_descriptor)debug_describe_reference);
 }
 
-static INLINE void
+static inline void
 pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
 {
    struct pipe_surface *old_surf = *ptr;
@@ -120,7 +120,7 @@
  * of using a deleted context's surface_destroy() method when freeing a surface
  * that's shared by multiple contexts.
  */
-static INLINE void
+static inline void
 pipe_surface_release(struct pipe_context *pipe, struct pipe_surface **ptr)
 {
    if (pipe_reference_described(&(*ptr)->reference, NULL,
@@ -130,7 +130,7 @@
 }
 
 
-static INLINE void
+static inline void
 pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
 {
    struct pipe_resource *old_tex = *ptr;
@@ -141,7 +141,7 @@
    *ptr = tex;
 }
 
-static INLINE void
+static inline void
 pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
 {
    struct pipe_sampler_view *old_view = *ptr;
@@ -158,7 +158,7 @@
  * work-around for fixing a dangling context pointer problem when textures
  * are shared by multiple contexts.  XXX fix this someday.
  */
-static INLINE void
+static inline void
 pipe_sampler_view_release(struct pipe_context *ctx,
                           struct pipe_sampler_view **ptr)
 {
@@ -173,8 +173,18 @@
    *ptr = NULL;
 }
 
+static inline void
+pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view *view)
+{
+   struct pipe_image_view *old_view = *ptr;
+
+   if (pipe_reference_described(&(*ptr)->reference, &view->reference,
+                                (debug_reference_descriptor)debug_describe_image_view))
+      old_view->context->image_view_destroy(old_view->context, old_view);
+   *ptr = view;
+}
 
-static INLINE void
+static inline void
 pipe_so_target_reference(struct pipe_stream_output_target **ptr,
                          struct pipe_stream_output_target *target)
 {
@@ -186,7 +196,7 @@
    *ptr = target;
 }
 
-static INLINE void
+static inline void
 pipe_surface_reset(struct pipe_context *ctx, struct pipe_surface* ps,
                    struct pipe_resource *pt, unsigned level, unsigned layer)
 {
@@ -199,7 +209,7 @@
    ps->context = ctx;
 }
 
-static INLINE void
+static inline void
 pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
                   struct pipe_resource *pt, unsigned level, unsigned layer)
 {
@@ -209,7 +219,7 @@
 }
 
 /* Return true if the surfaces are equal. */
-static INLINE boolean
+static inline boolean
 pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2)
 {
    return s1->texture == s2->texture &&
@@ -233,7 +243,7 @@
  * \param bind  bitmask of PIPE_BIND_x flags
  * \param usage  bitmask of PIPE_USAGE_x flags
  */
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 pipe_buffer_create( struct pipe_screen *screen,
 		    unsigned bind,
 		    unsigned usage,
@@ -261,7 +271,7 @@
  * \param access  bitmask of PIPE_TRANSFER_x flags
  * \param transfer  returns a transfer object
  */
-static INLINE void *
+static inline void *
 pipe_buffer_map_range(struct pipe_context *pipe,
 		      struct pipe_resource *buffer,
 		      unsigned offset,
@@ -292,7 +302,7 @@
  * \param access  bitmask of PIPE_TRANSFER_x flags
  * \param transfer  returns a transfer object
  */
-static INLINE void *
+static inline void *
 pipe_buffer_map(struct pipe_context *pipe,
                 struct pipe_resource *buffer,
                 unsigned access,
@@ -302,14 +312,14 @@
 }
 
 
-static INLINE void
+static inline void
 pipe_buffer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
 {
    pipe->transfer_unmap(pipe, transfer);
 }
 
-static INLINE void
+static inline void
 pipe_buffer_flush_mapped_range(struct pipe_context *pipe,
                                struct pipe_transfer *transfer,
                                unsigned offset,
@@ -333,7 +343,7 @@
    pipe->transfer_flush_region(pipe, transfer, &box);
 }
 
-static INLINE void
+static inline void
 pipe_buffer_write(struct pipe_context *pipe,
                   struct pipe_resource *buf,
                   unsigned offset,
@@ -367,7 +377,7 @@
  * We can avoid GPU/CPU synchronization when writing range that has never
  * been written before.
  */
-static INLINE void
+static inline void
 pipe_buffer_write_nooverlap(struct pipe_context *pipe,
                             struct pipe_resource *buf,
                             unsigned offset, unsigned size,
@@ -393,7 +403,7 @@
  * \param bind  bitmask of PIPE_BIND_x flags
  * \param usage  bitmask of PIPE_USAGE_x flags
  */
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 pipe_buffer_create_with_data(struct pipe_context *pipe,
                              unsigned bind,
                              unsigned usage,
@@ -406,7 +416,7 @@
    return res;
 }
 
-static INLINE void
+static inline void
 pipe_buffer_read(struct pipe_context *pipe,
                  struct pipe_resource *buf,
                  unsigned offset,
@@ -433,7 +443,7 @@
  * Map a resource for reading/writing.
  * \param access  bitmask of PIPE_TRANSFER_x flags
  */
-static INLINE void *
+static inline void *
 pipe_transfer_map(struct pipe_context *context,
                   struct pipe_resource *resource,
                   unsigned level, unsigned layer,
@@ -456,7 +466,7 @@
  * Map a 3D (texture) resource for reading/writing.
  * \param access  bitmask of PIPE_TRANSFER_x flags
  */
-static INLINE void *
+static inline void *
 pipe_transfer_map_3d(struct pipe_context *context,
                      struct pipe_resource *resource,
                      unsigned level,
@@ -474,14 +484,14 @@
                                 &box, transfer);
 }
 
-static INLINE void
+static inline void
 pipe_transfer_unmap( struct pipe_context *context,
                      struct pipe_transfer *transfer )
 {
    context->transfer_unmap( context, transfer );
 }
 
-static INLINE void
+static inline void
 pipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
                          struct pipe_resource *buf)
 {
@@ -502,7 +512,7 @@
  * Get the polygon offset enable/disable flag for the given polygon fill mode.
  * \param fill_mode  one of PIPE_POLYGON_MODE_POINT/LINE/FILL
  */
-static INLINE boolean
+static inline boolean
 util_get_offset(const struct pipe_rasterizer_state *templ,
                 unsigned fill_mode)
 {
@@ -519,7 +529,7 @@
    }
 }
 
-static INLINE float
+static inline float
 util_get_min_point_size(const struct pipe_rasterizer_state *state)
 {
    /* The point size should be clamped to this value at the rasterizer stage.
@@ -529,7 +539,7 @@
           !state->multisample ? 1.0f : 0.0f;
 }
 
-static INLINE void
+static inline void
 util_query_clear_result(union pipe_query_result *result, unsigned type)
 {
    switch (type) {
@@ -560,7 +570,7 @@
 }
 
 /** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */
-static INLINE unsigned
+static inline unsigned
 util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target,
                           unsigned nr_samples)
 {
@@ -605,7 +615,7 @@
 }
 
 
-static INLINE void
+static inline void
 util_copy_constant_buffer(struct pipe_constant_buffer *dst,
                           const struct pipe_constant_buffer *src)
 {
@@ -623,7 +633,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 util_max_layer(const struct pipe_resource *r, unsigned level)
 {
    switch (r->target) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_keymap.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_keymap.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_keymap.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_keymap.c	2015-09-16 14:36:09.000000000 +0000
@@ -71,7 +71,7 @@
 }
 
 
-static INLINE struct keymap_item *
+static inline struct keymap_item *
 hash_table_item(struct cso_hash_iter iter)
 {
    return (struct keymap_item *) cso_hash_iter_data(iter);
@@ -143,7 +143,7 @@
 }
 
 
-static INLINE struct cso_hash_iter
+static inline struct cso_hash_iter
 hash_table_find_iter(const struct keymap *map, const void *key,
                      unsigned key_hash)
 {
@@ -162,7 +162,7 @@
 }
 
 
-static INLINE struct keymap_item *
+static inline struct keymap_item *
 hash_table_find_item(const struct keymap *map, const void *key,
                      unsigned key_hash)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_linear.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_linear.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_linear.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_linear.h	2015-09-16 14:36:09.000000000 +0000
@@ -89,7 +89,7 @@
 			   unsigned tile_width, unsigned tile_height,
 			   unsigned tiles_x, unsigned tiles_y);
 
-static INLINE boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
+static inline boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
 {
    if (t->tile.size != t->block.size * t->cols * t->rows)
       return FALSE;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_math.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_math.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_math.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_math.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,7 +48,7 @@
 {
    int i;
    for (i = 0; i < POW2_TABLE_SIZE; i++)
-      pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
+      pow2_table[i] = exp2f((i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_math.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_math.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_math.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_math.h	2015-09-16 14:36:09.000000000 +0000
@@ -92,7 +92,7 @@
 /**
  * Extract the IEEE float32 exponent.
  */
-static INLINE signed
+static inline signed
 util_get_float32_exponent(float x)
 {
    union fi f;
@@ -112,7 +112,7 @@
  * Compute exp2(ipart) with i << ipart
  * Compute exp2(fpart) with lookup table.
  */
-static INLINE float
+static inline float
 util_fast_exp2(float x)
 {
    int32_t ipart;
@@ -143,7 +143,7 @@
 /**
  * Fast approximation to exp(x).
  */
-static INLINE float
+static inline float
 util_fast_exp(float x)
 {
    const float k = 1.44269f; /* = log2(e) */
@@ -160,7 +160,7 @@
 /**
  * Fast approximation to log2(x).
  */
-static INLINE float
+static inline float
 util_fast_log2(float x)
 {
    union fi num;
@@ -176,7 +176,7 @@
 /**
  * Fast approximation to x^y.
  */
-static INLINE float
+static inline float
 util_fast_pow(float x, float y)
 {
    return util_fast_exp2(util_fast_log2(x) * y);
@@ -184,7 +184,7 @@
 
 /* Note that this counts zero as a power of two.
  */
-static INLINE boolean
+static inline boolean
 util_is_power_of_two( unsigned v )
 {
    return (v & (v-1)) == 0;
@@ -194,7 +194,7 @@
 /**
  * Floor(x), returned as int.
  */
-static INLINE int
+static inline int
 util_ifloor(float f)
 {
    int ai, bi;
@@ -211,7 +211,7 @@
 /**
  * Round float to nearest int.
  */
-static INLINE int
+static inline int
 util_iround(float f)
 {
 #if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 
@@ -237,10 +237,10 @@
 /**
  * Approximate floating point comparison
  */
-static INLINE boolean
+static inline boolean
 util_is_approx(float a, float b, float tol)
 {
-   return fabs(b - a) <= tol;
+   return fabsf(b - a) <= tol;
 }
 
 
@@ -256,7 +256,7 @@
 /**
  * Single-float
  */
-static INLINE boolean
+static inline boolean
 util_is_inf_or_nan(float x)
 {
    union fi tmp;
@@ -265,7 +265,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 util_is_nan(float x)
 {
    union fi tmp;
@@ -274,7 +274,7 @@
 }
 
 
-static INLINE int
+static inline int
 util_inf_sign(float x)
 {
    union fi tmp;
@@ -290,7 +290,7 @@
 /**
  * Double-float
  */
-static INLINE boolean
+static inline boolean
 util_is_double_inf_or_nan(double x)
 {
    union di tmp;
@@ -299,7 +299,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 util_is_double_nan(double x)
 {
    union di tmp;
@@ -308,7 +308,7 @@
 }
 
 
-static INLINE int
+static inline int
 util_double_inf_sign(double x)
 {
    union di tmp;
@@ -324,21 +324,21 @@
 /**
  * Half-float
  */
-static INLINE boolean
+static inline boolean
 util_is_half_inf_or_nan(int16_t x)
 {
    return (x & 0x7c00) == 0x7c00;
 }
 
 
-static INLINE boolean
+static inline boolean
 util_is_half_nan(int16_t x)
 {
    return (x & 0x7fff) > 0x7c00;
 }
 
 
-static INLINE int
+static inline int
 util_half_inf_sign(int16_t x)
 {
    if ((x & 0x7fff) != 0x7c00) {
@@ -359,7 +359,7 @@
 #if defined(_MSC_VER) && (_M_IX86 || _M_AMD64 || _M_IA64)
 unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
 #pragma intrinsic(_BitScanForward)
-static INLINE
+static inline
 unsigned long ffs( unsigned long u )
 {
    unsigned long i;
@@ -369,7 +369,7 @@
       return 0;
 }
 #elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
-static INLINE
+static inline
 unsigned ffs( unsigned u )
 {
    unsigned i;
@@ -409,7 +409,7 @@
  * Find last bit set in a word.  The least significant bit is 1.
  * Return 0 if no bits are set.
  */
-static INLINE unsigned
+static inline unsigned
 util_last_bit(unsigned u)
 {
 #if defined(HAVE___BUILTIN_CLZ)
@@ -425,11 +425,30 @@
 }
 
 /**
+ * Find last bit set in a word.  The least significant bit is 1.
+ * Return 0 if no bits are set.
+ */
+static inline unsigned
+util_last_bit64(uint64_t u)
+{
+#if defined(HAVE___BUILTIN_CLZLL)
+   return u == 0 ? 0 : 64 - __builtin_clzll(u);
+#else
+   unsigned r = 0;
+   while (u) {
+       r++;
+       u >>= 1;
+   }
+   return r;
+#endif
+}
+
+/**
  * Find last bit in a word that does not match the sign bit. The least
  * significant bit is 1.
  * Return 0 if no bits are set.
  */
-static INLINE unsigned
+static inline unsigned
 util_last_bit_signed(int i)
 {
    if (i >= 0)
@@ -446,7 +465,7 @@
  * }
  *
  */
-static INLINE int
+static inline int
 u_bit_scan(unsigned *mask)
 {
    int i = ffs(*mask) - 1;
@@ -455,7 +474,7 @@
 }
 
 #ifndef _MSC_VER
-static INLINE int
+static inline int
 u_bit_scan64(uint64_t *mask)
 {
    int i = ffsll(*mask) - 1;
@@ -467,7 +486,7 @@
 /**
  * Return float bits.
  */
-static INLINE unsigned
+static inline unsigned
 fui( float f )
 {
    union fi fi;
@@ -475,7 +494,7 @@
    return fi.ui;
 }
 
-static INLINE float
+static inline float
 uif(uint32_t ui)
 {
    union fi fi;
@@ -488,7 +507,7 @@
  * Convert ubyte to float in [0, 1].
  * XXX a 256-entry lookup table would be slightly faster.
  */
-static INLINE float
+static inline float
 ubyte_to_float(ubyte ub)
 {
    return (float) ub * (1.0f / 255.0f);
@@ -498,7 +517,7 @@
 /**
  * Convert float in [0,1] to ubyte in [0,255] with clamping.
  */
-static INLINE ubyte
+static inline ubyte
 float_to_ubyte(float f)
 {
    union fi tmp;
@@ -516,13 +535,13 @@
    }
 }
 
-static INLINE float
+static inline float
 byte_to_float_tex(int8_t b)
 {
    return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
 }
 
-static INLINE int8_t
+static inline int8_t
 float_to_byte_tex(float f)
 {
    return (int8_t) (127.0F * f);
@@ -531,7 +550,7 @@
 /**
  * Calc log base 2
  */
-static INLINE unsigned
+static inline unsigned
 util_logbase2(unsigned n)
 {
 #if defined(HAVE___BUILTIN_CLZ)
@@ -551,7 +570,7 @@
 /**
  * Returns the smallest power of two >= x
  */
-static INLINE unsigned
+static inline unsigned
 util_next_power_of_two(unsigned x)
 {
 #if defined(HAVE___BUILTIN_CLZ)
@@ -583,7 +602,7 @@
 /**
  * Return number of bits set in n.
  */
-static INLINE unsigned
+static inline unsigned
 util_bitcount(unsigned n)
 {
 #if defined(HAVE___BUILTIN_POPCOUNT)
@@ -604,7 +623,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 util_bitcount64(uint64_t n)
 {
 #ifdef HAVE___BUILTIN_POPCOUNTLL
@@ -620,7 +639,7 @@
  * Algorithm taken from:
  * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
  */
-static INLINE unsigned
+static inline unsigned
 util_bitreverse(unsigned n)
 {
     n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1);
@@ -652,7 +671,7 @@
 /**
  * Reverse byte order of a 32 bit word.
  */
-static INLINE uint32_t
+static inline uint32_t
 util_bswap32(uint32_t n)
 {
 #if defined(HAVE___BUILTIN_BSWAP32)
@@ -668,7 +687,7 @@
 /**
  * Reverse byte order of a 64bit word.
  */
-static INLINE uint64_t
+static inline uint64_t
 util_bswap64(uint64_t n)
 {
 #if defined(HAVE___BUILTIN_BSWAP64)
@@ -683,14 +702,14 @@
 /**
  * Reverse byte order of a 16 bit word.
  */
-static INLINE uint16_t
+static inline uint16_t
 util_bswap16(uint16_t n)
 {
    return (n >> 8) |
           (n << 8);
 }
 
-static INLINE void*
+static inline void*
 util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
 {
 #ifdef PIPE_ARCH_BIG_ENDIAN
@@ -727,7 +746,7 @@
 /**
  * Align a value, only works pot alignemnts.
  */
-static INLINE int
+static inline int
 align(int value, int alignment)
 {
    return (value + alignment - 1) & ~(alignment - 1);
@@ -736,7 +755,7 @@
 /**
  * Works like align but on npot alignments.
  */
-static INLINE size_t
+static inline size_t
 util_align_npot(size_t value, size_t alignment)
 {
    if (value % alignment)
@@ -744,7 +763,7 @@
    return value;
 }
 
-static INLINE unsigned
+static inline unsigned
 u_minify(unsigned value, unsigned levels)
 {
     return MAX2(1, value >> levels);
@@ -777,13 +796,13 @@
 #endif
 
 
-static INLINE uint32_t
+static inline uint32_t
 util_unsigned_fixed(float value, unsigned frac_bits)
 {
    return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits));
 }
 
-static INLINE int32_t
+static inline int32_t
 util_signed_fixed(float value, unsigned frac_bits)
 {
    return (int32_t)(value * (1<<frac_bits));
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_memory.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_memory.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_memory.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_memory.h	2015-09-16 14:36:09.000000000 +0000
@@ -67,7 +67,7 @@
 /**
  * Duplicate a block of memory.
  */
-static INLINE void *
+static inline void *
 mem_dup(const void *src, uint size)
 {
    void *dup = MALLOC(size);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_mm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_mm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_mm.c	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_mm.c	2015-09-16 14:36:09.000000000 +0000
@@ -224,7 +224,7 @@
 }
 
 
-static INLINE int
+static inline int
 Join2Blocks(struct mem_block *p)
 {
    /* XXX there should be some assertions here */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_pack_color.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_pack_color.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_pack_color.h	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_pack_color.h	2015-09-16 14:36:09.000000000 +0000
@@ -60,7 +60,7 @@
 /**
  * Pack ubyte R,G,B,A into dest pixel.
  */
-static INLINE void
+static inline void
 util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
                    enum pipe_format format, union util_color *uc)
 {
@@ -161,7 +161,7 @@
 /**
  * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255].
  */
-static INLINE void
+static inline void
 util_unpack_color_ub(enum pipe_format format, union util_color *uc,
                      ubyte *r, ubyte *g, ubyte *b, ubyte *a)
 {
@@ -333,7 +333,7 @@
  * This will not work (and might not really be useful with float input)
  * for pure integer formats (which lack the pack_rgba_float function).
  */
-static INLINE void
+static inline void
 util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
 {
    ubyte r = 0;
@@ -437,7 +437,7 @@
 /* Integer versions of util_pack_z and util_pack_z_stencil - useful for
  * constructing clear masks.
  */
-static INLINE uint32_t
+static inline uint32_t
 util_pack_mask_z(enum pipe_format format, uint32_t z)
 {
    switch (format) {
@@ -462,7 +462,7 @@
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_mask_z(enum pipe_format format, uint32_t z)
 {
    switch (format) {
@@ -474,7 +474,7 @@
 }
 
 
-static INLINE uint32_t
+static inline uint32_t
 util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
 {
    uint32_t packed = util_pack_mask_z(format, z);
@@ -497,7 +497,7 @@
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
 {
    uint64_t packed;
@@ -516,7 +516,7 @@
 /**
  * Note: it's assumed that z is in [0,1]
  */
-static INLINE uint32_t
+static inline uint32_t
 util_pack_z(enum pipe_format format, double z)
 {
    union fi fui;
@@ -558,7 +558,7 @@
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_z(enum pipe_format format, double z)
 {
    union fi fui;
@@ -580,7 +580,7 @@
  * Pack Z and/or stencil values into a 32-bit value described by format.
  * Note: it's assumed that z is in [0,1] and s in [0,255]
  */
-static INLINE uint32_t
+static inline uint32_t
 util_pack_z_stencil(enum pipe_format format, double z, uint8_t s)
 {
    uint32_t packed = util_pack_z(format, z);
@@ -603,7 +603,7 @@
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_z_stencil(enum pipe_format format, double z, uint8_t s)
 {
    uint64_t packed;
@@ -624,7 +624,7 @@
 /**
  * Pack 4 ubytes into a 4-byte word
  */
-static INLINE unsigned
+static inline unsigned
 pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3)
 {
    return ((((unsigned int)b0) << 0) |
@@ -637,7 +637,7 @@
 /**
  * Pack/convert 4 floats into one 4-byte word.
  */
-static INLINE unsigned
+static inline unsigned
 pack_ui32_float4(float a, float b, float c, float d)
 {
    return pack_ub4( float_to_ubyte(a),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_pointer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_pointer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_pointer.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_pointer.h	2015-09-16 14:36:09.000000000 +0000
@@ -34,7 +34,7 @@
 extern "C" {
 #endif
 
-static INLINE intptr_t
+static inline intptr_t
 pointer_to_intptr( const void *p )
 {
    union {
@@ -45,7 +45,7 @@
    return pi.i;
 }
 
-static INLINE void *
+static inline void *
 intptr_to_pointer( intptr_t i )
 {
    union {
@@ -56,7 +56,7 @@
    return pi.p;
 }
 
-static INLINE uintptr_t
+static inline uintptr_t
 pointer_to_uintptr( const void *ptr )
 {
    union {
@@ -67,7 +67,7 @@
    return pu.u;
 }
 
-static INLINE void *
+static inline void *
 uintptr_to_pointer( uintptr_t u )
 {
    union {
@@ -81,7 +81,7 @@
 /**
  * Return a pointer aligned to next multiple of N bytes.
  */
-static INLINE void *
+static inline void *
 align_pointer( const void *unaligned, uintptr_t alignment )
 {
    uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1);
@@ -92,7 +92,7 @@
 /**
  * Return a pointer aligned to next multiple of 16 bytes.
  */
-static INLINE void *
+static inline void *
 align16( void *unaligned )
 {
    return align_pointer( unaligned, 16 );
@@ -100,7 +100,7 @@
 
 typedef void (*func_pointer)(void);
 
-static INLINE func_pointer
+static inline func_pointer
 pointer_to_func( void *p )
 {
    union {
@@ -111,7 +111,7 @@
    return pf.f;
 }
 
-static INLINE void *
+static inline void *
 func_to_pointer( func_pointer f )
 {
    union {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_prim.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_prim.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_prim.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_prim.h	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
  * Decompose a primitive that is a loop, a strip, or a fan.  Return the
  * original primitive if it is already decomposed.
  */
-static INLINE unsigned
+static inline unsigned
 u_decomposed_prim(unsigned prim)
 {
    switch (prim) {
@@ -71,7 +71,7 @@
  * Reduce a primitive to one of PIPE_PRIM_POINTS, PIPE_PRIM_LINES, and
  * PIPE_PRIM_TRIANGLES.
  */
-static INLINE unsigned
+static inline unsigned
 u_reduced_prim(unsigned prim)
 {
    switch (prim) {
@@ -91,7 +91,7 @@
 /**
  * Re-assemble a primitive to remove its adjacency.
  */
-static INLINE unsigned
+static inline unsigned
 u_assembled_prim(unsigned prim)
 {
    switch (prim) {
@@ -113,7 +113,7 @@
  * source file, it will increase the size of the binary slightly more than
  * expected because of the use of a table.
  */
-static INLINE const struct u_prim_vertex_count *
+static inline const struct u_prim_vertex_count *
 u_prim_vertex_count(unsigned prim)
 {
    static const struct u_prim_vertex_count prim_table[PIPE_PRIM_MAX] = {
@@ -140,7 +140,7 @@
  * Given a vertex count, return the number of primitives.
  * For polygons, return the number of triangles.
  */
-static INLINE unsigned
+static inline unsigned
 u_prims_for_vertices(unsigned prim, unsigned num)
 {
    const struct u_prim_vertex_count *info = u_prim_vertex_count(prim);
@@ -151,7 +151,7 @@
    return 1 + ((num - info->min) / info->incr);
 }
 
-static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
+static inline boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
 {
    const struct u_prim_vertex_count *count = u_prim_vertex_count(pipe_prim);
 
@@ -159,7 +159,7 @@
 }
 
 
-static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
+static inline boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
 {
    const struct u_prim_vertex_count *count = u_prim_vertex_count(pipe_prim);
 
@@ -174,7 +174,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 u_vertices_per_prim(int primitive)
 {
    switch(primitive) {
@@ -216,7 +216,7 @@
  * statistics depend on knowing the exact number of decomposed
  * primitives for a set of vertices.
  */
-static INLINE unsigned
+static inline unsigned
 u_decomposed_prims_for_vertices(int primitive, int vertices)
 {
    switch (primitive) {
@@ -263,7 +263,7 @@
  * count.  Each quad is treated as two triangles.  Polygons are treated as
  * triangle fans.
  */
-static INLINE unsigned
+static inline unsigned
 u_reduced_prims_for_vertices(int primitive, int vertices)
 {
    switch (primitive) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_pstipple.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_pstipple.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_pstipple.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_pstipple.c	2015-09-16 14:36:09.000000000 +0000
@@ -55,7 +55,7 @@
 #include "tgsi/tgsi_scan.h"
 
 /** Approx number of new tokens for instructions in pstip_transform_inst() */
-#define NUM_NEW_TOKENS 50
+#define NUM_NEW_TOKENS 53
 
 
 static void
@@ -262,6 +262,7 @@
       (struct pstip_transform_context *) ctx;
    int wincoordInput;
    int texTemp;
+   int sampIdx;
 
    /* find free texture sampler */
    pctx->freeSampler = free_bit(pctx->samplersUsed);
@@ -280,9 +281,21 @@
                                 TGSI_INTERPOLATE_LINEAR);
    }
 
+   sampIdx = pctx->hasFixedUnit ? pctx->fixedUnit : pctx->freeSampler;
+
    /* declare new sampler */
-   tgsi_transform_sampler_decl(ctx,
-         pctx->hasFixedUnit ? pctx->fixedUnit : pctx->freeSampler);
+   tgsi_transform_sampler_decl(ctx, sampIdx);
+
+   /* if the src shader has SVIEW decl's for each SAMP decl, we
+    * need to continue the trend and ensure there is a matching
+    * SVIEW for the new SAMP we just created
+    */
+   if (pctx->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+      tgsi_transform_sampler_view_decl(ctx,
+                                       sampIdx,
+                                       TGSI_TEXTURE_2D,
+                                       TGSI_RETURN_TYPE_FLOAT);
+   }
 
    /* Declare temp[0] reg if not already declared.
     * We can always use temp[0] since this code is before
@@ -321,8 +334,7 @@
    tgsi_transform_tex_2d_inst(ctx,
                               TGSI_FILE_TEMPORARY, texTemp,
                               TGSI_FILE_TEMPORARY, texTemp,
-                              pctx->hasFixedUnit ? pctx->fixedUnit
-                                                 : pctx->freeSampler);
+                              sampIdx);
 
    /* KILL_IF -texTemp;   # if -texTemp < 0, kill fragment */
    tgsi_transform_kill_inst(ctx,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_range.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_range.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_range.h	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_range.h	2015-09-16 14:36:09.000000000 +0000
@@ -47,7 +47,7 @@
 };
 
 
-static INLINE void
+static inline void
 util_range_set_empty(struct util_range *range)
 {
    range->start = ~0;
@@ -55,7 +55,7 @@
 }
 
 /* This is like a union of two sets. */
-static INLINE void
+static inline void
 util_range_add(struct util_range *range, unsigned start, unsigned end)
 {
    if (start < range->start || end > range->end) {
@@ -66,7 +66,7 @@
    }
 }
 
-static INLINE boolean
+static inline boolean
 util_ranges_intersect(struct util_range *range, unsigned start, unsigned end)
 {
    return MAX2(start, range->start) < MIN2(end, range->end);
@@ -75,14 +75,14 @@
 
 /* Init/deinit */
 
-static INLINE void
+static inline void
 util_range_init(struct util_range *range)
 {
    pipe_mutex_init(range->write_mutex);
    util_range_set_empty(range);
 }
 
-static INLINE void
+static inline void
 util_range_destroy(struct util_range *range)
 {
    pipe_mutex_destroy(range->write_mutex);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_rect.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_rect.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_rect.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_rect.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
 
 /* Do two rectangles intersect?
  */
-static INLINE boolean
+static inline boolean
 u_rect_test_intersection(const struct u_rect *a,
                          const struct u_rect *b)
 {
@@ -55,7 +55,7 @@
 
 /* Find the intersection of two rectangles known to intersect.
  */
-static INLINE void
+static inline void
 u_rect_find_intersection(const struct u_rect *a,
                          struct u_rect *b)
 {
@@ -68,13 +68,13 @@
 }
 
 
-static INLINE int
+static inline int
 u_rect_area(const struct u_rect *r)
 {
    return (r->x1 - r->x0) * (r->y1 - r->y0);
 }
 
-static INLINE void
+static inline void
 u_rect_possible_intersection(const struct u_rect *a,
                              struct u_rect *b)
 {
@@ -88,7 +88,7 @@
 
 /* Set @d to a rectangle that covers both @a and @b.
  */
-static INLINE void
+static inline void
 u_rect_union(struct u_rect *d, const struct u_rect *a, const struct u_rect *b)
 {
    d->x0 = MIN2(a->x0, b->x0);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_resource.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_resource.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_resource.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_resource.h	2015-09-16 14:36:09.000000000 +0000
@@ -36,7 +36,7 @@
  *
  * Note that this function returns true for single-layered array textures.
  */
-static INLINE boolean
+static inline boolean
 util_resource_is_array_texture(const struct pipe_resource *res)
 {
    switch (res->target) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_ringbuffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_ringbuffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_ringbuffer.c	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_ringbuffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -56,7 +56,7 @@
 /**
  * Return number of free entries in the ring
  */
-static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring )
+static inline unsigned util_ringbuffer_space( const struct util_ringbuffer *ring )
 {
    return (ring->tail - (ring->head + 1)) & ring->mask;
 }
@@ -64,7 +64,7 @@
 /**
  * Is the ring buffer empty?
  */
-static INLINE boolean util_ringbuffer_empty( const struct util_ringbuffer *ring )
+static inline boolean util_ringbuffer_empty( const struct util_ringbuffer *ring )
 {
    return util_ringbuffer_space(ring) == ring->mask;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_simple_shaders.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_simple_shaders.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_simple_shaders.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_simple_shaders.c	2015-09-16 14:36:09.000000000 +0000
@@ -216,7 +216,8 @@
 util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
                                         unsigned tex_target,
                                         unsigned interp_mode,
-                                        unsigned writemask )
+                                        unsigned writemask,
+                                        enum tgsi_return_type stype)
 {
    struct ureg_program *ureg;
    struct ureg_src sampler;
@@ -232,6 +233,8 @@
    
    sampler = ureg_DECL_sampler( ureg, 0 );
 
+   ureg_DECL_sampler_view(ureg, 0, tex_target, stype, stype, stype, stype);
+
    tex = ureg_DECL_fs_input( ureg, 
                              TGSI_SEMANTIC_GENERIC, 0, 
                              interp_mode );
@@ -268,12 +271,14 @@
  */
 void *
 util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target,
-                              unsigned interp_mode)
+                              unsigned interp_mode,
+                              enum tgsi_return_type stype)
 {
    return util_make_fragment_tex_shader_writemask( pipe,
                                                    tex_target,
                                                    interp_mode,
-                                                   TGSI_WRITEMASK_XYZW );
+                                                   TGSI_WRITEMASK_XYZW,
+                                                   stype );
 }
 
 
@@ -298,6 +303,12 @@
 
    sampler = ureg_DECL_sampler( ureg, 0 );
 
+   ureg_DECL_sampler_view(ureg, 0, tex_target,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT);
+
    tex = ureg_DECL_fs_input( ureg,
                              TGSI_SEMANTIC_GENERIC, 0,
                              interp_mode );
@@ -343,7 +354,17 @@
       return NULL;
 
    depth_sampler = ureg_DECL_sampler( ureg, 0 );
+   ureg_DECL_sampler_view(ureg, 0, tex_target,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT);
    stencil_sampler = ureg_DECL_sampler( ureg, 1 );
+   ureg_DECL_sampler_view(ureg, 1, tex_target,
+                          TGSI_RETURN_TYPE_UINT,
+                          TGSI_RETURN_TYPE_UINT,
+                          TGSI_RETURN_TYPE_UINT,
+                          TGSI_RETURN_TYPE_UINT);
 
    tex = ureg_DECL_fs_input( ureg,
                              TGSI_SEMANTIC_GENERIC, 0,
@@ -398,6 +419,12 @@
 
    stencil_sampler = ureg_DECL_sampler( ureg, 0 );
 
+   ureg_DECL_sampler_view(ureg, 0, tex_target,
+                          TGSI_RETURN_TYPE_UINT,
+                          TGSI_RETURN_TYPE_UINT,
+                          TGSI_RETURN_TYPE_UINT,
+                          TGSI_RETURN_TYPE_UINT);
+
    tex = ureg_DECL_fs_input( ureg,
                              TGSI_SEMANTIC_GENERIC, 0,
                              interp_mode );
@@ -512,6 +539,7 @@
 static void *
 util_make_fs_blit_msaa_gen(struct pipe_context *pipe,
                            unsigned tgsi_tex,
+                           const char *samp_type,
                            const char *output_semantic,
                            const char *output_mask)
 {
@@ -519,6 +547,7 @@
          "FRAG\n"
          "DCL IN[0], GENERIC[0], LINEAR\n"
          "DCL SAMP[0]\n"
+         "DCL SVIEW[0], %s, %s\n"
          "DCL OUT[0], %s\n"
          "DCL TEMP[0]\n"
 
@@ -534,7 +563,8 @@
    assert(tgsi_tex == TGSI_TEXTURE_2D_MSAA ||
           tgsi_tex == TGSI_TEXTURE_2D_ARRAY_MSAA);
 
-   sprintf(text, shader_templ, output_semantic, output_mask, type);
+   sprintf(text, shader_templ, type, samp_type,
+           output_semantic, output_mask, type);
 
    if (!tgsi_text_translate(text, tokens, Elements(tokens))) {
       puts(text);
@@ -556,9 +586,19 @@
  */
 void *
 util_make_fs_blit_msaa_color(struct pipe_context *pipe,
-                             unsigned tgsi_tex)
+                             unsigned tgsi_tex,
+                             enum tgsi_return_type stype)
 {
-   return util_make_fs_blit_msaa_gen(pipe, tgsi_tex,
+   const char *samp_type;
+
+   if (stype == TGSI_RETURN_TYPE_UINT)
+      samp_type = "UINT";
+   else if (stype == TGSI_RETURN_TYPE_SINT)
+      samp_type = "SINT";
+   else
+      samp_type = "FLOAT";
+
+   return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, samp_type,
                                      "COLOR[0]", "");
 }
 
@@ -572,7 +612,7 @@
 util_make_fs_blit_msaa_depth(struct pipe_context *pipe,
                              unsigned tgsi_tex)
 {
-   return util_make_fs_blit_msaa_gen(pipe, tgsi_tex,
+   return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "FLOAT",
                                      "POSITION", ".z");
 }
 
@@ -586,7 +626,7 @@
 util_make_fs_blit_msaa_stencil(struct pipe_context *pipe,
                                unsigned tgsi_tex)
 {
-   return util_make_fs_blit_msaa_gen(pipe, tgsi_tex,
+   return util_make_fs_blit_msaa_gen(pipe, tgsi_tex, "UINT",
                                      "STENCIL", ".y");
 }
 
@@ -640,7 +680,7 @@
 void *
 util_make_fs_msaa_resolve(struct pipe_context *pipe,
                           unsigned tgsi_tex, unsigned nr_samples,
-                          boolean is_uint, boolean is_sint)
+                          enum tgsi_return_type stype)
 {
    struct ureg_program *ureg;
    struct ureg_src sampler, coord;
@@ -653,6 +693,7 @@
 
    /* Declarations. */
    sampler = ureg_DECL_sampler(ureg, 0);
+   ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype);
    coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
                               TGSI_INTERPOLATE_LINEAR);
    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
@@ -670,9 +711,9 @@
                ureg_imm1u(ureg, i));
       ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord), sampler);
 
-      if (is_uint)
+      if (stype == TGSI_RETURN_TYPE_UINT)
          ureg_U2F(ureg, tmp, ureg_src(tmp));
-      else if (is_sint)
+      else if (stype == TGSI_RETURN_TYPE_SINT)
          ureg_I2F(ureg, tmp, ureg_src(tmp));
 
       /* Add it to the sum.*/
@@ -683,9 +724,9 @@
    ureg_MUL(ureg, tmp_sum, ureg_src(tmp_sum),
             ureg_imm1f(ureg, 1.0 / nr_samples));
 
-   if (is_uint)
+   if (stype == TGSI_RETURN_TYPE_UINT)
       ureg_F2U(ureg, out, ureg_src(tmp_sum));
-   else if (is_sint)
+   else if (stype == TGSI_RETURN_TYPE_SINT)
       ureg_F2I(ureg, out, ureg_src(tmp_sum));
    else
       ureg_MOV(ureg, out, ureg_src(tmp_sum));
@@ -699,7 +740,7 @@
 void *
 util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                    unsigned tgsi_tex, unsigned nr_samples,
-                                   boolean is_uint, boolean is_sint)
+                                   enum tgsi_return_type stype)
 {
    struct ureg_program *ureg;
    struct ureg_src sampler, coord;
@@ -713,6 +754,7 @@
 
    /* Declarations. */
    sampler = ureg_DECL_sampler(ureg, 0);
+   ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype);
    coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0,
                               TGSI_INTERPOLATE_LINEAR);
    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
@@ -744,9 +786,9 @@
                   ureg_imm1u(ureg, i));
          ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler);
 
-         if (is_uint)
+         if (stype == TGSI_RETURN_TYPE_UINT)
             ureg_U2F(ureg, tmp, ureg_src(tmp));
-         else if (is_sint)
+         else if (stype == TGSI_RETURN_TYPE_SINT)
             ureg_I2F(ureg, tmp, ureg_src(tmp));
 
          /* Add it to the sum.*/
@@ -778,9 +820,9 @@
             ureg_src(top));
 
    /* Convert to the texture format and return. */
-   if (is_uint)
+   if (stype == TGSI_RETURN_TYPE_UINT)
       ureg_F2U(ureg, out, ureg_src(tmp));
-   else if (is_sint)
+   else if (stype == TGSI_RETURN_TYPE_SINT)
       ureg_F2I(ureg, out, ureg_src(tmp));
    else
       ureg_MOV(ureg, out, ureg_src(tmp));
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_simple_shaders.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_simple_shaders.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_simple_shaders.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_simple_shaders.h	2015-09-16 14:36:09.000000000 +0000
@@ -68,15 +68,16 @@
 util_make_layered_clear_geometry_shader(struct pipe_context *pipe);
 
 extern void *
-util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, 
+util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
                                         unsigned tex_target,
                                         unsigned interp_mode,
-                                        unsigned writemask);
+                                        unsigned writemask,
+                                        enum tgsi_return_type stype);
 
 extern void *
 util_make_fragment_tex_shader(struct pipe_context *pipe, unsigned tex_target,
-                              unsigned interp_mode);
-
+                              unsigned interp_mode,
+                              enum tgsi_return_type stype);
 
 extern void *
 util_make_fragment_tex_shader_writedepth(struct pipe_context *pipe,
@@ -115,7 +116,8 @@
 
 extern void *
 util_make_fs_blit_msaa_color(struct pipe_context *pipe,
-                             unsigned tgsi_tex);
+                             unsigned tgsi_tex,
+                             enum tgsi_return_type stype);
 
 
 extern void *
@@ -136,13 +138,13 @@
 void *
 util_make_fs_msaa_resolve(struct pipe_context *pipe,
                           unsigned tgsi_tex, unsigned nr_samples,
-                          boolean is_uint, boolean is_sint);
+                          enum tgsi_return_type stype);
 
 
 void *
 util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                    unsigned tgsi_tex, unsigned nr_samples,
-                                   boolean is_uint, boolean is_sint);
+                                   enum tgsi_return_type stype);
 
 #ifdef __cplusplus
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_snprintf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_snprintf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_snprintf.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_snprintf.c	1970-01-01 00:00:00.000000000 +0000
@@ -1,1480 +0,0 @@
-/*
- * Copyright (c) 1995 Patrick Powell.
- *
- * This code is based on code written by Patrick Powell <papowell@astart.com>.
- * It may be used for any purpose as long as this notice remains intact on all
- * source code distributions.
- */
-
-/*
- * Copyright (c) 2008 Holger Weiss.
- *
- * This version of the code is maintained by Holger Weiss <holger@jhweiss.de>.
- * My changes to the code may freely be used, modified and/or redistributed for
- * any purpose.  It would be nice if additions and fixes to this file (including
- * trivial code cleanups) would be sent back in order to let me include them in
- * the version available at <http://www.jhweiss.de/software/snprintf.html>.
- * However, this is not a requirement for using or redistributing (possibly
- * modified) versions of this file, nor is leaving this notice intact mandatory.
- */
-
-/*
- * History
- *
- * 2008-01-20 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.1:
- *
- * 	Fixed the detection of infinite floating point values on IRIX (and
- * 	possibly other systems) and applied another few minor cleanups.
- *
- * 2008-01-06 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.0:
- *
- * 	Added a lot of new features, fixed many bugs, and incorporated various
- * 	improvements done by Andrew Tridgell <tridge@samba.org>, Russ Allbery
- * 	<rra@stanford.edu>, Hrvoje Niksic <hniksic@xemacs.org>, Damien Miller
- * 	<djm@mindrot.org>, and others for the Samba, INN, Wget, and OpenSSH
- * 	projects.  The additions include: support the "e", "E", "g", "G", and
- * 	"F" conversion specifiers (and use conversion style "f" or "F" for the
- * 	still unsupported "a" and "A" specifiers); support the "hh", "ll", "j",
- * 	"t", and "z" length modifiers; support the "#" flag and the (non-C99)
- * 	"'" flag; use localeconv(3) (if available) to get both the current
- * 	locale's decimal point character and the separator between groups of
- * 	digits; fix the handling of various corner cases of field width and
- * 	precision specifications; fix various floating point conversion bugs;
- * 	handle infinite and NaN floating point values; don't attempt to write to
- * 	the output buffer (which may be NULL) if a size of zero was specified;
- * 	check for integer overflow of the field width, precision, and return
- * 	values and during the floating point conversion; use the OUTCHAR() macro
- * 	instead of a function for better performance; provide asprintf(3) and
- * 	vasprintf(3) functions; add new test cases.  The replacement functions
- * 	have been renamed to use an "rpl_" prefix, the function calls in the
- * 	main project (and in this file) must be redefined accordingly for each
- * 	replacement function which is needed (by using Autoconf or other means).
- * 	Various other minor improvements have been applied and the coding style
- * 	was cleaned up for consistency.
- *
- * 2007-07-23 Holger Weiss <holger@jhweiss.de> for Mutt 1.5.13:
- *
- * 	C99 compliant snprintf(3) and vsnprintf(3) functions return the number
- * 	of characters that would have been written to a sufficiently sized
- * 	buffer (excluding the '\0').  The original code simply returned the
- * 	length of the resulting output string, so that's been fixed.
- *
- * 1998-03-05 Michael Elkins <me@mutt.org> for Mutt 0.90.8:
- *
- * 	The original code assumed that both snprintf(3) and vsnprintf(3) were
- * 	missing.  Some systems only have snprintf(3) but not vsnprintf(3), so
- * 	the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
- *
- * 1998-01-27 Thomas Roessler <roessler@does-not-exist.org> for Mutt 0.89i:
- *
- * 	The PGP code was using unsigned hexadecimal formats.  Unfortunately,
- * 	unsigned formats simply didn't work.
- *
- * 1997-10-22 Brandon Long <blong@fiction.net> for Mutt 0.87.1:
- *
- * 	Ok, added some minimal floating point support, which means this probably
- * 	requires libm on most operating systems.  Don't yet support the exponent
- * 	(e,E) and sigfig (g,G).  Also, fmtint() was pretty badly broken, it just
- * 	wasn't being exercised in ways which showed it, so that's been fixed.
- * 	Also, formatted the code to Mutt conventions, and removed dead code left
- * 	over from the original.  Also, there is now a builtin-test, run with:
- * 	gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf
- *
- * 2996-09-15 Brandon Long <blong@fiction.net> for Mutt 0.43:
- *
- * 	This was ugly.  It is still ugly.  I opted out of floating point
- * 	numbers, but the formatter understands just about everything from the
- * 	normal C string format, at least as far as I can tell from the Solaris
- * 	2.5 printf(3S) man page.
- */
-
-/*
- * ToDo
- *
- * - Add wide character support.
- * - Add support for "%a" and "%A" conversions.
- * - Create test routines which predefine the expected results.  Our test cases
- *   usually expose bugs in system implementations rather than in ours :-)
- */
-
-/*
- * Usage
- *
- * 1) The following preprocessor macros should be defined to 1 if the feature or
- *    file in question is available on the target system (by using Autoconf or
- *    other means), though basic functionality should be available as long as
- *    HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly:
- *
- *    	HAVE_VSNPRINTF
- *    	HAVE_SNPRINTF
- *    	HAVE_VASPRINTF
- *    	HAVE_ASPRINTF
- *    	HAVE_STDARG_H
- *    	HAVE_STDDEF_H
- *    	HAVE_STDINT_H
- *    	HAVE_STDLIB_H
- *    	HAVE_INTTYPES_H
- *    	HAVE_LOCALE_H
- *    	HAVE_LOCALECONV
- *    	HAVE_LCONV_DECIMAL_POINT
- *    	HAVE_LCONV_THOUSANDS_SEP
- *    	HAVE_LONG_DOUBLE
- *    	HAVE_LONG_LONG_INT
- *    	HAVE_UNSIGNED_LONG_LONG_INT
- *    	HAVE_INTMAX_T
- *    	HAVE_UINTMAX_T
- *    	HAVE_UINTPTR_T
- *    	HAVE_PTRDIFF_T
- *    	HAVE_VA_COPY
- *    	HAVE___VA_COPY
- *
- * 2) The calls to the functions which should be replaced must be redefined
- *    throughout the project files (by using Autoconf or other means):
- *
- *    	#define vsnprintf rpl_vsnprintf
- *    	#define snprintf rpl_snprintf
- *    	#define vasprintf rpl_vasprintf
- *    	#define asprintf rpl_asprintf
- *
- * 3) The required replacement functions should be declared in some header file
- *    included throughout the project files:
- *
- *    	#if HAVE_CONFIG_H
- *    	#include <config.h>
- *    	#endif
- *    	#if HAVE_STDARG_H
- *    	#include <stdarg.h>
- *    	#if !HAVE_VSNPRINTF
- *    	int rpl_vsnprintf(char *, size_t, const char *, va_list);
- *    	#endif
- *    	#if !HAVE_SNPRINTF
- *    	int rpl_snprintf(char *, size_t, const char *, ...);
- *    	#endif
- *    	#if !HAVE_VASPRINTF
- *    	int rpl_vasprintf(char **, const char *, va_list);
- *    	#endif
- *    	#if !HAVE_ASPRINTF
- *    	int rpl_asprintf(char **, const char *, ...);
- *    	#endif
- *    	#endif
- *
- * Autoconf macros for handling step 1 and step 2 are available at
- * <http://www.jhweiss.de/software/snprintf.html>.
- */
-
-#include "pipe/p_config.h"
-
-#if HAVE_CONFIG_H
-#include <config.h>
-#else
-#ifdef _MSC_VER
-#define vsnprintf util_vsnprintf
-#define snprintf util_snprintf
-#define HAVE_VSNPRINTF 0
-#define HAVE_SNPRINTF 0
-#define HAVE_VASPRINTF 1 /* not needed */
-#define HAVE_ASPRINTF 1 /* not needed */
-#define HAVE_STDARG_H 1
-#define HAVE_STDDEF_H 1
-#define HAVE_STDINT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_INTTYPES_H 0
-#define HAVE_LOCALE_H 0
-#define HAVE_LOCALECONV 0
-#define HAVE_LCONV_DECIMAL_POINT 0
-#define HAVE_LCONV_THOUSANDS_SEP 0
-#define HAVE_LONG_DOUBLE 0
-#define HAVE_LONG_LONG_INT 1
-#define HAVE_UNSIGNED_LONG_LONG_INT 1
-#define HAVE_INTMAX_T 0
-#define HAVE_UINTMAX_T 0
-#define HAVE_UINTPTR_T 1
-#define HAVE_PTRDIFF_T 1
-#define HAVE_VA_COPY 0
-#define HAVE___VA_COPY 0
-#else
-#define HAVE_VSNPRINTF 1
-#define HAVE_SNPRINTF 1
-#define HAVE_VASPRINTF 1
-#define HAVE_ASPRINTF 1
-#endif
-#endif	/* HAVE_CONFIG_H */
-
-#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF
-#include <stdio.h>	/* For NULL, size_t, vsnprintf(3), and vasprintf(3). */
-#ifdef VA_START
-#undef VA_START
-#endif	/* defined(VA_START) */
-#ifdef VA_SHIFT
-#undef VA_SHIFT
-#endif	/* defined(VA_SHIFT) */
-#if HAVE_STDARG_H
-#include <stdarg.h>
-#define VA_START(ap, last) va_start(ap, last)
-#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */
-#else	/* Assume <varargs.h> is available. */
-#include <varargs.h>
-#define VA_START(ap, last) va_start(ap)	/* "last" is ignored. */
-#define VA_SHIFT(ap, value, type) value = va_arg(ap, type)
-#endif	/* HAVE_STDARG_H */
-
-#if !HAVE_VASPRINTF
-#if HAVE_STDLIB_H
-#include <stdlib.h>	/* For malloc(3). */
-#endif	/* HAVE_STDLIB_H */
-#ifdef VA_COPY
-#undef VA_COPY
-#endif	/* defined(VA_COPY) */
-#ifdef VA_END_COPY
-#undef VA_END_COPY
-#endif	/* defined(VA_END_COPY) */
-#if HAVE_VA_COPY
-#define VA_COPY(dest, src) va_copy(dest, src)
-#define VA_END_COPY(ap) va_end(ap)
-#elif HAVE___VA_COPY
-#define VA_COPY(dest, src) __va_copy(dest, src)
-#define VA_END_COPY(ap) va_end(ap)
-#else
-#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list))
-#define VA_END_COPY(ap) /* No-op. */
-#define NEED_MYMEMCPY 1
-static void *mymemcpy(void *, void *, size_t);
-#endif	/* HAVE_VA_COPY */
-#endif	/* !HAVE_VASPRINTF */
-
-#if !HAVE_VSNPRINTF
-#include <limits.h>	/* For *_MAX. */
-#if HAVE_INTTYPES_H
-#include <inttypes.h>	/* For intmax_t (if not defined in <stdint.h>). */
-#endif	/* HAVE_INTTYPES_H */
-#if HAVE_LOCALE_H
-#include <locale.h>	/* For localeconv(3). */
-#endif	/* HAVE_LOCALE_H */
-#if HAVE_STDDEF_H
-#include <stddef.h>	/* For ptrdiff_t. */
-#endif	/* HAVE_STDDEF_H */
-#if HAVE_STDINT_H
-#include <stdint.h>	/* For intmax_t. */
-#endif	/* HAVE_STDINT_H */
-
-/* Support for unsigned long long int.  We may also need ULLONG_MAX. */
-#ifndef ULONG_MAX	/* We may need ULONG_MAX as a fallback. */
-#ifdef UINT_MAX
-#define ULONG_MAX UINT_MAX
-#else
-#define ULONG_MAX INT_MAX
-#endif	/* defined(UINT_MAX) */
-#endif	/* !defined(ULONG_MAX) */
-#ifdef ULLONG
-#undef ULLONG
-#endif	/* defined(ULLONG) */
-#if HAVE_UNSIGNED_LONG_LONG_INT
-#define ULLONG unsigned long long int
-#ifndef ULLONG_MAX
-#define ULLONG_MAX ULONG_MAX
-#endif	/* !defined(ULLONG_MAX) */
-#else
-#define ULLONG unsigned long int
-#ifdef ULLONG_MAX
-#undef ULLONG_MAX
-#endif	/* defined(ULLONG_MAX) */
-#define ULLONG_MAX ULONG_MAX
-#endif	/* HAVE_LONG_LONG_INT */
-
-/* Support for uintmax_t.  We also need UINTMAX_MAX. */
-#ifdef UINTMAX_T
-#undef UINTMAX_T
-#endif	/* defined(UINTMAX_T) */
-#if HAVE_UINTMAX_T || defined(uintmax_t)
-#define UINTMAX_T uintmax_t
-#ifndef UINTMAX_MAX
-#define UINTMAX_MAX ULLONG_MAX
-#endif	/* !defined(UINTMAX_MAX) */
-#else
-#define UINTMAX_T ULLONG
-#ifdef UINTMAX_MAX
-#undef UINTMAX_MAX
-#endif	/* defined(UINTMAX_MAX) */
-#define UINTMAX_MAX ULLONG_MAX
-#endif	/* HAVE_UINTMAX_T || defined(uintmax_t) */
-
-/* Support for long double. */
-#ifndef LDOUBLE
-#if HAVE_LONG_DOUBLE
-#define LDOUBLE long double
-#else
-#define LDOUBLE double
-#endif	/* HAVE_LONG_DOUBLE */
-#endif	/* !defined(LDOUBLE) */
-
-/* Support for long long int. */
-#ifndef LLONG
-#if HAVE_LONG_LONG_INT
-#define LLONG long long int
-#else
-#define LLONG long int
-#endif	/* HAVE_LONG_LONG_INT */
-#endif	/* !defined(LLONG) */
-
-/* Support for intmax_t. */
-#ifndef INTMAX_T
-#if HAVE_INTMAX_T || defined(intmax_t)
-#define INTMAX_T intmax_t
-#else
-#define INTMAX_T LLONG
-#endif	/* HAVE_INTMAX_T || defined(intmax_t) */
-#endif	/* !defined(INTMAX_T) */
-
-/* Support for uintptr_t. */
-#ifndef UINTPTR_T
-#if HAVE_UINTPTR_T || defined(uintptr_t)
-#define UINTPTR_T uintptr_t
-#else
-#define UINTPTR_T unsigned long int
-#endif	/* HAVE_UINTPTR_T || defined(uintptr_t) */
-#endif	/* !defined(UINTPTR_T) */
-
-/* Support for ptrdiff_t. */
-#ifndef PTRDIFF_T
-#if HAVE_PTRDIFF_T || defined(ptrdiff_t)
-#define PTRDIFF_T ptrdiff_t
-#else
-#define PTRDIFF_T long int
-#endif	/* HAVE_PTRDIFF_T || defined(ptrdiff_t) */
-#endif	/* !defined(PTRDIFF_T) */
-
-/*
- * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99:
- * 7.19.6.1, 7).  However, we'll simply use PTRDIFF_T and convert it to an
- * unsigned type if necessary.  This should work just fine in practice.
- */
-#ifndef UPTRDIFF_T
-#define UPTRDIFF_T PTRDIFF_T
-#endif	/* !defined(UPTRDIFF_T) */
-
-/*
- * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7).
- * However, we'll simply use size_t and convert it to a signed type if
- * necessary.  This should work just fine in practice.
- */
-#ifndef SSIZE_T
-#define SSIZE_T size_t
-#endif	/* !defined(SSIZE_T) */
-
-/* Either ERANGE or E2BIG should be available everywhere. */
-#ifndef ERANGE
-#define ERANGE E2BIG
-#endif	/* !defined(ERANGE) */
-#ifndef EOVERFLOW
-#define EOVERFLOW ERANGE
-#endif	/* !defined(EOVERFLOW) */
-
-/*
- * Buffer size to hold the octal string representation of UINT128_MAX without
- * nul-termination ("3777777777777777777777777777777777777777777").
- */
-#ifdef MAX_CONVERT_LENGTH
-#undef MAX_CONVERT_LENGTH
-#endif	/* defined(MAX_CONVERT_LENGTH) */
-#define MAX_CONVERT_LENGTH      43
-
-/* Format read states. */
-#define PRINT_S_DEFAULT         0
-#define PRINT_S_FLAGS           1
-#define PRINT_S_WIDTH           2
-#define PRINT_S_DOT             3
-#define PRINT_S_PRECISION       4
-#define PRINT_S_MOD             5
-#define PRINT_S_CONV            6
-
-/* Format flags. */
-#define PRINT_F_MINUS           (1 << 0)
-#define PRINT_F_PLUS            (1 << 1)
-#define PRINT_F_SPACE           (1 << 2)
-#define PRINT_F_NUM             (1 << 3)
-#define PRINT_F_ZERO            (1 << 4)
-#define PRINT_F_QUOTE           (1 << 5)
-#define PRINT_F_UP              (1 << 6)
-#define PRINT_F_UNSIGNED        (1 << 7)
-#define PRINT_F_TYPE_G          (1 << 8)
-#define PRINT_F_TYPE_E          (1 << 9)
-
-/* Conversion flags. */
-#define PRINT_C_CHAR            1
-#define PRINT_C_SHORT           2
-#define PRINT_C_LONG            3
-#define PRINT_C_LLONG           4
-#define PRINT_C_LDOUBLE         5
-#define PRINT_C_SIZE            6
-#define PRINT_C_PTRDIFF         7
-#define PRINT_C_INTMAX          8
-
-#ifndef MAX
-#define MAX(x, y) ((x >= y) ? x : y)
-#endif	/* !defined(MAX) */
-#ifndef CHARTOINT
-#define CHARTOINT(ch) (ch - '0')
-#endif	/* !defined(CHARTOINT) */
-#ifndef ISDIGIT
-#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9')
-#endif	/* !defined(ISDIGIT) */
-#ifndef ISNAN
-#define ISNAN(x) (x != x)
-#endif	/* !defined(ISNAN) */
-#ifndef ISINF
-#define ISINF(x) (x != 0.0 && x + x == x)
-#endif	/* !defined(ISINF) */
-
-#ifdef OUTCHAR
-#undef OUTCHAR
-#endif	/* defined(OUTCHAR) */
-#define OUTCHAR(str, len, size, ch)                                          \
-do {                                                                         \
-	if (len + 1 < size)                                                  \
-		str[len] = ch;                                               \
-	(len)++;                                                             \
-} while (/* CONSTCOND */ 0)
-
-static void fmtstr(char *, size_t *, size_t, const char *, int, int, int);
-static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int);
-static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *);
-static void printsep(char *, size_t *, size_t);
-static int getnumsep(int);
-static int getexponent(LDOUBLE);
-static int convert(UINTMAX_T, char *, size_t, int, int);
-static UINTMAX_T cast(LDOUBLE);
-static UINTMAX_T myround(LDOUBLE);
-static LDOUBLE mypow10(int);
-
-int
-util_vsnprintf(char *str, size_t size, const char *format, va_list args)
-{
-	LDOUBLE fvalue;
-	INTMAX_T value;
-	unsigned char cvalue;
-	const char *strvalue;
-	INTMAX_T *intmaxptr;
-	PTRDIFF_T *ptrdiffptr;
-	SSIZE_T *sizeptr;
-	LLONG *llongptr;
-	long int *longptr;
-	int *intptr;
-	short int *shortptr;
-	signed char *charptr;
-	size_t len = 0;
-	int overflow = 0;
-	int base = 0;
-	int cflags = 0;
-	int flags = 0;
-	int width = 0;
-	int precision = -1;
-	int state = PRINT_S_DEFAULT;
-	char ch = *format++;
-
-	/*
-	 * C99 says: "If `n' is zero, nothing is written, and `s' may be a null
-	 * pointer." (7.19.6.5, 2)  We're forgiving and allow a NULL pointer
-	 * even if a size larger than zero was specified.  At least NetBSD's
-	 * snprintf(3) does the same, as well as other versions of this file.
-	 * (Though some of these versions will write to a non-NULL buffer even
-	 * if a size of zero was specified, which violates the standard.)
-	 */
-	if (str == NULL && size != 0)
-		size = 0;
-
-	while (ch != '\0')
-		switch (state) {
-		case PRINT_S_DEFAULT:
-			if (ch == '%')
-				state = PRINT_S_FLAGS;
-			else
-				OUTCHAR(str, len, size, ch);
-			ch = *format++;
-			break;
-		case PRINT_S_FLAGS:
-			switch (ch) {
-			case '-':
-				flags |= PRINT_F_MINUS;
-				ch = *format++;
-				break;
-			case '+':
-				flags |= PRINT_F_PLUS;
-				ch = *format++;
-				break;
-			case ' ':
-				flags |= PRINT_F_SPACE;
-				ch = *format++;
-				break;
-			case '#':
-				flags |= PRINT_F_NUM;
-				ch = *format++;
-				break;
-			case '0':
-				flags |= PRINT_F_ZERO;
-				ch = *format++;
-				break;
-			case '\'':	/* SUSv2 flag (not in C99). */
-				flags |= PRINT_F_QUOTE;
-				ch = *format++;
-				break;
-			default:
-				state = PRINT_S_WIDTH;
-				break;
-			}
-			break;
-		case PRINT_S_WIDTH:
-			if (ISDIGIT(ch)) {
-				ch = CHARTOINT(ch);
-				if (width > (INT_MAX - ch) / 10) {
-					overflow = 1;
-					goto out;
-				}
-				width = 10 * width + ch;
-				ch = *format++;
-			} else if (ch == '*') {
-				/*
-				 * C99 says: "A negative field width argument is
-				 * taken as a `-' flag followed by a positive
-				 * field width." (7.19.6.1, 5)
-				 */
-				if ((width = va_arg(args, int)) < 0) {
-					flags |= PRINT_F_MINUS;
-					width = -width;
-				}
-				ch = *format++;
-				state = PRINT_S_DOT;
-			} else
-				state = PRINT_S_DOT;
-			break;
-		case PRINT_S_DOT:
-			if (ch == '.') {
-				state = PRINT_S_PRECISION;
-				ch = *format++;
-			} else
-				state = PRINT_S_MOD;
-			break;
-		case PRINT_S_PRECISION:
-			if (precision == -1)
-				precision = 0;
-			if (ISDIGIT(ch)) {
-				ch = CHARTOINT(ch);
-				if (precision > (INT_MAX - ch) / 10) {
-					overflow = 1;
-					goto out;
-				}
-				precision = 10 * precision + ch;
-				ch = *format++;
-			} else if (ch == '*') {
-				/*
-				 * C99 says: "A negative precision argument is
-				 * taken as if the precision were omitted."
-				 * (7.19.6.1, 5)
-				 */
-				if ((precision = va_arg(args, int)) < 0)
-					precision = -1;
-				ch = *format++;
-				state = PRINT_S_MOD;
-			} else
-				state = PRINT_S_MOD;
-			break;
-		case PRINT_S_MOD:
-			switch (ch) {
-			case 'h':
-				ch = *format++;
-				if (ch == 'h') {	/* It's a char. */
-					ch = *format++;
-					cflags = PRINT_C_CHAR;
-				} else
-					cflags = PRINT_C_SHORT;
-				break;
-			case 'l':
-				ch = *format++;
-				if (ch == 'l') {	/* It's a long long. */
-					ch = *format++;
-					cflags = PRINT_C_LLONG;
-				} else
-					cflags = PRINT_C_LONG;
-				break;
-			case 'L':
-				cflags = PRINT_C_LDOUBLE;
-				ch = *format++;
-				break;
-			case 'j':
-				cflags = PRINT_C_INTMAX;
-				ch = *format++;
-				break;
-			case 't':
-				cflags = PRINT_C_PTRDIFF;
-				ch = *format++;
-				break;
-			case 'z':
-				cflags = PRINT_C_SIZE;
-				ch = *format++;
-				break;
-			}
-			state = PRINT_S_CONV;
-			break;
-		case PRINT_S_CONV:
-			switch (ch) {
-			case 'd':
-				/* FALLTHROUGH */
-			case 'i':
-				switch (cflags) {
-				case PRINT_C_CHAR:
-					value = (signed char)va_arg(args, int);
-					break;
-				case PRINT_C_SHORT:
-					value = (short int)va_arg(args, int);
-					break;
-				case PRINT_C_LONG:
-					value = va_arg(args, long int);
-					break;
-				case PRINT_C_LLONG:
-					value = va_arg(args, LLONG);
-					break;
-				case PRINT_C_SIZE:
-					value = va_arg(args, SSIZE_T);
-					break;
-				case PRINT_C_INTMAX:
-					value = va_arg(args, INTMAX_T);
-					break;
-				case PRINT_C_PTRDIFF:
-					value = va_arg(args, PTRDIFF_T);
-					break;
-				default:
-					value = va_arg(args, int);
-					break;
-				}
-				fmtint(str, &len, size, value, 10, width,
-				    precision, flags);
-				break;
-			case 'X':
-				flags |= PRINT_F_UP;
-				/* FALLTHROUGH */
-			case 'x':
-				base = 16;
-				/* FALLTHROUGH */
-			case 'o':
-				if (base == 0)
-					base = 8;
-				/* FALLTHROUGH */
-			case 'u':
-				if (base == 0)
-					base = 10;
-				flags |= PRINT_F_UNSIGNED;
-				switch (cflags) {
-				case PRINT_C_CHAR:
-					value = (unsigned char)va_arg(args,
-					    unsigned int);
-					break;
-				case PRINT_C_SHORT:
-					value = (unsigned short int)va_arg(args,
-					    unsigned int);
-					break;
-				case PRINT_C_LONG:
-					value = va_arg(args, unsigned long int);
-					break;
-				case PRINT_C_LLONG:
-					value = va_arg(args, ULLONG);
-					break;
-				case PRINT_C_SIZE:
-					value = va_arg(args, size_t);
-					break;
-				case PRINT_C_INTMAX:
-					value = va_arg(args, UINTMAX_T);
-					break;
-				case PRINT_C_PTRDIFF:
-					value = va_arg(args, UPTRDIFF_T);
-					break;
-				default:
-					value = va_arg(args, unsigned int);
-					break;
-				}
-				fmtint(str, &len, size, value, base, width,
-				    precision, flags);
-				break;
-			case 'A':
-				/* Not yet supported, we'll use "%F". */
-				/* FALLTHROUGH */
-			case 'F':
-				flags |= PRINT_F_UP;
-			case 'a':
-				/* Not yet supported, we'll use "%f". */
-				/* FALLTHROUGH */
-			case 'f':
-				if (cflags == PRINT_C_LDOUBLE)
-					fvalue = va_arg(args, LDOUBLE);
-				else
-					fvalue = va_arg(args, double);
-				fmtflt(str, &len, size, fvalue, width,
-				    precision, flags, &overflow);
-				if (overflow)
-					goto out;
-				break;
-			case 'E':
-				flags |= PRINT_F_UP;
-				/* FALLTHROUGH */
-			case 'e':
-				flags |= PRINT_F_TYPE_E;
-				if (cflags == PRINT_C_LDOUBLE)
-					fvalue = va_arg(args, LDOUBLE);
-				else
-					fvalue = va_arg(args, double);
-				fmtflt(str, &len, size, fvalue, width,
-				    precision, flags, &overflow);
-				if (overflow)
-					goto out;
-				break;
-			case 'G':
-				flags |= PRINT_F_UP;
-				/* FALLTHROUGH */
-			case 'g':
-				flags |= PRINT_F_TYPE_G;
-				if (cflags == PRINT_C_LDOUBLE)
-					fvalue = va_arg(args, LDOUBLE);
-				else
-					fvalue = va_arg(args, double);
-				/*
-				 * If the precision is zero, it is treated as
-				 * one (cf. C99: 7.19.6.1, 8).
-				 */
-				if (precision == 0)
-					precision = 1;
-				fmtflt(str, &len, size, fvalue, width,
-				    precision, flags, &overflow);
-				if (overflow)
-					goto out;
-				break;
-			case 'c':
-				cvalue = (unsigned char)va_arg(args, int);
-				OUTCHAR(str, len, size, cvalue);
-				break;
-			case 's':
-				strvalue = va_arg(args, char *);
-				fmtstr(str, &len, size, strvalue, width,
-				    precision, flags);
-				break;
-			case 'p':
-				/*
-				 * C99 says: "The value of the pointer is
-				 * converted to a sequence of printing
-				 * characters, in an implementation-defined
-				 * manner." (C99: 7.19.6.1, 8)
-				 */
-				if ((strvalue = va_arg(args, void *)) == NULL)
-					/*
-					 * We use the glibc format.  BSD prints
-					 * "0x0", SysV "0".
-					 */
-					fmtstr(str, &len, size, "(nil)", width,
-					    -1, flags);
-				else {
-					/*
-					 * We use the BSD/glibc format.  SysV
-					 * omits the "0x" prefix (which we emit
-					 * using the PRINT_F_NUM flag).
-					 */
-					flags |= PRINT_F_NUM;
-					flags |= PRINT_F_UNSIGNED;
-					fmtint(str, &len, size,
-					    (UINTPTR_T)strvalue, 16, width,
-					    precision, flags);
-				}
-				break;
-			case 'n':
-				switch (cflags) {
-				case PRINT_C_CHAR:
-					charptr = va_arg(args, signed char *);
-					*charptr = (signed char)len;
-					break;
-				case PRINT_C_SHORT:
-					shortptr = va_arg(args, short int *);
-					*shortptr = (short int)len;
-					break;
-				case PRINT_C_LONG:
-					longptr = va_arg(args, long int *);
-					*longptr = (long int)len;
-					break;
-				case PRINT_C_LLONG:
-					llongptr = va_arg(args, LLONG *);
-					*llongptr = (LLONG)len;
-					break;
-				case PRINT_C_SIZE:
-					/*
-					 * C99 says that with the "z" length
-					 * modifier, "a following `n' conversion
-					 * specifier applies to a pointer to a
-					 * signed integer type corresponding to
-					 * size_t argument." (7.19.6.1, 7)
-					 */
-					sizeptr = va_arg(args, SSIZE_T *);
-					*sizeptr = len;
-					break;
-				case PRINT_C_INTMAX:
-					intmaxptr = va_arg(args, INTMAX_T *);
-					*intmaxptr = len;
-					break;
-				case PRINT_C_PTRDIFF:
-					ptrdiffptr = va_arg(args, PTRDIFF_T *);
-					*ptrdiffptr = len;
-					break;
-				default:
-					intptr = va_arg(args, int *);
-					*intptr = (int)len;
-					break;
-				}
-				break;
-			case '%':	/* Print a "%" character verbatim. */
-				OUTCHAR(str, len, size, ch);
-				break;
-			default:	/* Skip other characters. */
-				break;
-			}
-			ch = *format++;
-			state = PRINT_S_DEFAULT;
-			base = cflags = flags = width = 0;
-			precision = -1;
-			break;
-		}
-out:
-	if (len < size)
-		str[len] = '\0';
-	else if (size > 0)
-		str[size - 1] = '\0';
-
-	if (overflow || len >= INT_MAX) {
-		return -1;
-	}
-	return (int)len;
-}
-
-static void
-fmtstr(char *str, size_t *len, size_t size, const char *value, int width,
-       int precision, int flags)
-{
-	int padlen, strln;	/* Amount to pad. */
-	int noprecision = (precision == -1);
-
-	if (value == NULL)	/* We're forgiving. */
-		value = "(null)";
-
-	/* If a precision was specified, don't read the string past it. */
-	for (strln = 0; value[strln] != '\0' &&
-	    (noprecision || strln < precision); strln++)
-		continue;
-
-	if ((padlen = width - strln) < 0)
-		padlen = 0;
-	if (flags & PRINT_F_MINUS)	/* Left justify. */
-		padlen = -padlen;
-
-	while (padlen > 0) {	/* Leading spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen--;
-	}
-	while (*value != '\0' && (noprecision || precision-- > 0)) {
-		OUTCHAR(str, *len, size, *value);
-		value++;
-	}
-	while (padlen < 0) {	/* Trailing spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen++;
-	}
-}
-
-static void
-fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width,
-       int precision, int flags)
-{
-	UINTMAX_T uvalue;
-	char iconvert[MAX_CONVERT_LENGTH];
-	char sign = 0;
-	char hexprefix = 0;
-	int spadlen = 0;	/* Amount to space pad. */
-	int zpadlen = 0;	/* Amount to zero pad. */
-	int pos;
-	int separators = (flags & PRINT_F_QUOTE);
-	int noprecision = (precision == -1);
-
-	if (flags & PRINT_F_UNSIGNED)
-		uvalue = value;
-	else {
-		uvalue = (value >= 0) ? value : -value;
-		if (value < 0)
-			sign = '-';
-		else if (flags & PRINT_F_PLUS)	/* Do a sign. */
-			sign = '+';
-		else if (flags & PRINT_F_SPACE)
-			sign = ' ';
-	}
-
-	pos = convert(uvalue, iconvert, sizeof(iconvert), base,
-	    flags & PRINT_F_UP);
-
-	if (flags & PRINT_F_NUM && uvalue != 0) {
-		/*
-		 * C99 says: "The result is converted to an `alternative form'.
-		 * For `o' conversion, it increases the precision, if and only
-		 * if necessary, to force the first digit of the result to be a
-		 * zero (if the value and precision are both 0, a single 0 is
-		 * printed).  For `x' (or `X') conversion, a nonzero result has
-		 * `0x' (or `0X') prefixed to it." (7.19.6.1, 6)
-		 */
-		switch (base) {
-		case 8:
-			if (precision <= pos)
-				precision = pos + 1;
-			break;
-		case 16:
-			hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x';
-			break;
-		}
-	}
-
-	if (separators)	/* Get the number of group separators we'll print. */
-		separators = getnumsep(pos);
-
-	zpadlen = precision - pos - separators;
-	spadlen = width                         /* Minimum field width. */
-	    - separators                        /* Number of separators. */
-	    - MAX(precision, pos)               /* Number of integer digits. */
-	    - ((sign != 0) ? 1 : 0)             /* Will we print a sign? */
-	    - ((hexprefix != 0) ? 2 : 0);       /* Will we print a prefix? */
-
-	if (zpadlen < 0)
-		zpadlen = 0;
-	if (spadlen < 0)
-		spadlen = 0;
-
-	/*
-	 * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
-	 * ignored.  For `d', `i', `o', `u', `x', and `X' conversions, if a
-	 * precision is specified, the `0' flag is ignored." (7.19.6.1, 6)
-	 */
-	if (flags & PRINT_F_MINUS)	/* Left justify. */
-		spadlen = -spadlen;
-	else if (flags & PRINT_F_ZERO && noprecision) {
-		zpadlen += spadlen;
-		spadlen = 0;
-	}
-	while (spadlen > 0) {	/* Leading spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		spadlen--;
-	}
-	if (sign != 0)	/* Sign. */
-		OUTCHAR(str, *len, size, sign);
-	if (hexprefix != 0) {	/* A "0x" or "0X" prefix. */
-		OUTCHAR(str, *len, size, '0');
-		OUTCHAR(str, *len, size, hexprefix);
-	}
-	while (zpadlen > 0) {	/* Leading zeros. */
-		OUTCHAR(str, *len, size, '0');
-		zpadlen--;
-	}
-	while (pos > 0) {	/* The actual digits. */
-		pos--;
-		OUTCHAR(str, *len, size, iconvert[pos]);
-		if (separators > 0 && pos > 0 && pos % 3 == 0)
-			printsep(str, len, size);
-	}
-	while (spadlen < 0) {	/* Trailing spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		spadlen++;
-	}
-}
-
-static void
-fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width,
-       int precision, int flags, int *overflow)
-{
-	LDOUBLE ufvalue;
-	UINTMAX_T intpart;
-	UINTMAX_T fracpart;
-	UINTMAX_T mask;
-	const char *infnan = NULL;
-	char iconvert[MAX_CONVERT_LENGTH];
-	char fconvert[MAX_CONVERT_LENGTH];
-	char econvert[4];	/* "e-12" (without nul-termination). */
-	char esign = 0;
-	char sign = 0;
-	int leadfraczeros = 0;
-	int exponent = 0;
-	int emitpoint = 0;
-	int omitzeros = 0;
-	int omitcount = 0;
-	int padlen = 0;
-	int epos = 0;
-	int fpos = 0;
-	int ipos = 0;
-	int separators = (flags & PRINT_F_QUOTE);
-	int estyle = (flags & PRINT_F_TYPE_E);
-#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
-	struct lconv *lc = localeconv();
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
-
-	/*
-	 * AIX' man page says the default is 0, but C99 and at least Solaris'
-	 * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX
-	 * defaults to 6.
-	 */
-	if (precision == -1)
-		precision = 6;
-
-	if (fvalue < 0.0)
-		sign = '-';
-	else if (flags & PRINT_F_PLUS)	/* Do a sign. */
-		sign = '+';
-	else if (flags & PRINT_F_SPACE)
-		sign = ' ';
-
-	if (ISNAN(fvalue))
-		infnan = (flags & PRINT_F_UP) ? "NAN" : "nan";
-	else if (ISINF(fvalue))
-		infnan = (flags & PRINT_F_UP) ? "INF" : "inf";
-
-	if (infnan != NULL) {
-		if (sign != 0)
-			iconvert[ipos++] = sign;
-		while (*infnan != '\0')
-			iconvert[ipos++] = *infnan++;
-		fmtstr(str, len, size, iconvert, width, ipos, flags);
-		return;
-	}
-
-	/* "%e" (or "%E") or "%g" (or "%G") conversion. */
-	if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) {
-		if (flags & PRINT_F_TYPE_G) {
-			/*
-			 * For "%g" (and "%G") conversions, the precision
-			 * specifies the number of significant digits, which
-			 * includes the digits in the integer part.  The
-			 * conversion will or will not be using "e-style" (like
-			 * "%e" or "%E" conversions) depending on the precision
-			 * and on the exponent.  However, the exponent can be
-			 * affected by rounding the converted value, so we'll
-			 * leave this decision for later.  Until then, we'll
-			 * assume that we're going to do an "e-style" conversion
-			 * (in order to get the exponent calculated).  For
-			 * "e-style", the precision must be decremented by one.
-			 */
-			precision--;
-			/*
-			 * For "%g" (and "%G") conversions, trailing zeros are
-			 * removed from the fractional portion of the result
-			 * unless the "#" flag was specified.
-			 */
-			if (!(flags & PRINT_F_NUM))
-				omitzeros = 1;
-		}
-		exponent = getexponent(fvalue);
-		estyle = 1;
-	}
-
-again:
-	/*
-	 * Sorry, we only support 9, 19, or 38 digits (that is, the number of
-	 * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value
-	 * minus one) past the decimal point due to our conversion method.
-	 */
-	switch (sizeof(UINTMAX_T)) {
-	case 16:
-		if (precision > 38)
-			precision = 38;
-		break;
-	case 8:
-		if (precision > 19)
-			precision = 19;
-		break;
-	default:
-		if (precision > 9)
-			precision = 9;
-		break;
-	}
-
-	ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue;
-	if (estyle)	/* We want exactly one integer digit. */
-		ufvalue /= mypow10(exponent);
-
-	if ((intpart = cast(ufvalue)) == UINTMAX_MAX) {
-		*overflow = 1;
-		return;
-	}
-
-	/*
-	 * Factor of ten with the number of digits needed for the fractional
-	 * part.  For example, if the precision is 3, the mask will be 1000.
-	 */
-	mask = (UINTMAX_T)mypow10(precision);
-	/*
-	 * We "cheat" by converting the fractional part to integer by
-	 * multiplying by a factor of ten.
-	 */
-	if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) {
-		/*
-		 * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000
-		 * (because precision = 3).  Now, myround(1000 * 0.99962) will
-		 * return 1000.  So, the integer part must be incremented by one
-		 * and the fractional part must be set to zero.
-		 */
-		intpart++;
-		fracpart = 0;
-		if (estyle && intpart == 10) {
-			/*
-			 * The value was rounded up to ten, but we only want one
-			 * integer digit if using "e-style".  So, the integer
-			 * part must be set to one and the exponent must be
-			 * incremented by one.
-			 */
-			intpart = 1;
-			exponent++;
-		}
-	}
-
-	/*
-	 * Now that we know the real exponent, we can check whether or not to
-	 * use "e-style" for "%g" (and "%G") conversions.  If we don't need
-	 * "e-style", the precision must be adjusted and the integer and
-	 * fractional parts must be recalculated from the original value.
-	 *
-	 * C99 says: "Let P equal the precision if nonzero, 6 if the precision
-	 * is omitted, or 1 if the precision is zero.  Then, if a conversion
-	 * with style `E' would have an exponent of X:
-	 *
-	 * - if P > X >= -4, the conversion is with style `f' (or `F') and
-	 *   precision P - (X + 1).
-	 *
-	 * - otherwise, the conversion is with style `e' (or `E') and precision
-	 *   P - 1." (7.19.6.1, 8)
-	 *
-	 * Note that we had decremented the precision by one.
-	 */
-	if (flags & PRINT_F_TYPE_G && estyle &&
-	    precision + 1 > exponent && exponent >= -4) {
-		precision -= exponent;
-		estyle = 0;
-		goto again;
-	}
-
-	if (estyle) {
-		if (exponent < 0) {
-			exponent = -exponent;
-			esign = '-';
-		} else
-			esign = '+';
-
-		/*
-		 * Convert the exponent.  The sizeof(econvert) is 4.  So, the
-		 * econvert buffer can hold e.g. "e+99" and "e-99".  We don't
-		 * support an exponent which contains more than two digits.
-		 * Therefore, the following stores are safe.
-		 */
-		epos = convert(exponent, econvert, 2, 10, 0);
-		/*
-		 * C99 says: "The exponent always contains at least two digits,
-		 * and only as many more digits as necessary to represent the
-		 * exponent." (7.19.6.1, 8)
-		 */
-		if (epos == 1)
-			econvert[epos++] = '0';
-		econvert[epos++] = esign;
-		econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e';
-	}
-
-	/* Convert the integer part and the fractional part. */
-	ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0);
-	if (fracpart != 0)	/* convert() would return 1 if fracpart == 0. */
-		fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0);
-
-	leadfraczeros = precision - fpos;
-
-	if (omitzeros) {
-		if (fpos > 0)	/* Omit trailing fractional part zeros. */
-			while (omitcount < fpos && fconvert[omitcount] == '0')
-				omitcount++;
-		else {	/* The fractional part is zero, omit it completely. */
-			omitcount = precision;
-			leadfraczeros = 0;
-		}
-		precision -= omitcount;
-	}
-
-	/*
-	 * Print a decimal point if either the fractional part is non-zero
-	 * and/or the "#" flag was specified.
-	 */
-	if (precision > 0 || flags & PRINT_F_NUM)
-		emitpoint = 1;
-	if (separators)	/* Get the number of group separators we'll print. */
-		separators = getnumsep(ipos);
-
-	padlen = width                  /* Minimum field width. */
-	    - ipos                      /* Number of integer digits. */
-	    - epos                      /* Number of exponent characters. */
-	    - precision                 /* Number of fractional digits. */
-	    - separators                /* Number of group separators. */
-	    - (emitpoint ? 1 : 0)       /* Will we print a decimal point? */
-	    - ((sign != 0) ? 1 : 0);    /* Will we print a sign character? */
-
-	if (padlen < 0)
-		padlen = 0;
-
-	/*
-	 * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
-	 * ignored." (7.19.6.1, 6)
-	 */
-	if (flags & PRINT_F_MINUS)	/* Left justifty. */
-		padlen = -padlen;
-	else if (flags & PRINT_F_ZERO && padlen > 0) {
-		if (sign != 0) {	/* Sign. */
-			OUTCHAR(str, *len, size, sign);
-			sign = 0;
-		}
-		while (padlen > 0) {	/* Leading zeros. */
-			OUTCHAR(str, *len, size, '0');
-			padlen--;
-		}
-	}
-	while (padlen > 0) {	/* Leading spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen--;
-	}
-	if (sign != 0)	/* Sign. */
-		OUTCHAR(str, *len, size, sign);
-	while (ipos > 0) {	/* Integer part. */
-		ipos--;
-		OUTCHAR(str, *len, size, iconvert[ipos]);
-		if (separators > 0 && ipos > 0 && ipos % 3 == 0)
-			printsep(str, len, size);
-	}
-	if (emitpoint) {	/* Decimal point. */
-#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
-		if (lc->decimal_point != NULL && *lc->decimal_point != '\0')
-			OUTCHAR(str, *len, size, *lc->decimal_point);
-		else	/* We'll always print some decimal point character. */
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
-			OUTCHAR(str, *len, size, '.');
-	}
-	while (leadfraczeros > 0) {	/* Leading fractional part zeros. */
-		OUTCHAR(str, *len, size, '0');
-		leadfraczeros--;
-	}
-	while (fpos > omitcount) {	/* The remaining fractional part. */
-		fpos--;
-		OUTCHAR(str, *len, size, fconvert[fpos]);
-	}
-	while (epos > 0) {	/* Exponent. */
-		epos--;
-		OUTCHAR(str, *len, size, econvert[epos]);
-	}
-	while (padlen < 0) {	/* Trailing spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen++;
-	}
-}
-
-static void
-printsep(char *str, size_t *len, size_t size)
-{
-#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
-	struct lconv *lc = localeconv();
-	int i;
-
-	if (lc->thousands_sep != NULL)
-		for (i = 0; lc->thousands_sep[i] != '\0'; i++)
-			OUTCHAR(str, *len, size, lc->thousands_sep[i]);
-	else
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
-		OUTCHAR(str, *len, size, ',');
-}
-
-static int
-getnumsep(int digits)
-{
-	int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3;
-#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
-	int strln;
-	struct lconv *lc = localeconv();
-
-	/* We support an arbitrary separator length (including zero). */
-	if (lc->thousands_sep != NULL) {
-		for (strln = 0; lc->thousands_sep[strln] != '\0'; strln++)
-			continue;
-		separators *= strln;
-	}
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
-	return separators;
-}
-
-static int
-getexponent(LDOUBLE value)
-{
-	LDOUBLE tmp = (value >= 0.0) ? value : -value;
-	int exponent = 0;
-
-	/*
-	 * We check for 99 > exponent > -99 in order to work around possible
-	 * endless loops which could happen (at least) in the second loop (at
-	 * least) if we're called with an infinite value.  However, we checked
-	 * for infinity before calling this function using our ISINF() macro, so
-	 * this might be somewhat paranoid.
-	 */
-	while (tmp < 1.0 && tmp > 0.0 && --exponent > -99)
-		tmp *= 10;
-	while (tmp >= 10.0 && ++exponent < 99)
-		tmp /= 10;
-
-	return exponent;
-}
-
-static int
-convert(UINTMAX_T value, char *buf, size_t size, int base, int caps)
-{
-	const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef";
-	size_t pos = 0;
-
-	/* We return an unterminated buffer with the digits in reverse order. */
-	do {
-		buf[pos++] = digits[value % base];
-		value /= base;
-	} while (value != 0 && pos < size);
-
-	return (int)pos;
-}
-
-static UINTMAX_T
-cast(LDOUBLE value)
-{
-	UINTMAX_T result;
-
-	/*
-	 * We check for ">=" and not for ">" because if UINTMAX_MAX cannot be
-	 * represented exactly as an LDOUBLE value (but is less than LDBL_MAX),
-	 * it may be increased to the nearest higher representable value for the
-	 * comparison (cf. C99: 6.3.1.4, 2).  It might then equal the LDOUBLE
-	 * value although converting the latter to UINTMAX_T would overflow.
-	 */
-	if (value >= UINTMAX_MAX)
-		return UINTMAX_MAX;
-
-	result = (UINTMAX_T)value;
-	/*
-	 * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to
-	 * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates
-	 * the standard).  Sigh.
-	 */
-	return (result <= value) ? result : result - 1;
-}
-
-static UINTMAX_T
-myround(LDOUBLE value)
-{
-	UINTMAX_T intpart = cast(value);
-
-	return ((value -= intpart) < 0.5) ? intpart : intpart + 1;
-}
-
-static LDOUBLE
-mypow10(int exponent)
-{
-	LDOUBLE result = 1;
-
-	while (exponent > 0) {
-		result *= 10;
-		exponent--;
-	}
-	while (exponent < 0) {
-		result /= 10;
-		exponent++;
-	}
-	return result;
-}
-#endif	/* !HAVE_VSNPRINTF */
-
-#if !HAVE_VASPRINTF
-#if NEED_MYMEMCPY
-void *
-mymemcpy(void *dst, void *src, size_t len)
-{
-	const char *from = src;
-	char *to = dst;
-
-	/* No need for optimization, we use this only to replace va_copy(3). */
-	while (len-- > 0)
-		*to++ = *from++;
-	return dst;
-}
-#endif	/* NEED_MYMEMCPY */
-
-int
-util_vasprintf(char **ret, const char *format, va_list ap)
-{
-	size_t size;
-	int len;
-	va_list aq;
-
-	VA_COPY(aq, ap);
-	len = vsnprintf(NULL, 0, format, aq);
-	VA_END_COPY(aq);
-	if (len < 0 || (*ret = malloc(size = len + 1)) == NULL)
-		return -1;
-	return vsnprintf(*ret, size, format, ap);
-}
-#endif	/* !HAVE_VASPRINTF */
-
-#if !HAVE_SNPRINTF
-#if HAVE_STDARG_H
-int
-util_snprintf(char *str, size_t size, const char *format, ...)
-#else
-int
-util_snprintf(va_alist) va_dcl
-#endif	/* HAVE_STDARG_H */
-{
-#if !HAVE_STDARG_H
-	char *str;
-	size_t size;
-	char *format;
-#endif	/* HAVE_STDARG_H */
-	va_list ap;
-	int len;
-
-	VA_START(ap, format);
-	VA_SHIFT(ap, str, char *);
-	VA_SHIFT(ap, size, size_t);
-	VA_SHIFT(ap, format, const char *);
-	len = vsnprintf(str, size, format, ap);
-	va_end(ap);
-	return len;
-}
-#endif	/* !HAVE_SNPRINTF */
-
-#if !HAVE_ASPRINTF
-#if HAVE_STDARG_H
-int
-util_asprintf(char **ret, const char *format, ...)
-#else
-int
-util_asprintf(va_alist) va_dcl
-#endif	/* HAVE_STDARG_H */
-{
-#if !HAVE_STDARG_H
-	char **ret;
-	char *format;
-#endif	/* HAVE_STDARG_H */
-	va_list ap;
-	int len;
-
-	VA_START(ap, format);
-	VA_SHIFT(ap, ret, char **);
-	VA_SHIFT(ap, format, const char *);
-	len = vasprintf(ret, format, ap);
-	va_end(ap);
-	return len;
-}
-#endif	/* !HAVE_ASPRINTF */
-#else	/* Dummy declaration to avoid empty translation unit warnings. */
-int main(void);
-#endif	/* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */
-
-
-/* vim: set joinspaces textwidth=80: */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_split_prim.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_split_prim.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_split_prim.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_split_prim.h	2015-09-16 14:36:09.000000000 +0000
@@ -23,7 +23,7 @@
    uint edgeflag_off:1;
 };
 
-static INLINE void
+static inline void
 util_split_prim_init(struct util_split_prim *s,
                   unsigned mode, unsigned start, unsigned count)
 {
@@ -41,7 +41,7 @@
    s->repeat_first = 0;
 }
 
-static INLINE boolean
+static inline boolean
 util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
 {
    int repeat = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_sse.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_sse.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_sse.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_sse.h	2015-09-16 14:36:09.000000000 +0000
@@ -51,7 +51,7 @@
    uint ui[4];
 };
 
-static INLINE void u_print_epi8(const char *name, __m128i r)
+static inline void u_print_epi8(const char *name, __m128i r)
 {
    union { __m128i m; ubyte ub[16]; } u;
    u.m = r;
@@ -80,7 +80,7 @@
                 u.ub[12], u.ub[13], u.ub[14], u.ub[15]);
 }
 
-static INLINE void u_print_epi16(const char *name, __m128i r)
+static inline void u_print_epi16(const char *name, __m128i r)
 {
    union { __m128i m; ushort us[8]; } u;
    u.m = r;
@@ -99,7 +99,7 @@
                 u.us[4],  u.us[5],  u.us[6],  u.us[7]);
 }
 
-static INLINE void u_print_epi32(const char *name, __m128i r)
+static inline void u_print_epi32(const char *name, __m128i r)
 {
    union { __m128i m; uint ui[4]; } u;
    u.m = r;
@@ -113,7 +113,7 @@
                 u.ui[0],  u.ui[1],  u.ui[2],  u.ui[3]);
 }
 
-static INLINE void u_print_ps(const char *name, __m128 r)
+static inline void u_print_ps(const char *name, __m128 r)
 {
    union { __m128 m; float f[4]; } u;
    u.m = r;
@@ -179,7 +179,7 @@
  * _mm_mullo_epi32() intrinsic as to not justify adding an sse4
  * dependency at this point.
  */
-static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
+static inline __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
 {
    __m128i a4   = _mm_srli_epi64(a, 32);  /* shift by one dword */
    __m128i b4   = _mm_srli_epi64(b, 32);  /* shift by one dword */
@@ -204,7 +204,7 @@
 }
 
 
-static INLINE void
+static inline void
 transpose4_epi32(const __m128i * restrict a,
                  const __m128i * restrict b,
                  const __m128i * restrict c,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_string.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_string.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_string.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_string.h	2015-09-16 14:36:09.000000000 +0000
@@ -35,13 +35,14 @@
 #ifndef U_STRING_H_
 #define U_STRING_H_
 
-#if !defined(_MSC_VER) && !defined(XF86_LIBC_H)
+#if !defined(XF86_LIBC_H)
 #include <stdio.h>
 #endif
 #include <stddef.h>
 #include <stdarg.h>
 
 #include "pipe/p_compiler.h"
+#include "util/macros.h" // PRINTFLIKE
 
 
 #ifdef __cplusplus
@@ -54,7 +55,7 @@
 
 #else
 
-static INLINE char *
+static inline char *
 util_strchrnul(const char *s, char c)
 {
    for (; *s && *s != c; ++s);
@@ -64,18 +65,44 @@
 
 #endif
 
-#ifdef _MSC_VER
+#ifdef _WIN32
 
-int util_vsnprintf(char *, size_t, const char *, va_list);
-int util_snprintf(char *str, size_t size, const char *format, ...);
+static inline int
+util_vsnprintf(char *str, size_t size, const char *format, va_list ap)
+{
+   /* _vsnprintf returns -1 when the output would not fit in size, so fall
+    * back to _vscprintf on a copy of ap to report the full length instead.
+    */
+   va_list ap_copy;
+   int ret;
+   va_copy(ap_copy, ap);
+   ret = _vsnprintf(str, size, format, ap);
+   if (ret < 0)
+      ret = _vscprintf(format, ap_copy);
+   va_end(ap_copy); /* every va_copy must be paired with va_end */
+   return ret;
+}
+
+static inline int
+   PRINTFLIKE(3, 4) /* format is argument 3, variadic arguments start at 4 */
+util_snprintf(char *str, size_t size, const char *format, ...)
+{
+   va_list ap;
+   int ret;
+   va_start(ap, format);
+   ret = util_vsnprintf(str, size, format, ap); /* variadic front end; result passed through unchanged */
+   va_end(ap);
+   return ret;
+}
 
-static INLINE void
+static inline void
 util_vsprintf(char *str, const char *format, va_list ap)
 {
    util_vsnprintf(str, (size_t)-1, format, ap);
 }
 
-static INLINE void
+static inline void
+   PRINTFLIKE(2, 3)
 util_sprintf(char *str, const char *format, ...)
 {
    va_list ap;
@@ -84,7 +111,7 @@
    va_end(ap);
 }
 
-static INLINE char *
+static inline char *
 util_strchr(const char *s, char c)
 {
    char *p = util_strchrnul(s, c);
@@ -92,7 +119,7 @@
    return *p ? p : NULL;
 }
 
-static INLINE char*
+static inline char*
 util_strncat(char *dst, const char *src, size_t n)
 {
    char *p = dst + strlen(dst);
@@ -106,7 +133,7 @@
    return dst;
 }
 
-static INLINE int
+static inline int
 util_strcmp(const char *s1, const char *s2)
 {
    unsigned char u1, u2;
@@ -122,7 +149,7 @@
    return 0;
 }
 
-static INLINE int
+static inline int
 util_strncmp(const char *s1, const char *s2, size_t n)
 {
    unsigned char u1, u2;
@@ -138,7 +165,7 @@
    return 0;
 }
 
-static INLINE char *
+static inline char *
 util_strstr(const char *haystack, const char *needle)
 {
    const char *p = haystack;
@@ -152,7 +179,7 @@
    return NULL;
 }
 
-static INLINE void *
+static inline void *
 util_memmove(void *dest, const void *src, size_t n)
 {
    char *p = (char *)dest;
@@ -199,7 +226,7 @@
 };
 
 
-static INLINE void
+static inline void
 util_strbuf_init(struct util_strbuf *sbuf, char *str, size_t size) 
 {
    sbuf->str = str;
@@ -209,7 +236,7 @@
 }
 
 
-static INLINE void
+static inline void
 util_strbuf_printf(struct util_strbuf *sbuf, const char *format, ...)
 {
    if(sbuf->left > 1) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_surface.c	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -676,6 +676,9 @@
       return FALSE;
    }
 
+   if (blit->alpha_blend)
+      return FALSE;
+
    ctx->resource_copy_region(ctx, blit->dst.resource, blit->dst.level,
                              blit->dst.box.x, blit->dst.box.y, blit->dst.box.z,
                              blit->src.resource, blit->src.level,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_surfaces.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_surfaces.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_surfaces.h	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_surfaces.h	2015-09-16 14:36:09.000000000 +0000
@@ -50,7 +50,7 @@
                      struct pipe_surface **res);
 
 /* fast inline path for the very common case */
-static INLINE boolean
+static inline boolean
 util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size,
                   struct pipe_context *ctx, struct pipe_resource *pt,
                   unsigned level, unsigned layer,
@@ -70,7 +70,7 @@
    return util_surfaces_do_get(us, surface_struct_size, ctx, pt, level, layer, res);
 }
 
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
 util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned level, unsigned layer)
 {
    if(!us->u.pv)
@@ -84,7 +84,7 @@
 
 void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
 
-static INLINE void
+static inline void
 util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
 {
    if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT))
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_tests.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_tests.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_tests.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_tests.c	2015-09-16 14:36:09.000000000 +0000
@@ -373,7 +373,8 @@
 
    /* Fragment shader. */
    fs = util_make_fragment_tex_shader(ctx, tgsi_tex_target,
-                                      TGSI_INTERPOLATE_LINEAR);
+                                      TGSI_INTERPOLATE_LINEAR,
+                                      TGSI_RETURN_TYPE_FLOAT);
    cso_set_fragment_shader_handle(cso, fs);
 
    /* Vertex shader. */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_tile.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_tile.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_tile.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_tile.h	2015-09-16 14:36:09.000000000 +0000
@@ -42,7 +42,7 @@
  *
  * \return TRUE if tile is totally clipped, FALSE otherwise
  */
-static INLINE boolean
+static inline boolean
 u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box)
 {
    if ((int) x >= box->width)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_time.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_time.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_time.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_time.h	2015-09-16 14:36:09.000000000 +0000
@@ -60,7 +60,7 @@
    
 
 PIPE_DEPRECATED
-static INLINE void
+static inline void
 util_time_get(struct util_time *t)
 {
    t->counter = os_time_get();
@@ -71,7 +71,7 @@
  * Return t2 = t1 + usecs
  */
 PIPE_DEPRECATED
-static INLINE void
+static inline void
 util_time_add(const struct util_time *t1,
               int64_t usecs,
               struct util_time *t2)
@@ -84,7 +84,7 @@
  * Return difference between times, in microseconds
  */
 PIPE_DEPRECATED
-static INLINE int64_t
+static inline int64_t
 util_time_diff(const struct util_time *t1, 
                const struct util_time *t2)
 {
@@ -98,7 +98,7 @@
  * Not publicly available because it does not take in account wrap-arounds.
  * Use util_time_timeout instead.
  */
-static INLINE int
+static inline int
 _util_time_compare(const struct util_time *t1,
                    const struct util_time *t2)
 {
@@ -115,7 +115,7 @@
  * Returns non-zero when the timeout expires.
  */
 PIPE_DEPRECATED
-static INLINE boolean
+static inline boolean
 util_time_timeout(const struct util_time *start, 
                   const struct util_time *end,
                   const struct util_time *curr)
@@ -128,7 +128,7 @@
  * Return current time in microseconds
  */
 PIPE_DEPRECATED
-static INLINE int64_t
+static inline int64_t
 util_time_micros(void)
 {
    return os_time_get();
@@ -136,7 +136,7 @@
 
 
 PIPE_DEPRECATED
-static INLINE void
+static inline void
 util_time_sleep(int64_t usecs)
 {
    os_time_sleep(usecs);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_transfer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_transfer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_transfer.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_transfer.c	2015-09-16 14:36:09.000000000 +0000
@@ -90,7 +90,7 @@
 }
 
 
-static INLINE struct u_resource *
+static inline struct u_resource *
 u_resource( struct pipe_resource *res )
 {
    return (struct u_resource *)res;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_vbuf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_vbuf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_vbuf.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_vbuf.c	2015-09-16 14:36:09.000000000 +0000
@@ -781,10 +781,11 @@
    ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
    ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
 
-   /* Align the formats to the size of DWORD if needed. */
+   /* Align the formats and offsets to the size of DWORD if needed. */
    if (!mgr->caps.velem_src_offset_unaligned) {
       for (i = 0; i < count; i++) {
          ve->native_format_size[i] = align(ve->native_format_size[i], 4);
+         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
       }
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_video.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_video.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/util/u_video.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/util/u_video.h	2015-09-16 14:36:09.000000000 +0000
@@ -40,7 +40,7 @@
 #include "util/u_debug.h"
 #include "util/u_math.h"
 
-static INLINE enum pipe_video_format
+static inline enum pipe_video_format
 u_reduce_video_profile(enum pipe_video_profile profile)
 {
    switch (profile)
@@ -68,12 +68,19 @@
       case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444:
          return PIPE_VIDEO_FORMAT_MPEG4_AVC;
 
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_12:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_444:
+         return PIPE_VIDEO_FORMAT_HEVC;
+
       default:
          return PIPE_VIDEO_FORMAT_UNKNOWN;
    }
 }
 
-static INLINE void
+static inline void
 u_copy_nv12_to_yv12(void *const *destination_data,
                     uint32_t const *destination_pitches,
                     int src_plane, int src_field,
@@ -99,7 +106,7 @@
    }
 }
 
-static INLINE void
+static inline void
 u_copy_yv12_to_nv12(void *const *destination_data,
                     uint32_t const *destination_pitches,
                     int src_plane, int src_field,
@@ -122,7 +129,7 @@
    }
 }
 
-static INLINE void
+static inline void
 u_copy_swap422_packed(void *const *destination_data,
                        uint32_t const *destination_pitches,
                        int src_plane, int src_field,
@@ -147,7 +154,7 @@
    }
 }
 
-static INLINE uint32_t
+static inline uint32_t
 u_get_h264_level(uint32_t width, uint32_t height, uint32_t *max_reference)
 {
    uint32_t max_dpb_mbs;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_compositor.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_compositor.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_compositor.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_compositor.c	2015-09-16 14:36:09.000000000 +0000
@@ -538,7 +538,7 @@
    pipe_resource_reference(&c->vertex_buf.buffer, NULL);
 }
 
-static INLINE struct u_rect
+static inline struct u_rect
 default_rect(struct vl_compositor_layer *layer)
 {
    struct pipe_resource *res = layer->sampler_views[0]->texture;
@@ -546,21 +546,21 @@
    return rect;
 }
 
-static INLINE struct vertex2f
+static inline struct vertex2f
 calc_topleft(struct vertex2f size, struct u_rect rect)
 {
    struct vertex2f res = { rect.x0 / size.x, rect.y0 / size.y };
    return res;
 }
 
-static INLINE struct vertex2f
+static inline struct vertex2f
 calc_bottomright(struct vertex2f size, struct u_rect rect)
 {
    struct vertex2f res = { rect.x1 / size.x, rect.y1 / size.y };
    return res;
 }
 
-static INLINE void
+static inline void
 calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
                  struct u_rect src, struct u_rect dst)
 {
@@ -658,7 +658,7 @@
    vb[19].y = layer->colors[3].w;
 }
 
-static INLINE struct u_rect
+static inline struct u_rect
 calc_drawn_area(struct vl_compositor_state *s, struct vl_compositor_layer *layer)
 {
    struct vertex2f tl, br;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c	2015-09-16 14:36:09.000000000 +0000
@@ -533,7 +533,7 @@
 static struct dct_coeff tbl_B14_AC[1 << 17];
 static struct dct_coeff tbl_B15[1 << 17];
 
-static INLINE void
+static inline void
 init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *src,
                      unsigned size, bool is_DC)
 {
@@ -594,7 +594,7 @@
    }
 }
 
-static INLINE void
+static inline void
 init_tables()
 {
    vl_vlc_init_table(tbl_B1, Elements(tbl_B1), macroblock_address_increment, Elements(macroblock_address_increment));
@@ -611,19 +611,19 @@
    init_dct_coeff_table(tbl_B15, dct_coeff_tbl_one, Elements(dct_coeff_tbl_one), false);
 }
 
-static INLINE int
+static inline int
 DIV2DOWN(int todiv)
 {
    return (todiv&~1)/2;
 }
 
-static INLINE int
+static inline int
 DIV2UP(int todiv)
 {
    return (todiv+1)/2;
 }
 
-static INLINE void
+static inline void
 motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], short dmvector[2])
 {
    int t;
@@ -647,7 +647,7 @@
    }
 }
 
-static INLINE int
+static inline int
 wrap(short f, int shift)
 {
    if (f < (-16 << shift))
@@ -658,7 +658,7 @@
       return f;
 }
 
-static INLINE void
+static inline void
 motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
 {
    int dmv = mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
@@ -682,7 +682,7 @@
    }
 }
 
-static INLINE void
+static inline void
 motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
 {
    int dmv = mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
@@ -701,12 +701,12 @@
    }
 }
 
-static INLINE void
+static inline void
 reset_predictor(struct vl_mpg12_bs *bs) {
    bs->pred_dc[0] = bs->pred_dc[1] = bs->pred_dc[2] = 0;
 }
 
-static INLINE void
+static inline void
 decode_dct(struct vl_mpg12_bs *bs, struct pipe_mpeg12_macroblock *mb, int scale)
 {
    static const unsigned blk2cc[] = { 0, 0, 0, 0, 1, 2 };
@@ -805,7 +805,7 @@
       vl_vlc_eatbits(&bs->vlc, 1);
 }
 
-static INLINE void
+static inline void
 decode_slice(struct vl_mpg12_bs *bs, struct pipe_video_buffer *target)
 {
    struct pipe_mpeg12_macroblock mb;
@@ -929,6 +929,7 @@
          mb.PMV[1][0][0] = mb.PMV[0][0][0];
          mb.PMV[1][0][1] = mb.PMV[0][0][1];
          assert(extra);
+         (void) extra;
       } else if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ||
                 !(mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD |
                                         PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD))) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c	2015-09-16 14:36:09.000000000 +0000
@@ -317,7 +317,7 @@
       vl_mc_cleanup_buffer(&buf->mc[i]);
 }
 
-static INLINE void
+static inline void
 MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2])
 {
    assert(mb);
@@ -352,7 +352,7 @@
    }
 }
 
-static INLINE struct vl_motionvector
+static inline struct vl_motionvector
 MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
                    unsigned field_select_mask, unsigned weight)
 {
@@ -403,7 +403,7 @@
    return mv;
 }
 
-static INLINE void
+static inline void
 UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
                   struct vl_mpeg12_buffer *buf,
                   const struct pipe_mpeg12_macroblock *mb)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_rbsp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_rbsp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_rbsp.h	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_rbsp.h	2015-09-16 14:36:09.000000000 +0000
@@ -48,7 +48,7 @@
 /**
  * Initialize the RBSP object
  */
-static INLINE void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits)
+static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits)
 {
    unsigned bits_left = vl_vlc_bits_left(nal);
 
@@ -71,7 +71,7 @@
 /**
  * Make at least 16 more bits available
  */
-static INLINE void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
+static inline void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
 {
    unsigned valid = vl_vlc_valid_bits(&rbsp->nal);
    unsigned i, bits;
@@ -108,7 +108,7 @@
 /**
  * Return an unsigned integer from the first n bits
  */
-static INLINE unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
+static inline unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
 {
    if (n == 0)
       return 0;
@@ -120,7 +120,7 @@
 /**
  * Return an unsigned exponential Golomb encoded integer
  */
-static INLINE unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
+static inline unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
 {
    unsigned bits = 0;
 
@@ -134,7 +134,7 @@
 /**
  * Return an signed exponential Golomb encoded integer
  */
-static INLINE signed vl_rbsp_se(struct vl_rbsp *rbsp)
+static inline signed vl_rbsp_se(struct vl_rbsp *rbsp)
 {
    signed codeNum = vl_rbsp_ue(rbsp);
    if (codeNum & 1)
@@ -146,7 +146,7 @@
 /**
  * Are more data available in the RBSP ?
  */
-static INLINE bool vl_rbsp_more_data(struct vl_rbsp *rbsp)
+static inline bool vl_rbsp_more_data(struct vl_rbsp *rbsp)
 {
    unsigned bits, value;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_vlc.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_vlc.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_vlc.h	2015-04-16 07:17:52.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_vlc.h	2015-09-16 14:36:09.000000000 +0000
@@ -65,7 +65,7 @@
 /**
  * initalize and decompress a lookup table
  */
-static INLINE void
+static inline void
 vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size)
 {
    unsigned i, bits = util_logbase2(dst_size);
@@ -87,7 +87,7 @@
 /**
  * switch over to next input buffer
  */
-static INLINE void
+static inline void
 vl_vlc_next_input(struct vl_vlc *vlc)
 {
    unsigned len = vlc->sizes[0];
@@ -112,7 +112,7 @@
 /**
  * align the data pointer to the next dword
  */
-static INLINE void
+static inline void
 vl_vlc_align_data_ptr(struct vl_vlc *vlc)
 {
    /* align the data pointer */
@@ -126,7 +126,7 @@
 /**
  * fill the bit buffer, so that at least 32 bits are valid
  */
-static INLINE void
+static inline void
 vl_vlc_fillbits(struct vl_vlc *vlc)
 {
    assert(vlc);
@@ -175,7 +175,7 @@
 /**
  * initialize vlc structure and start reading from first input buffer
  */
-static INLINE void
+static inline void
 vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
             const void *const *inputs, const unsigned *sizes)
 {
@@ -203,7 +203,7 @@
 /**
  * number of bits still valid in bit buffer
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_valid_bits(struct vl_vlc *vlc)
 {
    return 32 - vlc->invalid_bits;
@@ -212,7 +212,7 @@
 /**
  * number of bits left over all inbut buffers
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_bits_left(struct vl_vlc *vlc)
 {
    signed bytes_left = vlc->end - vlc->data;
@@ -223,7 +223,7 @@
 /**
  * get num_bits from bit buffer without removing them
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
 {
    assert(vl_vlc_valid_bits(vlc) >= num_bits || vlc->data >= vlc->end);
@@ -233,7 +233,7 @@
 /**
  * remove num_bits from bit buffer
  */
-static INLINE void
+static inline void
 vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
 {
    assert(vl_vlc_valid_bits(vlc) >= num_bits);
@@ -245,7 +245,7 @@
 /**
  * get num_bits from bit buffer with removing them
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
 {
    unsigned value;
@@ -261,7 +261,7 @@
 /**
  * treat num_bits as signed value and remove them from bit buffer
  */
-static INLINE signed
+static inline signed
 vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
 {
    signed value;
@@ -277,7 +277,7 @@
 /**
  * lookup a value and length in a decompressed table
  */
-static INLINE int8_t
+static inline int8_t
 vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits)
 {
    tbl += vl_vlc_peekbits(vlc, num_bits);
@@ -288,7 +288,7 @@
 /**
  * fast forward search for a specific byte value
  */
-static INLINE boolean
+static inline boolean
 vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value)
 {
    /* make sure we are on a byte boundary */
@@ -345,7 +345,7 @@
 /**
  * remove num_bits bits starting at pos from the bitbuffer
  */
-static INLINE void
+static inline void
 vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits)
 {
    uint64_t lo = (vlc->buffer & (~0UL >> (pos + num_bits))) << num_bits;
@@ -357,7 +357,7 @@
 /**
  * limit the number of bits left for fetching
  */
-static INLINE void
+static inline void
 vl_vlc_limit(struct vl_vlc *vlc, unsigned bits_left)
 {
    assert(bits_left <= vl_vlc_bits_left(vlc));
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_winsys_dri.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_winsys_dri.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/auxiliary/vl/vl_winsys_dri.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/auxiliary/vl/vl_winsys_dri.c	2015-09-16 14:36:09.000000000 +0000
@@ -37,6 +37,8 @@
 #include <xf86drm.h>
 #include <errno.h>
 
+#include "loader.h"
+
 #include "pipe/p_screen.h"
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
@@ -370,7 +372,7 @@
    if (!device_name)
       goto free_connect;
    memcpy(device_name, xcb_dri2_connect_device_name(connect), device_name_length);
-   fd = open(device_name, O_RDWR);
+   fd = loader_open_device(device_name);
    free(device_name);
 
    if (fd < 0)
@@ -388,7 +390,7 @@
 #if GALLIUM_STATIC_TARGETS
    scrn->base.pscreen = dd_create_screen(fd);
 #else
-   if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd, false))
+   if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
       scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, PIPE_SEARCH_DIR);
 #endif // GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/d3d11ddi.txt mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/d3d11ddi.txt
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/d3d11ddi.txt	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/d3d11ddi.txt	1970-01-01 00:00:00.000000000 +0000
@@ -1,462 +0,0 @@
-This document compares the D3D10/D3D11 device driver interface with Gallium.
-It is written from the perspective of a developer implementing a D3D10/D3D11 driver as a Gallium state tracker.
-
-Note that naming and other cosmetic differences are not noted, since they don't really matter and would severely clutter the document.
-Gallium/OpenGL terminology is used in preference to D3D terminology.
-
-NOTE: this document tries to be complete but most likely isn't fully complete and also not fully correct: please submit patches if you spot anything incorrect
-
-Also note that this is specifically for the DirectX 10/11 Windows Vista/7 DDI interfaces.
-DirectX 9 has both user-mode (for Vista) and kernel mode (pre-Vista) interfaces, but they are significantly different from Gallium due to the presence of a lot of fixed function functionality.
-
-The user-visible DirectX 10/11 interfaces are distinct from the kernel DDI, but they match very closely.
-
-* Accessing Microsoft documentation
-
-See http://msdn.microsoft.com/en-us/library/dd445501.aspx ("D3D11DDI_DEVICEFUNCS") for D3D documentation.
-
-Also see http://download.microsoft.com/download/f/2/d/f2d5ee2c-b7ba-4cd0-9686-b6508b5479a1/direct3d10_web.pdf ("The Direct3D 10 System" by David Blythe) for an introduction to Direct3D 10 and the rationale for its design.
-
-The Windows Driver Kit contains the actual headers, as well as shader bytecode documentation.
-
-To get the headers from Linux, run the following, in a dedicated directory:
-wget http://download.microsoft.com/download/4/A/2/4A25C7D5-EFBE-4182-B6A9-AE6850409A78/GRMWDK_EN_7600_1.ISO
-sudo mount -o loop GRMWDK_EN_7600_1.ISO /mnt/tmp
-cabextract -x /mnt/tmp/wdk/headers_cab001.cab
-rename 's/^_(.*)_[0-9]*$/$1/' *
-sudo umount /mnt/tmp
-
-d3d10umddi.h contains the DDI interface analyzed in this document: note that it is much easier to read this online on MSDN.
-d3d{10,11}TokenizedProgramFormat.hpp contains the shader bytecode definitions: this is not available on MSDN.
-d3d9types.h contains DX9 shader bytecode, and DX9 types
-d3dumddi.h contains the DirectX 9 DDI interface
-
-* Glossary
-
-BC1: DXT1
-BC2: DXT3
-BC3: DXT5
-BC5: RGTC1
-BC6H: BPTC float
-BC7: BPTC
-CS = compute shader: OpenCL-like shader
-DS = domain shader: tessellation evaluation shader
-HS = hull shader: tessellation control shader
-IA = input assembler: primitive assembly
-Input layout: vertex elements
-OM = output merger: blender
-PS = pixel shader: fragment shader
-Primitive topology: primitive type
-Resource: buffer or texture
-Shader resource (view): sampler view
-SO = stream out: transform feedback
-Unordered access view: view supporting random read/write access (usually from compute shaders)
-
-* Legend
-
--: features D3D11 has and Gallium lacks
-+: features Gallium has and D3D11 lacks
-!: differences between D3D11 and Gallium
-*: possible improvements to Gallium
->: references to comparisons of special enumerations
-#: comment
-
-* Gallium functions with no direct D3D10/D3D11 equivalent
-
-clear
-	+ Gallium supports clearing both render targets and depth/stencil with a single call
-
-fence_signalled
-fence_finish
-	+ D3D10/D3D11 don't appear to support explicit fencing; queries can often substitute though, and flushing is supported
-
-set_clip_state
-	+ Gallium supports fixed function user clip planes, D3D10/D3D11 only support using the vertex shader for them
-
-set_polygon_stipple
-	+ Gallium supports polygon stipple
-
-clearRT/clearDS
-	+ Gallium supports subrectangle fills of surfaces, D3D10 only supports full clears of views
-
-* DirectX 10/11 DDI functions and Gallium equivalents
-
-AbandonCommandList (D3D11 only)
-	- Gallium does not support deferred contexts
-
-CalcPrivateBlendStateSize
-CalcPrivateDepthStencilStateSize
-CalcPrivateDepthStencilViewSize
-CalcPrivateElementLayoutSize
-CalcPrivateGeometryShaderWithStreamOutput
-CalcPrivateOpenedResourceSize
-CalcPrivateQuerySize
-CalcPrivateRasterizerStateSize
-CalcPrivateRenderTargetViewSize
-CalcPrivateResourceSize
-CalcPrivateSamplerSize
-CalcPrivateShaderResourceViewSize
-CalcPrivateShaderSize
-CalcDeferredContextHandleSize (D3D11 only)
-CalcPrivateCommandListSize (D3D11 only)
-CalcPrivateDeferredContextSize (D3D11 only)
-CalcPrivateTessellationShaderSize (D3D11 only)
-CalcPrivateUnorderedAccessViewSize (D3D11 only)
-	! D3D11 allocates private objects itself, using the size computed here
-	* Gallium could do something similar to be able to put the private data inline into state tracker objects: this would allow them to fit in the same cacheline and improve performance
-
-CheckDeferredContextHandleSizes (D3D11 only)
-	- Gallium does not support deferred contexts
-
-CheckFormatSupport -> screen->is_format_supported
-	! Gallium passes usages to this function, D3D11 returns them
-	- Gallium does not differentiate between blendable and non-blendable render targets
-	! Gallium includes sample count directly, D3D11 uses additional query 
-
-CheckMultisampleQualityLevels
-	! is merged with is_format_supported
-
-CommandListExecute (D3D11 only)
-	- Gallium does not support command lists
-
-CopyStructureCount (D3D11 only)
-	- Gallium does not support unordered access views (views that can be written to arbitrarily from compute shaders)
-
-ClearDepthStencilView -> clear_depth_stencil
-ClearRenderTargetView -> clear_render_target
-	# D3D11 is not totally clear about whether this applies to any view or only a "currently-bound view"
-	+ Gallium allows to clear both depth/stencil and render target(s) in a single operation
-	+ Gallium supports double-precision depth values (but not rgba values!)
-	* May want to also support double-precision rgba or use "float" for "depth"
-
-ClearUnorderedAccessViewFloat (D3D11 only)
-ClearUnorderedAccessViewUint (D3D11 only)
-	- Gallium does not support unordered access views (views that can be written to arbitrarily from compute shaders)
-
-CreateBlendState (extended in D3D10.1) -> create_blend_state
-	# D3D10 does not support per-RT blend modes (but per-RT blending), only D3D10.1 does
-	+ Gallium supports logic ops
-	+ Gallium supports dithering
-	+ Gallium supports using the broadcast alpha component of the blend constant color
-
-CreateCommandList (D3D11 only)
-	- Gallium does not support command lists
-
-CreateComputeShader (D3D11 only)
-	- Gallium does not support compute shaders
-
-CreateDeferredContext (D3D11 only)
-	- Gallium does not support deferred contexts
-
-CreateDomainShader (D3D11 only)
-	- Gallium does not support domain shaders
-
-CreateHullShader (D3D11 only)
-	- Gallium does not support hull shaders
-
-CreateUnorderedAccessView (D3D11 only)
-	- Gallium does not support unordered access views
-
-CreateDepthStencilState -> create_depth_stencil_alpha_state
-	! D3D11 has both a global stencil enable, and front/back enables; Gallium has only front/back enables
-	+ Gallium has per-face writemask/valuemasks, D3D11 uses the same value for back and front
-	+ Gallium supports the alpha test, which D3D11 lacks
-
-CreateDepthStencilView -> create_surface
-CreateRenderTargetView -> create_surface
-	! Gallium merges depthstencil and rendertarget views into pipe_surface
-	- lack of render-to-buffer support
-	+ Gallium supports using 3D texture zslices as a depth/stencil buffer (in theory)
-
-CreateElementLayout -> create_vertex_elements_state
-	! D3D11 allows sparse vertex elements (via InputRegister); in Gallium they must be specified sequentially
-	! D3D11 has an extra flag (InputSlotClass) that is the same as instance_divisor == 0
-
-CreateGeometryShader -> create_gs_state
-CreateGeometryShaderWithStreamOutput -> create_gs_state + create_stream_output_state
-CreatePixelShader -> create_fs_state
-CreateVertexShader -> create_vs_state
-	> bytecode is different (see D3d10tokenizedprogramformat.hpp)
-	! D3D11 describes input/outputs separately from bytecode; Gallium has the tgsi_scan.c module to extract it from TGSI
-	@ TODO: look into DirectX 10/11 semantics specification and bytecode
-
-CheckCounter
-CheckCounterInfo
-CreateQuery -> create_query
-	! D3D11 implements fences with "event" queries
-	* others are performance counters, we may want them but they are not critical
-
-CreateRasterizerState
-	+ Gallium, like OpenGL, supports PIPE_POLYGON_MODE_POINT
-	+ Gallium, like OpenGL, supports per-face polygon fill modes
-	+ Gallium, like OpenGL, supports culling everything
-	+ Gallium, like OpenGL, supports two-side lighting; D3D11 only has the facing attribute
-	+ Gallium, like OpenGL, supports per-fill-mode polygon offset enables
-	+ Gallium, like OpenGL, supports polygon smoothing
-	+ Gallium, like OpenGL, supports polygon stipple
-	+ Gallium, like OpenGL, supports point smoothing
-	+ Gallium, like OpenGL, supports point sprites
-	+ Gallium supports specifying point quad rasterization
-	+ Gallium, like OpenGL, supports per-point point size
-	+ Gallium, like OpenGL, supports line smoothing
-	+ Gallium, like OpenGL, supports line stipple
-	+ Gallium supports line last pixel rule specification
-	+ Gallium, like OpenGL, supports provoking vertex convention
-	+ Gallium supports D3D9 rasterization rules
-	+ Gallium supports fixed line width
-	+ Gallium supports fixed point size
-
-CreateResource -> texture_create or buffer_create
-	! D3D11 passes the dimensions of all mipmap levels to the create call, while Gallium has an implicit floor(x/2) rule
-	# Note that hardware often has the implicit rule, so the D3D11 interface seems to make little sense
-	# Also, the D3D11 API does not allow the user to specify mipmap sizes, so this really seems a dubious decision on Microsoft's part
-	- D3D11 supports specifying initial data to write in the resource
-	- Gallium does not support unordered access buffers
-	! D3D11 specifies mapping flags (i.e. read/write/discard);:it's unclear what they are used for here
-	- D3D11 supports odd things in the D3D10_DDI_RESOURCE_MISC_FLAG enum (D3D10_DDI_RESOURCE_MISC_DISCARD_ON_PRESENT, D3D11_DDI_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS, D3D11_DDI_RESOURCE_MISC_BUFFER_STRUCTURED)
-	- Gallium does not support indirect draw call parameter buffers
-	! D3D11 supports specifying hardware modes and other stuff here for scanout resources
-	! D3D11 implements cube maps as 2D array textures
-
-CreateSampler
-	- D3D11 supports a monochrome convolution filter for "text filtering"
-	+ Gallium supports non-normalized coordinates
-	+ Gallium supports CLAMP, MIRROR_CLAMP and MIRROR_CLAMP_TO_BORDER
-	+ Gallium supports setting min/max/mip filters and anisotropy independently
-
-CreateShaderResourceView (extended in D3D10.1) -> create_sampler_view
-	+ Gallium supports specifying a swizzle
-	! D3D11 implements "cube views" as views into a 2D array texture
-
-CsSetConstantBuffers (D3D11 only)
-CsSetSamplers (D3D11 only)
-CsSetShader (D3D11 only)
-CsSetShaderResources (D3D11 only)
-CsSetShaderWithIfaces (D3D11 only)
-CsSetUnorderedAccessViews (D3D11 only)
-	- Gallium does not support compute shaders
-
-DestroyBlendState
-DestroyCommandList (D3D11 only)
-DestroyDepthStencilState
-DestroyDepthStencilView
-DestroyDevice
-DestroyElementLayout
-DestroyQuery
-DestroyRasterizerState
-DestroyRenderTargetView
-DestroyResource
-DestroySampler
-DestroyShader
-DestroyShaderResourceView
-DestroyUnorderedAccessView (D3D11 only)
-	# these are trivial
-
-Dispatch (D3D11 only)
-	- Gallium does not support compute shaders
-
-DispatchIndirect (D3D11 only)
-	- Gallium does not support compute shaders
-
-Draw -> draw_vbo
-	! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
-
-DrawAuto -> draw_auto
-
-DrawIndexed -> draw_vbo
-	! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
-	+ D3D11 lacks explicit range, which is required for OpenGL
-
-DrawIndexedInstanced -> draw_vbo
-	! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
-
-DrawIndexedInstancedIndirect (D3D11 only)
-	# this allows to use an hardware buffer to specify the parameters for multiple draw_vbo calls
-	- Gallium does not support draw call parameter buffers and indirect draw
-
-DrawInstanced -> draw_vbo
-	! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
-
-DrawInstancedIndirect (D3D11 only)
-	# this allows to use an hardware buffer to specify the parameters for multiple draw_vbo calls
-	- Gallium does not support draw call parameter buffers and indirect draws
-
-DsSetConstantBuffers (D3D11 only)
-DsSetSamplers (D3D11 only)
-DsSetShader (D3D11 only)
-DsSetShaderResources (D3D11 only)
-DsSetShaderWithIfaces (D3D11 only)
-	- Gallium does not support domain shaders
-
-Flush -> flush
-	! Gallium supports fencing, D3D11 just has a dumb glFlush-like function
-
-GenMips
-	- Gallium lacks a mipmap generation interface, and does this manually with the 3D engine
-	* it may be useful to add a mipmap generation interface, since the hardware (especially older cards) may have a better way than using the 3D engine
-
-GsSetConstantBuffers -> for(i = StartBuffer; i < NumBuffers; ++i) set_constant_buffer(PIPE_SHADER_GEOMETRY, i, phBuffers[i])
-
-GsSetSamplers
-	- Gallium does not support sampling in geometry shaders
-
-GsSetShader -> bind_gs_state
-
-GsSetShaderWithIfaces (D3D11 only)
-	- Gallium does not support shader interfaces
-
-GsSetShaderResources
-	- Gallium does not support sampling in geometry shaders
-
-HsSetConstantBuffers (D3D11 only)
-HsSetSamplers (D3D11 only)
-HsSetShader (D3D11 only)
-HsSetShaderResources (D3D11 only)
-HsSetShaderWithIfaces (D3D11 only)
-	- Gallium does not support hull shaders
-
-IaSetIndexBuffer -> set_index_buffer
-	+ Gallium supports 8-bit indices
-	# the D3D11 interface allows index-size-unaligned byte offsets into the index buffer; most drivers will abort with an assertion
-
-IaSetInputLayout -> bind_vertex_elements_state
-
-IaSetTopology
-	! Gallium passes the topology = primitive type to the draw calls
-	* may want to add an interface for this
-	- Gallium lacks support for DirectX 11 tessellated primitives
-	+ Gallium supports line loops, triangle fans, quads, quad strips and polygons
-
-IaSetVertexBuffers -> set_vertex_buffers
-	- Gallium only allows setting all vertex buffers at once, while D3D11 supports setting a subset
-
-OpenResource -> texture_from_handle
-
-PsSetConstantBuffers -> for(i = StartBuffer; i < NumBuffers; ++i) set_constant_buffer(PIPE_SHADER_FRAGMENT, i, phBuffers[i])
-	* may want to split into fragment/vertex-specific versions
-
-PsSetSamplers -> bind_fragment_sampler_states
-	* may want to allow binding subsets instead of all at once
-
-PsSetShader -> bind_fs_state
-
-PsSetShaderWithIfaces (D3D11 only)
-	- Gallium does not support shader interfaces
-
-PsSetShaderResources -> set_sampler_views
-	* may want to allow binding subsets instead of all at once
-
-QueryBegin -> begin_query
-
-QueryEnd -> end_query
-
-QueryGetData -> get_query_result
-	- D3D11 supports reading an arbitrary data chunk for query results, Gallium only supports reading a 64-bit integer
-	+ D3D11 doesn't seem to support actually waiting for the query result (?!)
-	- D3D11 supports optionally not flushing command buffers here and instead returning DXGI_DDI_ERR_WASSTILLDRAWING
-
-RecycleCommandList (D3D11 only)
-RecycleCreateCommandList (D3D11 only)
-RecycleDestroyCommandList (D3D11 only)
-	- Gallium does not support command lists
-
-RecycleCreateDeferredContext (D3D11 only)
-	- Gallium does not support deferred contexts
-
-RelocateDeviceFuncs
-	- Gallium does not support moving pipe_context, while D3D11 seems to, using this
-
-ResetPrimitiveID (D3D10.1+ only, #ifdef D3D10PSGP)
-	# used to do vertex processing on the GPU on Intel G45 chipsets when it is faster this way (see www.intel.com/Assets/PDF/whitepaper/322931.pdf)
-	# presumably this resets the primitive id system value
-	- Gallium does not support vertex pipeline bypass anymore
-
-ResourceCopy
-ResourceCopyRegion
-ResourceConvert (D3D10.1+ only)
-ResourceConvertRegion (D3D10.1+ only)
-	-> resource_copy_region
-
-ResourceIsStagingBusy ->
-	- Gallium lacks this
-	+ Gallium can use fences
-
-ResourceReadAfterWriteHazard
-	- Gallium lacks this
-
-ResourceResolveSubresource -> blit
-
-ResourceMap
-ResourceUnmap
-DynamicConstantBufferMapDiscard
-DynamicConstantBufferUnmap
-DynamicIABufferMapDiscard
-DynamicIABufferMapNoOverwrite
-DynamicIABufferUnmap
-DynamicResourceMapDiscard
-DynamicResourceUnmap
-StagingResourceMap
-StagingResourceUnmap
-	-> transfer functions
-	! Gallium and D3D have different semantics for transfers
-	* D3D separates vertex/index buffers from constant buffers
-	! D3D separates some buffer flags into specialized calls
-
-ResourceUpdateSubresourceUP -> transfer functionality, transfer_inline_write in gallium-resources
-DefaultConstantBufferUpdateSubresourceUP -> transfer functionality, transfer_inline_write in gallium-resources
-
-SetBlendState -> bind_blend_state, set_blend_color and set_sample_mask
-	! D3D11 fuses bind_blend_state, set_blend_color and set_sample_mask in a single function
-
-SetDepthStencilState -> bind_depth_stencil_alpha_state and set_stencil_ref
-	! D3D11 fuses bind_depth_stencil_alpha_state and set_stencil_ref in a single function
-
-SetPredication -> render_condition
-	# here both D3D11 and Gallium seem very limited (hardware is too, probably though)
-	# ideally, we should support nested conditional rendering, as well as more complex tests (checking for an arbitrary range, after an AND with arbitrary mask )
-	# of couse, hardware support is probably as limited as OpenGL/D3D11
-	+ Gallium, like NV_conditional_render, supports by-region and wait flags
-	- D3D11 supports predication conditional on being equal any value (along with occlusion predicates); Gallium only supports on non-zero
-
-SetRasterizerState -> bind_rasterizer_state
-
-SetRenderTargets (extended in D3D11) -> set_framebuffer_state
-	! Gallium passed a width/height here, D3D11 does not
-	! Gallium lacks ClearTargets (but this is redundant and the driver can trivially compute this if desired)
-	- Gallium does not support unordered access views
-	- Gallium does not support geometry shader selection of texture array image / 3D texture zslice
-
-SetResourceMinLOD (D3D11 only) -> pipe_sampler_view::tex::first_level
-
-SetScissorRects
-	- Gallium lacks support for multiple geometry-shader-selectable scissor rectangles D3D11 has
-
-SetTextFilterSize
-	- Gallium lacks support for text filters
-
-SetVertexPipelineOutput (D3D10.1+ only)
-	# used to do vertex processing on the GPU on Intel G45 chipsets when it is faster this way (see www.intel.com/Assets/PDF/whitepaper/322931.pdf)
-	- Gallium does not support vertex pipeline bypass anymore
-
-SetViewports
-	- Gallium lacks support for multiple geometry-shader-selectable viewports D3D11 has
-
-ShaderResourceViewReadAfterWriteHazard
-	- Gallium lacks support for this
-	+ Gallium has texture_barrier
-
-SoSetTargets -> set_stream_output_buffers
-
-VsSetConstantBuffers -> for(i = StartBuffer; i < NumBuffers; ++i) set_constant_buffer(PIPE_SHADER_VERTEX, i, phBuffers[i])
-	* may want to split into fragment/vertex-specific versions
-
-VsSetSamplers -> bind_vertex_sampler_states
-	* may want to allow binding subsets instead of all at once
-
-VsSetShader -> bind_vs_state
-
-VsSetShaderWithIfaces (D3D11 only)
-	- Gallium does not support shader interfaces
-
-VsSetShaderResources  -> set_sampler_views
-	* may want to allow binding subsets instead of all at once
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/source/context.rst mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/source/context.rst
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/source/context.rst	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/source/context.rst	2015-09-16 14:36:09.000000000 +0000
@@ -131,14 +131,14 @@
 have no support for floating point coordinates, address wrap modes or
 filtering.
 
-Shader resources are specified for all the shader stages at once using
-the ``set_shader_resources`` method.  When binding texture resources,
-the ``level``, ``first_layer`` and ``last_layer`` pipe_surface fields
-specify the mipmap level and the range of layers the texture will be
-constrained to.  In the case of buffers, ``first_element`` and
-``last_element`` specify the range within the buffer that will be used
-by the shader resource.  Writes to a shader resource are only allowed
-when the ``writable`` flag is set.
+There are 2 types of shader resources: buffers and images.
+
+Buffers are specified using the ``set_shader_buffers`` method.
+
+Images are specified using the ``set_shader_images`` method. When binding
+images, the ``level``, ``first_layer`` and ``last_layer`` pipe_image_view
+fields specify the mipmap level and the range of layers the image will be
+constrained to.
 
 Surfaces
 ^^^^^^^^
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/source/screen.rst mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/source/screen.rst
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/source/screen.rst	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/source/screen.rst	2015-09-16 14:36:09.000000000 +0000
@@ -254,6 +254,19 @@
   and size must be page-aligned.
 * ``PIPE_CAP_DEVICE_RESET_STATUS_QUERY``:
   Whether pipe_context::get_device_reset_status is implemented.
+* ``PIPE_CAP_MAX_SHADER_PATCH_VARYINGS``:
+  How many per-patch outputs and inputs are supported between tessellation
+  control and tessellation evaluation shaders, not counting TESSINNER and
+  TESSOUTER. The minimum allowed value for OpenGL is 30.
+* ``PIPE_CAP_TEXTURE_FLOAT_LINEAR``: Whether the linear minification and
+  magnification filters are supported with single-precision floating-point
+  textures.
+* ``PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR``: Whether the linear minification and
+  magnification filters are supported with half-precision floating-point
+  textures.
+* ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
+  bounds_max states of pipe_depth_stencil_alpha_state behave according
+  to the GL_EXT_depth_bounds_test specification.
 
 
 .. _pipe_capf:
@@ -340,6 +353,8 @@
   DLDEXP are supported.
 * ``PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED``: Whether FMA and DFMA (doubles only)
   are supported.
+* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
+  ignore tgsi_declaration_range::Last for shader inputs and outputs.
 
 
 .. _pipe_compute_cap:
@@ -382,6 +397,8 @@
   Value type: ``uint32_t``
 * ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
   non-zero means yes, zero means no. Value type: ``uint32_t``
+* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in
+  threads. Also known as wavefront size, warp size or SIMD width.
 
 .. _pipe_bind:
 
@@ -422,8 +439,10 @@
   process.
 * ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global
   address space of a compute program.
-* ``PIPE_BIND_SHADER_RESOURCE``: A buffer or texture that can be
-  bound to the graphics pipeline as a shader resource.
+* ``PIPE_BIND_SHADER_BUFFER``: A buffer without a format that can be bound
+  to a shader and can be used with load, store, and atomic instructions.
+* ``PIPE_BIND_SHADER_IMAGE``: A buffer or texture with a format that can be
+  bound to a shader and can be used with load, store, and atomic instructions.
 * ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be
   bound to the compute program as a shader resource.
 * ``PIPE_BIND_COMMAND_ARGS_BUFFER``: A buffer that may be sourced by the
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/source/tgsi.rst mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/source/tgsi.rst
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/docs/source/tgsi.rst	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/docs/source/tgsi.rst	2015-09-16 14:36:09.000000000 +0000
@@ -2591,7 +2591,7 @@
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 Declarations can optional have an ArrayID attribute which can be referred by
-indirect addressing operands. An ArrayID of zero is reserved and treaded as
+indirect addressing operands. An ArrayID of zero is reserved and treated as
 if no ArrayID is specified.
 
 If an indirect addressing operand refers to a specific declaration by using
@@ -2603,6 +2603,7 @@
 If no ArrayID is specified with an indirect addressing operand the whole
 register file might be accessed by this operand. This is strongly discouraged
 and will prevent packing of scalar/vec2 arrays and effective alias analysis.
+This is only legal for TEMP and CONST register files.
 
 Declaration Semantic
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -2965,6 +2966,18 @@
 type must be 1 or 4 entries (if specifying on a per-component
 level) out of UNORM, SNORM, SINT, UINT and FLOAT.
 
+For TEX\* style texture sample opcodes (as opposed to SAMPLE\* opcodes
+which take an explicit SVIEW[#] source register), there may optionally be
+SVIEW[#] declarations.  In this case, the SVIEW index is implied by the
+SAMP index, and there must be a corresponding SVIEW[#] declaration for
+each SAMP[#] declaration.  Drivers are free to ignore this if they wish.
+But note in particular that some drivers need to know the sampler type
+(float/int/unsigned) in order to generate the correct code, so in cases
+where integer textures are sampled, SVIEW[#] declarations should be
+used.
+
+NOTE: It is NOT legal to mix SAMPLE\* style opcodes and TEX\* opcodes
+in the same shader.
 
 Declaration Resource
 ^^^^^^^^^^^^^^^^^^^^
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -8,15 +8,15 @@
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
 
-Copyright (C) 2013-2014 by the following authors:
+Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 
 Permission is hereby granted, free of charge, to any person obtaining
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_blend.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_blend.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_blend.h	2015-09-16 14:36:09.000000000 +0000
@@ -39,7 +39,7 @@
 	uint32_t rb_colormask;
 };
 
-static INLINE struct fd2_blend_stateobj *
+static inline struct fd2_blend_stateobj *
 fd2_blend_stateobj(struct pipe_blend_state *blend)
 {
 	return (struct fd2_blend_stateobj *)blend;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c	2015-09-16 14:36:09.000000000 +0000
@@ -414,32 +414,16 @@
 static void
 add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
 {
-	switch (inst->Instruction.Saturate) {
-	case TGSI_SAT_NONE:
-		break;
-	case TGSI_SAT_ZERO_ONE:
+	if (inst->Instruction.Saturate) {
 		alu->alu.vector_clamp = true;
-		break;
-	case TGSI_SAT_MINUS_PLUS_ONE:
-		DBG("unsupported saturate");
-		assert(0);
-		break;
 	}
 }
 
 static void
 add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
 {
-	switch (inst->Instruction.Saturate) {
-	case TGSI_SAT_NONE:
-		break;
-	case TGSI_SAT_ZERO_ONE:
+	if (inst->Instruction.Saturate) {
 		alu->alu.scalar_clamp = true;
-		break;
-	case TGSI_SAT_MINUS_PLUS_ONE:
-		DBG("unsupported saturate");
-		assert(0);
-		break;
 	}
 }
 
@@ -758,7 +742,7 @@
 	struct tgsi_src_register tmp_src;
 	const struct tgsi_src_register *coord;
 	bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
-			(inst->Instruction.Saturate != TGSI_SAT_NONE);
+			inst->Instruction.Saturate;
 	int idx;
 
 	if (using_temp || (opc == TGSI_OPCODE_TXP))
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_context.c	2014-09-20 14:48:28.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -67,7 +67,7 @@
 }
 
 static const uint8_t a22x_primtypes[PIPE_PRIM_MAX] = {
-		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST_A2XX,
+		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST_PSIZE,
 		[PIPE_PRIM_LINES]          = DI_PT_LINELIST,
 		[PIPE_PRIM_LINE_STRIP]     = DI_PT_LINESTRIP,
 		[PIPE_PRIM_LINE_LOOP]      = DI_PT_LINELOOP,
@@ -77,7 +77,7 @@
 };
 
 static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
-		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST_A2XX,
+		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST_PSIZE,
 		[PIPE_PRIM_LINES]          = DI_PT_LINELIST,
 		[PIPE_PRIM_LINE_STRIP]     = DI_PT_LINESTRIP,
 		[PIPE_PRIM_TRIANGLES]      = DI_PT_TRILIST,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_context.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -40,7 +40,7 @@
 	struct pipe_resource *solid_vertexbuf;
 };
 
-static INLINE struct fd2_context *
+static inline struct fd2_context *
 fd2_context(struct fd_context *ctx)
 {
 	return (struct fd2_context *)ctx;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
 	uint32_t pa_su_sc_mode_cntl;
 };
 
-static INLINE struct fd2_rasterizer_stateobj *
+static inline struct fd2_rasterizer_stateobj *
 fd2_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
 {
 	return (struct fd2_rasterizer_stateobj *)rast;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_texture.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_texture.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_texture.h	2015-09-16 14:36:09.000000000 +0000
@@ -42,7 +42,7 @@
 	uint32_t tex0, tex3, tex4, tex5;
 };
 
-static INLINE struct fd2_sampler_stateobj *
+static inline struct fd2_sampler_stateobj *
 fd2_sampler_stateobj(struct pipe_sampler_state *samp)
 {
 	return (struct fd2_sampler_stateobj *)samp;
@@ -54,7 +54,7 @@
 	uint32_t tex0, tex2, tex3;
 };
 
-static INLINE struct fd2_pipe_sampler_view *
+static inline struct fd2_pipe_sampler_view *
 fd2_pipe_sampler_view(struct pipe_sampler_view *pview)
 {
 	return (struct fd2_pipe_sampler_view *)pview;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
 	uint32_t rb_stencilrefmask_bf;
 };
 
-static INLINE struct fd2_zsa_stateobj *
+static inline struct fd2_zsa_stateobj *
 fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
 {
 	return (struct fd2_zsa_stateobj *)zsa;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -8,13 +8,13 @@
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -326,6 +326,13 @@
 	A3XX_TEX_3D = 3,
 };
 
+enum a3xx_tex_msaa {
+	A3XX_TPL1_MSAA1X = 0,
+	A3XX_TPL1_MSAA2X = 1,
+	A3XX_TPL1_MSAA4X = 2,
+	A3XX_TPL1_MSAA8X = 3,
+};
+
 #define A3XX_INT0_RBBM_GPU_IDLE					0x00000001
 #define A3XX_INT0_RBBM_AHB_ERROR				0x00000002
 #define A3XX_INT0_RBBM_REG_TIMEOUT				0x00000004
@@ -2652,6 +2659,7 @@
 #define REG_A3XX_VGT_IMMED_DATA					0x000021fd
 
 #define REG_A3XX_TEX_SAMP_0					0x00000000
+#define A3XX_TEX_SAMP_0_CLAMPENABLE				0x00000001
 #define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR			0x00000002
 #define A3XX_TEX_SAMP_0_XY_MAG__MASK				0x0000000c
 #define A3XX_TEX_SAMP_0_XY_MAG__SHIFT				2
@@ -2695,6 +2703,7 @@
 {
 	return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK;
 }
+#define A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF			0x01000000
 #define A3XX_TEX_SAMP_0_UNNORM_COORDS				0x80000000
 
 #define REG_A3XX_TEX_SAMP_1					0x00000001
@@ -2750,6 +2759,12 @@
 {
 	return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK;
 }
+#define A3XX_TEX_CONST_0_MSAATEX__MASK				0x00300000
+#define A3XX_TEX_CONST_0_MSAATEX__SHIFT				20
+static inline uint32_t A3XX_TEX_CONST_0_MSAATEX(enum a3xx_tex_msaa val)
+{
+	return ((val) << A3XX_TEX_CONST_0_MSAATEX__SHIFT) & A3XX_TEX_CONST_0_MSAATEX__MASK;
+}
 #define A3XX_TEX_CONST_0_FMT__MASK				0x1fc00000
 #define A3XX_TEX_CONST_0_FMT__SHIFT				22
 static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
@@ -2785,7 +2800,7 @@
 }
 
 #define REG_A3XX_TEX_CONST_2					0x00000002
-#define A3XX_TEX_CONST_2_INDX__MASK				0x000000ff
+#define A3XX_TEX_CONST_2_INDX__MASK				0x000001ff
 #define A3XX_TEX_CONST_2_INDX__SHIFT				0
 static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val)
 {
@@ -2805,7 +2820,7 @@
 }
 
 #define REG_A3XX_TEX_CONST_3					0x00000003
-#define A3XX_TEX_CONST_3_LAYERSZ1__MASK				0x00007fff
+#define A3XX_TEX_CONST_3_LAYERSZ1__MASK				0x0001ffff
 #define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT			0
 static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_blend.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_blend.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_blend.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,8 @@
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 
+#include "freedreno_util.h"
+
 struct fd3_blend_stateobj {
 	struct pipe_blend_state base;
 	struct {
@@ -42,10 +44,10 @@
 		/* Blend control bits for alpha channel */
 		uint32_t blend_control_alpha;
 		uint32_t control;
-	} rb_mrt[4];
+	} rb_mrt[A3XX_MAX_RENDER_TARGETS];
 };
 
-static INLINE struct fd3_blend_stateobj *
+static inline struct fd3_blend_stateobj *
 fd3_blend_stateobj(struct pipe_blend_state *blend)
 {
 	return (struct fd3_blend_stateobj *)blend;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_context.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -88,7 +88,7 @@
 }
 
 static const uint8_t primtypes[PIPE_PRIM_MAX] = {
-		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST_A3XX,
+		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST,
 		[PIPE_PRIM_LINES]          = DI_PT_LINELIST,
 		[PIPE_PRIM_LINE_STRIP]     = DI_PT_LINESTRIP,
 		[PIPE_PRIM_LINE_LOOP]      = DI_PT_LINELOOP,
@@ -121,6 +121,7 @@
 	fd3_gmem_init(pctx);
 	fd3_texture_init(pctx);
 	fd3_prog_init(pctx);
+	fd3_emit_init(pctx);
 
 	pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv);
 	if (!pctx)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_context.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -105,9 +105,6 @@
 	 */
 	unsigned fsaturate_s, fsaturate_t, fsaturate_r;
 
-	/* bitmask of integer texture samplers */
-	uint16_t vinteger_s, finteger_s;
-
 	/* some state changes require a different shader variant.  Keep
 	 * track of this so we know when we need to re-emit shader state
 	 * due to variant change.  See fixup_shader_state()
@@ -115,7 +112,7 @@
 	struct ir3_shader_key last_key;
 };
 
-static INLINE struct fd3_context *
+static inline struct fd3_context *
 fd3_context(struct fd_context *ctx)
 {
 	return (struct fd3_context *)ctx;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -60,6 +60,9 @@
 	const struct pipe_draw_info *info = emit->info;
 	enum pc_di_primtype primtype = ctx->primtypes[info->mode];
 
+	if (!(fd3_emit_get_vp(emit) && fd3_emit_get_fp(emit)))
+		return;
+
 	fd3_emit_state(ctx, ring, emit);
 
 	if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
@@ -79,8 +82,8 @@
 			info->restart_index : 0xffffffff);
 
 	if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
-		info->mode == PIPE_PRIM_POINTS)
-		primtype = DI_PT_POINTLIST_A2XX;
+			(info->mode == PIPE_PRIM_POINTS))
+		primtype = DI_PT_POINTLIST_PSIZE;
 
 	fd_draw_emit(ctx, ring,
 			primtype,
@@ -104,14 +107,12 @@
 		if (last_key->has_per_samp || key->has_per_samp) {
 			if ((last_key->vsaturate_s != key->vsaturate_s) ||
 					(last_key->vsaturate_t != key->vsaturate_t) ||
-					(last_key->vsaturate_r != key->vsaturate_r) ||
-					(last_key->vinteger_s != key->vinteger_s))
+					(last_key->vsaturate_r != key->vsaturate_r))
 				ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
 
 			if ((last_key->fsaturate_s != key->fsaturate_s) ||
 					(last_key->fsaturate_t != key->fsaturate_t) ||
-					(last_key->fsaturate_r != key->fsaturate_r) ||
-					(last_key->finteger_s != key->finteger_s))
+					(last_key->fsaturate_r != key->fsaturate_r))
 				ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
 		}
 
@@ -140,16 +141,13 @@
 			// TODO set .half_precision based on render target format,
 			// ie. float16 and smaller use half, float32 use full..
 			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
-			.has_per_samp = (fd3_ctx->fsaturate || fd3_ctx->vsaturate ||
-							 fd3_ctx->vinteger_s || fd3_ctx->finteger_s),
+			.has_per_samp = (fd3_ctx->fsaturate || fd3_ctx->vsaturate),
 			.vsaturate_s = fd3_ctx->vsaturate_s,
 			.vsaturate_t = fd3_ctx->vsaturate_t,
 			.vsaturate_r = fd3_ctx->vsaturate_r,
 			.fsaturate_s = fd3_ctx->fsaturate_s,
 			.fsaturate_t = fd3_ctx->fsaturate_t,
 			.fsaturate_r = fd3_ctx->fsaturate_r,
-			.vinteger_s = fd3_ctx->vinteger_s,
-			.finteger_s = fd3_ctx->finteger_s,
 		},
 		.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
 		.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
@@ -245,10 +243,7 @@
 		.vtx  = &fd3_ctx->solid_vbuf_state,
 		.prog = &ctx->solid_prog,
 		.key = {
-			.half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
-							   fd3_half_precision(pfb->cbufs[1]) &&
-							   fd3_half_precision(pfb->cbufs[2]) &&
-							   fd3_half_precision(pfb->cbufs[3])),
+			.half_precision = fd_half_precision(pfb),
 		},
 	};
 
@@ -326,7 +321,7 @@
 				A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
 	}
 
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
 		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
 		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
 				A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
@@ -347,7 +342,7 @@
 
 	fd3_emit_vertex_bufs(ring, &emit);
 
-	fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+	fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
 
 	OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
 	OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_emit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_emit.c	2015-09-16 14:36:09.000000000 +0000
@@ -43,19 +43,26 @@
 #include "fd3_format.h"
 #include "fd3_zsa.h"
 
+static const enum adreno_state_block sb[] = {
+	[SHADER_VERTEX]   = SB_VERT_SHADER,
+	[SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
 /* regid:          base const register
  * prsc or dwords: buffer containing constant values
  * sizedwords:     size of const value buffer
  */
 void
-fd3_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc)
 {
 	uint32_t i, sz;
 	enum adreno_state_src src;
 
+	debug_assert((regid % 4) == 0);
+	debug_assert((sizedwords % 4) == 0);
+
 	if (prsc) {
 		sz = 0;
 		src = SS_INDIRECT;
@@ -67,7 +74,7 @@
 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
 			CP_LOAD_STATE_0_STATE_SRC(src) |
-			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
 			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
 	if (prsc) {
 		struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@
 }
 
 static void
-emit_constants(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
-		struct fd_constbuf_stateobj *constbuf,
-		struct ir3_shader_variant *shader,
-		bool emit_immediates)
+fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+		uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
 {
-	uint32_t enabled_mask = constbuf->enabled_mask;
-	uint32_t max_const;
-	int i;
-
-	// XXX TODO only emit dirty consts.. but we need to keep track if
-	// they are clobbered by a clear, gmem2mem, or mem2gmem..
-	constbuf->dirty_mask = enabled_mask;
-
-	/* in particular, with binning shader we may end up with unused
-	 * consts, ie. we could end up w/ constlen that is smaller
-	 * than first_immediate.  In that case truncate the user consts
-	 * early to avoid HLSQ lockup caused by writing too many consts
-	 */
-	max_const = MIN2(shader->first_driver_param, shader->constlen);
-
-	/* emit user constants: */
-	if (enabled_mask & 1) {
-		const unsigned index = 0;
-		struct pipe_constant_buffer *cb = &constbuf->cb[index];
-		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
-		// I expect that size should be a multiple of vec4's:
-		assert(size == align(size, 4));
-
-		/* and even if the start of the const buffer is before
-		 * first_immediate, the end may not be:
-		 */
-		size = MIN2(size, 4 * max_const);
+	uint32_t i;
 
-		if (size && constbuf->dirty_mask & (1 << index)) {
-			fd3_emit_constant(ring, sb, 0,
-							  cb->buffer_offset, size,
-							  cb->user_buffer, cb->buffer);
-			constbuf->dirty_mask &= ~(1 << index);
-		}
-
-		enabled_mask &= ~(1 << index);
-	}
-
-	if (shader->constlen > shader->first_driver_param) {
-		uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
-		/* emit ubos: */
-		OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
-		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
-				 CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
-				 CP_LOAD_STATE_0_STATE_BLOCK(sb) |
-				 CP_LOAD_STATE_0_NUM_UNIT(params * 2));
-		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
-				 CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
-		for (i = 1; i <= params * 4; i++) {
-			struct pipe_constant_buffer *cb = &constbuf->cb[i];
-			assert(!cb->user_buffer);
-			if ((enabled_mask & (1 << i)) && cb->buffer)
-				OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
-			else
-				OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
-		}
-	}
-
-	/* emit shader immediates: */
-	if (shader && emit_immediates) {
-		int size = shader->immediates_count;
-		uint32_t base = shader->first_immediate;
+	debug_assert((regid % 4) == 0);
+	debug_assert((num % 4) == 0);
 
-		/* truncate size to avoid writing constants that shader
-		 * does not use:
-		 */
-		size = MIN2(size + base, shader->constlen) - base;
-
-		/* convert out of vec4: */
-		base *= 4;
-		size *= 4;
-
-		if (size > 0) {
-			fd3_emit_constant(ring, sb, base,
-				0, size, shader->immediates[0].val, NULL);
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+			CP_LOAD_STATE_0_NUM_UNIT(num/2));
+	OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+			CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+	for (i = 0; i < num; i++) {
+		if (bos[i]) {
+			if (write) {
+				OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+			} else {
+				OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+			}
+		} else {
+			OUT_RING(ring, 0xbad00000 | (i << 16));
 		}
 	}
 }
@@ -302,14 +251,15 @@
 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
 		for (i = 0; i < tex->num_textures; i++) {
 			static const struct fd3_pipe_sampler_view dummy_view = {
+					.base.target = PIPE_TEXTURE_1D, /* anything !PIPE_BUFFER */
 					.base.u.tex.first_level = 1,
 			};
 			const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
 					fd3_pipe_sampler_view(tex->textures[i]) :
 					&dummy_view;
 			struct fd_resource *rsc = fd_resource(view->base.texture);
-			unsigned start = view->base.u.tex.first_level;
-			unsigned end   = view->base.u.tex.last_level;
+			unsigned start = fd_sampler_first_level(&view->base);
+			unsigned end   = fd_sampler_last_level(&view->base);
 
 			for (j = 0; j < (end - start + 1); j++) {
 				struct fd_resource_slice *slice =
@@ -392,6 +342,7 @@
 			format = fd3_gmem_restore_format(rsc->base.b.format);
 		}
 
+		/* note: PIPE_BUFFER disallowed for surfaces */
 		unsigned lvl = psurf[i]->u.tex.level;
 		struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
 
@@ -444,7 +395,9 @@
 	uint32_t total_in = 0;
 	const struct fd_vertex_state *vtx = emit->vtx;
 	struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
-	unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+	unsigned vertex_regid = regid(63, 0);
+	unsigned instance_regid = regid(63, 0);
+	unsigned vtxcnt_regid = regid(63, 0);
 
 	for (i = 0; i < vp->inputs_count; i++) {
 		uint8_t semantic = sem2name(vp->inputs[i].semantic);
@@ -452,14 +405,17 @@
 			vertex_regid = vp->inputs[i].regid;
 		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
 			instance_regid = vp->inputs[i].regid;
+		else if (semantic == IR3_SEMANTIC_VTXCNT)
+			vtxcnt_regid = vp->inputs[i].regid;
 		else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
 			last = i;
 	}
 
 	/* hw doesn't like to be configured for zero vbo's, it seems: */
-	if (vtx->vtx->num_elements == 0 &&
-		vertex_regid == regid(63, 0) &&
-		instance_regid == regid(63, 0))
+	if ((vtx->vtx->num_elements == 0) &&
+			(vertex_regid == regid(63, 0)) &&
+			(instance_regid == regid(63, 0)) &&
+			(vtxcnt_regid == regid(63, 0)))
 		return;
 
 	for (i = 0, j = 0; i <= last; i++) {
@@ -472,8 +428,9 @@
 			enum pipe_format pfmt = elem->src_format;
 			enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
 			bool switchnext = (i != last) ||
-				vertex_regid != regid(63, 0) ||
-				instance_regid != regid(63, 0);
+					(vertex_regid != regid(63, 0)) ||
+					(instance_regid != regid(63, 0)) ||
+					(vtxcnt_regid != regid(63, 0));
 			bool isint = util_format_is_pure_integer(pfmt);
 			uint32_t fs = util_format_get_blocksize(pfmt);
 
@@ -512,6 +469,10 @@
 	OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
 			A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
 			A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
+
+	OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
+	OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
+			A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid));
 }
 
 void
@@ -602,10 +563,29 @@
 		val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
+		/* TODO only use if prog doesn't use clipvertex/clipdist */
+		val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}
 
+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+		uint32_t planes = ctx->rasterizer->clip_plane_enable;
+		int count = 0;
+
+		while (planes && count < 6) {
+			int i = ffs(planes) - 1;
+
+			planes &= ~(1U << i);
+			fd_wfi(ctx, ring);
+			OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
+		}
+	}
+
 	/* NOTE: since primitive_restart is not actually part of any
 	 * state object, we need to make sure that we always emit
 	 * PRIM_VTX_CNTL.. either that or be more clever and detect
@@ -669,33 +649,12 @@
 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
 	OUT_RING(ring, HLSQ_FLUSH);
 
-	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
-			/* evil hack to deal sanely with clear path: */
-			(emit->prog == &ctx->prog)) {
-		fd_wfi(ctx, ring);
-		emit_constants(ring,  SB_VERT_SHADER,
-				&ctx->constbuf[PIPE_SHADER_VERTEX],
-				vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
-		if (!emit->key.binning_pass) {
-			emit_constants(ring, SB_FRAG_SHADER,
-					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
-					fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
-		}
-	}
-
-	/* emit driver params every time */
-	if (emit->info && emit->prog == &ctx->prog) {
-		uint32_t vertex_params[4] = {
-			emit->info->indexed ? emit->info->index_bias : emit->info->start,
-			0,
-			0,
-			0
-		};
-		if (vp->constlen >= vp->first_driver_param + 4) {
-			fd3_emit_constant(ring, SB_VERT_SHADER,
-							  (vp->first_driver_param + 4) * 4,
-							  0, 4, vertex_params, NULL);
-		}
+	if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+		ir3_emit_consts(vp, ring, emit->info, dirty);
+		if (!emit->key.binning_pass)
+			ir3_emit_consts(fp, ring, emit->info, dirty);
+		/* mark clean after emitting consts: */
+		ctx->prog.dirty = 0;
 	}
 
 	if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
@@ -930,3 +889,11 @@
 
 	ctx->needs_rb_fbd = true;
 }
+
+void
+fd3_emit_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->emit_const = fd3_emit_const;
+	ctx->emit_const_bo = fd3_emit_const_bo;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_emit.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_emit.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_emit.h	2015-09-16 14:36:09.000000000 +0000
@@ -37,10 +37,8 @@
 #include "ir3_shader.h"
 
 struct fd_ringbuffer;
-enum adreno_state_block;
 
-void fd3_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc);
 
@@ -90,4 +88,6 @@
 
 void fd3_emit_restore(struct fd_context *ctx);
 
+void fd3_emit_init(struct pipe_context *pctx);
+
 #endif /* FD3_EMIT_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_format.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_format.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_format.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_format.c	2015-09-16 14:36:09.000000000 +0000
@@ -262,6 +262,15 @@
 	_T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
 	_T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
 	_T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
+
+	_T(DXT1_RGB,   DXT1, NONE, WZYX),
+	_T(DXT1_SRGB,  DXT1, NONE, WZYX),
+	_T(DXT1_RGBA,  DXT1, NONE, WZYX),
+	_T(DXT1_SRGBA, DXT1, NONE, WZYX),
+	_T(DXT3_RGBA,  DXT3, NONE, WZYX),
+	_T(DXT3_SRGBA, DXT3, NONE, WZYX),
+	_T(DXT5_RGBA,  DXT5, NONE, WZYX),
+	_T(DXT5_SRGBA, DXT5, NONE, WZYX),
 };
 
 enum a3xx_vtx_fmt
@@ -301,7 +310,7 @@
 {
 	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
 		format = PIPE_FORMAT_Z32_FLOAT;
-	switch (util_format_get_blocksizebits(format)) {
+	switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) {
 	case 8: return TFETCH_1_BYTE;
 	case 16: return TFETCH_2_BYTE;
 	case 32: return TFETCH_4_BYTE;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_format.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_format.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_format.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_format.h	2015-09-16 14:36:09.000000000 +0000
@@ -41,27 +41,4 @@
 uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
 		unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
 
-static INLINE bool
-fd3_half_precision(const struct pipe_surface *surface)
-{
-	enum pipe_format format;
-	if (!surface)
-		return true;
-
-	format = surface->format;
-
-	/* colors are provided in consts, which go through cov.f32f16, which will
-	 * break these values
-	 */
-	if (util_format_is_pure_integer(format))
-		return false;
-
-	/* avoid losing precision on 32-bit float formats */
-	if (util_format_is_float(format) &&
-		util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
-		return false;
-
-	return true;
-}
-
 #endif /* FD3_FORMAT_H_ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,7 @@
 		tile_mode = LINEAR;
 	}
 
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
 		enum pipe_format pformat = 0;
 		enum a3xx_color_fmt format = 0;
 		enum a3xx_color_swap swap = WZYX;
@@ -537,10 +537,7 @@
 			/* NOTE: They all use the same VP, this is for vtx bufs. */
 			.prog = &ctx->blit_prog[0],
 			.key = {
-				.half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
-								   fd3_half_precision(pfb->cbufs[1]) &&
-								   fd3_half_precision(pfb->cbufs[2]) &&
-								   fd3_half_precision(pfb->cbufs[3]))
+				.half_precision = fd_half_precision(pfb),
 			},
 	};
 	float x0, y0, x1, y1;
@@ -654,6 +651,7 @@
 
 	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
 		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+		emit.fp = NULL;      /* frag shader changed so clear cache */
 		fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
 		emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
 	}
@@ -674,6 +672,7 @@
 				emit.prog = &ctx->blit_zs;
 			emit.key.half_precision = false;
 		}
+		emit.fp = NULL;      /* frag shader changed so clear cache */
 		fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
 		emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
 	}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_program.c	2015-09-16 14:36:09.000000000 +0000
@@ -51,7 +51,7 @@
 		enum shader_t type)
 {
 	struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
-	so->shader = ir3_shader_create(pctx, cso->tokens, type);
+	so->shader = ir3_shader_create(pctx, cso, type);
 	return so;
 }
 
@@ -136,6 +136,8 @@
 	int constmode;
 	int i, j, k;
 
+	debug_assert(nr <= ARRAY_SIZE(color_regid));
+
 	vp = fd3_emit_get_vp(emit);
 
 	if (emit->key.binning_pass) {
@@ -202,12 +204,12 @@
 		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
 			ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
 	} else {
-		for (int i = 0; i < fp->outputs_count; i++) {
+		for (i = 0; i < fp->outputs_count; i++) {
 			ir3_semantic sem = fp->outputs[i].semantic;
 			unsigned idx = sem2idx(sem);
 			if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
 				continue;
-			assert(idx < 4);
+			debug_assert(idx < ARRAY_SIZE(color_regid));
 			color_regid[idx] = fp->outputs[i].regid;
 		}
 	}
@@ -449,10 +451,6 @@
 		OUT_RING(ring, flatshade[1]);        /* SP_FS_FLAT_SHAD_MODE_REG_1 */
 	}
 
-	OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
-	OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
-			A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
-
 	if (vpbuffer == BUFFER)
 		emit_shader(ring, vp);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -64,7 +64,7 @@
 
 	OUT_PKT3(ring, CP_DRAW_INDX, 3);
 	OUT_RING(ring, 0x00000000);
-	OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX,
+	OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
 						INDEX_SIZE_IGN, USE_VISIBILITY, 0));
 	OUT_RING(ring, 0);             /* NumIndices */
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c	2015-09-16 14:36:09.000000000 +0000
@@ -73,7 +73,7 @@
 	so->gras_su_poly_offset_scale =
 			A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
 	so->gras_su_poly_offset_offset =
-			A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+			A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units * 2.0f);
 
 	so->gras_su_mode_control =
 			A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
 	uint32_t pc_prim_vtx_cntl;
 };
 
-static INLINE struct fd3_rasterizer_stateobj *
+static inline struct fd3_rasterizer_stateobj *
 fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
 {
 	return (struct fd3_rasterizer_stateobj *)rast;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,7 @@
 #include "fd3_screen.h"
 #include "fd3_context.h"
 #include "fd3_format.h"
+#include "ir3_compiler.h"
 
 static boolean
 fd3_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -103,7 +104,9 @@
 void
 fd3_screen_init(struct pipe_screen *pscreen)
 {
-	fd_screen(pscreen)->max_rts = 4;
+	struct fd_screen *screen = fd_screen(pscreen);
+	screen->max_rts = A3XX_MAX_RENDER_TARGETS;
+	screen->compiler = ir3_compiler_create(screen->gpu_id);
 	pscreen->context_create = fd3_context_create;
 	pscreen->is_format_supported = fd3_screen_is_format_supported;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_texture.c	2015-09-16 14:36:09.000000000 +0000
@@ -115,6 +115,7 @@
 
 	so->texsamp0 =
 			COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
+			COND(!cso->seamless_cube_map, A3XX_TEX_SAMP_0_CUBEMAPSEAMLESSFILTOFF) |
 			COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
 			A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
 			A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
@@ -210,8 +211,8 @@
 {
 	struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
 	struct fd_resource *rsc = fd_resource(prsc);
-	unsigned lvl = cso->u.tex.first_level;
-	unsigned miplevels = cso->u.tex.last_level - lvl;
+	unsigned lvl = fd_sampler_first_level(cso);
+	unsigned miplevels = fd_sampler_last_level(cso) - lvl;
 	uint32_t sz2 = 0;
 
 	if (!so)
@@ -239,7 +240,7 @@
 			A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
 	/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
 	so->texconst2 =
-			A3XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+			A3XX_TEX_CONST_2_PITCH(util_format_get_nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp);
 	switch (prsc->target) {
 	case PIPE_TEXTURE_1D_ARRAY:
 	case PIPE_TEXTURE_2D_ARRAY:
@@ -263,44 +264,11 @@
 	return &so->base;
 }
 
-static void
-fd3_set_sampler_views(struct pipe_context *pctx, unsigned shader,
-					  unsigned start, unsigned nr,
-					  struct pipe_sampler_view **views)
-{
-	struct fd_context *ctx = fd_context(pctx);
-	struct fd3_context *fd3_ctx = fd3_context(ctx);
-	struct fd_texture_stateobj *tex;
-	uint16_t integer_s = 0, *ptr;
-	int i;
-
-	fd_set_sampler_views(pctx, shader, start, nr, views);
-
-	switch (shader) {
-	case PIPE_SHADER_FRAGMENT:
-		tex = &ctx->fragtex;
-		ptr = &fd3_ctx->finteger_s;
-		break;
-	case PIPE_SHADER_VERTEX:
-		tex = &ctx->verttex;
-		ptr = &fd3_ctx->vinteger_s;
-		break;
-	default:
-		return;
-	}
-
-	for (i = 0; i < tex->num_textures; i++)
-		if (util_format_is_pure_integer(tex->textures[i]->format))
-			integer_s |= 1 << i;
-	*ptr = integer_s;
-}
-
-
 void
 fd3_texture_init(struct pipe_context *pctx)
 {
 	pctx->create_sampler_state = fd3_sampler_state_create;
 	pctx->bind_sampler_states = fd3_sampler_states_bind;
 	pctx->create_sampler_view = fd3_sampler_view_create;
-	pctx->set_sampler_views = fd3_set_sampler_views;
+	pctx->set_sampler_views = fd_set_sampler_views;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_texture.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_texture.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_texture.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
 	bool saturate_s, saturate_t, saturate_r;
 };
 
-static INLINE struct fd3_sampler_stateobj *
+static inline struct fd3_sampler_stateobj *
 fd3_sampler_stateobj(struct pipe_sampler_state *samp)
 {
 	return (struct fd3_sampler_stateobj *)samp;
@@ -54,7 +54,7 @@
 	uint32_t texconst0, texconst1, texconst2, texconst3;
 };
 
-static INLINE struct fd3_pipe_sampler_view *
+static inline struct fd3_pipe_sampler_view *
 fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
 {
 	return (struct fd3_pipe_sampler_view *)pview;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
 	uint32_t rb_stencilrefmask_bf;
 };
 
-static INLINE struct fd3_zsa_stateobj *
+static inline struct fd3_zsa_stateobj *
 fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
 {
 	return (struct fd3_zsa_stateobj *)zsa;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -8,13 +8,13 @@
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -162,10 +162,13 @@
 	TFMT4_8_UNORM = 4,
 	TFMT4_8_8_UNORM = 14,
 	TFMT4_8_8_8_8_UNORM = 28,
+	TFMT4_8_SNORM = 5,
 	TFMT4_8_8_SNORM = 15,
 	TFMT4_8_8_8_8_SNORM = 29,
+	TFMT4_8_UINT = 6,
 	TFMT4_8_8_UINT = 16,
 	TFMT4_8_8_8_8_UINT = 30,
+	TFMT4_8_SINT = 7,
 	TFMT4_8_8_SINT = 17,
 	TFMT4_8_8_8_8_SINT = 31,
 	TFMT4_16_UINT = 21,
@@ -227,6 +230,7 @@
 	DEPTH4_NONE = 0,
 	DEPTH4_16 = 1,
 	DEPTH4_24_8 = 2,
+	DEPTH4_32 = 3,
 };
 
 enum a4xx_tess_spacing {
@@ -429,7 +433,7 @@
 	return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK;
 }
 #define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB				0x00002000
-#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK		0x007fc000
+#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK		0xffffc000
 #define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT		14
 static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
 {
@@ -439,7 +443,7 @@
 static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; }
 
 static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; }
-#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK			0x0001fff8
+#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK			0x03fffff8
 #define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT			3
 static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val)
 {
@@ -570,6 +574,15 @@
 	return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK;
 }
 
+#define REG_A4XX_RB_SAMPLE_COUNT_CONTROL			0x000020fa
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_COPY			0x00000002
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK			0xfffffffc
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT		2
+static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val)
+{
+	return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK;
+}
+
 #define REG_A4XX_RB_RENDER_COMPONENTS				0x000020fb
 #define A4XX_RB_RENDER_COMPONENTS_RT0__MASK			0x0000000f
 #define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT			0
@@ -811,6 +824,23 @@
 #define REG_A4XX_RB_STENCIL_CONTROL2				0x00002107
 #define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER			0x00000001
 
+#define REG_A4XX_RB_STENCIL_INFO				0x00002108
+#define A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL			0x00000001
+#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK			0xfffff000
+#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT		12
+static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val)
+{
+	return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK;
+}
+
+#define REG_A4XX_RB_STENCIL_PITCH				0x00002109
+#define A4XX_RB_STENCIL_PITCH__MASK				0xffffffff
+#define A4XX_RB_STENCIL_PITCH__SHIFT				0
+static inline uint32_t A4XX_RB_STENCIL_PITCH(uint32_t val)
+{
+	return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK;
+}
+
 #define REG_A4XX_RB_STENCILREFMASK				0x0000210b
 #define A4XX_RB_STENCILREFMASK_STENCILREF__MASK			0x000000ff
 #define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT		0
@@ -1167,6 +1197,8 @@
 
 #define REG_A4XX_SP_VS_STATUS					0x00000ec0
 
+#define REG_A4XX_SP_MODE_CONTROL				0x00000ec3
+
 #define REG_A4XX_SP_PERFCTR_SP_SEL_11				0x00000ecf
 
 #define REG_A4XX_SP_SP_CTRL_REG					0x000022c0
@@ -1431,6 +1463,21 @@
 {
 	return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK;
 }
+#define A4XX_SP_FS_MRT_REG_COLOR_SRGB				0x00040000
+
+#define REG_A4XX_SP_CS_CTRL_REG0				0x00002300
+
+#define REG_A4XX_SP_CS_OBJ_OFFSET_REG				0x00002301
+
+#define REG_A4XX_SP_CS_OBJ_START				0x00002302
+
+#define REG_A4XX_SP_CS_PVT_MEM_PARAM				0x00002303
+
+#define REG_A4XX_SP_CS_PVT_MEM_ADDR				0x00002304
+
+#define REG_A4XX_SP_CS_PVT_MEM_SIZE				0x00002305
+
+#define REG_A4XX_SP_CS_LENGTH_REG				0x00002306
 
 #define REG_A4XX_SP_HS_OBJ_OFFSET_REG				0x0000230d
 #define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK	0x01ff0000
@@ -1454,6 +1501,76 @@
 
 #define REG_A4XX_SP_HS_LENGTH_REG				0x00002312
 
+#define REG_A4XX_SP_DS_PARAM_REG				0x0000231a
+#define A4XX_SP_DS_PARAM_REG_POSREGID__MASK			0x000000ff
+#define A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT			0
+static inline uint32_t A4XX_SP_DS_PARAM_REG_POSREGID(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_DS_PARAM_REG_POSREGID__MASK;
+}
+#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK		0xfff00000
+#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT		20
+static inline uint32_t A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_DS_OUT(uint32_t i0) { return 0x0000231b + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_DS_OUT_REG(uint32_t i0) { return 0x0000231b + 0x1*i0; }
+#define A4XX_SP_DS_OUT_REG_A_REGID__MASK			0x000001ff
+#define A4XX_SP_DS_OUT_REG_A_REGID__SHIFT			0
+static inline uint32_t A4XX_SP_DS_OUT_REG_A_REGID(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_A_REGID__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK			0x00001e00
+#define A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT			9
+static inline uint32_t A4XX_SP_DS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_B_REGID__MASK			0x01ff0000
+#define A4XX_SP_DS_OUT_REG_B_REGID__SHIFT			16
+static inline uint32_t A4XX_SP_DS_OUT_REG_B_REGID(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_B_REGID__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK			0x1e000000
+#define A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT			25
+static inline uint32_t A4XX_SP_DS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_DS_VPC_DST(uint32_t i0) { return 0x0000232c + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_DS_VPC_DST_REG(uint32_t i0) { return 0x0000232c + 0x1*i0; }
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK			0x000000ff
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT			0
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK			0x0000ff00
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT			8
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK			0x00ff0000
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT			16
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK			0xff000000
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT			24
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+	return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
 #define REG_A4XX_SP_DS_OBJ_OFFSET_REG				0x00002334
 #define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK	0x01ff0000
 #define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT	16
@@ -1476,6 +1593,82 @@
 
 #define REG_A4XX_SP_DS_LENGTH_REG				0x00002339
 
+#define REG_A4XX_SP_GS_PARAM_REG				0x00002341
+#define A4XX_SP_GS_PARAM_REG_POSREGID__MASK			0x000000ff
+#define A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT			0
+static inline uint32_t A4XX_SP_GS_PARAM_REG_POSREGID(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_POSREGID__MASK;
+}
+#define A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK			0x0000ff00
+#define A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT			8
+static inline uint32_t A4XX_SP_GS_PARAM_REG_PRIMREGID(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK;
+}
+#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK		0xfff00000
+#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT		20
+static inline uint32_t A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_GS_OUT(uint32_t i0) { return 0x00002342 + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_GS_OUT_REG(uint32_t i0) { return 0x00002342 + 0x1*i0; }
+#define A4XX_SP_GS_OUT_REG_A_REGID__MASK			0x000001ff
+#define A4XX_SP_GS_OUT_REG_A_REGID__SHIFT			0
+static inline uint32_t A4XX_SP_GS_OUT_REG_A_REGID(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_A_REGID__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK			0x00001e00
+#define A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT			9
+static inline uint32_t A4XX_SP_GS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_B_REGID__MASK			0x01ff0000
+#define A4XX_SP_GS_OUT_REG_B_REGID__SHIFT			16
+static inline uint32_t A4XX_SP_GS_OUT_REG_B_REGID(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_B_REGID__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK			0x1e000000
+#define A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT			25
+static inline uint32_t A4XX_SP_GS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_GS_VPC_DST(uint32_t i0) { return 0x00002353 + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_GS_VPC_DST_REG(uint32_t i0) { return 0x00002353 + 0x1*i0; }
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK			0x000000ff
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT			0
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK			0x0000ff00
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT			8
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK			0x00ff0000
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT			16
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK			0xff000000
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT			24
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+	return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
 #define REG_A4XX_SP_GS_OBJ_OFFSET_REG				0x0000235b
 #define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK	0x01ff0000
 #define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT	16
@@ -1677,6 +1870,18 @@
 {
 	return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK;
 }
+#define A4XX_VFD_CONTROL_3_REGID_TESSX__MASK			0x00ff0000
+#define A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT			16
+static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val)
+{
+	return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSX__MASK;
+}
+#define A4XX_VFD_CONTROL_3_REGID_TESSY__MASK			0xff000000
+#define A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT			24
+static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val)
+{
+	return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSY__MASK;
+}
 
 #define REG_A4XX_VFD_CONTROL_4					0x00002204
 
@@ -1758,6 +1963,8 @@
 
 #define REG_A4XX_TPL1_DEBUG_ECO_CONTROL				0x00000f00
 
+#define REG_A4XX_TPL1_TP_MODE_CONTROL				0x00000f03
+
 #define REG_A4XX_TPL1_PERFCTR_TP_SEL_7				0x00000f0b
 
 #define REG_A4XX_TPL1_TP_TEX_OFFSET				0x00002380
@@ -1800,6 +2007,10 @@
 
 #define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR		0x000023a1
 
+#define REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR		0x000023a4
+
+#define REG_A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR			0x000023a5
+
 #define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR			0x000023a6
 
 #define REG_A4XX_GRAS_TSE_STATUS				0x00000c80
@@ -2078,6 +2289,8 @@
 
 #define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL				0x00000e04
 
+#define REG_A4XX_HLSQ_MODE_CONTROL				0x00000e05
+
 #define REG_A4XX_HLSQ_PERF_PIPE_MASK				0x00000e0e
 
 #define REG_A4XX_HLSQ_CONTROL_0_REG				0x000023c0
@@ -2158,6 +2371,8 @@
 	return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK;
 }
 
+#define REG_A4XX_HLSQ_CONTROL_4_REG				0x000023c4
+
 #define REG_A4XX_HLSQ_VS_CONTROL_REG				0x000023c5
 #define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK		0x000000ff
 #define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT		0
@@ -2293,6 +2508,36 @@
 	return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK;
 }
 
+#define REG_A4XX_HLSQ_CS_CONTROL				0x000023ca
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_0				0x000023cd
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_1				0x000023ce
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_2				0x000023cf
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_3				0x000023d0
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_4				0x000023d1
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_5				0x000023d2
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_6				0x000023d3
+
+#define REG_A4XX_HLSQ_CL_CONTROL_0				0x000023d4
+
+#define REG_A4XX_HLSQ_CL_CONTROL_1				0x000023d5
+
+#define REG_A4XX_HLSQ_CL_KERNEL_CONST				0x000023d6
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_X				0x000023d7
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Y				0x000023d8
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Z				0x000023d9
+
+#define REG_A4XX_HLSQ_CL_WG_OFFSET				0x000023da
+
 #define REG_A4XX_HLSQ_UPDATE_CONTROL				0x000023db
 
 #define REG_A4XX_PC_BINNING_COMMAND				0x00000d00
@@ -2389,16 +2634,10 @@
 
 #define REG_A4XX_UNKNOWN_0D01					0x00000d01
 
-#define REG_A4XX_UNKNOWN_0E05					0x00000e05
-
 #define REG_A4XX_UNKNOWN_0E42					0x00000e42
 
 #define REG_A4XX_UNKNOWN_0EC2					0x00000ec2
 
-#define REG_A4XX_UNKNOWN_0EC3					0x00000ec3
-
-#define REG_A4XX_UNKNOWN_0F03					0x00000f03
-
 #define REG_A4XX_UNKNOWN_2001					0x00002001
 
 #define REG_A4XX_UNKNOWN_209B					0x0000209b
@@ -2439,6 +2678,8 @@
 
 #define REG_A4XX_UNKNOWN_22D7					0x000022d7
 
+#define REG_A4XX_UNKNOWN_2352					0x00002352
+
 #define REG_A4XX_TEX_SAMP_0					0x00000000
 #define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR			0x00000001
 #define A4XX_TEX_SAMP_0_XY_MAG__MASK				0x00000006
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_blend.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_blend.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_blend.c	2015-09-16 14:36:09.000000000 +0000
@@ -61,7 +61,7 @@
 	struct fd4_blend_stateobj *so;
 //	enum a3xx_rop_code rop = ROP_COPY;
 	bool reads_dest = false;
-	int i;
+	unsigned i, mrt_blend = 0;
 
 	if (cso->logicop_enable) {
 //		rop = cso->logicop_func;  /* maps 1:1 */
@@ -84,11 +84,6 @@
 		}
 	}
 
-	if (cso->independent_blend_enable) {
-		DBG("Unsupported! independent blend state");
-		return NULL;
-	}
-
 	so = CALLOC_STRUCT(fd4_blend_stateobj);
 	if (!so)
 		return NULL;
@@ -96,7 +91,12 @@
 	so->base = *cso;
 
 	for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
-		const struct pipe_rt_blend_state *rt = &cso->rt[i];
+		const struct pipe_rt_blend_state *rt;
+
+		if (cso->independent_blend_enable)
+			rt = &cso->rt[i];
+		else
+			rt = &cso->rt[0];
 
 		so->rb_mrt[i].blend_control =
 				A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
@@ -115,7 +115,7 @@
 					A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
 					A4XX_RB_MRT_CONTROL_BLEND |
 					A4XX_RB_MRT_CONTROL_BLEND2;
-			so->rb_fs_output |= A4XX_RB_FS_OUTPUT_ENABLE_BLEND(1);
+			mrt_blend |= (1 << i);
 		}
 
 		if (reads_dest)
@@ -125,5 +125,7 @@
 			so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
 	}
 
+	so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
+
 	return so;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_blend.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_blend.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_blend.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,17 +32,19 @@
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 
+#include "freedreno_util.h"
+
 struct fd4_blend_stateobj {
 	struct pipe_blend_state base;
 	struct {
 		uint32_t control;
 		uint32_t buf_info;
 		uint32_t blend_control;
-	} rb_mrt[8];
+	} rb_mrt[A4XX_MAX_RENDER_TARGETS];
 	uint32_t rb_fs_output;
 };
 
-static INLINE struct fd4_blend_stateobj *
+static inline struct fd4_blend_stateobj *
 fd4_blend_stateobj(struct pipe_blend_state *blend)
 {
 	return (struct fd4_blend_stateobj *)blend;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_context.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -86,7 +86,7 @@
 }
 
 static const uint8_t primtypes[PIPE_PRIM_MAX] = {
-		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST_A3XX,
+		[PIPE_PRIM_POINTS]         = DI_PT_POINTLIST,
 		[PIPE_PRIM_LINES]          = DI_PT_LINELIST,
 		[PIPE_PRIM_LINE_STRIP]     = DI_PT_LINESTRIP,
 		[PIPE_PRIM_LINE_LOOP]      = DI_PT_LINELOOP,
@@ -119,6 +119,7 @@
 	fd4_gmem_init(pctx);
 	fd4_texture_init(pctx);
 	fd4_prog_init(pctx);
+	fd4_emit_init(pctx);
 
 	pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
 	if (!pctx)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -83,9 +83,6 @@
 	 */
 	uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
 
-	/* bitmask of integer texture samplers */
-	uint16_t vinteger_s, finteger_s;
-
 	/* some state changes require a different shader variant.  Keep
 	 * track of this so we know when we need to re-emit shader state
 	 * due to variant change.  See fixup_shader_state()
@@ -93,7 +90,7 @@
 	struct ir3_shader_key last_key;
 };
 
-static INLINE struct fd4_context *
+static inline struct fd4_context *
 fd4_context(struct fd_context *ctx)
 {
 	return (struct fd4_context *)ctx;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,6 +48,9 @@
 {
 	const struct pipe_draw_info *info = emit->info;
 
+	if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
+		return;
+
 	fd4_emit_state(ctx, ring, emit);
 
 	if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
@@ -82,8 +85,7 @@
 		if (last_key->has_per_samp || key->has_per_samp) {
 			if ((last_key->vsaturate_s != key->vsaturate_s) ||
 					(last_key->vsaturate_t != key->vsaturate_t) ||
-					(last_key->vsaturate_r != key->vsaturate_r) ||
-					(last_key->vinteger_s != key->vinteger_s))
+					(last_key->vsaturate_r != key->vsaturate_r))
 				ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
 
 			if ((last_key->fsaturate_s != key->fsaturate_s) ||
@@ -109,7 +111,6 @@
 fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 {
 	struct fd4_context *fd4_ctx = fd4_context(ctx);
-	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 	struct fd4_emit emit = {
 		.vtx  = &ctx->vtx,
 		.prog = &ctx->prog,
@@ -122,19 +123,17 @@
 			// TODO set .half_precision based on render target format,
 			// ie. float16 and smaller use half, float32 use full..
 			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
-			.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate ||
-					fd4_ctx->vinteger_s || fd4_ctx->finteger_s),
+			.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate),
 			.vsaturate_s = fd4_ctx->vsaturate_s,
 			.vsaturate_t = fd4_ctx->vsaturate_t,
 			.vsaturate_r = fd4_ctx->vsaturate_r,
 			.fsaturate_s = fd4_ctx->fsaturate_s,
 			.fsaturate_t = fd4_ctx->fsaturate_t,
 			.fsaturate_r = fd4_ctx->fsaturate_r,
-			.vinteger_s = fd4_ctx->vinteger_s,
-			.finteger_s = fd4_ctx->finteger_s,
 		},
-		.format = fd4_emit_format(pfb->cbufs[0]),
-		.pformat = pipe_surface_format(pfb->cbufs[0]),
+		.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+		.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
+		.sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
 	};
 	unsigned dirty;
 
@@ -174,20 +173,16 @@
 	struct fd4_context *fd4_ctx = fd4_context(ctx);
 	struct fd_ringbuffer *ring = ctx->ring;
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
 	unsigned dirty = ctx->dirty;
-	unsigned ce, i;
+	unsigned i;
 	struct fd4_emit emit = {
 		.vtx  = &fd4_ctx->solid_vbuf_state,
 		.prog = &ctx->solid_prog,
 		.key = {
-			.half_precision = true,
+			.half_precision = fd_half_precision(pfb),
 		},
-		.format = fd4_emit_format(pfb->cbufs[0]),
 	};
-	uint32_t colr = 0;
-
-	if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
-		colr  = pack_rgba(pfb->cbufs[0]->format, color->f);
 
 	dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
 	dirty |= FD_DIRTY_PROG;
@@ -261,16 +256,15 @@
 	if (buffers & PIPE_CLEAR_COLOR) {
 		OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
 		OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
-		ce = 0xf;
-	} else {
-		ce = 0x0;
 	}
 
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+		mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
+
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
 		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
 				A4XX_RB_MRT_CONTROL_B11 |
-				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+				A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
 
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
 		OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
@@ -281,6 +275,16 @@
 				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
 	}
 
+	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
 	fd4_emit_vertex_bufs(ring, &emit);
 
 	OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
@@ -289,14 +293,8 @@
 	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
 	OUT_RING(ring, 0x00000000);
 
-	OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
-	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW0 */
-	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW1 */
-	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW2 */
-	OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW3 */
-
 	/* until fastclear works: */
-	fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+	fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
 
 	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_draw.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_draw.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_draw.h	2015-09-16 14:36:09.000000000 +0000
@@ -106,6 +106,7 @@
 {
 	struct pipe_index_buffer *idx = &ctx->indexbuf;
 	struct fd_bo *idx_bo = NULL;
+	enum pc_di_primtype primtype = ctx->primtypes[info->mode];
 	enum a4xx_index_size idx_type;
 	enum pc_di_src_sel src_sel;
 	uint32_t idx_size, idx_offset;
@@ -126,7 +127,12 @@
 		src_sel = DI_SRC_SEL_AUTO_INDEX;
 	}
 
-	fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
+	/* points with per-vertex psize use the PSIZE pointlist primtype: */
+	if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
+			(info->mode == PIPE_PRIM_POINTS))
+		primtype = DI_PT_POINTLIST_PSIZE;
+
+	fd4_draw(ctx, ring, primtype, vismode, src_sel,
 			info->count, info->instance_count,
 			idx_type, idx_size, idx_offset, idx_bo);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_emit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_emit.c	2015-09-16 14:36:09.000000000 +0000
@@ -43,19 +43,26 @@
 #include "fd4_format.h"
 #include "fd4_zsa.h"
 
+static const enum adreno_state_block sb[] = {
+	[SHADER_VERTEX]   = SB_VERT_SHADER,
+	[SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
 /* regid:          base const register
  * prsc or dwords: buffer containing constant values
  * sizedwords:     size of const value buffer
  */
 void
-fd4_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc)
 {
 	uint32_t i, sz;
 	enum adreno_state_src src;
 
+	debug_assert((regid % 4) == 0);
+	debug_assert((sizedwords % 4) == 0);
+
 	if (prsc) {
 		sz = 0;
 		src = 0x2;  // TODO ??
@@ -67,7 +74,7 @@
 	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
 			CP_LOAD_STATE_0_STATE_SRC(src) |
-			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
 			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
 	if (prsc) {
 		struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@
 }
 
 static void
-emit_constants(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
-		struct fd_constbuf_stateobj *constbuf,
-		struct ir3_shader_variant *shader,
-		bool emit_immediates)
+fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+		uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
 {
-	uint32_t enabled_mask = constbuf->enabled_mask;
-	uint32_t max_const;
-	int i;
-
-	// XXX TODO only emit dirty consts.. but we need to keep track if
-	// they are clobbered by a clear, gmem2mem, or mem2gmem..
-	constbuf->dirty_mask = enabled_mask;
-
-	/* in particular, with binning shader we may end up with unused
-	 * consts, ie. we could end up w/ constlen that is smaller
-	 * than first_immediate.  In that case truncate the user consts
-	 * early to avoid HLSQ lockup caused by writing too many consts
-	 */
-	max_const = MIN2(shader->first_driver_param, shader->constlen);
-
-	/* emit user constants: */
-	if (enabled_mask & 1) {
-		const unsigned index = 0;
-		struct pipe_constant_buffer *cb = &constbuf->cb[index];
-		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+	uint32_t i;
 
-		// I expect that size should be a multiple of vec4's:
-		assert(size == align(size, 4));
-
-		/* and even if the start of the const buffer is before
-		 * first_immediate, the end may not be:
-		 */
-		size = MIN2(size, 4 * max_const);
+	debug_assert((regid % 4) == 0);
+	debug_assert((num % 4) == 0);
 
-		if (size && (constbuf->dirty_mask & (1 << index))) {
-			fd4_emit_constant(ring, sb, 0,
-					cb->buffer_offset, size,
-					cb->user_buffer, cb->buffer);
-			constbuf->dirty_mask &= ~(1 << index);
-		}
-
-		enabled_mask &= ~(1 << index);
-	}
-
-	/* emit ubos: */
-	if (shader->constlen > shader->first_driver_param) {
-		uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
-		OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
-		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
-				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
-				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
-				CP_LOAD_STATE_0_NUM_UNIT(params));
-		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
-				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
-		for (i = 1; i <= params * 4; i++) {
-			struct pipe_constant_buffer *cb = &constbuf->cb[i];
-			assert(!cb->user_buffer);
-			if ((enabled_mask & (1 << i)) && cb->buffer)
-				OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
-			else
-				OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
-		}
-	}
-
-	/* emit shader immediates: */
-	if (shader && emit_immediates) {
-		int size = shader->immediates_count;
-		uint32_t base = shader->first_immediate;
-
-		/* truncate size to avoid writing constants that shader
-		 * does not use:
-		 */
-		size = MIN2(size + base, shader->constlen) - base;
-
-		/* convert out of vec4: */
-		base *= 4;
-		size *= 4;
-
-		if (size > 0) {
-			fd4_emit_constant(ring, sb, base,
-				0, size, shader->immediates[0].val, NULL);
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+			CP_LOAD_STATE_0_NUM_UNIT(num/4));
+	OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+			CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+	for (i = 0; i < num; i++) {
+		if (bos[i]) {
+			if (write) {
+				OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+			} else {
+				OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+			}
+		} else {
+			OUT_RING(ring, 0xbad00000 | (i << 16));
 		}
 	}
 }
@@ -223,15 +172,19 @@
 			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
 					fd4_pipe_sampler_view(tex->textures[i]) :
 					&dummy_view;
-			struct fd_resource *rsc = fd_resource(view->base.texture);
-			unsigned start = view->base.u.tex.first_level;
-			uint32_t offset = fd_resource_offset(rsc, start, 0);
+			unsigned start = fd_sampler_first_level(&view->base);
 
 			OUT_RING(ring, view->texconst0);
 			OUT_RING(ring, view->texconst1);
 			OUT_RING(ring, view->texconst2);
 			OUT_RING(ring, view->texconst3);
-			OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+			if (view->base.texture) {
+				struct fd_resource *rsc = fd_resource(view->base.texture);
+				uint32_t offset = fd_resource_offset(rsc, start, 0);
+				OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+			} else {
+				OUT_RING(ring, 0x00000000);
+			}
 			OUT_RING(ring, 0x00000000);
 			OUT_RING(ring, 0x00000000);
 			OUT_RING(ring, 0x00000000);
@@ -244,51 +197,110 @@
  * special cases..
  */
 void
-fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
+		struct pipe_surface **bufs)
 {
-	struct fd_resource *rsc = fd_resource(psurf->texture);
-	unsigned lvl = psurf->u.tex.level;
-	struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
-	uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
-	enum pipe_format format = fd4_gmem_restore_format(psurf->format);
+	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
+	unsigned i;  /* unsigned: compared against nr_bufs in the loops below */
 
-	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+		mrt_comp[i] = (i < nr_bufs) ? 0xf : 0;
+	}
 
 	/* output sampler state: */
-	OUT_PKT3(ring, CP_LOAD_STATE, 4);
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs));
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
 			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
-			CP_LOAD_STATE_0_NUM_UNIT(1));
+			CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
 	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
-	OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
-			A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
-			A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
-			A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
-			A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
-	OUT_RING(ring, 0x00000000);
+	for (i = 0; i < nr_bufs; i++) {
+		OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
+				A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
+				A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
+				A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
+				A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
+		OUT_RING(ring, 0x00000000);
+	}
 
 	/* emit texture state: */
-	OUT_PKT3(ring, CP_LOAD_STATE, 10);
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs));
 	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
 			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
 			CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
-			CP_LOAD_STATE_0_NUM_UNIT(1));
+			CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
 	OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
 			CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
-	OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
-			A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
-			fd4_tex_swiz(format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
-					PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
-	OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) |
-			A4XX_TEX_CONST_1_HEIGHT(psurf->height));
-	OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
-	OUT_RING(ring, 0x00000000);
-	OUT_RELOC(ring, rsc->bo, offset, 0, 0);
-	OUT_RING(ring, 0x00000000);
-	OUT_RING(ring, 0x00000000);
-	OUT_RING(ring, 0x00000000);
+	for (i = 0; i < nr_bufs; i++) {
+		if (bufs[i]) {
+			struct fd_resource *rsc = fd_resource(bufs[i]->texture);
+			/* note: PIPE_BUFFER disallowed for surfaces */
+			unsigned lvl = bufs[i]->u.tex.level;
+			struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
+			uint32_t offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
+			enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format);
+
+			/* The restore blit_zs shader expects stencil in sampler 0,
+			 * and depth in sampler 1
+			 */
+			if (rsc->stencil && (i == 0)) {
+				rsc = rsc->stencil;
+				format = fd4_gmem_restore_format(rsc->base.b.format);
+			}
+
+			/* z32 restore is accomplished using depth write, so a pure
+			 * depth format (ie. no stencil component, as opposed to
+			 * PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) needs no render target.
+			 *
+			 * (z32_s8x24 can be tested here too: for the stencil sampler the
+			 * above 'if' has already replaced 'format' with the stencil's)
+			 */
+			if ((format == PIPE_FORMAT_Z32_FLOAT) ||
+					(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
+				mrt_comp[i] = 0;
+
+			debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
+
+			OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
+					A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+					fd4_tex_swiz(format,  PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+							PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
+			OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
+					A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
+			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
+			OUT_RING(ring, 0x00000000);
+			OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+		} else {
+			OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
+					A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+					A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
+					A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
+					A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
+					A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
+			OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) |
+					A4XX_TEX_CONST_1_HEIGHT(0));
+			OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+		}
+	}
+
+	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
 }
 
 void
@@ -298,7 +310,9 @@
 	uint32_t total_in = 0;
 	const struct fd_vertex_state *vtx = emit->vtx;
 	struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
-	unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+	unsigned vertex_regid = regid(63, 0);
+	unsigned instance_regid = regid(63, 0);
+	unsigned vtxcnt_regid = regid(63, 0);
 
 	for (i = 0; i < vp->inputs_count; i++) {
 		uint8_t semantic = sem2name(vp->inputs[i].semantic);
@@ -306,6 +320,8 @@
 			vertex_regid = vp->inputs[i].regid;
 		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
 			instance_regid = vp->inputs[i].regid;
+		else if (semantic == IR3_SEMANTIC_VTXCNT)
+			vtxcnt_regid = vp->inputs[i].regid;
 		else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
 			last = i;
 	}
@@ -313,7 +329,8 @@
 	/* hw doesn't like to be configured for zero vbo's, it seems: */
 	if ((vtx->vtx->num_elements == 0) &&
 			(vertex_regid == regid(63, 0)) &&
-			(instance_regid == regid(63, 0)))
+			(instance_regid == regid(63, 0)) &&
+			(vtxcnt_regid == regid(63, 0)))
 		return;
 
 	for (i = 0, j = 0; i <= last; i++) {
@@ -327,7 +344,8 @@
 			enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
 			bool switchnext = (i != last) ||
 					(vertex_regid != regid(63, 0)) ||
-					(instance_regid != regid(63, 0));
+					(instance_regid != regid(63, 0)) ||
+					(vtxcnt_regid != regid(63, 0));
 			bool isint = util_format_is_pure_integer(pfmt);
 			uint32_t fs = util_format_get_blocksize(pfmt);
 			uint32_t off = vb->buffer_offset + elem->src_offset;
@@ -368,7 +386,7 @@
 			A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
 			A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
 	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_2 */
-	OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(regid(63, 0)));
+	OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
 	OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_4 */
 
 	/* cache invalidate, otherwise vertex fetch could see
@@ -389,6 +407,25 @@
 
 	emit_marker(ring, 5);
 
+	if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
+		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+		unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
+
+		for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+			mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+		}
+
+		OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+		OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+				A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+				A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+				A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+				A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+				A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+				A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+				A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+	}
+
 	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
 		uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;
 
@@ -513,43 +550,24 @@
 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
 	}
 
-	if (dirty & FD_DIRTY_PROG)
-		fd4_program_emit(ring, emit);
-
-	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
-			/* evil hack to deal sanely with clear path: */
-			(emit->prog == &ctx->prog)) {
-		fd_wfi(ctx, ring);
-		emit_constants(ring,  SB_VERT_SHADER,
-				&ctx->constbuf[PIPE_SHADER_VERTEX],
-				vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
-		if (!emit->key.binning_pass) {
-			emit_constants(ring, SB_FRAG_SHADER,
-					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
-					fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
-		}
+	if (dirty & FD_DIRTY_PROG) {
+		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+		fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
 	}
 
-	/* emit driver params every time */
-	if (emit->info && emit->prog == &ctx->prog) {
-		uint32_t vertex_params[4] = {
-			emit->info->indexed ? emit->info->index_bias : emit->info->start,
-			0,
-			0,
-			0
-		};
-		if (vp->constlen >= vp->first_driver_param + 4) {
-			fd4_emit_constant(ring, SB_VERT_SHADER,
-							  (vp->first_driver_param + 4) * 4,
-							  0, 4, vertex_params, NULL);
-		}
+	if (emit->prog == &ctx->prog) { /* evil hack: skip when called with another prog (eg. solid_prog on the clear path) */
+		ir3_emit_consts(vp, ring, emit->info, dirty);
+		if (!emit->key.binning_pass)
+			ir3_emit_consts(fp, ring, emit->info, dirty);
+		/* mark the program clean once its consts have been emitted: */
+		ctx->prog.dirty = 0;
 	}
 
 	if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
 		struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
 		uint32_t i;
 
-		for (i = 0; i < 8; i++) {
+		for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
 			OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
 			OUT_RING(ring, blend->rb_mrt[i].control);
 
@@ -607,10 +625,10 @@
 	OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
 	OUT_RING(ring, 0x00000000);
 
-	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
+	OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
 	OUT_RING(ring, 0x00000006);
 
-	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
+	OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
 	OUT_RING(ring, 0x0000003a);
 
 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
@@ -629,7 +647,7 @@
 	OUT_RING(ring, 0x00000000);
 	OUT_RING(ring, 0x00000012);
 
-	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
+	OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
 	OUT_RING(ring, 0x00000000);
 
 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
@@ -752,9 +770,6 @@
 	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
 	OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
 
-	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
-	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf));
-
 	OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
 	OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
 
@@ -763,3 +778,11 @@
 
 	ctx->needs_rb_fbd = true;
 }
+
+void
+fd4_emit_init(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->emit_const = fd4_emit_const;
+	ctx->emit_const_bo = fd4_emit_const_bo;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_emit.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_emit.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_emit.h	2015-09-16 14:36:09.000000000 +0000
@@ -37,15 +37,13 @@
 #include "ir3_shader.h"
 
 struct fd_ringbuffer;
-enum adreno_state_block;
 
-void fd4_emit_constant(struct fd_ringbuffer *ring,
-		enum adreno_state_block sb,
+void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
 		uint32_t regid, uint32_t offset, uint32_t sizedwords,
 		const uint32_t *dwords, struct pipe_resource *prsc);
 
 void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
-		struct pipe_surface *psurf);
+		unsigned nr_bufs, struct pipe_surface **bufs);
 
 /* grouped together emit-state for prog/vertex/state emit: */
 struct fd4_emit {
@@ -53,10 +51,13 @@
 	const struct fd_program_stateobj *prog;
 	const struct pipe_draw_info *info;
 	struct ir3_shader_key key;
-	enum a4xx_color_fmt format;
-	enum pipe_format pformat;
 	uint32_t dirty;
 
+	uint32_t sprite_coord_enable;  /* bitmask */
+	bool sprite_coord_mode;
+	bool rasterflat;
+	bool no_decode_srgb;
+
 	/* cached to avoid repeated lookups of same variants: */
 	struct ir3_shader_variant *vp, *fp;
 	/* TODO: other shader stages.. */
@@ -96,4 +97,6 @@
 
 void fd4_emit_restore(struct fd_context *ctx);
 
+void fd4_emit_init(struct pipe_context *pctx);
+
 #endif /* FD4_EMIT_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_format.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_format.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_format.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_format.c	2015-09-16 14:36:09.000000000 +0000
@@ -79,9 +79,9 @@
 static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	/* 8-bit */
 	VT(R8_UNORM,   8_UNORM, R8_UNORM, WZYX),
-	V_(R8_SNORM,   8_SNORM, NONE,     WZYX),
-	V_(R8_UINT,    8_UINT,  NONE,     WZYX),
-	V_(R8_SINT,    8_SINT,  NONE,     WZYX),
+	VT(R8_SNORM,   8_SNORM, NONE,     WZYX),
+	VT(R8_UINT,    8_UINT,  NONE,     WZYX),
+	VT(R8_SINT,    8_SINT,  NONE,     WZYX),
 	V_(R8_USCALED, 8_UINT,  NONE,     WZYX),
 	V_(R8_SSCALED, 8_UINT,  NONE,     WZYX),
 
@@ -89,6 +89,14 @@
 	_T(L8_UNORM,   8_UNORM, R8_UNORM, WZYX),
 	_T(I8_UNORM,   8_UNORM, NONE,     WZYX),
 
+	/* NOTE: the texture format should really be TFMT_8_UINT (which
+	 * fd4_gmem_restore_format() would then remap to an 8_UNORM format
+	 * for mem2gmem), but the TFMT_8_UINT encoding is not known yet, so
+	 * TFMT_8_UNORM is used for now.  Sampling from stencil as a texture
+	 * might not work right, but at least should be fine for zsbuf..
+	 */
+	_T(S8_UINT,    8_UNORM,  R8_UNORM, WZYX),
+
 	/* 16-bit */
 	V_(R16_UNORM,   16_UNORM, NONE,     WZYX),
 	V_(R16_SNORM,   16_SNORM, NONE,     WZYX),
@@ -96,7 +104,7 @@
 	VT(R16_SINT,    16_SINT,  R16_SINT, WZYX),
 	V_(R16_USCALED, 16_UINT,  NONE,     WZYX),
 	V_(R16_SSCALED, 16_UINT,  NONE,     WZYX),
-	VT(R16_FLOAT,   16_FLOAT, NONE,     WZYX),
+	VT(R16_FLOAT,   16_FLOAT, R16_FLOAT,WZYX),
 
 	_T(A16_UINT,    16_UINT,  NONE,     WZYX),
 	_T(A16_SINT,    16_SINT,  NONE,     WZYX),
@@ -107,8 +115,8 @@
 
 	VT(R8G8_UNORM,   8_8_UNORM, R8G8_UNORM, WZYX),
 	VT(R8G8_SNORM,   8_8_SNORM, R8G8_SNORM, WZYX),
-	VT(R8G8_UINT,    8_8_UINT,  NONE,       WZYX),
-	VT(R8G8_SINT,    8_8_SINT,  NONE,       WZYX),
+	VT(R8G8_UINT,    8_8_UINT,  R8G8_UINT,  WZYX),
+	VT(R8G8_SINT,    8_8_SINT,  R8G8_SINT,  WZYX),
 	V_(R8G8_USCALED, 8_8_UINT,  NONE,       WZYX),
 	V_(R8G8_SSCALED, 8_8_SINT,  NONE,       WZYX),
 
@@ -132,7 +140,7 @@
 	VT(R32_SINT,    32_SINT,  R32_SINT, WZYX),
 	V_(R32_USCALED, 32_UINT,  NONE,     WZYX),
 	V_(R32_SSCALED, 32_UINT,  NONE,     WZYX),
-	VT(R32_FLOAT,   32_FLOAT, NONE,     WZYX),
+	VT(R32_FLOAT,   32_FLOAT, R32_FLOAT,WZYX),
 	V_(R32_FIXED,   32_FIXED, NONE,     WZYX),
 
 	_T(A32_UINT,    32_UINT,  NONE,     WZYX),
@@ -148,7 +156,7 @@
 	VT(R16G16_SINT,    16_16_SINT,  R16G16_SINT, WZYX),
 	V_(R16G16_USCALED, 16_16_UINT,  NONE,        WZYX),
 	V_(R16G16_SSCALED, 16_16_SINT,  NONE,        WZYX),
-	VT(R16G16_FLOAT,   16_16_FLOAT, NONE,        WZYX),
+	VT(R16G16_FLOAT,   16_16_FLOAT, R16G16_FLOAT,WZYX),
 
 	_T(L16A16_UINT,    16_16_UINT,  NONE,        WZYX),
 	_T(L16A16_SINT,    16_16_SINT,  NONE,        WZYX),
@@ -191,7 +199,8 @@
 
 	_T(Z24X8_UNORM,       X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
 	_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
-	/*_T(Z32_FLOAT,         Z32_FLOAT,   R8G8B8A8_UNORM, WZYX),*/
+	_T(Z32_FLOAT,         32_FLOAT,   R8G8B8A8_UNORM, WZYX),
+	_T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX),
 
 	/* 48-bit */
 	V_(R16G16B16_UNORM,   16_16_16_UNORM, NONE, WZYX),
@@ -218,7 +227,7 @@
 	VT(R32G32_SINT,    32_32_SINT,  R32G32_SINT, WZYX),
 	V_(R32G32_USCALED, 32_32_UINT,  NONE,        WZYX),
 	V_(R32G32_SSCALED, 32_32_SINT,  NONE,        WZYX),
-	VT(R32G32_FLOAT,   32_32_FLOAT, NONE,        WZYX),
+	VT(R32G32_FLOAT,   32_32_FLOAT, R32G32_FLOAT,WZYX),
 	V_(R32G32_FIXED,   32_32_FIXED, NONE,        WZYX),
 
 	_T(L32A32_UINT,    32_32_UINT,  NONE,        WZYX),
@@ -282,6 +291,9 @@
 enum a4xx_tex_fetchsize
 fd4_pipe2fetchsize(enum pipe_format format)
 {
+	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+		format = PIPE_FORMAT_Z32_FLOAT;
+
 	switch (util_format_get_blocksizebits(format)) {
 	case 8:   return TFETCH4_1_BYTE;
 	case 16:  return TFETCH4_2_BYTE;
@@ -312,6 +324,8 @@
 		return PIPE_FORMAT_R8G8B8A8_UNORM;
 	case PIPE_FORMAT_Z16_UNORM:
 		return PIPE_FORMAT_R8G8_UNORM;
+	case PIPE_FORMAT_S8_UINT:
+		return PIPE_FORMAT_R8_UNORM;
 	default:
 		return format;
 	}
@@ -328,6 +342,9 @@
 	case PIPE_FORMAT_X8Z24_UNORM:
 	case PIPE_FORMAT_S8_UINT_Z24_UNORM:
 		return DEPTH4_24_8;
+	case PIPE_FORMAT_Z32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+		return DEPTH4_32;
 	default:
 		return ~0;
 	}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,15 +44,10 @@
 #include "fd4_format.h"
 #include "fd4_zsa.h"
 
-static const struct ir3_shader_key key = {
-		// XXX should set this based on render target format!  We don't
-		// want half_precision if float32 render target!!!
-		.half_precision = true,
-};
-
 static void
 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
-		struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
+		struct pipe_surface **bufs, uint32_t *bases,
+		uint32_t bin_w, bool decode_srgb)
 {
 	enum a4xx_tile_mode tile_mode;
 	unsigned i;
@@ -63,9 +58,10 @@
 		tile_mode = TILE4_LINEAR;
 	}
 
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
 		enum a4xx_color_fmt format = 0;
 		enum a3xx_color_swap swap = WZYX;
+		bool srgb = false;
 		struct fd_resource *rsc = NULL;
 		struct fd_resource_slice *slice = NULL;
 		uint32_t stride = 0;
@@ -74,11 +70,27 @@
 
 		if ((i < nr_bufs) && bufs[i]) {
 			struct pipe_surface *psurf = bufs[i];
+			enum pipe_format pformat = psurf->format;
 
 			rsc = fd_resource(psurf->texture);
+
+			/* In case we're drawing to Z32F_S8 (a resource with a separate
+			 * stencil), the "color" actually goes to the stencil resource
+			 */
+			if (rsc->stencil) {
+				rsc = rsc->stencil;
+				pformat = rsc->base.b.format;
+				bases++;  /* NOTE(review): presumably skips the depth base to reach the stencil base -- confirm with callers */
+			}
+
 			slice = fd_resource_slice(rsc, psurf->u.tex.level);
-			format = fd4_pipe2color(psurf->format);
-			swap = fd4_pipe2swap(psurf->format);
+			format = fd4_pipe2color(pformat);
+			swap = fd4_pipe2swap(pformat);
+
+			if (decode_srgb)
+				srgb = util_format_is_srgb(pformat);
+			else
+				pformat = util_format_linear(pformat); /* NOTE(review): format/swap were already derived above; confirm pformat is still read after this */
 
 			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
 
@@ -94,14 +106,17 @@
 			} else {
 				stride = slice->pitch * rsc->cpp;
 			}
+		} else if ((i < nr_bufs) && bases) {
+			base = bases[i];
 		}
 
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
 		OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
 				A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
 				A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
-				A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
-		if (bin_w || (i >= nr_bufs)) {
+				A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
+				COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB));
+		if (bin_w || (i >= nr_bufs) || !bufs[i]) {
 			OUT_RING(ring, base);
 			OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
 		} else {
@@ -115,30 +130,26 @@
 	}
 }
 
-static uint32_t
-depth_base(struct fd_context *ctx)
-{
-	struct fd_gmem_stateobj *gmem = &ctx->gmem;
-	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-	uint32_t cpp = 4;
-	if (pfb->cbufs[0]) {
-		struct fd_resource *rsc =
-				fd_resource(pfb->cbufs[0]->texture);
-		cpp = rsc->cpp;
-	}
-	return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
-}
-
 /* transfer from gmem to system memory (ie. normal RAM) */
 
 static void
-emit_gmem2mem_surf(struct fd_context *ctx,
+emit_gmem2mem_surf(struct fd_context *ctx, bool stencil,
 		uint32_t base, struct pipe_surface *psurf)
 {
 	struct fd_ringbuffer *ring = ctx->ring;
 	struct fd_resource *rsc = fd_resource(psurf->texture);
-	struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
-	uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
+	enum pipe_format pformat = psurf->format;
+	struct fd_resource_slice *slice;
+	uint32_t offset;
+
+	if (stencil) {
+		debug_assert(rsc->stencil);
+		rsc = rsc->stencil;
+		pformat = rsc->base.b.format;
+	}
+
+	slice = &rsc->slices[psurf->u.tex.level];
+	offset = fd_resource_offset(rsc, psurf->u.tex.level,
 			psurf->u.tex.first_layer);
 
 	debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
@@ -150,10 +161,10 @@
 	OUT_RELOCW(ring, rsc->bo, offset, 0, 0);   /* RB_COPY_DEST_BASE */
 	OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
 	OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
-			A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |
+			A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
 			A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
 			A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
-			A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format)));
+			A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
 
 	fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
 			DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -163,13 +174,15 @@
 fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 {
 	struct fd4_context *fd4_ctx = fd4_context(ctx);
+	struct fd_gmem_stateobj *gmem = &ctx->gmem;
 	struct fd_ringbuffer *ring = ctx->ring;
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 	struct fd4_emit emit = {
 			.vtx = &fd4_ctx->solid_vbuf_state,
 			.prog = &ctx->solid_prog,
-			.key = key,
-			.format = fd4_emit_format(pfb->cbufs[0]),
+			.key = {
+				.half_precision = true,
+			},
 	};
 
 	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
@@ -238,16 +251,26 @@
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */
 
-	fd4_program_emit(ring, &emit);
+	fd4_program_emit(ring, &emit, 0, NULL);
 	fd4_emit_vertex_bufs(ring, &emit);
 
 	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
-		uint32_t base = depth_base(ctx);
-		emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
+		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+		if (!rsc->stencil || (ctx->resolve & FD_BUFFER_DEPTH))
+			emit_gmem2mem_surf(ctx, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf);
+		if (rsc->stencil && (ctx->resolve & FD_BUFFER_STENCIL))
+			emit_gmem2mem_surf(ctx, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf);
 	}
 
 	if (ctx->resolve & FD_BUFFER_COLOR) {
-		emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
+		unsigned i;
+		for (i = 0; i < pfb->nr_cbufs; i++) {
+			if (!pfb->cbufs[i])
+				continue;
+			if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
+				continue;
+			emit_gmem2mem_surf(ctx, false, gmem->cbuf_base[i], pfb->cbufs[i]);
+		}
 	}
 
 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
@@ -260,14 +283,25 @@
 /* transfer from system memory to gmem */
 
 static void
-emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
-		struct pipe_surface *psurf, uint32_t bin_w)
+emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases,
+		struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
 {
 	struct fd_ringbuffer *ring = ctx->ring;
+	struct pipe_surface *zsbufs[2];
 
-	emit_mrt(ring, 1, &psurf, &base, bin_w);
+	emit_mrt(ring, nr_bufs, bufs, bases, bin_w, false);
 
-	fd4_emit_gmem_restore_tex(ring, psurf);
+	if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+		/* The gmem_restore_tex logic will put the first buffer's stencil
+		 * as color. Supply it with the proper information to make that
+		 * happen.
+		 */
+		zsbufs[0] = zsbufs[1] = bufs[0];
+		bufs = zsbufs;
+		nr_bufs = 2;
+	}
+
+	fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
 
 	fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
 			DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -282,10 +316,15 @@
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 	struct fd4_emit emit = {
 			.vtx = &fd4_ctx->blit_vbuf_state,
+			.sprite_coord_enable = 1,
+			/* NOTE: They all use the same VP, this is for vtx bufs. */
 			.prog = &ctx->blit_prog[0],
-			.key = key,
-			.format = fd4_emit_format(pfb->cbufs[0]),
+			.key = {
+				.half_precision = fd_half_precision(pfb),
+			},
+			.no_decode_srgb = true,
 	};
+	unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
 	float x0, y0, x1, y1;
 	unsigned bin_w = tile->bin_w;
 	unsigned bin_h = tile->bin_h;
@@ -304,7 +343,9 @@
 	OUT_RING(ring, fui(x1));
 	OUT_RING(ring, fui(y1));
 
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+		mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+
 		OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
 		OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
 				A4XX_RB_MRT_CONTROL_B11 |
@@ -319,6 +360,16 @@
 				A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
 	}
 
+	OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+	OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+			A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+			A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+			A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+			A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+			A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+			A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+			A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
 	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
 	OUT_RING(ring, 0x8);          /* XXX RB_RENDER_CONTROL */
 
@@ -381,7 +432,6 @@
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
 	OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */
 
-	fd4_program_emit(ring, &emit);
 	fd4_emit_vertex_bufs(ring, &emit);
 
 	/* for gmem pitch/base calculations, we need to use the non-
@@ -390,11 +440,46 @@
 	bin_w = gmem->bin_w;
 	bin_h = gmem->bin_h;
 
-	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
-		emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
-
-	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
-		emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
+	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
+		emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+		emit.fp = NULL;      /* frag shader changed so clear cache */
+		fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
+		emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
+	}
+
+	if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+		switch (pfb->zsbuf->format) {
+		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+		case PIPE_FORMAT_Z32_FLOAT:
+			emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ?
+					&ctx->blit_z : &ctx->blit_zs;
+			emit.key.half_precision = false;
+
+			OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+			OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
+					A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+					A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
+					A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);
+
+			OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+			OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);
+
+			OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+			OUT_RING(ring, 0x80000);   /* GRAS_CL_CLIP_CNTL */
+
+			break;
+		default:
+			/* Non-float can use a regular color write. It's split over 8-bit
+			 * components, so half precision is always sufficient.
+			 */
+			emit.prog = &ctx->blit_prog[0];
+			emit.key.half_precision = true;
+			break;
+		}
+		emit.fp = NULL;      /* frag shader changed so clear cache */
+		fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
+		emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+	}
 
 	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
 	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
@@ -443,7 +528,7 @@
 	OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
 			A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
 
-	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
+	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
 
 	/* setup scissor/offset for current tile: */
 	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
@@ -534,21 +619,35 @@
 	struct fd_ringbuffer *ring = ctx->ring;
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 	struct fd_gmem_stateobj *gmem = &ctx->gmem;
-	uint32_t reg;
 
-	OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
-	reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
 	if (pfb->zsbuf) {
-		reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format));
-	}
-	OUT_RING(ring, reg);
-	if (pfb->zsbuf) {
-		uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
+		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+		uint32_t cpp = rsc->cpp;
+
+		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
+		OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
+				A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)));
 		OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
 		OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
+
+		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+		if (rsc->stencil) {
+			OUT_RING(ring, A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
+					A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
+			OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
+		} else {
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+		}
 	} else {
+		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
 		OUT_RING(ring, 0x00000000);
 		OUT_RING(ring, 0x00000000);
+		OUT_RING(ring, 0x00000000);
+
+		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+		OUT_RING(ring, 0);            /* RB_STENCIL_INFO */
+		OUT_RING(ring, 0);            /* RB_STENCIL_PITCH */
 	}
 
 	OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
@@ -586,7 +685,7 @@
 	OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
 	OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
 
-	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+	emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
 
 	/* setup scissor/offset for current tile: */
 	OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_program.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,8 +31,6 @@
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
 
 #include "freedreno_program.h"
 
@@ -53,7 +51,7 @@
 		enum shader_t type)
 {
 	struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj);
-	so->shader = ir3_shader_create(pctx, cso->tokens, type);
+	so->shader = ir3_shader_create(pctx, cso, type);
 	return so;
 }
 
@@ -213,14 +211,17 @@
 }
 
 void
-fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
+		int nr, struct pipe_surface **bufs)
 {
 	struct stage s[MAX_STAGES];
-	uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+	uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
 	uint32_t face_regid, coord_regid, zwcoord_regid;
 	int constmode;
 	int i, j, k;
 
+	debug_assert(nr <= ARRAY_SIZE(color_regid));
+
 	setup_stages(emit, s);
 
 	/* blob seems to always use constmode currently: */
@@ -232,11 +233,30 @@
 		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
 	psize_regid = ir3_find_output_regid(s[VS].v,
 		ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
-	color_regid = ir3_find_output_regid(s[FS].v,
-		ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+	if (s[FS].v->color0_mrt) {
+		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+			ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+	} else {
+		const struct ir3_shader_variant *fp = s[FS].v;
+		memset(color_regid, 0, sizeof(color_regid));
+		for (i = 0; i < fp->outputs_count; i++) {
+			ir3_semantic sem = fp->outputs[i].semantic;
+			unsigned idx = sem2idx(sem);
+			if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
+				continue;
+			debug_assert(idx < ARRAY_SIZE(color_regid));
+			color_regid[idx] = fp->outputs[i].regid;
+		}
+	}
+
+	/* adjust regids for alpha output formats. there is no alpha render
+	 * format, so it's just treated like red
+	 */
+	for (i = 0; i < nr; i++)
+		if (util_format_is_alpha(pipe_surface_format(bufs[i])))
+			color_regid[i] += 3;
 
-	if (util_format_is_alpha(emit->pformat))
-		color_regid += 3;
 
 	/* TODO get these dynamically: */
 	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
@@ -419,29 +439,29 @@
 					A4XX_RB_RENDER_CONTROL2_WCOORD));
 
 	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
-	OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(1) |
+	OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
 			COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));
 
 	OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
-	if (s[FS].v->writes_pos) {
-		OUT_RING(ring, 0x00000001 |
-				A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
-				A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
-	} else {
-		OUT_RING(ring, 0x00000001);
-	}
+	OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
+			COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
+			A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
 
 	OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) |
-			A4XX_SP_FS_MRT_REG_MRTFORMAT(emit->format) |
-			COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION));
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
-	OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+	for (i = 0; i < 8; i++) {
+		enum a4xx_color_fmt format = 0;
+		bool srgb = false;
+		if (i < nr) {
+			format = fd4_emit_format(bufs[i]);
+			if (bufs[i] && !emit->no_decode_srgb)
+				srgb = util_format_is_srgb(bufs[i]->format);
+		}
+		OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
+				A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
+				COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) |
+				COND(emit->key.half_precision,
+					A4XX_SP_FS_MRT_REG_HALF_PRECISION));
+	}
 
 	if (emit->key.binning_pass) {
 		OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
@@ -450,10 +470,10 @@
 				COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
 		OUT_RING(ring, 0x00000000);
 	} else {
-		uint32_t vinterp[8], flatshade[2];
+		uint32_t vinterp[8], vpsrepl[8];
 
 		memset(vinterp, 0, sizeof(vinterp));
-		memset(flatshade, 0, sizeof(flatshade));
+		memset(vpsrepl, 0, sizeof(vpsrepl));
 
 		/* looks like we need to do int varyings in the frag
 		 * shader on a4xx (no flatshad reg?  or a420.0 bug?):
@@ -470,29 +490,40 @@
 		 * something like the code below instead of workaround
 		 * in the shader:
 		 */
-#if 0
-		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
+		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
 		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
 			uint32_t interp = s[FS].v->inputs[j].interpolate;
+
+			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
+			 * instead.. rather than -8 everywhere else..
+			 */
+			uint32_t inloc = s[FS].v->inputs[j].inloc - 8;
+
+			/* currently assuming varyings aligned to 4 (not
+			 * packed):
+			 */
+			debug_assert((inloc % 4) == 0);
+
 			if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
 					((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
-				/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
-				 * instead.. rather than -8 everywhere else..
-				 */
-				uint32_t loc = s[FS].v->inputs[j].inloc - 8;
-
-				/* currently assuming varyings aligned to 4 (not
-				 * packed):
-				 */
-				debug_assert((loc % 4) == 0);
+				uint32_t loc = inloc;
 
 				for (i = 0; i < 4; i++, loc++) {
 					vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
-					flatshade[loc / 32] |= 1 << (loc % 32);
+					//flatshade[loc / 32] |= 1 << (loc % 32);
 				}
 			}
+
+			/* Replace the .xy coordinates with S/T from the point sprite. Set
+			 * interpolation bits for .zw such that they become .01
+			 */
+			if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) {
+				vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+					<< ((inloc % 16) * 2);
+				vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+				vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+			}
 		}
-#endif
 
 		OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
 		OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
@@ -509,7 +540,7 @@
 
 		OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
 		for (i = 0; i < 8; i++)
-			OUT_RING(ring, s[FS].v->shader->vpsrepl[i]);   /* VPC_VARYING_PS_REPL[i] */
+			OUT_RING(ring, vpsrepl[i]);   /* VPC_VARYING_PS_REPL[i] */
 	}
 
 	if (s[VS].instrlen)
@@ -520,19 +551,6 @@
 			emit_shader(ring, s[FS].v);
 }
 
-/* hack.. until we figure out how to deal w/ vpsrepl properly.. */
-static void
-fix_blit_fp(struct pipe_context *pctx)
-{
-	struct fd_context *ctx = fd_context(pctx);
-	struct fd4_shader_stateobj *so = ctx->blit_prog[0].fp;
-
-	so->shader->vpsrepl[0] = 0x99999999;
-	so->shader->vpsrepl[1] = 0x99999999;
-	so->shader->vpsrepl[2] = 0x99999999;
-	so->shader->vpsrepl[3] = 0x99999999;
-}
-
 void
 fd4_prog_init(struct pipe_context *pctx)
 {
@@ -543,6 +561,4 @@
 	pctx->delete_vs_state = fd4_vp_state_delete;
 
 	fd_prog_init(pctx);
-
-	fix_blit_fp(pctx);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_program.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_program.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_program.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_program.h	2015-09-16 14:36:09.000000000 +0000
@@ -39,7 +39,8 @@
 
 struct fd4_emit;
 
-void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
+		int nr, struct pipe_surface **bufs);
 
 void fd4_prog_init(struct pipe_context *pctx);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_query.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,9 +31,93 @@
 #include "freedreno_util.h"
 
 #include "fd4_query.h"
+#include "fd4_draw.h"
 #include "fd4_format.h"
 
+
+struct fd_rb_samp_ctrs {
+	uint64_t ctr[16];
+};
+
+/*
+ * Occlusion Query:
+ *
+ * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
+ * interpret results
+ */
+
+static struct fd_hw_sample *
+occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+	struct fd_hw_sample *samp =
+			fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+
+	/* low bits of sample addr should be zero (since they are control
+	 * flags in RB_SAMPLE_COUNT_CONTROL):
+	 */
+	debug_assert((samp->offset & 0x3) == 0);
+
+	/* Set RB_SAMPLE_COUNT_CONTROL to samp->offset (with the COPY flag
+	 * or'd in) plus value of HW_QUERY_BASE_REG register:
+	 */
+	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+	OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
+	OUT_RING(ring, HW_QUERY_BASE_REG);
+	OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY |
+			samp->offset);
+
+	OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3);
+	OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
+						INDEX4_SIZE_32_BIT, USE_VISIBILITY));
+	OUT_RING(ring, 1);             /* NumInstances */
+	OUT_RING(ring, 0);             /* NumIndices */
+
+	fd_event_write(ctx, ring, ZPASS_DONE);
+
+	return samp;
+}
+
+static uint64_t
+count_samples(const struct fd_rb_samp_ctrs *start,
+		const struct fd_rb_samp_ctrs *end)
+{
+	return end->ctr[0] - start->ctr[0];   /* only ctr[0] of the 16 counters is compared */
+}
+
+static void
+occlusion_counter_accumulate_result(struct fd_context *ctx,
+		const void *start, const void *end,
+		union pipe_query_result *result)
+{
+	uint64_t n = count_samples(start, end);
+	result->u64 += n;
+}
+
+static void
+occlusion_predicate_accumulate_result(struct fd_context *ctx,
+		const void *start, const void *end,
+		union pipe_query_result *result)
+{
+	uint64_t n = count_samples(start, end);
+	result->b |= (n > 0);   /* sticky: a nonzero delta sets it, a zero delta never clears it */
+}
+
+static const struct fd_hw_sample_provider occlusion_counter = {
+		.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+		.active = FD_STAGE_DRAW,
+		.get_sample = occlusion_get_sample,
+		.accumulate_result = occlusion_counter_accumulate_result,
+};
+
+static const struct fd_hw_sample_provider occlusion_predicate = {
+		.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+		.active = FD_STAGE_DRAW,
+		.get_sample = occlusion_get_sample,
+		.accumulate_result = occlusion_predicate_accumulate_result,
+};
+
 void fd4_query_context_init(struct pipe_context *pctx)
 {
-	/* TODO */
+	fd_hw_query_register_provider(pctx, &occlusion_counter);
+	fd_hw_query_register_provider(pctx, &occlusion_predicate);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c	2015-09-16 14:36:09.000000000 +0000
@@ -50,7 +50,7 @@
 
 	if (cso->point_size_per_vertex) {
 		psize_min = util_get_min_point_size(cso);
-		psize_max = 8192;
+		psize_max = 4092;
 	} else {
 		/* Force the point size to be as if the vertex output was disabled. */
 		psize_min = cso->point_size;
@@ -67,9 +67,9 @@
 */
 	so->gras_cl_clip_cntl = 0x80000; /* ??? */
 	so->gras_su_point_minmax =
-			A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
-			A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
-	so->gras_su_point_size   = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
+			A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+			A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+	so->gras_su_point_size   = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
 	so->gras_su_poly_offset_scale =
 			A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
 	so->gras_su_poly_offset_offset =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
 	uint32_t pc_prim_vtx_cntl;
 };
 
-static INLINE struct fd4_rasterizer_stateobj *
+static inline struct fd4_rasterizer_stateobj *
 fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
 {
 	return (struct fd4_rasterizer_stateobj *)rast;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,7 @@
 #include "fd4_screen.h"
 #include "fd4_context.h"
 #include "fd4_format.h"
+#include "ir3_compiler.h"
 
 static boolean
 fd4_screen_is_format_supported(struct pipe_screen *pscreen,
@@ -100,7 +101,9 @@
 void
 fd4_screen_init(struct pipe_screen *pscreen)
 {
-	fd_screen(pscreen)->max_rts = 1;
+	struct fd_screen *screen = fd_screen(pscreen);
+	screen->max_rts = A4XX_MAX_RENDER_TARGETS;
+	screen->compiler = ir3_compiler_create(screen->gpu_id);
 	pscreen->context_create = fd4_context_create;
 	pscreen->is_format_supported = fd4_screen_is_format_supported;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_texture.c	2015-09-16 14:36:09.000000000 +0000
@@ -150,8 +150,8 @@
 {
 	struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
 	struct fd_resource *rsc = fd_resource(prsc);
-	unsigned lvl = cso->u.tex.first_level;
-	unsigned miplevels = cso->u.tex.last_level - lvl;
+	unsigned lvl = fd_sampler_first_level(cso);
+	unsigned miplevels = fd_sampler_last_level(cso) - lvl;
 
 	if (!so)
 		return NULL;
@@ -187,9 +187,9 @@
 			A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
 		break;
 	case PIPE_TEXTURE_CUBE:
-	case PIPE_TEXTURE_CUBE_ARRAY:  /* ?? not sure about _CUBE_ARRAY */
+	case PIPE_TEXTURE_CUBE_ARRAY:
 		so->texconst3 =
-			A4XX_TEX_CONST_3_DEPTH(1) |
+			A4XX_TEX_CONST_3_DEPTH(prsc->array_size / 6) |
 			A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
 		break;
 	case PIPE_TEXTURE_3D:
@@ -205,43 +205,11 @@
 	return &so->base;
 }
 
-static void
-fd4_set_sampler_views(struct pipe_context *pctx, unsigned shader,
-		unsigned start, unsigned nr, struct pipe_sampler_view **views)
-{
-	struct fd_context *ctx = fd_context(pctx);
-	struct fd4_context *fd4_ctx = fd4_context(ctx);
-	struct fd_texture_stateobj *tex;
-	uint16_t integer_s = 0, *ptr;
-	int i;
-
-	fd_set_sampler_views(pctx, shader, start, nr, views);
-
-	switch (shader) {
-	case PIPE_SHADER_FRAGMENT:
-		tex = &ctx->fragtex;
-		ptr = &fd4_ctx->finteger_s;
-		break;
-	case PIPE_SHADER_VERTEX:
-		tex = &ctx->verttex;
-		ptr = &fd4_ctx->vinteger_s;
-		break;
-	default:
-		return;
-	}
-
-	for (i = 0; i < tex->num_textures; i++)
-		if (util_format_is_pure_integer(tex->textures[i]->format))
-			integer_s |= 1 << i;
-
-	*ptr = integer_s;
-}
-
 void
 fd4_texture_init(struct pipe_context *pctx)
 {
 	pctx->create_sampler_state = fd4_sampler_state_create;
 	pctx->bind_sampler_states = fd_sampler_states_bind;
 	pctx->create_sampler_view = fd4_sampler_view_create;
-	pctx->set_sampler_views = fd4_set_sampler_views;
+	pctx->set_sampler_views = fd_set_sampler_views;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_texture.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_texture.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_texture.h	2015-09-16 14:36:09.000000000 +0000
@@ -42,7 +42,7 @@
 	uint32_t texsamp0, texsamp1;
 };
 
-static INLINE struct fd4_sampler_stateobj *
+static inline struct fd4_sampler_stateobj *
 fd4_sampler_stateobj(struct pipe_sampler_state *samp)
 {
 	return (struct fd4_sampler_stateobj *)samp;
@@ -53,7 +53,7 @@
 	uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
 };
 
-static INLINE struct fd4_pipe_sampler_view *
+static inline struct fd4_pipe_sampler_view *
 fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
 {
 	return (struct fd4_pipe_sampler_view *)pview;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h	2015-09-16 14:36:09.000000000 +0000
@@ -47,7 +47,7 @@
 	uint32_t rb_stencilrefmask_bf;
 };
 
-static INLINE struct fd4_zsa_stateobj *
+static inline struct fd4_zsa_stateobj *
 fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
 {
 	return (struct fd4_zsa_stateobj *)zsa;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/adreno_common.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/adreno_common.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/adreno_common.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/adreno_common.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -8,15 +8,15 @@
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
 
-Copyright (C) 2013-2014 by the following authors:
+Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 
 Permission is hereby granted, free of charge, to any person obtaining
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/adreno_pm4.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/adreno_pm4.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/adreno_pm4.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/adreno_pm4.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -8,13 +8,13 @@
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -67,7 +67,7 @@
 
 enum pc_di_primtype {
 	DI_PT_NONE = 0,
-	DI_PT_POINTLIST_A2XX = 1,
+	DI_PT_POINTLIST_PSIZE = 1,
 	DI_PT_LINELIST = 2,
 	DI_PT_LINESTRIP = 3,
 	DI_PT_TRILIST = 4,
@@ -75,7 +75,7 @@
 	DI_PT_TRISTRIP = 6,
 	DI_PT_LINELOOP = 7,
 	DI_PT_RECTLIST = 8,
-	DI_PT_POINTLIST_A3XX = 9,
+	DI_PT_POINTLIST = 9,
 	DI_PT_LINE_ADJ = 10,
 	DI_PT_LINESTRIP_ADJ = 11,
 	DI_PT_TRI_ADJ = 12,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/Android.mk	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -28,7 +28,9 @@
 LOCAL_SRC_FILES := \
 	$(C_SOURCES) \
 	$(a2xx_SOURCES) \
-	$(a3xx_SOURCES)
+	$(a3xx_SOURCES)	\
+	$(a4xx_SOURCES) \
+	$(ir3_SOURCES)
 
 LOCAL_CFLAGS := \
 	-Wno-packed-bitfield-compat
@@ -37,6 +39,7 @@
 	$(LOCAL_PATH)/ir3
 
 LOCAL_SHARED_LIBRARIES := libdrm libdrm_freedreno
+LOCAL_STATIC_LIBRARIES := libmesa_glsl
 LOCAL_MODULE := libmesa_pipe_freedreno
 
 include $(GALLIUM_COMMON_MK)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -94,9 +94,7 @@
 fd_context_render(struct pipe_context *pctx)
 {
 	struct fd_context *ctx = fd_context(pctx);
-	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 	struct fd_resource *rsc, *rsc_tmp;
-	int i;
 
 	DBG("needs_flush: %d", ctx->needs_flush);
 
@@ -118,20 +116,11 @@
 	ctx->gmem_reason = 0;
 	ctx->num_draws = 0;
 
-	for (i = 0; i < pfb->nr_cbufs; i++)
-		if (pfb->cbufs[i])
-			fd_resource(pfb->cbufs[i]->texture)->dirty = false;
-	if (pfb->zsbuf) {
-		rsc = fd_resource(pfb->zsbuf->texture);
-		rsc->dirty = false;
-		if (rsc->stencil)
-			rsc->stencil->dirty = false;
-	}
-
 	/* go through all the used resources and clear their reading flag */
 	LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list) {
-		assert(rsc->reading);
-		rsc->reading = false;
+		debug_assert(rsc->status != 0);
+		rsc->status = 0;
+		rsc->pending_ctx = NULL;
 		list_delinit(&rsc->list);
 	}
 
@@ -142,10 +131,14 @@
 fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
 		unsigned flags)
 {
+	struct fd_ringbuffer *ring = fd_context(pctx)->ring;
+
 	fd_context_render(pctx);
 
-	if (fence)
-		*fence = fd_fence_create(pctx);
+	if (fence) {
+		fd_screen_fence_ref(pctx->screen, fence, NULL);
+		*fence = fd_fence_create(pctx, fd_ringbuffer_timestamp(ring));
+	}
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -82,6 +82,20 @@
 	unsigned num_elements;
 };
 
+struct fd_streamout_stateobj {
+	struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+	unsigned num_targets;
+	/* Track offset from vtxcnt for streamout data.  This counter
+	 * is just incremented by # of vertices on each draw until
+	 * reset or new streamout buffer bound.
+	 *
+	 * When we eventually have GS, the CPU won't actually know the
+	 * number of vertices per draw, so I think we'll have to do
+	 * something more clever.
+	 */
+	unsigned offsets[PIPE_MAX_SO_BUFFERS];
+};
+
 /* group together the vertex and vertexbuf state.. for ease of passing
  * around, and because various internal operations (gmem<->mem, etc)
  * need their own vertex state:
@@ -179,7 +193,7 @@
 	struct fd_program_stateobj solid_prog; // TODO move to screen?
 
 	/* shaders used by mem->gmem blits: */
-	struct fd_program_stateobj blit_prog[8]; // TODO move to screen?
+	struct fd_program_stateobj blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
 	struct fd_program_stateobj blit_z, blit_zs;
 
 	/* do we need to mem2gmem before rendering.  We don't, if for example,
@@ -319,6 +333,8 @@
 		FD_DIRTY_VTXBUF      = (1 << 15),
 		FD_DIRTY_INDEXBUF    = (1 << 16),
 		FD_DIRTY_SCISSOR     = (1 << 17),
+		FD_DIRTY_STREAMOUT   = (1 << 18),
+		FD_DIRTY_UCP         = (1 << 19),
 	} dirty;
 
 	struct pipe_blend_state *blend;
@@ -339,6 +355,8 @@
 	struct pipe_viewport_state viewport;
 	struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
 	struct pipe_index_buffer indexbuf;
+	struct fd_streamout_stateobj streamout;
+	struct pipe_clip_state ucp;
 
 	/* GMEM/tile handling fxns: */
 	void (*emit_tile_init)(struct fd_context *ctx);
@@ -351,18 +369,25 @@
 	void (*emit_sysmem_prep)(struct fd_context *ctx);
 
 	/* draw: */
-	void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info);
+	void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
 	void (*clear)(struct fd_context *ctx, unsigned buffers,
 			const union pipe_color_union *color, double depth, unsigned stencil);
+
+	/* constant emit:  (note currently not used/needed for a2xx) */
+	void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
+			uint32_t regid, uint32_t offset, uint32_t sizedwords,
+			const uint32_t *dwords, struct pipe_resource *prsc);
+	void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+			uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
 };
 
-static INLINE struct fd_context *
+static inline struct fd_context *
 fd_context(struct pipe_context *pctx)
 {
 	return (struct fd_context *)pctx;
 }
 
-static INLINE struct pipe_scissor_state *
+static inline struct pipe_scissor_state *
 fd_context_get_scissor(struct fd_context *ctx)
 {
 	if (ctx->rasterizer && ctx->rasterizer->scissor)
@@ -370,13 +395,13 @@
 	return &ctx->disabled_scissor;
 }
 
-static INLINE bool
+static inline bool
 fd_supported_prim(struct fd_context *ctx, unsigned prim)
 {
 	return (1 << prim) & ctx->primtype_mask;
 }
 
-static INLINE void
+static inline void
 fd_reset_wfi(struct fd_context *ctx)
 {
 	ctx->needs_wfi = true;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,7 +40,8 @@
 #include "freedreno_util.h"
 
 static void
-resource_reading(struct fd_context *ctx, struct pipe_resource *prsc)
+resource_used(struct fd_context *ctx, struct pipe_resource *prsc,
+		enum fd_resource_status status)
 {
 	struct fd_resource *rsc;
 
@@ -48,9 +49,29 @@
 		return;
 
 	rsc = fd_resource(prsc);
-	rsc->reading = true;
+	rsc->status |= status;
+	if (rsc->stencil)
+		rsc->stencil->status |= status;
+
+	/* TODO resources can actually be shared across contexts,
+	 * so I'm not sure a single list-head will do the trick?
+	 */
+	debug_assert((rsc->pending_ctx == ctx) || !rsc->pending_ctx);
 	list_delinit(&rsc->list);
 	list_addtail(&rsc->list, &ctx->used_resources);
+	rsc->pending_ctx = ctx;
+}
+
+static void
+resource_read(struct fd_context *ctx, struct pipe_resource *prsc)
+{
+	resource_used(ctx, prsc, FD_PENDING_READ);
+}
+
+static void
+resource_written(struct fd_context *ctx, struct pipe_resource *prsc)
+{
+	resource_used(ctx, prsc, FD_PENDING_WRITE);
 }
 
 static void
@@ -59,7 +80,7 @@
 	struct fd_context *ctx = fd_context(pctx);
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 	struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
-	unsigned i, buffers = 0;
+	unsigned i, prims, buffers = 0;
 
 	/* if we supported transform feedback, we'd have to disable this: */
 	if (((scissor->maxx - scissor->minx) *
@@ -69,6 +90,8 @@
 
 	/* emulate unsupported primitives: */
 	if (!fd_supported_prim(ctx, info->mode)) {
+		if (ctx->streamout.num_targets > 0)
+			debug_error("stream-out with emulated prims");
 		util_primconvert_save_index_buffer(ctx->primconvert, &ctx->indexbuf);
 		util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
 		util_primconvert_draw_vbo(ctx->primconvert, info);
@@ -83,17 +106,13 @@
 
 	if (fd_depth_enabled(ctx)) {
 		buffers |= FD_BUFFER_DEPTH;
-		fd_resource(pfb->zsbuf->texture)->dirty = true;
+		resource_written(ctx, pfb->zsbuf->texture);
 		ctx->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
 	}
 
 	if (fd_stencil_enabled(ctx)) {
-		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
 		buffers |= FD_BUFFER_STENCIL;
-		if (rsc->stencil)
-			rsc->stencil->dirty = true;
-		else
-			rsc->dirty = true;
+		resource_written(ctx, pfb->zsbuf->texture);
 		ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
 	}
 
@@ -108,7 +127,7 @@
 
 		surf = pfb->cbufs[i]->texture;
 
-		fd_resource(surf)->dirty = true;
+		resource_written(ctx, surf);
 		buffers |= PIPE_CLEAR_COLOR0 << i;
 
 		if (surf->nr_samples > 1)
@@ -120,32 +139,38 @@
 
 	/* Skip over buffer 0, that is sent along with the command stream */
 	for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
-		resource_reading(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
-		resource_reading(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
+		resource_read(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
+		resource_read(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
 	}
 
 	/* Mark VBOs as being read */
 	for (i = 0; i < ctx->vtx.vertexbuf.count; i++) {
 		assert(!ctx->vtx.vertexbuf.vb[i].user_buffer);
-		resource_reading(ctx, ctx->vtx.vertexbuf.vb[i].buffer);
+		resource_read(ctx, ctx->vtx.vertexbuf.vb[i].buffer);
 	}
 
 	/* Mark index buffer as being read */
-	resource_reading(ctx, ctx->indexbuf.buffer);
+	resource_read(ctx, ctx->indexbuf.buffer);
 
 	/* Mark textures as being read */
 	for (i = 0; i < ctx->verttex.num_textures; i++)
 		if (ctx->verttex.textures[i])
-			resource_reading(ctx, ctx->verttex.textures[i]->texture);
+			resource_read(ctx, ctx->verttex.textures[i]->texture);
 	for (i = 0; i < ctx->fragtex.num_textures; i++)
 		if (ctx->fragtex.textures[i])
-			resource_reading(ctx, ctx->fragtex.textures[i]->texture);
+			resource_read(ctx, ctx->fragtex.textures[i]->texture);
+
+	/* Mark streamout buffers as being written.. */
+	for (i = 0; i < ctx->streamout.num_targets; i++)
+		if (ctx->streamout.targets[i])
+			resource_written(ctx, ctx->streamout.targets[i]->buffer);
 
 	ctx->num_draws++;
 
+	prims = u_reduced_prims_for_vertices(info->mode, info->count);
+
 	ctx->stats.draw_calls++;
-	ctx->stats.prims_emitted +=
-		u_reduced_prims_for_vertices(info->mode, info->count);
+	ctx->stats.prims_emitted += prims;
 
 	/* any buffers that haven't been cleared yet, we need to restore: */
 	ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
@@ -159,6 +184,9 @@
 	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
 	ctx->draw_vbo(ctx, info);
 
+	for (i = 0; i < ctx->streamout.num_targets; i++)
+		ctx->streamout.offsets[i] += prims;
+
 	/* if an app (or, well, piglit test) does many thousands of draws
 	 * without flush (or anything which implicitly flushes, like
 	 * changing render targets), we can exceed the ringbuffer size.
@@ -216,15 +244,10 @@
 	if (buffers & PIPE_CLEAR_COLOR)
 		for (i = 0; i < pfb->nr_cbufs; i++)
 			if (buffers & (PIPE_CLEAR_COLOR0 << i))
-				fd_resource(pfb->cbufs[i]->texture)->dirty = true;
+				resource_written(ctx, pfb->cbufs[i]->texture);
 
 	if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
-		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-		if (rsc->stencil && buffers & PIPE_CLEAR_STENCIL)
-			rsc->stencil->dirty = true;
-		if (!rsc->stencil || buffers & PIPE_CLEAR_DEPTH)
-			rsc->dirty = true;
-
+		resource_written(ctx, pfb->zsbuf->texture);
 		ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
 	}
 
@@ -242,7 +265,8 @@
 			FD_DIRTY_SAMPLE_MASK |
 			FD_DIRTY_PROG |
 			FD_DIRTY_CONSTBUF |
-			FD_DIRTY_BLEND;
+			FD_DIRTY_BLEND |
+			FD_DIRTY_FRAMEBUFFER;
 
 	if (fd_mesa_debug & FD_DBG_DCLEAR)
 		ctx->dirty = 0xffffffff;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_fence.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_fence.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_fence.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_fence.c	2015-09-16 14:36:09.000000000 +0000
@@ -50,32 +50,18 @@
 	*ptr = pfence;
 }
 
-/* TODO we need to spiff out libdrm_freedreno a bit to allow passing
- * the timeout.. and maybe a better way to check if fence has been
- * signaled.  The current implementation is a bit lame for now to
- * avoid bumping libdrm version requirement.
- */
-
-boolean fd_screen_fence_signalled(struct pipe_screen *screen,
-		struct pipe_fence_handle *fence)
-{
-	uint32_t timestamp = fd_ringbuffer_timestamp(fence->ctx->ring);
-
-	/* TODO util helper for compare w/ rollover? */
-	return timestamp >= fence->timestamp;
-}
-
 boolean fd_screen_fence_finish(struct pipe_screen *screen,
 		struct pipe_fence_handle *fence,
 		uint64_t timeout)
 {
-	if (fd_pipe_wait(fence->screen->pipe, fence->timestamp))
+	if (fd_pipe_wait_timeout(fence->screen->pipe, fence->timestamp, timeout))
 		return false;
 
 	return true;
 }
 
-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx)
+struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+		uint32_t timestamp)
 {
 	struct pipe_fence_handle *fence;
 	struct fd_context *ctx = fd_context(pctx);
@@ -88,7 +74,7 @@
 
 	fence->ctx = ctx;
 	fence->screen = ctx->screen;
-	fence->timestamp = fd_ringbuffer_timestamp(ctx->ring);
+	fence->timestamp = timestamp;
 
 	return fence;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_fence.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_fence.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_fence.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_fence.h	2015-09-16 14:36:09.000000000 +0000
@@ -34,11 +34,10 @@
 void fd_screen_fence_ref(struct pipe_screen *pscreen,
 		struct pipe_fence_handle **ptr,
 		struct pipe_fence_handle *pfence);
-boolean fd_screen_fence_signalled(struct pipe_screen *screen,
-		struct pipe_fence_handle *pfence);
 boolean fd_screen_fence_finish(struct pipe_screen *screen,
 		struct pipe_fence_handle *pfence,
 		uint64_t timeout);
-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx);
+struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+		uint32_t timestamp);
 
 #endif /* FREEDRENO_FENCE_H_ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_gmem.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_gmem.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_gmem.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_gmem.c	2015-09-16 14:36:09.000000000 +0000
@@ -82,7 +82,7 @@
 {
 	uint32_t total = 0, i;
 
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < MAX_RENDER_TARGETS; i++) {
 		if (cbuf_cpp[i]) {
 			gmem->cbuf_base[i] = align(total, 0x4000);
 			total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
@@ -113,7 +113,7 @@
 	uint32_t nbins_x = 1, nbins_y = 1;
 	uint32_t bin_w, bin_h;
 	uint32_t max_width = bin_width(ctx);
-	uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0};
+	uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
 	uint32_t i, j, t, xoff, yoff;
 	uint32_t tpp_x, tpp_y;
 	bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
@@ -162,12 +162,17 @@
 		bin_w = align(width / nbins_x, 32);
 	}
 
+	if (fd_mesa_debug & FD_DBG_MSGS) {
+		debug_printf("binning input: cbuf cpp:");
+		for (i = 0; i < pfb->nr_cbufs; i++)
+			debug_printf(" %d", cbuf_cpp[i]);
+		debug_printf(", zsbuf cpp: %d; %dx%d\n",
+				zsbuf_cpp[0], width, height);
+	}
+
 	/* then find a bin width/height that satisfies the memory
 	 * constraints:
 	 */
-	DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d",
-		cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0],
-		width, height);
 	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
 		if (bin_w > bin_h) {
 			nbins_x++;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_gmem.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_gmem.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_gmem.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_gmem.h	2015-09-16 14:36:09.000000000 +0000
@@ -31,6 +31,8 @@
 
 #include "pipe/p_context.h"
 
+#include "freedreno_util.h"
+
 /* per-pipe configuration for hw binning: */
 struct fd_vsc_pipe {
 	struct fd_bo *bo;
@@ -47,9 +49,9 @@
 
 struct fd_gmem_stateobj {
 	struct pipe_scissor_state scissor;
-	uint32_t cbuf_base[4];
+	uint32_t cbuf_base[MAX_RENDER_TARGETS];
 	uint32_t zsbuf_base[2];
-	uint8_t cbuf_cpp[4];
+	uint8_t cbuf_cpp[MAX_RENDER_TARGETS];
 	uint8_t zsbuf_cpp[2];
 	uint16_t bin_h, nbins_y;
 	uint16_t bin_w, nbins_x;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_program.c	2015-09-16 14:36:09.000000000 +0000
@@ -96,7 +96,11 @@
 {
 	int i;
 	struct ureg_src tc;
-	struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+	struct ureg_program *ureg;
+
+	debug_assert(rts <= MAX_RENDER_TARGETS);
+
+	ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
 	if (!ureg)
 		return NULL;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_resource.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_resource.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_resource.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_resource.c	2015-09-16 14:36:09.000000000 +0000
@@ -42,6 +42,14 @@
 
 #include <errno.h>
 
+
+static bool
+pending(struct fd_resource *rsc, enum fd_resource_status status)
+{
+	return (rsc->status & status) ||
+		(rsc->stencil && (rsc->stencil->status & status));
+}
+
 static void
 fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
 {
@@ -72,11 +80,11 @@
 
 	/* Textures */
 	for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
-		if (ctx->verttex.textures[i]->texture == prsc)
+		if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
 			ctx->dirty |= FD_DIRTY_VERTTEX;
 	}
 	for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
-		if (ctx->fragtex.textures[i]->texture == prsc)
+		if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
 			ctx->dirty |= FD_DIRTY_FRAGTEX;
 	}
 }
@@ -97,7 +105,8 @@
 
 	rsc->bo = fd_bo_new(screen->dev, size, flags);
 	rsc->timestamp = 0;
-	rsc->dirty = rsc->reading = false;
+	rsc->status = 0;
+	rsc->pending_ctx = NULL;
 	list_delinit(&rsc->list);
 	util_range_set_empty(&rsc->valid_buffer_range);
 }
@@ -213,7 +222,7 @@
 	ptrans->level = level;
 	ptrans->usage = usage;
 	ptrans->box = *box;
-	ptrans->stride = slice->pitch * rsc->cpp;
+	ptrans->stride = util_format_get_nblocksx(format, slice->pitch) * rsc->cpp;
 	ptrans->layer_stride = slice->size0;
 
 	if (usage & PIPE_TRANSFER_READ)
@@ -238,8 +247,9 @@
 		/* If the GPU is writing to the resource, or if it is reading from the
 		 * resource and we're trying to write to it, flush the renders.
 		 */
-		if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) ||
-			((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading))
+		if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
+					pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)) ||
+				pending(rsc, FD_PENDING_WRITE))
 			fd_context_render(pctx);
 
 		/* The GPU keeps track of how the various bo's are being used, and
@@ -365,9 +375,11 @@
 
 	for (level = 0; level <= prsc->last_level; level++) {
 		struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
+		uint32_t blocks;
 
 		slice->pitch = width = align(width, 32);
 		slice->offset = size;
+		blocks = util_format_get_nblocks(prsc->format, width, height);
 		/* 1d array and 2d array textures must all have the same layer size
 		 * for each miplevel on a3xx. 3d textures can have different layer
 		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
@@ -377,9 +389,9 @@
 		if (prsc->target == PIPE_TEXTURE_3D && (
 					level == 1 ||
 					(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
-			slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
+			slice->size0 = align(blocks * rsc->cpp, alignment);
 		else if (level == 0 || rsc->layer_first || alignment == 1)
-			slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
+			slice->size0 = align(blocks * rsc->cpp, alignment);
 		else
 			slice->size0 = rsc->slices[level - 1].size0;
 
@@ -449,7 +461,6 @@
 	if (is_a4xx(fd_screen(pscreen))) {
 		switch (tmpl->target) {
 		case PIPE_TEXTURE_3D:
-			/* TODO 3D_ARRAY? */
 			rsc->layer_first = false;
 			break;
 		default:
@@ -646,6 +657,8 @@
 	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
 	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
 	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
+	util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
+			ctx->streamout.targets);
 	util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
 	util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
 	util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
@@ -675,7 +688,7 @@
 {
 	struct fd_resource *rsc = fd_resource(prsc);
 
-	if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty))
+	if (pending(rsc, FD_PENDING_WRITE | FD_PENDING_READ))
 		fd_context_render(pctx);
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_resource.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_resource.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_resource.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_resource.h	2015-09-16 14:36:09.000000000 +0000
@@ -60,6 +60,15 @@
 	uint32_t size0;          /* size of first layer in slice */
 };
 
+/* status of queued up but not flushed reads and write operations.
+ * In _transfer_map() we need to know if queued up rendering needs
+ * to be flushed to preserve the order of cpu and gpu access.
+ */
+enum fd_resource_status {
+	FD_PENDING_WRITE = 0x01,
+	FD_PENDING_READ  = 0x02,
+};
+
 struct fd_resource {
 	struct u_resource base;
 	struct fd_bo *bo;
@@ -68,17 +77,23 @@
 	uint32_t layer_size;
 	struct fd_resource_slice slices[MAX_MIP_LEVELS];
 	uint32_t timestamp;
-	bool dirty, reading;
 	/* buffer range that has been initialized */
 	struct util_range valid_buffer_range;
 
 	/* reference to the resource holding stencil data for a z32_s8 texture */
+	/* TODO rename to secondary or auxiliary? */
 	struct fd_resource *stencil;
 
+	/* pending read/write state: */
+	enum fd_resource_status status;
+	/* resources accessed by queued but not flushed draws are tracked
+	 * in the used_resources list.
+	 */
 	struct list_head list;
+	struct fd_context *pending_ctx;
 };
 
-static INLINE struct fd_resource *
+static inline struct fd_resource *
 fd_resource(struct pipe_resource *ptex)
 {
 	return (struct fd_resource *)ptex;
@@ -89,13 +104,13 @@
 	void *staging;
 };
 
-static INLINE struct fd_transfer *
+static inline struct fd_transfer *
 fd_transfer(struct pipe_transfer *ptrans)
 {
 	return (struct fd_transfer *)ptrans;
 }
 
-static INLINE struct fd_resource_slice *
+static inline struct fd_resource_slice *
 fd_resource_slice(struct fd_resource *rsc, unsigned level)
 {
 	assert(level <= rsc->base.b.last_level);
@@ -103,7 +118,7 @@
 }
 
 /* get offset for specified mipmap level and texture/array layer */
-static INLINE uint32_t
+static inline uint32_t
 fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
 {
 	struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -68,10 +68,8 @@
 		{"fraghalf",  FD_DBG_FRAGHALF, "Use half-precision in fragment shader"},
 		{"nobin",     FD_DBG_NOBIN,  "Disable hw binning"},
 		{"optmsgs",   FD_DBG_OPTMSGS,"Enable optimizer debug messages"},
-		{"optdump",   FD_DBG_OPTDUMP,"Dump shader DAG to .dot files"},
-		{"glsl120",   FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"},
-		{"nocp",      FD_DBG_NOCP,   "Disable copy-propagation"},
-		{"nir",       FD_DBG_NIR,    "Enable experimental NIR compiler"},
+		{"glsl120",   FD_DBG_GLSL120,"Temporary flag to force GLSL 1.20 (rather than 1.30) on a3xx+"},
+		{"shaderdb",  FD_DBG_SHADERDB, "Enable shaderdb output"},
 		DEBUG_NAMED_VALUE_END
 };
 
@@ -165,10 +163,6 @@
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 	case PIPE_CAP_TEXTURE_BARRIER:
 	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-	case PIPE_CAP_CUBE_MAP_ARRAY:
-	case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
-	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
 	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
 	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_COMPUTE:
@@ -178,23 +172,41 @@
 	case PIPE_CAP_PRIMITIVE_RESTART:
 	case PIPE_CAP_TGSI_INSTANCEID:
 	case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-		return is_a3xx(screen) || is_a4xx(screen);
-
 	case PIPE_CAP_INDEP_BLEND_ENABLE:
 	case PIPE_CAP_INDEP_BLEND_FUNC:
+	case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+		return is_a3xx(screen) || is_a4xx(screen);
+
+	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+		/* ignoring first/last_element.. but I guess that should be
+		 * easy to add..
+		 */
+		return 0;
+	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+		/* I think 32k on a4xx.. and we could possibly emulate more
+		 * by pretending 2d/rect textures and splitting high bits
+		 * of index into 2nd dimension..
+		 */
+		return 16383;
+
 	case PIPE_CAP_DEPTH_CLIP_DISABLE:
+	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
 		return is_a3xx(screen);
 
+	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+	case PIPE_CAP_CUBE_MAP_ARRAY:
+		return is_a4xx(screen);
+
 	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
 		return 256;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
 		if (glsl120)
 			return 120;
-		return (is_a3xx(screen) || is_a4xx(screen)) ? 130 : 120;
+		return is_ir3(screen) ? 130 : 120;
 
 	/* Unsupported features. */
-	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
 	case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
@@ -221,6 +233,8 @@
 	case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
 	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+	case PIPE_CAP_DEPTH_BOUNDS_TEST:
 		return 0;
 
 	case PIPE_CAP_MAX_VIEWPORTS:
@@ -228,9 +242,17 @@
 
 	/* Stream output. */
 	case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+		if (is_ir3(screen))
+			return PIPE_MAX_SO_BUFFERS;
+		return 0;
 	case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+		if (is_ir3(screen))
+			return 1;
+		return 0;
 	case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
 	case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+		if (is_ir3(screen))
+			return 16 * 4;   /* should only be shader out limit? */
 		return 0;
 
 	/* Geometry shader output, unsupported. */
@@ -261,9 +283,6 @@
 	case PIPE_CAP_QUERY_TIMESTAMP:
 		return 0;
 	case PIPE_CAP_OCCLUSION_QUERY:
-		/* TODO still missing on a4xx, but we lie to get gl2..
-		 * it's not a feature, it's a bug!
-		 */
 		return is_a3xx(screen) || is_a4xx(screen);
 
 	case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
@@ -360,7 +379,7 @@
 		 */
 		return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]);
 	case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-		return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1;
+		return is_ir3(screen) ? 16 : 1;
 	case PIPE_SHADER_CAP_MAX_PREDS:
 		return 0; /* nothing uses this */
 	case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -375,13 +394,14 @@
 	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
 		return 0;
 	case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
 		return 1;
 	case PIPE_SHADER_CAP_INTEGERS:
 		if (glsl120)
 			return 0;
-		return (is_a3xx(screen) || is_a4xx(screen)) ? 1 : 0;
+		return is_ir3(screen) ? 1 : 0;
 	case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
 	case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
 		return 16;
@@ -548,7 +568,6 @@
 	pscreen->get_timestamp = fd_screen_get_timestamp;
 
 	pscreen->fence_reference = fd_screen_fence_ref;
-	pscreen->fence_signalled = fd_screen_fence_signalled;
 	pscreen->fence_finish = fd_screen_fence_finish;
 
 	util_format_s3tc_init();
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,9 @@
 	uint32_t device_id;
 	uint32_t gpu_id;         /* 220, 305, etc */
 	uint32_t chip_id;        /* coreid:8 majorrev:8 minorrev:8 patch:8 */
-	uint32_t max_rts;
+	uint32_t max_rts;        /* max # of render targets */
+
+	void *compiler;          /* currently unused for a2xx */
 
 	struct fd_device *dev;
 	struct fd_pipe *pipe;
@@ -54,7 +56,7 @@
 	int64_t cpu_gpu_time_delta;
 };
 
-static INLINE struct fd_screen *
+static inline struct fd_screen *
 fd_screen(struct pipe_screen *pscreen)
 {
 	return (struct fd_screen *)pscreen;
@@ -71,6 +73,7 @@
 struct pipe_screen * fd_screen_create(struct fd_device *dev);
 
 /* is a3xx patch revision 0? */
+/* TODO a306.0 probably doesn't need this.. be more clever?? */
 static inline boolean
 is_a3xx_p0(struct fd_screen *screen)
 {
@@ -89,4 +92,11 @@
 	return (screen->gpu_id >= 400) && (screen->gpu_id < 500);
 }
 
+/* is it using the ir3 compiler (shader isa introduced with a3xx)? */
+static inline boolean
+is_ir3(struct fd_screen *screen)
+{
+	return is_a3xx(screen) || is_a4xx(screen);
+}
+
 #endif /* FREEDRENO_SCREEN_H_ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_state.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -65,7 +65,9 @@
 fd_set_clip_state(struct pipe_context *pctx,
 		const struct pipe_clip_state *clip)
 {
-	DBG("TODO: ");
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->ucp = *clip;
+	ctx->dirty |= FD_DIRTY_UCP;
 }
 
 static void
@@ -300,6 +302,67 @@
 	ctx->dirty |= FD_DIRTY_VTXSTATE;
 }
 
+static struct pipe_stream_output_target *
+fd_create_stream_output_target(struct pipe_context *pctx,
+		struct pipe_resource *prsc, unsigned buffer_offset,
+		unsigned buffer_size)
+{
+	struct pipe_stream_output_target *target;
+
+	target = CALLOC_STRUCT(pipe_stream_output_target);
+	if (!target)
+		return NULL;
+
+	pipe_reference_init(&target->reference, 1);
+	pipe_resource_reference(&target->buffer, prsc);
+
+	target->context = pctx;
+	target->buffer_offset = buffer_offset;
+	target->buffer_size = buffer_size;
+
+	return target;
+}
+
+static void
+fd_stream_output_target_destroy(struct pipe_context *pctx,
+		struct pipe_stream_output_target *target)
+{
+	pipe_resource_reference(&target->buffer, NULL);
+	FREE(target);
+}
+
+static void
+fd_set_stream_output_targets(struct pipe_context *pctx,
+		unsigned num_targets, struct pipe_stream_output_target **targets,
+		const unsigned *offsets)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_streamout_stateobj *so = &ctx->streamout;
+	unsigned i;
+
+	debug_assert(num_targets <= ARRAY_SIZE(so->targets));
+
+	for (i = 0; i < num_targets; i++) {
+		boolean changed = targets[i] != so->targets[i];
+		boolean append = (offsets[i] == (unsigned)-1);
+
+		if (!changed && append)
+			continue;
+
+		so->offsets[i] = 0;
+
+		pipe_so_target_reference(&so->targets[i], targets[i]);
+	}
+
+	for (; i < so->num_targets; i++) {
+		pipe_so_target_reference(&so->targets[i], NULL);
+	}
+
+	so->num_targets = num_targets;
+
+	ctx->dirty |= FD_DIRTY_STREAMOUT;
+}
+
 void
 fd_state_init(struct pipe_context *pctx)
 {
@@ -328,4 +391,8 @@
 	pctx->create_vertex_elements_state = fd_vertex_state_create;
 	pctx->delete_vertex_elements_state = fd_vertex_state_delete;
 	pctx->bind_vertex_elements_state = fd_vertex_state_bind;
+
+	pctx->create_stream_output_target = fd_create_stream_output_target;
+	pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
+	pctx->set_stream_output_targets = fd_set_stream_output_targets;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_surface.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,8 @@
 //	struct fd_resource* tex = fd_resource(ptex);
 	struct fd_surface* surface = CALLOC_STRUCT(fd_surface);
 
-	assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+	debug_assert(ptex->target != PIPE_BUFFER);
+	debug_assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
 
 	if (surface) {
 		struct pipe_surface *psurf = &surface->base;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_surface.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_surface.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_surface.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_surface.h	2015-09-16 14:36:09.000000000 +0000
@@ -40,7 +40,7 @@
 	uint16_t depth;
 };
 
-static INLINE struct fd_surface *
+static inline struct fd_surface *
 fd_surface(struct pipe_surface *psurf)
 {
 	return (struct fd_surface *)psurf;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_util.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_util.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/freedreno_util.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/freedreno_util.h	2015-09-16 14:36:09.000000000 +0000
@@ -40,6 +40,7 @@
 #include "util/u_dynarray.h"
 #include "util/u_pack_color.h"
 
+#include "disasm.h"
 #include "adreno_common.xml.h"
 #include "adreno_pm4.xml.h"
 
@@ -53,6 +54,12 @@
 /* TBD if it is same on a2xx, but for now: */
 #define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS
 
+#define A2XX_MAX_RENDER_TARGETS 1
+#define A3XX_MAX_RENDER_TARGETS 4
+#define A4XX_MAX_RENDER_TARGETS 8
+
+#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS
+
 #define FD_DBG_MSGS     0x0001
 #define FD_DBG_DISASM   0x0002
 #define FD_DBG_DCLEAR   0x0004
@@ -62,11 +69,9 @@
 #define FD_DBG_NOBYPASS 0x0040
 #define FD_DBG_FRAGHALF 0x0080
 #define FD_DBG_NOBIN    0x0100
-#define FD_DBG_OPTMSGS  0x0400
-#define FD_DBG_OPTDUMP  0x0800
-#define FD_DBG_GLSL120  0x1000
-#define FD_DBG_NOCP     0x2000
-#define FD_DBG_NIR      0x4000
+#define FD_DBG_OPTMSGS  0x0200
+#define FD_DBG_GLSL120  0x0400
+#define FD_DBG_SHADERDB 0x0800
 
 extern int fd_mesa_debug;
 extern bool fd_binning_enabled;
@@ -111,6 +116,58 @@
 	return psurf->format;
 }
 
+static inline bool
+fd_surface_half_precision(const struct pipe_surface *psurf)
+{
+	enum pipe_format format;
+	/* no surface bound: nothing here forces full precision */
+	if (!psurf)
+		return true;
+
+	format = psurf->format;
+
+	/* colors are provided in consts, which go through cov.f32f16, and that
+	 * conversion would break pure-integer values
+	 */
+	if (util_format_is_pure_integer(format))
+		return false;
+
+	/* avoid losing precision on 32-bit float formats */
+	if (util_format_is_float(format) &&
+		util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
+		return false;
+
+	return true;
+}
+
+static inline unsigned
+fd_sampler_first_level(const struct pipe_sampler_view *view)
+{
+	/* buffer views always report level 0; only other targets read u.tex */
+	const bool is_buffer = (view->target == PIPE_BUFFER);
+	return is_buffer ? 0 : view->u.tex.first_level;
+}
+
+static inline unsigned
+fd_sampler_last_level(const struct pipe_sampler_view *view)
+{
+	/* buffer views always report level 0; only other targets read u.tex */
+	const bool is_buffer = (view->target == PIPE_BUFFER);
+	return is_buffer ? 0 : view->u.tex.last_level;
+}
+
+static inline bool
+fd_half_precision(struct pipe_framebuffer_state *pfb)
+{
+	unsigned i;
+	/* true only when every one of the nr_cbufs color buffers permits half precision */
+	for (i = 0; i < pfb->nr_cbufs; i++)
+		if (!fd_surface_half_precision(pfb->cbufs[i]))
+			return false;
+
+	return true;
+}
+
 #define LOG_DWORDS 0
 
 static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c	2015-09-16 14:36:09.000000000 +0000
@@ -103,7 +103,7 @@
 	} else if ((reg.num == REG_P0) && !c) {
 		printf("p0.%c", component[reg.comp]);
 	} else {
-		printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
+		printf("%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]);
 	}
 }
 
@@ -122,6 +122,32 @@
 	print_reg(reg, full, r, c, im, neg, abs, addr_rel);
 }
 
+/* TODO switch to using reginfo struct everywhere, since it is more readable
+ * than passing a bunch of bools to print_reg_src
+ */
+/* A register plus the flag arguments that print_reg_src() takes, in call order. */
+struct reginfo {
+	reg_t reg;
+	bool full;      /* false: "h"-prefixed register name (see print_reg) */
+	bool r;
+	bool c;
+	bool im;        /* immediate; print_instr_cat6 prints src1 as a raw number when set */
+	bool neg;
+	bool abs;
+	bool addr_rel;
+};
+/* Print one source operand by unpacking a reginfo into print_reg_src(). */
+static void print_src(struct reginfo *info)
+{
+	print_reg_src(info->reg, info->full, info->r, info->c, info->im,
+			info->neg, info->abs, info->addr_rel);
+}
+
+//static void print_dst(struct reginfo *info)
+//{
+//	print_reg_dst(info->reg, info->full, info->addr_rel);
+//}
+
 static void print_instr_cat0(instr_t *instr)
 {
 	instr_cat0_t *cat0 = &instr->cat0;
@@ -133,16 +159,16 @@
 		break;
 	case OPC_BR:
 		printf(" %sp0.%c, #%d", cat0->inv ? "!" : "",
-				component[cat0->comp], cat0->immed);
+				component[cat0->comp], cat0->a3xx.immed);
 		break;
 	case OPC_JUMP:
 	case OPC_CALL:
-		printf(" #%d", cat0->immed);
+		printf(" #%d", cat0->a3xx.immed);
 		break;
 	}
 
-	if ((debug & PRINT_VERBOSE) && (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4))
-		printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4);
+	if ((debug & PRINT_VERBOSE) && (cat0->a3xx.dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4))
+		printf("\t{0: %x,%x,%x,%x}", cat0->a3xx.dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4);
 }
 
 static void print_instr_cat1(instr_t *instr)
@@ -454,10 +480,70 @@
 {
 	instr_cat6_t *cat6 = &instr->cat6;
 	char sd = 0, ss = 0;  /* dst/src address space */
-	bool full = type_size(cat6->type) == 32;
 	bool nodst = false;
+	struct reginfo dst, src1, src2;
+	int src1off = 0, dstoff = 0;
+
+	memset(&dst, 0, sizeof(dst));
+	memset(&src1, 0, sizeof(src1));
+	memset(&src2, 0, sizeof(src2));
+
+	switch (cat6->opc) {
+	case OPC_RESINFO:
+	case OPC_RESFMT:
+		dst.full  = type_size(cat6->type) == 32;
+		src1.full = type_size(cat6->type) == 32;
+		src2.full = type_size(cat6->type) == 32;
+		break;
+	case OPC_L2G:
+	case OPC_G2L:
+		dst.full = true;
+		src1.full = true;
+		src2.full = true;
+		break;
+	case OPC_STG:
+	case OPC_STL:
+	case OPC_STP:
+	case OPC_STI:
+	case OPC_STLW:
+	case OPC_STGB_4D_4:
+	case OPC_STIB:
+		dst.full  = true;
+		src1.full = type_size(cat6->type) == 32;
+		src2.full = type_size(cat6->type) == 32;
+		break;
+	default:
+		dst.full  = type_size(cat6->type) == 32;
+		src1.full = true;
+		src2.full = true;
+		break;
+	}
 
-	printf(".%s ", type[cat6->type]);
+	switch (cat6->opc) {
+	case OPC_PREFETCH:
+	case OPC_RESINFO:
+		break;
+	case OPC_ATOMIC_ADD:
+	case OPC_ATOMIC_SUB:
+	case OPC_ATOMIC_XCHG:
+	case OPC_ATOMIC_INC:
+	case OPC_ATOMIC_DEC:
+	case OPC_ATOMIC_CMPXCHG:
+	case OPC_ATOMIC_MIN:
+	case OPC_ATOMIC_MAX:
+	case OPC_ATOMIC_AND:
+	case OPC_ATOMIC_OR:
+	case OPC_ATOMIC_XOR:
+		ss = cat6->g ? 'g' : 'l';
+		printf(".%c", ss);
+		printf(".%s", type[cat6->type]);
+		break;
+	default:
+		dst.im = cat6->g && !cat6->dst_off;
+		printf(".%s", type[cat6->type]);
+		break;
+	}
+	printf(" ");
 
 	switch (cat6->opc) {
 	case OPC_STG:
@@ -499,68 +585,65 @@
 		break;
 
 	case OPC_STI:
-		full = false;  // XXX or inverts??
+		dst.full = false;  // XXX or inverts??
 		break;
 	}
 
-	if (cat6->has_off) {
-		if (!nodst) {
-			if (sd)
-				printf("%c[", sd);
-			print_reg_dst((reg_t)(cat6->a.dst), full, false);
-			if (sd)
-				printf("]");
-			printf(", ");
-		}
-		if (ss)
-			printf("%c[", ss);
-		print_reg_src((reg_t)(cat6->a.src1), true,
-				false, false, cat6->a.src1_im, false, false, false);
-		if (cat6->a.off)
-			printf("%+d", cat6->a.off);
-		if (ss)
-			printf("]");
-		printf(", ");
-		print_reg_src((reg_t)(cat6->a.src2), full,
-				false, false, cat6->a.src2_im, false, false, false);
+	if (cat6->dst_off) {
+		dst.reg = (reg_t)(cat6->c.dst);
+		dstoff  = cat6->c.off;
 	} else {
-		if (!nodst) {
-			if (sd)
-				printf("%c[", sd);
-			print_reg_dst((reg_t)(cat6->b.dst), full, false);
-			if (sd)
-				printf("]");
-			printf(", ");
-		}
-		if (ss)
-			printf("%c[", ss);
-		print_reg_src((reg_t)(cat6->b.src1), true,
-				false, false, cat6->b.src1_im, false, false, false);
-		if (ss)
+		dst.reg = (reg_t)(cat6->d.dst);
+	}
+
+	if (cat6->src_off) {
+		src1.reg = (reg_t)(cat6->a.src1);
+		src1.im  = cat6->a.src1_im;
+		src2.reg = (reg_t)(cat6->a.src2);
+		src2.im  = cat6->a.src2_im;
+		src1off  = cat6->a.off;
+	} else {
+		src1.reg = (reg_t)(cat6->b.src1);
+		src1.im  = cat6->b.src1_im;
+		src2.reg = (reg_t)(cat6->b.src2);
+		src2.im  = cat6->b.src2_im;
+	}
+
+	if (!nodst) {
+		if (sd)
+			printf("%c[", sd);
+		/* note: dst might actually be a src (ie. address to store to) */
+		print_src(&dst);
+		if (dstoff)
+			printf("%+d", dstoff);
+		if (sd)
 			printf("]");
 		printf(", ");
-		print_reg_src((reg_t)(cat6->b.src2), full,
-				false, false, cat6->b.src2_im, false, false, false);
 	}
 
-	if (debug & PRINT_VERBOSE) {
-		switch (cat6->opc) {
-		case OPC_LDG:
-		case OPC_LDP:
-			/* load instructions: */
-			if (cat6->a.dummy2|cat6->a.dummy3)
-				printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
-			break;
-		case OPC_STG:
-		case OPC_STP:
-		case OPC_STI:
-			/* store instructions: */
-			if (cat6->b.dummy2|cat6->b.dummy2)
-				printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
-			if (cat6->b.ignore0)
-				printf("\t{?? %x}", cat6->b.ignore0);
-			break;
-		}
+	if (ss)
+		printf("%c[", ss);
+
+	/* can have a larger than normal immed, so hack: */
+	if (src1.im) {
+		printf("%u", src1.reg.dummy13);
+	} else {
+		print_src(&src1);
+	}
+
+	if (src1off)
+		printf("%+d", src1off);
+	if (ss)
+		printf("]");
+
+	switch (cat6->opc) {
+	case OPC_RESINFO:
+	case OPC_RESFMT:
+		break;
+	default:
+		printf(", ");
+		print_src(&src2);
+		break;
 	}
 }
 
@@ -711,19 +794,19 @@
 	OPC(6, OPC_LDLW,         ldlw),
 	OPC(6, OPC_STLW,         stlw),
 	OPC(6, OPC_RESFMT,       resfmt),
-	OPC(6, OPC_RESINFO,      resinf),
-	OPC(6, OPC_ATOMIC_ADD_L,     atomic.add.l),
-	OPC(6, OPC_ATOMIC_SUB_L,     atomic.sub.l),
-	OPC(6, OPC_ATOMIC_XCHG_L,    atomic.xchg.l),
-	OPC(6, OPC_ATOMIC_INC_L,     atomic.inc.l),
-	OPC(6, OPC_ATOMIC_DEC_L,     atomic.dec.l),
-	OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
-	OPC(6, OPC_ATOMIC_MIN_L,     atomic.min.l),
-	OPC(6, OPC_ATOMIC_MAX_L,     atomic.max.l),
-	OPC(6, OPC_ATOMIC_AND_L,     atomic.and.l),
-	OPC(6, OPC_ATOMIC_OR_L,      atomic.or.l),
-	OPC(6, OPC_ATOMIC_XOR_L,     atomic.xor.l),
-	OPC(6, OPC_LDGB_TYPED_4D,    ldgb.typed.4d),
+	OPC(6, OPC_RESINFO,      resinfo),
+	OPC(6, OPC_ATOMIC_ADD,     atomic.add),
+	OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
+	OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
+	OPC(6, OPC_ATOMIC_INC,     atomic.inc),
+	OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
+	OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+	OPC(6, OPC_ATOMIC_MIN,     atomic.min),
+	OPC(6, OPC_ATOMIC_MAX,     atomic.max),
+	OPC(6, OPC_ATOMIC_AND,     atomic.and),
+	OPC(6, OPC_ATOMIC_OR,      atomic.or),
+	OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
+	OPC(6, OPC_LDGB_TYPED_4D,    ldgb.typed.3d),
 	OPC(6, OPC_STGB_4D_4,    stgb.4d.4),
 	OPC(6, OPC_STIB,         stib),
 	OPC(6, OPC_LDC_4,        ldc.4),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/instr-a3xx.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/instr-a3xx.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/instr-a3xx.h	2015-09-16 14:36:09.000000000 +0000
@@ -173,17 +173,17 @@
 	OPC_STLW = 11,
 	OPC_RESFMT = 14,
 	OPC_RESINFO = 15,
-	OPC_ATOMIC_ADD_L = 16,
-	OPC_ATOMIC_SUB_L = 17,
-	OPC_ATOMIC_XCHG_L = 18,
-	OPC_ATOMIC_INC_L = 19,
-	OPC_ATOMIC_DEC_L = 20,
-	OPC_ATOMIC_CMPXCHG_L = 21,
-	OPC_ATOMIC_MIN_L = 22,
-	OPC_ATOMIC_MAX_L = 23,
-	OPC_ATOMIC_AND_L = 24,
-	OPC_ATOMIC_OR_L = 25,
-	OPC_ATOMIC_XOR_L = 26,
+	OPC_ATOMIC_ADD = 16,
+	OPC_ATOMIC_SUB = 17,
+	OPC_ATOMIC_XCHG = 18,
+	OPC_ATOMIC_INC = 19,
+	OPC_ATOMIC_DEC = 20,
+	OPC_ATOMIC_CMPXCHG = 21,
+	OPC_ATOMIC_MIN = 22,
+	OPC_ATOMIC_MAX = 23,
+	OPC_ATOMIC_AND = 24,
+	OPC_ATOMIC_OR = 25,
+	OPC_ATOMIC_XOR = 26,
 	OPC_LDGB_TYPED_4D = 27,
 	OPC_STGB_4D_4 = 28,
 	OPC_STIB = 29,
@@ -191,9 +191,9 @@
 	OPC_LDLV = 31,
 
 	/* meta instructions (category -1): */
-	/* placeholder instr to mark inputs/outputs: */
+	/* placeholder instr to mark shader inputs: */
 	OPC_META_INPUT = 0,
-	OPC_META_OUTPUT = 1,
+	OPC_META_PHI = 1,
 	/* The "fan-in" and "fan-out" instructions are used for keeping
 	 * track of instructions that write to multiple dst registers
 	 * (fan-out) like texture sample instructions, or read multiple
@@ -201,9 +201,6 @@
 	 */
 	OPC_META_FO = 2,
 	OPC_META_FI = 3,
-	/* branches/flow control */
-	OPC_META_FLOW = 4,
-	OPC_META_PHI = 5,
 
 } opc_t;
 
@@ -281,8 +278,16 @@
 
 typedef struct PACKED {
 	/* dword0: */
-	int16_t  immed    : 16;
-	uint32_t dummy1   : 16;
+	union PACKED {
+		struct PACKED {
+			int16_t  immed    : 16;
+			uint32_t dummy1   : 16;
+		} a3xx;
+		struct PACKED {
+			int32_t  immed    : 20;
+			uint32_t dummy1   : 12;
+		} a4xx;
+	};
 
 	/* dword1: */
 	uint32_t dummy2   : 8;
@@ -570,7 +575,7 @@
 	uint32_t opc_cat  : 3;
 } instr_cat5_t;
 
-/* [src1 + off], src2: */
+/* dword0 encoding for src_off: [src1 + off], src2: */
 typedef struct PACKED {
 	/* dword0: */
 	uint32_t mustbe1  : 1;
@@ -581,37 +586,50 @@
 	uint32_t src2     : 8;
 
 	/* dword1: */
-	uint32_t dst      : 8;
-	uint32_t dummy2   : 9;
-	uint32_t type     : 3;
-	uint32_t dummy3   : 2;
-	uint32_t opc      : 5;
-	uint32_t jmp_tgt  : 1;
-	uint32_t sync     : 1;
-	uint32_t opc_cat  : 3;
+	uint32_t dword1;
 } instr_cat6a_t;
 
-/* [src1], src2: */
+/* dword0 encoding for !src_off: [src1], src2 */
 typedef struct PACKED {
 	/* dword0: */
 	uint32_t mustbe0  : 1;
-	uint32_t src1     : 8;
-	uint32_t ignore0  : 13;
+	uint32_t src1     : 13;
+	uint32_t ignore0  : 8;
 	uint32_t src1_im  : 1;
 	uint32_t src2_im  : 1;
 	uint32_t src2     : 8;
 
 	/* dword1: */
-	uint32_t dst      : 8;
-	uint32_t dummy2   : 9;
-	uint32_t type     : 3;
-	uint32_t dummy3   : 2;
-	uint32_t opc      : 5;
-	uint32_t jmp_tgt  : 1;
-	uint32_t sync     : 1;
-	uint32_t opc_cat  : 3;
+	uint32_t dword1;
 } instr_cat6b_t;
 
+/* dword1 encoding for dst_off: */
+typedef struct PACKED {
+	/* dword0: */
+	uint32_t dword0;
+	/* dword1: */
+	/* note: there is some weird stuff going on where sometimes
+	 * cat6->a.off is involved.. but that seems like a bug in
+	 * the blob, since it is used even if !cat6->src_off.
+	 * It would make sense for there to be some more bits to
+	 * bring us to 11 bits worth of offset, but not sure..
+	 */
+	int32_t off       : 8;
+	uint32_t mustbe1  : 1;   /* bit 8 of dword1: same bit as cat6->dst_off */
+	uint32_t dst      : 8;
+	uint32_t pad1     : 15;  /* upper bits (type, g, opc, ...) are set via the generic cat6 view */
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off: */
+typedef struct PACKED {
+	/* dword0: */
+	uint32_t dword0;
+	/* dword1: */
+	uint32_t dst      : 8;
+	uint32_t mustbe0  : 1;   /* bit 8 of dword1: same bit as cat6->dst_off */
+	uint32_t pad0     : 23;  /* remaining bits are set via the generic cat6 view */
+} instr_cat6d_t;
+
 /* I think some of the other cat6 instructions use additional
  * sub-encodings..
  */
@@ -619,16 +637,20 @@
 typedef union PACKED {
 	instr_cat6a_t a;
 	instr_cat6b_t b;
+	instr_cat6c_t c;
+	instr_cat6d_t d;
 	struct PACKED {
 		/* dword0: */
-		uint32_t has_off  : 1;
+		uint32_t src_off  : 1;
 		uint32_t pad1     : 31;
 
 		/* dword1: */
-		uint32_t dst      : 8;
-		uint32_t dummy2   : 9;
+		uint32_t pad2     : 8;
+		uint32_t dst_off  : 1;
+		uint32_t pad3     : 8;
 		uint32_t type     : 3;
-		uint32_t dummy3   : 2;
+		uint32_t g        : 1;  /* or in some cases it means dst immed */
+		uint32_t pad4     : 1;
 		uint32_t opc      : 5;
 		uint32_t jmp_tgt  : 1;
 		uint32_t sync     : 1;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3.c	2015-09-16 14:36:09.000000000 +0000
@@ -66,11 +66,22 @@
 	return ptr;
 }
 
-struct ir3 * ir3_create(void)
+struct ir3 * ir3_create(struct ir3_compiler *compiler,
+		unsigned nin, unsigned nout)
 {
-	struct ir3 *shader =
-			calloc(1, sizeof(struct ir3));
+	struct ir3 *shader = calloc(1, sizeof(struct ir3));
+
 	grow_heap(shader);
+
+	shader->compiler = compiler;
+	shader->ninputs = nin;
+	shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin);
+
+	shader->noutputs = nout;
+	shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout);
+
+	list_inithead(&shader->block_list);
+
 	return shader;
 }
 
@@ -81,7 +92,8 @@
 		shader->chunk = chunk->next;
 		free(chunk);
 	}
-	free(shader->instrs);
+	free(shader->indirects);
+	free(shader->predicates);
 	free(shader->baryfs);
 	free(shader);
 }
@@ -142,7 +154,11 @@
 {
 	instr_cat0_t *cat0 = ptr;
 
-	cat0->immed    = instr->cat0.immed;
+	if (info->gpu_id >= 400) {
+		cat0->a4xx.immed = instr->cat0.immed;
+	} else {
+		cat0->a3xx.immed = instr->cat0.immed;
+	}
 	cat0->repeat   = instr->repeat;
 	cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
 	cat0->inv      = instr->cat0.inv;
@@ -483,32 +499,51 @@
 static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 		struct ir3_info *info)
 {
-	struct ir3_register *dst  = instr->regs[0];
-	struct ir3_register *src1 = instr->regs[1];
-	struct ir3_register *src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+	struct ir3_register *dst, *src1, *src2;
 	instr_cat6_t *cat6 = ptr;
 
-	iassert(instr->regs_count >= 2);
+	/* the "dst" for a store instruction is (from the perspective
+	 * of data flow in the shader, ie. register use/def, etc) in
+	 * fact a register that is read by the instruction, rather
+	 * than written:
+	 */
+	if (is_store(instr)) {
+		iassert(instr->regs_count >= 3);
+
+		dst  = instr->regs[1];
+		src1 = instr->regs[2];
+		src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
+	} else {
+		iassert(instr->regs_count >= 2);
+
+		dst  = instr->regs[0];
+		src1 = instr->regs[1];
+		src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+	}
+
 
-	if (instr->cat6.offset || instr->opc == OPC_LDG) {
+	/* TODO we need a more comprehensive list about which instructions
+	 * can be encoded which way.  Or possibly use IR3_INSTR_0 flag to
+	 * indicate to use the src_off encoding even if offset is zero
+	 * (but then what to do about dst_off?)
+	 */
+	if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
 		instr_cat6a_t *cat6a = ptr;
 
-		cat6->has_off = true;
+		cat6->src_off = true;
 
-		cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
 		cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
 		cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
 		if (src2) {
 			cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
 			cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
 		}
-		cat6a->off = instr->cat6.offset;
+		cat6a->off = instr->cat6.src_offset;
 	} else {
 		instr_cat6b_t *cat6b = ptr;
 
-		cat6->has_off = false;
+		cat6->src_off = false;
 
-		cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
 		cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
 		cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
 		if (src2) {
@@ -517,10 +552,22 @@
 		}
 	}
 
+	if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+		instr_cat6c_t *cat6c = ptr;
+		cat6->dst_off = true;
+		cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+		cat6c->off = instr->cat6.dst_offset;
+	} else {
+		instr_cat6d_t *cat6d = ptr;
+		cat6->dst_off = false;
+		cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+	}
+
 	cat6->type     = instr->cat6.type;
 	cat6->opc      = instr->opc;
 	cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
 	cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
+	cat6->g        = !!(instr->flags & IR3_INSTR_G);
 	cat6->opc_cat  = 6;
 
 	return 0;
@@ -535,32 +582,40 @@
 		uint32_t gpu_id)
 {
 	uint32_t *ptr, *dwords;
-	uint32_t i;
 
+	info->gpu_id        = gpu_id;
 	info->max_reg       = -1;
 	info->max_half_reg  = -1;
 	info->max_const     = -1;
 	info->instrs_count  = 0;
+	info->sizedwords    = 0;
+
+	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
+		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+			info->sizedwords += 2;
+		}
+	}
 
 	/* need a integer number of instruction "groups" (sets of 16
 	 * instructions on a4xx or sets of 4 instructions on a3xx),
 	 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
 	 */
 	if (gpu_id >= 400) {
-		info->sizedwords = 2 * align(shader->instrs_count, 16);
+		info->sizedwords = align(info->sizedwords, 16 * 2);
 	} else {
-		info->sizedwords = 2 * align(shader->instrs_count, 4);
+		info->sizedwords = align(info->sizedwords, 4 * 2);
 	}
 
 	ptr = dwords = calloc(4, info->sizedwords);
 
-	for (i = 0; i < shader->instrs_count; i++) {
-		struct ir3_instruction *instr = shader->instrs[i];
-		int ret = emit[instr->category](instr, dwords, info);
-		if (ret)
-			goto fail;
-		info->instrs_count += 1 + instr->repeat;
-		dwords += 2;
+	list_for_each_entry (struct ir3_block, block, &shader->block_list, node) {
+		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+			int ret = emit[instr->category](instr, dwords, info);
+			if (ret)
+				goto fail;
+			info->instrs_count += 1 + instr->repeat;
+			dwords += 2;
+		}
 	}
 
 	return ptr;
@@ -581,50 +636,30 @@
 	return reg;
 }
 
-static void insert_instr(struct ir3 *shader,
+static void insert_instr(struct ir3_block *block,
 		struct ir3_instruction *instr)
 {
+	struct ir3 *shader = block->shader;
 #ifdef DEBUG
 	static uint32_t serialno = 0;
 	instr->serialno = ++serialno;
 #endif
-	array_insert(shader->instrs, instr);
+	list_addtail(&instr->node, &block->instr_list);
 
 	if (is_input(instr))
 		array_insert(shader->baryfs, instr);
 }
 
-struct ir3_block * ir3_block_create(struct ir3 *shader,
-		unsigned ntmp, unsigned nin, unsigned nout)
+struct ir3_block * ir3_block_create(struct ir3 *shader)
 {
-	struct ir3_block *block;
-	unsigned size;
-	char *ptr;
-
-	size = sizeof(*block);
-	size += sizeof(block->temporaries[0]) * ntmp;
-	size += sizeof(block->inputs[0]) * nin;
-	size += sizeof(block->outputs[0]) * nout;
-
-	ptr = ir3_alloc(shader, size);
-
-	block = (void *)ptr;
-	ptr += sizeof(*block);
-
-	block->temporaries = (void *)ptr;
-	block->ntemporaries = ntmp;
-	ptr += sizeof(block->temporaries[0]) * ntmp;
-
-	block->inputs = (void *)ptr;
-	block->ninputs = nin;
-	ptr += sizeof(block->inputs[0]) * nin;
-
-	block->outputs = (void *)ptr;
-	block->noutputs = nout;
-	ptr += sizeof(block->outputs[0]) * nout;
-
+	struct ir3_block *block = ir3_alloc(shader, sizeof(*block));
+#ifdef DEBUG
+	static uint32_t serialno = 0;
+	block->serialno = ++serialno;
+#endif
 	block->shader = shader;
-
+	list_inithead(&block->node);
+	list_inithead(&block->instr_list);
 	return block;
 }
 
@@ -652,7 +687,7 @@
 	instr->block = block;
 	instr->category = category;
 	instr->opc = opc;
-	insert_instr(block->shader, instr);
+	insert_instr(block, instr);
 	return instr;
 }
 
@@ -665,7 +700,6 @@
 	return ir3_instr_create2(block, category, opc, 4);
 }
 
-/* only used by old compiler: */
 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
 {
 	struct ir3_instruction *new_instr = instr_create(instr->block,
@@ -677,7 +711,7 @@
 	*new_instr = *instr;
 	new_instr->regs = regs;
 
-	insert_instr(instr->block->shader, new_instr);
+	insert_instr(instr->block, new_instr);
 
 	/* clone registers: */
 	new_instr->regs_count = 0;
@@ -694,10 +728,52 @@
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
 		int num, int flags)
 {
-	struct ir3_register *reg = reg_create(instr->block->shader, num, flags);
+	struct ir3 *shader = instr->block->shader;
+	struct ir3_register *reg = reg_create(shader, num, flags);
 #ifdef DEBUG
 	debug_assert(instr->regs_count < instr->regs_max);
 #endif
 	instr->regs[instr->regs_count++] = reg;
 	return reg;
 }
+
+void
+ir3_instr_set_address(struct ir3_instruction *instr,
+		struct ir3_instruction *addr)
+{
+	if (instr->address != addr) {  /* only act when the address instr actually changes */
+		struct ir3 *ir = instr->block->shader;
+		instr->address = addr;
+		array_insert(ir->indirects, instr);  /* record instr in the shader's indirects array */
+	}
+}
+
+void
+ir3_block_clear_mark(struct ir3_block *block)
+{
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
+		instr->flags &= ~IR3_INSTR_MARK;  /* clear only the MARK bit, other flags kept */
+}
+
+void
+ir3_clear_mark(struct ir3 *ir)
+{
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		ir3_block_clear_mark(block);  /* applied to every block on ir->block_list */
+	}
+}
+
+/* Number every instr (instr->ip) and set each block's start_ip/end_ip; returns the count.  note: this will destroy instr->depth, don't do it until after sched! */
+unsigned
+ir3_count_instructions(struct ir3 *ir)
+{
+	unsigned cnt = 0;  /* next ip to hand out */
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		block->start_ip = block->end_ip = cnt;  /* valid even if the block has no instrs */
+		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+			instr->ip = cnt++;
+			block->end_ip = instr->ip;  /* ends up as the ip of the block's last instr */
+		}
+	}
+	return cnt;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,7 @@
 #include <fcntl.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <stdio.h>
 #include <err.h>
 
 #include "tgsi/tgsi_parse.h"
@@ -42,127 +43,15 @@
 #include "instr-a3xx.h"
 #include "ir3.h"
 
-static void dump_reg(const char *name, uint32_t r)
-{
-	if (r != regid(63,0))
-		debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
-}
-
-static void dump_semantic(struct ir3_shader_variant *so,
-		unsigned sem, const char *name)
-{
-	uint32_t regid;
-	regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
-	dump_reg(name, regid);
-}
-
 static void dump_info(struct ir3_shader_variant *so, const char *str)
 {
 	uint32_t *bin;
-	const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
-
-	// for debug, dump some before/after info:
+	const char *type = ir3_shader_stage(so->shader);
 	// TODO make gpu_id configurable on cmdline
 	bin = ir3_shader_assemble(so, 320);
-	if (fd_mesa_debug & FD_DBG_DISASM) {
-		struct ir3_block *block = so->ir->block;
-		struct ir3_register *reg;
-		uint8_t regid;
-		unsigned i;
-
-		debug_printf("; %s: %s\n", type, str);
-
-		for (i = 0; i < block->ninputs; i++) {
-			if (!block->inputs[i]) {
-				debug_printf("; in%d unused\n", i);
-				continue;
-			}
-			reg = block->inputs[i]->regs[0];
-			regid = reg->num;
-			debug_printf("@in(%sr%d.%c)\tin%d\n",
-					(reg->flags & IR3_REG_HALF) ? "h" : "",
-					(regid >> 2), "xyzw"[regid & 0x3], i);
-		}
-
-		for (i = 0; i < block->noutputs; i++) {
-			if (!block->outputs[i]) {
-				debug_printf("; out%d unused\n", i);
-				continue;
-			}
-			/* kill shows up as a virtual output.. skip it! */
-			if (is_kill(block->outputs[i]))
-				continue;
-			reg = block->outputs[i]->regs[0];
-			regid = reg->num;
-			debug_printf("@out(%sr%d.%c)\tout%d\n",
-					(reg->flags & IR3_REG_HALF) ? "h" : "",
-					(regid >> 2), "xyzw"[regid & 0x3], i);
-		}
-
-		for (i = 0; i < so->immediates_count; i++) {
-			debug_printf("@const(c%d.x)\t", so->first_immediate + i);
-			debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
-					so->immediates[i].val[0],
-					so->immediates[i].val[1],
-					so->immediates[i].val[2],
-					so->immediates[i].val[3]);
-		}
-
-		disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
-
-		debug_printf("; %s: outputs:", type);
-		for (i = 0; i < so->outputs_count; i++) {
-			uint8_t regid = so->outputs[i].regid;
-			ir3_semantic sem = so->outputs[i].semantic;
-			debug_printf(" r%d.%c (%u:%u)",
-					(regid >> 2), "xyzw"[regid & 0x3],
-					sem2name(sem), sem2idx(sem));
-		}
-		debug_printf("\n");
-		debug_printf("; %s: inputs:", type);
-		for (i = 0; i < so->inputs_count; i++) {
-			uint8_t regid = so->inputs[i].regid;
-			ir3_semantic sem = so->inputs[i].semantic;
-			debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
-					(regid >> 2), "xyzw"[regid & 0x3],
-					sem2name(sem), sem2idx(sem),
-					so->inputs[i].compmask,
-					so->inputs[i].inloc,
-					so->inputs[i].bary);
-		}
-		debug_printf("\n");
-	}
-
-	/* print generic shader info: */
-	debug_printf("; %s: %u instructions, %d half, %d full\n", type,
-			so->info.instrs_count,
-			so->info.max_half_reg + 1,
-			so->info.max_reg + 1);
-
-	/* print shader type specific info: */
-	switch (so->type) {
-	case SHADER_VERTEX:
-		dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
-		dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
-		break;
-	case SHADER_FRAGMENT:
-		dump_reg("pos (bary)", so->pos_regid);
-		dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
-		dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
-		/* these two are hard-coded since we don't know how to
-		 * program them to anything but all 0's...
-		 */
-		if (so->frag_coord)
-			debug_printf("; fragcoord: r0.x\n");
-		if (so->frag_face)
-			debug_printf("; fragface: hr0.x\n");
-		break;
-	case SHADER_COMPUTE:
-		break;
-	}
+	debug_printf("; %s: %s\n", type, str);
+	ir3_shader_disasm(so, bin);
 	free(bin);
-
-	debug_printf("\n");
 }
 
 
@@ -194,16 +83,6 @@
 	return 0;
 }
 
-static void reset_variant(struct ir3_shader_variant *v, const char *msg)
-{
-	printf("; %s\n", msg);
-	v->inputs_count = 0;
-	v->outputs_count = 0;
-	v->total_in = 0;
-	v->has_samp = false;
-	v->immediates_count = 0;
-}
-
 static void print_usage(void)
 {
 	printf("Usage: ir3_compiler [OPTIONS]... FILE\n");
@@ -214,8 +93,7 @@
 	printf("    --saturate-s MASK - bitmask of samplers to saturate S coord\n");
 	printf("    --saturate-t MASK - bitmask of samplers to saturate T coord\n");
 	printf("    --saturate-r MASK - bitmask of samplers to saturate R coord\n");
-	printf("    --nocp            - disable copy propagation\n");
-	printf("    --nir             - use NIR compiler\n");
+	printf("    --stream-out      - enable stream-out (aka transform feedback)\n");
 	printf("    --help            - show this message\n");
 }
 
@@ -225,15 +103,19 @@
 	const char *filename;
 	struct tgsi_token toks[65536];
 	struct tgsi_parse_context parse;
+	struct ir3_compiler *compiler;
 	struct ir3_shader_variant v;
+	struct ir3_shader s;
 	struct ir3_shader_key key = {};
 	const char *info;
 	void *ptr;
 	size_t size;
-	int use_nir = 0;
 
 	fd_mesa_debug |= FD_DBG_DISASM;
 
+	memset(&s, 0, sizeof(s));
+	memset(&v, 0, sizeof(v));
+
 	/* cmdline args which impact shader variant get spit out in a
 	 * comment on the first line..  a quick/dirty way to preserve
 	 * that info so when ir3test recompiles the shader with a new
@@ -243,7 +125,7 @@
 
 	while (n < argc) {
 		if (!strcmp(argv[n], "--verbose")) {
-			fd_mesa_debug |=  FD_DBG_OPTDUMP | FD_DBG_MSGS | FD_DBG_OPTMSGS;
+			fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS;
 			n++;
 			continue;
 		}
@@ -290,13 +172,20 @@
 			continue;
 		}
 
-		if (!strcmp(argv[n], "--nocp")) {
-			fd_mesa_debug |= FD_DBG_NOCP;
-			n++;
-			continue;
-		}
-		if (!strcmp(argv[n], "--nir")) {
-			use_nir = true;
+		if (!strcmp(argv[n], "--stream-out")) {
+			struct pipe_stream_output_info *so = &s.stream_output;
+			debug_printf(" %s", argv[n]);
+			/* TODO more dynamic config based on number of outputs, etc
+			 * rather than just hard-code for first output:
+			 */
+			so->num_outputs = 1;
+			so->stride[0] = 4;
+			so->output[0].register_index = 0;
+			so->output[0].start_component = 0;
+			so->output[0].num_components = 4;
+			so->output[0].output_buffer = 0;
+			so->output[0].dst_offset = 2;
+			so->output[0].stream = 0;
 			n++;
 			continue;
 		}
@@ -312,9 +201,6 @@
 
 	filename = argv[n];
 
-	memset(&v, 0, sizeof(v));
-	v.key = key;
-
 	ret = read_file(filename, &ptr, &size);
 	if (ret) {
 		print_usage();
@@ -327,33 +213,29 @@
 	if (!tgsi_text_translate(ptr, toks, Elements(toks)))
 		errx(1, "could not parse `%s'", filename);
 
+	s.tokens = toks;
+
+	v.key = key;
+	v.shader = &s;
+
 	tgsi_parse_init(&parse, toks);
 	switch (parse.FullHeader.Processor.Processor) {
 	case TGSI_PROCESSOR_FRAGMENT:
-		v.type = SHADER_FRAGMENT;
+		s.type = v.type = SHADER_FRAGMENT;
 		break;
 	case TGSI_PROCESSOR_VERTEX:
-		v.type = SHADER_VERTEX;
+		s.type = v.type = SHADER_VERTEX;
 		break;
 	case TGSI_PROCESSOR_COMPUTE:
-		v.type = SHADER_COMPUTE;
+		s.type = v.type = SHADER_COMPUTE;
 		break;
 	}
 
-	if (use_nir) {
-		info = "NIR compiler";
-		ret = ir3_compile_shader_nir(&v, toks, key);
-	} else {
-		info = "TGSI compiler";
-		ret = ir3_compile_shader(&v, toks, key, true);
-	}
-
-	if (ret) {
-		reset_variant(&v, "compiler failed, trying without copy propagation!");
-		info = "compiler (no copy propagation)";
-		ret = ir3_compile_shader(&v, toks, key, false);
-	}
+	/* TODO cmdline option to target different gpus: */
+	compiler = ir3_compiler_create(320);
 
+	info = "NIR compiler";
+	ret = ir3_compile_shader_nir(compiler, &v);
 	if (ret) {
 		fprintf(stderr, "compiler failed!\n");
 		return ret;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_compiler.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_compiler.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_compiler.c	2015-09-16 14:36:09.000000000 +0000
@@ -1,7 +1,7 @@
 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
 /*
- * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -26,3714 +26,19 @@
  *    Rob Clark <robclark@freedesktop.org>
  */
 
-#include <stdarg.h>
-
-#include "pipe/p_state.h"
-#include "util/u_string.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "tgsi/tgsi_lowering.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_info.h"
-#include "tgsi/tgsi_strings.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_scan.h"
-
-#include "freedreno_util.h"
+#include "util/ralloc.h"
 
 #include "ir3_compiler.h"
-#include "ir3_shader.h"
-
-#include "instr-a3xx.h"
-#include "ir3.h"
-
-struct ir3_compile_context {
-	const struct tgsi_token *tokens;
-	bool free_tokens;
-	struct ir3 *ir;
-	struct ir3_shader_variant *so;
-	uint16_t integer_s;
-
-	struct ir3_block *block;
-	struct ir3_instruction *current_instr;
-
-	/* we need to defer updates to block->outputs[] until the end
-	 * of an instruction (so we don't see new value until *after*
-	 * the src registers are processed)
-	 */
-	struct {
-		struct ir3_instruction *instr, **instrp;
-	} output_updates[64];
-	unsigned num_output_updates;
-
-	/* are we in a sequence of "atomic" instructions?
-	 */
-	bool atomic;
-
-	/* For fragment shaders, from the hw perspective the only
-	 * actual input is r0.xy position register passed to bary.f.
-	 * But TGSI doesn't know that, it still declares things as
-	 * IN[] registers.  So we do all the input tracking normally
-	 * and fix things up after compile_instructions()
-	 *
-	 * NOTE that frag_pos is the hardware position (possibly it
-	 * is actually an index or tag or some such.. it is *not*
-	 * values that can be directly used for gl_FragCoord..)
-	 */
-	struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
-
-	/* For vertex shaders, keep track of the system values sources */
-	struct ir3_instruction *vertex_id, *basevertex, *instance_id;
-
-	struct tgsi_parse_context parser;
-	unsigned type;
-
-	struct tgsi_shader_info info;
-
-	/* hmm, would be nice if tgsi_scan_shader figured this out
-	 * for us:
-	 */
-	struct {
-		unsigned first, last;
-		struct ir3_instruction *fanin;
-	} array[MAX_ARRAYS];
-	uint32_t array_dirty;
-	/* offset into array[], per file, of first array info */
-	uint8_t array_offsets[TGSI_FILE_COUNT];
-
-	/* for calculating input/output positions/linkages: */
-	unsigned next_inloc;
-
-	/* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
-	 * so we need to use ldlv.u32 to load the varying directly:
-	 */
-	bool flat_bypass;
-
-	unsigned num_internal_temps;
-	struct tgsi_src_register internal_temps[8];
-
-	/* for looking up which system value is which */
-	unsigned sysval_semantics[8];
-
-	/* idx/slot for last compiler generated immediate */
-	unsigned immediate_idx;
-
-	/* stack of branch instructions that mark (potentially nested)
-	 * branch if/else/loop/etc
-	 */
-	struct {
-		struct ir3_instruction *instr, *cond;
-		bool inv;   /* true iff in else leg of branch */
-	} branch[16];
-	unsigned int branch_count;
-
-	/* list of kill instructions: */
-	struct ir3_instruction *kill[16];
-	unsigned int kill_count;
-
-	/* used when dst is same as one of the src, to avoid overwriting a
-	 * src element before the remaining scalar instructions that make
-	 * up the vector operation
-	 */
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register *tmp_src;
-
-	/* just for catching incorrect use of get_dst()/put_dst():
-	 */
-	bool using_tmp_dst;
-};
-
-
-static void vectorize(struct ir3_compile_context *ctx,
-		struct ir3_instruction *instr, struct tgsi_dst_register *dst,
-		int nsrcs, ...);
-static void create_mov(struct ir3_compile_context *ctx,
-		struct tgsi_dst_register *dst, struct tgsi_src_register *src);
-static type_t get_ftype(struct ir3_compile_context *ctx);
-static type_t get_utype(struct ir3_compile_context *ctx);
-
-static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i)
-{
-	/* ArrayID 0 for a given file is the legacy array spanning the entire file: */
-	ctx->array[i].first = 0;
-	ctx->array[i].last = ctx->info.file_max[file];
-	ctx->array_offsets[file] = i;
-	i += ctx->info.array_max[file] + 1;
-	return i;
-}
-
-static unsigned
-compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
-		const struct tgsi_token *tokens)
-{
-	unsigned ret, i;
-	struct tgsi_shader_info *info = &ctx->info;
-	struct tgsi_lowering_config lconfig = {
-			.color_two_side = so->key.color_two_side,
-			.lower_DST  = true,
-			.lower_XPD  = true,
-			.lower_SCS  = true,
-			.lower_LRP  = true,
-			.lower_FRC  = true,
-			.lower_POW  = true,
-			.lower_LIT  = true,
-			.lower_EXP  = true,
-			.lower_LOG  = true,
-			.lower_DP4  = true,
-			.lower_DP3  = true,
-			.lower_DPH  = true,
-			.lower_DP2  = true,
-			.lower_DP2A = true,
-	};
-
-	switch (so->type) {
-	case SHADER_FRAGMENT:
-	case SHADER_COMPUTE:
-		lconfig.saturate_s = so->key.fsaturate_s;
-		lconfig.saturate_t = so->key.fsaturate_t;
-		lconfig.saturate_r = so->key.fsaturate_r;
-		ctx->integer_s = so->key.finteger_s;
-		break;
-	case SHADER_VERTEX:
-		lconfig.saturate_s = so->key.vsaturate_s;
-		lconfig.saturate_t = so->key.vsaturate_t;
-		lconfig.saturate_r = so->key.vsaturate_r;
-		ctx->integer_s = so->key.vinteger_s;
-		break;
-	}
-
-	if (!so->shader) {
-		/* hack for standalone compiler which does not have
-		 * screen/context:
-		 */
-	} else if (ir3_shader_gpuid(so->shader) >= 400) {
-		/* a4xx seems to have *no* sam.p */
-		lconfig.lower_TXP = ~0;  /* lower all txp */
-		/* need special handling for "flat" */
-		ctx->flat_bypass = true;
-	} else {
-		/* a3xx just needs to avoid sam.p for 3d tex */
-		lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
-		/* no special handling for "flat" */
-		ctx->flat_bypass = false;
-	}
-
-	ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
-	ctx->free_tokens = !!ctx->tokens;
-	if (!ctx->tokens) {
-		/* no lowering */
-		ctx->tokens = tokens;
-	}
-	ctx->ir = so->ir;
-	ctx->so = so;
-	ctx->array_dirty = 0;
-	ctx->next_inloc = 8;
-	ctx->num_internal_temps = 0;
-	ctx->branch_count = 0;
-	ctx->kill_count = 0;
-	ctx->block = NULL;
-	ctx->current_instr = NULL;
-	ctx->num_output_updates = 0;
-	ctx->atomic = false;
-	ctx->frag_pos = NULL;
-	ctx->frag_face = NULL;
-	ctx->vertex_id = NULL;
-	ctx->instance_id = NULL;
-	ctx->tmp_src = NULL;
-	ctx->using_tmp_dst = false;
-
-	memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
-	memset(ctx->array, 0, sizeof(ctx->array));
-	memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets));
-
-#define FM(x) (1 << TGSI_FILE_##x)
-	/* NOTE: if relative addressing is used, we set constlen in
-	 * the compiler (to worst-case value) since we don't know in
-	 * the assembler what the max addr reg value can be:
-	 */
-	if (info->indirect_files & FM(CONSTANT))
-		so->constlen = MIN2(255, ctx->info.const_file_max[0] + 1);
-
-	i = 0;
-	i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
-	i += setup_arrays(ctx, TGSI_FILE_TEMPORARY, i);
-	i += setup_arrays(ctx, TGSI_FILE_OUTPUT, i);
-	/* any others? we don't track arrays for const..*/
-
-	/* Immediates go after constants: */
-	so->first_immediate = so->first_driver_param =
-		info->const_file_max[0] + 1;
-	/* 1 unit for the vertex id base */
-	if (so->type == SHADER_VERTEX)
-		so->first_immediate++;
-	/* 4 (vec4) units for ubo base addresses */
-	so->first_immediate += 4;
-	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
-
-	ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
-	if (ret != TGSI_PARSE_OK)
-		return ret;
-
-	ctx->type = ctx->parser.FullHeader.Processor.Processor;
-
-	return ret;
-}
-
-static void
-compile_error(struct ir3_compile_context *ctx, const char *format, ...)
-{
-	va_list ap;
-	va_start(ap, format);
-	_debug_vprintf(format, ap);
-	va_end(ap);
-	tgsi_dump(ctx->tokens, 0);
-	debug_assert(0);
-}
-
-#define compile_assert(ctx, cond) do { \
-		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
-	} while (0)
-
-static void
-compile_free(struct ir3_compile_context *ctx)
-{
-	if (ctx->free_tokens)
-		free((void *)ctx->tokens);
-	tgsi_parse_free(&ctx->parser);
-}
-
-struct instr_translater {
-	void (*fxn)(const struct instr_translater *t,
-			struct ir3_compile_context *ctx,
-			struct tgsi_full_instruction *inst);
-	unsigned tgsi_opc;
-	opc_t opc;
-	opc_t hopc;    /* opc to use for half_precision mode, if different */
-	unsigned arg;
-};
-
-static void
-instr_finish(struct ir3_compile_context *ctx)
-{
-	unsigned i;
-
-	if (ctx->atomic)
-		return;
-
-	for (i = 0; i < ctx->num_output_updates; i++)
-		*(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;
-
-	ctx->num_output_updates = 0;
-
-	while (ctx->array_dirty) {
-		unsigned aid = ffs(ctx->array_dirty) - 1;
-		ctx->array[aid].fanin = NULL;
-		ctx->array_dirty &= ~(1 << aid);
-	}
-}
-
-/* For "atomic" groups of instructions, for example the four scalar
- * instructions to perform a vec4 operation.  Basically this just
- * blocks out handling of output_updates so the next scalar instruction
- * still sees the result from before the start of the atomic group.
- *
- * NOTE: when used properly, this could probably replace get/put_dst()
- * stuff.
- */
-static void
-instr_atomic_start(struct ir3_compile_context *ctx)
-{
-	ctx->atomic = true;
-}
-
-static void
-instr_atomic_end(struct ir3_compile_context *ctx)
-{
-	ctx->atomic = false;
-	instr_finish(ctx);
-}
-
-static struct ir3_instruction *
-instr_create(struct ir3_compile_context *ctx, int category, opc_t opc)
-{
-	instr_finish(ctx);
-	return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc));
-}
-
-static struct ir3_block *
-push_block(struct ir3_compile_context *ctx)
-{
-	struct ir3_block *block;
-	unsigned ntmp, nin, nout;
-
-#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1))
-
-	/* hmm, give ourselves room to create 8 extra temporaries (vec4):
-	 */
-	ntmp = SCALAR_REGS(TEMPORARY);
-	ntmp += 8 * 4;
-
-	nout = SCALAR_REGS(OUTPUT);
-	nin  = SCALAR_REGS(INPUT) + SCALAR_REGS(SYSTEM_VALUE);
-
-	/* for outermost block, 'inputs' are the actual shader INPUT
-	 * register file.  Reads from INPUT registers always go back to
-	 * top block.  For nested blocks, 'inputs' is used to track any
-	 * TEMPORARY file register from one of the enclosing blocks that
-	 * is ready in this block.
-	 */
-	if (!ctx->block) {
-		/* NOTE: fragment shaders actually have two inputs (r0.xy, the
-		 * position)
-		 */
-		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-			int n = 2;
-			if (ctx->info.reads_position)
-				n += 4;
-			if (ctx->info.uses_frontface)
-				n += 4;
-			nin = MAX2(n, nin);
-			nout += ARRAY_SIZE(ctx->kill);
-		}
-	} else {
-		nin = ntmp;
-	}
-
-	block = ir3_block_create(ctx->ir, ntmp, nin, nout);
-
-	if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block)
-		block->noutputs -= ARRAY_SIZE(ctx->kill);
-
-	block->parent = ctx->block;
-	ctx->block = block;
-
-	return block;
-}
-
-static void
-pop_block(struct ir3_compile_context *ctx)
-{
-	ctx->block = ctx->block->parent;
-	compile_assert(ctx, ctx->block);
-}
-
-static struct ir3_instruction *
-create_output(struct ir3_block *block, struct ir3_instruction *instr,
-		unsigned n)
-{
-	struct ir3_instruction *out;
-
-	out = ir3_instr_create(block, -1, OPC_META_OUTPUT);
-	out->inout.block = block;
-	ir3_reg_create(out, n, 0);
-	if (instr)
-		ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr;
-
-	return out;
-}
-
-static struct ir3_instruction *
-create_input(struct ir3_block *block, struct ir3_instruction *instr,
-		unsigned n)
-{
-	struct ir3_instruction *in;
-
-	in = ir3_instr_create(block, -1, OPC_META_INPUT);
-	in->inout.block = block;
-	ir3_reg_create(in, n, 0);
-	if (instr)
-		ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
-
-	return in;
-}
-
-static struct ir3_instruction *
-block_input(struct ir3_block *block, unsigned n)
-{
-	/* references to INPUT register file always go back up to
-	 * top level:
-	 */
-	if (block->parent)
-		return block_input(block->parent, n);
-	return block->inputs[n];
-}
-
-/* return temporary in scope, creating if needed meta-input node
- * to track block inputs
- */
-static struct ir3_instruction *
-block_temporary(struct ir3_block *block, unsigned n)
-{
-	/* references to TEMPORARY register file, find the nearest
-	 * enclosing block which has already assigned this temporary,
-	 * creating meta-input instructions along the way to keep
-	 * track of block inputs
-	 */
-	if (block->parent && !block->temporaries[n]) {
-		/* if already have input for this block, reuse: */
-		if (!block->inputs[n])
-			block->inputs[n] = block_temporary(block->parent, n);
-
-		/* and create new input to return: */
-		return create_input(block, block->inputs[n], n);
-	}
-	return block->temporaries[n];
-}
-
-static struct ir3_instruction *
-create_immed(struct ir3_compile_context *ctx, float val)
-{
-	/* NOTE: *don't* use instr_create() here!
-	 */
-	struct ir3_instruction *instr;
-	instr = ir3_instr_create(ctx->block, 1, 0);
-	instr->cat1.src_type = get_ftype(ctx);
-	instr->cat1.dst_type = get_ftype(ctx);
-	ir3_reg_create(instr, 0, 0);
-	ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val;
-	return instr;
-}
-
-static void
-ssa_instr_set(struct ir3_compile_context *ctx, unsigned file, unsigned n,
-		struct ir3_instruction *instr)
-{
-	struct ir3_block *block = ctx->block;
-	unsigned idx = ctx->num_output_updates;
-
-	compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates));
-
-	/* NOTE: defer update of temporaries[idx] or output[idx]
-	 * until instr_finish(), so that if the current instruction
-	 * reads the same TEMP/OUT[] it gets the old value:
-	 *
-	 * bleh.. this might be a bit easier to just figure out
-	 * in instr_finish().  But at that point we've already
-	 * lost information about OUTPUT vs TEMPORARY register
-	 * file..
-	 */
-
-	switch (file) {
-	case TGSI_FILE_OUTPUT:
-		compile_assert(ctx, n < block->noutputs);
-		ctx->output_updates[idx].instrp = &block->outputs[n];
-		ctx->output_updates[idx].instr = instr;
-		ctx->num_output_updates++;
-		break;
-	case TGSI_FILE_TEMPORARY:
-		compile_assert(ctx, n < block->ntemporaries);
-		ctx->output_updates[idx].instrp = &block->temporaries[n];
-		ctx->output_updates[idx].instr = instr;
-		ctx->num_output_updates++;
-		break;
-	case TGSI_FILE_ADDRESS:
-		compile_assert(ctx, n < 1);
-		ctx->output_updates[idx].instrp = &block->address;
-		ctx->output_updates[idx].instr = instr;
-		ctx->num_output_updates++;
-		break;
-	}
-}
-
-static struct ir3_instruction *
-ssa_instr_get(struct ir3_compile_context *ctx, unsigned file, unsigned n)
-{
-	struct ir3_block *block = ctx->block;
-	struct ir3_instruction *instr = NULL;
-
-	switch (file) {
-	case TGSI_FILE_INPUT:
-		instr = block_input(ctx->block, n);
-		break;
-	case TGSI_FILE_OUTPUT:
-		/* really this should just happen in case of 'MOV_SAT OUT[n], ..',
-		 * for the following clamp instructions:
-		 */
-		instr = block->outputs[n];
-		/* we don't have to worry about read from an OUTPUT that was
-		 * assigned outside of the current block, because the _SAT
-		 * clamp instructions will always be in the same block as
-		 * the original instruction which wrote the OUTPUT
-		 */
-		compile_assert(ctx, instr);
-		break;
-	case TGSI_FILE_TEMPORARY:
-		instr = block_temporary(ctx->block, n);
-		if (!instr) {
-			/* this can happen when registers (or components of a TGSI
-			 * register) are used as src before they have been assigned
-			 * (undefined contents).  To avoid confusing the rest of the
-			 * compiler, and to generally keep things peachy, substitute
-			 * an instruction that sets the src to 0.0.  Or to keep
-			 * things undefined, I could plug in a random number? :-P
-			 *
-			 * NOTE: *don't* use instr_create() here!
-			 */
-			instr = create_immed(ctx, 0.0);
-			/* no need to recreate the immed for every access: */
-			block->temporaries[n] = instr;
-		}
-		break;
-	case TGSI_FILE_SYSTEM_VALUE:
-		switch (ctx->sysval_semantics[n >> 2]) {
-		case TGSI_SEMANTIC_VERTEXID_NOBASE:
-			instr = ctx->vertex_id;
-			break;
-		case TGSI_SEMANTIC_BASEVERTEX:
-			instr = ctx->basevertex;
-			break;
-		case TGSI_SEMANTIC_INSTANCEID:
-			instr = ctx->instance_id;
-			break;
-		}
-		break;
-	}
-
-	return instr;
-}
-
-static int dst_array_id(struct ir3_compile_context *ctx,
-		const struct tgsi_dst_register *dst)
-{
-	// XXX complete hack to recover tgsi_full_dst_register...
-	// nothing that isn't wrapped in a tgsi_full_dst_register
-	// should be indirect
-	const struct tgsi_full_dst_register *fdst = (const void *)dst;
-	return fdst->Indirect.ArrayID + ctx->array_offsets[dst->File];
-}
-
-static int src_array_id(struct ir3_compile_context *ctx,
-		const struct tgsi_src_register *src)
-{
-	// XXX complete hack to recover tgsi_full_src_register...
-	// nothing that isn't wrapped in a tgsi_full_src_register
-	// should be indirect
-	const struct tgsi_full_src_register *fsrc = (const void *)src;
-	debug_assert(src->File != TGSI_FILE_CONSTANT);
-	return fsrc->Indirect.ArrayID + ctx->array_offsets[src->File];
-}
-
-static struct ir3_instruction *
-array_fanin(struct ir3_compile_context *ctx, unsigned aid, unsigned file)
-{
-	struct ir3_instruction *instr;
-
-	if (ctx->array[aid].fanin) {
-		instr = ctx->array[aid].fanin;
-	} else {
-		unsigned first = ctx->array[aid].first;
-		unsigned last  = ctx->array[aid].last;
-		unsigned i, j;
-
-		instr = ir3_instr_create2(ctx->block, -1, OPC_META_FI,
-				1 + (4 * (last + 1 - first)));
-		ir3_reg_create(instr, 0, 0);
-		for (i = first; i <= last; i++) {
-			for (j = 0; j < 4; j++) {
-				unsigned n = regid(i, j);
-				ir3_reg_create(instr, 0, IR3_REG_SSA)->instr =
-						ssa_instr_get(ctx, file, n);
-			}
-		}
-		ctx->array[aid].fanin = instr;
-		ctx->array_dirty |= (1 << aid);
-	}
-
-	return instr;
-}
-
-static void
-ssa_dst(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
-		const struct tgsi_dst_register *dst, unsigned chan)
-{
-	if (dst->Indirect) {
-		struct ir3_register *reg = instr->regs[0];
-		unsigned i, aid = dst_array_id(ctx, dst);
-		unsigned first = ctx->array[aid].first;
-		unsigned last  = ctx->array[aid].last;
-		unsigned off   = dst->Index - first; /* vec4 offset */
-
-		reg->size = 4 * (1 + last - first);
-		reg->offset = regid(off, chan);
-
-		instr->fanin = array_fanin(ctx, aid, dst->File);
-
-		/* annotate with the array-id, to help out the register-
-		 * assignment stage.  At least for the case of indirect
-		 * writes, we should capture enough dependencies to
-		 * preserve the order of reads/writes of the array, so
-		 * the multiple "names" for the array should end up all
-		 * assigned to the same registers.
-		 */
-		instr->fanin->fi.aid = aid;
 
-		/* Since we are scalarizing vec4 tgsi instructions/regs, we
-		 * run into a slight complication here.  To do the naive thing
-		 * and setup a fanout for each scalar array element would end
-		 * up with the result that the instructions generated for each
-		 * component of the vec4 would end up clobbering each other.
-		 * So we take advantage here of knowing that the array index
-		 * (after the shl.b) will be a multiple of four, and only set
-		 * every fourth scalar component in the array.  See also
-		 * fixup_ssa_dst_array()
-		 */
-		for (i = first; i <= last; i++) {
-			struct ir3_instruction *split;
-			unsigned n = regid(i, chan);
-			int off = (4 * (i - first)) + chan;
-
-			if (is_meta(instr) && (instr->opc == OPC_META_FO))
-				off -= instr->fo.off;
-
-			split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
-			split->fo.off = off;
-			ir3_reg_create(split, 0, 0);
-			ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
-
-			ssa_instr_set(ctx, dst->File, n, split);
-		}
-	} else {
-		/* normal case (not relative addressed GPR) */
-		ssa_instr_set(ctx, dst->File, regid(dst->Index, chan), instr);
-	}
-}
-
-static void
-ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg,
-		const struct tgsi_src_register *src, unsigned chan)
-{
-	struct ir3_instruction *instr;
-
-	if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
-		/* for relative addressing of gpr's (due to register assignment)
-		 * we must generate a fanin instruction to collect all possible
-		 * array elements that the instruction could address together:
-		 */
-		unsigned aid   = src_array_id(ctx, src);
-		unsigned first = ctx->array[aid].first;
-		unsigned last  = ctx->array[aid].last;
-		unsigned off   = src->Index - first; /* vec4 offset */
-
-		reg->size = 4 * (1 + last - first);
-		reg->offset = regid(off, chan);
-
-		instr = array_fanin(ctx, aid, src->File);
-	} else if (src->File == TGSI_FILE_CONSTANT && src->Dimension) {
-		const struct tgsi_full_src_register *fsrc = (const void *)src;
-		struct ir3_instruction *temp = NULL;
-		int ubo_regid = regid(ctx->so->first_driver_param, 0) +
-			fsrc->Dimension.Index - 1;
-		int offset = 0;
-
-		/* We don't handle indirect UBO array accesses... yet. */
-		compile_assert(ctx, !fsrc->Dimension.Indirect);
-		/* UBOs start at index 1. */
-		compile_assert(ctx, fsrc->Dimension.Index > 0);
-
-		if (src->Indirect) {
-			/* In case of an indirect index, it will have been loaded into an
-			 * address register. There will be a sequence of
-			 *
-			 *   shl.b x, val, 2
-			 *   mova a0, x
-			 *
-			 * We rely on this sequence to get the original val out and shift
-			 * it by 4, since we're dealing in vec4 units.
-			 */
-			compile_assert(ctx, ctx->block->address);
-			compile_assert(ctx, ctx->block->address->regs[1]->instr->opc ==
-						   OPC_SHL_B);
-
-			temp = instr = instr_create(ctx, 2, OPC_SHL_B);
-			ir3_reg_create(instr, 0, 0);
-			ir3_reg_create(instr, 0, IR3_REG_HALF | IR3_REG_SSA)->instr =
-				ctx->block->address->regs[1]->instr->regs[1]->instr;
-			ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
-		} else if (src->Index >= 64) {
-			/* Otherwise it's a plain index (in vec4 units). Move it into a
-			 * register.
-			 */
-			temp = instr = instr_create(ctx, 1, 0);
-			instr->cat1.src_type = get_utype(ctx);
-			instr->cat1.dst_type = get_utype(ctx);
-			ir3_reg_create(instr, 0, 0);
-			ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = src->Index * 16;
-		} else {
-			/* The offset is small enough to fit into the ldg instruction
-			 * directly.
-			 */
-			offset = src->Index * 16;
-		}
-
-		if (temp) {
-			/* If there was an offset (most common), add it to the buffer
-			 * address.
-			 */
-			instr = instr_create(ctx, 2, OPC_ADD_S);
-			ir3_reg_create(instr, 0, 0);
-			ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
-			ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
-		} else {
-			/* Otherwise just load the buffer address directly */
-			instr = instr_create(ctx, 1, 0);
-			instr->cat1.src_type = get_utype(ctx);
-			instr->cat1.dst_type = get_utype(ctx);
-			ir3_reg_create(instr, 0, 0);
-			ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
-		}
-
-		temp = instr;
-
-		instr = instr_create(ctx, 6, OPC_LDG);
-		instr->cat6.type = TYPE_U32;
-		instr->cat6.offset = offset + chan * 4;
-		ir3_reg_create(instr, 0, 0);
-		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
-
-		reg->flags &= ~(IR3_REG_RELATIV | IR3_REG_CONST);
-	} else {
-		/* normal case (not relative addressed GPR) */
-		instr = ssa_instr_get(ctx, src->File, regid(src->Index, chan));
-	}
-
-	if (instr) {
-		reg->flags |= IR3_REG_SSA;
-		reg->instr = instr;
-	} else if (reg->flags & IR3_REG_SSA) {
-		/* special hack for trans_samp() which calls ssa_src() directly
-		 * to build up the collect (fanin) for const src.. (so SSA flag
-		 * set but no src instr... it basically gets lucky because we
-		 * default to 0.0 for "undefined" src instructions, which is
-		 * what it wants.  We probably need to give it a better way to
-		 * do this, but for now this hack:
-		 */
-		reg->instr = create_immed(ctx, 0.0);
-	}
-}
-
-static struct ir3_register *
-add_dst_reg_wrmask(struct ir3_compile_context *ctx,
-		struct ir3_instruction *instr, const struct tgsi_dst_register *dst,
-		unsigned chan, unsigned wrmask)
-{
-	unsigned flags = 0, num = 0;
-	struct ir3_register *reg;
-
-	switch (dst->File) {
-	case TGSI_FILE_OUTPUT:
-	case TGSI_FILE_TEMPORARY:
-		/* uses SSA */
-		break;
-	case TGSI_FILE_ADDRESS:
-		flags |= IR3_REG_ADDR;
-		/* uses SSA */
-		break;
-	default:
-		compile_error(ctx, "unsupported dst register file: %s\n",
-			tgsi_file_name(dst->File));
-		break;
-	}
-
-	if (dst->Indirect) {
-		flags |= IR3_REG_RELATIV;
-
-		/* shouldn't happen, and we can't cope with it below: */
-		compile_assert(ctx, wrmask == 0x1);
-
-		compile_assert(ctx, ctx->block->address);
-		if (instr->address)
-			compile_assert(ctx, ctx->block->address == instr->address);
-
-		instr->address = ctx->block->address;
-		array_insert(ctx->ir->indirects, instr);
-	}
-
-	reg = ir3_reg_create(instr, regid(num, chan), flags);
-	reg->wrmask = wrmask;
-
-	if (wrmask == 0x1) {
-		/* normal case */
-		ssa_dst(ctx, instr, dst, chan);
-	} else if ((dst->File == TGSI_FILE_TEMPORARY) ||
-			(dst->File == TGSI_FILE_OUTPUT) ||
-			(dst->File == TGSI_FILE_ADDRESS)) {
-		struct ir3_instruction *prev = NULL;
-		unsigned i;
-
-		compile_assert(ctx, !dst->Indirect);
-
-		/* if instruction writes multiple, we need to create
-		 * some place-holder collect the registers:
-		 */
-		for (i = 0; i < 4; i++) {
-			/* NOTE: slightly ugly that we setup neighbor ptrs
-			 * for FO here, but handle FI in CP pass.. we should
-			 * probably just always setup neighbor ptrs in the
-			 * frontend?
-			 */
-			struct ir3_instruction *split =
-					ir3_instr_create(ctx->block, -1, OPC_META_FO);
-			split->fo.off = i;
-			/* unused dst reg: */
-			/* NOTE: set SSA flag on dst here, because unused FO's
-			 * which don't get scheduled will end up not in the
-			 * instruction list when RA sets SSA flag on each dst.
-			 * Slight hack.  We really should set SSA flag on
-			 * every dst register in the frontend.
-			 */
-			ir3_reg_create(split, 0, IR3_REG_SSA);
-			/* and src reg used to hold original instr */
-			ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
-			if (prev) {
-				split->cp.left = prev;
-				split->cp.left_cnt++;
-				prev->cp.right = split;
-				prev->cp.right_cnt++;
-			}
-			if ((wrmask & (1 << i)) && !ctx->atomic)
-				ssa_dst(ctx, split, dst, chan+i);
-			prev = split;
-		}
-	}
-
-	return reg;
-}
-
-static struct ir3_register *
-add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
-		const struct tgsi_dst_register *dst, unsigned chan)
-{
-	return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1);
-}
-
-static struct ir3_register *
-add_src_reg_wrmask(struct ir3_compile_context *ctx,
-		struct ir3_instruction *instr, const struct tgsi_src_register *src,
-		unsigned chan, unsigned wrmask)
-{
-	unsigned flags = 0, num = 0;
-	struct ir3_register *reg;
-
-	switch (src->File) {
-	case TGSI_FILE_IMMEDIATE:
-		/* TODO if possible, use actual immediate instead of const.. but
-		 * TGSI has vec4 immediates, we can only embed scalar (of limited
-		 * size, depending on instruction..)
-		 */
-		flags |= IR3_REG_CONST;
-		num = src->Index + ctx->so->first_immediate;
-		break;
-	case TGSI_FILE_CONSTANT:
-		flags |= IR3_REG_CONST;
-		num = src->Index;
-		break;
-	case TGSI_FILE_OUTPUT:
-		/* NOTE: we should only end up w/ OUTPUT file for things like
-		 * clamp()'ing saturated dst instructions
-		 */
-	case TGSI_FILE_INPUT:
-	case TGSI_FILE_TEMPORARY:
-	case TGSI_FILE_SYSTEM_VALUE:
-		/* uses SSA */
-		break;
-	default:
-		compile_error(ctx, "unsupported src register file: %s\n",
-			tgsi_file_name(src->File));
-		break;
-	}
-
-	/* We seem to have 8 bits (6.2) for dst register always, so I think
-	 * it is safe to assume GPR cannot be >=64
-	 *
-	 * cat3 instructions only have 8 bits for src2, but cannot take a
-	 * const for src2
-	 *
-	 * cat5 and cat6 in some cases only has 8 bits, but cannot take a
-	 * const for any src.
-	 *
-	 * Other than that we seem to have 12 bits to encode const src,
-	 * except for cat1 which may only have 11 bits (but that seems like
-	 * a bug)
-	 */
-	if (flags & IR3_REG_CONST)
-		compile_assert(ctx, src->Index < (1 << 9));
-	else
-		compile_assert(ctx, src->Index < (1 << 6));
-
-	/* NOTE: abs/neg modifiers in tgsi only apply to float */
-	if (src->Absolute)
-		flags |= IR3_REG_FABS;
-	if (src->Negate)
-		flags |= IR3_REG_FNEG;
-
-	if (src->Indirect) {
-		flags |= IR3_REG_RELATIV;
-
-		/* shouldn't happen, and we can't cope with it below: */
-		compile_assert(ctx, wrmask == 0x1);
-
-		compile_assert(ctx, ctx->block->address);
-		if (instr->address)
-			compile_assert(ctx, ctx->block->address == instr->address);
-
-		instr->address = ctx->block->address;
-		array_insert(ctx->ir->indirects, instr);
-	}
-
-	reg = ir3_reg_create(instr, regid(num, chan), flags);
-	reg->wrmask = wrmask;
-
-	if (wrmask == 0x1) {
-		/* normal case */
-		ssa_src(ctx, reg, src, chan);
-	} else if ((src->File == TGSI_FILE_TEMPORARY) ||
-			(src->File == TGSI_FILE_OUTPUT) ||
-			(src->File == TGSI_FILE_INPUT)) {
-		struct ir3_instruction *collect;
-		unsigned i;
-
-		compile_assert(ctx, !src->Indirect);
-
-		/* if instruction reads multiple, we need to create
-		 * some place-holder collect the registers:
-		 */
-		collect = ir3_instr_create(ctx->block, -1, OPC_META_FI);
-		ir3_reg_create(collect, 0, 0);   /* unused dst reg */
-
-		for (i = 0; i < 4; i++) {
-			if (wrmask & (1 << i)) {
-				/* and src reg used point to the original instr */
-				ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-						src, chan + i);
-			} else if (wrmask & ~((i << i) - 1)) {
-				/* if any remaining components, then dummy
-				 * placeholder src reg to fill in the blanks:
-				 */
-				ir3_reg_create(collect, 0, 0);
-			}
-		}
-
-		reg->flags |= IR3_REG_SSA;
-		reg->instr = collect;
-	}
-
-	return reg;
-}
-
-static struct ir3_register *
-add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
-		const struct tgsi_src_register *src, unsigned chan)
-{
-	return add_src_reg_wrmask(ctx, instr, src, chan, 0x1);
-}
-
-static void
-src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
-{
-	src->File      = dst->File;
-	src->Indirect  = dst->Indirect;
-	src->Dimension = dst->Dimension;
-	src->Index     = dst->Index;
-	src->Absolute  = 0;
-	src->Negate    = 0;
-	src->SwizzleX  = TGSI_SWIZZLE_X;
-	src->SwizzleY  = TGSI_SWIZZLE_Y;
-	src->SwizzleZ  = TGSI_SWIZZLE_Z;
-	src->SwizzleW  = TGSI_SWIZZLE_W;
-}
-
-/* Get internal-temp src/dst to use for a sequence of instructions
- * generated by a single TGSI op.
- */
-static struct tgsi_src_register *
-get_internal_temp(struct ir3_compile_context *ctx,
-		struct tgsi_dst_register *tmp_dst)
-{
-	struct tgsi_src_register *tmp_src;
-	int n;
-
-	tmp_dst->File      = TGSI_FILE_TEMPORARY;
-	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
-	tmp_dst->Indirect  = 0;
-	tmp_dst->Dimension = 0;
-
-	/* assign next temporary: */
-	n = ctx->num_internal_temps++;
-	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
-	tmp_src = &ctx->internal_temps[n];
-
-	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
-
-	src_from_dst(tmp_src, tmp_dst);
-
-	return tmp_src;
-}
-
-static inline bool
-is_const(struct tgsi_src_register *src)
-{
-	return (src->File == TGSI_FILE_CONSTANT) ||
-			(src->File == TGSI_FILE_IMMEDIATE);
-}
-
-static inline bool
-is_relative(struct tgsi_src_register *src)
-{
-	return src->Indirect;
-}
-
-static inline bool
-is_rel_or_const(struct tgsi_src_register *src)
-{
-	return is_relative(src) || is_const(src);
-}
-
-static type_t
-get_ftype(struct ir3_compile_context *ctx)
-{
-	return TYPE_F32;
-}
-
-static type_t
-get_utype(struct ir3_compile_context *ctx)
-{
-	return TYPE_U32;
-}
-
-static type_t
-get_stype(struct ir3_compile_context *ctx)
-{
-	return TYPE_S32;
-}
-
-static unsigned
-src_swiz(struct tgsi_src_register *src, int chan)
-{
-	switch (chan) {
-	case 0: return src->SwizzleX;
-	case 1: return src->SwizzleY;
-	case 2: return src->SwizzleZ;
-	case 3: return src->SwizzleW;
-	}
-	assert(0);
-	return 0;
-}
-
-/* for instructions that cannot take a const register as src, if needed
- * generate a move to temporary gpr:
- */
-static struct tgsi_src_register *
-get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src)
-{
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register *tmp_src;
-
-	compile_assert(ctx, is_rel_or_const(src));
-
-	tmp_src = get_internal_temp(ctx, &tmp_dst);
-
-	create_mov(ctx, &tmp_dst, src);
-
-	return tmp_src;
-}
-
-static void
-get_immediate(struct ir3_compile_context *ctx,
-		struct tgsi_src_register *reg, uint32_t val)
+struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id)
 {
-	unsigned neg, swiz, idx, i;
-	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
-	static const unsigned swiz2tgsi[] = {
-			TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
-	};
-
-	for (i = 0; i < ctx->immediate_idx; i++) {
-		swiz = i % 4;
-		idx  = i / 4;
-
-		if (ctx->so->immediates[idx].val[swiz] == val) {
-			neg = 0;
-			break;
-		}
-
-		if (ctx->so->immediates[idx].val[swiz] == -val) {
-			neg = 1;
-			break;
-		}
-	}
-
-	if (i == ctx->immediate_idx) {
-		/* need to generate a new immediate: */
-		swiz = i % 4;
-		idx  = i / 4;
-		neg  = 0;
-		ctx->so->immediates[idx].val[swiz] = val;
-		ctx->so->immediates_count = idx + 1;
-		ctx->immediate_idx++;
-	}
-
-	reg->File      = TGSI_FILE_IMMEDIATE;
-	reg->Indirect  = 0;
-	reg->Dimension = 0;
-	reg->Index     = idx;
-	reg->Absolute  = 0;
-	reg->Negate    = neg;
-	reg->SwizzleX  = swiz2tgsi[swiz];
-	reg->SwizzleY  = swiz2tgsi[swiz];
-	reg->SwizzleZ  = swiz2tgsi[swiz];
-	reg->SwizzleW  = swiz2tgsi[swiz];
-}
-
-static void
-create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst,
-		struct tgsi_src_register *src)
-{
-	type_t type_mov = get_ftype(ctx);
-	unsigned i;
-
-	for (i = 0; i < 4; i++) {
-		/* move to destination: */
-		if (dst->WriteMask & (1 << i)) {
-			struct ir3_instruction *instr;
-
-			if (src->Absolute || src->Negate) {
-				/* can't have abs or neg on a mov instr, so use
-				 * absneg.f instead to handle these cases:
-				 */
-				instr = instr_create(ctx, 2, OPC_ABSNEG_F);
-			} else {
-				instr = instr_create(ctx, 1, 0);
-				instr->cat1.src_type = type_mov;
-				instr->cat1.dst_type = type_mov;
-			}
-
-			add_dst_reg(ctx, instr, dst, i);
-			add_src_reg(ctx, instr, src, src_swiz(src, i));
-		}
-	}
-}
-
-static void
-create_clamp(struct ir3_compile_context *ctx,
-		struct tgsi_dst_register *dst, struct tgsi_src_register *val,
-		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
-{
-	struct ir3_instruction *instr;
-
-	instr = instr_create(ctx, 2, OPC_MAX_F);
-	vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
-
-	instr = instr_create(ctx, 2, OPC_MIN_F);
-	vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
+	struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
+	compiler->gpu_id = gpu_id;
+	compiler->set = ir3_ra_alloc_reg_set(compiler);
+	return compiler;
 }
 
-static void
-create_clamp_imm(struct ir3_compile_context *ctx,
-		struct tgsi_dst_register *dst,
-		uint32_t minval, uint32_t maxval)
-{
-	struct tgsi_src_register minconst, maxconst;
-	struct tgsi_src_register src;
-
-	src_from_dst(&src, dst);
-
-	get_immediate(ctx, &minconst, minval);
-	get_immediate(ctx, &maxconst, maxval);
-
-	create_clamp(ctx, dst, &src, &minconst, &maxconst);
-}
-
-static struct tgsi_dst_register *
-get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst)
-{
-	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
-	unsigned i;
-
-	compile_assert(ctx, !ctx->using_tmp_dst);
-	ctx->using_tmp_dst = true;
-
-	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		struct tgsi_src_register *src = &inst->Src[i].Register;
-		if ((src->File == dst->File) && (src->Index == dst->Index)) {
-			if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
-					(src->SwizzleX == TGSI_SWIZZLE_X) &&
-					(src->SwizzleY == TGSI_SWIZZLE_Y) &&
-					(src->SwizzleZ == TGSI_SWIZZLE_Z) &&
-					(src->SwizzleW == TGSI_SWIZZLE_W))
-				continue;
-			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
-			ctx->tmp_dst.WriteMask = dst->WriteMask;
-			dst = &ctx->tmp_dst;
-			break;
-		}
-	}
-	return dst;
-}
-
-static void
-put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst,
-		struct tgsi_dst_register *dst)
-{
-	compile_assert(ctx, ctx->using_tmp_dst);
-	ctx->using_tmp_dst = false;
-
-	/* if necessary, add mov back into original dst: */
-	if (dst != &inst->Dst[0].Register) {
-		create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
-	}
-}
-
-/* helper to generate the necessary repeat and/or additional instructions
- * to turn a scalar instruction into a vector operation:
- */
-static void
-vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
-		struct tgsi_dst_register *dst, int nsrcs, ...)
+void ir3_compiler_destroy(struct ir3_compiler *compiler)
 {
-	va_list ap;
-	int i, j, n = 0;
-
-	instr_atomic_start(ctx);
-
-	for (i = 0; i < 4; i++) {
-		if (dst->WriteMask & (1 << i)) {
-			struct ir3_instruction *cur;
-
-			if (n++ == 0) {
-				cur = instr;
-			} else {
-				cur = instr_create(ctx, instr->category, instr->opc);
-				memcpy(cur->info, instr->info, sizeof(cur->info));
-			}
-
-			add_dst_reg(ctx, cur, dst, i);
-
-			va_start(ap, nsrcs);
-			for (j = 0; j < nsrcs; j++) {
-				struct tgsi_src_register *src =
-						va_arg(ap, struct tgsi_src_register *);
-				unsigned flags = va_arg(ap, unsigned);
-				struct ir3_register *reg;
-				if (flags & IR3_REG_IMMED) {
-					reg = ir3_reg_create(cur, 0, IR3_REG_IMMED);
-					/* this is an ugly cast.. should have put flags first! */
-					reg->iim_val = *(int *)&src;
-				} else {
-					reg = add_src_reg(ctx, cur, src, src_swiz(src, i));
-				}
-				reg->flags |= flags & ~(IR3_REG_FNEG | IR3_REG_SNEG);
-				if (flags & IR3_REG_FNEG)
-					reg->flags ^= IR3_REG_FNEG;
-				if (flags & IR3_REG_SNEG)
-					reg->flags ^= IR3_REG_SNEG;
-			}
-			va_end(ap);
-		}
-	}
-
-	instr_atomic_end(ctx);
-}
-
-/*
- * Handlers for TGSI instructions which do not have a 1:1 mapping to
- * native instructions:
- */
-
-static void
-trans_clamp(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *src0 = &inst->Src[0].Register;
-	struct tgsi_src_register *src1 = &inst->Src[1].Register;
-	struct tgsi_src_register *src2 = &inst->Src[2].Register;
-
-	create_clamp(ctx, dst, src0, src1, src2);
-
-	put_dst(ctx, inst, dst);
-}
-
-/* ARL(x) = x, but mova from hrN.x to a0.. */
-static void
-trans_arl(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register *tmp_src;
-	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
-	struct tgsi_src_register *src = &inst->Src[0].Register;
-	unsigned chan = src->SwizzleX;
-
-	compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);
-
-	/* NOTE: we allocate a temporary from a flat register
-	 * namespace (ignoring half vs full).  It turns out
-	 * not to really matter since registers get reassigned
-	 * later in ir3_ra which (hopefully!) can deal a bit
-	 * better with mixed half and full precision.
-	 */
-	tmp_src = get_internal_temp(ctx, &tmp_dst);
-
-	/* cov.{u,f}{32,16}s16 Rtmp, Rsrc */
-	instr = instr_create(ctx, 1, 0);
-	instr->cat1.src_type = (t->tgsi_opc == TGSI_OPCODE_ARL) ?
-			get_ftype(ctx) : get_utype(ctx);
-	instr->cat1.dst_type = TYPE_S16;
-	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
-	add_src_reg(ctx, instr, src, chan);
-
-	/* shl.b Rtmp, Rtmp, 2 */
-	instr = instr_create(ctx, 2, OPC_SHL_B);
-	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
-	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
-	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
-
-	/* mova a0, Rtmp */
-	instr = instr_create(ctx, 1, 0);
-	instr->cat1.src_type = TYPE_S16;
-	instr->cat1.dst_type = TYPE_S16;
-	add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
-	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
-}
-
-/*
- * texture fetch/sample instructions:
- */
-
-struct tex_info {
-	int8_t order[4];
-	int8_t args;
-	unsigned src_wrmask, flags;
-};
-
-struct target_info {
-	uint8_t dims;
-	uint8_t cube;
-	uint8_t array;
-	uint8_t shadow;
-};
-
-static const struct target_info tex_targets[] = {
-	[TGSI_TEXTURE_1D]               = { 1, 0, 0, 0 },
-	[TGSI_TEXTURE_2D]               = { 2, 0, 0, 0 },
-	[TGSI_TEXTURE_3D]               = { 3, 0, 0, 0 },
-	[TGSI_TEXTURE_CUBE]             = { 3, 1, 0, 0 },
-	[TGSI_TEXTURE_RECT]             = { 2, 0, 0, 0 },
-	[TGSI_TEXTURE_SHADOW1D]         = { 1, 0, 0, 1 },
-	[TGSI_TEXTURE_SHADOW2D]         = { 2, 0, 0, 1 },
-	[TGSI_TEXTURE_SHADOWRECT]       = { 2, 0, 0, 1 },
-	[TGSI_TEXTURE_1D_ARRAY]         = { 1, 0, 1, 0 },
-	[TGSI_TEXTURE_2D_ARRAY]         = { 2, 0, 1, 0 },
-	[TGSI_TEXTURE_SHADOW1D_ARRAY]   = { 1, 0, 1, 1 },
-	[TGSI_TEXTURE_SHADOW2D_ARRAY]   = { 2, 0, 1, 1 },
-	[TGSI_TEXTURE_SHADOWCUBE]       = { 3, 1, 0, 1 },
-	[TGSI_TEXTURE_2D_MSAA]          = { 2, 0, 0, 0 },
-	[TGSI_TEXTURE_2D_ARRAY_MSAA]    = { 2, 0, 1, 0 },
-	[TGSI_TEXTURE_CUBE_ARRAY]       = { 3, 1, 1, 0 },
-	[TGSI_TEXTURE_SHADOWCUBE_ARRAY] = { 3, 1, 1, 1 },
-};
-
-static void
-fill_tex_info(struct ir3_compile_context *ctx,
-			  struct tgsi_full_instruction *inst,
-			  struct tex_info *info)
-{
-	const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
-
-	if (tgt->dims == 3)
-		info->flags |= IR3_INSTR_3D;
-	if (tgt->array)
-		info->flags |= IR3_INSTR_A;
-	if (tgt->shadow)
-		info->flags |= IR3_INSTR_S;
-
-	switch (inst->Instruction.Opcode) {
-	case TGSI_OPCODE_TXB:
-	case TGSI_OPCODE_TXB2:
-	case TGSI_OPCODE_TXL:
-	case TGSI_OPCODE_TXF:
-		info->args = 2;
-		break;
-	case TGSI_OPCODE_TXP:
-		info->flags |= IR3_INSTR_P;
-		/* fallthrough */
-	case TGSI_OPCODE_TEX:
-	case TGSI_OPCODE_TXD:
-		info->args = 1;
-		break;
-	}
-
-	/*
-	 * lay out the first argument in the proper order:
-	 *  - actual coordinates first
-	 *  - shadow reference
-	 *  - array index
-	 *  - projection w
-	 *
-	 * bias/lod go into the second arg
-	 */
-	int arg, pos = 0;
-	for (arg = 0; arg < tgt->dims; arg++)
-		info->order[arg] = pos++;
-	if (tgt->dims == 1)
-		info->order[pos++] = -1;
-	if (tgt->shadow)
-		info->order[pos++] = MAX2(arg + tgt->array, 2);
-	if (tgt->array)
-		info->order[pos++] = arg++;
-	if (info->flags & IR3_INSTR_P)
-		info->order[pos++] = 3;
-
-	info->src_wrmask = (1 << pos) - 1;
-
-	for (; pos < 4; pos++)
-		info->order[pos] = -1;
-
-	assert(pos <= 4);
-}
-
-static bool check_swiz(struct tgsi_src_register *src, const int8_t order[4])
-{
-	unsigned i;
-	for (i = 1; (i < 4) && order[i] >= 0; i++)
-		if (src_swiz(src, i) != (src_swiz(src, 0) + order[i]))
-			return false;
-	return true;
-}
-
-static bool is_1d(unsigned tex)
-{
-	return tex_targets[tex].dims == 1;
-}
-
-static struct tgsi_src_register *
-get_tex_coord(struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst,
-		const struct tex_info *tinf)
-{
-	struct tgsi_src_register *coord = &inst->Src[0].Register;
-	struct ir3_instruction *instr;
-	unsigned tex = inst->Texture.Texture;
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register *tmp_src;
-	type_t type_mov = get_ftype(ctx);
-	unsigned j;
-
-	/* need to move things around: */
-	tmp_src = get_internal_temp(ctx, &tmp_dst);
-
-	for (j = 0; j < 4; j++) {
-		if (tinf->order[j] < 0)
-			continue;
-		instr = instr_create(ctx, 1, 0);  /* mov */
-		instr->cat1.src_type = type_mov;
-		instr->cat1.dst_type = type_mov;
-		add_dst_reg(ctx, instr, &tmp_dst, j);
-		add_src_reg(ctx, instr, coord,
-				src_swiz(coord, tinf->order[j]));
-	}
-
-	/* fix up .y coord: */
-	if (is_1d(tex)) {
-		struct ir3_register *imm;
-		instr = instr_create(ctx, 1, 0);  /* mov */
-		instr->cat1.src_type = type_mov;
-		instr->cat1.dst_type = type_mov;
-		add_dst_reg(ctx, instr, &tmp_dst, 1);  /* .y */
-		imm = ir3_reg_create(instr, 0, IR3_REG_IMMED);
-		if (inst->Instruction.Opcode == TGSI_OPCODE_TXF)
-			imm->iim_val = 0;
-		else
-			imm->fim_val = 0.5;
-	}
-
-	return tmp_src;
-}
-
-static void
-trans_samp(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr, *collect;
-	struct ir3_register *reg;
-	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
-	struct tgsi_src_register *orig, *coord, *samp, *offset, *dpdx, *dpdy;
-	struct tgsi_src_register zero;
-	const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
-	struct tex_info tinf;
-	int i;
-
-	memset(&tinf, 0, sizeof(tinf));
-	fill_tex_info(ctx, inst, &tinf);
-	coord = get_tex_coord(ctx, inst, &tinf);
-	get_immediate(ctx, &zero, 0);
-
-	switch (inst->Instruction.Opcode) {
-	case TGSI_OPCODE_TXB2:
-		orig = &inst->Src[1].Register;
-		samp = &inst->Src[2].Register;
-		break;
-	case TGSI_OPCODE_TXD:
-		orig = &inst->Src[0].Register;
-		dpdx = &inst->Src[1].Register;
-		dpdy = &inst->Src[2].Register;
-		samp = &inst->Src[3].Register;
-		if (is_rel_or_const(dpdx))
-				dpdx = get_unconst(ctx, dpdx);
-		if (is_rel_or_const(dpdy))
-				dpdy = get_unconst(ctx, dpdy);
-		break;
-	default:
-		orig = &inst->Src[0].Register;
-		samp = &inst->Src[1].Register;
-		break;
-	}
-	if (tinf.args > 1 && is_rel_or_const(orig))
-		orig = get_unconst(ctx, orig);
-
-	/* scale up integer coords for TXF based on the LOD */
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
-		struct tgsi_dst_register tmp_dst;
-		struct tgsi_src_register *tmp_src;
-		type_t type_mov = get_utype(ctx);
-
-		tmp_src = get_internal_temp(ctx, &tmp_dst);
-		for (i = 0; i < tgt->dims; i++) {
-			instr = instr_create(ctx, 2, OPC_SHL_B);
-			add_dst_reg(ctx, instr, &tmp_dst, i);
-			add_src_reg(ctx, instr, coord, src_swiz(coord, i));
-			add_src_reg(ctx, instr, orig, orig->SwizzleW);
-		}
-		if (tgt->dims < 2) {
-			instr = instr_create(ctx, 1, 0);
-			instr->cat1.src_type = type_mov;
-			instr->cat1.dst_type = type_mov;
-			add_dst_reg(ctx, instr, &tmp_dst, i);
-			add_src_reg(ctx, instr, &zero, zero.SwizzleX);
-			i++;
-		}
-		if (tgt->array) {
-			instr = instr_create(ctx, 1, 0);
-			instr->cat1.src_type = type_mov;
-			instr->cat1.dst_type = type_mov;
-			add_dst_reg(ctx, instr, &tmp_dst, i);
-			add_src_reg(ctx, instr, coord, src_swiz(coord, i));
-		}
-		coord = tmp_src;
-	}
-
-	if (inst->Texture.NumOffsets) {
-		struct tgsi_texture_offset *tex_offset = &inst->TexOffsets[0];
-		struct tgsi_src_register offset_src = {0};
-
-		offset_src.File = tex_offset->File;
-		offset_src.Index = tex_offset->Index;
-		offset_src.SwizzleX = tex_offset->SwizzleX;
-		offset_src.SwizzleY = tex_offset->SwizzleY;
-		offset_src.SwizzleZ = tex_offset->SwizzleZ;
-		offset = get_unconst(ctx, &offset_src);
-		tinf.flags |= IR3_INSTR_O;
-	}
-
-	instr = instr_create(ctx, 5, t->opc);
-	if (ctx->integer_s & (1 << samp->Index))
-		instr->cat5.type = get_utype(ctx);
-	else
-		instr->cat5.type = get_ftype(ctx);
-	instr->cat5.samp = samp->Index;
-	instr->cat5.tex  = samp->Index;
-	instr->flags |= tinf.flags;
-
-	add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
-
-	reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
-
-	collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 12);
-	ir3_reg_create(collect, 0, 0);
-	for (i = 0; i < 4; i++) {
-		if (tinf.src_wrmask & (1 << i))
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-					coord, src_swiz(coord, i));
-		else if (tinf.src_wrmask & ~((1 << i) - 1))
-			ir3_reg_create(collect, 0, 0);
-	}
-
-	/* Attach derivatives onto the end of the fan-in. Derivatives start after
-	 * the 4th argument, so make sure that fi is padded up to 4 first.
-	 */
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
-		while (collect->regs_count < 5)
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-					&zero, zero.SwizzleX);
-		for (i = 0; i < tgt->dims; i++)
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdx, i);
-		if (tgt->dims < 2)
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-					&zero, zero.SwizzleX);
-		for (i = 0; i < tgt->dims; i++)
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdy, i);
-		if (tgt->dims < 2)
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-					&zero, zero.SwizzleX);
-		tinf.src_wrmask |= ((1 << (2 * MAX2(tgt->dims, 2))) - 1) << 4;
-	}
-
-	reg->instr = collect;
-	reg->wrmask = tinf.src_wrmask;
-
-	/* The second argument contains the offsets, followed by the lod/bias
-	 * argument. This is constructed more manually due to the dynamic nature.
-	 */
-	if (inst->Texture.NumOffsets == 0 && tinf.args == 1)
-		return;
-
-	reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
-
-	collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 5);
-	ir3_reg_create(collect, 0, 0);
-
-	if (inst->Texture.NumOffsets) {
-		for (i = 0; i < tgt->dims; i++)
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-					offset, i);
-		if (tgt->dims < 2)
-			ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-					&zero, zero.SwizzleX);
-	}
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2)
-		ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-				orig, orig->SwizzleX);
-	else if (tinf.args > 1)
-		ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
-				orig, orig->SwizzleW);
-
-	reg->instr = collect;
-	reg->wrmask = (1 << (collect->regs_count - 1)) - 1;
-}
-
-static void
-trans_txq(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
-	struct tgsi_src_register *level = &inst->Src[0].Register;
-	struct tgsi_src_register *samp = &inst->Src[1].Register;
-	const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
-	struct tex_info tinf;
-
-	memset(&tinf, 0, sizeof(tinf));
-	fill_tex_info(ctx, inst, &tinf);
-	if (is_rel_or_const(level))
-		level = get_unconst(ctx, level);
-
-	instr = instr_create(ctx, 5, OPC_GETSIZE);
-	instr->cat5.type = get_utype(ctx);
-	instr->cat5.samp = samp->Index;
-	instr->cat5.tex  = samp->Index;
-	instr->flags |= tinf.flags;
-
-	if (tgt->array && (dst->WriteMask & (1 << tgt->dims))) {
-		/* Array size actually ends up in .w rather than .z. This doesn't
-		 * matter for miplevel 0, but for higher mips the value in z is
-		 * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
-		 * returned, which means that we have to add 1 to it for arrays.
-		 */
-		struct tgsi_dst_register tmp_dst;
-		struct tgsi_src_register *tmp_src;
-		type_t type_mov = get_utype(ctx);
-
-		tmp_src = get_internal_temp(ctx, &tmp_dst);
-		add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0,
-						   dst->WriteMask | TGSI_WRITEMASK_W);
-		add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
-
-		if (dst->WriteMask & TGSI_WRITEMASK_X) {
-			instr = instr_create(ctx, 1, 0);
-			instr->cat1.src_type = type_mov;
-			instr->cat1.dst_type = type_mov;
-			add_dst_reg(ctx, instr, dst, 0);
-			add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 0));
-		}
-
-		if (tgt->dims == 2) {
-			if (dst->WriteMask & TGSI_WRITEMASK_Y) {
-				instr = instr_create(ctx, 1, 0);
-				instr->cat1.src_type = type_mov;
-				instr->cat1.dst_type = type_mov;
-				add_dst_reg(ctx, instr, dst, 1);
-				add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 1));
-			}
-		}
-
-		instr = instr_create(ctx, 2, OPC_ADD_U);
-		add_dst_reg(ctx, instr, dst, tgt->dims);
-		add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 3));
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
-	} else {
-		add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
-		add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
-	}
-
-	if (dst->WriteMask & TGSI_WRITEMASK_W) {
-		/* The # of levels comes from getinfo.z. We need to add 1 to it, since
-		 * the value in TEX_CONST_0 is zero-based.
-		 */
-		struct tgsi_dst_register tmp_dst;
-		struct tgsi_src_register *tmp_src;
-
-		tmp_src = get_internal_temp(ctx, &tmp_dst);
-		instr = instr_create(ctx, 5, OPC_GETINFO);
-		instr->cat5.type = get_utype(ctx);
-		instr->cat5.samp = samp->Index;
-		instr->cat5.tex  = samp->Index;
-		add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0, TGSI_WRITEMASK_Z);
-
-		instr = instr_create(ctx, 2, OPC_ADD_U);
-		add_dst_reg(ctx, instr, dst, 3);
-		add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 2));
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
-	}
-}
-
-/* DDX/DDY */
-static void
-trans_deriv(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
-	struct tgsi_src_register *src = &inst->Src[0].Register;
-	static const int8_t order[4] = {0, 1, 2, 3};
-
-	if (!check_swiz(src, order)) {
-		struct tgsi_dst_register tmp_dst;
-		struct tgsi_src_register *tmp_src;
-
-		tmp_src = get_internal_temp(ctx, &tmp_dst);
-		create_mov(ctx, &tmp_dst, src);
-
-		src = tmp_src;
-	}
-
-	/* This might be a workaround for hw bug?  Blob compiler always
-	 * seems to work two components at a time for dsy/dsx.  It does
-	 * actually seem to work in some cases (or at least some piglit
-	 * tests) for four components at a time.  But seems more reliable
-	 * to split this into two instructions like the blob compiler
-	 * does:
-	 */
-
-	instr = instr_create(ctx, 5, t->opc);
-	instr->cat5.type = get_ftype(ctx);
-	add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask & 0x3);
-	add_src_reg_wrmask(ctx, instr, src, 0, dst->WriteMask & 0x3);
-
-	instr = instr_create(ctx, 5, t->opc);
-	instr->cat5.type = get_ftype(ctx);
-	add_dst_reg_wrmask(ctx, instr, dst, 2, (dst->WriteMask >> 2) & 0x3);
-	add_src_reg_wrmask(ctx, instr, src, 2, (dst->WriteMask >> 2) & 0x3);
-}
-
-/*
- * SEQ(a,b) = (a == b) ? 1.0 : 0.0
- *   cmps.f.eq tmp0, a, b
- *   cov.u16f16 dst, tmp0
- *
- * SNE(a,b) = (a != b) ? 1.0 : 0.0
- *   cmps.f.ne tmp0, a, b
- *   cov.u16f16 dst, tmp0
- *
- * SGE(a,b) = (a >= b) ? 1.0 : 0.0
- *   cmps.f.ge tmp0, a, b
- *   cov.u16f16 dst, tmp0
- *
- * SLE(a,b) = (a <= b) ? 1.0 : 0.0
- *   cmps.f.le tmp0, a, b
- *   cov.u16f16 dst, tmp0
- *
- * SGT(a,b) = (a > b)  ? 1.0 : 0.0
- *   cmps.f.gt tmp0, a, b
- *   cov.u16f16 dst, tmp0
- *
- * SLT(a,b) = (a < b)  ? 1.0 : 0.0
- *   cmps.f.lt tmp0, a, b
- *   cov.u16f16 dst, tmp0
- *
- * CMP(a,b,c) = (a < 0.0) ? b : c
- *   cmps.f.lt tmp0, a, {0.0}
- *   sel.b16 dst, b, tmp0, c
- */
-static void
-trans_cmp(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register *tmp_src;
-	struct tgsi_src_register constval0;
-	/* final instruction for CMP() uses orig src1 and src2: */
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *a0, *a1, *a2;
-	unsigned condition;
-
-	tmp_src = get_internal_temp(ctx, &tmp_dst);
-
-	a0 = &inst->Src[0].Register;  /* a */
-	a1 = &inst->Src[1].Register;  /* b */
-
-	switch (t->tgsi_opc) {
-	case TGSI_OPCODE_SEQ:
-	case TGSI_OPCODE_FSEQ:
-		condition = IR3_COND_EQ;
-		break;
-	case TGSI_OPCODE_SNE:
-	case TGSI_OPCODE_FSNE:
-		condition = IR3_COND_NE;
-		break;
-	case TGSI_OPCODE_SGE:
-	case TGSI_OPCODE_FSGE:
-		condition = IR3_COND_GE;
-		break;
-	case TGSI_OPCODE_SLT:
-	case TGSI_OPCODE_FSLT:
-		condition = IR3_COND_LT;
-		break;
-	case TGSI_OPCODE_SLE:
-		condition = IR3_COND_LE;
-		break;
-	case TGSI_OPCODE_SGT:
-		condition = IR3_COND_GT;
-		break;
-	case TGSI_OPCODE_CMP:
-		get_immediate(ctx, &constval0, fui(0.0));
-		a0 = &inst->Src[0].Register;  /* a */
-		a1 = &constval0;              /* {0.0} */
-		condition = IR3_COND_LT;
-		break;
-	default:
-		compile_assert(ctx, 0);
-		return;
-	}
-
-	if (is_const(a0) && is_const(a1))
-		a0 = get_unconst(ctx, a0);
-
-	/* cmps.f.<cond> tmp, a0, a1 */
-	instr = instr_create(ctx, 2, OPC_CMPS_F);
-	instr->cat2.condition = condition;
-	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
-
-	switch (t->tgsi_opc) {
-	case TGSI_OPCODE_SEQ:
-	case TGSI_OPCODE_SGE:
-	case TGSI_OPCODE_SLE:
-	case TGSI_OPCODE_SNE:
-	case TGSI_OPCODE_SGT:
-	case TGSI_OPCODE_SLT:
-		/* cov.u16f16 dst, tmp0 */
-		instr = instr_create(ctx, 1, 0);
-		instr->cat1.src_type = get_utype(ctx);
-		instr->cat1.dst_type = get_ftype(ctx);
-		vectorize(ctx, instr, dst, 1, tmp_src, 0);
-		break;
-	case TGSI_OPCODE_FSEQ:
-	case TGSI_OPCODE_FSGE:
-	case TGSI_OPCODE_FSNE:
-	case TGSI_OPCODE_FSLT:
-		/* absneg.s dst, (neg)tmp0 */
-		instr = instr_create(ctx, 2, OPC_ABSNEG_S);
-		vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
-		break;
-	case TGSI_OPCODE_CMP:
-		a1 = &inst->Src[1].Register;
-		a2 = &inst->Src[2].Register;
-		/* sel.{b32,b16} dst, src2, tmp, src1 */
-		instr = instr_create(ctx, 3, OPC_SEL_B32);
-		vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
-
-		break;
-	}
-
-	put_dst(ctx, inst, dst);
-}
-
-/*
- * USNE(a,b) = (a != b) ? ~0 : 0
- *   cmps.u32.ne dst, a, b
- *
- * USEQ(a,b) = (a == b) ? ~0 : 0
- *   cmps.u32.eq dst, a, b
- *
- * ISGE(a,b) = (a > b) ? ~0 : 0
- *   cmps.s32.ge dst, a, b
- *
- * USGE(a,b) = (a > b) ? ~0 : 0
- *   cmps.u32.ge dst, a, b
- *
- * ISLT(a,b) = (a < b) ? ~0 : 0
- *   cmps.s32.lt dst, a, b
- *
- * USLT(a,b) = (a < b) ? ~0 : 0
- *   cmps.u32.lt dst, a, b
- *
- */
-static void
-trans_icmp(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register *tmp_src;
-	struct tgsi_src_register *a0, *a1;
-	unsigned condition;
-
-	a0 = &inst->Src[0].Register;  /* a */
-	a1 = &inst->Src[1].Register;  /* b */
-
-	switch (t->tgsi_opc) {
-	case TGSI_OPCODE_USNE:
-		condition = IR3_COND_NE;
-		break;
-	case TGSI_OPCODE_USEQ:
-		condition = IR3_COND_EQ;
-		break;
-	case TGSI_OPCODE_ISGE:
-	case TGSI_OPCODE_USGE:
-		condition = IR3_COND_GE;
-		break;
-	case TGSI_OPCODE_ISLT:
-	case TGSI_OPCODE_USLT:
-		condition = IR3_COND_LT;
-		break;
-
-	default:
-		compile_assert(ctx, 0);
-		return;
-	}
-
-	if (is_const(a0) && is_const(a1))
-		a0 = get_unconst(ctx, a0);
-
-	tmp_src = get_internal_temp(ctx, &tmp_dst);
-	/* cmps.{u32,s32}.<cond> tmp, a0, a1 */
-	instr = instr_create(ctx, 2, t->opc);
-	instr->cat2.condition = condition;
-	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
-
-	/* absneg.s dst, (neg)tmp */
-	instr = instr_create(ctx, 2, OPC_ABSNEG_S);
-	vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
-
-	put_dst(ctx, inst, dst);
-}
-
-/*
- * UCMP(a,b,c) = a ? b : c
- *   sel.b16 dst, b, a, c
- */
-static void
-trans_ucmp(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *a0, *a1, *a2;
-
-	a0 = &inst->Src[0].Register;  /* a */
-	a1 = &inst->Src[1].Register;  /* b */
-	a2 = &inst->Src[2].Register;  /* c */
-
-	if (is_rel_or_const(a0))
-		a0 = get_unconst(ctx, a0);
-
-	/* sel.{b32,b16} dst, b, a, c */
-	instr = instr_create(ctx, 3, OPC_SEL_B32);
-	vectorize(ctx, instr, dst, 3, a1, 0, a0, 0, a2, 0);
-	put_dst(ctx, inst, dst);
-}
-
-/*
- * ISSG(a) = a < 0 ? -1 : a > 0 ? 1 : 0
- *   cmps.s.lt tmp_neg, a, 0  # 1 if a is negative
- *   cmps.s.gt tmp_pos, a, 0  # 1 if a is positive
- *   sub.u dst, tmp_pos, tmp_neg
- */
-static void
-trans_issg(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *a = &inst->Src[0].Register;
-	struct tgsi_dst_register neg_dst, pos_dst;
-	struct tgsi_src_register *neg_src, *pos_src;
-
-	neg_src = get_internal_temp(ctx, &neg_dst);
-	pos_src = get_internal_temp(ctx, &pos_dst);
-
-	/* cmps.s.lt neg, a, 0 */
-	instr = instr_create(ctx, 2, OPC_CMPS_S);
-	instr->cat2.condition = IR3_COND_LT;
-	vectorize(ctx, instr, &neg_dst, 2, a, 0, 0, IR3_REG_IMMED);
-
-	/* cmps.s.gt pos, a, 0 */
-	instr = instr_create(ctx, 2, OPC_CMPS_S);
-	instr->cat2.condition = IR3_COND_GT;
-	vectorize(ctx, instr, &pos_dst, 2, a, 0, 0, IR3_REG_IMMED);
-
-	/* sub.u dst, pos, neg */
-	instr = instr_create(ctx, 2, OPC_SUB_U);
-	vectorize(ctx, instr, dst, 2, pos_src, 0, neg_src, 0);
-
-	put_dst(ctx, inst, dst);
-}
-
-
-
-/*
- * Conditional / Flow control
- */
-
-static void
-push_branch(struct ir3_compile_context *ctx, bool inv,
-		struct ir3_instruction *instr, struct ir3_instruction *cond)
-{
-	unsigned int idx = ctx->branch_count++;
-	compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch));
-	ctx->branch[idx].instr = instr;
-	ctx->branch[idx].inv = inv;
-	/* else side of branch has same condition: */
-	if (!inv)
-		ctx->branch[idx].cond = cond;
-}
-
-static struct ir3_instruction *
-pop_branch(struct ir3_compile_context *ctx)
-{
-	unsigned int idx = --ctx->branch_count;
-	return ctx->branch[idx].instr;
-}
-
-static void
-trans_if(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr, *cond;
-	struct tgsi_src_register *src = &inst->Src[0].Register;
-	struct tgsi_dst_register tmp_dst;
-	struct tgsi_src_register *tmp_src;
-	struct tgsi_src_register constval;
-
-	get_immediate(ctx, &constval, fui(0.0));
-	tmp_src = get_internal_temp(ctx, &tmp_dst);
-
-	if (is_const(src))
-		src = get_unconst(ctx, src);
-
-	/* cmps.{f,u}.ne tmp0, b, {0.0} */
-	instr = instr_create(ctx, 2, t->opc);
-	add_dst_reg(ctx, instr, &tmp_dst, 0);
-	add_src_reg(ctx, instr, src, src->SwizzleX);
-	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
-	instr->cat2.condition = IR3_COND_NE;
-
-	compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */
-	cond = instr->regs[1]->instr;
-
-	/* meta:flow tmp0 */
-	instr = instr_create(ctx, -1, OPC_META_FLOW);
-	ir3_reg_create(instr, 0, 0);  /* dummy dst */
-	add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
-
-	push_branch(ctx, false, instr, cond);
-	instr->flow.if_block = push_block(ctx);
-}
-
-static void
-trans_else(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-
-	pop_block(ctx);
-
-	instr = pop_branch(ctx);
-
-	compile_assert(ctx, (instr->category == -1) &&
-			(instr->opc == OPC_META_FLOW));
-
-	push_branch(ctx, true, instr, NULL);
-	instr->flow.else_block = push_block(ctx);
-}
-
-static struct ir3_instruction *
-find_temporary(struct ir3_block *block, unsigned n)
-{
-	if (block->parent && !block->temporaries[n])
-		return find_temporary(block->parent, n);
-	return block->temporaries[n];
-}
-
-static struct ir3_instruction *
-find_output(struct ir3_block *block, unsigned n)
-{
-	if (block->parent && !block->outputs[n])
-		return find_output(block->parent, n);
-	return block->outputs[n];
-}
-
-static struct ir3_instruction *
-create_phi(struct ir3_compile_context *ctx, struct ir3_instruction *cond,
-		struct ir3_instruction *a, struct ir3_instruction *b)
-{
-	struct ir3_instruction *phi;
-
-	compile_assert(ctx, cond);
-
-	/* Either side of the condition could be null..  which
-	 * indicates a variable written on only one side of the
-	 * branch.  Normally this should only be variables not
-	 * used outside of that side of the branch.  So we could
-	 * just 'return a ? a : b;' in that case.  But for better
-	 * defined undefined behavior we just stick in imm{0.0}.
-	 * In the common case of a value only used within the
-	 * one side of the branch, the PHI instruction will not
-	 * get scheduled
-	 */
-	if (!a)
-		a = create_immed(ctx, 0.0);
-	if (!b)
-		b = create_immed(ctx, 0.0);
-
-	phi = instr_create(ctx, -1, OPC_META_PHI);
-	ir3_reg_create(phi, 0, 0);  /* dummy dst */
-	ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond;
-	ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a;
-	ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b;
-
-	return phi;
-}
-
-static void
-trans_endif(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct ir3_block *ifb, *elseb;
-	struct ir3_instruction **ifout, **elseout;
-	unsigned i, ifnout = 0, elsenout = 0;
-
-	pop_block(ctx);
-
-	instr = pop_branch(ctx);
-
-	compile_assert(ctx, (instr->category == -1) &&
-			(instr->opc == OPC_META_FLOW));
-
-	ifb = instr->flow.if_block;
-	elseb = instr->flow.else_block;
-	/* if there is no else block, the parent block is used for the
-	 * branch-not-taken src of the PHI instructions:
-	 */
-	if (!elseb)
-		elseb = ifb->parent;
-
-	/* worst case sizes: */
-	ifnout = ifb->ntemporaries + ifb->noutputs;
-	elsenout = elseb->ntemporaries + elseb->noutputs;
-
-	ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout);
-	if (elseb != ifb->parent)
-		elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout);
-
-	ifnout = 0;
-	elsenout = 0;
-
-	/* generate PHI instructions for any temporaries written: */
-	for (i = 0; i < ifb->ntemporaries; i++) {
-		struct ir3_instruction *a = ifb->temporaries[i];
-		struct ir3_instruction *b = elseb->temporaries[i];
-
-		/* if temporary written in if-block, or if else block
-		 * is present and temporary written in else-block:
-		 */
-		if (a || ((elseb != ifb->parent) && b)) {
-			struct ir3_instruction *phi;
-
-			/* if only written on one side, find the closest
-			 * enclosing update on other side:
-			 */
-			if (!a)
-				a = find_temporary(ifb, i);
-			if (!b)
-				b = find_temporary(elseb, i);
-
-			ifout[ifnout] = a;
-			a = create_output(ifb, a, ifnout++);
-
-			if (elseb != ifb->parent) {
-				elseout[elsenout] = b;
-				b = create_output(elseb, b, elsenout++);
-			}
-
-			phi = create_phi(ctx, instr, a, b);
-			ctx->block->temporaries[i] = phi;
-		}
-	}
-
-	compile_assert(ctx, ifb->noutputs == elseb->noutputs);
-
-	/* .. and any outputs written: */
-	for (i = 0; i < ifb->noutputs; i++) {
-		struct ir3_instruction *a = ifb->outputs[i];
-		struct ir3_instruction *b = elseb->outputs[i];
-
-		/* if output written in if-block, or if else block
-		 * is present and output written in else-block:
-		 */
-		if (a || ((elseb != ifb->parent) && b)) {
-			struct ir3_instruction *phi;
-
-			/* if only written on one side, find the closest
-			 * enclosing update on other side:
-			 */
-			if (!a)
-				a = find_output(ifb, i);
-			if (!b)
-				b = find_output(elseb, i);
-
-			ifout[ifnout] = a;
-			a = create_output(ifb, a, ifnout++);
-
-			if (elseb != ifb->parent) {
-				elseout[elsenout] = b;
-				b = create_output(elseb, b, elsenout++);
-			}
-
-			phi = create_phi(ctx, instr, a, b);
-			ctx->block->outputs[i] = phi;
-		}
-	}
-
-	ifb->noutputs = ifnout;
-	ifb->outputs = ifout;
-
-	if (elseb != ifb->parent) {
-		elseb->noutputs = elsenout;
-		elseb->outputs = elseout;
-	}
-
-	// TODO maybe we want to compact block->inputs?
-}
-
-/*
- * Kill
- */
-
-static void
-trans_kill(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr, *immed, *cond = NULL;
-	bool inv = false;
-
-	/* unconditional kill, use enclosing if condition: */
-	if (ctx->branch_count > 0) {
-		unsigned int idx = ctx->branch_count - 1;
-		cond = ctx->branch[idx].cond;
-		inv = ctx->branch[idx].inv;
-	} else {
-		cond = create_immed(ctx, 1.0);
-	}
-
-	compile_assert(ctx, cond);
-
-	immed = create_immed(ctx, 0.0);
-
-	/* cmps.f.ne p0.x, cond, {0.0} */
-	instr = instr_create(ctx, 2, OPC_CMPS_F);
-	instr->cat2.condition = IR3_COND_NE;
-	ir3_reg_create(instr, regid(REG_P0, 0), 0);
-	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
-	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
-	cond = instr;
-
-	/* kill p0.x */
-	instr = instr_create(ctx, 0, OPC_KILL);
-	instr->cat0.inv = inv;
-	ir3_reg_create(instr, 0, 0);  /* dummy dst */
-	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
-
-	ctx->kill[ctx->kill_count++] = instr;
-
-	ctx->so->has_kill = true;
-}
-
-/*
- * Kill-If
- */
-
-static void
-trans_killif(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct tgsi_src_register *src = &inst->Src[0].Register;
-	struct ir3_instruction *instr, *immed, *cond = NULL;
-	bool inv = false;
-
-	immed = create_immed(ctx, 0.0);
-
-	/* cmps.f.ne p0.x, cond, {0.0} */
-	instr = instr_create(ctx, 2, OPC_CMPS_F);
-	instr->cat2.condition = IR3_COND_NE;
-	ir3_reg_create(instr, regid(REG_P0, 0), 0);
-	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
-	add_src_reg(ctx, instr, src, src->SwizzleX);
-
-	cond = instr;
-
-	/* kill p0.x */
-	instr = instr_create(ctx, 0, OPC_KILL);
-	instr->cat0.inv = inv;
-	ir3_reg_create(instr, 0, 0);  /* dummy dst */
-	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
-
-	ctx->kill[ctx->kill_count++] = instr;
-
-	ctx->so->has_kill = true;
-
-}
-/*
- * I2F / U2F / F2I / F2U
- */
-
-static void
-trans_cov(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *src = &inst->Src[0].Register;
-
-	// cov.f32s32 dst, tmp0 /
-	instr = instr_create(ctx, 1, 0);
-	switch (t->tgsi_opc) {
-	case TGSI_OPCODE_U2F:
-		instr->cat1.src_type = TYPE_U32;
-		instr->cat1.dst_type = TYPE_F32;
-		break;
-	case TGSI_OPCODE_I2F:
-		instr->cat1.src_type = TYPE_S32;
-		instr->cat1.dst_type = TYPE_F32;
-		break;
-	case TGSI_OPCODE_F2U:
-		instr->cat1.src_type = TYPE_F32;
-		instr->cat1.dst_type = TYPE_U32;
-		break;
-	case TGSI_OPCODE_F2I:
-		instr->cat1.src_type = TYPE_F32;
-		instr->cat1.dst_type = TYPE_S32;
-		break;
-
-	}
-	vectorize(ctx, instr, dst, 1, src, 0);
-	put_dst(ctx, inst, dst);
-}
-
-/*
- * UMUL / UMAD
- *
- * There is no 32-bit multiply instruction, so splitting a and b into high and
- * low components, we get that
- *
- * dst = al * bl + ah * bl << 16 + al * bh << 16
- *
- *  mull.u tmp0, a, b (mul low, i.e. al * bl)
- *  madsh.m16 tmp1, a, b, tmp0 (mul-add shift high mix, i.e. ah * bl << 16)
- *  madsh.m16 dst, b, a, tmp1 (i.e. al * bh << 16)
- *
- * For UMAD, add in the extra argument after mull.u.
- */
-static void
-trans_umul(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *a = &inst->Src[0].Register;
-	struct tgsi_src_register *b = &inst->Src[1].Register;
-
-	struct tgsi_dst_register tmp0_dst, tmp1_dst;
-	struct tgsi_src_register *tmp0_src, *tmp1_src;
-
-	tmp0_src = get_internal_temp(ctx, &tmp0_dst);
-	tmp1_src = get_internal_temp(ctx, &tmp1_dst);
-
-	if (is_rel_or_const(a))
-		a = get_unconst(ctx, a);
-	if (is_rel_or_const(b))
-		b = get_unconst(ctx, b);
-
-	/* mull.u tmp0, a, b */
-	instr = instr_create(ctx, 2, OPC_MULL_U);
-	vectorize(ctx, instr, &tmp0_dst, 2, a, 0, b, 0);
-
-	if (t->tgsi_opc == TGSI_OPCODE_UMAD) {
-		struct tgsi_src_register *c = &inst->Src[2].Register;
-
-		/* add.u tmp0, tmp0, c */
-		instr = instr_create(ctx, 2, OPC_ADD_U);
-		vectorize(ctx, instr, &tmp0_dst, 2, tmp0_src, 0, c, 0);
-	}
-
-	/* madsh.m16 tmp1, a, b, tmp0 */
-	instr = instr_create(ctx, 3, OPC_MADSH_M16);
-	vectorize(ctx, instr, &tmp1_dst, 3, a, 0, b, 0, tmp0_src, 0);
-
-	/* madsh.m16 dst, b, a, tmp1 */
-	instr = instr_create(ctx, 3, OPC_MADSH_M16);
-	vectorize(ctx, instr, dst, 3, b, 0, a, 0, tmp1_src, 0);
-	put_dst(ctx, inst, dst);
-}
-
-/*
- * IDIV / UDIV / MOD / UMOD
- *
- * See NV50LegalizeSSA::handleDIV for the origin of this implementation. For
- * MOD/UMOD, it becomes a - [IU]DIV(a, modulus) * modulus.
- */
-static void
-trans_idiv(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct ir3_instruction *instr;
-	struct tgsi_dst_register *dst = get_dst(ctx, inst), *premod_dst = dst;
-	struct tgsi_src_register *a = &inst->Src[0].Register;
-	struct tgsi_src_register *b = &inst->Src[1].Register;
-
-	struct tgsi_dst_register af_dst, bf_dst, q_dst, r_dst, a_dst, b_dst;
-	struct tgsi_src_register *af_src, *bf_src, *q_src, *r_src, *a_src, *b_src;
-
-	struct tgsi_src_register negative_2, thirty_one;
-	type_t src_type;
-
-	if (t->tgsi_opc == TGSI_OPCODE_IDIV || t->tgsi_opc == TGSI_OPCODE_MOD)
-		src_type = get_stype(ctx);
-	else
-		src_type = get_utype(ctx);
-
-	af_src = get_internal_temp(ctx, &af_dst);
-	bf_src = get_internal_temp(ctx, &bf_dst);
-	q_src = get_internal_temp(ctx, &q_dst);
-	r_src = get_internal_temp(ctx, &r_dst);
-	a_src = get_internal_temp(ctx, &a_dst);
-	b_src = get_internal_temp(ctx, &b_dst);
-
-	get_immediate(ctx, &negative_2, -2);
-	get_immediate(ctx, &thirty_one, 31);
-
-	if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD)
-		premod_dst = &q_dst;
-
-	/* cov.[us]32f32 af, numerator */
-	instr = instr_create(ctx, 1, 0);
-	instr->cat1.src_type = src_type;
-	instr->cat1.dst_type = get_ftype(ctx);
-	vectorize(ctx, instr, &af_dst, 1, a, 0);
-
-	/* cov.[us]32f32 bf, denominator */
-	instr = instr_create(ctx, 1, 0);
-	instr->cat1.src_type = src_type;
-	instr->cat1.dst_type = get_ftype(ctx);
-	vectorize(ctx, instr, &bf_dst, 1, b, 0);
-
-	/* Get the absolute values for IDIV */
-	if (type_sint(src_type)) {
-		/* absneg.f af, (abs)af */
-		instr = instr_create(ctx, 2, OPC_ABSNEG_F);
-		vectorize(ctx, instr, &af_dst, 1, af_src, IR3_REG_FABS);
-
-		/* absneg.f bf, (abs)bf */
-		instr = instr_create(ctx, 2, OPC_ABSNEG_F);
-		vectorize(ctx, instr, &bf_dst, 1, bf_src, IR3_REG_FABS);
-
-		/* absneg.s a, (abs)numerator */
-		instr = instr_create(ctx, 2, OPC_ABSNEG_S);
-		vectorize(ctx, instr, &a_dst, 1, a, IR3_REG_SABS);
-
-		/* absneg.s b, (abs)denominator */
-		instr = instr_create(ctx, 2, OPC_ABSNEG_S);
-		vectorize(ctx, instr, &b_dst, 1, b, IR3_REG_SABS);
-	} else {
-		/* mov.u32u32 a, numerator */
-		instr = instr_create(ctx, 1, 0);
-		instr->cat1.src_type = src_type;
-		instr->cat1.dst_type = src_type;
-		vectorize(ctx, instr, &a_dst, 1, a, 0);
-
-		/* mov.u32u32 b, denominator */
-		instr = instr_create(ctx, 1, 0);
-		instr->cat1.src_type = src_type;
-		instr->cat1.dst_type = src_type;
-		vectorize(ctx, instr, &b_dst, 1, b, 0);
-	}
-
-	/* rcp.f bf, bf */
-	instr = instr_create(ctx, 4, OPC_RCP);
-	vectorize(ctx, instr, &bf_dst, 1, bf_src, 0);
-
-	/* That's right, subtract 2 as an integer from the float */
-	/* add.u bf, bf, -2 */
-	instr = instr_create(ctx, 2, OPC_ADD_U);
-	vectorize(ctx, instr, &bf_dst, 2, bf_src, 0, &negative_2, 0);
-
-	/* mul.f q, af, bf */
-	instr = instr_create(ctx, 2, OPC_MUL_F);
-	vectorize(ctx, instr, &q_dst, 2, af_src, 0, bf_src, 0);
-
-	/* cov.f32[us]32 q, q */
-	instr = instr_create(ctx, 1, 0);
-	instr->cat1.src_type = get_ftype(ctx);
-	instr->cat1.dst_type = src_type;
-	vectorize(ctx, instr, &q_dst, 1, q_src, 0);
-
-	/* integer multiply q by b */
-	/* mull.u r, q, b */
-	instr = instr_create(ctx, 2, OPC_MULL_U);
-	vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0);
-
-	/* madsh.m16 r, q, b, r */
-	instr = instr_create(ctx, 3, OPC_MADSH_M16);
-	vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0);
-
-	/* madsh.m16, r, b, q, r */
-	instr = instr_create(ctx, 3, OPC_MADSH_M16);
-	vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0);
-
-	/* sub.u r, a, r */
-	instr = instr_create(ctx, 2, OPC_SUB_U);
-	vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0);
-
-	/* cov.u32f32, r, r */
-	instr = instr_create(ctx, 1, 0);
-	instr->cat1.src_type = get_utype(ctx);
-	instr->cat1.dst_type = get_ftype(ctx);
-	vectorize(ctx, instr, &r_dst, 1, r_src, 0);
-
-	/* mul.f r, r, bf */
-	instr = instr_create(ctx, 2, OPC_MUL_F);
-	vectorize(ctx, instr, &r_dst, 2, r_src, 0, bf_src, 0);
-
-	/* cov.f32u32 r, r */
-	instr = instr_create(ctx, 1, 0);
-	instr->cat1.src_type = get_ftype(ctx);
-	instr->cat1.dst_type = get_utype(ctx);
-	vectorize(ctx, instr, &r_dst, 1, r_src, 0);
-
-	/* add.u q, q, r */
-	instr = instr_create(ctx, 2, OPC_ADD_U);
-	vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0);
-
-	/* mull.u r, q, b */
-	instr = instr_create(ctx, 2, OPC_MULL_U);
-	vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0);
-
-	/* madsh.m16 r, q, b, r */
-	instr = instr_create(ctx, 3, OPC_MADSH_M16);
-	vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0);
-
-	/* madsh.m16 r, b, q, r */
-	instr = instr_create(ctx, 3, OPC_MADSH_M16);
-	vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0);
-
-	/* sub.u r, a, r */
-	instr = instr_create(ctx, 2, OPC_SUB_U);
-	vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0);
-
-	/* cmps.u.ge r, r, b */
-	instr = instr_create(ctx, 2, OPC_CMPS_U);
-	instr->cat2.condition = IR3_COND_GE;
-	vectorize(ctx, instr, &r_dst, 2, r_src, 0, b_src, 0);
-
-	if (type_uint(src_type)) {
-		/* add.u dst, q, r */
-		instr = instr_create(ctx, 2, OPC_ADD_U);
-		vectorize(ctx, instr, premod_dst, 2, q_src, 0, r_src, 0);
-	} else {
-		/* add.u q, q, r */
-		instr = instr_create(ctx, 2, OPC_ADD_U);
-		vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0);
-
-		/* negate result based on the original arguments */
-		if (is_const(a) && is_const(b))
-			a = get_unconst(ctx, a);
-
-		/* xor.b r, numerator, denominator */
-		instr = instr_create(ctx, 2, OPC_XOR_B);
-		vectorize(ctx, instr, &r_dst, 2, a, 0, b, 0);
-
-		/* shr.b r, r, 31 */
-		instr = instr_create(ctx, 2, OPC_SHR_B);
-		vectorize(ctx, instr, &r_dst, 2, r_src, 0, &thirty_one, 0);
-
-		/* absneg.s b, (neg)q */
-		instr = instr_create(ctx, 2, OPC_ABSNEG_S);
-		vectorize(ctx, instr, &b_dst, 1, q_src, IR3_REG_SNEG);
-
-		/* sel.b dst, b, r, q */
-		instr = instr_create(ctx, 3, OPC_SEL_B32);
-		vectorize(ctx, instr, premod_dst, 3, b_src, 0, r_src, 0, q_src, 0);
-	}
-
-	if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD) {
-		/* The division result will have ended up in q. */
-
-		if (is_rel_or_const(b))
-			b = get_unconst(ctx, b);
-
-		/* mull.u r, q, b */
-		instr = instr_create(ctx, 2, OPC_MULL_U);
-		vectorize(ctx, instr, &r_dst, 2, q_src, 0, b, 0);
-
-		/* madsh.m16 r, q, b, r */
-		instr = instr_create(ctx, 3, OPC_MADSH_M16);
-		vectorize(ctx, instr, &r_dst, 3, q_src, 0, b, 0, r_src, 0);
-
-		/* madsh.m16 r, b, q, r */
-		instr = instr_create(ctx, 3, OPC_MADSH_M16);
-		vectorize(ctx, instr, &r_dst, 3, b, 0, q_src, 0, r_src, 0);
-
-		/* sub.u dst, a, r */
-		instr = instr_create(ctx, 2, OPC_SUB_U);
-		vectorize(ctx, instr, dst, 2, a, 0, r_src, 0);
-	}
-
-	put_dst(ctx, inst, dst);
-}
-
-/*
- * Handlers for TGSI instructions which do have 1:1 mapping to native
- * instructions:
- */
-
-static void
-instr_cat0(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	instr_create(ctx, 0, t->opc);
-}
-
-static void
-instr_cat1(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
-	struct tgsi_src_register *src = &inst->Src[0].Register;
-
-	/* NOTE: atomic start/end, rather than in create_mov() since
-	 * create_mov() is used already w/in atomic sequences (and
-	 * we aren't clever enough to deal with the nesting)
-	 */
-	instr_atomic_start(ctx);
-	create_mov(ctx, dst, src);
-	instr_atomic_end(ctx);
-}
-
-static void
-instr_cat2(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *src0 = &inst->Src[0].Register;
-	struct tgsi_src_register *src1 = &inst->Src[1].Register;
-	struct ir3_instruction *instr;
-	unsigned src0_flags = 0, src1_flags = 0;
-
-	switch (t->tgsi_opc) {
-	case TGSI_OPCODE_ABS:
-		src0_flags = IR3_REG_FABS;
-		break;
-	case TGSI_OPCODE_IABS:
-		src0_flags = IR3_REG_SABS;
-		break;
-	case TGSI_OPCODE_INEG:
-		src0_flags = IR3_REG_SNEG;
-		break;
-	case TGSI_OPCODE_SUB:
-		src1_flags = IR3_REG_FNEG;
-		break;
-	}
-
-	switch (t->opc) {
-	case OPC_ABSNEG_F:
-	case OPC_ABSNEG_S:
-	case OPC_CLZ_B:
-	case OPC_CLZ_S:
-	case OPC_SIGN_F:
-	case OPC_FLOOR_F:
-	case OPC_CEIL_F:
-	case OPC_RNDNE_F:
-	case OPC_RNDAZ_F:
-	case OPC_TRUNC_F:
-	case OPC_NOT_B:
-	case OPC_BFREV_B:
-	case OPC_SETRM:
-	case OPC_CBITS_B:
-		/* these only have one src reg */
-		instr = instr_create(ctx, 2, t->opc);
-		vectorize(ctx, instr, dst, 1, src0, src0_flags);
-		break;
-	default:
-		if (is_const(src0) && is_const(src1))
-			src0 = get_unconst(ctx, src0);
-
-		instr = instr_create(ctx, 2, t->opc);
-		vectorize(ctx, instr, dst, 2, src0, src0_flags,
-				src1, src1_flags);
-		break;
-	}
-
-	put_dst(ctx, inst, dst);
-}
-
-static void
-instr_cat3(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *src0 = &inst->Src[0].Register;
-	struct tgsi_src_register *src1 = &inst->Src[1].Register;
-	struct ir3_instruction *instr;
-
-	/* in particular, can't handle const for src1 for cat3..
-	 * for mad, we can swap first two src's if needed:
-	 */
-	if (is_rel_or_const(src1)) {
-		if (is_mad(t->opc) && !is_rel_or_const(src0)) {
-			struct tgsi_src_register *tmp;
-			tmp = src0;
-			src0 = src1;
-			src1 = tmp;
-		} else {
-			src1 = get_unconst(ctx, src1);
-		}
-	}
-
-	instr = instr_create(ctx, 3, t->opc);
-	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
-			&inst->Src[2].Register, 0);
-	put_dst(ctx, inst, dst);
-}
-
-static void
-instr_cat4(const struct instr_translater *t,
-		struct ir3_compile_context *ctx,
-		struct tgsi_full_instruction *inst)
-{
-	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-	struct tgsi_src_register *src = &inst->Src[0].Register;
-	struct ir3_instruction *instr;
-	unsigned i;
-
-	/* seems like blob compiler avoids const as src.. */
-	if (is_const(src))
-		src = get_unconst(ctx, src);
-
-	/* we need to replicate into each component: */
-	for (i = 0; i < 4; i++) {
-		if (dst->WriteMask & (1 << i)) {
-			instr = instr_create(ctx, 4, t->opc);
-			add_dst_reg(ctx, instr, dst, i);
-			add_src_reg(ctx, instr, src, src->SwizzleX);
-		}
-	}
-
-	put_dst(ctx, inst, dst);
-}
-
-static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
-#define INSTR(n, f, ...) \
-	[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
-
-	INSTR(MOV,          instr_cat1),
-	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
-	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
-	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
-	INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
-	INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
-	INSTR(SUB,          instr_cat2, .opc = OPC_ADD_F),
-	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
-	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
-	INSTR(UADD,         instr_cat2, .opc = OPC_ADD_U),
-	INSTR(IMIN,         instr_cat2, .opc = OPC_MIN_S),
-	INSTR(UMIN,         instr_cat2, .opc = OPC_MIN_U),
-	INSTR(IMAX,         instr_cat2, .opc = OPC_MAX_S),
-	INSTR(UMAX,         instr_cat2, .opc = OPC_MAX_U),
-	INSTR(AND,          instr_cat2, .opc = OPC_AND_B),
-	INSTR(OR,           instr_cat2, .opc = OPC_OR_B),
-	INSTR(NOT,          instr_cat2, .opc = OPC_NOT_B),
-	INSTR(XOR,          instr_cat2, .opc = OPC_XOR_B),
-	INSTR(UMUL,         trans_umul),
-	INSTR(UMAD,         trans_umul),
-	INSTR(UDIV,         trans_idiv),
-	INSTR(IDIV,         trans_idiv),
-	INSTR(MOD,          trans_idiv),
-	INSTR(UMOD,         trans_idiv),
-	INSTR(SHL,          instr_cat2, .opc = OPC_SHL_B),
-	INSTR(USHR,         instr_cat2, .opc = OPC_SHR_B),
-	INSTR(ISHR,         instr_cat2, .opc = OPC_ASHR_B),
-	INSTR(IABS,         instr_cat2, .opc = OPC_ABSNEG_S),
-	INSTR(INEG,         instr_cat2, .opc = OPC_ABSNEG_S),
-	INSTR(AND,          instr_cat2, .opc = OPC_AND_B),
-	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
-	INSTR(TRUNC,        instr_cat2, .opc = OPC_TRUNC_F),
-	INSTR(CLAMP,        trans_clamp),
-	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
-	INSTR(ROUND,        instr_cat2, .opc = OPC_RNDNE_F),
-	INSTR(SSG,          instr_cat2, .opc = OPC_SIGN_F),
-	INSTR(CEIL,         instr_cat2, .opc = OPC_CEIL_F),
-	INSTR(ARL,          trans_arl),
-	INSTR(UARL,         trans_arl),
-	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
-	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
-	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
-	INSTR(COS,          instr_cat4, .opc = OPC_COS),
-	INSTR(SIN,          instr_cat4, .opc = OPC_SIN),
-	INSTR(TEX,          trans_samp, .opc = OPC_SAM),
-	INSTR(TXP,          trans_samp, .opc = OPC_SAM),
-	INSTR(TXB,          trans_samp, .opc = OPC_SAMB),
-	INSTR(TXB2,         trans_samp, .opc = OPC_SAMB),
-	INSTR(TXL,          trans_samp, .opc = OPC_SAML),
-	INSTR(TXD,          trans_samp, .opc = OPC_SAMGQ),
-	INSTR(TXF,          trans_samp, .opc = OPC_ISAML),
-	INSTR(TXQ,          trans_txq),
-	INSTR(DDX,          trans_deriv, .opc = OPC_DSX),
-	INSTR(DDY,          trans_deriv, .opc = OPC_DSY),
-	INSTR(SGT,          trans_cmp),
-	INSTR(SLT,          trans_cmp),
-	INSTR(FSLT,         trans_cmp),
-	INSTR(SGE,          trans_cmp),
-	INSTR(FSGE,         trans_cmp),
-	INSTR(SLE,          trans_cmp),
-	INSTR(SNE,          trans_cmp),
-	INSTR(FSNE,         trans_cmp),
-	INSTR(SEQ,          trans_cmp),
-	INSTR(FSEQ,         trans_cmp),
-	INSTR(CMP,          trans_cmp),
-	INSTR(USNE,         trans_icmp, .opc = OPC_CMPS_U),
-	INSTR(USEQ,         trans_icmp, .opc = OPC_CMPS_U),
-	INSTR(ISGE,         trans_icmp, .opc = OPC_CMPS_S),
-	INSTR(USGE,         trans_icmp, .opc = OPC_CMPS_U),
-	INSTR(ISLT,         trans_icmp, .opc = OPC_CMPS_S),
-	INSTR(USLT,         trans_icmp, .opc = OPC_CMPS_U),
-	INSTR(UCMP,         trans_ucmp),
-	INSTR(ISSG,         trans_issg),
-	INSTR(IF,           trans_if,   .opc = OPC_CMPS_F),
-	INSTR(UIF,          trans_if,   .opc = OPC_CMPS_U),
-	INSTR(ELSE,         trans_else),
-	INSTR(ENDIF,        trans_endif),
-	INSTR(END,          instr_cat0, .opc = OPC_END),
-	INSTR(KILL,         trans_kill, .opc = OPC_KILL),
-	INSTR(KILL_IF,      trans_killif, .opc = OPC_KILL),
-	INSTR(I2F,          trans_cov),
-	INSTR(U2F,          trans_cov),
-	INSTR(F2I,          trans_cov),
-	INSTR(F2U,          trans_cov),
-};
-
-static ir3_semantic
-decl_semantic(const struct tgsi_declaration_semantic *sem)
-{
-	return ir3_semantic_name(sem->Name, sem->Index);
-}
-
-static struct ir3_instruction *
-decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
-		unsigned j, unsigned inloc, bool use_ldlv)
-{
-	struct ir3_instruction *instr;
-	struct ir3_register *src;
-
-	if (use_ldlv) {
-		/* ldlv.u32 dst, l[#inloc], 1 */
-		instr = instr_create(ctx, 6, OPC_LDLV);
-		instr->cat6.type = TYPE_U32;
-		instr->cat6.iim_val = 1;
-		ir3_reg_create(instr, regid, 0);   /* dummy dst */
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
-
-		return instr;
-	}
-
-	/* bary.f dst, #inloc, r0.x */
-	instr = instr_create(ctx, 2, OPC_BARY_F);
-	ir3_reg_create(instr, regid, 0);   /* dummy dst */
-	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
-	src = ir3_reg_create(instr, 0, IR3_REG_SSA);
-	src->wrmask = 0x3;
-	src->instr = ctx->frag_pos;
-
-	return instr;
-}
-
-/* TGSI_SEMANTIC_POSITION
- * """"""""""""""""""""""
- *
- * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
- * fragment shader input contains the fragment's window position.  The X
- * component starts at zero and always increases from left to right.
- * The Y component starts at zero and always increases but Y=0 may either
- * indicate the top of the window or the bottom depending on the fragment
- * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
- * The Z coordinate ranges from 0 to 1 to represent depth from the front
- * to the back of the Z buffer.  The W component contains the reciprocol
- * of the interpolated vertex position W component.
- */
-static struct ir3_instruction *
-decl_in_frag_coord(struct ir3_compile_context *ctx, unsigned regid,
-		unsigned j)
-{
-	struct ir3_instruction *instr, *src;
-
-	compile_assert(ctx, !ctx->frag_coord[j]);
-
-	ctx->frag_coord[j] = create_input(ctx->block, NULL, 0);
-
-
-	switch (j) {
-	case 0: /* .x */
-	case 1: /* .y */
-		/* for frag_coord, we get unsigned values.. we need
-		 * to subtract (integer) 8 and divide by 16 (right-
-		 * shift by 4) then convert to float:
-		 */
-
-		/* add.s tmp, src, -8 */
-		instr = instr_create(ctx, 2, OPC_ADD_S);
-		ir3_reg_create(instr, regid, 0);    /* dummy dst */
-		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j];
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8;
-		src = instr;
-
-		/* shr.b tmp, tmp, 4 */
-		instr = instr_create(ctx, 2, OPC_SHR_B);
-		ir3_reg_create(instr, regid, 0);    /* dummy dst */
-		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
-		src = instr;
-
-		/* mov.u32f32 dst, tmp */
-		instr = instr_create(ctx, 1, 0);
-		instr->cat1.src_type = TYPE_U32;
-		instr->cat1.dst_type = TYPE_F32;
-		ir3_reg_create(instr, regid, 0);    /* dummy dst */
-		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-
-		break;
-	case 2: /* .z */
-	case 3: /* .w */
-		/* seems that we can use these as-is: */
-		instr = ctx->frag_coord[j];
-		break;
-	default:
-		compile_error(ctx, "invalid channel\n");
-		instr = create_immed(ctx, 0.0);
-		break;
-	}
-
-	return instr;
-}
-
-/* TGSI_SEMANTIC_FACE
- * """"""""""""""""""
- *
- * This label applies to fragment shader inputs only and indicates that
- * the register contains front/back-face information of the form (F, 0,
- * 0, 1).  The first component will be positive when the fragment belongs
- * to a front-facing polygon, and negative when the fragment belongs to a
- * back-facing polygon.
- */
-static struct ir3_instruction *
-decl_in_frag_face(struct ir3_compile_context *ctx, unsigned regid,
-		unsigned j)
-{
-	struct ir3_instruction *instr, *src;
-
-	switch (j) {
-	case 0: /* .x */
-		compile_assert(ctx, !ctx->frag_face);
-
-		ctx->frag_face = create_input(ctx->block, NULL, 0);
-
-		/* for faceness, we always get -1 or 0 (int).. but TGSI expects
-		 * positive vs negative float.. and piglit further seems to
-		 * expect -1.0 or 1.0:
-		 *
-		 *    mul.s tmp, hr0.x, 2
-		 *    add.s tmp, tmp, 1
-		 *    mov.s16f32, dst, tmp
-		 *
-		 */
-
-		instr = instr_create(ctx, 2, OPC_MUL_S);
-		ir3_reg_create(instr, regid, 0);    /* dummy dst */
-		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face;
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
-		src = instr;
-
-		instr = instr_create(ctx, 2, OPC_ADD_S);
-		ir3_reg_create(instr, regid, 0);    /* dummy dst */
-		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
-		src = instr;
-
-		instr = instr_create(ctx, 1, 0); /* mov */
-		instr->cat1.src_type = TYPE_S32;
-		instr->cat1.dst_type = TYPE_F32;
-		ir3_reg_create(instr, regid, 0);    /* dummy dst */
-		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
-
-		break;
-	case 1: /* .y */
-	case 2: /* .z */
-		instr = create_immed(ctx, 0.0);
-		break;
-	case 3: /* .w */
-		instr = create_immed(ctx, 1.0);
-		break;
-	default:
-		compile_error(ctx, "invalid channel\n");
-		instr = create_immed(ctx, 0.0);
-		break;
-	}
-
-	return instr;
-}
-
-static void
-decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
-	struct ir3_shader_variant *so = ctx->so;
-	unsigned name = decl->Semantic.Name;
-	unsigned i;
-
-	/* I don't think we should get frag shader input without
-	 * semantic info?  Otherwise how do inputs get linked to
-	 * vert outputs?
-	 */
-	compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
-			decl->Declaration.Semantic);
-
-	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
-		unsigned n = so->inputs_count++;
-		unsigned r = regid(i, 0);
-		unsigned ncomp, j;
-
-		/* we'll figure out the actual components used after scheduling */
-		ncomp = 4;
-
-		DBG("decl in -> r%d", i);
-
-		compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
-
-		so->inputs[n].semantic = decl_semantic(&decl->Semantic);
-		so->inputs[n].compmask = (1 << ncomp) - 1;
-		so->inputs[n].regid = r;
-		so->inputs[n].inloc = ctx->next_inloc;
-		so->inputs[n].interpolate = decl->Interp.Interpolate;
-
-		for (j = 0; j < ncomp; j++) {
-			struct ir3_instruction *instr = NULL;
-
-			if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-				/* for fragment shaders, POSITION and FACE are handled
-				 * specially, not using normal varying / bary.f
-				 */
-				if (name == TGSI_SEMANTIC_POSITION) {
-					so->inputs[n].bary = false;
-					so->frag_coord = true;
-					instr = decl_in_frag_coord(ctx, r + j, j);
-				} else if (name == TGSI_SEMANTIC_FACE) {
-					so->inputs[n].bary = false;
-					so->frag_face = true;
-					instr = decl_in_frag_face(ctx, r + j, j);
-				} else {
-					bool use_ldlv = false;
-
-					/* if no interpolation given, pick based on
-					 * semantic:
-					 */
-					if (!decl->Declaration.Interpolate) {
-						switch (decl->Semantic.Name) {
-						case TGSI_SEMANTIC_COLOR:
-							so->inputs[n].interpolate =
-									TGSI_INTERPOLATE_COLOR;
-							break;
-						default:
-							so->inputs[n].interpolate =
-									TGSI_INTERPOLATE_LINEAR;
-						}
-					}
-
-					if (ctx->flat_bypass) {
-						switch (so->inputs[n].interpolate) {
-						case TGSI_INTERPOLATE_COLOR:
-							if (!ctx->so->key.rasterflat)
-								break;
-							/* fallthrough */
-						case TGSI_INTERPOLATE_CONSTANT:
-							use_ldlv = true;
-							break;
-						}
-					}
-
-					so->inputs[n].bary = true;
-
-					instr = decl_in_frag_bary(ctx, r + j, j,
-							so->inputs[n].inloc + j - 8, use_ldlv);
-				}
-			} else {
-				instr = create_input(ctx->block, NULL, (i * 4) + j);
-			}
-
-			ctx->block->inputs[(i * 4) + j] = instr;
-		}
-
-		if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) {
-			ctx->next_inloc += ncomp;
-			so->total_in += ncomp;
-		}
-	}
-}
-
-static void
-decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
-	struct ir3_shader_variant *so = ctx->so;
-	unsigned r = regid(so->inputs_count, 0);
-	unsigned n = so->inputs_count++;
-
-	DBG("decl sv -> r%d", n);
-
-	compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
-	compile_assert(ctx, decl->Range.First < ARRAY_SIZE(ctx->sysval_semantics));
-
-	ctx->sysval_semantics[decl->Range.First] = decl->Semantic.Name;
-	so->inputs[n].semantic = decl_semantic(&decl->Semantic);
-	so->inputs[n].compmask = 1;
-	so->inputs[n].regid = r;
-	so->inputs[n].inloc = ctx->next_inloc;
-	so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
-
-	struct ir3_instruction *instr = NULL;
-
-	switch (decl->Semantic.Name) {
-	case TGSI_SEMANTIC_VERTEXID_NOBASE:
-		ctx->vertex_id = instr = create_input(ctx->block, NULL, r);
-		break;
-	case TGSI_SEMANTIC_BASEVERTEX:
-		ctx->basevertex = instr = instr_create(ctx, 1, 0);
-		instr->cat1.src_type = get_stype(ctx);
-		instr->cat1.dst_type = get_stype(ctx);
-		ir3_reg_create(instr, 0, 0);
-		ir3_reg_create(instr, regid(so->first_driver_param + 4, 0),
-					   IR3_REG_CONST);
-		break;
-	case TGSI_SEMANTIC_INSTANCEID:
-		ctx->instance_id = instr = create_input(ctx->block, NULL, r);
-		break;
-	default:
-		compile_error(ctx, "Unknown semantic: %s\n",
-					  tgsi_semantic_names[decl->Semantic.Name]);
-	}
-
-	ctx->block->inputs[r] = instr;
-	ctx->next_inloc++;
-	so->total_in++;
-}
-
-static void
-decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
-{
-	struct ir3_shader_variant *so = ctx->so;
-	unsigned comp = 0;
-	unsigned name = decl->Semantic.Name;
-	unsigned i;
-
-	compile_assert(ctx, decl->Declaration.Semantic);
-
-	DBG("decl out[%d] -> r%d", name, decl->Range.First);
-
-	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
-		switch (name) {
-		case TGSI_SEMANTIC_POSITION:
-			so->writes_pos = true;
-			break;
-		case TGSI_SEMANTIC_PSIZE:
-			so->writes_psize = true;
-			break;
-		case TGSI_SEMANTIC_COLOR:
-		case TGSI_SEMANTIC_BCOLOR:
-		case TGSI_SEMANTIC_GENERIC:
-		case TGSI_SEMANTIC_FOG:
-		case TGSI_SEMANTIC_TEXCOORD:
-			break;
-		default:
-			compile_error(ctx, "unknown VS semantic name: %s\n",
-					tgsi_semantic_names[name]);
-		}
-	} else {
-		switch (name) {
-		case TGSI_SEMANTIC_POSITION:
-			comp = 2;  /* tgsi will write to .z component */
-			so->writes_pos = true;
-			break;
-		case TGSI_SEMANTIC_COLOR:
-			break;
-		default:
-			compile_error(ctx, "unknown FS semantic name: %s\n",
-					tgsi_semantic_names[name]);
-		}
-	}
-
-	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
-		unsigned n = so->outputs_count++;
-		unsigned ncomp, j;
-
-		ncomp = 4;
-
-		compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
-
-		so->outputs[n].semantic = decl_semantic(&decl->Semantic);
-		so->outputs[n].regid = regid(i, comp);
-
-		/* avoid undefined outputs, stick a dummy mov from imm{0.0},
-		 * which if the output is actually assigned will be over-
-		 * written
-		 */
-		for (j = 0; j < ncomp; j++)
-			ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0);
-	}
-}
-
-/* from TGSI perspective, we actually have inputs.  But most of the "inputs"
- * for a fragment shader are just bary.f instructions.  The *actual* inputs
- * from the hw perspective are the frag_pos and optionally frag_coord and
- * frag_face.
- */
-static void
-fixup_frag_inputs(struct ir3_compile_context *ctx)
-{
-	struct ir3_shader_variant *so = ctx->so;
-	struct ir3_block *block = ctx->block;
-	struct ir3_instruction **inputs;
-	struct ir3_instruction *instr;
-	int n, regid = 0;
-
-	block->ninputs = 0;
-
-	n  = 4;  /* always have frag_pos */
-	n += COND(so->frag_face, 4);
-	n += COND(so->frag_coord, 4);
-
-	inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
-
-	if (so->frag_face) {
-		/* this ultimately gets assigned to hr0.x so doesn't conflict
-		 * with frag_coord/frag_pos..
-		 */
-		inputs[block->ninputs++] = ctx->frag_face;
-		ctx->frag_face->regs[0]->num = 0;
-
-		/* remaining channels not used, but let's avoid confusing
-		 * other parts that expect inputs to come in groups of vec4
-		 */
-		inputs[block->ninputs++] = NULL;
-		inputs[block->ninputs++] = NULL;
-		inputs[block->ninputs++] = NULL;
-	}
-
-	/* since we don't know where to set the regid for frag_coord,
-	 * we have to use r0.x for it.  But we don't want to *always*
-	 * use r1.x for frag_pos as that could increase the register
-	 * footprint on simple shaders:
-	 */
-	if (so->frag_coord) {
-		ctx->frag_coord[0]->regs[0]->num = regid++;
-		ctx->frag_coord[1]->regs[0]->num = regid++;
-		ctx->frag_coord[2]->regs[0]->num = regid++;
-		ctx->frag_coord[3]->regs[0]->num = regid++;
-
-		inputs[block->ninputs++] = ctx->frag_coord[0];
-		inputs[block->ninputs++] = ctx->frag_coord[1];
-		inputs[block->ninputs++] = ctx->frag_coord[2];
-		inputs[block->ninputs++] = ctx->frag_coord[3];
-	}
-
-	/* we always have frag_pos: */
-	so->pos_regid = regid;
-
-	/* r0.x */
-	instr = create_input(block, NULL, block->ninputs);
-	instr->regs[0]->num = regid++;
-	inputs[block->ninputs++] = instr;
-	ctx->frag_pos->regs[1]->instr = instr;
-
-	/* r0.y */
-	instr = create_input(block, NULL, block->ninputs);
-	instr->regs[0]->num = regid++;
-	inputs[block->ninputs++] = instr;
-	ctx->frag_pos->regs[2]->instr = instr;
-
-	block->inputs = inputs;
-}
-
-static void
-compile_instructions(struct ir3_compile_context *ctx)
-{
-	push_block(ctx);
-
-	/* for fragment shader, we have a single input register (usually
-	 * r0.xy) which is used as the base for bary.f varying fetch instrs:
-	 */
-	if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-		struct ir3_instruction *instr;
-		instr = ir3_instr_create(ctx->block, -1, OPC_META_FI);
-		ir3_reg_create(instr, 0, 0);
-		ir3_reg_create(instr, 0, IR3_REG_SSA);    /* r0.x */
-		ir3_reg_create(instr, 0, IR3_REG_SSA);    /* r0.y */
-		ctx->frag_pos = instr;
-	}
-
-	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
-		tgsi_parse_token(&ctx->parser);
-
-		switch (ctx->parser.FullToken.Token.Type) {
-		case TGSI_TOKEN_TYPE_DECLARATION: {
-			struct tgsi_full_declaration *decl =
-					&ctx->parser.FullToken.FullDeclaration;
-			unsigned file = decl->Declaration.File;
-			if (file == TGSI_FILE_OUTPUT) {
-				decl_out(ctx, decl);
-			} else if (file == TGSI_FILE_INPUT) {
-				decl_in(ctx, decl);
-			} else if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
-				decl_sv(ctx, decl);
-			}
-
-			if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) {
-				int aid = decl->Array.ArrayID + ctx->array_offsets[file];
-
-				compile_assert(ctx, aid < ARRAY_SIZE(ctx->array));
-
-				/* legacy ArrayID==0 stuff probably isn't going to work
-				 * well (and is at least untested).. let's just scream:
-				 */
-				compile_assert(ctx, aid != 0);
-
-				ctx->array[aid].first = decl->Range.First;
-				ctx->array[aid].last  = decl->Range.Last;
-			}
-			break;
-		}
-		case TGSI_TOKEN_TYPE_IMMEDIATE: {
-			/* TODO: if we know the immediate is small enough, and only
-			 * used with instructions that can embed an immediate, we
-			 * can skip this:
-			 */
-			struct tgsi_full_immediate *imm =
-					&ctx->parser.FullToken.FullImmediate;
-			unsigned n = ctx->so->immediates_count++;
-			compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates));
-			memcpy(ctx->so->immediates[n].val, imm->u, 16);
-			break;
-		}
-		case TGSI_TOKEN_TYPE_INSTRUCTION: {
-			struct tgsi_full_instruction *inst =
-					&ctx->parser.FullToken.FullInstruction;
-			unsigned opc = inst->Instruction.Opcode;
-			const struct instr_translater *t = &translaters[opc];
-
-			if (t->fxn) {
-				t->fxn(t, ctx, inst);
-				ctx->num_internal_temps = 0;
-
-				compile_assert(ctx, !ctx->using_tmp_dst);
-			} else {
-				compile_error(ctx, "unknown TGSI opc: %s\n",
-						tgsi_get_opcode_name(opc));
-			}
-
-			switch (inst->Instruction.Saturate) {
-			case TGSI_SAT_ZERO_ONE:
-				create_clamp_imm(ctx, &inst->Dst[0].Register,
-						fui(0.0), fui(1.0));
-				break;
-			case TGSI_SAT_MINUS_PLUS_ONE:
-				create_clamp_imm(ctx, &inst->Dst[0].Register,
-						fui(-1.0), fui(1.0));
-				break;
-			}
-
-			instr_finish(ctx);
-
-			break;
-		}
-		case TGSI_TOKEN_TYPE_PROPERTY: {
-			struct tgsi_full_property *prop =
-				&ctx->parser.FullToken.FullProperty;
-			switch (prop->Property.PropertyName) {
-			case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
-				ctx->so->color0_mrt = !!prop->u[0].Data;
-				break;
-			}
-		}
-		default:
-			break;
-		}
-	}
-}
-
-static void
-compile_dump(struct ir3_compile_context *ctx)
-{
-	const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
-	static unsigned n = 0;
-	char fname[16];
-	FILE *f;
-	snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
-	f = fopen(fname, "w");
-	if (!f)
-		return;
-	ir3_block_depth(ctx->block);
-	ir3_dump(ctx->ir, name, ctx->block, f);
-	fclose(f);
-}
-
-int
-ir3_compile_shader(struct ir3_shader_variant *so,
-		const struct tgsi_token *tokens, struct ir3_shader_key key,
-		bool cp)
-{
-	struct ir3_compile_context ctx;
-	struct ir3_block *block;
-	struct ir3_instruction **inputs;
-	unsigned i, j, actual_in;
-	int ret = 0, max_bary;
-
-	assert(!so->ir);
-
-	so->ir = ir3_create();
-
-	assert(so->ir);
-
-	if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) {
-		DBG("INIT failed!");
-		ret = -1;
-		goto out;
-	}
-
-	/* for now, until the edge cases are worked out: */
-	if (ctx.info.indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
-		cp = false;
-
-	compile_instructions(&ctx);
-
-	block = ctx.block;
-	so->ir->block = block;
-
-	/* keep track of the inputs from TGSI perspective.. */
-	inputs = block->inputs;
-
-	/* but fixup actual inputs for frag shader: */
-	if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
-		fixup_frag_inputs(&ctx);
-
-	/* at this point, for binning pass, throw away unneeded outputs: */
-	if (key.binning_pass) {
-		for (i = 0, j = 0; i < so->outputs_count; i++) {
-			unsigned name = sem2name(so->outputs[i].semantic);
-			unsigned idx = sem2idx(so->outputs[i].semantic);
-
-			/* throw away everything but first position/psize */
-			if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
-					(name == TGSI_SEMANTIC_PSIZE))) {
-				if (i != j) {
-					so->outputs[j] = so->outputs[i];
-					block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
-					block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
-					block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
-					block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
-				}
-				j++;
-			}
-		}
-		so->outputs_count = j;
-		block->noutputs = j * 4;
-	}
-
-	/* if we want half-precision outputs, mark the output registers
-	 * as half:
-	 */
-	if (key.half_precision) {
-		for (i = 0; i < block->noutputs; i++) {
-			if (!block->outputs[i])
-				continue;
-			block->outputs[i]->regs[0]->flags |= IR3_REG_HALF;
-		}
-	}
-
-	/* at this point, we want the kill's in the outputs array too,
-	 * so that they get scheduled (since they have no dst).. we've
-	 * already ensured that the array is big enough in push_block():
-	 */
-	if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
-		for (i = 0; i < ctx.kill_count; i++)
-			block->outputs[block->noutputs++] = ctx.kill[i];
-	}
-
-	if (fd_mesa_debug & FD_DBG_OPTDUMP)
-		compile_dump(&ctx);
-
-	ret = ir3_block_flatten(block);
-	if (ret < 0) {
-		DBG("FLATTEN failed!");
-		goto out;
-	}
-	if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP))
-		compile_dump(&ctx);
-
-	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
-		printf("BEFORE CP:\n");
-		ir3_dump_instr_list(block->head);
-	}
-
-	ir3_block_depth(block);
-
-	/* First remove all the extra mov's (which we could skip if the
-	 * front-end was clever enough not to insert them in the first
-	 * place).  Then figure out left/right neighbors, re-inserting
-	 * extra mov's when needed to avoid conflicts.
-	 */
-	if (cp && !(fd_mesa_debug & FD_DBG_NOCP))
-		ir3_block_cp(block);
-
-	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
-		printf("BEFORE GROUPING:\n");
-		ir3_dump_instr_list(block->head);
-	}
-
-	/* Group left/right neighbors, inserting mov's where needed to
-	 * solve conflicts:
-	 */
-	ir3_block_group(block);
-
-	if (fd_mesa_debug & FD_DBG_OPTDUMP)
-		compile_dump(&ctx);
-
-	ir3_block_depth(block);
-
-	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
-		printf("AFTER DEPTH:\n");
-		ir3_dump_instr_list(block->head);
-	}
-
-	ret = ir3_block_sched(block);
-	if (ret) {
-		DBG("SCHED failed!");
-		goto out;
-	}
-
-	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
-		printf("AFTER SCHED:\n");
-		ir3_dump_instr_list(block->head);
-	}
-
-	ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
-	if (ret) {
-		DBG("RA failed!");
-		goto out;
-	}
-
-	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
-		printf("AFTER RA:\n");
-		ir3_dump_instr_list(block->head);
-	}
-
-	ir3_block_legalize(block, &so->has_samp, &max_bary);
-
-	/* fixup input/outputs: */
-	for (i = 0; i < so->outputs_count; i++) {
-		so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
-		/* preserve hack for depth output.. tgsi writes depth to .z,
-		 * but what we give the hw is the scalar register:
-		 */
-		if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) &&
-			(sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
-			so->outputs[i].regid += 2;
-	}
-	/* Note that some or all channels of an input may be unused: */
-	actual_in = 0;
-	for (i = 0; i < so->inputs_count; i++) {
-		unsigned j, regid = ~0, compmask = 0;
-		so->inputs[i].ncomp = 0;
-		for (j = 0; j < 4; j++) {
-			struct ir3_instruction *in = inputs[(i*4) + j];
-			if (in) {
-				compmask |= (1 << j);
-				regid = in->regs[0]->num - j;
-				actual_in++;
-				so->inputs[i].ncomp++;
-			}
-		}
-		so->inputs[i].regid = regid;
-		so->inputs[i].compmask = compmask;
-	}
-
-	/* fragment shader always gets full vec4's even if it doesn't
-	 * fetch all components, but vertex shader we need to update
-	 * with the actual number of components fetch, otherwise thing
-	 * will hang due to mismaptch between VFD_DECODE's and
-	 * TOTALATTRTOVS
-	 */
-	if (so->type == SHADER_VERTEX)
-		so->total_in = actual_in;
-	else
-		so->total_in = align(max_bary + 1, 4);
-
-out:
-	if (ret) {
-		ir3_destroy(so->ir);
-		so->ir = NULL;
-	}
-	compile_free(&ctx);
-
-	return ret;
+	ralloc_free(compiler);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_compiler.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_compiler.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_compiler.h	2015-09-16 14:36:09.000000000 +0000
@@ -31,12 +31,18 @@
 
 #include "ir3_shader.h"
 
+struct ir3_ra_reg_set;
 
-int ir3_compile_shader_nir(struct ir3_shader_variant *so,
-		const struct tgsi_token *tokens, struct ir3_shader_key key);
+struct ir3_compiler {
+	uint32_t gpu_id;
+	struct ir3_ra_reg_set *set;
+	uint32_t shader_count;
+};
 
-int ir3_compile_shader(struct ir3_shader_variant *so,
-		const struct tgsi_token *tokens,
-		struct ir3_shader_key key, bool cp);
+struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id);
+void ir3_compiler_destroy(struct ir3_compiler *compiler);
+
+int ir3_compile_shader_nir(struct ir3_compiler *compiler,
+		struct ir3_shader_variant *so);
 
 #endif /* IR3_COMPILER_H_ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,19 +48,19 @@
 #include "ir3.h"
 
 
-static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
-
 struct ir3_compile {
+	struct ir3_compiler *compiler;
+
 	const struct tgsi_token *tokens;
 	struct nir_shader *s;
 
 	struct ir3 *ir;
 	struct ir3_shader_variant *so;
 
-	/* bitmask of which samplers are integer: */
-	uint16_t integer_s;
+	struct ir3_block *block;      /* the current block */
+	struct ir3_block *in_block;   /* block created for shader inputs */
 
-	struct ir3_block *block;
+	nir_function_impl *impl;
 
 	/* For fragment shaders, from the hw perspective the only
 	 * actual input is r0.xy position register passed to bary.f.
@@ -92,6 +92,11 @@
 	 */
 	struct hash_table *addr_ht;
 
+	/* maps nir_block to ir3_block, mostly for the purposes of
+	 * figuring out the block's successors
+	 */
+	struct hash_table *block_ht;
+
 	/* for calculating input/output positions/linkages: */
 	unsigned next_inloc;
 
@@ -104,13 +109,14 @@
 	 */
 	bool levels_add_one;
 
+	/* on a3xx, we need to scale up integer coords for isaml based
+	 * on LoD:
+	 */
+	bool unminify_coords;
+
 	/* for looking up which system value is which */
 	unsigned sysval_semantics[8];
 
-	/* list of kill instructions: */
-	struct ir3_instruction *kill[16];
-	unsigned int kill_count;
-
 	/* set if we encounter something we can't handle yet, so we
 	 * can bail cleanly and fallback to TGSI compiler f/e
 	 */
@@ -118,6 +124,9 @@
 };
 
 
+static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
+static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock);
+
 static struct nir_shader *to_nir(const struct tgsi_token *tokens)
 {
 	struct nir_shader_compiler_options options = {
@@ -140,12 +149,14 @@
 	nir_opt_global_to_local(s);
 	nir_convert_to_ssa(s);
 	nir_lower_idiv(s);
+	nir_lower_load_const_to_scalar(s);
 
 	do {
 		progress = false;
 
 		nir_lower_vars_to_ssa(s);
 		nir_lower_alu_to_scalar(s);
+		nir_lower_phis_to_scalar(s);
 
 		progress |= nir_copy_prop(s);
 		progress |= nir_opt_dce(s);
@@ -170,7 +181,8 @@
 
 /* TODO nir doesn't lower everything for us yet, but ideally it would: */
 static const struct tgsi_token *
-lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
+lower_tgsi(struct ir3_compile *ctx, const struct tgsi_token *tokens,
+		struct ir3_shader_variant *so)
 {
 	struct tgsi_shader_info info;
 	struct tgsi_lowering_config lconfig = {
@@ -192,11 +204,7 @@
 		break;
 	}
 
-	if (!so->shader) {
-		/* hack for standalone compiler which does not have
-		 * screen/context:
-		 */
-	} else if (ir3_shader_gpuid(so->shader) >= 400) {
+	if (ctx->compiler->gpu_id >= 400) {
 		/* a4xx seems to have *no* sam.p */
 		lconfig.lower_TXP = ~0;  /* lower all txp */
 	} else {
@@ -208,36 +216,26 @@
 }
 
 static struct ir3_compile *
-compile_init(struct ir3_shader_variant *so,
+compile_init(struct ir3_compiler *compiler,
+		struct ir3_shader_variant *so,
 		const struct tgsi_token *tokens)
 {
 	struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
 	const struct tgsi_token *lowered_tokens;
 
-	if (!so->shader) {
-		/* hack for standalone compiler which does not have
-		 * screen/context:
-		 */
-	} else if (ir3_shader_gpuid(so->shader) >= 400) {
+	if (compiler->gpu_id >= 400) {
 		/* need special handling for "flat" */
 		ctx->flat_bypass = true;
 		ctx->levels_add_one = false;
+		ctx->unminify_coords = false;
 	} else {
 		/* no special handling for "flat" */
 		ctx->flat_bypass = false;
 		ctx->levels_add_one = true;
+		ctx->unminify_coords = true;
 	}
 
-	switch (so->type) {
-	case SHADER_FRAGMENT:
-	case SHADER_COMPUTE:
-		ctx->integer_s = so->key.finteger_s;
-		break;
-	case SHADER_VERTEX:
-		ctx->integer_s = so->key.vinteger_s;
-		break;
-	}
-
+	ctx->compiler = compiler;
 	ctx->ir = so->ir;
 	ctx->so = so;
 	ctx->next_inloc = 8;
@@ -247,8 +245,10 @@
 			_mesa_hash_pointer, _mesa_key_pointer_equal);
 	ctx->addr_ht = _mesa_hash_table_create(ctx,
 			_mesa_hash_pointer, _mesa_key_pointer_equal);
+	ctx->block_ht = _mesa_hash_table_create(ctx,
+			_mesa_hash_pointer, _mesa_key_pointer_equal);
 
-	lowered_tokens = lower_tgsi(tokens, so);
+	lowered_tokens = lower_tgsi(ctx, tokens, so);
 	if (!lowered_tokens)
 		lowered_tokens = tokens;
 	ctx->s = to_nir(lowered_tokens);
@@ -258,13 +258,29 @@
 
 	so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
 
-	/* one (vec4) slot for vertex id base: */
-	if (so->type == SHADER_VERTEX)
-		so->first_immediate++;
+	/* Layout of constant registers:
+	 *
+	 *    num_uniform * vec4  -  user consts
+	 *    4 * vec4            -  UBO addresses
+	 *    if (vertex shader) {
+	 *        1 * vec4        -  driver params (IR3_DP_*)
+	 *        1 * vec4        -  stream-out addresses
+	 *    }
+	 *
+	 * TODO this could be made more dynamic, to at least skip sections
+	 * that we don't need..
+	 */
 
 	/* reserve 4 (vec4) slots for ubo base addresses: */
 	so->first_immediate += 4;
 
+	if (so->type == SHADER_VERTEX) {
+		/* one (vec4) slot for driver params (see ir3_driver_param): */
+		so->first_immediate++;
+		/* one (vec4) slot for stream-output base addresses: */
+		so->first_immediate++;
+	}
+
 	return ctx;
 }
 
@@ -290,33 +306,206 @@
 	ralloc_free(ctx);
 }
 
-
+/* global per-array information: */
 struct ir3_array {
 	unsigned length, aid;
+};
+
+/* per-block array state: */
+struct ir3_array_value {
+	/* TODO drop length/aid, and just have ptr back to ir3_array */
+	unsigned length, aid;
+	/* initial array element values are phi's, other than for the
+	 * entry block.  The phi src's get added later in a resolve step
+	 * after we have visited all the blocks, to account for back
+	 * edges in the cfg.
+	 */
+	struct ir3_instruction **phis;
+	/* current array element values (as block is processed).  When
+	 * the array phi's are resolved, it will contain the array state
+	 * at exit of block, so successor blocks can use it to add their
+	 * phi srcs.
+	 */
 	struct ir3_instruction *arr[];
 };
 
+/* track array assignments per basic block.  When an array is read
+ * outside of the same basic block, we can use NIR's dominance-frontier
+ * information to figure out where phi nodes are needed.
+ */
+struct ir3_nir_block_data {
+	unsigned foo;
+	/* indexed by array-id (aid): */
+	struct ir3_array_value *arrs[];
+};
+
+/* Return block's per-array state, allocating it zero-filled on first use: */
+static struct ir3_nir_block_data *
+get_block_data(struct ir3_compile *ctx, struct ir3_block *block)
+{
+	/* arrs[] is indexed by aid; callers NULL-test a slot before filling it: */
+	if (!block->bd)
+		block->bd = rzalloc_size(ctx, sizeof(struct ir3_nir_block_data) +
+				((ctx->num_arrays + 1) * sizeof(struct ir3_array_value *)));
+	return block->bd;
+}
+
 static void
 declare_var(struct ir3_compile *ctx, nir_variable *var)
 {
 	unsigned length = glsl_get_length(var->type) * 4;  /* always vec4, at least with ttn */
-	struct ir3_array *arr = ralloc_size(ctx, sizeof(*arr) +
-			(length * sizeof(arr->arr[0])));
+	struct ir3_array *arr = ralloc(ctx, struct ir3_array);
 	arr->length = length;
 	arr->aid = ++ctx->num_arrays;
-	/* Some shaders end up reading array elements without first writing..
-	 * so initialize things to prevent null instr ptrs later:
-	 */
-	for (unsigned i = 0; i < length; i++)
-		arr->arr[i] = create_immed(ctx->block, 0);
 	_mesa_hash_table_insert(ctx->var_ht, var, arr);
 }
 
-static struct ir3_array *
+static nir_block *
+nir_block_pred(nir_block *block)
+{
+	assert(block->predecessors->entries < 2);
+	if (block->predecessors->entries == 0)
+		return NULL;
+	return (nir_block *)_mesa_set_next_entry(block->predecessors, NULL)->key;
+}
+
+static struct ir3_array_value *
 get_var(struct ir3_compile *ctx, nir_variable *var)
 {
 	struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
-	return entry->data;
+	struct ir3_block *block = ctx->block;
+	struct ir3_nir_block_data *bd = get_block_data(ctx, block);
+	struct ir3_array *arr = entry->data;
+	/* created on first access; zero-filled so av->phis is NULL unless set: */
+	if (!bd->arrs[arr->aid]) {
+		struct ir3_array_value *av = rzalloc_size(bd, sizeof(*av) +
+				(arr->length * sizeof(av->arr[0])));
+		struct ir3_array_value *defn = NULL;
+		nir_block *pred_block;
+
+		av->length = arr->length;
+		av->aid = arr->aid;
+
+		/* For loops, we have to consider that we have not visited some
+		 * of the blocks who should feed into the phi (ie. back-edges in
+		 * the cfg).. for example:
+		 *
+		 *   loop {
+		 *      block { load_var; ... }
+		 *      if then block {} else block {}
+		 *      block { store_var; ... }
+		 *      if then block {} else block {}
+		 *      block {...}
+		 *   }
+		 *
+		 * We can skip the phi if we can chase the block predecessors
+		 * until finding the block previously defining the array without
+		 * crossing a block that has more than one predecessor.
+		 *
+		 * Otherwise create phi's and resolve them as a post-pass after
+		 * all the blocks have been visited (to handle back-edges).
+		 */
+
+		for (pred_block = block->nblock;
+				pred_block && (pred_block->predecessors->entries < 2) && !defn;
+				pred_block = nir_block_pred(pred_block)) {
+			struct ir3_block *pblock = get_block(ctx, pred_block);
+			struct ir3_nir_block_data *pbd = pblock->bd;
+			if (!pbd)
+				continue;
+			defn = pbd->arrs[arr->aid];
+		}
+
+		if (defn) {
+			/* only one possible definer: */
+			for (unsigned i = 0; i < arr->length; i++)
+				av->arr[i] = defn->arr[i];
+		} else if (pred_block) {
+			/* not the first block, and multiple potential definers: */
+			av->phis = ralloc_size(av, arr->length * sizeof(av->phis[0]));
+
+			for (unsigned i = 0; i < arr->length; i++) {
+				struct ir3_instruction *phi;
+
+				phi = ir3_instr_create2(block, -1, OPC_META_PHI,
+						1 + ctx->impl->num_blocks);
+				ir3_reg_create(phi, 0, 0);         /* dst */
+
+				/* phi's should go at head of block: */
+				list_delinit(&phi->node);
+				list_add(&phi->node, &block->instr_list);
+
+				av->phis[i] = av->arr[i] = phi;
+			}
+		} else {
+			/* Some shaders end up reading array elements without
+			 * first writing.. so initialize things to prevent null
+			 * instr ptrs later:
+			 */
+			for (unsigned i = 0; i < arr->length; i++)
+				av->arr[i] = create_immed(block, 0);
+		}
+
+		bd->arrs[arr->aid] = av;
+	}
+
+	return bd->arrs[arr->aid];
+}
+
+static void
+add_array_phi_srcs(struct ir3_compile *ctx, nir_block *nblock,
+		struct ir3_array_value *av, BITSET_WORD *visited)
+{
+	struct ir3_block *block;
+	struct ir3_nir_block_data *bd;
+
+	if (BITSET_TEST(visited, nblock->index))
+		return;
+
+	BITSET_SET(visited, nblock->index);
+
+	block = get_block(ctx, nblock);
+	bd = block->bd;
+
+	if (bd && bd->arrs[av->aid]) {
+		struct ir3_array_value *dav = bd->arrs[av->aid];
+		for (unsigned i = 0; i < av->length; i++) {
+			ir3_reg_create(av->phis[i], 0, IR3_REG_SSA)->instr =
+					dav->arr[i];
+		}
+	} else {
+		/* didn't find defn, recurse predecessors: */
+		struct set_entry *entry;
+		set_foreach(nblock->predecessors, entry) {
+			add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
+		}
+	}
+}
+
+static void
+resolve_array_phis(struct ir3_compile *ctx, struct ir3_block *block)
+{
+	struct ir3_nir_block_data *bd = block->bd;
+	unsigned bitset_words = BITSET_WORDS(ctx->impl->num_blocks);
+
+	if (!bd)
+		return;
+
+	/* TODO use nir dom_frontier to help us with this? */
+
+	for (unsigned i = 1; i <= ctx->num_arrays; i++) {
+		struct ir3_array_value *av = bd->arrs[i];
+		BITSET_WORD visited[bitset_words];
+		struct set_entry *entry;
+
+		if (!(av && av->phis))
+			continue;
+
+		memset(visited, 0, sizeof(visited));
+		set_foreach(block->nblock->predecessors, entry) {
+			add_array_phi_srcs(ctx, (nir_block *)entry->key, av, visited);
+		}
+	}
 }
 
 /* allocate a n element value array (to be populated by caller) and
@@ -393,7 +582,8 @@
 	instr->regs[1]->flags |= IR3_REG_HALF;
 
 	instr = ir3_MOV(block, instr, TYPE_S16);
-	instr->regs[0]->flags |= IR3_REG_ADDR | IR3_REG_HALF;
+	instr->regs[0]->num = regid(REG_A0, 0);
+	instr->regs[0]->flags |= IR3_REG_HALF;
 	instr->regs[1]->flags |= IR3_REG_HALF;
 
 	return instr;
@@ -419,6 +609,22 @@
 }
 
 static struct ir3_instruction *
+get_predicate(struct ir3_compile *ctx, struct ir3_instruction *src)
+{
+	struct ir3_block *b = ctx->block;
+	struct ir3_instruction *cond;
+
+	/* NOTE: only cmps.*.* can write p0.x: */
+	cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
+	cond->cat2.condition = IR3_COND_NE;
+
+	/* condition always goes in predicate register: */
+	cond->regs[0]->num = regid(REG_P0, 0);
+
+	return cond;
+}
+
+static struct ir3_instruction *
 create_uniform(struct ir3_compile *ctx, unsigned n)
 {
 	struct ir3_instruction *mov;
@@ -444,9 +650,8 @@
 	mov->cat1.dst_type = TYPE_U32;
 	ir3_reg_create(mov, 0, 0);
 	ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
-	mov->address = address;
 
-	array_insert(ctx->ir->indirects, mov);
+	ir3_instr_set_address(mov, address);
 
 	return mov;
 }
@@ -461,7 +666,7 @@
 		return NULL;
 
 	collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz);
-	ir3_reg_create(collect, 0, 0);
+	ir3_reg_create(collect, 0, 0);     /* dst */
 	for (unsigned i = 0; i < arrsz; i++)
 		ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i];
 
@@ -484,9 +689,8 @@
 	src->instr = collect;
 	src->size  = arrsz;
 	src->offset = n;
-	mov->address = address;
 
-	array_insert(ctx->ir->indirects, mov);
+	ir3_instr_set_address(mov, address);
 
 	return mov;
 }
@@ -507,25 +711,21 @@
 	dst->size  = arrsz;
 	dst->offset = n;
 	ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
-	mov->address = address;
 	mov->fanin = collect;
 
-	array_insert(ctx->ir->indirects, mov);
+	ir3_instr_set_address(mov, address);
 
 	return mov;
 }
 
 static struct ir3_instruction *
-create_input(struct ir3_block *block, struct ir3_instruction *instr,
-		unsigned n)
+create_input(struct ir3_block *block, unsigned n)
 {
 	struct ir3_instruction *in;
 
 	in = ir3_instr_create(block, -1, OPC_META_INPUT);
 	in->inout.block = block;
 	ir3_reg_create(in, n, 0);
-	if (instr)
-		ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
 
 	return in;
 }
@@ -557,7 +757,7 @@
 
 	compile_assert(ctx, !ctx->frag_coord[comp]);
 
-	ctx->frag_coord[comp] = create_input(ctx->block, NULL, 0);
+	ctx->frag_coord[comp] = create_input(ctx->block, 0);
 
 	switch (comp) {
 	case 0: /* .x */
@@ -596,7 +796,8 @@
 	case 0: /* .x */
 		compile_assert(ctx, !ctx->frag_face);
 
-		ctx->frag_face = create_input(block, NULL, 0);
+		ctx->frag_face = create_input(block, 0);
+		ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
 
 		/* for faceness, we always get -1 or 0 (int).. but TGSI expects
 		 * positive vs negative float.. and piglit further seems to
@@ -623,15 +824,23 @@
 	}
 }
 
+static struct ir3_instruction *
+create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
+{
+	/* the first four vec4 sysvals are reserved for UBOs: */
+	unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+	return create_uniform(ctx, r);
+}
+
 /* helper for instructions that produce multiple consecutive scalar
  * outputs which need to have a split/fanout meta instruction inserted
  */
 static void
 split_dest(struct ir3_block *block, struct ir3_instruction **dst,
-		struct ir3_instruction *src)
+		struct ir3_instruction *src, unsigned n)
 {
 	struct ir3_instruction *prev = NULL;
-	for (int i = 0, j = 0; i < 4; i++) {
+	for (int i = 0, j = 0; i < n; i++) {
 		struct ir3_instruction *split =
 				ir3_instr_create(block, -1, OPC_META_FO);
 		ir3_reg_create(split, 0, IR3_REG_SSA);
@@ -882,9 +1091,15 @@
 	case nir_op_imax:
 		dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0);
 		break;
+	case nir_op_umax:
+		dst[0] = ir3_MAX_U(b, src[0], 0, src[1], 0);
+		break;
 	case nir_op_imin:
 		dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0);
 		break;
+	case nir_op_umin:
+		dst[0] = ir3_MIN_U(b, src[0], 0, src[1], 0);
+		break;
 	case nir_op_imul:
 		/*
 		 * dst = (al * bl) + (ah * bl << 16) + (al * bh << 16)
@@ -1018,7 +1233,7 @@
 		struct ir3_instruction *load =
 				ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
 		load->cat6.type = TYPE_U32;
-		load->cat6.offset = off + i * 4;    /* byte offset */
+		load->cat6.src_offset = off + i * 4;     /* byte offset */
 		dst[i] = load;
 	}
 }
@@ -1030,7 +1245,7 @@
 {
 	nir_deref_var *dvar = intr->variables[0];
 	nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
-	struct ir3_array *arr = get_var(ctx, dvar->var);
+	struct ir3_array_value *arr = get_var(ctx, dvar->var);
 
 	compile_assert(ctx, dvar->deref.child &&
 		(dvar->deref.child->deref_type == nir_deref_type_array));
@@ -1070,7 +1285,7 @@
 {
 	nir_deref_var *dvar = intr->variables[0];
 	nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
-	struct ir3_array *arr = get_var(ctx, dvar->var);
+	struct ir3_array_value *arr = get_var(ctx, dvar->var);
 	struct ir3_instruction **src;
 
 	compile_assert(ctx, dvar->deref.child &&
@@ -1107,7 +1322,7 @@
 			 * store_output_indirect? or move this into
 			 * create_indirect_store()?
 			 */
-			for (int j = i; j < arr->length; j += 4) {
+			for (int j = i; j < arr->length; j += intr->num_components) {
 				struct ir3_instruction *split;
 
 				split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
@@ -1118,6 +1333,13 @@
 				arr->arr[j] = split;
 			}
 		}
+		/* fixup fanout/split neighbors: */
+		for (int i = 0; i < arr->length; i++) {
+			arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
+					arr->arr[i+1] : NULL;
+			arr->arr[i]->cp.left = (i > 0) ?
+					arr->arr[i-1] : NULL;
+		}
 		break;
 	}
 	default:
@@ -1140,8 +1362,8 @@
 	so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
 	so->total_in++;
 
-	ctx->block->ninputs = MAX2(ctx->block->ninputs, r + 1);
-	ctx->block->inputs[r] = instr;
+	ctx->ir->ninputs = MAX2(ctx->ir->ninputs, r + 1);
+	ctx->ir->inputs[r] = instr;
 }
 
 static void
@@ -1154,45 +1376,49 @@
 
 	if (info->has_dest) {
 		dst = get_dst(ctx, &intr->dest, intr->num_components);
+	} else {
+		dst = NULL;
 	}
 
 	switch (intr->intrinsic) {
 	case nir_intrinsic_load_uniform:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
 			dst[i] = create_uniform(ctx, n);
 		}
 		break;
 	case nir_intrinsic_load_uniform_indirect:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		src = get_src(ctx, &intr->src[0]);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
 			dst[i] = create_uniform_indirect(ctx, n,
 					get_addr(ctx, src[0]));
 		}
+		/* NOTE: if relative addressing is used, we set constlen in
+		 * the compiler (to worst-case value) since we don't know in
+		 * the assembler what the max addr reg value can be:
+		 */
+		ctx->so->constlen = ctx->s->num_uniforms;
 		break;
 	case nir_intrinsic_load_ubo:
 	case nir_intrinsic_load_ubo_indirect:
 		emit_intrinsic_load_ubo(ctx, intr, dst);
 		break;
 	case nir_intrinsic_load_input:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
-			dst[i] = b->inputs[n];
+			dst[i] = ctx->ir->inputs[n];
 		}
 		break;
 	case nir_intrinsic_load_input_indirect:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		src = get_src(ctx, &intr->src[0]);
 		struct ir3_instruction *collect =
-				create_collect(b, b->inputs, b->ninputs);
+				create_collect(b, ctx->ir->inputs, ctx->ir->ninputs);
 		struct ir3_instruction *addr = get_addr(ctx, src[0]);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
-			dst[i] = create_indirect_load(ctx, b->ninputs, n, addr, collect);
+			dst[i] = create_indirect_load(ctx, ctx->ir->ninputs,
+					n, addr, collect);
 		}
 		break;
 	case nir_intrinsic_load_var:
@@ -1202,18 +1428,15 @@
 		emit_intrinisic_store_var(ctx, intr);
 		break;
 	case nir_intrinsic_store_output:
-		compile_assert(ctx, intr->const_index[1] == 1);
 		src = get_src(ctx, &intr->src[0]);
 		for (int i = 0; i < intr->num_components; i++) {
 			unsigned n = idx * 4 + i;
-			b->outputs[n] = src[i];
+			ctx->ir->outputs[n] = src[i];
 		}
 		break;
 	case nir_intrinsic_load_base_vertex:
 		if (!ctx->basevertex) {
-			/* first four vec4 sysval's reserved for UBOs: */
-			unsigned r = regid(ctx->so->first_driver_param + 4, 0);
-			ctx->basevertex = create_uniform(ctx, r);
+			ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
 			add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX,
 					ctx->basevertex);
 		}
@@ -1221,7 +1444,7 @@
 		break;
 	case nir_intrinsic_load_vertex_id_zero_base:
 		if (!ctx->vertex_id) {
-			ctx->vertex_id = create_input(ctx->block, NULL, 0);
+			ctx->vertex_id = create_input(ctx->block, 0);
 			add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE,
 					ctx->vertex_id);
 		}
@@ -1229,7 +1452,7 @@
 		break;
 	case nir_intrinsic_load_instance_id:
 		if (!ctx->instance_id) {
-			ctx->instance_id = create_input(ctx->block, NULL, 0);
+			ctx->instance_id = create_input(ctx->block, 0);
 			add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID,
 					ctx->instance_id);
 		}
@@ -1248,6 +1471,7 @@
 			cond = create_immed(b, 1);
 		}
 
+		/* NOTE: only cmps.*.* can write p0.x: */
 		cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0);
 		cond->cat2.condition = IR3_COND_NE;
 
@@ -1255,8 +1479,9 @@
 		cond->regs[0]->num = regid(REG_P0, 0);
 
 		kill = ir3_KILL(b, cond, 0);
+		array_insert(ctx->ir->predicates, kill);
 
-		ctx->kill[ctx->kill_count++] = kill;
+		array_insert(ctx->ir->keeps, kill);
 		ctx->so->has_kill = true;
 
 		break;
@@ -1318,6 +1543,8 @@
 		coords = 3;
 		flags |= IR3_INSTR_3D;
 		break;
+	default:
+		unreachable("bad sampler_dim");
 	}
 
 	if (tex->is_shadow)
@@ -1340,7 +1567,10 @@
 	unsigned i, coords, flags;
 	unsigned nsrc0 = 0, nsrc1 = 0;
 	type_t type;
-	opc_t opc;
+	opc_t opc = 0;
+
+	coord = off = ddx = ddy = NULL;
+	lod = proj = compare = NULL;
 
 	/* TODO: might just be one component for gathers? */
 	dst = get_dst(ctx, &tex->dest, 4);
@@ -1400,11 +1630,17 @@
 	tex_info(tex, &flags, &coords);
 
 	/* scale up integer coords for TXF based on the LOD */
-	if (opc == OPC_ISAML) {
+	if (ctx->unminify_coords && (opc == OPC_ISAML)) {
 		assert(has_lod);
 		for (i = 0; i < coords; i++)
 			coord[i] = ir3_SHL_B(b, coord[i], 0, lod, 0);
 	}
+
+	/* the array coord for cube arrays needs 0.5 added to it */
+	if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array &&
+		opc != OPC_ISAML)
+		coord[3] = ir3_ADD_F(b, coord[3], 0, create_immed(b, fui(0.5)), 0);
+
 	/*
 	 * lay out the first argument in the proper order:
 	 *  - actual coordinates first
@@ -1484,6 +1720,8 @@
 	case nir_type_bool:
 		type = TYPE_U32;
 		break;
+	default:
+		unreachable("bad dest_type");
 	}
 
 	sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW,
@@ -1491,7 +1729,7 @@
 			create_collect(b, src0, nsrc0),
 			create_collect(b, src1, nsrc1));
 
-	split_dest(b, dst, sam);
+	split_dest(b, dst, sam, 4);
 }
 
 static void
@@ -1508,7 +1746,7 @@
 	/* even though there is only one component, since it ends
 	 * up in .z rather than .x, we need a split_dest()
 	 */
-	split_dest(b, dst, sam);
+	split_dest(b, dst, sam, 3);
 
 	/* The # of levels comes from getinfo.z. We need to add 1 to it, since
 	 * the value in TEX_CONST_0 is zero-based.
@@ -1526,6 +1764,12 @@
 
 	tex_info(tex, &flags, &coords);
 
+	/* Actually we want the number of dimensions, not coordinates. This
+	 * distinction only matters for cubes.
+	 */
+	if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+		coords = 2;
+
 	dst = get_dst(ctx, &tex->dest, 4);
 
 	compile_assert(ctx, tex->num_srcs == 1);
@@ -1536,7 +1780,7 @@
 	sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
 			tex->sampler_index, tex->sampler_index, lod, NULL);
 
-	split_dest(b, dst, sam);
+	split_dest(b, dst, sam, 4);
 
 	/* Array size actually ends up in .w rather than .z. This doesn't
 	 * matter for miplevel 0, but for higher mips the value in z is
@@ -1553,6 +1797,71 @@
 }
 
 static void
+emit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi)
+{
+	struct ir3_instruction *phi, **dst;
+
+	/* NOTE: phis should already be lowered to scalar at this point */
+	compile_assert(ctx, nphi->dest.ssa.num_components == 1);
+
+	dst = get_dst(ctx, &nphi->dest, 1);
+
+	phi = ir3_instr_create2(ctx->block, -1, OPC_META_PHI,
+			1 + exec_list_length(&nphi->srcs));
+	ir3_reg_create(phi, 0, 0);         /* dst */
+	phi->phi.nphi = nphi;
+
+	dst[0] = phi;
+}
+
+/* phi instructions are left partially constructed.  Their srcs are not
+ * resolved until the whole function body has been emitted, since (eg.
+ * in loops) one of a phi's srcs might be defined after the phi itself
+ * due to back edges in the CFG.
+ */
+static void
+resolve_phis(struct ir3_compile *ctx, struct ir3_block *block)
+{
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		nir_phi_instr *nphi;
+
+		/* phi's only come at start of block: */
+		if (!(is_meta(instr) && (instr->opc == OPC_META_PHI)))
+			break;
+
+		if (!instr->phi.nphi)
+			break;
+
+		nphi = instr->phi.nphi;
+		instr->phi.nphi = NULL;
+
+		foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) {
+			struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0];
+			ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
+		}
+	}
+
+	resolve_array_phis(ctx, block);
+}
+
+static void
+emit_jump(struct ir3_compile *ctx, nir_jump_instr *jump)
+{
+	switch (jump->type) {
+	case nir_jump_break:
+	case nir_jump_continue:
+		/* I *think* we can simply just ignore this, and use the
+		 * successor block link to figure out where we need to
+		 * jump to for break/continue
+		 */
+		break;
+	default:
+		compile_error(ctx, "Unhandled NIR jump type: %d\n", jump->type);
+		break;
+	}
+}
+
+static void
 emit_instr(struct ir3_compile *ctx, nir_instr *instr)
 {
 	switch (instr->type) {
@@ -1585,44 +1894,238 @@
 		}
 		break;
 	}
-	case nir_instr_type_call:
-	case nir_instr_type_jump:
 	case nir_instr_type_phi:
+		emit_phi(ctx, nir_instr_as_phi(instr));
+		break;
+	case nir_instr_type_jump:
+		emit_jump(ctx, nir_instr_as_jump(instr));
+		break;
+	case nir_instr_type_call:
 	case nir_instr_type_parallel_copy:
 		compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type);
 		break;
 	}
 }
 
+static struct ir3_block *
+get_block(struct ir3_compile *ctx, nir_block *nblock)
+{
+	struct ir3_block *block;
+	struct hash_entry *entry;
+	entry = _mesa_hash_table_search(ctx->block_ht, nblock);
+	if (entry)
+		return entry->data;
+
+	block = ir3_block_create(ctx->ir);
+	block->nblock = nblock;
+	_mesa_hash_table_insert(ctx->block_ht, nblock, block);
+
+	return block;
+}
+
 static void
-emit_block(struct ir3_compile *ctx, nir_block *block)
+emit_block(struct ir3_compile *ctx, nir_block *nblock)
 {
-	nir_foreach_instr(block, instr) {
+	struct ir3_block *block = get_block(ctx, nblock);
+
+	for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
+		if (nblock->successors[i]) {
+			block->successors[i] =
+				get_block(ctx, nblock->successors[i]);
+		}
+	}
+
+	ctx->block = block;
+	list_addtail(&block->node, &ctx->ir->block_list);
+
+	nir_foreach_instr(nblock, instr) {
 		emit_instr(ctx, instr);
 		if (ctx->error)
 			return;
 	}
 }
 
+static void emit_cf_list(struct ir3_compile *ctx, struct exec_list *list);
+
 static void
-emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
+emit_if(struct ir3_compile *ctx, nir_if *nif)
+{
+	struct ir3_instruction *condition = get_src(ctx, &nif->condition)[0];
+
+	ctx->block->condition =
+		get_predicate(ctx, ir3_b2n(condition->block, condition));
+
+	emit_cf_list(ctx, &nif->then_list);
+	emit_cf_list(ctx, &nif->else_list);
+}
+
+static void
+emit_loop(struct ir3_compile *ctx, nir_loop *nloop)
 {
-	foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+	emit_cf_list(ctx, &nloop->body);
+}
+
+static void
+emit_cf_list(struct ir3_compile *ctx, struct exec_list *list)
+{
+	foreach_list_typed(nir_cf_node, node, node, list) {
 		switch (node->type) {
 		case nir_cf_node_block:
 			emit_block(ctx, nir_cf_node_as_block(node));
 			break;
 		case nir_cf_node_if:
+			emit_if(ctx, nir_cf_node_as_if(node));
+			break;
 		case nir_cf_node_loop:
+			emit_loop(ctx, nir_cf_node_as_loop(node));
+			break;
 		case nir_cf_node_function:
 			compile_error(ctx, "TODO\n");
 			break;
 		}
-		if (ctx->error)
-			return;
 	}
 }
 
+/* emit stream-out code.  At this point, the current block is the original
+ * (nir) end block, and nir ensures that all flow control paths terminate
+ * into the end block.  We re-purpose the original end block to generate
+ * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional
+ * block holding stream-out write instructions, followed by the new end
+ * block:
+ *
+ *   blockOrigEnd {
+ *      p0.x = (vtxcnt < maxvtxcnt)
+ *      // succs: blockStreamOut, blockNewEnd
+ *   }
+ *   blockStreamOut {
+ *      ... stream-out instructions ...
+ *      // succs: blockNewEnd
+ *   }
+ *   blockNewEnd {
+ *   }
+ */
+static void
+emit_stream_out(struct ir3_compile *ctx)
+{
+	struct ir3_shader_variant *v = ctx->so;
+	struct ir3 *ir = ctx->ir;
+	struct pipe_stream_output_info *strmout =
+			&ctx->so->shader->stream_output;
+	struct ir3_block *orig_end_block, *stream_out_block, *new_end_block;
+	struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond;
+	struct ir3_instruction *bases[PIPE_MAX_SO_BUFFERS];
+
+	/* create vtxcnt input in input block at top of shader,
+	 * so that it is seen as live over the entire duration
+	 * of the shader:
+	 */
+	vtxcnt = create_input(ctx->in_block, 0);
+	add_sysval_input(ctx, IR3_SEMANTIC_VTXCNT, vtxcnt);
+
+	maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
+
+	/* at this point, we are at the original 'end' block;
+	 * re-purpose it to hold the stream-out condition, then
+	 * append the stream-out block and the new end block:
+	 */
+	orig_end_block = ctx->block;
+
+	stream_out_block = ir3_block_create(ir);
+	list_addtail(&stream_out_block->node, &ir->block_list);
+
+	new_end_block = ir3_block_create(ir);
+	list_addtail(&new_end_block->node, &ir->block_list);
+
+	orig_end_block->successors[0] = stream_out_block;
+	orig_end_block->successors[1] = new_end_block;
+	stream_out_block->successors[0] = new_end_block;
+
+	/* setup 'if (vtxcnt < maxvtxcnt)' condition: */
+	cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0);
+	cond->regs[0]->num = regid(REG_P0, 0);
+	cond->cat2.condition = IR3_COND_LT;
+
+	/* condition goes on previous block to the conditional,
+	 * since it is used to pick which of the two successor
+	 * paths to take:
+	 */
+	orig_end_block->condition = cond;
+
+	/* switch to stream_out_block to generate the stream-out
+	 * instructions:
+	 */
+	ctx->block = stream_out_block;
+
+	/* Calculate base addresses based on vtxcnt.  Instructions
+	 * generated for bases not used in following loop will be
+	 * stripped out in the backend.
+	 */
+	for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+		unsigned stride = strmout->stride[i];
+		struct ir3_instruction *base, *off;
+
+		base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+
+		/* 24-bit should be enough: */
+		off = ir3_MUL_U(ctx->block, vtxcnt, 0,
+				create_immed(ctx->block, stride * 4), 0);
+
+		bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0);
+	}
+
+	/* Generate the per-output store instructions: */
+	for (unsigned i = 0; i < strmout->num_outputs; i++) {
+		for (unsigned j = 0; j < strmout->output[i].num_components; j++) {
+			unsigned c = j + strmout->output[i].start_component;
+			struct ir3_instruction *base, *out, *stg;
+
+			base = bases[strmout->output[i].output_buffer];
+			out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)];
+
+			stg = ir3_STG(ctx->block, base, 0, out, 0,
+					create_immed(ctx->block, 1), 0);
+			stg->cat6.type = TYPE_U32;
+			stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4;
+
+			array_insert(ctx->ir->keeps, stg);
+		}
+	}
+
+	/* and finally switch to the new_end_block: */
+	ctx->block = new_end_block;
+}
+
+static void
+emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
+{
+	emit_cf_list(ctx, &impl->body);
+	emit_block(ctx, impl->end_block);
+
+	/* at this point, we should have a single empty block,
+	 * into which we emit the 'end' instruction.
+	 */
+	compile_assert(ctx, list_empty(&ctx->block->instr_list));
+
+	/* If stream-out (aka transform-feedback) enabled, emit the
+	 * stream-out instructions, followed by a new empty block (into
+	 * which the 'end' instruction lands).
+	 *
+	 * NOTE: it is done in this order, rather than inserting before
+	 * we emit end_block, because NIR guarantees that all blocks
+	 * flow into end_block, and that end_block has no successors.
+	 * So by re-purposing end_block as the first block of stream-
+	 * out, we guarantee that all exit paths flow into the stream-
+	 * out instructions.
+	 */
+	if ((ctx->so->shader->stream_output.num_outputs > 0) &&
+			!ctx->so->key.binning_pass) {
+		debug_assert(ctx->so->type == SHADER_VERTEX);
+		emit_stream_out(ctx);
+	}
+
+	ir3_END(ctx->block);
+}
+
 static void
 setup_input(struct ir3_compile *ctx, nir_variable *in)
 {
@@ -1634,7 +2137,7 @@
 	unsigned semantic_index = in->data.index;
 	unsigned n = in->data.driver_location;
 
-	DBG("; in: %u:%u, len=%ux%u, loc=%u\n",
+	DBG("; in: %u:%u, len=%ux%u, loc=%u",
 			semantic_name, semantic_index, array_len,
 			ncomp, n);
 
@@ -1705,10 +2208,10 @@
 						so->inputs[n].inloc + i - 8, use_ldlv);
 			}
 		} else {
-			instr = create_input(ctx->block, NULL, idx);
+			instr = create_input(ctx->block, idx);
 		}
 
-		ctx->block->inputs[idx] = instr;
+		ctx->ir->inputs[idx] = instr;
 	}
 
 	if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
@@ -1729,7 +2232,7 @@
 	unsigned n = out->data.driver_location;
 	unsigned comp = 0;
 
-	DBG("; out: %u:%u, len=%ux%u, loc=%u\n",
+	DBG("; out: %u:%u, len=%ux%u, loc=%u",
 			semantic_name, semantic_index, array_len,
 			ncomp, n);
 
@@ -1758,6 +2261,10 @@
 			so->writes_pos = true;
 			break;
 		case TGSI_SEMANTIC_COLOR:
+			if (semantic_index == -1) {
+				semantic_index = 0;
+				so->color0_mrt = 1;
+			}
 			break;
 		default:
 			compile_error(ctx, "unknown FS semantic name: %s\n",
@@ -1775,32 +2282,42 @@
 	for (int i = 0; i < ncomp; i++) {
 		unsigned idx = (n * 4) + i;
 
-		ctx->block->outputs[idx] = create_immed(ctx->block, fui(0.0));
+		ctx->ir->outputs[idx] = create_immed(ctx->block, fui(0.0));
 	}
 }
 
 static void
 emit_instructions(struct ir3_compile *ctx)
 {
-	unsigned ninputs  = exec_list_length(&ctx->s->inputs) * 4;
-	unsigned noutputs = exec_list_length(&ctx->s->outputs) * 4;
+	unsigned ninputs, noutputs;
+	nir_function_impl *fxn = NULL;
+
+	/* Find the main function: */
+	nir_foreach_overload(ctx->s, overload) {
+		compile_assert(ctx, strcmp(overload->function->name, "main") == 0);
+		compile_assert(ctx, overload->impl);
+		fxn = overload->impl;
+		break;
+	}
+
+	ninputs  = exec_list_length(&ctx->s->inputs) * 4;
+	noutputs = exec_list_length(&ctx->s->outputs) * 4;
 
-	/* we need to allocate big enough outputs array so that
-	 * we can stuff the kill's at the end.  Likewise for vtx
-	 * shaders, we need to leave room for sysvals:
+	/* for vtx shaders, we need to leave room for sysvals:
 	 */
-	if (ctx->so->type == SHADER_FRAGMENT) {
-		noutputs += ARRAY_SIZE(ctx->kill);
-	} else if (ctx->so->type == SHADER_VERTEX) {
+	if (ctx->so->type == SHADER_VERTEX) {
 		ninputs += 8;
 	}
 
-	ctx->block = ir3_block_create(ctx->ir, 0, ninputs, noutputs);
+	ctx->ir = ir3_create(ctx->compiler, ninputs, noutputs);
 
-	if (ctx->so->type == SHADER_FRAGMENT) {
-		ctx->block->noutputs -= ARRAY_SIZE(ctx->kill);
-	} else if (ctx->so->type == SHADER_VERTEX) {
-		ctx->block->ninputs -= 8;
+	/* Create inputs in first block: */
+	ctx->block = get_block(ctx, fxn->start_block);
+	ctx->in_block = ctx->block;
+	list_addtail(&ctx->block->node, &ctx->ir->block_list);
+
+	if (ctx->so->type == SHADER_VERTEX) {
+		ctx->ir->ninputs -= 8;
 	}
 
 	/* for fragment shader, we have a single input register (usually
@@ -1831,13 +2348,12 @@
 		declare_var(ctx, var);
 	}
 
-	/* Find the main function and emit the body: */
-	nir_foreach_overload(ctx->s, overload) {
-		compile_assert(ctx, strcmp(overload->function->name, "main") == 0);
-		compile_assert(ctx, overload->impl);
-		emit_function(ctx, overload->impl);
-		if (ctx->error)
-			return;
+	/* And emit the body: */
+	ctx->impl = fxn;
+	emit_function(ctx, fxn);
+
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		resolve_phis(ctx, block);
 	}
 }
 
@@ -1850,12 +2366,12 @@
 fixup_frag_inputs(struct ir3_compile *ctx)
 {
 	struct ir3_shader_variant *so = ctx->so;
-	struct ir3_block *block = ctx->block;
+	struct ir3 *ir = ctx->ir;
 	struct ir3_instruction **inputs;
 	struct ir3_instruction *instr;
 	int n, regid = 0;
 
-	block->ninputs = 0;
+	ir->ninputs = 0;
 
 	n  = 4;  /* always have frag_pos */
 	n += COND(so->frag_face, 4);
@@ -1867,15 +2383,15 @@
 		/* this ultimately gets assigned to hr0.x so doesn't conflict
 		 * with frag_coord/frag_pos..
 		 */
-		inputs[block->ninputs++] = ctx->frag_face;
+		inputs[ir->ninputs++] = ctx->frag_face;
 		ctx->frag_face->regs[0]->num = 0;
 
 		/* remaining channels not used, but let's avoid confusing
 		 * other parts that expect inputs to come in groups of vec4
 		 */
-		inputs[block->ninputs++] = NULL;
-		inputs[block->ninputs++] = NULL;
-		inputs[block->ninputs++] = NULL;
+		inputs[ir->ninputs++] = NULL;
+		inputs[ir->ninputs++] = NULL;
+		inputs[ir->ninputs++] = NULL;
 	}
 
 	/* since we don't know where to set the regid for frag_coord,
@@ -1889,63 +2405,43 @@
 		ctx->frag_coord[2]->regs[0]->num = regid++;
 		ctx->frag_coord[3]->regs[0]->num = regid++;
 
-		inputs[block->ninputs++] = ctx->frag_coord[0];
-		inputs[block->ninputs++] = ctx->frag_coord[1];
-		inputs[block->ninputs++] = ctx->frag_coord[2];
-		inputs[block->ninputs++] = ctx->frag_coord[3];
+		inputs[ir->ninputs++] = ctx->frag_coord[0];
+		inputs[ir->ninputs++] = ctx->frag_coord[1];
+		inputs[ir->ninputs++] = ctx->frag_coord[2];
+		inputs[ir->ninputs++] = ctx->frag_coord[3];
 	}
 
 	/* we always have frag_pos: */
 	so->pos_regid = regid;
 
 	/* r0.x */
-	instr = create_input(block, NULL, block->ninputs);
+	instr = create_input(ctx->in_block, ir->ninputs);
 	instr->regs[0]->num = regid++;
-	inputs[block->ninputs++] = instr;
+	inputs[ir->ninputs++] = instr;
 	ctx->frag_pos->regs[1]->instr = instr;
 
 	/* r0.y */
-	instr = create_input(block, NULL, block->ninputs);
+	instr = create_input(ctx->in_block, ir->ninputs);
 	instr->regs[0]->num = regid++;
-	inputs[block->ninputs++] = instr;
+	inputs[ir->ninputs++] = instr;
 	ctx->frag_pos->regs[2]->instr = instr;
 
-	block->inputs = inputs;
-}
-
-static void
-compile_dump(struct ir3_compile *ctx)
-{
-	const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
-	static unsigned n = 0;
-	char fname[16];
-	FILE *f;
-	snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
-	f = fopen(fname, "w");
-	if (!f)
-		return;
-	ir3_block_depth(ctx->block);
-	ir3_dump(ctx->ir, name, ctx->block, f);
-	fclose(f);
+	ir->inputs = inputs;
 }
 
 int
-ir3_compile_shader_nir(struct ir3_shader_variant *so,
-		const struct tgsi_token *tokens, struct ir3_shader_key key)
+ir3_compile_shader_nir(struct ir3_compiler *compiler,
+		struct ir3_shader_variant *so)
 {
 	struct ir3_compile *ctx;
-	struct ir3_block *block;
+	struct ir3 *ir;
 	struct ir3_instruction **inputs;
 	unsigned i, j, actual_in;
 	int ret = 0, max_bary;
 
 	assert(!so->ir);
 
-	so->ir = ir3_create();
-
-	assert(so->ir);
-
-	ctx = compile_init(so, tokens);
+	ctx = compile_init(compiler, so, so->shader->tokens);
 	if (!ctx) {
 		DBG("INIT failed!");
 		ret = -1;
@@ -1960,18 +2456,17 @@
 		goto out;
 	}
 
-	block = ctx->block;
-	so->ir->block = block;
+	ir = so->ir = ctx->ir;
 
 	/* keep track of the inputs from TGSI perspective.. */
-	inputs = block->inputs;
+	inputs = ir->inputs;
 
 	/* but fixup actual inputs for frag shader: */
 	if (so->type == SHADER_FRAGMENT)
 		fixup_frag_inputs(ctx);
 
 	/* at this point, for binning pass, throw away unneeded outputs: */
-	if (key.binning_pass) {
+	if (so->key.binning_pass) {
 		for (i = 0, j = 0; i < so->outputs_count; i++) {
 			unsigned name = sem2name(so->outputs[i].semantic);
 			unsigned idx = sem2idx(so->outputs[i].semantic);
@@ -1981,71 +2476,67 @@
 					(name == TGSI_SEMANTIC_PSIZE))) {
 				if (i != j) {
 					so->outputs[j] = so->outputs[i];
-					block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
-					block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
-					block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
-					block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
+					ir->outputs[(j*4)+0] = ir->outputs[(i*4)+0];
+					ir->outputs[(j*4)+1] = ir->outputs[(i*4)+1];
+					ir->outputs[(j*4)+2] = ir->outputs[(i*4)+2];
+					ir->outputs[(j*4)+3] = ir->outputs[(i*4)+3];
 				}
 				j++;
 			}
 		}
 		so->outputs_count = j;
-		block->noutputs = j * 4;
+		ir->noutputs = j * 4;
 	}
 
 	/* if we want half-precision outputs, mark the output registers
 	 * as half:
 	 */
-	if (key.half_precision) {
-		for (i = 0; i < block->noutputs; i++) {
-			if (!block->outputs[i])
+	if (so->key.half_precision) {
+		for (i = 0; i < ir->noutputs; i++) {
+			struct ir3_instruction *out = ir->outputs[i];
+			if (!out)
 				continue;
-			block->outputs[i]->regs[0]->flags |= IR3_REG_HALF;
-		}
-	}
+			out->regs[0]->flags |= IR3_REG_HALF;
+			/* output could be a fanout (ie. texture fetch output)
+			 * in which case we need to propagate the half-reg flag
+			 * up to the definer so that RA sees it:
+			 */
+			if (is_meta(out) && (out->opc == OPC_META_FO)) {
+				out = out->regs[1]->instr;
+				out->regs[0]->flags |= IR3_REG_HALF;
+			}
 
-	/* at this point, we want the kill's in the outputs array too,
-	 * so that they get scheduled (since they have no dst).. we've
-	 * already ensured that the array is big enough in push_block():
-	 */
-	if (so->type == SHADER_FRAGMENT) {
-		for (i = 0; i < ctx->kill_count; i++)
-			block->outputs[block->noutputs++] = ctx->kill[i];
+			if (out->category == 1) {
+				out->cat1.dst_type = half_type(out->cat1.dst_type);
+			}
+		}
 	}
 
-	if (fd_mesa_debug & FD_DBG_OPTDUMP)
-		compile_dump(ctx);
-
 	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
 		printf("BEFORE CP:\n");
-		ir3_dump_instr_list(block->head);
+		ir3_print(ir);
 	}
 
-	ir3_block_depth(block);
-
-	ir3_block_cp(block);
+	ir3_cp(ir);
 
 	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
 		printf("BEFORE GROUPING:\n");
-		ir3_dump_instr_list(block->head);
+		ir3_print(ir);
 	}
 
 	/* Group left/right neighbors, inserting mov's where needed to
 	 * solve conflicts:
 	 */
-	ir3_block_group(block);
+	ir3_group(ir);
 
-	if (fd_mesa_debug & FD_DBG_OPTDUMP)
-		compile_dump(ctx);
-
-	ir3_block_depth(block);
+	ir3_depth(ir);
 
 	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
 		printf("AFTER DEPTH:\n");
-		ir3_dump_instr_list(block->head);
+		ir3_print(ir);
 	}
 
-	ret = ir3_block_sched(block);
+	ret = ir3_sched(ir);
 	if (ret) {
 		DBG("SCHED failed!");
 		goto out;
@@ -2053,10 +2544,10 @@
 
 	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
 		printf("AFTER SCHED:\n");
-		ir3_dump_instr_list(block->head);
+		ir3_print(ir);
 	}
 
-	ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
+	ret = ir3_ra(ir, so->type, so->frag_coord, so->frag_face);
 	if (ret) {
 		DBG("RA failed!");
 		goto out;
@@ -2064,14 +2555,19 @@
 
 	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
 		printf("AFTER RA:\n");
-		ir3_dump_instr_list(block->head);
+		ir3_print(ir);
 	}
 
-	ir3_block_legalize(block, &so->has_samp, &max_bary);
+	ir3_legalize(ir, &so->has_samp, &max_bary);
+
+	if (fd_mesa_debug & FD_DBG_OPTMSGS) {
+		printf("AFTER LEGALIZE:\n");
+		ir3_print(ir);
+	}
 
 	/* fixup input/outputs: */
 	for (i = 0; i < so->outputs_count; i++) {
-		so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
+		so->outputs[i].regid = ir->outputs[i*4]->regs[0]->num;
 		/* preserve hack for depth output.. tgsi writes depth to .z,
 		 * but what we give the hw is the scalar register:
 		 */
@@ -2111,7 +2607,8 @@
 
 out:
 	if (ret) {
-		ir3_destroy(so->ir);
+		if (so->ir)
+			ir3_destroy(so->ir);
 		so->ir = NULL;
 	}
 	compile_free(ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_cp.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_cp.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_cp.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_cp.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,7 @@
 		struct ir3_register *dst = instr->regs[0];
 		struct ir3_register *src = instr->regs[1];
 		struct ir3_instruction *src_instr = ssa(src);
-		if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
+		if (dst->flags & IR3_REG_RELATIV)
 			return false;
 		if (src->flags & IR3_REG_RELATIV)
 			return false;
@@ -54,6 +54,13 @@
 		/* TODO: remove this hack: */
 		if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
 			return false;
+		/* TODO: we currently don't handle left/right neighbors
+		 * very well when inserting parallel-copies into phi..
+		 * to avoid problems don't eliminate a mov coming out
+		 * of phi..
+		 */
+		if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI))
+			return false;
 		return true;
 	}
 	return false;
@@ -284,7 +291,7 @@
 			instr->regs[n+1] = src_reg;
 
 			if (src_reg->flags & IR3_REG_RELATIV)
-				instr->address = reg->instr->address;
+				ir3_instr_set_address(instr, reg->instr->address);
 
 			return;
 		}
@@ -293,7 +300,7 @@
 				!conflicts(instr->address, reg->instr->address)) {
 			src_reg->flags = new_flags;
 			instr->regs[n+1] = src_reg;
-			instr->address = reg->instr->address;
+			ir3_instr_set_address(instr, reg->instr->address);
 
 			return;
 		}
@@ -354,13 +361,6 @@
 {
 	struct ir3_register *reg;
 
-	/* stay within the block.. don't try to operate across
-	 * basic block boundaries or we'll have problems when
-	 * dealing with multiple basic blocks:
-	 */
-	if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
-		return instr;
-
 	if (is_eligible_mov(instr, !!flags)) {
 		struct ir3_register *reg = instr->regs[1];
 		struct ir3_instruction *src_instr = ssa(reg);
@@ -389,27 +389,31 @@
 	}
 
 	if (instr->address)
-		instr->address = instr_cp(instr->address, NULL);
+		ir3_instr_set_address(instr, instr_cp(instr->address, NULL));
 
 	return instr;
 }
 
-static void block_cp(struct ir3_block *block)
+void
+ir3_cp(struct ir3 *ir)
 {
-	unsigned i;
+	ir3_clear_mark(ir);
 
-	for (i = 0; i < block->noutputs; i++) {
-		if (block->outputs[i]) {
+	for (unsigned i = 0; i < ir->noutputs; i++) {
+		if (ir->outputs[i]) {
 			struct ir3_instruction *out =
-					instr_cp(block->outputs[i], NULL);
+					instr_cp(ir->outputs[i], NULL);
 
-			block->outputs[i] = out;
+			ir->outputs[i] = out;
 		}
 	}
-}
 
-void ir3_block_cp(struct ir3_block *block)
-{
-	ir3_clear_mark(block->shader);
-	block_cp(block);
+	for (unsigned i = 0; i < ir->keeps_count; i++) {
+		ir->keeps[i] = instr_cp(ir->keeps[i], NULL);
+	}
+
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		if (block->condition)
+			block->condition = instr_cp(block->condition, NULL);
+	}
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_depth.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_depth.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_depth.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_depth.c	2015-09-16 14:36:09.000000000 +0000
@@ -84,25 +84,25 @@
 	}
 }
 
-static void insert_by_depth(struct ir3_instruction *instr)
+void
+ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list)
 {
-	struct ir3_block *block = instr->block;
-	struct ir3_instruction *n = block->head;
-	struct ir3_instruction *p = NULL;
-
-	while (n && (n != instr) && (n->depth > instr->depth)) {
-		p = n;
-		n = n->next;
-	}
+	/* remove from existing spot in list: */
+	list_delinit(&instr->node);
 
-	instr->next = n;
-	if (p)
-		p->next = instr;
-	else
-		block->head = instr;
+	/* find where to re-insert instruction: */
+	list_for_each_entry (struct ir3_instruction, pos, list, node) {
+		if (pos->depth > instr->depth) {
+			list_add(&instr->node, &pos->node);
+			return;
+		}
+	}
+	/* if we get here, we didn't find an insertion spot: */
+	list_addtail(&instr->node, list);
 }
 
-static void ir3_instr_depth(struct ir3_instruction *instr)
+static void
+ir3_instr_depth(struct ir3_instruction *instr)
 {
 	struct ir3_instruction *src;
 
@@ -123,47 +123,66 @@
 		instr->depth = MAX2(instr->depth, sd);
 	}
 
-	/* meta-instructions don't add cycles, other than PHI.. which
-	 * might translate to a real instruction..
-	 *
-	 * well, not entirely true, fan-in/out, etc might need to need
-	 * to generate some extra mov's in edge cases, etc.. probably
-	 * we might want to do depth calculation considering the worst
-	 * case for these??
-	 */
 	if (!is_meta(instr))
 		instr->depth++;
 
-	insert_by_depth(instr);
+	ir3_insert_by_depth(instr, &instr->block->instr_list);
 }
 
-void ir3_block_depth(struct ir3_block *block)
+static void
+remove_unused_by_block(struct ir3_block *block)
 {
-	unsigned i;
+	list_for_each_entry_safe (struct ir3_instruction, instr, &block->instr_list, node) {
+		if (!ir3_instr_check_mark(instr)) {
+			if (is_flow(instr) && (instr->opc == OPC_END))
+				continue;
+			/* mark it, in case it is input, so we can
+			 * remove unused inputs:
+			 */
+			instr->depth = DEPTH_UNUSED;
+			/* and remove from instruction list: */
+			list_delinit(&instr->node);
+		}
+	}
+}
 
-	block->head = NULL;
+void
+ir3_depth(struct ir3 *ir)
+{
+	unsigned i;
 
-	ir3_clear_mark(block->shader);
-	for (i = 0; i < block->noutputs; i++)
-		if (block->outputs[i])
-			ir3_instr_depth(block->outputs[i]);
+	ir3_clear_mark(ir);
+	for (i = 0; i < ir->noutputs; i++)
+		if (ir->outputs[i])
+			ir3_instr_depth(ir->outputs[i]);
+
+	for (i = 0; i < ir->keeps_count; i++)
+		ir3_instr_depth(ir->keeps[i]);
+
+	/* We also need to account for if-condition: */
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		if (block->condition)
+			ir3_instr_depth(block->condition);
+	}
 
 	/* mark un-used instructions: */
-	for (i = 0; i < block->shader->instrs_count; i++) {
-		struct ir3_instruction *instr = block->shader->instrs[i];
-
-		/* just consider instructions within this block: */
-		if (instr->block != block)
-			continue;
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		remove_unused_by_block(block);
+	}
 
-		if (!ir3_instr_check_mark(instr))
-			instr->depth = DEPTH_UNUSED;
+	/* note that we can end up with unused indirects, but we should
+	 * not end up with unused predicates.
+	 */
+	for (i = 0; i < ir->indirects_count; i++) {
+		struct ir3_instruction *instr = ir->indirects[i];
+		if (instr->depth == DEPTH_UNUSED)
+			ir->indirects[i] = NULL;
 	}
 
 	/* cleanup unused inputs: */
-	for (i = 0; i < block->ninputs; i++) {
-		struct ir3_instruction *in = block->inputs[i];
+	for (i = 0; i < ir->ninputs; i++) {
+		struct ir3_instruction *in = ir->inputs[i];
 		if (in && (in->depth == DEPTH_UNUSED))
-			block->inputs[i] = NULL;
+			ir->inputs[i] = NULL;
 	}
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_dump.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_dump.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_dump.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_dump.c	1970-01-01 00:00:00.000000000 +0000
@@ -1,456 +0,0 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
-/*
- * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- *    Rob Clark <robclark@freedesktop.org>
- */
-
-#include <stdarg.h>
-
-#include "ir3.h"
-
-#define PTRID(x) ((unsigned long)(x))
-
-struct ir3_dump_ctx {
-	FILE *f;
-	bool verbose;
-};
-
-static void dump_instr_name(struct ir3_dump_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	/* for debugging: */
-	if (ctx->verbose) {
-#ifdef DEBUG
-		fprintf(ctx->f, "%04u:", instr->serialno);
-#endif
-		fprintf(ctx->f, "%03u: ", instr->depth);
-	}
-
-	if (instr->flags & IR3_INSTR_SY)
-		fprintf(ctx->f, "(sy)");
-	if (instr->flags & IR3_INSTR_SS)
-		fprintf(ctx->f, "(ss)");
-
-	if (is_meta(instr)) {
-		switch(instr->opc) {
-		case OPC_META_PHI:
-			fprintf(ctx->f, "&#934;");
-			break;
-		default:
-			/* shouldn't hit here.. just for debugging: */
-			switch (instr->opc) {
-			case OPC_META_INPUT:  fprintf(ctx->f, "_meta:in");   break;
-			case OPC_META_OUTPUT: fprintf(ctx->f, "_meta:out");  break;
-			case OPC_META_FO:     fprintf(ctx->f, "_meta:fo");   break;
-			case OPC_META_FI:     fprintf(ctx->f, "_meta:fi");   break;
-			case OPC_META_FLOW:   fprintf(ctx->f, "_meta:flow"); break;
-
-			default: fprintf(ctx->f, "_meta:%d", instr->opc); break;
-			}
-			break;
-		}
-	} else if (instr->category == 1) {
-		static const char *type[] = {
-				[TYPE_F16] = "f16",
-				[TYPE_F32] = "f32",
-				[TYPE_U16] = "u16",
-				[TYPE_U32] = "u32",
-				[TYPE_S16] = "s16",
-				[TYPE_S32] = "s32",
-				[TYPE_U8]  = "u8",
-				[TYPE_S8]  = "s8",
-		};
-		if (instr->cat1.src_type == instr->cat1.dst_type)
-			fprintf(ctx->f, "mov");
-		else
-			fprintf(ctx->f, "cov");
-		fprintf(ctx->f, ".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]);
-	} else {
-		fprintf(ctx->f, "%s", ir3_instr_name(instr));
-		if (instr->flags & IR3_INSTR_3D)
-			fprintf(ctx->f, ".3d");
-		if (instr->flags & IR3_INSTR_A)
-			fprintf(ctx->f, ".a");
-		if (instr->flags & IR3_INSTR_O)
-			fprintf(ctx->f, ".o");
-		if (instr->flags & IR3_INSTR_P)
-			fprintf(ctx->f, ".p");
-		if (instr->flags & IR3_INSTR_S)
-			fprintf(ctx->f, ".s");
-		if (instr->flags & IR3_INSTR_S2EN)
-			fprintf(ctx->f, ".s2en");
-	}
-}
-
-static void dump_reg_name(struct ir3_dump_ctx *ctx,
-		struct ir3_register *reg, bool followssa)
-{
-	if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
-			(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
-		fprintf(ctx->f, "(absneg)");
-	else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))
-		fprintf(ctx->f, "(neg)");
-	else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS))
-		fprintf(ctx->f, "(abs)");
-
-	if (reg->flags & IR3_REG_IMMED) {
-		fprintf(ctx->f, "imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
-	} else if (reg->flags & IR3_REG_SSA) {
-		if (ctx->verbose) {
-			fprintf(ctx->f, "_");
-			if (followssa) {
-				fprintf(ctx->f, "[");
-				dump_instr_name(ctx, reg->instr);
-				fprintf(ctx->f, "]");
-			}
-		}
-	} else if (reg->flags & IR3_REG_RELATIV) {
-		if (reg->flags & IR3_REG_HALF)
-			fprintf(ctx->f, "h");
-		if (reg->flags & IR3_REG_CONST)
-			fprintf(ctx->f, "c<a0.x + %u>", reg->num);
-		else
-			fprintf(ctx->f, "\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
-	} else {
-		if (reg->flags & IR3_REG_HALF)
-			fprintf(ctx->f, "h");
-		if (reg->flags & IR3_REG_CONST)
-			fprintf(ctx->f, "c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
-		else
-			fprintf(ctx->f, "\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]);
-	}
-}
-
-static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
-		struct ir3_instruction *instr);
-static void ir3_block_dump(struct ir3_dump_ctx *ctx,
-		struct ir3_block *block, const char *name);
-
-static void dump_instr(struct ir3_dump_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	/* if we've already visited this instruction, bail now: */
-	if (ir3_instr_check_mark(instr))
-		return;
-
-	/* some meta-instructions need to be handled specially: */
-	if (is_meta(instr)) {
-		if ((instr->opc == OPC_META_FO) ||
-				(instr->opc == OPC_META_FI)) {
-			struct ir3_instruction *src;
-			foreach_ssa_src(src, instr)
-				dump_instr(ctx, src);
-		} else if (instr->opc == OPC_META_FLOW) {
-			struct ir3_register *reg = instr->regs[1];
-			ir3_block_dump(ctx, instr->flow.if_block, "if");
-			if (instr->flow.else_block)
-				ir3_block_dump(ctx, instr->flow.else_block, "else");
-			if (reg->flags & IR3_REG_SSA)
-				dump_instr(ctx, reg->instr);
-		} else if (instr->opc == OPC_META_PHI) {
-			/* treat like a normal instruction: */
-			ir3_instr_dump(ctx, instr);
-		}
-	} else {
-		ir3_instr_dump(ctx, instr);
-	}
-}
-
-/* arrarraggh!  if link is to something outside of the current block, we
- * need to defer emitting the link until the end of the block, since the
- * edge triggers pre-creation of the node it links to inside the cluster,
- * even though it is meant to be outside..
- */
-static struct {
-	char buf[40960];
-	unsigned n;
-} edge_buf;
-
-/* helper to print or defer: */
-static void printdef(struct ir3_dump_ctx *ctx,
-		bool defer, const char *fmt, ...)
-{
-	va_list ap;
-	va_start(ap, fmt);
-	if (defer) {
-		unsigned n = edge_buf.n;
-		n += vsnprintf(&edge_buf.buf[n], sizeof(edge_buf.buf) - n,
-				fmt, ap);
-		edge_buf.n = n;
-	} else {
-		vfprintf(ctx->f, fmt, ap);
-	}
-	va_end(ap);
-}
-
-static void dump_link2(struct ir3_dump_ctx *ctx,
-		struct ir3_instruction *instr, const char *target, bool defer)
-{
-	/* some meta-instructions need to be handled specially: */
-	if (is_meta(instr)) {
-		if (instr->opc == OPC_META_INPUT) {
-			printdef(ctx, defer, "input%lx:<in%u>:w -> %s",
-					PTRID(instr->inout.block),
-					instr->regs[0]->num, target);
-		} else if (instr->opc == OPC_META_FO) {
-			struct ir3_register *reg = instr->regs[1];
-			dump_link2(ctx, reg->instr, target, defer);
-			printdef(ctx, defer, "[label=\".%c\"]",
-					"xyzw"[instr->fo.off & 0x3]);
-		} else if (instr->opc == OPC_META_FI) {
-			struct ir3_instruction *src;
-
-			foreach_ssa_src_n(src, i, instr) {
-				dump_link2(ctx, src, target, defer);
-				printdef(ctx, defer, "[label=\".%c\"]",
-						"xyzw"[i & 0x3]);
-			}
-		} else if (instr->opc == OPC_META_OUTPUT) {
-			printdef(ctx, defer, "output%lx:<out%u>:w -> %s",
-					PTRID(instr->inout.block),
-					instr->regs[0]->num, target);
-		} else if (instr->opc == OPC_META_PHI) {
-			/* treat like a normal instruction: */
-			printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
-		}
-	} else {
-		printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
-	}
-}
-
-static void dump_link(struct ir3_dump_ctx *ctx,
-		struct ir3_instruction *instr,
-		struct ir3_block *block, const char *target)
-{
-	bool defer = instr->block != block;
-	dump_link2(ctx, instr, target, defer);
-	printdef(ctx, defer, "\n");
-}
-
-static struct ir3_register *follow_flow(struct ir3_register *reg)
-{
-	if (reg->flags & IR3_REG_SSA) {
-		struct ir3_instruction *instr = reg->instr;
-		/* go with the flow.. */
-		if (is_meta(instr) && (instr->opc == OPC_META_FLOW))
-			return instr->regs[1];
-	}
-	return reg;
-}
-
-static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	struct ir3_register *src;
-
-	fprintf(ctx->f, "instr%lx [shape=record,style=filled,fillcolor=lightgrey,label=\"{",
-			PTRID(instr));
-	dump_instr_name(ctx, instr);
-
-	/* destination register: */
-	fprintf(ctx->f, "|<dst0>");
-
-	/* source register(s): */
-	foreach_src_n(src, i, instr) {
-		struct ir3_register *reg = follow_flow(src);
-
-		fprintf(ctx->f, "|");
-
-		if (reg->flags & IR3_REG_SSA)
-			fprintf(ctx->f, "<src%u> ", i);
-
-		dump_reg_name(ctx, reg, true);
-	}
-
-	fprintf(ctx->f, "}\"];\n");
-
-	/* and recursively dump dependent instructions: */
-	foreach_src_n(src, i, instr) {
-		struct ir3_register *reg = follow_flow(src);
-		char target[32];  /* link target */
-
-		if (!(reg->flags & IR3_REG_SSA))
-			continue;
-
-		snprintf(target, sizeof(target), "instr%lx:<src%u>",
-				PTRID(instr), i);
-
-		dump_instr(ctx, reg->instr);
-		dump_link(ctx, reg->instr, instr->block, target);
-	}
-}
-
-static void ir3_block_dump(struct ir3_dump_ctx *ctx,
-		struct ir3_block *block, const char *name)
-{
-	unsigned i, n;
-
-	n = edge_buf.n;
-
-	fprintf(ctx->f, "subgraph cluster%lx {\n", PTRID(block));
-	fprintf(ctx->f, "label=\"%s\";\n", name);
-
-	/* draw inputs: */
-	fprintf(ctx->f, "input%lx [shape=record,label=\"inputs", PTRID(block));
-	for (i = 0; i < block->ninputs; i++)
-		if (block->inputs[i])
-			fprintf(ctx->f, "|<in%u> i%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
-	fprintf(ctx->f, "\"];\n");
-
-	/* draw instruction graph: */
-	for (i = 0; i < block->noutputs; i++)
-		if (block->outputs[i])
-			dump_instr(ctx, block->outputs[i]);
-
-	/* draw outputs: */
-	fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block));
-	for (i = 0; i < block->noutputs; i++)
-		fprintf(ctx->f, "|<out%u> o%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
-	fprintf(ctx->f, "\"];\n");
-
-	/* and links to outputs: */
-	for (i = 0; i < block->noutputs; i++) {
-		char target[32];  /* link target */
-
-		/* NOTE: there could be outputs that are never assigned,
-		 * so skip them
-		 */
-		if (!block->outputs[i])
-			continue;
-
-		snprintf(target, sizeof(target), "output%lx:<out%u>:e",
-				PTRID(block), i);
-
-		dump_link(ctx, block->outputs[i], block, target);
-	}
-
-	fprintf(ctx->f, "}\n");
-
-	/* and links to inputs: */
-	if (block->parent) {
-		for (i = 0; i < block->ninputs; i++) {
-			char target[32];  /* link target */
-
-			if (!block->inputs[i])
-				continue;
-
-			dump_instr(ctx, block->inputs[i]);
-
-			snprintf(target, sizeof(target), "input%lx:<in%u>:e",
-					PTRID(block), i);
-
-			dump_link(ctx, block->inputs[i], block, target);
-		}
-	}
-
-	/* dump deferred edges: */
-	if (edge_buf.n > n) {
-		fprintf(ctx->f, "%*s", edge_buf.n - n, &edge_buf.buf[n]);
-		edge_buf.n = n;
-	}
-}
-
-void ir3_dump(struct ir3 *shader, const char *name,
-		struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
-		FILE *f)
-{
-	struct ir3_dump_ctx ctx = {
-			.f = f,
-	};
-	ir3_clear_mark(shader);
-	fprintf(ctx.f, "digraph G {\n");
-	fprintf(ctx.f, "rankdir=RL;\n");
-	fprintf(ctx.f, "nodesep=0.25;\n");
-	fprintf(ctx.f, "ranksep=1.5;\n");
-	ir3_block_dump(&ctx, block, name);
-	fprintf(ctx.f, "}\n");
-}
-
-/*
- * For Debugging:
- */
-
-void
-ir3_dump_instr_single(struct ir3_instruction *instr)
-{
-	struct ir3_dump_ctx ctx = {
-			.f = stdout,
-			.verbose = true,
-	};
-	unsigned i;
-
-	dump_instr_name(&ctx, instr);
-	for (i = 0; i < instr->regs_count; i++) {
-		struct ir3_register *reg = instr->regs[i];
-		printf(i ? ", " : " ");
-		dump_reg_name(&ctx, reg, !!i);
-	}
-
-	if (instr->address) {
-		fprintf(ctx.f, ", address=_");
-		fprintf(ctx.f, "[");
-		dump_instr_name(&ctx, instr->address);
-		fprintf(ctx.f, "]");
-	}
-
-	if (instr->fanin) {
-		fprintf(ctx.f, ", fanin=_");
-		fprintf(ctx.f, "[");
-		dump_instr_name(&ctx, instr->fanin);
-		fprintf(ctx.f, "]");
-	}
-
-	if (is_meta(instr)) {
-		if (instr->opc == OPC_META_FO) {
-			printf(", off=%d", instr->fo.off);
-		} else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
-			printf(", aid=%d", instr->fi.aid);
-		}
-	}
-
-	printf("\n");
-}
-
-void
-ir3_dump_instr_list(struct ir3_instruction *instr)
-{
-	struct ir3_block *block = instr->block;
-	unsigned n = 0;
-
-	while (instr) {
-		ir3_dump_instr_single(instr);
-		if (!is_meta(instr))
-			n++;
-		instr = instr->next;
-	}
-	printf("%u instructions\n", n);
-
-	for (n = 0; n < block->noutputs; n++) {
-		if (!block->outputs[n])
-			continue;
-		printf("out%d: ", n);
-		ir3_dump_instr_single(block->outputs[n]);
-	}
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_flatten.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_flatten.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_flatten.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_flatten.c	1970-01-01 00:00:00.000000000 +0000
@@ -1,152 +0,0 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
-/*
- * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- *    Rob Clark <robclark@freedesktop.org>
- */
-
-#include <stdarg.h>
-
-#include "ir3.h"
-
-/*
- * Flatten: flatten out legs of if/else, etc
- *
- * TODO probably should use some heuristic to decide to not flatten
- * if one side of the other is too large / deeply nested / whatever?
- */
-
-struct ir3_flatten_ctx {
-	struct ir3_block *block;
-	unsigned cnt;
-};
-
-static struct ir3_register *unwrap(struct ir3_register *reg)
-{
-
-	if (reg->flags & IR3_REG_SSA) {
-		struct ir3_instruction *instr = reg->instr;
-		if (is_meta(instr)) {
-			switch (instr->opc) {
-			case OPC_META_OUTPUT:
-			case OPC_META_FLOW:
-				if (instr->regs_count > 1)
-					return instr->regs[1];
-				return NULL;
-			default:
-				break;
-			}
-		}
-	}
-	return reg;
-}
-
-static void ir3_instr_flatten(struct ir3_flatten_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	struct ir3_instruction *src;
-
-	/* if we've already visited this instruction, bail now: */
-	if (ir3_instr_check_mark(instr))
-		return;
-
-	instr->block = ctx->block;
-
-	/* TODO: maybe some threshold to decide whether to
-	 * flatten or not??
-	 */
-	if (is_meta(instr)) {
-		if (instr->opc == OPC_META_PHI) {
-			struct ir3_register *cond, *t, *f;
-
-			cond = unwrap(instr->regs[1]);
-			t    = unwrap(instr->regs[2]);  /* true val */
-			f    = unwrap(instr->regs[3]);  /* false val */
-
-			/* must have cond, but t or f may be null if only written
-			 * one one side of the if/else (in which case we can just
-			 * convert the PHI to a simple move).
-			 */
-			assert(cond);
-			assert(t || f);
-
-			if (t && f) {
-				/* convert the PHI instruction to sel.{b16,b32} */
-				instr->category = 3;
-
-				/* instruction type based on dst size: */
-				if (instr->regs[0]->flags & IR3_REG_HALF)
-					instr->opc = OPC_SEL_B16;
-				else
-					instr->opc = OPC_SEL_B32;
-
-				instr->regs[1] = t;
-				instr->regs[2] = cond;
-				instr->regs[3] = f;
-			} else {
-				/* convert to simple mov: */
-				instr->category = 1;
-				instr->cat1.dst_type = TYPE_F32;
-				instr->cat1.src_type = TYPE_F32;
-				instr->regs_count = 2;
-				instr->regs[1] = t ? t : f;
-			}
-
-			ctx->cnt++;
-		} else if ((instr->opc == OPC_META_INPUT) &&
-				(instr->regs_count == 2)) {
-			type_t ftype;
-
-			if (instr->regs[0]->flags & IR3_REG_HALF)
-				ftype = TYPE_F16;
-			else
-				ftype = TYPE_F32;
-
-			/* convert meta:input to mov: */
-			instr->category = 1;
-			instr->cat1.src_type = ftype;
-			instr->cat1.dst_type = ftype;
-		}
-	}
-
-	/* recursively visit children: */
-	foreach_ssa_src(src, instr)
-		ir3_instr_flatten(ctx, src);
-}
-
-/* return >= 0 is # of phi's flattened, < 0 is error */
-int ir3_block_flatten(struct ir3_block *block)
-{
-	struct ir3_flatten_ctx ctx = {
-			.block = block,
-	};
-	unsigned i;
-
-	ir3_clear_mark(block->shader);
-	for(i = 0; i < block->noutputs; i++)
-		if (block->outputs[i])
-			ir3_instr_flatten(&ctx, block->outputs[i]);
-
-	return ctx.cnt;
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_group.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_group.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_group.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_group.c	2015-09-16 14:36:09.000000000 +0000
@@ -34,35 +34,6 @@
  * Find/group instruction neighbors:
  */
 
-/* stop condition for iteration: */
-static bool check_stop(struct ir3_instruction *instr)
-{
-	if (ir3_instr_check_mark(instr))
-		return true;
-
-	/* stay within the block.. don't try to operate across
-	 * basic block boundaries or we'll have problems when
-	 * dealing with multiple basic blocks:
-	 */
-	if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
-		return true;
-
-	return false;
-}
-
-static struct ir3_instruction * create_mov(struct ir3_instruction *instr)
-{
-	struct ir3_instruction *mov;
-
-	mov = ir3_instr_create(instr->block, 1, 0);
-	mov->cat1.src_type = TYPE_F32;
-	mov->cat1.dst_type = TYPE_F32;
-	ir3_reg_create(mov, 0, 0);    /* dst */
-	ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr;
-
-	return mov;
-}
-
 /* bleh.. we need to do the same group_n() thing for both inputs/outputs
  * (where we have a simple instr[] array), and fanin nodes (where we have
  * an extra indirection via reg->instr).
@@ -78,7 +49,8 @@
 }
 static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
 {
-	((struct ir3_instruction **)arr)[idx] = create_mov(instr);
+	((struct ir3_instruction **)arr)[idx] =
+			ir3_MOV(instr->block, instr, TYPE_F32);
 }
 static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
 {
@@ -111,14 +83,17 @@
 {
 	return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
 }
-static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
+static void
+instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
 {
-	((struct ir3_instruction *)arr)->regs[idx+1]->instr = create_mov(instr);
+	((struct ir3_instruction *)arr)->regs[idx+1]->instr =
+			ir3_MOV(instr->block, instr, TYPE_F32);
 }
 static struct group_ops instr_ops = { instr_get, instr_insert_mov };
 
 
-static void group_n(struct group_ops *ops, void *arr, unsigned n)
+static void
+group_n(struct group_ops *ops, void *arr, unsigned n)
 {
 	unsigned i, j;
 
@@ -141,6 +116,10 @@
 			conflict = conflicts(instr->cp.left, left) ||
 				conflicts(instr->cp.right, right);
 
+			/* RA can't yet deal very well w/ group'd phi's: */
+			if (is_meta(instr) && (instr->opc == OPC_META_PHI))
+				conflict = true;
+
 			/* we also can't have an instr twice in the group: */
 			for (j = i + 1; (j < n) && !conflict; j++)
 				if (ops->get(arr, j) == instr)
@@ -181,11 +160,12 @@
 	}
 }
 
-static void instr_find_neighbors(struct ir3_instruction *instr)
+static void
+instr_find_neighbors(struct ir3_instruction *instr)
 {
 	struct ir3_instruction *src;
 
-	if (check_stop(instr))
+	if (ir3_instr_check_mark(instr))
 		return;
 
 	if (is_meta(instr) && (instr->opc == OPC_META_FI))
@@ -200,7 +180,8 @@
  * we need to insert dummy/padding instruction for grouping, and
  * then take it back out again before anyone notices.
  */
-static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
+static void
+pad_and_group_input(struct ir3_instruction **input, unsigned n)
 {
 	int i, mask = 0;
 	struct ir3_block *block = NULL;
@@ -210,8 +191,8 @@
 		if (instr) {
 			block = instr->block;
 		} else if (block) {
-			instr = ir3_instr_create(block, 0, OPC_NOP);
-			ir3_reg_create(instr, 0, IR3_REG_SSA);    /* dst */
+			instr = ir3_NOP(block);
+			ir3_reg_create(instr, 0, IR3_REG_SSA);    /* dummy dst */
 			input[i] = instr;
 			mask |= (1 << i);
 		}
@@ -225,42 +206,46 @@
 	}
 }
 
-static void block_find_neighbors(struct ir3_block *block)
+static void
+find_neighbors(struct ir3 *ir)
 {
 	unsigned i;
 
-	for (i = 0; i < block->noutputs; i++) {
-		if (block->outputs[i]) {
-			struct ir3_instruction *instr = block->outputs[i];
+	/* shader inputs/outputs themselves must be contiguous as well:
+	 *
+	 * NOTE: group inputs first, since we only insert mov's
+	 * *before* the conflicted instr (and that would go badly
+	 * for inputs).  By doing inputs first, we should never
+	 * have a conflict on inputs.. pushing any conflict to
+	 * resolve to the outputs, for stuff like:
+	 *
+	 *     MOV OUT[n], IN[m].wzyx
+	 *
+	 * NOTE: we assume here inputs/outputs are grouped in vec4.
+	 * This logic won't quite cut it if we don't align smaller
+	 * on vec4 boundaries
+	 */
+	for (i = 0; i < ir->ninputs; i += 4)
+		pad_and_group_input(&ir->inputs[i], 4);
+	for (i = 0; i < ir->noutputs; i += 4)
+		group_n(&arr_ops_out, &ir->outputs[i], 4);
+
+	for (i = 0; i < ir->noutputs; i++) {
+		if (ir->outputs[i]) {
+			struct ir3_instruction *instr = ir->outputs[i];
 			instr_find_neighbors(instr);
 		}
 	}
 
-	/* shader inputs/outputs themselves must be contiguous as well:
-	 */
-	if (!block->parent) {
-		/* NOTE: group inputs first, since we only insert mov's
-		 * *before* the conflicted instr (and that would go badly
-		 * for inputs).  By doing inputs first, we should never
-		 * have a conflict on inputs.. pushing any conflict to
-		 * resolve to the outputs, for stuff like:
-		 *
-		 *     MOV OUT[n], IN[m].wzyx
-		 *
-		 * NOTE: we assume here inputs/outputs are grouped in vec4.
-		 * This logic won't quite cut it if we don't align smaller
-		 * on vec4 boundaries
-		 */
-		for (i = 0; i < block->ninputs; i += 4)
-			pad_and_group_input(&block->inputs[i], 4);
-		for (i = 0; i < block->noutputs; i += 4)
-			group_n(&arr_ops_out, &block->outputs[i], 4);
-
+	for (i = 0; i < ir->keeps_count; i++) {
+		struct ir3_instruction *instr = ir->keeps[i];
+		instr_find_neighbors(instr);
 	}
 }
 
-void ir3_block_group(struct ir3_block *block)
+void
+ir3_group(struct ir3 *ir)
 {
-	ir3_clear_mark(block->shader);
-	block_find_neighbors(block);
+	ir3_clear_mark(ir);
+	find_neighbors(ir);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,17 +28,20 @@
 #include <stdbool.h>
 
 #include "util/u_debug.h"
+#include "util/list.h"
 
 #include "instr-a3xx.h"
 #include "disasm.h"  /* TODO move 'enum shader_t' somewhere else.. */
 
 /* low level intermediate representation of an adreno shader program */
 
+struct ir3_compiler;
 struct ir3;
 struct ir3_instruction;
 struct ir3_block;
 
 struct ir3_info {
+	uint32_t gpu_id;
 	uint16_t sizedwords;
 	uint16_t instrs_count;   /* expanded to account for rpt's */
 	/* NOTE: max_reg, etc, does not include registers not touched
@@ -80,8 +83,8 @@
 		 * before register assignment is done:
 		 */
 		IR3_REG_SSA    = 0x2000,   /* 'instr' is ptr to assigning instr */
-		IR3_REG_IA     = 0x4000,   /* meta-input dst is "assigned" */
-		IR3_REG_ADDR   = 0x8000,   /* register is a0.x */
+		IR3_REG_PHI_SRC= 0x4000,   /* phi src, regs[0]->instr points to phi */
+
 	} flags;
 	union {
 		/* normal registers:
@@ -169,6 +172,7 @@
 		IR3_INSTR_P     = 0x080,
 		IR3_INSTR_S     = 0x100,
 		IR3_INSTR_S2EN  = 0x200,
+		IR3_INSTR_G     = 0x400,
 		/* meta-flags, for intermediate stages of IR, ie.
 		 * before register assignment is done:
 		 */
@@ -185,6 +189,7 @@
 			char inv;
 			char comp;
 			int  immed;
+			struct ir3_block *target;
 		} cat0;
 		struct {
 			type_t src_type, dst_type;
@@ -205,7 +210,8 @@
 		} cat5;
 		struct {
 			type_t type;
-			int offset;
+			int src_offset;
+			int dst_offset;
 			int iim_val;
 		} cat6;
 		/* for meta-instructions, just used to hold extra data
@@ -218,14 +224,14 @@
 			int aid;
 		} fi;
 		struct {
-			struct ir3_block *if_block, *else_block;
-		} flow;
+			/* used to temporarily hold reference to nir_phi_instr
+			 * until we resolve the phi srcs
+			 */
+			void *nphi;
+		} phi;
 		struct {
 			struct ir3_block *block;
 		} inout;
-
-		/* XXX keep this as big as all other union members! */
-		uint32_t info[3];
 	};
 
 	/* transient values used during various algorithms: */
@@ -243,6 +249,13 @@
 		 */
 #define DEPTH_UNUSED  ~0
 		unsigned depth;
+		/* When we get to the RA stage, we no longer need depth, but
+		 * we do need instruction's position/name:
+		 */
+		struct {
+			uint16_t ip;
+			uint16_t name;
+		};
 	};
 
 	/* Used during CP and RA stages.  For fanin and shader inputs/
@@ -274,6 +287,8 @@
 
 	/* an instruction can reference at most one address register amongst
 	 * it's src/dst registers.  Beyond that, you need to insert mov's.
+	 *
+	 * NOTE: do not write this directly, use ir3_instr_set_address()
 	 */
 	struct ir3_instruction *address;
 
@@ -290,7 +305,9 @@
 	 */
 	struct ir3_instruction *fanin;
 
-	struct ir3_instruction *next;
+	/* Entry in ir3_block's instruction list: */
+	struct list_head node;
+
 #ifdef DEBUG
 	uint32_t serialno;
 #endif
@@ -321,8 +338,11 @@
 struct ir3_heap_chunk;
 
 struct ir3 {
-	unsigned instrs_count, instrs_sz;
-	struct ir3_instruction **instrs;
+	struct ir3_compiler *compiler;
+
+	unsigned ninputs, noutputs;
+	struct ir3_instruction **inputs;
+	struct ir3_instruction **outputs;
 
 	/* Track bary.f (and ldlv) instructions.. this is needed in
 	 * scheduling to ensure that all varying fetches happen before
@@ -345,33 +365,60 @@
 	 */
 	unsigned indirects_count, indirects_sz;
 	struct ir3_instruction **indirects;
+	/* and same for instructions that consume predicate register: */
+	unsigned predicates_count, predicates_sz;
+	struct ir3_instruction **predicates;
+
+	/* Track instructions which do not write a register but other-
+	 * wise must not be discarded (such as kill, stg, etc)
+	 */
+	unsigned keeps_count, keeps_sz;
+	struct ir3_instruction **keeps;
+
+	/* List of blocks: */
+	struct list_head block_list;
 
-	struct ir3_block *block;
 	unsigned heap_idx;
 	struct ir3_heap_chunk *chunk;
 };
 
+typedef struct nir_block nir_block;
+
 struct ir3_block {
+	struct list_head node;
 	struct ir3 *shader;
-	unsigned ntemporaries, ninputs, noutputs;
-	/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
-	struct ir3_instruction **temporaries;
-	struct ir3_instruction **inputs;
-	struct ir3_instruction **outputs;
-	/* only a single address register: */
-	struct ir3_instruction *address;
-	struct ir3_block *parent;
-	struct ir3_instruction *head;
+
+	nir_block *nblock;
+
+	struct list_head instr_list;  /* list of ir3_instruction */
+
+	/* each block has either one or two successors.. in case of
+	 * two successors, 'condition' decides which one to follow.
+	 * A block preceding an if/else has two successors.
+	 */
+	struct ir3_instruction *condition;
+	struct ir3_block *successors[2];
+
+	uint16_t start_ip, end_ip;
+
+	/* used for per-pass extra block data.  Mainly used right
+	 * now in RA step to track livein/liveout.
+	 */
+	void *bd;
+
+#ifdef DEBUG
+	uint32_t serialno;
+#endif
 };
 
-struct ir3 * ir3_create(void);
+struct ir3 * ir3_create(struct ir3_compiler *compiler,
+		unsigned nin, unsigned nout);
 void ir3_destroy(struct ir3 *shader);
 void * ir3_assemble(struct ir3 *shader,
 		struct ir3_info *info, uint32_t gpu_id);
 void * ir3_alloc(struct ir3 *shader, int sz);
 
-struct ir3_block * ir3_block_create(struct ir3 *shader,
-		unsigned ntmp, unsigned nin, unsigned nout);
+struct ir3_block * ir3_block_create(struct ir3 *shader);
 
 struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
 		int category, opc_t opc);
@@ -383,6 +430,8 @@
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
 		int num, int flags);
 
+void ir3_instr_set_address(struct ir3_instruction *instr,
+		struct ir3_instruction *addr);
 
 static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
 {
@@ -392,22 +441,10 @@
 	return false;
 }
 
-static inline void ir3_clear_mark(struct ir3 *shader)
-{
-	/* TODO would be nice to drop the instruction array.. for
-	 * new compiler, _clear_mark() is all we use it for, and
-	 * we could probably manage a linked list instead..
-	 *
-	 * Also, we'll probably want to mark instructions within
-	 * a block, so tracking the list of instrs globally is
-	 * unlikely to be what we want.
-	 */
-	unsigned i;
-	for (i = 0; i < shader->instrs_count; i++) {
-		struct ir3_instruction *instr = shader->instrs[i];
-		instr->flags &= ~IR3_INSTR_MARK;
-	}
-}
+void ir3_block_clear_mark(struct ir3_block *block);
+void ir3_clear_mark(struct ir3 *shader);
+
+unsigned ir3_count_instructions(struct ir3 *ir);
 
 static inline int ir3_instr_regno(struct ir3_instruction *instr,
 		struct ir3_register *reg)
@@ -501,6 +538,48 @@
 	return (instr->category == 6);
 }
 
+static inline bool
+is_store(struct ir3_instruction *instr)
+{
+	if (is_mem(instr)) {
+		/* for these opcodes the "destination" register is really
+		 * a source: it holds the address to store to.
+		 */
+		switch (instr->opc) {
+		case OPC_STG:
+		case OPC_STP:
+		case OPC_STL:
+		case OPC_STLW:
+		case OPC_L2G:
+		case OPC_G2L:
+			return true;
+		default:
+			break;
+		}
+	}
+	return false;   /* not is_mem(), or an opcode not listed above */
+}
+
+static inline bool is_load(struct ir3_instruction *instr)
+{
+	if (is_mem(instr)) {
+		switch (instr->opc) {
+		case OPC_LDG:
+		case OPC_LDL:
+		case OPC_LDP:
+		case OPC_L2G:       /* NOTE: is_store() lists this opcode too */
+		case OPC_LDLW:
+		case OPC_LDC_4:
+		case OPC_LDLV:
+		/* probably not exhaustive, other opcodes may belong here too.. */
+			return true;
+		default:
+			break;
+		}
+	}
+	return false;   /* not is_mem(), or an opcode not listed above */
+}
+
 static inline bool is_input(struct ir3_instruction *instr)
 {
 	/* in some cases, ldlv is used to fetch varying without
@@ -525,7 +604,7 @@
 {
 	if (instr->regs_count > 0) {
 		struct ir3_register *dst = instr->regs[0];
-		return !!(dst->flags & IR3_REG_ADDR);
+		return reg_num(dst) == REG_A0;
 	}
 	return false;
 }
@@ -556,13 +635,29 @@
 
 static inline bool reg_gpr(struct ir3_register *r)
 {
-	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
+	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
 		return false;
 	if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
 		return false;
 	return true;
 }
 
+static inline type_t half_type(type_t type)
+{
+	switch (type) {         /* map a 32-bit type to its 16-bit counterpart */
+	case TYPE_F32: return TYPE_F16;
+	case TYPE_U32: return TYPE_U16;
+	case TYPE_S32: return TYPE_S16;
+	case TYPE_F16:
+	case TYPE_U16:
+	case TYPE_S16:
+		return type;        /* already a 16-bit type: returned unchanged */
+	default:
+		assert(0);          /* no mapping is defined for any other type */
+		return ~0;          /* only reached when assert() is compiled out */
+	}
+}
+
 /* some cat2 instructions (ie. those which are not float) can embed an
  * immediate:
  */
@@ -747,37 +842,31 @@
 
 
 /* dump: */
-#include <stdio.h>
-void ir3_dump(struct ir3 *shader, const char *name,
-		struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
-		FILE *f);
-void ir3_dump_instr_single(struct ir3_instruction *instr);
-void ir3_dump_instr_list(struct ir3_instruction *instr);
-
-/* flatten if/else: */
-int ir3_block_flatten(struct ir3_block *block);
+void ir3_print(struct ir3 *ir);
+void ir3_print_instr(struct ir3_instruction *instr);
 
 /* depth calculation: */
 int ir3_delayslots(struct ir3_instruction *assigner,
 		struct ir3_instruction *consumer, unsigned n);
-void ir3_block_depth(struct ir3_block *block);
+void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list);
+void ir3_depth(struct ir3 *ir);
 
 /* copy-propagate: */
-void ir3_block_cp(struct ir3_block *block);
+void ir3_cp(struct ir3 *ir);
 
-/* group neightbors and insert mov's to resolve conflicts: */
-void ir3_block_group(struct ir3_block *block);
+/* group neighbors and insert mov's to resolve conflicts: */
+void ir3_group(struct ir3 *ir);
 
 /* scheduling: */
-int ir3_block_sched(struct ir3_block *block);
+int ir3_sched(struct ir3 *ir);
 
 /* register assignment: */
-int ir3_block_ra(struct ir3_block *block, enum shader_t type,
+struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx);
+int ir3_ra(struct ir3 *ir3, enum shader_t type,
 		bool frag_coord, bool frag_face);
 
 /* legalize: */
-void ir3_block_legalize(struct ir3_block *block,
-		bool *has_samp, int *max_bary);
+void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
 
 /* ************************************************************************* */
 /* instruction helpers */
@@ -807,6 +896,21 @@
 	return instr;
 }
 
+static inline struct ir3_instruction *
+ir3_NOP(struct ir3_block *block)
+{
+	return ir3_instr_create(block, 0, OPC_NOP);   /* category 0; this helper attaches no registers */
+}
+
+/* INSTR0(CAT, name): define builder ir3_<name>(block) for an instruction
+ * of category CAT and opcode OPC_<name>; the builder takes no operands. */
+#define INSTR0(CAT, name)                                                \
+static inline struct ir3_instruction *                                   \
+ir3_##name(struct ir3_block *block)                                      \
+{                                                                        \
+	return ir3_instr_create(block, CAT, OPC_##name);                     \
+}
+
 #define INSTR1(CAT, name)                                                \
 static inline struct ir3_instruction *                                   \
 ir3_##name(struct ir3_block *block,                                      \
@@ -850,7 +954,10 @@
 }
 
 /* cat0 instructions: */
+INSTR0(0, BR);
+INSTR0(0, JUMP);
 INSTR1(0, KILL);
+INSTR0(0, END);
 
 /* cat2 instructions, most 2 src but some 1 src: */
 INSTR2(2, ADD_F)
@@ -962,6 +1069,7 @@
 /* cat6 instructions: */
 INSTR2(6, LDLV)
 INSTR2(6, LDG)
+INSTR3(6, STG)
 
 /* ************************************************************************* */
 /* split this out or find some helper to use.. like main/bitset.h.. */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_legalize.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_legalize.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_legalize.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,7 +26,6 @@
  *    Rob Clark <robclark@freedesktop.org>
  */
 
-#include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
 
 #include "freedreno_util.h"
@@ -43,20 +42,31 @@
  */
 
 struct ir3_legalize_ctx {
-	struct ir3_block *block;
 	bool has_samp;
 	int max_bary;
 };
 
-static void legalize(struct ir3_legalize_ctx *ctx)
+/* We want to evaluate each block from the position of any other
+ * predecessor block, in order that the flags set are the union
+ * of all possible program paths.  For stopping condition, we
+ * want to stop when the pair of <pred-block, current-block> has
+ * been visited already.
+ *
+ * XXX is that completely true?  We could have different needs_xyz
+ * flags set depending on path leading to pred-block.. we could
+ * do *most* of this based on chasing src instructions ptrs (and
+ * following all phi srcs).. except the write-after-read hazard.
+ *
+ * For now we just set ss/sy flag on first instruction on block,
+ * and handle everything within the block as before.
+ */
+
+static void
+legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
 {
-	struct ir3_block *block = ctx->block;
-	struct ir3_instruction *n;
-	struct ir3 *shader = block->shader;
-	struct ir3_instruction *end =
-			ir3_instr_create(block, 0, OPC_END);
 	struct ir3_instruction *last_input = NULL;
 	struct ir3_instruction *last_rel = NULL;
+	struct list_head instr_list;
 	regmask_t needs_ss_war;       /* write after read */
 	regmask_t needs_ss;
 	regmask_t needs_sy;
@@ -65,9 +75,13 @@
 	regmask_init(&needs_ss);
 	regmask_init(&needs_sy);
 
-	shader->instrs_count = 0;
+	/* remove all the instructions from the list, we'll be adding
+	 * them back in as we go
+	 */
+	list_replace(&block->instr_list, &instr_list);
+	list_inithead(&block->instr_list);
 
-	for (n = block->head; n; n = n->next) {
+	list_for_each_entry_safe (struct ir3_instruction, n, &instr_list, node) {
 		struct ir3_register *reg;
 		unsigned i;
 
@@ -134,18 +148,18 @@
 		 */
 		if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
 			struct ir3_instruction *nop;
-			nop = ir3_instr_create(block, 0, OPC_NOP);
+			nop = ir3_NOP(block);
 			nop->flags |= IR3_INSTR_SS;
 			n->flags &= ~IR3_INSTR_SS;
 		}
 
 		/* need to be able to set (ss) on first instruction: */
-		if ((shader->instrs_count == 0) && (n->category >= 5))
-			ir3_instr_create(block, 0, OPC_NOP);
+		if (list_empty(&block->instr_list) && (n->category >= 5))
+			ir3_NOP(block);
 
-		if (is_nop(n) && shader->instrs_count) {
-			struct ir3_instruction *last =
-					shader->instrs[shader->instrs_count-1];
+		if (is_nop(n) && !list_empty(&block->instr_list)) {
+			struct ir3_instruction *last = list_last_entry(&block->instr_list,
+					struct ir3_instruction, node);
 			if (is_nop(last) && (last->repeat < 5)) {
 				last->repeat++;
 				last->flags |= n->flags;
@@ -153,7 +167,7 @@
 			}
 		}
 
-		shader->instrs[shader->instrs_count++] = n;
+		list_addtail(&n->node, &block->instr_list);
 
 		if (is_sfu(n))
 			regmask_set(&needs_ss, n->regs[0]);
@@ -168,14 +182,14 @@
 			 */
 			ctx->has_samp = true;
 			regmask_set(&needs_sy, n->regs[0]);
-		} else if (is_mem(n)) {
+		} else if (is_load(n)) {
 			regmask_set(&needs_sy, n->regs[0]);
 		}
 
 		/* both tex/sfu appear to not always immediately consume
 		 * their src register(s):
 		 */
-		if (is_tex(n) || is_sfu(n) || is_mem(n)) {
+		if (is_tex(n) || is_sfu(n) || is_load(n)) {
 			foreach_src(reg, n) {
 				if (reg_gpr(reg))
 					regmask_set(&needs_ss_war, reg);
@@ -192,35 +206,20 @@
 		 * the (ei) flag:
 		 */
 		if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
-			int i, cnt;
-
-			/* note that ir3_instr_create() inserts into
-			 * shader->instrs[] and increments the count..
-			 * so we need to bump up the cnt initially (to
-			 * avoid it clobbering the last real instr) and
-			 * restore it after.
-			 */
-			cnt = ++shader->instrs_count;
+			struct ir3_instruction *baryf;
 
-			/* inserting instructions would be a bit nicer if list.. */
-			for (i = cnt - 2; i >= 0; i--) {
-				if (shader->instrs[i] == last_input) {
-
-					/* (ss)bary.f (ei)r63.x, 0, r0.x */
-					last_input = ir3_instr_create(block, 2, OPC_BARY_F);
-					last_input->flags |= IR3_INSTR_SS;
-					ir3_reg_create(last_input, regid(63, 0), 0);
-					ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
-					ir3_reg_create(last_input, regid(0, 0), 0);
+			/* (ss)bary.f (ei)r63.x, 0, r0.x */
+			baryf = ir3_instr_create(block, 2, OPC_BARY_F);
+			baryf->flags |= IR3_INSTR_SS;
+			ir3_reg_create(baryf, regid(63, 0), 0);
+			ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
+			ir3_reg_create(baryf, regid(0, 0), 0);
+
+			/* insert the dummy bary.f after last_input: */
+			list_delinit(&baryf->node);
+			list_add(&baryf->node, &last_input->node);
 
-					shader->instrs[i + 1] = last_input;
-
-					break;
-				}
-				shader->instrs[i + 1] = shader->instrs[i];
-			}
-
-			shader->instrs_count = cnt;
+			last_input = baryf;
 		}
 		last_input->regs[0]->flags |= IR3_REG_EI;
 	}
@@ -228,21 +227,177 @@
 	if (last_rel)
 		last_rel->flags |= IR3_INSTR_UL;
 
-	shader->instrs[shader->instrs_count++] = end;
+	list_first_entry(&block->instr_list, struct ir3_instruction, node)
+		->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+}
+
+/* NOTE: branch instructions are always the last instruction(s)
+ * in the block.  We take advantage of this as we resolve the
+ * branches, since "if (foo) break;" constructs turn into
+ * something like:
+ *
+ *   block3 {
+ *   	...
+ *   	0029:021: mov.s32s32 r62.x, r1.y
+ *   	0082:022: br !p0.x, target=block5
+ *   	0083:023: br p0.x, target=block4
+ *   	// succs: if _[0029:021: mov.s32s32] block4; else block5;
+ *   }
+ *   block4 {
+ *   	0084:024: jump, target=block6
+ *   	// succs: block6;
+ *   }
+ *   block5 {
+ *   	0085:025: jump, target=block7
+ *   	// succs: block7;
+ *   }
+ *
+ * ie. only instruction in block4/block5 is a jump, so when
+ * resolving branches we can easily detect this by checking
+ * that the first instruction in the target block is itself
+ * a jump, and setup the br directly to the jump's target
+ * (and strip back out the now unreached jump)
+ *
+ * TODO sometimes we end up with things like:
+ *
+ *    br !p0.x, #2
+ *    br p0.x, #12
+ *    add.u r0.y, r0.y, 1
+ *
+ * If we swapped the order of the branches, we could drop one.
+ */
+static struct ir3_block *
+resolve_dest_block(struct ir3_block *block)
+{
+	/* special case for last block (no successor to forward to): */
+	if (!block->successors[0])
+		return block;
+
+	/* NOTE that we may or may not have inserted the jump
+	 * in the target block yet, so conditions to resolve
+	 * the dest to the dest block's successor are:
+	 *
+	 *   (1) successor[1] == NULL &&
+	 *   (2) (block-is-empty || only-instr-is-jump)
+	 */
+	if (block->successors[1] == NULL) {
+		if (list_empty(&block->instr_list)) {
+			return block->successors[0];   /* empty block: falls straight through */
+		} else if (list_length(&block->instr_list) == 1) {
+			struct ir3_instruction *instr = list_first_entry(
+					&block->instr_list, struct ir3_instruction, node);
+			if (is_flow(instr) && (instr->opc == OPC_JUMP))
+				return block->successors[0];   /* block is nothing but a jump */
+		}
+	}
+	return block;   /* block does real work (or branches): keep it as the target */
+}
+
+static bool
+resolve_jump(struct ir3_instruction *instr)
+{
+	/* Returns true if the IR was changed (target block or 'instr' itself
+	 * unlinked), in which case ip's are stale and the caller must retry.
+	 */
+	struct ir3_block *tblock = resolve_dest_block(instr->cat0.target);
+	struct ir3_instruction *target;
+
+	if (tblock != instr->cat0.target) {
+		list_delinit(&instr->cat0.target->node);
+		instr->cat0.target = tblock;
+		return true;
+	}
+
+	/* list_first_entry() never yields NULL, so test for an empty list: */
+	target = list_first_entry(&tblock->instr_list,
+				struct ir3_instruction, node);
+	if (list_empty(&tblock->instr_list) || (target->ip == (instr->ip + 1))) {
+		list_delinit(&instr->node);
+		return true;
+	}
+	instr->cat0.immed = (int)target->ip - (int)instr->ip;
+	return false;
+}
+
+/* resolve jumps, removing jumps/branches to immediately following
+ * instruction which we end up with from earlier stages.  Since
+ * removing an instruction can invalidate earlier instruction's
+ * branch offsets, we need to do this iteratively until no more
+ * branches are removed.
+ */
+static bool
+resolve_jumps(struct ir3 *ir)
+{
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node)
+		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node)
+			if (is_flow(instr) && instr->cat0.target)
+				if (resolve_jump(instr))
+					return true;   /* a list was modified mid-walk: stop, caller retries */
+
+	return false;   /* full pass with no change: nothing left to resolve */
+}
 
-	shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+/* we want to mark points where divergent flow control re-converges
+ * with (jp) flags.  For now, since we don't do any optimization for
+ * things that start out as a 'do {} while()', re-convergence points
+ * will always be a branch or jump target.  Note that this is overly
+ * conservative, since unconditional jump targets are not convergence
+ * points, we are just assuming that the other path to reach the jump
+ * target was divergent.  If we were clever enough to optimize the
+ * jump at end of a loop back to a conditional branch into a single
+ * conditional branch, ie. like:
+ *
+ *    add.f r1.w, r0.x, (neg)(r)c2.x   <= loop start
+ *    mul.f r1.z, r1.z, r0.x
+ *    mul.f r1.y, r1.y, r0.x
+ *    mul.f r0.z, r1.x, r0.x
+ *    mul.f r0.w, r0.y, r0.x
+ *    cmps.f.ge r0.x, (r)c2.y, (r)r1.w
+ *    add.s r0.x, (r)r0.x, (r)-1
+ *    sel.f32 r0.x, (r)c3.y, (r)r0.x, c3.x
+ *    cmps.f.eq p0.x, r0.x, c3.y
+ *    mov.f32f32 r0.x, r1.w
+ *    mov.f32f32 r0.y, r0.w
+ *    mov.f32f32 r1.x, r0.z
+ *    (rpt2)nop
+ *    br !p0.x, #-13
+ *    (jp)mul.f r0.x, c263.y, r1.y
+ *
+ * Then we'd have to be more clever, as the convergence point is no
+ * longer a branch or jump target.
+ */
+static void
+mark_convergence_points(struct ir3 *ir)
+{
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+			if (is_flow(instr) && instr->cat0.target) {
+				struct ir3_instruction *target =   /* NOTE(review): assumes target block is non-empty -- confirm */
+					list_first_entry(&instr->cat0.target->instr_list,
+							struct ir3_instruction, node);
+				target->flags |= IR3_INSTR_JP;   /* (jp) on first instr of each branch/jump target */
+			}
+		}
+	}
 }
 
-void ir3_block_legalize(struct ir3_block *block,
-		bool *has_samp, int *max_bary)
+void
+ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary)
 {
 	struct ir3_legalize_ctx ctx = {
-			.block = block,
 			.max_bary = -1,
 	};
 
-	legalize(&ctx);
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		legalize_block(&ctx, block);
+	}
 
 	*has_samp = ctx.has_samp;
 	*max_bary = ctx.max_bary;
+
+	do {
+		ir3_count_instructions(ir);
+	} while(resolve_jumps(ir));
+
+	mark_convergence_points(ir);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_print.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_print.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_print.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_print.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,251 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "ir3.h"
+
+#define PTRID(x) ((unsigned long)(x))
+
+static void print_instr_name(struct ir3_instruction *instr)
+{
+#ifdef DEBUG
+	printf("%04u:", instr->serialno);
+#endif
+	printf("%03u: ", instr->depth);   /* prints the 'depth' field as a 3-digit prefix */
+
+	if (instr->flags & IR3_INSTR_SY)
+		printf("(sy)");
+	if (instr->flags & IR3_INSTR_SS)
+		printf("(ss)");
+
+	if (is_meta(instr)) {
+		switch(instr->opc) {
+		case OPC_META_PHI:
+			printf("&#934;");   /* emitted literally; "&#934;" is the HTML entity for capital phi */
+			break;
+		default:
+			/* shouldn't hit here.. just for debugging: */
+			switch (instr->opc) {
+			case OPC_META_INPUT:  printf("_meta:in");   break;
+			case OPC_META_FO:     printf("_meta:fo");   break;
+			case OPC_META_FI:     printf("_meta:fi");   break;
+
+			default: printf("_meta:%d", instr->opc); break;
+			}
+			break;
+		}
+	} else if (instr->category == 1) {
+		static const char *type[] = {   /* printable suffix, indexed by type_t value */
+				[TYPE_F16] = "f16",
+				[TYPE_F32] = "f32",
+				[TYPE_U16] = "u16",
+				[TYPE_U32] = "u32",
+				[TYPE_S16] = "s16",
+				[TYPE_S32] = "s32",
+				[TYPE_U8]  = "u8",
+				[TYPE_S8]  = "s8",
+		};
+		if (instr->cat1.src_type == instr->cat1.dst_type)
+			printf("mov");   /* same src/dst type */
+		else
+			printf("cov");   /* src and dst types differ */
+		printf(".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]);
+	} else {
+		printf("%s", ir3_instr_name(instr));
+		if (instr->flags & IR3_INSTR_3D)   /* each set flag appends its suffix, in this fixed order */
+			printf(".3d");
+		if (instr->flags & IR3_INSTR_A)
+			printf(".a");
+		if (instr->flags & IR3_INSTR_O)
+			printf(".o");
+		if (instr->flags & IR3_INSTR_P)
+			printf(".p");
+		if (instr->flags & IR3_INSTR_S)
+			printf(".s");
+		if (instr->flags & IR3_INSTR_S2EN)
+			printf(".s2en");
+	}
+}
+
+static void print_reg_name(struct ir3_register *reg, bool followssa)
+{
+	if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&   /* modifier prefix: abs+neg, neg only, or abs only */
+			(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
+		printf("(absneg)");
+	else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))
+		printf("(neg)");
+	else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS))
+		printf("(abs)");
+
+	if (reg->flags & IR3_REG_IMMED) {
+		printf("imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);   /* float, decimal and hex views */
+	} else if (reg->flags & IR3_REG_SSA) {
+		printf("_");
+		if (followssa) {   /* also print the name of the instruction reg->instr points at */
+			printf("[");
+			print_instr_name(reg->instr);
+			printf("]");
+		}
+	} else if (reg->flags & IR3_REG_RELATIV) {
+		if (reg->flags & IR3_REG_HALF)
+			printf("h");
+		if (reg->flags & IR3_REG_CONST)
+			printf("c<a0.x + %u>", reg->num);
+		else
+			printf("\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);   /* ANSI escapes: red text, then reset */
+	} else {
+		if (reg->flags & IR3_REG_HALF)
+			printf("h");
+		if (reg->flags & IR3_REG_CONST)
+			printf("c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
+		else
+			printf("\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]);   /* ANSI escapes: red text, then reset */
+	}
+}
+
+static void
+tab(int lvl)
+{
+	for (; lvl > 0; lvl--)   /* one tab per indentation level; nothing for lvl <= 0 */
+		printf("\t");
+}
+
+static uint32_t
+block_id(struct ir3_block *block)
+{
+#ifdef DEBUG
+	return block->serialno;              /* debug builds: per-block serial number */
+#else
+	return (uint32_t)(uintptr_t)block;   /* otherwise low 32 bits of the address; uintptr_t matches pointer width */
+#endif
+}
+
+static void
+print_instr(struct ir3_instruction *instr, int lvl)
+{
+	unsigned i;
+
+	tab(lvl);   /* indent by 'lvl' tabs, then print the whole instruction on one line */
+
+	print_instr_name(instr);
+	for (i = 0; i < instr->regs_count; i++) {
+		struct ir3_register *reg = instr->regs[i];
+		printf(i ? ", " : " ");
+		print_reg_name(reg, !!i);   /* regs[0] is printed without following its SSA link */
+	}
+
+	if (instr->address) {
+		printf(", address=_");
+		printf("[");
+		print_instr_name(instr->address);
+		printf("]");
+	}
+
+	if (instr->fanin) {
+		printf(", fanin=_");
+		printf("[");
+		print_instr_name(instr->fanin);
+		printf("]");
+	}
+
+	if (instr->cp.left) {
+		printf(", left=_");
+		printf("[");
+		print_instr_name(instr->cp.left);
+		printf("]");
+	}
+
+	if (instr->cp.right) {
+		printf(", right=_");
+		printf("[");
+		print_instr_name(instr->cp.right);
+		printf("]");
+	}
+
+	if (is_meta(instr)) {
+		if (instr->opc == OPC_META_FO) {
+			printf(", off=%d", instr->fo.off);
+		} else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {   /* aid is only shown when non-zero */
+			printf(", aid=%d", instr->fi.aid);
+		}
+	}
+
+	if (is_flow(instr) && instr->cat0.target) {
+		/* the predicate register src is implied: */
+		if (instr->opc == OPC_BR) {
+			printf(" %sp0.x", instr->cat0.inv ? "!" : "");   /* "!" marks an inverted condition */
+		}
+		printf(", target=block%u", block_id(instr->cat0.target));
+	}
+
+	printf("\n");
+}
+
+void ir3_print_instr(struct ir3_instruction *instr)
+{
+	print_instr(instr, 0);   /* public entry point: print one instruction with no indentation */
+}
+
+static void
+print_block(struct ir3_block *block, int lvl)
+{
+	tab(lvl); printf("block%u {\n", block_id(block));
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		print_instr(instr, lvl+1);   /* instructions are indented one level inside the braces */
+	}
+	if (block->successors[1]) {
+		/* leading into if/else: */
+		tab(lvl+1);
+		printf("/* succs: if _[");
+		print_instr_name(block->condition);   /* NOTE(review): assumes condition is set whenever successors[1] is -- confirm */
+		printf("] block%u; else block%u; */\n",
+				block_id(block->successors[0]),
+				block_id(block->successors[1]));
+	} else if (block->successors[0]) {   /* single successor; nothing is printed when there is none */
+		tab(lvl+1);
+		printf("/* succs: block%u; */\n",
+				block_id(block->successors[0]));
+	}
+	tab(lvl); printf("}\n");
+}
+
+void
+ir3_print(struct ir3 *ir)
+{
+	/* dump every block in list order, then each non-NULL shader output: */
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node)
+		print_block(block, 0);
+	for (unsigned i = 0; i < ir->noutputs; i++) {
+		if (!ir->outputs[i])
+			continue;   /* no instruction recorded for this output slot */
+		printf("out%u: ", i);   /* 'i' is unsigned, so %u rather than %d */
+		print_instr(ir->outputs[i], 0);
+	}
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_ra.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_ra.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_ra.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_ra.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,284 +26,749 @@
  *    Rob Clark <robclark@freedesktop.org>
  */
 
-#include "pipe/p_shader_tokens.h"
 #include "util/u_math.h"
+#include "util/register_allocate.h"
+#include "util/ralloc.h"
+#include "util/bitset.h"
 
 #include "ir3.h"
+#include "ir3_compiler.h"
 
 /*
  * Register Assignment:
  *
- * NOTE: currently only works on a single basic block.. need to think
- * about how multiple basic blocks are going to get scheduled.  But
- * I think I want to re-arrange how blocks work, ie. get rid of the
- * block nesting thing..
+ * Uses the register_allocate util, which implements graph coloring
+ * algo with interference classes.  To handle the cases where we need
+ * consecutive registers (for example, texture sample instructions),
+ * we model these as larger (double/quad/etc) registers which conflict
+ * with the corresponding registers in other classes.
  *
- * NOTE: we could do register coalescing (eliminate moves) as part of
- * the RA step.. OTOH I think we need to do scheduling before register
- * assignment.  And if we remove a mov that effects scheduling (unless
- * we leave a placeholder nop, which seems lame), so I'm not really
- * sure how practical this is to do both in a single stage.  But OTOH
- * I'm not really sure a sane way for the CP stage to realize when it
- * cannot remove a mov due to multi-register constraints..
+ * We also create separate classes for half-regs, which
+ * do not conflict with the full-reg classes.  We do need at least
+ * sizes 1-4 (to deal w/ texture sample instructions output to half-
+ * reg).  At the moment we don't create the higher order half-reg
+ * classes as half-reg frequently does not have enough precision
+ * for texture coords at higher resolutions.
  *
- * NOTE: http://scopesconf.org/scopes-01/paper/session1_2.ps.gz has
- * some ideas to handle array allocation with a more conventional
- * graph coloring algorithm for register assignment, which might be
- * a good alternative to the current algo.  However afaict it cannot
- * handle overlapping arrays, which is a scenario that we have to
- * deal with
+ * There are some additional cases that we need to handle specially,
+ * as the graph coloring algo doesn't understand "partial writes".
+ * For example, a sequence like:
+ *
+ *   add r0.z, ...
+ *   sam (f32)(xy)r0.x, ...
+ *   ...
+ *   sam (f32)(xyzw)r0.w, r0.x, ...  ; 3d texture, so r0.xyz are coord
+ *
+ * In this scenario, we treat r0.xyz as class size 3, which is written
+ * (from a use/def perspective) at the 'add' instruction and ignore the
+ * subsequent partial writes to r0.xy.  So the 'add r0.z, ...' is the
+ * defining instruction, as it is the first to partially write r0.xyz.
+ *
+ * Note i965 has a similar scenario, which they solve with a virtual
+ * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
+ * register assignment.  But for us that is horrible from a scheduling
+ * standpoint.  Instead what we do is use the idea of a 'definer' instruction.
+ * Ie. the first instruction (lowest ip) to write to the array is the
+ * one we consider from use/def perspective when building interference
+ * graph.  (Other instructions which write other array elements just
+ * define the variable some more.)
  */
 
-struct ir3_ra_ctx {
-	struct ir3_block *block;
-	enum shader_t type;
-	bool frag_coord;
-	bool frag_face;
-	int cnt;
-	bool error;
-	struct {
-		unsigned base;
-		unsigned size;
-	} arrays[MAX_ARRAYS];
+static const unsigned class_sizes[] = {
+	1, 2, 3, 4,
+	4 + 4, /* txd + 1d/2d */
+	4 + 6, /* txd + 3d */
+	/* temporary: until we can assign arrays, create classes so we
+	 * can round up array to fit.  NOTE with tgsi arrays should
+	 * really all be multiples of four:
+	 */
+	4 * 4,
+	4 * 8,
+	4 * 16,
+	4 * 32,
+
 };
+#define class_count ARRAY_SIZE(class_sizes)
 
-#ifdef DEBUG
-#  include "freedreno_util.h"
-#  define ra_debug (fd_mesa_debug & FD_DBG_OPTMSGS)
-#else
-#  define ra_debug 0
-#endif
+static const unsigned half_class_sizes[] = {
+	1, 2, 3, 4,
+};
+#define half_class_count  ARRAY_SIZE(half_class_sizes)
+#define total_class_count (class_count + half_class_count)
+
+/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
+#define NUM_REGS             (4 * (REG_A0 - 1))
+/* Number of virtual regs in a given class: */
+#define CLASS_REGS(i)        (NUM_REGS - (class_sizes[i] - 1))
+#define HALF_CLASS_REGS(i)   (NUM_REGS - (half_class_sizes[i] - 1))
+
+/* register-set, created one time, used for all shaders: */
+struct ir3_ra_reg_set {
+	struct ra_regs *regs;
+	unsigned int classes[class_count];
+	unsigned int half_classes[half_class_count];
+	/* maps flat virtual register space to base gpr: */
+	uint16_t *ra_reg_to_gpr;
+	/* maps cls,gpr to flat virtual register space: */
+	uint16_t **gpr_to_ra_reg;
+};
 
-#define ra_dump_list(msg, n) do { \
-		if (ra_debug) { \
-			debug_printf("-- " msg); \
-			ir3_dump_instr_list(n); \
-		} \
-	} while (0)
-
-#define ra_dump_instr(msg, n) do { \
-		if (ra_debug) { \
-			debug_printf(">> " msg); \
-			ir3_dump_instr_single(n); \
-		} \
-	} while (0)
-
-#define ra_assert(ctx, x) do { \
-		debug_assert(x); \
-		if (!(x)) { \
-			debug_printf("RA: failed assert: %s\n", #x); \
-			(ctx)->error = true; \
-		}; \
-	} while (0)
-
-
-/* sorta ugly way to retrofit half-precision support.. rather than
- * passing extra param around, just OR in a high bit.  All the low
- * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
- * will continue to work as long as you don't underflow (and that
- * would go badly anyways).
+/* One-time setup of RA register-set, which describes all the possible
+ * "virtual" registers and their interferences.  Ie. double register
+ * occupies (and conflicts with) two single registers, and so forth.
+ * Since registers do not need to be aligned to their class size, they
+ * can conflict with other registers in the same class too.  Ie:
+ *
+ *    Single (base) |  Double
+ *    --------------+---------------
+ *       R0         |  D0
+ *       R1         |  D0 D1
+ *       R2         |     D1 D2
+ *       R3         |        D2
+ *           .. and so on..
+ *
+ * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
+ * really just four scalar registers.  Don't let that confuse you.)
  */
-#define REG_HALF  0x8000
+struct ir3_ra_reg_set *
+ir3_ra_alloc_reg_set(void *memctx)
+{
+	struct ir3_ra_reg_set *set = rzalloc(memctx, struct ir3_ra_reg_set);
+	unsigned ra_reg_count, reg, first_half_reg;
+	unsigned int **q_values;
+
+	/* calculate # of regs across all classes: */
+	ra_reg_count = 0;
+	for (unsigned i = 0; i < class_count; i++)
+		ra_reg_count += CLASS_REGS(i);
+	for (unsigned i = 0; i < half_class_count; i++)
+		ra_reg_count += HALF_CLASS_REGS(i);
+
+	/* allocate and populate q_values: */
+	q_values = ralloc_array(set, unsigned *, total_class_count);
+	for (unsigned i = 0; i < class_count; i++) {
+		q_values[i] = rzalloc_array(q_values, unsigned, total_class_count);
+
+		/* From register_allocate.c:
+		 *
+		 * q(B,C) (indexed by C, B is this register class) in
+		 * Runeson/Nyström paper.  This is "how many registers of B could
+		 * the worst choice register from C conflict with".
+		 *
+		 * If we just let the register allocation algorithm compute these
+		 * values, is extremely expensive.  However, since all of our
+		 * registers are laid out, we can very easily compute them
+		 * ourselves.  View the register from C as fixed starting at GRF n
+		 * somewhere in the middle, and the register from B as sliding back
+		 * and forth.  Then the first register to conflict from B is the
+		 * one starting at n - class_size[B] + 1 and the last register to
+		 * conflict will start at n + class_size[B] - 1.  Therefore, the
+		 * number of conflicts from B is class_size[B] + class_size[C] - 1.
+		 *
+		 *   +-+-+-+-+-+-+     +-+-+-+-+-+-+
+		 * B | | | | | |n| --> | | | | | | |
+		 *   +-+-+-+-+-+-+     +-+-+-+-+-+-+
+		 *             +-+-+-+-+-+
+		 * C           |n| | | | |
+		 *             +-+-+-+-+-+
+		 *
+		 * (Idea copied from brw_fs_reg_allocate.cpp)
+		 */
+		for (unsigned j = 0; j < class_count; j++)
+			q_values[i][j] = class_sizes[i] + class_sizes[j] - 1;
+	}
 
-#define REG(n, wm, f) (struct ir3_register){ \
-		.flags  = (f), \
-		.num    = (n), \
-		.wrmask = TGSI_WRITEMASK_ ## wm, \
+	for (unsigned i = class_count; i < total_class_count; i++) {
+		q_values[i] = rzalloc_array(q_values, unsigned, total_class_count);
+
+		/* see comment above (full-reg columns stay zero: no conflict): */
+		for (unsigned j = class_count; j < total_class_count; j++) {
+			q_values[i][j] = half_class_sizes[i - class_count] +
+					half_class_sizes[j - class_count] - 1;
+		}
 	}
 
-/* check that the register exists, is a GPR and is not special (a0/p0) */
-static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
-{
-	if ((n < instr->regs_count) && reg_gpr(instr->regs[n]) &&
-			!(instr->regs[n]->flags & IR3_REG_SSA))
-		return instr->regs[n];
-	return NULL;
+	/* allocate the reg-set.. */
+	set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
+	set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
+	set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
+
+	/* .. and classes */
+	reg = 0;
+	for (unsigned i = 0; i < class_count; i++) {
+		set->classes[i] = ra_alloc_reg_class(set->regs);
+
+		set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
+
+		for (unsigned j = 0; j < CLASS_REGS(i); j++) {
+			ra_class_add_reg(set->regs, set->classes[i], reg);
+
+			set->ra_reg_to_gpr[reg] = j;
+			set->gpr_to_ra_reg[i][j] = reg;
+
+			for (unsigned br = j; br < j + class_sizes[i]; br++)
+				ra_add_transitive_reg_conflict(set->regs, br, reg);
+
+			reg++;
+		}
+	}
+
+	first_half_reg = reg;
+
+	for (unsigned i = 0; i < half_class_count; i++) {
+		set->half_classes[i] = ra_alloc_reg_class(set->regs);
+		/* table is sized by the half-reg class, matching the loop below: */
+		set->gpr_to_ra_reg[class_count + i] =
+				ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));
+
+		for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
+			ra_class_add_reg(set->regs, set->half_classes[i], reg);
+
+			set->ra_reg_to_gpr[reg] = j;
+			set->gpr_to_ra_reg[class_count + i][j] = reg;
+
+			for (unsigned br = j; br < j + half_class_sizes[i]; br++)
+				ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg);
+
+			reg++;
+		}
+	}
+
+	ra_set_finalize(set->regs, q_values);
+
+	ralloc_free(q_values);
+
+	return set;
 }
 
-/* figure out if an unassigned src register points back to the instr we
- * are assigning:
- */
-static bool instr_used_by(struct ir3_instruction *instr,
-		struct ir3_register *src)
-{
-	struct ir3_instruction *src_instr = ssa(src);
-	unsigned i;
-	if (instr == src_instr)
-		return true;
-	if (src_instr && is_meta(src_instr))
-		for (i = 1; i < src_instr->regs_count; i++)
-			if (instr_used_by(instr, src_instr->regs[i]))
-				return true;
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+	BITSET_WORD *def;        /* variables defined before used in block */
+	BITSET_WORD *use;        /* variables used before defined in block */
+	BITSET_WORD *livein;     /* which defs reach entry point of block */
+	BITSET_WORD *liveout;    /* which defs reach exit point of block */
+};
+
+/* additional instruction-data (per-instruction) */
+struct ir3_ra_instr_data {
+	/* cached instruction 'definer' info: */
+	struct ir3_instruction *defn;
+	int off, sz, cls;
+};
 
-	return false;
+/* register-assign context, per-shader */
+struct ir3_ra_ctx {
+	struct ir3 *ir;
+	enum shader_t type;
+	bool frag_face;
+
+	struct ir3_ra_reg_set *set;
+	struct ra_graph *g;
+	unsigned alloc_count;
+	unsigned class_alloc_count[total_class_count];
+	unsigned class_base[total_class_count];
+	unsigned instr_cnt;
+	unsigned *def, *use;     /* def/use table */
+	struct ir3_ra_instr_data *instrd;
+};
+
+static bool
+is_half(struct ir3_instruction *instr)
+{
+	return !!(instr->regs[0]->flags & IR3_REG_HALF);
 }
 
-static bool instr_is_output(struct ir3_instruction *instr)
+static int
+size_to_class(unsigned sz, bool half)
 {
-	struct ir3_block *block = instr->block;
-	unsigned i;
+	if (half) {
+		for (unsigned i = 0; i < half_class_count; i++)
+			if (half_class_sizes[i] >= sz)
+				return i + class_count;
+	} else {
+		for (unsigned i = 0; i < class_count; i++)
+			if (class_sizes[i] >= sz)
+				return i;
+	}
+	debug_assert(0);
+	return -1;
+}
 
-	for (i = 0; i < block->noutputs; i++)
-		if (instr == block->outputs[i])
-			return true;
+static bool
+is_temp(struct ir3_register *reg)
+{
+	if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
+		return false;
+	if ((reg->num == regid(REG_A0, 0)) ||
+			(reg->num == regid(REG_P0, 0)))
+		return false;
+	return true;
+}
 
-	return false;
+static bool
+writes_gpr(struct ir3_instruction *instr)
+{
+	if (is_store(instr))
+		return false;
+	/* is dest a normal temp register: */
+	return is_temp(instr->regs[0]);
 }
 
-static void mark_sources(struct ir3_instruction *instr,
-		struct ir3_instruction *n, regmask_t *liveregs, regmask_t *written)
+static struct ir3_instruction *
+get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
+		int *sz, int *off)
 {
-	unsigned i;
+	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+	struct ir3_instruction *d = NULL;
+
+	if (instr->fanin)
+		return get_definer(ctx, instr->fanin, sz, off);
+
+	if (id->defn) {
+		*sz = id->sz;
+		*off = id->off;
+		return id->defn;
+	}
+
+	if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
+		/* What about the case where collect is subset of array, we
+		 * need to find the distance between where actual array starts
+		 * and fanin..  that probably doesn't happen currently.
+		 */
+		struct ir3_register *src;
+		int dsz, doff;
+
+		/* note: don't use foreach_ssa_src as this gets called once
+		 * while assigning regs (which clears SSA flag)
+		 */
+		foreach_src_n(src, n, instr) {
+			struct ir3_instruction *dd;
+			if (!src->instr)
+				continue;
+
+			dd = get_definer(ctx, src->instr, &dsz, &doff);
+
+			if ((!d) || (dd->ip < d->ip)) {
+				d = dd;
+				*sz = dsz;
+				*off = doff - n;
+			}
+		}
+
+	} else if (instr->cp.right || instr->cp.left) {
+		/* covers also the meta:fo case, which ends up w/ single
+		 * scalar instructions for each component:
+		 */
+		struct ir3_instruction *f = ir3_neighbor_first(instr);
+
+		/* by definition, the entire sequence forms one linked list
+		 * of single scalar register nodes (even if some of them may
+		 * be fanouts from a texture sample (for example) instr.  We
+		 * just need to walk the list finding the first element of
+		 * the group defined (lowest ip)
+		 */
+		int cnt = 0;
+
+		d = f;
+		while (f) {
+			if (f->ip < d->ip)
+				d = f;
+			if (f == instr)
+				*off = cnt;
+			f = f->cp.right;
+			cnt++;
+		}
 
-	for (i = 1; i < n->regs_count; i++) {
-		struct ir3_register *r = reg_check(n, i);
-		if (r)
-			regmask_set_if_not(liveregs, r, written);
+		*sz = cnt;
 
-		/* if any src points back to the instruction(s) in
-		 * the block of neighbors that we are assigning then
-		 * mark any written (clobbered) registers as live:
+	} else {
+		/* second case is looking directly at the instruction which
+		 * produces multiple values (eg, texture sample), rather
+		 * than the fanout nodes that point back to that instruction.
+		 * This isn't quite right, because it may be part of a larger
+		 * group, such as:
+		 *
+		 *     sam (f32)(xyzw)r0.x, ...
+		 *     add r1.x, ...
+		 *     add r1.y, ...
+		 *     sam (f32)(xyzw)r2.x, r0.w  <-- (r0.w, r1.x, r1.y)
+		 *
+		 * need to come up with a better way to handle that case.
 		 */
-		if (instr_used_by(instr, n->regs[i]))
-			regmask_or(liveregs, liveregs, written);
+		if (instr->address) {
+			*sz = instr->regs[0]->size;
+		} else {
+			*sz = util_last_bit(instr->regs[0]->wrmask);
+		}
+		*off = 0;
+		d = instr;
+	}
+
+	if (d->regs[0]->flags & IR3_REG_PHI_SRC) {
+		struct ir3_instruction *phi = d->regs[0]->instr;
+		struct ir3_instruction *dd;
+		int dsz, doff;
+
+		dd = get_definer(ctx, phi, &dsz, &doff);
+
+		*sz = MAX2(*sz, dsz);
+		*off = doff;
+
+		if (dd->ip < d->ip) {
+			d = dd;
+		}
+	}
+
+	if (is_meta(d) && (d->opc == OPC_META_PHI)) {
+		/* we have already inserted parallel-copies into
+		 * the phi, so we don't need to chase definers
+		 */
+		struct ir3_register *src;
+		struct ir3_instruction *dd = d;
+
+		/* note: don't use foreach_ssa_src as this gets called once
+		 * while assigning regs (which clears SSA flag)
+		 */
+		foreach_src(src, d) {
+			if (!src->instr)
+				continue;
+			if (src->instr->ip < dd->ip)
+				dd = src->instr;
+		}
+
+		d = dd;
 	}
 
+	if (is_meta(d) && (d->opc == OPC_META_FO)) {
+		struct ir3_instruction *dd;
+		int dsz, doff;
+
+		dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
+
+		/* by definition, should come before: */
+		debug_assert(dd->ip < d->ip);
+
+		*sz = MAX2(*sz, dsz);
+
+		/* Fanouts are grouped, so *off should already be valid */
+
+		d = dd;
+	}
+
+	id->defn = d;
+	id->sz = *sz;
+	id->off = *off;
+
+	return d;
 }
 
-/* live means read before written */
-static void compute_liveregs(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr, regmask_t *liveregs)
+static void
+ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 {
-	struct ir3_block *block = instr->block;
-	struct ir3_instruction *n;
-	regmask_t written;
-	unsigned i;
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+		if (instr->regs_count == 0)
+			continue;
+		/* couple special cases: */
+		if (writes_addr(instr) || writes_pred(instr)) {
+			id->cls = -1;
+			continue;
+		}
+		id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+		id->cls = size_to_class(id->sz, is_half(id->defn));
+	}
+}
 
-	regmask_init(&written);
+/* give each instruction a name (and ip), and count up the # of names
+ * of each class
+ */
+static void
+ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-	for (n = instr->next; n; n = n->next) {
-		struct ir3_register *r;
+#ifdef DEBUG
+		instr->name = ~0;
+#endif
 
-		if (is_meta(n))
-			continue;
+		ctx->instr_cnt++;
 
-		/* check first src's read: */
-		mark_sources(instr, n, liveregs, &written);
+		if (instr->regs_count == 0)
+			continue;
 
-		/* for instructions that write to an array, we need to
-		 * capture the dependency on the array elements:
-		 */
-		if (n->fanin)
-			mark_sources(instr, n->fanin, liveregs, &written);
+		if (!writes_gpr(instr))
+			continue;
 
-		/* meta-instructions don't actually get scheduled,
-		 * so don't let it's write confuse us.. what we
-		 * really care about is when the src to the meta
-		 * instr was written:
-		 */
-		if (is_meta(n))
+		if (id->defn != instr)
 			continue;
 
-		/* then dst written (if assigned already): */
-		r = reg_check(n, 0);
-		if (r) {
-			/* if an instruction *is* an output, then it is live */
-			if (!instr_is_output(n))
-				regmask_set(&written, r);
+		/* arrays which don't fit in one of the pre-defined class
+		 * sizes are pre-colored:
+		 *
+		 * TODO but we still need to allocate names for them, don't we??
+		 */
+		if (id->cls >= 0) {
+			instr->name = ctx->class_alloc_count[id->cls]++;
+			ctx->alloc_count++;
 		}
+	}
+}
+
+static void
+ra_init(struct ir3_ra_ctx *ctx)
+{
+	unsigned n;
+
+	ir3_clear_mark(ctx->ir);
+	n = ir3_count_instructions(ctx->ir);
 
+	ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
+
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		ra_block_find_definers(ctx, block);
 	}
 
-	/* be sure to account for output registers too: */
-	for (i = 0; i < block->noutputs; i++) {
-		struct ir3_register *r;
-		if (!block->outputs[i])
-			continue;
-		r = reg_check(block->outputs[i], 0);
-		if (r)
-			regmask_set_if_not(liveregs, r, &written);
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		ra_block_name_instructions(ctx, block);
 	}
 
-	/* if instruction is output, we need a reg that isn't written
-	 * before the end.. equiv to the instr_used_by() check above
-	 * in the loop body
-	 * TODO maybe should follow fanin/fanout?
+	/* figure out the base register name for each class.  The
+	 * actual ra name is class_base[cls] + instr->name;
 	 */
-	if (instr_is_output(instr))
-		regmask_or(liveregs, liveregs, &written);
+	ctx->class_base[0] = 0;
+	for (unsigned i = 1; i < total_class_count; i++) {
+		ctx->class_base[i] = ctx->class_base[i-1] +
+				ctx->class_alloc_count[i-1];
+	}
+
+	ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+	ralloc_steal(ctx->g, ctx->instrd);
+	ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
+	ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
+}
+
+static unsigned
+ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
+{
+	unsigned name;
+	debug_assert(cls >= 0);
+	name = ctx->class_base[cls] + defn->name;
+	debug_assert(name < ctx->alloc_count);
+	return name;
+}
+
+static void
+ra_destroy(struct ir3_ra_ctx *ctx)
+{
+	ralloc_free(ctx->g);
 }
 
-static int find_available(regmask_t *liveregs, int size, bool half)
+static void
+ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 {
-	unsigned i;
-	unsigned f = half ? IR3_REG_HALF : 0;
-	for (i = 0; i < MAX_REG - size; i++) {
-		if (!regmask_get(liveregs, &REG(i, X, f))) {
-			unsigned start = i++;
-			for (; (i < MAX_REG) && ((i - start) < size); i++)
-				if (regmask_get(liveregs, &REG(i, X, f)))
-					break;
-			if ((i - start) >= size)
-				return start;
+	struct ir3_ra_block_data *bd;
+	unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
+
+	bd = rzalloc(ctx->g, struct ir3_ra_block_data);
+
+	bd->def     = rzalloc_array(bd, BITSET_WORD, bitset_words);
+	bd->use     = rzalloc_array(bd, BITSET_WORD, bitset_words);
+	bd->livein  = rzalloc_array(bd, BITSET_WORD, bitset_words);
+	bd->liveout = rzalloc_array(bd, BITSET_WORD, bitset_words);
+
+	block->bd = bd;
+
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		struct ir3_instruction *src;
+
+		if (instr->regs_count == 0)
+			continue;
+
+		/* There are a couple special cases to deal with here:
+		 *
+		 * fanout: used to split values from a higher class to a lower
+		 *     class, for example split the results of a texture fetch
+		 *     into individual scalar values;  We skip over these from
+		 *     a 'def' perspective, and for a 'use' we walk the chain
+		 *     up to the defining instruction.
+		 *
+		 * fanin: used to collect values from lower class and assemble
+		 *     them together into a higher class, for example arguments
+		 *     to texture sample instructions;  We consider these to be
+		 *     defined at the earliest fanin source.
+		 *
+		 * phi: used to merge values from different flow control paths
+		 *     to the same reg.  Consider defined at earliest phi src,
+		 *     and update all the other phi src's (which may come later
+		 *     in the program) as users to extend the var's live range.
+		 *
+		 * Most of this, other than phi, is completely handled in the
+		 * get_definer() helper.
+		 *
+		 * In either case, we trace the instruction back to the original
+		 * definer and consider that as the def/use ip.
+		 */
+
+		if (writes_gpr(instr)) {
+			struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+			if (id->defn == instr) {
+				/* arrays which don't fit in one of the pre-defined class
+				 * sizes are pre-colored:
+				 */
+				if (id->cls >= 0) {
+					unsigned name = ra_name(ctx, id->cls, id->defn);
+
+					ctx->def[name] = id->defn->ip;
+					ctx->use[name] = id->defn->ip;
+
+					/* since we are in SSA at this point: */
+					debug_assert(!BITSET_TEST(bd->use, name));
+
+					BITSET_SET(bd->def, name);
+
+					if (is_half(id->defn)) {
+						ra_set_node_class(ctx->g, name,
+								ctx->set->half_classes[id->cls - class_count]);
+					} else {
+						ra_set_node_class(ctx->g, name,
+								ctx->set->classes[id->cls]);
+					}
+
+					/* extend the live range for phi srcs, which may come
+					 * from the bottom of the loop
+					 */
+					if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+						struct ir3_instruction *phi = id->defn->regs[0]->instr;
+						foreach_ssa_src(src, phi) {
+							/* if src is after phi, then we need to extend
+							 * the liverange to the end of src's block:
+							 */
+							if (src->ip > phi->ip) {
+								struct ir3_instruction *last =
+									list_last_entry(&src->block->instr_list,
+										struct ir3_instruction, node);
+								ctx->use[name] = MAX2(ctx->use[name], last->ip);
+							}
+						}
+					}
+				}
+			}
+		}
+
+		foreach_ssa_src(src, instr) {
+			if (writes_gpr(src)) {
+				struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
+
+				if (id->cls >= 0) {
+					unsigned name = ra_name(ctx, id->cls, id->defn);
+					ctx->use[name] = MAX2(ctx->use[name], instr->ip);
+					if (!BITSET_TEST(bd->def, name))
+						BITSET_SET(bd->use, name);
+				}
+			}
 		}
 	}
-	assert(0);
-	return -1;
 }
 
-static int alloc_block(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr, int size)
+static bool
+ra_compute_livein_liveout(struct ir3_ra_ctx *ctx)
 {
-	struct ir3_register *dst = instr->regs[0];
-	struct ir3_instruction *n;
-	regmask_t liveregs;
-	unsigned name;
+	unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
+	bool progress = false;
 
-	/* should only ever be called w/ head of neighbor list: */
-	debug_assert(!instr->cp.left);
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		struct ir3_ra_block_data *bd = block->bd;
 
-	regmask_init(&liveregs);
+		/* update livein: */
+		for (unsigned i = 0; i < bitset_words; i++) {
+			BITSET_WORD new_livein =
+				(bd->use[i] | (bd->liveout[i] & ~bd->def[i]));
+
+			if (new_livein & ~bd->livein[i]) {
+				bd->livein[i] |= new_livein;
+				progress = true;
+			}
+		}
 
-	for (n = instr; n; n = n->cp.right)
-		compute_liveregs(ctx, n, &liveregs);
+		/* update liveout: */
+		for (unsigned j = 0; j < ARRAY_SIZE(block->successors); j++) {
+			struct ir3_block *succ = block->successors[j];
+			struct ir3_ra_block_data *succ_bd;
 
-	/* because we do assignment on fanout nodes for wrmask!=0x1, we
-	 * need to handle this special case, where the fanout nodes all
-	 * appear after one or more of the consumers of the src node:
-	 *
-	 *   0098:009: sam _, r2.x
-	 *   0028:010: mul.f r3.z, r4.x, c13.x
-	 *   ; we start assigning here for '0098:009: sam'.. but
-	 *   ; would miss the usage at '0028:010: mul.f'
-	 *   0101:009: _meta:fo _, _[0098:009: sam], off=2
-	 */
-	if (is_meta(instr) && (instr->opc == OPC_META_FO))
-		compute_liveregs(ctx, instr->regs[1]->instr, &liveregs);
+			if (!succ)
+				continue;
 
-	name = find_available(&liveregs, size,
-			!!(dst->flags & IR3_REG_HALF));
+			succ_bd = succ->bd;
 
-	if (dst->flags & IR3_REG_HALF)
-		name |= REG_HALF;
+			for (unsigned i = 0; i < bitset_words; i++) {
+				BITSET_WORD new_liveout =
+					(succ_bd->livein[i] & ~bd->liveout[i]);
+
+				if (new_liveout) {
+					bd->liveout[i] |= new_liveout;
+					progress = true;
+				}
+			}
+		}
+	}
 
-	return name;
+	return progress;
 }
 
-static type_t half_type(type_t type)
+static void
+ra_add_interference(struct ir3_ra_ctx *ctx)
 {
-	switch (type) {
-	case TYPE_F32: return TYPE_F16;
-	case TYPE_U32: return TYPE_U16;
-	case TYPE_S32: return TYPE_S16;
-	/* instructions may already be fixed up: */
-	case TYPE_F16:
-	case TYPE_U16:
-	case TYPE_S16:
-		return type;
-	default:
-		assert(0);
-		return ~0;
+	struct ir3 *ir = ctx->ir;
+
+	/* compute live ranges (use/def) on a block level, also updating
+	 * block's def/use bitmasks (used below to calculate per-block
+	 * livein/liveout):
+	 */
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		ra_block_compute_live_ranges(ctx, block);
+	}
+
+	/* update per-block livein/liveout: */
+	while (ra_compute_livein_liveout(ctx)) {}
+
+	/* extend start/end ranges based on livein/liveout info from cfg: */
+	/* (BITSET_TEST() takes a name/bit index, so walk names, not words) */
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		struct ir3_ra_block_data *bd = block->bd;
+
+		for (unsigned i = 0; i < ctx->alloc_count; i++) {
+			if (BITSET_TEST(bd->livein, i)) {
+				ctx->def[i] = MIN2(ctx->def[i], block->start_ip);
+				ctx->use[i] = MAX2(ctx->use[i], block->start_ip);
+			}
+
+			if (BITSET_TEST(bd->liveout, i)) {
+				ctx->def[i] = MIN2(ctx->def[i], block->end_ip);
+				ctx->use[i] = MAX2(ctx->use[i], block->end_ip);
+			}
+		}
+	}
+
+	/* need to fix things up to keep outputs live (slots may be NULL): */
+	for (unsigned i = 0; i < ir->noutputs; i++) {
+		struct ir3_instruction *instr = ir->outputs[i];
+		struct ir3_ra_instr_data *id;
+		if (!instr)
+			continue;
+		id = &ctx->instrd[instr->ip];
+		if (id->cls >= 0)
+			ctx->use[ra_name(ctx, id->cls, id->defn)] = ctx->instr_cnt;
+	}
+
+	for (unsigned i = 0; i < ctx->alloc_count; i++) {
+		for (unsigned j = 0; j < ctx->alloc_count; j++) {
+			if (!((ctx->def[i] >= ctx->use[j]) ||
+					(ctx->def[j] >= ctx->use[i]))) {
+				ra_add_node_interference(ctx->g, i, j);
+			}
+		}
 	}
 }
 
@@ -358,302 +823,118 @@
 	}
 }
 
-static void reg_assign(struct ir3_instruction *instr,
-		unsigned r, unsigned name)
+static void
+reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
+		struct ir3_instruction *instr)
 {
-	struct ir3_register *reg = instr->regs[r];
+	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-	reg->flags &= ~IR3_REG_SSA;
-	reg->num = name & ~REG_HALF;
+	if (id->cls >= 0) {
+		unsigned name = ra_name(ctx, id->cls, id->defn);
+		unsigned r = ra_get_node_reg(ctx->g, name);
+		unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
 
-	if (name & REG_HALF) {
-		reg->flags |= IR3_REG_HALF;
-		/* if dst reg being assigned, patch up the instr: */
-		if (reg == instr->regs[0])
-			fixup_half_instr_dst(instr);
-		else
-			fixup_half_instr_src(instr);
-	}
-}
+		if (reg->flags & IR3_REG_RELATIV)
+			num += reg->offset;
 
-static void instr_assign(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr, unsigned name);
-
-static void instr_assign_src(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr, unsigned r, unsigned name)
-{
-	struct ir3_register *reg = instr->regs[r];
+		reg->num = num;
+		reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
 
-	if (reg->flags & IR3_REG_RELATIV)
-		name += reg->offset;
-
-	reg_assign(instr, r, name);
-
-	if (is_meta(instr)) {
-		switch (instr->opc) {
-		case OPC_META_INPUT:
-			/* shader-input does not have a src, only block input: */
-			debug_assert(instr->regs_count == 2);
-			instr_assign(ctx, instr, name);
-			return;
-		case OPC_META_FO:
-			instr_assign(ctx, instr, name + instr->fo.off);
-			return;
-		case OPC_META_FI:
-			instr_assign(ctx, instr, name - (r - 1));
-			return;
-		default:
-			break;
-		}
+		if (is_half(id->defn))
+			reg->flags |= IR3_REG_HALF;
 	}
 }
 
-static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr, unsigned name)
+static void
+ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 {
-	struct ir3_instruction *n, *src;
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		struct ir3_register *reg;
 
-	for (n = instr->next; n && !ctx->error; n = n->next) {
-		foreach_ssa_src_n(src, i, n) {
-			unsigned r = i + 1;
-
-			/* skip address / etc (non real sources): */
-			if (r >= n->regs_count)
-				continue;
+		if (instr->regs_count == 0)
+			continue;
 
-			if (src == instr)
-				instr_assign_src(ctx, n, r, name);
+		if (writes_gpr(instr)) {
+			reg_assign(ctx, instr->regs[0], instr);
+			if (instr->regs[0]->flags & IR3_REG_HALF)
+				fixup_half_instr_dst(instr);
 		}
-	}
-}
-
-static void instr_assign(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr, unsigned name)
-{
-	struct ir3_register *reg = instr->regs[0];
-
-	if (reg->flags & IR3_REG_RELATIV)
-		return;
-
-	/* check if already assigned: */
-	if (!(reg->flags & IR3_REG_SSA)) {
-		/* ... and if so, sanity check: */
-		ra_assert(ctx, reg->num == (name & ~REG_HALF));
-		return;
-	}
-
-	/* rename this instructions dst register: */
-	reg_assign(instr, 0, name);
-
-	/* and rename any subsequent use of result of this instr: */
-	instr_assign_srcs(ctx, instr, name);
-
-	/* To simplify the neighbor logic, and to "avoid" dealing with
-	 * instructions which write more than one output, we actually
-	 * do register assignment for instructions that produce multiple
-	 * outputs on the fanout nodes and propagate up the assignment
-	 * to the actual instruction:
-	 */
-	if (is_meta(instr) && (instr->opc == OPC_META_FO)) {
-		struct ir3_instruction *src;
-
-		debug_assert(name >= instr->fo.off);
-
-		foreach_ssa_src(src, instr)
-			instr_assign(ctx, src, name - instr->fo.off);
-	}
-}
-
-/* check neighbor list to see if it is already partially (or completely)
- * assigned, in which case register block is already allocated and we
- * just need to complete the assignment:
- */
-static int check_partial_assignment(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	struct ir3_instruction *n;
-	int off = 0;
 
-	debug_assert(!instr->cp.left);
+		foreach_src_n(reg, n, instr) {
+			struct ir3_instruction *src = reg->instr;
+			if (!src)
+				continue;
 
-	for (n = instr; n; n = n->cp.right) {
-		struct ir3_register *dst = n->regs[0];
-		if ((n->depth != DEPTH_UNUSED) &&
-				!(dst->flags & IR3_REG_SSA)) {
-			int name = dst->num - off;
-			debug_assert(name >= 0);
-			return name;
+			reg_assign(ctx, instr->regs[n+1], src);
+			if (instr->regs[n+1]->flags & IR3_REG_HALF)
+				fixup_half_instr_src(instr);
 		}
-		off++;
-	}
-
-	return -1;
-}
-
-/* allocate register name(s) for a list of neighboring instructions;
- * instr should point to leftmost neighbor (head of list)
- */
-static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	struct ir3_instruction *n;
-	struct ir3_register *dst;
-	int name;
-
-	debug_assert(!instr->cp.left);
-
-	if (instr->regs_count == 0)
-		return;
-
-	dst = instr->regs[0];
-
-	/* For indirect dst, take the register assignment from the
-	 * fanin and propagate it forward.
-	 */
-	if (dst->flags & IR3_REG_RELATIV) {
-		/* NOTE can be grouped, if for example outputs:
-		 * for now disable cp if indirect writes
-		 */
-		instr_alloc_and_assign(ctx, instr->fanin);
-
-		dst->num += instr->fanin->regs[0]->num;
-		dst->flags &= ~IR3_REG_SSA;
-
-		instr_assign_srcs(ctx, instr, instr->fanin->regs[0]->num);
-
-		return;
-	}
-
-	/* for instructions w/ fanouts, do the actual register assignment
-	 * on the group of fanout neighbor nodes and propagate the reg
-	 * name back up to the texture instruction.
-	 */
-	if (dst->wrmask != 0x1)
-		return;
-
-	name = check_partial_assignment(ctx, instr);
-
-	/* allocate register(s): */
-	if (name >= 0) {
-		/* already partially assigned, just finish the job */
-	} else if (reg_gpr(dst)) {
-		int size;
-		/* number of consecutive registers to assign: */
-		size = ir3_neighbor_count(instr);
-		if (dst->wrmask != 0x1)
-			size = MAX2(size, ffs(~dst->wrmask) - 1);
-		name = alloc_block(ctx, instr, size);
-	} else if (dst->flags & IR3_REG_ADDR) {
-		debug_assert(!instr->cp.right);
-		dst->flags &= ~IR3_REG_ADDR;
-		name = regid(REG_A0, 0) | REG_HALF;
-	} else {
-		debug_assert(!instr->cp.right);
-		/* predicate register (p0).. etc */
-		name = regid(REG_P0, 0);
-		debug_assert(dst->num == name);
-	}
-
-	ra_assert(ctx, name >= 0);
-
-	for (n = instr; n && !ctx->error; n = n->cp.right) {
-		instr_assign(ctx, n, name);
-		name++;
 	}
 }
 
-static void instr_assign_array(struct ir3_ra_ctx *ctx,
-		struct ir3_instruction *instr)
+static int
+ra_alloc(struct ir3_ra_ctx *ctx)
 {
-	struct ir3_instruction *src;
-	int name, aid = instr->fi.aid;
-
-	if (ctx->arrays[aid].base == ~0) {
-		int size = instr->regs_count - 1;
-		ctx->arrays[aid].base = alloc_block(ctx, instr, size);
-		ctx->arrays[aid].size = size;
-	}
-
-	name = ctx->arrays[aid].base;
-
-	foreach_ssa_src_n(src, i, instr) {
-		unsigned r = i + 1;
-
-		/* skip address / etc (non real sources): */
-		if (r >= instr->regs_count)
-			break;
-
-		instr_assign(ctx, src, name);
-		name++;
-	}
-
-}
-
-static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
-{
-	struct ir3_instruction *n;
-
 	/* frag shader inputs get pre-assigned, since we have some
 	 * constraints/unknowns about setup for some of these regs:
 	 */
-	if ((ctx->type == SHADER_FRAGMENT) && !block->parent) {
+	if (ctx->type == SHADER_FRAGMENT) {
+		struct ir3 *ir = ctx->ir;
 		unsigned i = 0, j;
-		if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) {
+		if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) {
+			struct ir3_instruction *instr = ir->inputs[i];
+			int cls = size_to_class(1, true);
+			unsigned name = ra_name(ctx, cls, instr);
+			unsigned reg = ctx->set->gpr_to_ra_reg[cls][0];
+
 			/* if we have frag_face, it gets hr0.x */
-			instr_assign(ctx, block->inputs[i], REG_HALF | 0);
+			ra_set_node_reg(ctx->g, name, reg);
 			i += 4;
 		}
-		for (j = 0; i < block->ninputs; i++, j++)
-			if (block->inputs[i])
-				instr_assign(ctx, block->inputs[i], j);
-	}
 
-	ra_dump_list("-------\n", block->head);
-
-	/* first pass, assign arrays: */
-	for (n = block->head; n && !ctx->error; n = n->next) {
-		if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
-			debug_assert(!n->cp.left);  /* don't think this should happen */
-			ra_dump_instr("ASSIGN ARRAY: ", n);
-			instr_assign_array(ctx, n);
-			ra_dump_list("-------\n", block->head);
+		for (j = 0; i < ir->ninputs; i++) {
+			struct ir3_instruction *instr = ir->inputs[i];
+			if (instr) {
+				struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+				if (id->defn == instr) {
+					unsigned name, reg;
+
+					name = ra_name(ctx, id->cls, id->defn);
+					reg = ctx->set->gpr_to_ra_reg[id->cls][j];
+
+					ra_set_node_reg(ctx->g, name, reg);
+					j += id->sz;
+				}
+			}
 		}
 	}
 
-	for (n = block->head; n && !ctx->error; n = n->next) {
-		ra_dump_instr("ASSIGN: ", n);
-		instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
-		ra_dump_list("-------\n", block->head);
+	if (!ra_allocate(ctx->g))
+		return -1;
+
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		ra_block_alloc(ctx, block);
 	}
 
-	return ctx->error ? -1 : 0;
+	return 0;
 }
 
-int ir3_block_ra(struct ir3_block *block, enum shader_t type,
+int ir3_ra(struct ir3 *ir, enum shader_t type,
 		bool frag_coord, bool frag_face)
 {
-	struct ir3_instruction *n;
 	struct ir3_ra_ctx ctx = {
-			.block = block,
+			.ir = ir,
 			.type = type,
-			.frag_coord = frag_coord,
 			.frag_face = frag_face,
+			.set = ir->compiler->set,
 	};
 	int ret;
 
-	memset(&ctx.arrays, ~0, sizeof(ctx.arrays));
-
-	/* mark dst registers w/ SSA flag so we can see which
-	 * have been assigned so far:
-	 * NOTE: we really should set SSA flag consistently on
-	 * every dst register in the frontend.
-	 */
-	for (n = block->head; n; n = n->next)
-		if (n->regs_count > 0)
-			n->regs[0]->flags |= IR3_REG_SSA;
-
-	ir3_clear_mark(block->shader);
-	ret = block_ra(&ctx, block);
+	ra_init(&ctx);
+	ra_add_interference(&ctx);
+	ret = ra_alloc(&ctx);
+	ra_destroy(&ctx);
 
 	return ret;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_sched.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_sched.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_sched.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_sched.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,23 +31,14 @@
 
 #include "ir3.h"
 
-enum {
-	SCHEDULED = -1,
-	DELAYED = -2,
-};
-
 /*
  * Instruction Scheduling:
  *
- * Using the depth sorted list from depth pass, attempt to recursively
- * schedule deepest unscheduled path.  The first instruction that cannot
- * be scheduled, returns the required delay slots it needs, at which
- * point we return back up to the top and attempt to schedule by next
- * highest depth.  After a sufficient number of instructions have been
- * scheduled, return back to beginning of list and start again.  If you
- * reach the end of depth sorted list without being able to insert any
- * instruction, insert nop's.  Repeat until no more unscheduled
- * instructions.
+ * A priority-queue based scheduling algo.  Add eligible instructions,
+ * ie. ones with all their dependencies scheduled, to the priority
+ * (depth) sorted queue (list).  Pop highest priority instruction off
+ * the queue and schedule it, add newly eligible instructions to the
+ * priority queue, rinse, repeat.
  *
  * There are a few special cases that need to be handled, since sched
  * is currently independent of register allocation.  Usages of address
@@ -60,115 +51,70 @@
  */
 
 struct ir3_sched_ctx {
-	struct ir3_instruction *scheduled; /* last scheduled instr */
+	struct ir3_block *block;           /* the current block */
+	struct ir3_instruction *scheduled; /* last scheduled instr XXX remove*/
 	struct ir3_instruction *addr;      /* current a0.x user, if any */
 	struct ir3_instruction *pred;      /* current p0.x user, if any */
-	unsigned cnt;
 	bool error;
 };
 
-static struct ir3_instruction *
-deepest(struct ir3_instruction **srcs, unsigned nsrcs)
-{
-	struct ir3_instruction *d = NULL;
-	unsigned i = 0, id = 0;
-
-	while ((i < nsrcs) && !(d = srcs[id = i]))
-		i++;
-
-	if (!d)
-		return NULL;
-
-	for (; i < nsrcs; i++)
-		if (srcs[i] && (srcs[i]->depth > d->depth))
-			d = srcs[id = i];
-
-	srcs[id] = NULL;
-
-	return d;
-}
-
-static unsigned distance(struct ir3_sched_ctx *ctx,
-		struct ir3_instruction *instr, unsigned maxd)
-{
-	struct ir3_instruction *n = ctx->scheduled;
-	unsigned d = 0;
-	while (n && (n != instr) && (d < maxd)) {
-		if (is_alu(n) || is_flow(n))
-			d++;
-		n = n->next;
-	}
-	return d;
-}
-
-/* TODO maybe we want double linked list? */
-static struct ir3_instruction * prev(struct ir3_instruction *instr)
-{
-	struct ir3_instruction *p = instr->block->head;
-	while (p && (p->next != instr))
-		p = p->next;
-	return p;
-}
-
 static bool is_sfu_or_mem(struct ir3_instruction *instr)
 {
 	return is_sfu(instr) || is_mem(instr);
 }
 
-static void schedule(struct ir3_sched_ctx *ctx,
-		struct ir3_instruction *instr, bool remove)
+static void
+schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 {
-	struct ir3_block *block = instr->block;
+	debug_assert(ctx->block == instr->block);
 
 	/* maybe there is a better way to handle this than just stuffing
 	 * a nop.. ideally we'd know about this constraint in the
 	 * scheduling and depth calculation..
 	 */
 	if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
-		schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
+		ir3_NOP(ctx->block);
 
 	/* remove from depth list:
 	 */
-	if (remove) {
-		struct ir3_instruction *p = prev(instr);
-
-		/* NOTE: this can happen for inputs which are not
-		 * read.. in that case there is no need to schedule
-		 * the input, so just bail:
-		 */
-		if (instr != (p ? p->next : block->head))
-			return;
-
-		if (p)
-			p->next = instr->next;
-		else
-			block->head = instr->next;
-	}
+	list_delinit(&instr->node);
 
 	if (writes_addr(instr)) {
-		assert(ctx->addr == NULL);
+		debug_assert(ctx->addr == NULL);
 		ctx->addr = instr;
 	}
 
 	if (writes_pred(instr)) {
-		assert(ctx->pred == NULL);
+		debug_assert(ctx->pred == NULL);
 		ctx->pred = instr;
 	}
 
 	instr->flags |= IR3_INSTR_MARK;
 
-	instr->next = ctx->scheduled;
+	list_addtail(&instr->node, &instr->block->instr_list);
 	ctx->scheduled = instr;
-
-	ctx->cnt++;
 }
 
-/*
- * Delay-slot calculation.  Follows fanin/fanout.
- */
+static unsigned
+distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
+		unsigned maxd)
+{
+	struct list_head *instr_list = &ctx->block->instr_list;
+	unsigned d = 0;
+
+	list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
+		if ((n == instr) || (d >= maxd))
+			break;
+		if (is_alu(n) || is_flow(n))
+			d++;
+	}
+
+	return d;
+}
 
 /* calculate delay for specified src: */
-static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx,
+static unsigned
+delay_calc_srcn(struct ir3_sched_ctx *ctx,
 		struct ir3_instruction *assigner,
 		struct ir3_instruction *consumer, unsigned srcn)
 {
@@ -177,7 +123,10 @@
 	if (is_meta(assigner)) {
 		struct ir3_instruction *src;
 		foreach_ssa_src(src, assigner) {
-			unsigned d = delay_calc_srcn(ctx, src, consumer, srcn);
+			unsigned d;
+			if (src->block != assigner->block)
+				break;
+			d = delay_calc_srcn(ctx, src, consumer, srcn);
 			delay = MAX2(delay, d);
 		}
 	} else {
@@ -189,48 +138,87 @@
 }
 
 /* calculate delay for instruction (maximum of delay for all srcs): */
-static unsigned delay_calc(struct ir3_sched_ctx *ctx,
-		struct ir3_instruction *instr)
+static unsigned
+delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
 {
 	unsigned delay = 0;
 	struct ir3_instruction *src;
 
 	foreach_ssa_src_n(src, i, instr) {
-		unsigned d = delay_calc_srcn(ctx, src, instr, i);
+		unsigned d;
+		if (src->block != instr->block)
+			continue;
+		d = delay_calc_srcn(ctx, src, instr, i);
 		delay = MAX2(delay, d);
 	}
 
 	return delay;
 }
 
-/* A negative return value signals that an instruction has been newly
- * SCHEDULED (or DELAYED due to address or predicate register already
- * in use), return back up to the top of the stack (to block_sched())
+struct ir3_sched_notes {
+	/* there is at least one kill which could be scheduled, except
+	 * for unscheduled bary.f's:
+	 */
+	bool blocked_kill;
+	/* there is at least one instruction that could be scheduled,
+	 * except for conflicting address/predicate register usage:
+	 */
+	bool addr_conflict, pred_conflict;
+};
+
+static bool is_scheduled(struct ir3_instruction *instr)
+{
+	return !!(instr->flags & IR3_INSTR_MARK);
+}
+
+static bool
+check_conflict(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
+		struct ir3_instruction *instr)
+{
+	/* if this is a write to address/predicate register, and that
+	 * register is currently in use, we need to defer until it is
+	 * free:
+	 */
+	if (writes_addr(instr) && ctx->addr) {
+		debug_assert(ctx->addr != instr);
+		notes->addr_conflict = true;
+		return true;
+	}
+
+	if (writes_pred(instr) && ctx->pred) {
+		debug_assert(ctx->pred != instr);
+		notes->pred_conflict = true;
+		return true;
+	}
+
+	return false;
+}
+
+/* is this instruction ready to be scheduled?  Return negative for not
+ * ready (updating notes if needed), or >= 0 to indicate number of
+ * delay slots needed.
  */
-static int trysched(struct ir3_sched_ctx *ctx,
+static int
+instr_eligibility(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
 		struct ir3_instruction *instr)
 {
-	struct ir3_instruction *srcs[64];
 	struct ir3_instruction *src;
-	unsigned delay, nsrcs = 0;
+	unsigned delay = 0;
 
-	/* if already scheduled: */
-	if (instr->flags & IR3_INSTR_MARK)
+	/* Phi instructions can have a dependency on something not
+	 * scheduled yet (for ex, loops).  But OTOH we don't really
+	 * care.  By definition phi's should appear at the top of
+	 * the block, and its sources should be values from the
+	 * previously executing block, so they are always ready to
+	 * be scheduled:
+	 */
+	if (is_meta(instr) && (instr->opc == OPC_META_PHI))
 		return 0;
 
-	/* figure out our src's, copy 'em out into an array for sorting: */
 	foreach_ssa_src(src, instr) {
-		debug_assert(nsrcs < ARRAY_SIZE(srcs));
-		srcs[nsrcs++] = src;
-	}
-
-	/* for each src register in sorted order:
-	 */
-	delay = 0;
-	while ((src = deepest(srcs, nsrcs))) {
-		delay = trysched(ctx, src);
-		if (delay)
-			return delay;
+		/* if dependency not scheduled, we aren't ready yet: */
+		if (!is_scheduled(src))
+			return -1;
 	}
 
 	/* all our dependents are scheduled, figure out if
@@ -255,216 +243,338 @@
 	 */
 	if (is_kill(instr)) {
 		struct ir3 *ir = instr->block->shader;
-		unsigned i;
 
-		for (i = 0; i < ir->baryfs_count; i++) {
+		for (unsigned i = 0; i < ir->baryfs_count; i++) {
 			struct ir3_instruction *baryf = ir->baryfs[i];
 			if (baryf->depth == DEPTH_UNUSED)
 				continue;
-			delay = trysched(ctx, baryf);
-			if (delay)
-				return delay;
+			if (!is_scheduled(baryf)) {
+				notes->blocked_kill = true;
+				return -1;
+			}
 		}
 	}
 
-	/* if this is a write to address/predicate register, and that
-	 * register is currently in use, we need to defer until it is
-	 * free:
-	 */
-	if (writes_addr(instr) && ctx->addr) {
-		assert(ctx->addr != instr);
-		return DELAYED;
-	}
-	if (writes_pred(instr) && ctx->pred) {
-		assert(ctx->pred != instr);
-		return DELAYED;
-	}
+	if (check_conflict(ctx, notes, instr))
+		return -1;
 
-	schedule(ctx, instr, true);
-	return SCHEDULED;
+	return 0;
 }
 
-static struct ir3_instruction * reverse(struct ir3_instruction *instr)
-{
-	struct ir3_instruction *reversed = NULL;
-	while (instr) {
-		struct ir3_instruction *next = instr->next;
-		instr->next = reversed;
-		reversed = instr;
-		instr = next;
+/* could an instruction be scheduled if specified ssa src was scheduled? */
+static bool
+could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
+{
+	struct ir3_instruction *other_src;
+	foreach_ssa_src(other_src, instr) {
+		/* if dependency not scheduled, we aren't ready yet: */
+		if ((src != other_src) && !is_scheduled(other_src)) {
+			return false;
+		}
 	}
-	return reversed;
+	return true;
 }
 
-static bool uses_current_addr(struct ir3_sched_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	return instr->address && (ctx->addr == instr->address);
-}
+/* move eligible instructions to the priority list: */
+static unsigned
+add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
+		struct list_head *prio_queue, struct list_head *unscheduled_list)
+{
+	unsigned min_delay = ~0;
+
+	list_for_each_entry_safe (struct ir3_instruction, instr, unscheduled_list, node) {
+		int e = instr_eligibility(ctx, notes, instr);
+		if (e < 0)
+			continue;
+
+		/* For instructions that write address register we need to
+		 * make sure there is at least one instruction that uses the
+		 * addr value which is otherwise ready.
+		 *
+		 * TODO if any instructions use pred register and have other
+		 * src args, we would need to do the same for writes_pred()..
+		 */
+		if (unlikely(writes_addr(instr))) {
+			struct ir3 *ir = instr->block->shader;
+			bool ready = false;
+			for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
+				struct ir3_instruction *indirect = ir->indirects[i];
+				if (!indirect)
+					continue;
+				if (indirect->address != instr)
+					continue;
+				ready = could_sched(indirect, instr);
+			}
 
-static bool uses_current_pred(struct ir3_sched_ctx *ctx,
-		struct ir3_instruction *instr)
-{
-	struct ir3_instruction *src;
-	foreach_ssa_src(src, instr)
-		if (ctx->pred == src)
-			return true;
-	return false;
+			/* nothing could be scheduled, so keep looking: */
+			if (!ready)
+				continue;
+		}
+
+		min_delay = MIN2(min_delay, e);
+		if (e == 0) {
+			/* remove from unscheduled list and into priority queue: */
+			list_delinit(&instr->node);
+			ir3_insert_by_depth(instr, prio_queue);
+		}
+	}
+
+	return min_delay;
 }
 
-/* when we encounter an instruction that writes to the address register
- * when it is in use, we delay that instruction and try to schedule all
- * other instructions using the current address register:
+/* "spill" the address register by remapping any unscheduled
+ * instructions which depend on the current address register
+ * to a clone of the instruction which wrote the address reg.
  */
-static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
-		struct ir3_block *block)
+static struct ir3_instruction *
+split_addr(struct ir3_sched_ctx *ctx)
 {
-	struct ir3_instruction *instr = block->head;
-	bool addr_in_use = false;
-	bool pred_in_use = false;
-	bool all_delayed = true;
-	unsigned cnt = ~0, attempted = 0;
-
-	while (instr) {
-		struct ir3_instruction *next = instr->next;
-		bool addr = uses_current_addr(ctx, instr);
-		bool pred = uses_current_pred(ctx, instr);
-
-		if (addr || pred) {
-			int ret = trysched(ctx, instr);
-
-			if (ret != DELAYED)
-				all_delayed = false;
-
-			if (ret == SCHEDULED)
-				cnt = 0;
-			else if (ret > 0)
-				cnt = MIN2(cnt, ret);
-			if (addr)
-				addr_in_use = true;
-			if (pred)
-				pred_in_use = true;
+	struct ir3 *ir;
+	struct ir3_instruction *new_addr = NULL;
+	unsigned i;
 
-			attempted++;
-		}
+	debug_assert(ctx->addr);
 
-		instr = next;
-	}
+	ir = ctx->addr->block->shader;
 
-	if (!addr_in_use)
-		ctx->addr = NULL;
+	for (i = 0; i < ir->indirects_count; i++) {
+		struct ir3_instruction *indirect = ir->indirects[i];
 
-	if (!pred_in_use)
-		ctx->pred = NULL;
+		if (!indirect)
+			continue;
 
-	/* detect if we've gotten ourselves into an impossible situation
-	 * and bail if needed
-	 */
-	if (all_delayed && (attempted > 0)) {
-		if (pred_in_use) {
-			/* TODO we probably need to keep a list of instructions
-			 * that reference predicate, similar to indirects
-			 */
-			ctx->error = true;
-			return DELAYED;
+		/* skip instructions already scheduled: */
+		if (is_scheduled(indirect))
+			continue;
+
+		/* remap remaining instructions using current addr
+		 * to new addr:
+		 */
+		if (indirect->address == ctx->addr) {
+			if (!new_addr) {
+				new_addr = ir3_instr_clone(ctx->addr);
+				/* original addr is scheduled, but new one isn't: */
+				new_addr->flags &= ~IR3_INSTR_MARK;
+			}
+			ir3_instr_set_address(indirect, new_addr);
 		}
-		if (addr_in_use) {
-			struct ir3 *ir = ctx->addr->block->shader;
-			struct ir3_instruction *new_addr =
-					ir3_instr_clone(ctx->addr);
-			unsigned i;
+	}
 
-			/* original addr is scheduled, but new one isn't: */
-			new_addr->flags &= ~IR3_INSTR_MARK;
+	/* all remaining indirects remapped to new addr: */
+	ctx->addr = NULL;
 
-			for (i = 0; i < ir->indirects_count; i++) {
-				struct ir3_instruction *indirect = ir->indirects[i];
+	return new_addr;
+}
 
-				/* skip instructions already scheduled: */
-				if (indirect->flags & IR3_INSTR_MARK)
-					continue;
+/* "spill" the predicate register by remapping any unscheduled
+ * instructions which depend on the current predicate register
+ * to a clone of the instruction which wrote the predicate reg.
+ */
+static struct ir3_instruction *
+split_pred(struct ir3_sched_ctx *ctx)
+{
+	struct ir3 *ir;
+	struct ir3_instruction *new_pred = NULL;
+	unsigned i;
 
-				/* remap remaining instructions using current addr
-				 * to new addr:
-				 */
-				if (indirect->address == ctx->addr)
-					indirect->address = new_addr;
-			}
+	debug_assert(ctx->pred);
 
-			/* all remaining indirects remapped to new addr: */
-			ctx->addr = NULL;
+	ir = ctx->pred->block->shader;
 
-			/* not really, but this will trigger us to go back to
-			 * main trysched() loop now that we've resolved the
-			 * conflict by duplicating the instr that writes to
-			 * the address register.
-			 */
-			return SCHEDULED;
+	for (i = 0; i < ir->predicates_count; i++) {
+		struct ir3_instruction *predicated = ir->predicates[i];
+
+		/* skip instructions already scheduled: */
+		if (is_scheduled(predicated))
+			continue;
+
+		/* remap remaining instructions using current pred
+		 * to new pred:
+		 *
+		 * TODO is there ever a case when pred isn't first
+		 * (and only) src?
+		 */
+		if (ssa(predicated->regs[1]) == ctx->pred) {
+			if (!new_pred) {
+				new_pred = ir3_instr_clone(ctx->pred);
+				/* original pred is scheduled, but new one isn't: */
+				new_pred->flags &= ~IR3_INSTR_MARK;
+			}
+			predicated->regs[1]->instr = new_pred;
 		}
 	}
 
-	return cnt;
+	/* all remaining predicated remapped to new pred: */
+	ctx->pred = NULL;
+
+	return new_pred;
 }
 
-static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
+static void
+sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
 {
-	struct ir3_instruction *instr;
+	struct list_head unscheduled_list, prio_queue;
 
-	/* schedule all the shader input's (meta-instr) first so that
-	 * the RA step sees that the input registers contain a value
-	 * from the start of the shader:
+	ctx->block = block;
+
+	/* move all instructions to the unscheduled list, and
+	 * empty the block's instruction list (to which we will
+	 * be inserting).
+	 */
+	list_replace(&block->instr_list, &unscheduled_list);
+	list_inithead(&block->instr_list);
+	list_inithead(&prio_queue);
+
+	/* first a pre-pass to schedule all meta:input/phi instructions
+	 * (which need to appear first so that RA knows the register is
+	 * occupied):
 	 */
-	if (!block->parent) {
-		unsigned i;
-		for (i = 0; i < block->ninputs; i++) {
-			struct ir3_instruction *in = block->inputs[i];
-			if (in)
-				schedule(ctx, in, true);
+	list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
+		if (is_meta(instr) && ((instr->opc == OPC_META_INPUT) ||
+				(instr->opc == OPC_META_PHI)))
+			schedule(ctx, instr);
+	}
+
+	while (!(list_empty(&unscheduled_list) &&
+			list_empty(&prio_queue))) {
+		struct ir3_sched_notes notes = {0};
+		unsigned delay;
+
+		delay = add_eligible_instrs(ctx, &notes, &prio_queue, &unscheduled_list);
+
+		if (!list_empty(&prio_queue)) {
+			struct ir3_instruction *instr = list_last_entry(&prio_queue,
+					struct ir3_instruction, node);
+			/* ugg, this is a bit ugly, but between the time when
+			 * the instruction became eligible and now, a new
+			 * conflict may have arisen..
+			 */
+			if (check_conflict(ctx, &notes, instr)) {
+				list_del(&instr->node);
+				list_addtail(&instr->node, &unscheduled_list);
+				continue;
+			}
+
+			schedule(ctx, instr);
+		} else if (delay == ~0) {
+			struct ir3_instruction *new_instr = NULL;
+
+			/* nothing available to schedule.. if we are blocked on
+			 * address/predicate register conflict, then break the
+			 * deadlock by cloning the instruction that wrote that
+			 * reg:
+			 */
+			if (notes.addr_conflict) {
+				new_instr = split_addr(ctx);
+			} else if (notes.pred_conflict) {
+				new_instr = split_pred(ctx);
+			} else {
+				debug_assert(0);
+				ctx->error = true;
+				return;
+			}
+
+			if (new_instr) {
+				list_del(&new_instr->node);
+				list_addtail(&new_instr->node, &unscheduled_list);
+				/* the original instr that wrote addr/pred may have
+				 * originated from a different block:
+				 */
+				new_instr->block = block;
+			}
+
+		} else {
+			/* and if we run out of instructions that can be scheduled,
+			 * then it is time for nop's:
+			 */
+			debug_assert(delay <= 6);
+			while (delay > 0) {
+				ir3_NOP(block);
+				delay--;
+			}
 		}
 	}
 
-	while ((instr = block->head) && !ctx->error) {
-		/* NOTE: always grab next *before* trysched(), in case the
-		 * instruction is actually scheduled (and therefore moved
-		 * from depth list into scheduled list)
+	/* And lastly, insert branch/jump instructions to take us to
+	 * the next block.  Later we'll strip back out the branches
+	 * that simply jump to next instruction.
+	 */
+	if (block->successors[1]) {
+		/* if/else, conditional branches to "then" or "else": */
+		struct ir3_instruction *br;
+		unsigned delay = 6;
+
+		debug_assert(ctx->pred);
+		debug_assert(block->condition);
+
+		delay -= distance(ctx, ctx->pred, delay);
+
+		while (delay > 0) {
+			ir3_NOP(block);
+			delay--;
+		}
+
+		/* create "else" branch first (since "then" block should
+		 * frequently/always end up being a fall-thru):
+		 */
+		br = ir3_BR(block);
+		br->cat0.inv = true;
+		br->cat0.target = block->successors[1];
+
+		/* NOTE: we have to hard code delay of 6 above, since
+		 * we want to insert the nop's before constructing the
+		 * branch.  Throw in an assert so we notice if this
+		 * ever breaks on future generation:
 		 */
-		struct ir3_instruction *next = instr->next;
-		int cnt = trysched(ctx, instr);
+		debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);
 
-		if (cnt == DELAYED)
-			cnt = block_sched_undelayed(ctx, block);
+		br = ir3_BR(block);
+		br->cat0.target = block->successors[0];
 
-		/* -1 is signal to return up stack, but to us means same as 0: */
-		cnt = MAX2(0, cnt);
-		cnt += ctx->cnt;
-		instr = next;
-
-		/* if deepest remaining instruction cannot be scheduled, try
-		 * the increasingly more shallow instructions until needed
-		 * number of delay slots is filled:
-		 */
-		while (instr && (cnt > ctx->cnt)) {
-			next = instr->next;
-			trysched(ctx, instr);
-			instr = next;
-		}
+	} else if (block->successors[0]) {
+		/* otherwise unconditional jump to next block: */
+		struct ir3_instruction *jmp;
 
-		/* and if we run out of instructions that can be scheduled,
-		 * then it is time for nop's:
-		 */
-		while (cnt > ctx->cnt)
-			schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
+		jmp = ir3_JUMP(block);
+		jmp->cat0.target = block->successors[0];
 	}
 
-	/* at this point, scheduled list is in reverse order, so fix that: */
-	block->head = reverse(ctx->scheduled);
+	/* NOTE: if we kept track of the predecessors, we could do a better
+	 * job w/ (jp) flags.. every node w/ > 1 predecessor is a join point.
+	 * Note that as we eliminate blocks which contain only an unconditional
+	 * jump we probably need to propagate (jp) flag..
+	 */
 }
 
-int ir3_block_sched(struct ir3_block *block)
+/* this is needed to ensure later RA stage succeeds: */
+static void
+sched_insert_parallel_copies(struct ir3_block *block)
+{
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		if (is_meta(instr) && (instr->opc == OPC_META_PHI)) {
+			struct ir3_register *reg;
+			foreach_src(reg, instr) {
+				struct ir3_instruction *src = reg->instr;
+				struct ir3_instruction *mov =
+					ir3_MOV(src->block, src, TYPE_U32);
+				mov->regs[0]->flags |= IR3_REG_PHI_SRC;
+				mov->regs[0]->instr = instr;
+				reg->instr = mov;
+			}
+		}
+	}
+}
+
+int ir3_sched(struct ir3 *ir)
 {
 	struct ir3_sched_ctx ctx = {0};
-	ir3_clear_mark(block->shader);
-	block_sched(&ctx, block);
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		sched_insert_parallel_copies(block);
+	}
+	ir3_clear_mark(ir);
+	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+		sched_block(&ctx, block);
+	}
 	if (ctx.error)
 		return -1;
 	return 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_shader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_shader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_shader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_shader.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,8 @@
 {
 	if (v->ir)
 		ir3_destroy(v->ir);
-	fd_bo_del(v->bo);
+	if (v->bo)
+		fd_bo_del(v->bo);
 	free(v);
 }
 
@@ -127,7 +128,7 @@
 assemble_variant(struct ir3_shader_variant *v)
 {
 	struct fd_context *ctx = fd_context(v->shader->pctx);
-	uint32_t gpu_id = ir3_shader_gpuid(v->shader);
+	uint32_t gpu_id = v->shader->compiler->gpu_id;
 	uint32_t sz, *bin;
 
 	bin = ir3_shader_assemble(v, gpu_id);
@@ -139,6 +140,32 @@
 
 	memcpy(fd_bo_map(v->bo), bin, sz);
 
+	if (fd_mesa_debug & FD_DBG_DISASM) {
+		struct ir3_shader_key key = v->key;
+		DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
+			key.binning_pass, key.color_two_side, key.half_precision);
+		ir3_shader_disasm(v, bin);
+	}
+
+	if (fd_mesa_debug & FD_DBG_SHADERDB) {
+		/* print generic shader info: */
+		fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n",
+				ir3_shader_stage(v->shader),
+				v->shader->id, v->id,
+				v->info.instrs_count,
+				v->info.sizedwords);
+		fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u half, %u full\n",
+				ir3_shader_stage(v->shader),
+				v->shader->id, v->id,
+				v->info.max_half_reg + 1,
+				v->info.max_reg + 1);
+		fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n",
+				ir3_shader_stage(v->shader),
+				v->shader->id, v->id,
+				v->info.max_const + 1,
+				v->constlen);
+	}
+
 	free(bin);
 
 	/* no need to keep the ir around beyond this point: */
@@ -146,27 +173,16 @@
 	v->ir = NULL;
 }
 
-/* reset before attempting to compile again.. */
-static void reset_variant(struct ir3_shader_variant *v, const char *msg)
-{
-	debug_error(msg);
-	v->inputs_count = 0;
-	v->outputs_count = 0;
-	v->total_in = 0;
-	v->has_samp = false;
-	v->immediates_count = 0;
-}
-
 static struct ir3_shader_variant *
 create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
 {
 	struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
-	const struct tgsi_token *tokens = shader->tokens;
 	int ret;
 
 	if (!v)
 		return NULL;
 
+	v->id = ++shader->variant_count;
 	v->shader = shader;
 	v->key = key;
 	v->type = shader->type;
@@ -174,25 +190,10 @@
 	if (fd_mesa_debug & FD_DBG_DISASM) {
 		DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
 			key.binning_pass, key.color_two_side, key.half_precision);
-		tgsi_dump(tokens, 0);
-	}
-
-	if (fd_mesa_debug & FD_DBG_NIR) {
-		ret = ir3_compile_shader_nir(v, tokens, key);
-		if (ret)
-			reset_variant(v, "NIR compiler failed, fallback to TGSI!");
-	} else {
-		ret = -1;
-	}
-
-	if (ret) {
-		ret = ir3_compile_shader(v, tokens, key, true);
-		if (ret) {
-			reset_variant(v, "new compiler failed, trying without copy propagation!");
-			ret = ir3_compile_shader(v, tokens, key, false);
-		}
+		tgsi_dump(shader->tokens, 0);
 	}
 
+	ret = ir3_compile_shader_nir(shader->compiler, v);
 	if (ret) {
 		debug_error("compile failed!");
 		goto fail;
@@ -204,12 +205,6 @@
 		goto fail;
 	}
 
-	if (fd_mesa_debug & FD_DBG_DISASM) {
-		DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
-			key.binning_pass, key.color_two_side, key.half_precision);
-		disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
-	}
-
 	return v;
 
 fail:
@@ -217,13 +212,6 @@
 	return NULL;
 }
 
-uint32_t
-ir3_shader_gpuid(struct ir3_shader *shader)
-{
-	struct fd_context *ctx = fd_context(shader->pctx);
-	return ctx->screen->gpu_id;
-}
-
 struct ir3_shader_variant *
 ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
 {
@@ -261,8 +249,10 @@
 
 	/* compile new variant if it doesn't exist already: */
 	v = create_variant(shader, key);
-	v->next = shader->variants;
-	shader->variants = v;
+	if (v) {
+		v->next = shader->variants;
+		shader->variants = v;
+	}
 
 	return v;
 }
@@ -282,12 +272,372 @@
 }
 
 struct ir3_shader *
-ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens,
+ir3_shader_create(struct pipe_context *pctx,
+		const struct pipe_shader_state *cso,
 		enum shader_t type)
 {
 	struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
+	shader->compiler = fd_context(pctx)->screen->compiler;
+	shader->id = ++shader->compiler->shader_count;
 	shader->pctx = pctx;
 	shader->type = type;
-	shader->tokens = tgsi_dup_tokens(tokens);
+	shader->tokens = tgsi_dup_tokens(cso->tokens);
+	shader->stream_output = cso->stream_output;
+	if (fd_mesa_debug & FD_DBG_SHADERDB) {
+		/* if shader-db run, create a standard variant immediately
+		 * (as otherwise nothing will trigger the shader to be
+		 * actually compiled)
+		 */
+		static struct ir3_shader_key key = {};
+		ir3_shader_variant(shader, key);
+	}
 	return shader;
 }
+
+static void dump_reg(const char *name, uint32_t r)
+{
+	if (r != regid(63,0))
+		debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
+}
+
+static void dump_semantic(struct ir3_shader_variant *so,
+		unsigned sem, const char *name)
+{
+	uint32_t regid;
+	regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
+	dump_reg(name, regid);
+}
+
+void
+ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
+{
+	struct ir3 *ir = so->ir;
+	struct ir3_register *reg;
+	const char *type = ir3_shader_stage(so->shader);
+	uint8_t regid;
+	unsigned i;
+
+	for (i = 0; i < ir->ninputs; i++) {
+		if (!ir->inputs[i]) {
+			debug_printf("; in%d unused\n", i);
+			continue;
+		}
+		reg = ir->inputs[i]->regs[0];
+		regid = reg->num;
+		debug_printf("@in(%sr%d.%c)\tin%d\n",
+				(reg->flags & IR3_REG_HALF) ? "h" : "",
+				(regid >> 2), "xyzw"[regid & 0x3], i);
+	}
+
+	for (i = 0; i < ir->noutputs; i++) {
+		if (!ir->outputs[i]) {
+			debug_printf("; out%d unused\n", i);
+			continue;
+		}
+		/* kill shows up as a virtual output.. skip it! */
+		if (is_kill(ir->outputs[i]))
+			continue;
+		reg = ir->outputs[i]->regs[0];
+		regid = reg->num;
+		debug_printf("@out(%sr%d.%c)\tout%d\n",
+				(reg->flags & IR3_REG_HALF) ? "h" : "",
+				(regid >> 2), "xyzw"[regid & 0x3], i);
+	}
+
+	for (i = 0; i < so->immediates_count; i++) {
+		debug_printf("@const(c%d.x)\t", so->first_immediate + i);
+		debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
+				so->immediates[i].val[0],
+				so->immediates[i].val[1],
+				so->immediates[i].val[2],
+				so->immediates[i].val[3]);
+	}
+
+	disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
+
+	debug_printf("; %s: outputs:", type);
+	for (i = 0; i < so->outputs_count; i++) {
+		uint8_t regid = so->outputs[i].regid;
+		ir3_semantic sem = so->outputs[i].semantic;
+		debug_printf(" r%d.%c (%u:%u)",
+				(regid >> 2), "xyzw"[regid & 0x3],
+				sem2name(sem), sem2idx(sem));
+	}
+	debug_printf("\n");
+	debug_printf("; %s: inputs:", type);
+	for (i = 0; i < so->inputs_count; i++) {
+		uint8_t regid = so->inputs[i].regid;
+		ir3_semantic sem = so->inputs[i].semantic;
+		debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
+				(regid >> 2), "xyzw"[regid & 0x3],
+				sem2name(sem), sem2idx(sem),
+				so->inputs[i].compmask,
+				so->inputs[i].inloc,
+				so->inputs[i].bary);
+	}
+	debug_printf("\n");
+
+	/* print generic shader info: */
+	debug_printf("; %s prog %d/%d: %u instructions, %d half, %d full\n",
+			type, so->shader->id, so->id,
+			so->info.instrs_count,
+			so->info.max_half_reg + 1,
+			so->info.max_reg + 1);
+
+	debug_printf("; %d const, %u constlen\n",
+			so->info.max_const + 1,
+			so->constlen);
+
+	/* print shader type specific info: */
+	switch (so->type) {
+	case SHADER_VERTEX:
+		dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
+		dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
+		break;
+	case SHADER_FRAGMENT:
+		dump_reg("pos (bary)", so->pos_regid);
+		dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
+		dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
+		/* these two are hard-coded since we don't know how to
+		 * program them to anything but all 0's...
+		 */
+		if (so->frag_coord)
+			debug_printf("; fragcoord: r0.x\n");
+		if (so->frag_face)
+			debug_printf("; fragface: hr0.x\n");
+		break;
+	case SHADER_COMPUTE:
+		break;
+	}
+
+	debug_printf("\n");
+}
+
+/* This has to reach into the fd_context a bit more than the rest of
+ * ir3, but it needs to be aligned with the compiler, so both agree
+ * on which const regs hold what.  The logic is identical between
+ * a3xx/a4xx; the only differences are small details in the actual
+ * CP_LOAD_STATE packets (which are handled inside the generation
+ * specific ctx->emit_const(_bo)() functions).
+ */
+
+#include "freedreno_resource.h"
+
+static void
+emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_constbuf_stateobj *constbuf)
+{
+	struct fd_context *ctx = fd_context(v->shader->pctx);
+	const unsigned index = 0;     /* user consts are index 0 */
+	/* TODO save/restore dirty_mask for binning pass instead: */
+	uint32_t dirty_mask = constbuf->enabled_mask;
+
+	if (dirty_mask & (1 << index)) {
+		struct pipe_constant_buffer *cb = &constbuf->cb[index];
+		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+		/* in particular, with binning shader we may end up with
+		 * unused consts, ie. we could end up w/ constlen that is
+		 * smaller than first_driver_param.  In that case truncate
+		 * the user consts early to avoid HLSQ lockup caused by
+		 * writing too many consts
+		 */
+		uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+
+		// I expect that size should be a multiple of vec4's:
+		assert(size == align(size, 4));
+
+		/* and even if the start of the const buffer is before
+		 * max_const, the end may not be, so clamp the size:
+		 */
+		size = MIN2(size, 4 * max_const);
+
+		if (size > 0) {
+			fd_wfi(ctx, ring);
+			ctx->emit_const(ring, v->type, 0,
+					cb->buffer_offset, size,
+					cb->user_buffer, cb->buffer);
+			constbuf->dirty_mask &= ~(1 << index);
+		}
+	}
+}
+
+static void
+emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		struct fd_constbuf_stateobj *constbuf)
+{
+	uint32_t offset = v->first_driver_param;  /* UBOs after user consts */
+	if (v->constlen > offset) {
+		struct fd_context *ctx = fd_context(v->shader->pctx);
+		uint32_t params = MIN2(4, v->constlen - offset) * 4;
+		uint32_t offsets[params];
+		struct fd_bo *bos[params];
+
+		for (uint32_t i = 0; i < params; i++) {
+			const uint32_t index = i + 1;   /* UBOs start at index 1 */
+			struct pipe_constant_buffer *cb = &constbuf->cb[index];
+			assert(!cb->user_buffer);
+
+			if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) {
+				offsets[i] = cb->buffer_offset;
+				bos[i] = fd_resource(cb->buffer)->bo;
+			} else {
+				offsets[i] = 0;
+				bos[i] = NULL;
+			}
+		}
+
+		fd_wfi(ctx, ring);
+		ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets);
+	}
+}
+
+static void
+emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{
+	struct fd_context *ctx = fd_context(v->shader->pctx);
+	int size = v->immediates_count;
+	uint32_t base = v->first_immediate;
+
+	/* truncate size to avoid writing constants that shader
+	 * does not use:
+	 */
+	size = MIN2(size + base, v->constlen) - base;
+
+	/* convert out of vec4: */
+	base *= 4;
+	size *= 4;
+
+	if (size > 0) {
+		fd_wfi(ctx, ring);
+		ctx->emit_const(ring, v->type, base,
+			0, size, v->immediates[0].val, NULL);
+	}
+}
+
+/* emit stream-out buffers: */
+static void
+emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{
+	uint32_t offset = v->first_driver_param + 5;  /* streamout addresses after driver-params*/
+	if (v->constlen > offset) {
+		struct fd_context *ctx = fd_context(v->shader->pctx);
+		struct fd_streamout_stateobj *so = &ctx->streamout;
+		struct pipe_stream_output_info *info = &v->shader->stream_output;
+		uint32_t params = 4;
+		uint32_t offsets[params];
+		struct fd_bo *bos[params];
+
+		for (uint32_t i = 0; i < params; i++) {
+			struct pipe_stream_output_target *target = so->targets[i];
+
+			if (target) {
+				offsets[i] = (so->offsets[i] * info->stride[i] * 4) +
+						target->buffer_offset;
+				bos[i] = fd_resource(target->buffer)->bo;
+			} else {
+				offsets[i] = 0;
+				bos[i] = NULL;
+			}
+		}
+
+		fd_wfi(ctx, ring);
+		ctx->emit_const_bo(ring, v->type, true, offset * 4, params, bos, offsets);
+	}
+}
+
+static uint32_t
+max_tf_vtx(struct ir3_shader_variant *v)
+{
+	struct fd_context *ctx = fd_context(v->shader->pctx);
+	struct fd_streamout_stateobj *so = &ctx->streamout;
+	struct pipe_stream_output_info *info = &v->shader->stream_output;
+	uint32_t maxvtxcnt = 0x7fffffff;
+
+	if (v->key.binning_pass)
+		return 0;
+	if (v->shader->stream_output.num_outputs == 0)
+		return 0;
+	if (so->num_targets == 0)
+		return 0;
+
+	/* offset to write to is:
+	 *
+	 *   total_vtxcnt = vtxcnt + offsets[i]
+	 *   offset = total_vtxcnt * stride[i]
+	 *
+	 *   offset =   vtxcnt * stride[i]       ; calculated in shader
+	 *            + offsets[i] * stride[i]   ; calculated at emit_tfbos()
+	 *
+	 * assuming for each vtx, each target buffer will have data written
+	 * up to 'offset + stride[i]', that leaves maxvtxcnt as:
+	 *
+	 *   buffer_size = (maxvtxcnt * stride[i]) + stride[i]
+	 *   maxvtxcnt   = (buffer_size - stride[i]) / stride[i]
+	 *
+	 * but shader is actually doing a less-than (rather than less-than-
+	 * equal) check, so we can drop the -stride[i].
+	 *
+	 * TODO is assumption about `offset + stride[i]` legit?
+	 */
+	for (unsigned i = 0; i < so->num_targets; i++) {
+		struct pipe_stream_output_target *target = so->targets[i];
+		unsigned stride = info->stride[i] * 4;   /* convert dwords->bytes */
+		if (target) {
+			uint32_t max = target->buffer_size / stride;
+			maxvtxcnt = MIN2(maxvtxcnt, max);
+		}
+	}
+
+	return maxvtxcnt;
+}
+
+void
+ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		const struct pipe_draw_info *info, uint32_t dirty)
+{
+	struct fd_context *ctx = fd_context(v->shader->pctx);
+
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+		struct fd_constbuf_stateobj *constbuf;
+		bool shader_dirty;
+
+		if (v->type == SHADER_VERTEX) {
+			constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+			shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP);
+		} else if (v->type == SHADER_FRAGMENT) {
+			constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+			shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP);
+		} else {
+			unreachable("bad shader type");
+			return;
+		}
+
+		emit_user_consts(v, ring, constbuf);
+		emit_ubos(v, ring, constbuf);
+		if (shader_dirty)
+			emit_immediates(v, ring);
+	}
+
+	/* emit driver params every time: */
+	/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
+	if (info && (v->type == SHADER_VERTEX)) {
+		uint32_t offset = v->first_driver_param + 4;  /* driver params after UBOs */
+		if (v->constlen >= offset) {
+			uint32_t vertex_params[4] = {
+				[IR3_DP_VTXID_BASE] = info->indexed ?
+						info->index_bias : info->start,
+				[IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
+			};
+
+			fd_wfi(ctx, ring);
+			ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
+					ARRAY_SIZE(vertex_params), vertex_params, NULL);
+
+			/* if needed, emit stream-out buffer addresses: */
+			if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
+				emit_tfbos(v, ring);
+			}
+		}
+	}
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/ir3/ir3_shader.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/ir3/ir3_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,9 +29,22 @@
 #ifndef IR3_SHADER_H_
 #define IR3_SHADER_H_
 
+#include "pipe/p_state.h"
+
 #include "ir3.h"
 #include "disasm.h"
 
+/* driver param indices: */
+enum ir3_driver_param {
+	IR3_DP_VTXID_BASE = 0,
+	IR3_DP_VTXCNT_MAX = 1,
+};
+
+/* internal semantic used for passing vtxcnt to vertex shader to
+ * implement transform feedback:
+ */
+#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0)
+
 typedef uint16_t ir3_semantic;  /* semantic name + index */
 static inline ir3_semantic
 ir3_semantic_name(uint8_t name, uint16_t index)
@@ -86,10 +99,6 @@
 	 * shader:
 	 */
 	uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
-
-	/* bitmask of sampler which produces integer outputs:
-	 */
-	uint16_t vinteger_s, finteger_s;
 };
 
 static inline bool
@@ -104,6 +113,9 @@
 struct ir3_shader_variant {
 	struct fd_bo *bo;
 
+	/* variant id (for debug) */
+	uint32_t id;
+
 	struct ir3_shader_key key;
 
 	struct ir3_info info;
@@ -196,30 +208,51 @@
 struct ir3_shader {
 	enum shader_t type;
 
+	/* shader id (for debug): */
+	uint32_t id;
+	uint32_t variant_count;
+
+	struct ir3_compiler *compiler;
+
 	struct pipe_context *pctx;
 	const struct tgsi_token *tokens;
+	struct pipe_stream_output_info stream_output;
 
 	struct ir3_shader_variant *variants;
-
-	/* so far, only used for blit_prog shader.. values for
-	 * VPC_VARYING_PS_REPL[i].MODE
-	 */
-	uint32_t vpsrepl[8];
 };
 
 void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
 
 struct ir3_shader * ir3_shader_create(struct pipe_context *pctx,
-		const struct tgsi_token *tokens, enum shader_t type);
+		const struct pipe_shader_state *cso, enum shader_t type);
 void ir3_shader_destroy(struct ir3_shader *shader);
-uint32_t ir3_shader_gpuid(struct ir3_shader *shader);
 struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
 		struct ir3_shader_key key);
+void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+
+struct fd_ringbuffer;
+void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+		const struct pipe_draw_info *info, uint32_t dirty);
+
+static inline const char *
+ir3_shader_stage(struct ir3_shader *shader)
+{
+	switch (shader->type) {
+	case SHADER_VERTEX:     return "VERT";
+	case SHADER_FRAGMENT:   return "FRAG";
+	case SHADER_COMPUTE:    return "CL";
+	default:
+		unreachable("invalid type");
+		return NULL;
+	}
+}
 
 /*
  * Helper/util:
  */
 
+#include "pipe/p_shader_tokens.h"
+
 static inline int
 ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
@@ -28,7 +26,7 @@
 
 ir3_compiler_LDADD = \
 	libfreedreno.la \
-	../../auxiliary/libgallium.la \
+	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/glsl/libnir.la \
 	$(top_builddir)/src/libglsl_util.la \
 	$(top_builddir)/src/util/libmesautil.la \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/freedreno/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/freedreno/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -120,18 +120,17 @@
 	ir3/disasm-a3xx.c \
 	ir3/instr-a3xx.h \
 	ir3/ir3.c \
-	ir3/ir3_compiler.c \
 	ir3/ir3_compiler_nir.c \
+	ir3/ir3_compiler.c \
 	ir3/ir3_compiler.h \
 	ir3/ir3_cp.c \
 	ir3/ir3_depth.c \
-	ir3/ir3_dump.c \
-	ir3/ir3_flatten.c \
 	ir3/ir3_group.c \
 	ir3/ir3.h \
 	ir3/ir3_legalize.c \
 	ir3/ir3_nir.h \
 	ir3/ir3_nir_lower_if_else.c \
+	ir3/ir3_print.c \
 	ir3/ir3_ra.c \
 	ir3/ir3_sched.c \
 	ir3/ir3_shader.c \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_batchbuffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_batchbuffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_batchbuffer.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_batchbuffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -33,20 +33,20 @@
 
 struct i915_context;
 
-static INLINE size_t
+static inline size_t
 i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
 {
    return batch->size - (batch->ptr - batch->map);
 }
 
-static INLINE boolean
+static inline boolean
 i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
                               size_t dwords)
 {
    return dwords * 4 <= i915_winsys_batchbuffer_space(batch);
 }
 
-static INLINE void
+static inline void
 i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
                                         unsigned dword)
 {
@@ -54,7 +54,7 @@
    batch->ptr += 4;
 }
 
-static INLINE void
+static inline void
 i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch,
                               float f)
 {
@@ -64,7 +64,7 @@
    i915_winsys_batchbuffer_dword_unchecked(batch, uif.ui);
 }
 
-static INLINE void
+static inline void
 i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
                               unsigned dword)
 {
@@ -72,7 +72,7 @@
    i915_winsys_batchbuffer_dword_unchecked(batch, dword);
 }
 
-static INLINE void
+static inline void
 i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
                               void *data,
                               size_t size)
@@ -83,7 +83,7 @@
    batch->ptr += size;
 }
 
-static INLINE boolean
+static inline boolean
 i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
                              struct i915_winsys_buffer **buffers,
                              int num_of_buffers)
@@ -91,7 +91,7 @@
    return batch->iws->validate_buffers(batch, buffers, num_of_buffers);
 }
 
-static INLINE int
+static inline int
 i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
                               struct i915_winsys_buffer *buffer,
                               enum i915_winsys_buffer_usage usage,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_context.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -339,7 +339,7 @@
 #define I915_DST_VARS                   4
 #define I915_DST_RECT                   8
 
-static INLINE
+static inline
 void i915_set_flush_dirty(struct i915_context *i915, unsigned flush)
 {
    i915->hardware_dirty |= I915_HW_FLUSH;
@@ -408,7 +408,7 @@
  * Inline conversion functions.  These are better-typed than the
  * macros used previously:
  */
-static INLINE struct i915_context *
+static inline struct i915_context *
 i915_context( struct pipe_context *pipe )
 {
    return (struct i915_context *)pipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_debug.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_debug.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_debug.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_debug.h	2015-09-16 14:36:09.000000000 +0000
@@ -48,13 +48,13 @@
 extern unsigned i915_debug;
 
 #ifdef DEBUG
-static INLINE boolean
+static inline boolean
 I915_DBG_ON(unsigned flags)
 {
    return i915_debug & flags;
 }
 
-static INLINE void
+static inline void
 I915_DBG(unsigned flags, const char *fmt, ...)
 {
    if (I915_DBG_ON(flags)) {
@@ -67,7 +67,7 @@
 }
 #else
 #define I915_DBG_ON(flags) (0)
-static INLINE void I915_DBG(unsigned flags, const char *fmt, ...) {}
+static inline void I915_DBG(unsigned flags, const char *fmt, ...) {}
 #endif
 
 void i915_debug_init(struct i915_screen *i915);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_fpc.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_fpc.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_fpc.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_fpc.h	2015-09-16 14:36:09.000000000 +0000
@@ -136,7 +136,7 @@
 
 /* One neat thing about the UREG representation:  
  */
-static INLINE int
+static inline int
 swizzle(int reg, uint x, uint y, uint z, uint w)
 {
    assert(x <= SRC_ONE);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_fpc_optimize.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_fpc_optimize.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_fpc_optimize.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_fpc_optimize.c	2015-09-16 14:36:09.000000000 +0000
@@ -552,7 +552,7 @@
    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
         op_has_dst(current.FullInstruction.Instruction.Opcode) &&
-        current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
+        !current.FullInstruction.Instruction.Saturate &&
         current.FullInstruction.Src[0].Register.Absolute == 0 &&
         current.FullInstruction.Src[0].Register.Negate == 0 &&
         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
@@ -582,7 +582,7 @@
         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
         op_has_dst(current->FullInstruction.Instruction.Opcode) &&
-        next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
+        !next->FullInstruction.Instruction.Saturate &&
         next->FullInstruction.Src[0].Register.Absolute == 0 &&
         next->FullInstruction.Src[0].Register.Negate == 0 &&
         unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_fpc_translate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_fpc_translate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_fpc_translate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_fpc_translate.c	2015-09-16 14:36:09.000000000 +0000
@@ -111,7 +111,7 @@
 /**
  * component-wise negation of ureg
  */
-static INLINE int
+static inline int
 negate(int reg, int x, int y, int z, int w)
 {
    /* Another neat thing about the UREG representation */
@@ -329,7 +329,7 @@
       = inst->Dst[0].Register.WriteMask;
    uint flags = 0x0;
 
-   if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+   if (inst->Instruction.Saturate)
       flags |= A0_DEST_SATURATE;
 
    if (writeMask & TGSI_WRITEMASK_X)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_prim_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_prim_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_prim_emit.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_prim_emit.c	2015-09-16 14:36:09.000000000 +0000
@@ -53,7 +53,7 @@
 /**
  * Basically a cast wrapper.
  */
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
+static inline struct setup_stage *setup_stage( struct draw_stage *stage )
 {
    return (struct setup_stage *)stage;
 }
@@ -65,7 +65,7 @@
  * have a couple of slots at the beginning (1-dword header, 4-dword
  * clip pos) that we ignore here.
  */
-static INLINE void
+static inline void
 emit_hw_vertex( struct i915_context *i915,
                 const struct vertex_header *vertex)
 {
@@ -124,7 +124,7 @@
 
 
 
-static INLINE void 
+static inline void 
 emit_prim( struct draw_stage *stage, 
 	   struct prim_header *prim,
 	   unsigned hwprim,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_prim_vbuf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_prim_vbuf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_prim_vbuf.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_prim_vbuf.c	2015-09-16 14:36:09.000000000 +0000
@@ -96,7 +96,7 @@
 /**
  * Basically a cast wrapper.
  */
-static INLINE struct i915_vbuf_render *
+static inline struct i915_vbuf_render *
 i915_vbuf_render(struct vbuf_render *render)
 {
    assert(render);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_resource.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_resource.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_resource.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_resource.h	2015-09-16 14:36:09.000000000 +0000
@@ -94,14 +94,14 @@
 extern struct u_resource_vtbl i915_buffer_vtbl;
 extern struct u_resource_vtbl i915_texture_vtbl;
 
-static INLINE struct i915_texture *i915_texture(struct pipe_resource *resource)
+static inline struct i915_texture *i915_texture(struct pipe_resource *resource)
 {
    struct i915_texture *tex = (struct i915_texture *)resource;
    assert(tex->b.vtbl == &i915_texture_vtbl);
    return tex;
 }
 
-static INLINE struct i915_buffer *i915_buffer(struct pipe_resource *resource)
+static inline struct i915_buffer *i915_buffer(struct pipe_resource *resource)
 {
    struct i915_buffer *tex = (struct i915_buffer *)resource;
    assert(tex->b.vtbl == &i915_buffer_vtbl);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_resource_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_resource_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_resource_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_resource_texture.c	2015-09-16 14:36:09.000000000 +0000
@@ -89,25 +89,25 @@
    [PIPE_TEX_FACE_NEG_Z] = 16 + 5 * 8,
 };
 
-static INLINE unsigned
+static inline unsigned
 align_nblocksx(enum pipe_format format, unsigned width, unsigned align_to)
 {
    return align(util_format_get_nblocksx(format, width), align_to);
 }
 
-static INLINE unsigned
+static inline unsigned
 align_nblocksy(enum pipe_format format, unsigned width, unsigned align_to)
 {
    return align(util_format_get_nblocksy(format, width), align_to);
 }
 
-static INLINE unsigned
+static inline unsigned
 get_pot_stride(enum pipe_format format, unsigned width)
 {
    return util_next_power_of_two(util_format_get_stride(format, width));
 }
 
-static INLINE const char*
+static inline const char*
 get_tiling_string(enum i915_winsys_buffer_tile tile)
 {
    switch(tile) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -165,6 +165,7 @@
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
          return 0;
       default:
          debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
@@ -242,6 +243,10 @@
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
       return 0;
 
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
@@ -462,21 +467,15 @@
 }
 
 static boolean
-i915_fence_signalled(struct pipe_screen *screen,
-                     struct pipe_fence_handle *fence)
-{
-   struct i915_screen *is = i915_screen(screen);
-
-   return is->iws->fence_signalled(is->iws, fence) == 1;
-}
-
-static boolean
 i915_fence_finish(struct pipe_screen *screen,
                   struct pipe_fence_handle *fence,
                   uint64_t timeout)
 {
    struct i915_screen *is = i915_screen(screen);
 
+   if (!timeout)
+      return is->iws->fence_signalled(is->iws, fence) == 1;
+
    return is->iws->fence_finish(is->iws, fence) == 1;
 }
 
@@ -564,7 +563,6 @@
    is->base.context_create = i915_create_context;
 
    is->base.fence_reference = i915_fence_reference;
-   is->base.fence_signalled = i915_fence_signalled;
    is->base.fence_finish = i915_fence_finish;
 
    i915_init_screen_resource_functions(is);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_screen.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -59,7 +59,7 @@
  */
 
 
-static INLINE struct i915_screen *
+static inline struct i915_screen *
 i915_screen(struct pipe_screen *pscreen)
 {
    return (struct i915_screen *) pscreen;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_state_dynamic.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_state_dynamic.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_state_dynamic.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_state_dynamic.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
  * (active) state every time a 4kb boundary is crossed.
  */
 
-static INLINE void set_dynamic(struct i915_context *i915,
+static inline void set_dynamic(struct i915_context *i915,
                                unsigned offset,
                                const unsigned state)
 {
@@ -60,7 +60,7 @@
 
 
 
-static INLINE void set_dynamic_array(struct i915_context *i915,
+static inline void set_dynamic_array(struct i915_context *i915,
                                      unsigned offset,
                                      const unsigned *src,
                                      unsigned dwords)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_state_immediate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_state_immediate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_state_immediate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_state_immediate.c	2015-09-16 14:36:09.000000000 +0000
@@ -39,7 +39,7 @@
 /* Convinience function to check immediate state.
  */
 
-static INLINE void set_immediate(struct i915_context *i915,
+static inline void set_immediate(struct i915_context *i915,
                                  unsigned offset,
                                  const unsigned state)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_state_inlines.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_state_inlines.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_state_inlines.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_state_inlines.h	2015-09-16 14:36:09.000000000 +0000
@@ -34,7 +34,7 @@
 #include "i915_reg.h"
 
 
-static INLINE unsigned
+static inline unsigned
 i915_translate_compare_func(unsigned func)
 {
    switch (func) {
@@ -59,7 +59,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 i915_translate_shadow_compare_func(unsigned func)
 {
    switch (func) {
@@ -84,7 +84,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 i915_translate_stencil_op(unsigned op)
 {
    switch (op) {
@@ -109,7 +109,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 i915_translate_blend_factor(unsigned factor)
 {
    switch (factor) {
@@ -148,7 +148,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 i915_translate_blend_func(unsigned mode)
 {
    switch (mode) {
@@ -168,7 +168,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 i915_translate_logic_op(unsigned opcode)
 {
    switch (opcode) {
@@ -211,7 +211,7 @@
 
 
 
-static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr )
+static inline boolean i915_validate_vertices( unsigned hw_prim, unsigned nr )
 {
    boolean ok;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/i915/i915_surface.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/i915/i915_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -120,7 +120,8 @@
 
    util_blitter_blit_generic(i915->blitter, dst_view, &dstbox,
                              src_view, src_box, src_width0, src_height0,
-                             PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+                             PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+                             FALSE);
    return;
 
 fallback:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_buffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_buffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_buffer.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_buffer.h	1970-01-01 00:00:00.000000000 +0000
@@ -1,90 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2013 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#ifndef ILO_BUFFER_H
-#define ILO_BUFFER_H
-
-#include "intel_winsys.h"
-
-#include "ilo_core.h"
-#include "ilo_dev.h"
-
-struct ilo_buffer {
-   unsigned bo_size;
-
-   struct intel_bo *bo;
-};
-
-static inline void
-ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev,
-                unsigned size, uint32_t bind, uint32_t flags)
-{
-   buf->bo_size = size;
-
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
-    *
-    *     "For buffers, which have no inherent "height," padding requirements
-    *      are different. A buffer must be padded to the next multiple of 256
-    *      array elements, with an additional 16 bytes added beyond that to
-    *      account for the L1 cache line."
-    */
-   if (bind & PIPE_BIND_SAMPLER_VIEW)
-      buf->bo_size = align(buf->bo_size, 256) + 16;
-
-   if ((bind & PIPE_BIND_VERTEX_BUFFER) && ilo_dev_gen(dev) < ILO_GEN(7.5)) {
-      /*
-       * As noted in ilo_format_translate(), we treat some 3-component formats
-       * as 4-component formats to work around hardware limitations.  Imagine
-       * the case where the vertex buffer holds a single
-       * PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.  The
-       * hardware would fail to fetch it at boundary check because the vertex
-       * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
-       * and that takes at least 8 bytes.
-       *
-       * For the workaround to work, we should add 2 to the bo size.  But that
-       * would waste a page when the bo size is already page aligned.  Let's
-       * round it to page size for now and revisit this when needed.
-       */
-      buf->bo_size = align(buf->bo_size, 4096);
-   }
-}
-
-static inline void
-ilo_buffer_cleanup(struct ilo_buffer *buf)
-{
-   intel_bo_unref(buf->bo);
-}
-
-static inline void
-ilo_buffer_set_bo(struct ilo_buffer *buf, struct intel_bo *bo)
-{
-   intel_bo_unref(buf->bo);
-   buf->bo = intel_bo_ref(bo);
-}
-
-#endif /* ILO_BUFFER_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,335 +29,122 @@
 #define ILO_BUILDER_3D_BOTTOM_H
 
 #include "genhw/genhw.h"
-#include "../ilo_shader.h"
 #include "intel_winsys.h"
 
 #include "ilo_core.h"
 #include "ilo_dev.h"
-#include "ilo_format.h"
+#include "ilo_state_cc.h"
+#include "ilo_state_raster.h"
+#include "ilo_state_sbe.h"
+#include "ilo_state_shader.h"
+#include "ilo_state_viewport.h"
+#include "ilo_state_zs.h"
+#include "ilo_vma.h"
 #include "ilo_builder.h"
 #include "ilo_builder_3d_top.h"
 
 static inline void
 gen6_3DSTATE_CLIP(struct ilo_builder *builder,
-                  const struct ilo_rasterizer_state *rasterizer,
-                  const struct ilo_shader_state *fs,
-                  bool enable_guardband,
-                  int num_viewports)
-{
-   const uint8_t cmd_len = 4;
-   uint32_t dw1, dw2, dw3, *dw;
-   int interps;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 8);
-
-   dw1 = rasterizer->clip.payload[0];
-   dw2 = rasterizer->clip.payload[1];
-   dw3 = rasterizer->clip.payload[2];
-
-   if (enable_guardband && rasterizer->clip.can_enable_guardband)
-      dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
-
-   interps = (fs) ?  ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
-
-   if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
-                  GEN6_INTERP_NONPERSPECTIVE_CENTROID |
-                  GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
-      dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
-
-   dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
-          (num_viewports - 1);
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
-   dw[1] = dw1;
-   dw[2] = dw2;
-   dw[3] = dw3;
-}
-
-static inline void
-gen6_disable_3DSTATE_CLIP(struct ilo_builder *builder)
+                  const struct ilo_state_raster *rs)
 {
    const uint8_t cmd_len = 4;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+   ILO_DEV_ASSERT(builder->dev, 6, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
-}
-
-static inline void
-gen7_internal_3dstate_sf(struct ilo_builder *builder,
-                         uint8_t cmd_len, uint32_t *dw,
-                         const struct ilo_rasterizer_sf *sf,
-                         int num_samples)
-{
-   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
-
-   assert(cmd_len == 7);
-
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
-
-   if (!sf) {
-      dw[1] = 0;
-      dw[2] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
-      dw[3] = 0;
-      dw[4] = 0;
-      dw[5] = 0;
-      dw[6] = 0;
-
-      return;
-   }
-
-   /* see rasterizer_init_sf_gen6() */
-   STATIC_ASSERT(Elements(sf->payload) >= 3);
-   dw[1] = sf->payload[0];
-   dw[2] = sf->payload[1];
-   dw[3] = sf->payload[2];
-
-   if (num_samples > 1)
-      dw[2] |= sf->dw_msaa;
-
-   dw[4] = sf->dw_depth_offset_const;
-   dw[5] = sf->dw_depth_offset_scale;
-   dw[6] = sf->dw_depth_offset_clamp;
-}
-
-static inline void
-gen8_internal_3dstate_sbe(struct ilo_builder *builder,
-                          uint8_t cmd_len, uint32_t *dw,
-                          const struct ilo_shader_state *fs,
-                          int sprite_coord_mode)
-{
-   const struct ilo_kernel_routing *routing;
-   int vue_offset, vue_len, out_count;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 8);
-
-   assert(cmd_len == 4);
-
-   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
-
-   if (!fs) {
-      dw[1] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
-      dw[2] = 0;
-      dw[3] = 0;
-      return;
-   }
-
-   routing = ilo_shader_get_kernel_routing(fs);
-
-   vue_offset = routing->source_skip;
-   assert(vue_offset % 2 == 0);
-   vue_offset /= 2;
-
-   vue_len = (routing->source_len + 1) / 2;
-   if (!vue_len)
-      vue_len = 1;
-
-   out_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
-   assert(out_count <= 32);
-
-   dw[1] = out_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
-           vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
-
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[1] |= GEN8_SBE_DW1_USE_URB_READ_LEN |
-               GEN8_SBE_DW1_USE_URB_READ_OFFSET |
-               vue_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT;
-   } else {
-      dw[1] |= vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
-   }
-
-   if (routing->swizzle_enable)
-      dw[1] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
-
-   switch (sprite_coord_mode) {
-   case PIPE_SPRITE_COORD_UPPER_LEFT:
-      dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
-      break;
-   case PIPE_SPRITE_COORD_LOWER_LEFT:
-      dw[1] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
-      break;
-   }
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
-    *
-    *     "This field (Point Sprite Texture Coordinate Enable) must be
-    *      programmed to 0 when non-point primitives are rendered."
-    *
-    * TODO We do not check that yet.
-    */
-   dw[2] = routing->point_sprite_enable;
-
-   dw[3] = routing->const_interp_enable;
-}
-
-static inline void
-gen8_internal_3dstate_sbe_swiz(struct ilo_builder *builder,
-                               uint8_t cmd_len, uint32_t *dw,
-                               const struct ilo_shader_state *fs)
-{
-   const struct ilo_kernel_routing *routing;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 8);
-
-   assert(cmd_len == 11);
-
-   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2);
-
-   if (!fs) {
-      memset(&dw[1], 0, sizeof(*dw) * (cmd_len - 1));
-      return;
-   }
-
-   routing = ilo_shader_get_kernel_routing(fs);
-
-   STATIC_ASSERT(sizeof(routing->swizzles) >= sizeof(*dw) * 8);
-   memcpy(&dw[1], routing->swizzles, sizeof(*dw) * 8);
-
-   /* WrapShortest enables */
-   dw[9] = 0;
-   dw[10] = 0;
+   /* see raster_set_gen6_3DSTATE_CLIP() */
+   dw[1] = rs->clip[0];
+   dw[2] = rs->clip[1];
+   dw[3] = rs->clip[2];
 }
 
 static inline void
 gen6_3DSTATE_SF(struct ilo_builder *builder,
-                const struct ilo_rasterizer_state *rasterizer,
-                const struct ilo_shader_state *fs,
-                int sample_count)
+                const struct ilo_state_raster *rs,
+                const struct ilo_state_sbe *sbe)
 {
    const uint8_t cmd_len = 20;
-   uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11];
-   uint32_t gen7_3dstate_sf[7];
-   const struct ilo_rasterizer_sf *sf;
-   int sprite_coord_mode;
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
-   sf = (rasterizer) ? &rasterizer->sf : NULL;
-   sprite_coord_mode = (rasterizer) ? rasterizer->state.sprite_coord_mode : 0;
-
-   gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe),
-         gen8_3dstate_sbe, fs, sprite_coord_mode);
-   gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz),
-         gen8_3dstate_sbe_swiz, fs);
-   gen7_internal_3dstate_sf(builder, Elements(gen7_3dstate_sf),
-         gen7_3dstate_sf, sf, sample_count);
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
-   dw[1] = gen8_3dstate_sbe[1];
-   memcpy(&dw[2], &gen7_3dstate_sf[1], sizeof(*dw) * 6);
-   memcpy(&dw[8], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8);
-   dw[16] = gen8_3dstate_sbe[2];
-   dw[17] = gen8_3dstate_sbe[3];
-   dw[18] = gen8_3dstate_sbe_swiz[9];
-   dw[19] = gen8_3dstate_sbe_swiz[10];
-}
+   /* see sbe_set_gen8_3DSTATE_SBE() */
+   dw[1] = sbe->sbe[0];
 
-static inline void
-gen7_3DSTATE_SF(struct ilo_builder *builder,
-                const struct ilo_rasterizer_sf *sf,
-                enum pipe_format zs_format,
-                int sample_count)
-{
-   const uint8_t cmd_len = 7;
-   uint32_t *dw;
+   /* see raster_set_gen7_3DSTATE_SF() */
+   dw[2] = rs->sf[0];
+   dw[3] = rs->sf[1];
+   dw[4] = rs->sf[2];
+   dw[5] = rs->raster[1];
+   dw[6] = rs->raster[2];
+   dw[7] = rs->raster[3];
 
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+   /* see sbe_set_gen8_3DSTATE_SBE_SWIZ() */
+   memcpy(&dw[8], sbe->swiz, sizeof(*dw) * 8);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
-   gen7_internal_3dstate_sf(builder, cmd_len, dw, sf, sample_count);
-
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
-      int hw_format;
-
-      /* separate stencil */
-      switch (zs_format) {
-      case PIPE_FORMAT_Z16_UNORM:
-         hw_format = GEN6_ZFORMAT_D16_UNORM;
-         break;
-      case PIPE_FORMAT_Z32_FLOAT:
-      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-         hw_format = GEN6_ZFORMAT_D32_FLOAT;
-         break;
-      case PIPE_FORMAT_Z24X8_UNORM:
-      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-         hw_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
-         break;
-      default:
-         /* FLOAT surface is assumed when there is no depth buffer */
-         hw_format = GEN6_ZFORMAT_D32_FLOAT;
-         break;
-      }
-
-      dw[1] |= hw_format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
-   }
+   dw[16] = sbe->sbe[1];
+   dw[17] = sbe->sbe[2];
+   /* WrapShortest enables */
+   dw[18] = 0;
+   dw[19] = 0;
 }
 
 static inline void
-gen8_3DSTATE_SF(struct ilo_builder *builder,
-                const struct ilo_rasterizer_sf *sf)
+gen7_3DSTATE_SF(struct ilo_builder *builder,
+                const struct ilo_state_raster *rs)
 {
-   const uint8_t cmd_len = 4;
+   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 4 : 7;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 8, 8);
+   ILO_DEV_ASSERT(builder->dev, 7, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
 
-   /* see rasterizer_init_sf_gen8() */
-   STATIC_ASSERT(Elements(sf->payload) >= 3);
-   dw[1] = sf->payload[0];
-   dw[2] = sf->payload[1];
-   dw[3] = sf->payload[2];
+   /* see raster_set_gen7_3DSTATE_SF() or raster_set_gen8_3DSTATE_SF() */
+   dw[1] = rs->sf[0];
+   dw[2] = rs->sf[1];
+   dw[3] = rs->sf[2];
+   if (ilo_dev_gen(builder->dev) < ILO_GEN(8)) {
+      dw[4] = rs->raster[1];
+      dw[5] = rs->raster[2];
+      dw[6] = rs->raster[3];
+   }
 }
 
 static inline void
 gen7_3DSTATE_SBE(struct ilo_builder *builder,
-                 const struct ilo_shader_state *fs,
-                 int sprite_coord_mode)
+                 const struct ilo_state_sbe *sbe)
 {
    const uint8_t cmd_len = 14;
-   uint32_t gen8_3dstate_sbe[4], gen8_3dstate_sbe_swiz[11];
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   gen8_internal_3dstate_sbe(builder, Elements(gen8_3dstate_sbe),
-         gen8_3dstate_sbe, fs, sprite_coord_mode);
-   gen8_internal_3dstate_sbe_swiz(builder, Elements(gen8_3dstate_sbe_swiz),
-         gen8_3dstate_sbe_swiz, fs);
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
-   dw[1] = gen8_3dstate_sbe[1];
-   memcpy(&dw[2], &gen8_3dstate_sbe_swiz[1], sizeof(*dw) * 8);
-   dw[10] = gen8_3dstate_sbe[2];
-   dw[11] = gen8_3dstate_sbe[3];
-   dw[12] = gen8_3dstate_sbe_swiz[9];
-   dw[13] = gen8_3dstate_sbe_swiz[10];
+   /* see sbe_set_gen8_3DSTATE_SBE() and sbe_set_gen8_3DSTATE_SBE_SWIZ() */
+   dw[1] = sbe->sbe[0];
+   memcpy(&dw[2], sbe->swiz, sizeof(*dw) * 8);
+   dw[10] = sbe->sbe[1];
+   dw[11] = sbe->sbe[2];
+
+   /* WrapShortest enables */
+   dw[12] = 0;
+   dw[13] = 0;
 }
 
 static inline void
 gen8_3DSTATE_SBE(struct ilo_builder *builder,
-                 const struct ilo_shader_state *fs,
-                 int sprite_coord_mode)
+                 const struct ilo_state_sbe *sbe)
 {
    const uint8_t cmd_len = 4;
    uint32_t *dw;
@@ -366,12 +153,16 @@
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   gen8_internal_3dstate_sbe(builder, cmd_len, dw, fs, sprite_coord_mode);
+   /* see sbe_set_gen8_3DSTATE_SBE() */
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
+   dw[1] = sbe->sbe[0];
+   dw[2] = sbe->sbe[1];
+   dw[3] = sbe->sbe[2];
 }
 
 static inline void
 gen8_3DSTATE_SBE_SWIZ(struct ilo_builder *builder,
-                      const struct ilo_shader_state *fs)
+                      const struct ilo_state_sbe *sbe)
 {
    const uint8_t cmd_len = 11;
    uint32_t *dw;
@@ -380,12 +171,17 @@
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   gen8_internal_3dstate_sbe_swiz(builder, cmd_len, dw, fs);
+   dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SBE_SWIZ) | (cmd_len - 2);
+   /* see sbe_set_gen8_3DSTATE_SBE_SWIZ() */
+   memcpy(&dw[1], sbe->swiz, sizeof(*dw) * 8);
+   /* WrapShortest enables */
+   dw[9] = 0;
+   dw[10] = 0;
 }
 
 static inline void
 gen8_3DSTATE_RASTER(struct ilo_builder *builder,
-                    const struct ilo_rasterizer_sf *sf)
+                    const struct ilo_state_raster *rs)
 {
    const uint8_t cmd_len = 5;
    uint32_t *dw;
@@ -395,232 +191,108 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_RASTER) | (cmd_len - 2);
-   dw[1] = sf->dw_raster;
-   dw[2] = sf->dw_depth_offset_const;
-   dw[3] = sf->dw_depth_offset_scale;
-   dw[4] = sf->dw_depth_offset_clamp;
+   /* see raster_set_gen8_3DSTATE_RASTER() */
+   dw[1] = rs->raster[0];
+   dw[2] = rs->raster[1];
+   dw[3] = rs->raster[2];
+   dw[4] = rs->raster[3];
 }
 
 static inline void
 gen6_3DSTATE_WM(struct ilo_builder *builder,
-                const struct ilo_shader_state *fs,
-                const struct ilo_rasterizer_state *rasterizer,
-                bool dual_blend, bool cc_may_kill)
+                const struct ilo_state_raster *rs,
+                const struct ilo_state_ps *ps,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 9;
-   const int num_samples = 1;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5, dw6, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-   dw6 = cso->payload[3];
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
-    *
-    *     "This bit (Statistics Enable) must be disabled if either of these
-    *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
-    *      Enable or Depth Buffer Resolve Enable."
-    */
-   dw4 |= GEN6_WM_DW4_STATISTICS;
-
-   if (cc_may_kill)
-      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
-   if (dual_blend)
-      dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
-
-   dw5 |= rasterizer->wm.payload[0];
-
-   dw6 |= rasterizer->wm.payload[1];
-
-   if (num_samples > 1) {
-      dw6 |= rasterizer->wm.dw_msaa_rast |
-             rasterizer->wm.dw_msaa_disp;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(fs);
-   dw[2] = dw2;
-   dw[3] = 0; /* scratch */
-   dw[4] = dw4;
-   dw[5] = dw5;
-   dw[6] = dw6;
+   dw[1] = kernel_offset;
+   /* see raster_set_gen6_3dstate_wm() and ps_set_gen6_3dstate_wm() */
+   dw[2] = ps->ps[0];
+   dw[3] = ps->ps[1];
+   dw[4] = rs->wm[0] | ps->ps[2];
+   dw[5] = rs->wm[1] | ps->ps[3];
+   dw[6] = rs->wm[2] | ps->ps[4];
    dw[7] = 0; /* kernel 1 */
    dw[8] = 0; /* kernel 2 */
 }
 
 static inline void
-gen6_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op)
-{
-   const uint8_t cmd_len = 9;
-   const int max_threads = (builder->dev->gt == 2) ? 80 : 40;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 6);
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
-   dw[4] = hiz_op;
-   /* honor the valid range even if dispatching is disabled */
-   dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-   dw[6] = 0;
-   dw[7] = 0;
-   dw[8] = 0;
-}
-
-static inline void
 gen7_3DSTATE_WM(struct ilo_builder *builder,
-                const struct ilo_shader_state *fs,
-                const struct ilo_rasterizer_state *rasterizer,
-                bool cc_may_kill)
+                const struct ilo_state_raster *rs,
+                const struct ilo_state_ps *ps)
 {
    const uint8_t cmd_len = 3;
-   const int num_samples = 1;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw1, dw2, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   /* see rasterizer_init_wm_gen7() */
-   dw1 = rasterizer->wm.payload[0];
-   dw2 = rasterizer->wm.payload[1];
-
-   /* see fs_init_cso_gen7() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw1 |= cso->payload[3];
-
-   dw1 |= GEN7_WM_DW1_STATISTICS;
-
-   if (cc_may_kill)
-      dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL;
-
-   if (num_samples > 1) {
-      dw1 |= rasterizer->wm.dw_msaa_rast;
-      dw2 |= rasterizer->wm.dw_msaa_disp;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
-   dw[1] = dw1;
-   dw[2] = dw2;
+   /* see raster_set_gen8_3DSTATE_WM() and ps_set_gen7_3dstate_wm() */
+   dw[1] = rs->wm[0] | ps->ps[0];
+   dw[2] = ps->ps[1];
 }
 
 static inline void
 gen8_3DSTATE_WM(struct ilo_builder *builder,
-                const struct ilo_shader_state *fs,
-                const struct ilo_rasterizer_state *rasterizer)
+                const struct ilo_state_raster *rs)
 {
    const uint8_t cmd_len = 2;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw1, interps, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   /* see rasterizer_get_wm_gen8() */
-   dw1 = rasterizer->wm.payload[0];
-   dw1 |= GEN7_WM_DW1_STATISTICS;
-
-   /* see fs_init_cso_gen8() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   interps = cso->payload[4];
-
-   assert(!(dw1 & interps));
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
-   dw[1] = dw1 | interps;
-}
-
-static inline void
-gen7_hiz_3DSTATE_WM(struct ilo_builder *builder, uint32_t hiz_op)
-{
-   const uint8_t cmd_len = 3;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
-   dw[1] = hiz_op;
-   dw[2] = 0;
+   /* see raster_set_gen8_3DSTATE_WM() */
+   dw[1] = rs->wm[0];
 }
 
 static inline void
 gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_builder *builder,
-                              const struct ilo_dsa_state *dsa)
+                              const struct ilo_state_cc *cc)
 {
    const uint8_t cmd_len = 3;
-   uint32_t dw1, dw2, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   dw1 = dsa->payload[0];
-   dw2 = dsa->payload[1];
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_DEPTH_STENCIL) | (cmd_len - 2);
-   dw[1] = dw1;
-   dw[2] = dw2;
+   /* see cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL() */
+   dw[1] = cc->ds[0];
+   dw[2] = cc->ds[1];
 }
 
 static inline void
-gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder, uint32_t op,
-                      uint16_t width, uint16_t height, int sample_count)
+gen8_3DSTATE_WM_HZ_OP(struct ilo_builder *builder,
+                      const struct ilo_state_raster *rs,
+                      uint16_t width, uint16_t height)
 {
    const uint8_t cmd_len = 5;
-   const uint32_t sample_mask = ((1 << sample_count) - 1) | 0x1;
-   uint32_t dw1, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   dw1 = op;
-
-   switch (sample_count) {
-   case 0:
-   case 1:
-      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1;
-      break;
-   case 2:
-      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_2;
-      break;
-   case 4:
-      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_4;
-      break;
-   case 8:
-      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_8;
-      break;
-   case 16:
-      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_16;
-      break;
-   default:
-      assert(!"unsupported sample count");
-      dw1 |= GEN8_WM_HZ_DW1_NUMSAMPLES_1;
-      break;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_WM_HZ_OP) | (cmd_len - 2);
-   dw[1] = dw1;
+   /* see raster_set_gen8_3dstate_wm_hz_op() */
+   dw[1] = rs->wm[1];
    dw[2] = 0;
-   /* exclusive? */
+   /* exclusive */
    dw[3] = height << 16 | width;
-   dw[4] = sample_mask;
+   dw[4] = rs->wm[2];
 }
 
 static inline void
@@ -656,100 +328,48 @@
 
 static inline void
 gen7_3DSTATE_PS(struct ilo_builder *builder,
-                const struct ilo_shader_state *fs,
-                bool dual_blend)
+                const struct ilo_state_ps *ps,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 8;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   /* see fs_init_cso_gen7() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-
-   if (dual_blend)
-      dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(fs);
-   dw[2] = dw2;
-   dw[3] = 0; /* scratch */
-   dw[4] = dw4;
-   dw[5] = dw5;
+   dw[1] = kernel_offset;
+   /* see ps_set_gen7_3DSTATE_PS() */
+   dw[2] = ps->ps[2];
+   dw[3] = ps->ps[3];
+   dw[4] = ps->ps[4];
+   dw[5] = ps->ps[5];
    dw[6] = 0; /* kernel 1 */
    dw[7] = 0; /* kernel 2 */
 }
 
 static inline void
-gen7_disable_3DSTATE_PS(struct ilo_builder *builder)
-{
-   const uint8_t cmd_len = 8;
-   int max_threads;
-   uint32_t dw4, *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
-   /* GPU hangs if none of the dispatch enable bits is set */
-   dw4 = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
-   /* see brwCreateContext() */
-   switch (ilo_dev_gen(builder->dev)) {
-   case ILO_GEN(7.5):
-      max_threads = (builder->dev->gt == 3) ? 408 :
-                    (builder->dev->gt == 2) ? 204 : 102;
-      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
-      break;
-   case ILO_GEN(7):
-   default:
-      max_threads = (builder->dev->gt == 2) ? 172 : 48;
-      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-      break;
-   }
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
-   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
-   dw[4] = dw4;
-   dw[5] = 0;
-   dw[6] = 0;
-   dw[7] = 0;
-}
-
-static inline void
 gen8_3DSTATE_PS(struct ilo_builder *builder,
-                const struct ilo_shader_state *fs)
+                const struct ilo_state_ps *ps,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 12;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw3, dw6, dw7, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   /* see fs_init_cso_gen8() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw3 = cso->payload[0];
-   dw6 = cso->payload[1];
-   dw7 = cso->payload[2];
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(fs);
+   dw[1] = kernel_offset;
    dw[2] = 0;
-   dw[3] = dw3;
-   dw[4] = 0; /* scratch */
+   /* see ps_set_gen8_3DSTATE_PS() */
+   dw[3] = ps->ps[0];
+   dw[4] = ps->ps[1];
    dw[5] = 0;
-   dw[6] = dw6;
-   dw[7] = dw7;
+   dw[6] = ps->ps[2];
+   dw[7] = ps->ps[3];
    dw[8] = 0; /* kernel 1 */
    dw[9] = 0;
    dw[10] = 0; /* kernel 2 */
@@ -758,66 +378,34 @@
 
 static inline void
 gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder,
-                      const struct ilo_shader_state *fs,
-                      bool cc_may_kill, bool per_sample)
+                      const struct ilo_state_ps *ps)
 {
    const uint8_t cmd_len = 2;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw1, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   /* see fs_init_cso_gen8() */
-   cso = ilo_shader_get_kernel_cso(fs);
-   dw1 = cso->payload[3];
-
-   if (cc_may_kill)
-      dw1 |= GEN8_PSX_DW1_DISPATCH_ENABLE | GEN8_PSX_DW1_KILL_PIXEL;
-   if (per_sample)
-      dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_EXTRA) | (cmd_len - 2);
-   dw[1] = dw1;
+   /* see ps_set_gen8_3DSTATE_PS_EXTRA() */
+   dw[1] = ps->ps[4];
 }
 
 static inline void
 gen8_3DSTATE_PS_BLEND(struct ilo_builder *builder,
-                      const struct ilo_blend_state *blend,
-                      const struct ilo_fb_state *fb,
-                      const struct ilo_dsa_state *dsa)
+                      const struct ilo_state_cc *cc)
 {
    const uint8_t cmd_len = 2;
-   uint32_t dw1, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   dw1 = 0;
-   if (blend->alpha_to_coverage && fb->num_samples > 1)
-      dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE;
-
-   if (fb->state.nr_cbufs && fb->state.cbufs[0]) {
-      const struct ilo_fb_blend_caps *caps = &fb->blend_caps[0];
-
-      dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT;
-      if (caps->can_blend) {
-         if (caps->dst_alpha_forced_one)
-            dw1 |= blend->dw_ps_blend_dst_alpha_forced_one;
-         else
-            dw1 |= blend->dw_ps_blend;
-      }
-
-      if (caps->can_alpha_test)
-         dw1 |= dsa->dw_ps_blend_alpha;
-   } else {
-      dw1 |= dsa->dw_ps_blend_alpha;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_BLEND) | (cmd_len - 2);
-   dw[1] = dw1;
+   /* see cc_set_gen8_3DSTATE_PS_BLEND() */
+   dw[1] = cc->blend[0];
 }
 
 static inline void
@@ -862,101 +450,49 @@
 
 static inline void
 gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
-                         int num_samples, const uint32_t *pattern,
-                         bool pixel_location_center)
+                         const struct ilo_state_raster *rs,
+                         const struct ilo_state_sample_pattern *pattern,
+                         uint8_t sample_count)
 {
    const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3;
-   uint32_t dw1, dw2, dw3, *dw;
+   const uint32_t *packed = (const uint32_t *)
+      ilo_state_sample_pattern_get_packed_offsets(pattern,
+            builder->dev, sample_count);
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
-   dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER :
-      GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
-
-   switch (num_samples) {
-   case 0:
-   case 1:
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
-      dw2 = 0;
-      dw3 = 0;
-      break;
-   case 4:
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
-      dw2 = pattern[0];
-      dw3 = 0;
-      break;
-   case 8:
-      assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7));
-      dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
-      dw2 = pattern[0];
-      dw3 = pattern[1];
-      break;
-   default:
-      assert(!"unsupported sample count");
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
-      dw2 = 0;
-      dw3 = 0;
-      break;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2);
-   dw[1] = dw1;
-   dw[2] = dw2;
+   /* see raster_set_gen8_3DSTATE_MULTISAMPLE() */
+   dw[1] = rs->sample[0];
+
+   /* see sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN() */
+   dw[2] = (sample_count >= 4) ? packed[0] : 0;
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
-      dw[3] = dw3;
+      dw[3] = (sample_count >= 8) ? packed[1] : 0;
 }
 
 static inline void
 gen8_3DSTATE_MULTISAMPLE(struct ilo_builder *builder,
-                         int num_samples,
-                         bool pixel_location_center)
+                         const struct ilo_state_raster *rs)
 {
    const uint8_t cmd_len = 2;
-   uint32_t dw1, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   dw1 = (pixel_location_center) ? GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER :
-      GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
-
-   switch (num_samples) {
-   case 0:
-   case 1:
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
-      break;
-   case 2:
-      dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2;
-      break;
-   case 4:
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
-      break;
-   case 8:
-      dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
-      break;
-   case 16:
-      dw1 |= GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16;
-      break;
-   default:
-      assert(!"unsupported sample count");
-      dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
-      break;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | (cmd_len - 2);
-   dw[1] = dw1;
+   /* see raster_set_gen8_3DSTATE_MULTISAMPLE() */
+   dw[1] = rs->sample[0];
 }
 
 static inline void
 gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_builder *builder,
-                            const uint32_t *pattern_1x,
-                            const uint32_t *pattern_2x,
-                            const uint32_t *pattern_4x,
-                            const uint32_t *pattern_8x,
-                            const uint32_t *pattern_16x)
+                            const struct ilo_state_sample_pattern *pattern)
 {
    const uint8_t cmd_len = 9;
    uint32_t *dw;
@@ -966,61 +502,32 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_SAMPLE_PATTERN) | (cmd_len - 2);
-   dw[1] = pattern_16x[3];
-   dw[2] = pattern_16x[2];
-   dw[3] = pattern_16x[1];
-   dw[4] = pattern_16x[0];
-   dw[5] = pattern_8x[1];
-   dw[6] = pattern_8x[0];
-   dw[7] = pattern_4x[0];
-   dw[8] = pattern_1x[0] << 16 |
-           pattern_2x[0];
+   dw[1] = 0;
+   dw[2] = 0;
+   dw[3] = 0;
+   dw[4] = 0;
+   /* see sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN() */
+   dw[5] = ((const uint32_t *) pattern->pattern_8x)[1];
+   dw[6] = ((const uint32_t *) pattern->pattern_8x)[0];
+   dw[7] = ((const uint32_t *) pattern->pattern_4x)[0];
+   dw[8] = pattern->pattern_1x[0] << 16 |
+           ((const uint16_t *) pattern->pattern_2x)[0];
 }
 
 static inline void
 gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
-                         unsigned sample_mask)
-{
-   const uint8_t cmd_len = 2;
-   const unsigned valid_mask = 0xf;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 6);
-
-   sample_mask &= valid_mask;
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
-   dw[1] = sample_mask;
-}
-
-static inline void
-gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder,
-                         unsigned sample_mask,
-                         int num_samples)
+                         const struct ilo_state_raster *rs)
 {
    const uint8_t cmd_len = 2;
-   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
-    *
-    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
-    *      (Sample Mask) must be zero.
-    *
-    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
-    *      must be zero."
-    */
-   sample_mask &= valid_mask;
+   ILO_DEV_ASSERT(builder->dev, 6, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);
-   dw[1] = sample_mask;
+   /* see raster_set_gen6_3DSTATE_SAMPLE_MASK() */
+   dw[1] = rs->sample[1];
 }
 
 static inline void
@@ -1070,95 +577,75 @@
 
 static inline void
 gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder,
-                                 int x_offset, int y_offset)
+                                 const struct ilo_state_poly_stipple *stipple)
 {
    const uint8_t cmd_len = 2;
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   assert(x_offset >= 0 && x_offset <= 31);
-   assert(y_offset >= 0 && y_offset <= 31);
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | (cmd_len - 2);
-   dw[1] = x_offset << 8 | y_offset;
+   /* constant */
+   dw[1] = 0;
 }
 
 static inline void
 gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder,
-                                  const struct pipe_poly_stipple *pattern)
+                                  const struct ilo_state_poly_stipple *stipple)
 {
    const uint8_t cmd_len = 33;
    uint32_t *dw;
-   int i;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | (cmd_len - 2);
-   dw++;
-
-   STATIC_ASSERT(Elements(pattern->stipple) == 32);
-   for (i = 0; i < 32; i++)
-      dw[i] = pattern->stipple[i];
+   /* see poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN() */
+   memcpy(&dw[1], stipple->stipple, sizeof(stipple->stipple));
 }
 
 static inline void
 gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder,
-                          unsigned pattern, unsigned factor)
+                          const struct ilo_state_line_stipple *stipple)
 {
    const uint8_t cmd_len = 3;
-   unsigned inverse;
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   assert((pattern & 0xffff) == pattern);
-   assert(factor >= 1 && factor <= 256);
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | (cmd_len - 2);
-   dw[1] = pattern;
-
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
-      /* in U1.16 */
-      inverse = 65536 / factor;
-
-      dw[2] = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
-              factor;
-   }
-   else {
-      /* in U1.13 */
-      inverse = 8192 / factor;
-
-      dw[2] = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
-              factor;
-   }
+   /* see line_stipple_set_gen6_3DSTATE_LINE_STIPPLE() */
+   dw[1] = stipple->stipple[0];
+   dw[2] = stipple->stipple[1];
 }
 
 static inline void
-gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder)
+gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder,
+                                const struct ilo_state_raster *rs)
 {
    const uint8_t cmd_len = 3;
-   const uint32_t dw[3] = {
-      GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2),
-      0 << GEN6_AA_LINE_DW1_BIAS__SHIFT | 0,
-      0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT | 0,
-   };
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   ilo_builder_batch_write(builder, cmd_len, dw);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2);
+   /* constant */
+   dw[1] = 0 << GEN6_AA_LINE_DW1_BIAS__SHIFT |
+           0 << GEN6_AA_LINE_DW1_SLOPE__SHIFT;
+   dw[2] = 0 << GEN6_AA_LINE_DW2_CAP_BIAS__SHIFT |
+           0 << GEN6_AA_LINE_DW2_CAP_SLOPE__SHIFT;
 }
 
 static inline void
 gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
-                          const struct ilo_zs_surface *zs,
-                          bool aligned_8x4)
+                          const struct ilo_state_zs *zs)
 {
    const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
       GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) :
@@ -1172,44 +659,51 @@
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = cmd | (cmd_len - 2);
-   dw[1] = zs->payload[0];
-   dw[2] = 0;
 
-   /* see ilo_gpe_init_zs_surface() */
+   /*
+    * see zs_set_gen6_3DSTATE_DEPTH_BUFFER() and
+    * zs_set_gen7_3DSTATE_DEPTH_BUFFER()
+    */
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+      dw[1] = zs->depth[0];
+      dw[2] = 0;
       dw[3] = 0;
-      dw[4] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2];
-      dw[5] = zs->payload[3];
-      dw[6] = zs->payload[4];
-      dw[7] = zs->payload[5];
+      dw[4] = zs->depth[2];
+      dw[5] = zs->depth[3];
+      dw[6] = 0;
+      dw[7] = zs->depth[4];
 
       dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT;
 
-      if (zs->bo) {
-         ilo_builder_batch_reloc64(builder, pos + 2, zs->bo,
-               zs->payload[1], INTEL_RELOC_WRITE);
+      if (zs->z_vma) {
+         ilo_builder_batch_reloc64(builder, pos + 2, zs->z_vma->bo,
+               zs->z_vma->bo_offset + zs->depth[1],
+               (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    } else {
-      dw[3] = (aligned_8x4) ? zs->dw_aligned_8x4 : zs->payload[2];
-      dw[4] = zs->payload[3];
-      dw[5] = zs->payload[4];
-      dw[6] = zs->payload[5];
+      dw[1] = zs->depth[0];
+      dw[2] = 0;
+      dw[3] = zs->depth[2];
+      dw[4] = zs->depth[3];
+      dw[5] = 0;
+      dw[6] = zs->depth[4];
 
       if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
          dw[4] |= builder->mocs << GEN7_DEPTH_DW4_MOCS__SHIFT;
       else
          dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT;
 
-      if (zs->bo) {
-         ilo_builder_batch_reloc(builder, pos + 2, zs->bo,
-               zs->payload[1], INTEL_RELOC_WRITE);
+      if (zs->z_vma) {
+         ilo_builder_batch_reloc(builder, pos + 2, zs->z_vma->bo,
+               zs->z_vma->bo_offset + zs->depth[1],
+               (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    }
 }
 
 static inline void
 gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
-                            const struct ilo_zs_surface *zs)
+                            const struct ilo_state_zs *zs)
 {
    const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
       GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) :
@@ -1223,33 +717,38 @@
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = cmd | (cmd_len - 2);
-   /* see ilo_gpe_init_zs_surface() */
-   dw[1] = zs->payload[6];
-   dw[2] = 0;
 
+   /* see zs_set_gen6_3DSTATE_STENCIL_BUFFER() */
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
-
+      dw[1] = zs->stencil[0];
+      dw[2] = 0;
       dw[3] = 0;
-      dw[4] = zs->payload[8];
+      dw[4] = zs->stencil[2];
 
-      if (zs->separate_s8_bo) {
-         ilo_builder_batch_reloc64(builder, pos + 2,
-               zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
+      dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
+
+      if (zs->s_vma) {
+         ilo_builder_batch_reloc64(builder, pos + 2, zs->s_vma->bo,
+               zs->s_vma->bo_offset + zs->stencil[1],
+               (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    } else {
+      dw[1] = zs->stencil[0];
+      dw[2] = 0;
+
       dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT;
 
-      if (zs->separate_s8_bo) {
-         ilo_builder_batch_reloc(builder, pos + 2,
-               zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE);
+      if (zs->s_vma) {
+         ilo_builder_batch_reloc(builder, pos + 2, zs->s_vma->bo,
+               zs->s_vma->bo_offset + zs->stencil[1],
+               (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    }
 }
 
 static inline void
 gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
-                               const struct ilo_zs_surface *zs)
+                               const struct ilo_state_zs *zs)
 {
    const uint32_t cmd = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ?
       GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) :
@@ -1263,26 +762,31 @@
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = cmd | (cmd_len - 2);
-   /* see ilo_gpe_init_zs_surface() */
-   dw[1] = zs->payload[9];
-   dw[2] = 0;
 
+   /* see zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER() */
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
-
+      dw[1] = zs->hiz[0];
+      dw[2] = 0;
       dw[3] = 0;
-      dw[4] = zs->payload[11];
+      dw[4] = zs->hiz[2];
 
-      if (zs->hiz_bo) {
-         ilo_builder_batch_reloc64(builder, pos + 2,
-               zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
+      dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
+
+      if (zs->hiz_vma) {
+         ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_vma->bo,
+               zs->hiz_vma->bo_offset + zs->hiz[1],
+               (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    } else {
+      dw[1] = zs->hiz[0];
+      dw[2] = 0;
+
       dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT;
 
-      if (zs->hiz_bo) {
-         ilo_builder_batch_reloc(builder, pos + 2,
-               zs->hiz_bo, zs->payload[10], INTEL_RELOC_WRITE);
+      if (zs->hiz_vma) {
+         ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_vma->bo,
+               zs->hiz_vma->bo_offset + zs->hiz[1],
+               (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    }
 }
@@ -1440,34 +944,24 @@
 
 static inline uint32_t
 gen6_CLIP_VIEWPORT(struct ilo_builder *builder,
-                   const struct ilo_viewport_cso *viewports,
-                   unsigned num_viewports)
+                   const struct ilo_state_viewport *vp)
 {
    const int state_align = 32;
-   const int state_len = 4 * num_viewports;
+   const int state_len = 4 * vp->count;
    uint32_t state_offset, *dw;
-   unsigned i;
+   int i;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
-    *
-    *     "The viewport-related state is stored as an array of up to 16
-    *      elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
    state_offset = ilo_builder_dynamic_pointer(builder,
          ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw);
 
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->min_gbx);
-      dw[1] = fui(vp->max_gbx);
-      dw[2] = fui(vp->min_gby);
-      dw[3] = fui(vp->max_gby);
+   for (i = 0; i < vp->count; i++) {
+      /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */
+      dw[0] = vp->sf_clip[i][8];
+      dw[1] = vp->sf_clip[i][9];
+      dw[2] = vp->sf_clip[i][10];
+      dw[3] = vp->sf_clip[i][11];
 
       dw += 4;
    }
@@ -1477,38 +971,21 @@
 
 static inline uint32_t
 gen6_SF_VIEWPORT(struct ilo_builder *builder,
-                 const struct ilo_viewport_cso *viewports,
-                 unsigned num_viewports)
+                 const struct ilo_state_viewport *vp)
 {
    const int state_align = 32;
-   const int state_len = 8 * num_viewports;
+   const int state_len = 8 * vp->count;
    uint32_t state_offset, *dw;
-   unsigned i;
+   int i;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
-    *
-    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
-    *      stored as an array of up to 16 elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
    state_offset = ilo_builder_dynamic_pointer(builder,
          ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw);
 
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->m00);
-      dw[1] = fui(vp->m11);
-      dw[2] = fui(vp->m22);
-      dw[3] = fui(vp->m30);
-      dw[4] = fui(vp->m31);
-      dw[5] = fui(vp->m32);
-      dw[6] = 0;
-      dw[7] = 0;
+   for (i = 0; i < vp->count; i++) {
+      /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */
+      memcpy(dw, vp->sf_clip[i], sizeof(*dw) * 8);
 
       dw += 8;
    }
@@ -1518,298 +995,103 @@
 
 static inline uint32_t
 gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder,
-                      const struct ilo_viewport_cso *viewports,
-                      unsigned num_viewports)
+                      const struct ilo_state_viewport *vp)
 {
    const int state_align = 64;
-   const int state_len = 16 * num_viewports;
-   uint32_t state_offset, *dw;
-   unsigned i;
+   const int state_len = 16 * vp->count;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
 
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 270:
-    *
-    *     "The viewport-specific state used by both the SF and CL units
-    *      (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
-    *      of which contains the DWords described below. The start of each
-    *      element is spaced 16 DWords apart. The location of first element of
-    *      the array, as specified by both Pointer to SF_VIEWPORT and Pointer
-    *      to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   state_offset = ilo_builder_dynamic_pointer(builder,
-         ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw);
-
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->m00);
-      dw[1] = fui(vp->m11);
-      dw[2] = fui(vp->m22);
-      dw[3] = fui(vp->m30);
-      dw[4] = fui(vp->m31);
-      dw[5] = fui(vp->m32);
-      dw[6] = 0;
-      dw[7] = 0;
-
-      dw[8] = fui(vp->min_gbx);
-      dw[9] = fui(vp->max_gbx);
-      dw[10] = fui(vp->min_gby);
-      dw[11] = fui(vp->max_gby);
-
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-         dw[12] = fui(vp->min_x);
-         dw[13] = fui(vp->max_x - 1.0f);
-         dw[14] = fui(vp->min_y);
-         dw[15] = fui(vp->max_y - 1.0f);
-      } else {
-         dw[12] = 0;
-         dw[13] = 0;
-         dw[14] = 0;
-         dw[15] = 0;
-      }
-
-      dw += 16;
-   }
-
-   return state_offset;
+   /* see viewport_matrix_set_gen7_SF_CLIP_VIEWPORT() */
+   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SF_VIEWPORT,
+         state_align, state_len, (const uint32_t *) vp->sf_clip);
 }
 
 static inline uint32_t
 gen6_CC_VIEWPORT(struct ilo_builder *builder,
-                 const struct ilo_viewport_cso *viewports,
-                 unsigned num_viewports)
+                 const struct ilo_state_viewport *vp)
 {
    const int state_align = 32;
-   const int state_len = 2 * num_viewports;
-   uint32_t state_offset, *dw;
-   unsigned i;
+   const int state_len = 2 * vp->count;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
-    *
-    *     "The viewport state is stored as an array of up to 16 elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-
-   state_offset = ilo_builder_dynamic_pointer(builder,
-         ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw);
-
-   for (i = 0; i < num_viewports; i++) {
-      const struct ilo_viewport_cso *vp = &viewports[i];
-
-      dw[0] = fui(vp->min_z);
-      dw[1] = fui(vp->max_z);
-
-      dw += 2;
-   }
-
-   return state_offset;
+   /* see viewport_matrix_set_gen6_CC_VIEWPORT() */
+   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_CC_VIEWPORT,
+         state_align, state_len, (const uint32_t *) vp->cc);
 }
 
 static inline uint32_t
 gen6_SCISSOR_RECT(struct ilo_builder *builder,
-                  const struct ilo_scissor_state *scissor,
-                  unsigned num_viewports)
+                  const struct ilo_state_viewport *vp)
 {
    const int state_align = 32;
-   const int state_len = 2 * num_viewports;
+   const int state_len = 2 * vp->count;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
-    *
-    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
-    *      stored as an array of up to 16 elements..."
-    */
-   assert(num_viewports && num_viewports <= 16);
-   assert(Elements(scissor->payload) >= state_len);
-
+   /* see viewport_scissor_set_gen6_SCISSOR_RECT() */
    return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT,
-         state_align, state_len, scissor->payload);
+         state_align, state_len, (const uint32_t *) vp->scissor);
 }
 
 static inline uint32_t
 gen6_COLOR_CALC_STATE(struct ilo_builder *builder,
-                      const struct pipe_stencil_ref *stencil_ref,
-                      ubyte alpha_ref,
-                      const struct pipe_blend_color *blend_color)
+                      const struct ilo_state_cc *cc)
 {
    const int state_align = 64;
    const int state_len = 6;
-   uint32_t state_offset, *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   state_offset = ilo_builder_dynamic_pointer(builder,
-         ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw);
-
-   dw[0] = stencil_ref->ref_value[0] << 24 |
-           stencil_ref->ref_value[1] << 16 |
-           GEN6_CC_DW0_ALPHATEST_UNORM8;
-   dw[1] = alpha_ref;
-   dw[2] = fui(blend_color->color[0]);
-   dw[3] = fui(blend_color->color[1]);
-   dw[4] = fui(blend_color->color[2]);
-   dw[5] = fui(blend_color->color[3]);
-
-   return state_offset;
+   /* see cc_params_set_gen6_COLOR_CALC_STATE() */
+   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_COLOR_CALC,
+         state_align, state_len, cc->cc);
 }
 
 static inline uint32_t
 gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder,
-                         const struct ilo_dsa_state *dsa)
+                         const struct ilo_state_cc *cc)
 {
    const int state_align = 64;
    const int state_len = 3;
 
    ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
-   STATIC_ASSERT(Elements(dsa->payload) >= state_len);
-
+   /* see cc_set_gen6_DEPTH_STENCIL_STATE() */
    return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL,
-         state_align, state_len, dsa->payload);
+         state_align, state_len, cc->ds);
 }
 
 static inline uint32_t
 gen6_BLEND_STATE(struct ilo_builder *builder,
-                 const struct ilo_blend_state *blend,
-                 const struct ilo_fb_state *fb,
-                 const struct ilo_dsa_state *dsa)
+                 const struct ilo_state_cc *cc)
 {
    const int state_align = 64;
-   int state_len;
-   uint32_t state_offset, *dw;
-   unsigned num_targets, i;
+   const int state_len = 2 * cc->blend_state_count;
 
    ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
-    *
-    *     "The blend state is stored as an array of up to 8 elements..."
-    */
-   num_targets = fb->state.nr_cbufs;
-   assert(num_targets <= 8);
-
-   if (!num_targets) {
-      if (!dsa->dw_blend_alpha)
-         return 0;
-      /* to be able to reference alpha func */
-      num_targets = 1;
-   }
-
-   state_len = 2 * num_targets;
-
-   state_offset = ilo_builder_dynamic_pointer(builder,
-         ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);
-
-   for (i = 0; i < num_targets; i++) {
-      const struct ilo_blend_cso *cso = &blend->cso[i];
-
-      dw[0] = cso->payload[0];
-      dw[1] = cso->payload[1] | blend->dw_shared;
-
-      if (i < fb->state.nr_cbufs && fb->state.cbufs[i]) {
-         const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];
-
-         if (caps->can_blend) {
-            if (caps->dst_alpha_forced_one)
-               dw[0] |= cso->dw_blend_dst_alpha_forced_one;
-            else
-               dw[0] |= cso->dw_blend;
-         }
-
-         if (caps->can_logicop)
-            dw[1] |= blend->dw_logicop;
-
-         if (caps->can_alpha_test)
-            dw[1] |= dsa->dw_blend_alpha;
-      } else {
-         dw[1] |= GEN6_RT_DW1_WRITE_DISABLE_A |
-                  GEN6_RT_DW1_WRITE_DISABLE_R |
-                  GEN6_RT_DW1_WRITE_DISABLE_G |
-                  GEN6_RT_DW1_WRITE_DISABLE_B |
-                  dsa->dw_blend_alpha;
-      }
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
-       *
-       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
-       *      Dither both must be disabled."
-       *
-       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
-       * requires that anyway.
-       */
-      if (fb->num_samples > 1)
-         dw[1] |= blend->dw_alpha_mod;
-
-      dw += 2;
-   }
+   if (!state_len)
+      return 0;
 
-   return state_offset;
+   /* see cc_set_gen6_BLEND_STATE() */
+   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLEND,
+         state_align, state_len, cc->blend);
 }
 
 static inline uint32_t
 gen8_BLEND_STATE(struct ilo_builder *builder,
-                 const struct ilo_blend_state *blend,
-                 const struct ilo_fb_state *fb,
-                 const struct ilo_dsa_state *dsa)
+                 const struct ilo_state_cc *cc)
 {
    const int state_align = 64;
-   const int state_len = 1 + 2 * fb->state.nr_cbufs;
-   uint32_t state_offset, *dw;
-   unsigned i;
+   const int state_len = 1 + 2 * cc->blend_state_count;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   assert(fb->state.nr_cbufs <= 8);
-
-   state_offset = ilo_builder_dynamic_pointer(builder,
-         ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw);
-
-   dw[0] = blend->dw_shared;
-   if (fb->num_samples > 1)
-      dw[0] |= blend->dw_alpha_mod;
-   if (!fb->state.nr_cbufs || fb->blend_caps[0].can_alpha_test)
-      dw[0] |= dsa->dw_blend_alpha;
-   dw++;
-
-   for (i = 0; i < fb->state.nr_cbufs; i++) {
-      const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];
-      const struct ilo_blend_cso *cso = &blend->cso[i];
-
-      dw[0] = cso->payload[0];
-      dw[1] = cso->payload[1];
-
-      if (fb->state.cbufs[i]) {
-         if (caps->can_blend) {
-            if (caps->dst_alpha_forced_one)
-               dw[0] |= cso->dw_blend_dst_alpha_forced_one;
-            else
-               dw[0] |= cso->dw_blend;
-         }
-
-         if (caps->can_logicop)
-            dw[1] |= blend->dw_logicop;
-      } else {
-         dw[0] |= GEN8_RT_DW0_WRITE_DISABLE_A |
-                  GEN8_RT_DW0_WRITE_DISABLE_R |
-                  GEN8_RT_DW0_WRITE_DISABLE_G |
-                  GEN8_RT_DW0_WRITE_DISABLE_B;
-      }
-
-      dw += 2;
-   }
-
-   return state_offset;
+   /* see cc_set_gen8_BLEND_STATE() */
+   return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLEND,
+         state_align, state_len, &cc->blend[1]);
 }
 
 #endif /* ILO_BUILDER_3D_BOTTOM_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_3d.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_3d.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_3d.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_3d.h	2015-09-16 14:36:09.000000000 +0000
@@ -35,45 +35,45 @@
 #include "ilo_builder_3d_top.h"
 #include "ilo_builder_3d_bottom.h"
 
+struct gen6_3dprimitive_info {
+   enum gen_3dprim_type topology;
+   bool indexed;
+
+   uint32_t vertex_count;
+   uint32_t vertex_start;
+   uint32_t instance_count;
+   uint32_t instance_start;
+   int32_t vertex_base;
+};
+
 static inline void
 gen6_3DPRIMITIVE(struct ilo_builder *builder,
-                 const struct pipe_draw_info *info,
-                 const struct ilo_ib_state *ib)
+                 const struct gen6_3dprimitive_info *info)
 {
    const uint8_t cmd_len = 6;
-   const int prim = gen6_3d_translate_pipe_prim(info->mode);
-   const int vb_access = (info->indexed) ?
-      GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
-   const uint32_t vb_start = info->start +
-      ((info->indexed) ? ib->draw_start_offset : 0);
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 6);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
-           vb_access |
-           prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
-           (cmd_len - 2);
-   dw[1] = info->count;
-   dw[2] = vb_start;
+   dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2) |
+           info->topology << GEN6_3DPRIM_DW0_TYPE__SHIFT;
+   if (info->indexed)
+      dw[0] |= GEN6_3DPRIM_DW0_ACCESS_RANDOM;
+
+   dw[1] = info->vertex_count;
+   dw[2] = info->vertex_start;
    dw[3] = info->instance_count;
-   dw[4] = info->start_instance;
-   dw[5] = info->index_bias;
+   dw[4] = info->instance_start;
+   dw[5] = info->vertex_base;
 }
 
 static inline void
 gen7_3DPRIMITIVE(struct ilo_builder *builder,
-                 const struct pipe_draw_info *info,
-                 const struct ilo_ib_state *ib)
+                 const struct gen6_3dprimitive_info *info)
 {
    const uint8_t cmd_len = 7;
-   const int prim = gen6_3d_translate_pipe_prim(info->mode);
-   const int vb_access = (info->indexed) ?
-      GEN7_3DPRIM_DW1_ACCESS_RANDOM : GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL;
-   const uint32_t vb_start = info->start +
-      ((info->indexed) ? ib->draw_start_offset : 0);
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
@@ -81,12 +81,16 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2);
-   dw[1] = vb_access | prim;
-   dw[2] = info->count;
-   dw[3] = vb_start;
+
+   dw[1] = info->topology << GEN7_3DPRIM_DW1_TYPE__SHIFT;
+   if (info->indexed)
+      dw[1] |= GEN7_3DPRIM_DW1_ACCESS_RANDOM;
+
+   dw[2] = info->vertex_count;
+   dw[3] = info->vertex_start;
    dw[4] = info->instance_count;
-   dw[5] = info->start_instance;
-   dw[6] = info->index_bias;
+   dw[5] = info->instance_start;
+   dw[6] = info->vertex_base;
 }
 
 #endif /* ILO_BUILDER_3D_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,303 +29,168 @@
 #define ILO_BUILDER_3D_TOP_H
 
 #include "genhw/genhw.h"
-#include "../ilo_resource.h"
-#include "../ilo_shader.h"
 #include "intel_winsys.h"
 
 #include "ilo_core.h"
 #include "ilo_dev.h"
-#include "ilo_state_3d.h"
+#include "ilo_state_sampler.h"
+#include "ilo_state_shader.h"
+#include "ilo_state_sol.h"
+#include "ilo_state_surface.h"
+#include "ilo_state_urb.h"
+#include "ilo_state_vf.h"
+#include "ilo_vma.h"
 #include "ilo_builder.h"
 
 static inline void
 gen6_3DSTATE_URB(struct ilo_builder *builder,
-                 int vs_total_size, int gs_total_size,
-                 int vs_entry_size, int gs_entry_size)
+                 const struct ilo_state_urb *urb)
 {
    const uint8_t cmd_len = 3;
-   const int row_size = 128; /* 1024 bits */
-   int vs_alloc_size, gs_alloc_size;
-   int vs_num_entries, gs_num_entries;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 6, 6);
-
-   /* in 1024-bit URB rows */
-   vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
-   gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
-
-   /* the valid range is [1, 5] */
-   if (!vs_alloc_size)
-      vs_alloc_size = 1;
-   if (!gs_alloc_size)
-      gs_alloc_size = 1;
-   assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
-
-   /* the valid range is [24, 256] in multiples of 4 */
-   vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
-   if (vs_num_entries > 256)
-      vs_num_entries = 256;
-   assert(vs_num_entries >= 24);
-
-   /* the valid range is [0, 256] in multiples of 4 */
-   gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
-   if (gs_num_entries > 256)
-      gs_num_entries = 256;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
-   dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
-           vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
-   dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
-           (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
+   /* see urb_set_gen6_3DSTATE_URB() */
+   dw[1] = urb->urb[0];
+   dw[2] = urb->urb[1];
 }
 
 static inline void
-gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
-                                 int subop, int offset, int size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
+                                    const struct ilo_state_urb *urb)
 {
-   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
-                        GEN6_RENDER_SUBTYPE_3D |
-                        subop;
    const uint8_t cmd_len = 2;
-   const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) &&
-                             builder->dev->gt == 3) ||
-                            ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1;
    uint32_t *dw;
-   int end;
-
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
-
-   /* VS, HS, DS, GS, and PS variants */
-   assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
-          subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
-    *
-    *     "(A table that says the maximum size of each constant buffer is
-    *      16KB")
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
-    *
-    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
-    *      may not exceed the maximum value of the Constant Buffer Size."
-    *
-    * Thus, the valid range of buffer end is [0KB, 16KB].
-    */
-   end = (offset + size) / 1024;
-   if (end > 16 * slice_count) {
-      assert(!"invalid constant buffer end");
-      end = 16 * slice_count;
-   }
-
-   /* the valid range of buffer offset is [0KB, 15KB] */
-   offset = (offset + 1023) / 1024;
-   if (offset > 15 * slice_count) {
-      assert(!"invalid constant buffer offset");
-      offset = 15 * slice_count;
-   }
-
-   if (offset > end) {
-      assert(!size);
-      offset = end;
-   }
-
-   /* the valid range of buffer size is [0KB, 15KB] */
-   size = end - offset;
-   if (size > 15 * slice_count) {
-      assert(!"invalid constant buffer size");
-      size = 15 * slice_count;
-   }
-
-   assert(offset % slice_count == 0 && size % slice_count == 0);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = cmd | (cmd_len - 2);
-   dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
-           size;
-}
-
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
-                                    int offset, int size)
-{
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[0];
 }
 
 static inline void
 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
-                                    int offset, int size)
+                                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[1];
 }
 
 static inline void
 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
-                                    int offset, int size)
+                                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[2];
 }
 
 static inline void
 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
-                                    int offset, int size)
+                                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
-}
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
 
-static inline void
-gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
-                                    int offset, int size)
-{
-   gen7_3dstate_push_constant_alloc(builder,
-         GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[3];
 }
 
 static inline void
-gen7_3dstate_urb(struct ilo_builder *builder,
-                 int subop, int offset, int size,
-                 int entry_size)
+gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
+                                    const struct ilo_state_urb *urb)
 {
-   const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
-                        GEN6_RENDER_SUBTYPE_3D |
-                        subop;
    const uint8_t cmd_len = 2;
-   const int row_size = 64; /* 512 bits */
-   int alloc_size, num_entries, min_entries, max_entries;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
-
-   /* VS, HS, DS, and GS variants */
-   assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
-          subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
-
-   /* in multiples of 8KB */
-   assert(offset % 8192 == 0);
-   offset /= 8192;
-
-   /* in multiple of 512-bit rows */
-   alloc_size = (entry_size + row_size - 1) / row_size;
-   if (!alloc_size)
-      alloc_size = 1;
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
-    *
-    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
-    *      cause performance to decrease due to banking in the URB. Element
-    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
-    */
-   if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
-      alloc_size = 6;
-
-   /* in multiples of 8 */
-   num_entries = (size / row_size / alloc_size) & ~7;
-
-   switch (subop) {
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
-      switch (ilo_dev_gen(builder->dev)) {
-      case ILO_GEN(8):
-         max_entries = 2560;
-         min_entries = 64;
-         break;
-      case ILO_GEN(7.5):
-         max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
-         min_entries = (builder->dev->gt >= 2) ? 64 : 32;
-         break;
-      case ILO_GEN(7):
-      default:
-         max_entries = (builder->dev->gt == 2) ? 704 : 512;
-         min_entries = 32;
-         break;
-      }
-
-      assert(num_entries >= min_entries);
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
-      max_entries = (builder->dev->gt == 2) ? 64 : 32;
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
-      if (num_entries)
-         assert(num_entries >= 138);
-      break;
-   case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
-      switch (ilo_dev_gen(builder->dev)) {
-      case ILO_GEN(8):
-         max_entries = 960;
-         break;
-      case ILO_GEN(7.5):
-         max_entries = (builder->dev->gt >= 2) ? 640 : 256;
-         break;
-      case ILO_GEN(7):
-      default:
-         max_entries = (builder->dev->gt == 2) ? 320 : 192;
-         break;
-      }
-
-      if (num_entries > max_entries)
-         num_entries = max_entries;
-      break;
-   default:
-      break;
-   }
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = cmd | (cmd_len - 2);
-   dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT |
-           (alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
-           num_entries;
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) |
+           (cmd_len - 2);
+   /* see urb_set_gen7_3dstate_push_constant_alloc() */
+   dw[1] = urb->pcb[4];
 }
 
 static inline void
 gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2);
+   /* 3DSTATE_URB_VS DW1 is precomputed in struct ilo_state_urb (urb[0]) */
+   dw[1] = urb->urb[0];
 }
 
 static inline void
 gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2);
+   /* 3DSTATE_URB_HS DW1 is precomputed in struct ilo_state_urb (urb[1]) */
+   dw[1] = urb->urb[1];
 }
 
 static inline void
 gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2);
+   /* 3DSTATE_URB_DS DW1 is precomputed in struct ilo_state_urb (urb[2]) */
+   dw[1] = urb->urb[2];
 }
 
 static inline void
 gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
-                    int offset, int size, int entry_size)
+                    const struct ilo_state_urb *urb)
 {
-   gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
-         offset, size, entry_size);
+   const uint8_t cmd_len = 2;
+   uint32_t *dw;
+
+   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2);
+   /* 3DSTATE_URB_GS DW1 is precomputed in struct ilo_state_urb (urb[3]) */
+   dw[1] = urb->urb[3];
 }
 
 static inline void
 gen75_3DSTATE_VF(struct ilo_builder *builder,
-                 bool enable_cut_index,
-                 uint32_t cut_index)
+                 const struct ilo_state_vf *vf)
 {
    const uint8_t cmd_len = 2;
    uint32_t *dw;
@@ -334,11 +199,10 @@
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2);
-   if (enable_cut_index)
-      dw[0] |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
-
-   dw[1] = cut_index;
+   /* see vf_params_set_gen75_3DSTATE_VF() */
+   dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2) |
+           vf->cut[0];
+   dw[1] = vf->cut[1];
 }
 
 static inline void
@@ -354,40 +218,11 @@
    ilo_builder_batch_write(builder, cmd_len, &dw0);
 }
 
-/**
- * Translate a pipe primitive type to the matching hardware primitive type.
- */
-static inline int
-gen6_3d_translate_pipe_prim(unsigned prim)
-{
-   static const int prim_mapping[ILO_PRIM_MAX] = {
-      [PIPE_PRIM_POINTS]                     = GEN6_3DPRIM_POINTLIST,
-      [PIPE_PRIM_LINES]                      = GEN6_3DPRIM_LINELIST,
-      [PIPE_PRIM_LINE_LOOP]                  = GEN6_3DPRIM_LINELOOP,
-      [PIPE_PRIM_LINE_STRIP]                 = GEN6_3DPRIM_LINESTRIP,
-      [PIPE_PRIM_TRIANGLES]                  = GEN6_3DPRIM_TRILIST,
-      [PIPE_PRIM_TRIANGLE_STRIP]             = GEN6_3DPRIM_TRISTRIP,
-      [PIPE_PRIM_TRIANGLE_FAN]               = GEN6_3DPRIM_TRIFAN,
-      [PIPE_PRIM_QUADS]                      = GEN6_3DPRIM_QUADLIST,
-      [PIPE_PRIM_QUAD_STRIP]                 = GEN6_3DPRIM_QUADSTRIP,
-      [PIPE_PRIM_POLYGON]                    = GEN6_3DPRIM_POLYGON,
-      [PIPE_PRIM_LINES_ADJACENCY]            = GEN6_3DPRIM_LINELIST_ADJ,
-      [PIPE_PRIM_LINE_STRIP_ADJACENCY]       = GEN6_3DPRIM_LINESTRIP_ADJ,
-      [PIPE_PRIM_TRIANGLES_ADJACENCY]        = GEN6_3DPRIM_TRILIST_ADJ,
-      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]   = GEN6_3DPRIM_TRISTRIP_ADJ,
-      [ILO_PRIM_RECTANGLES]                  = GEN6_3DPRIM_RECTLIST,
-   };
-
-   assert(prim_mapping[prim]);
-
-   return prim_mapping[prim];
-}
-
 static inline void
-gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder, unsigned pipe_prim)
+gen8_3DSTATE_VF_TOPOLOGY(struct ilo_builder *builder,
+                         enum gen_3dprim_type topology)
 {
    const uint8_t cmd_len = 2;
-   const int prim = gen6_3d_translate_pipe_prim(pipe_prim);
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
@@ -395,12 +230,13 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_TOPOLOGY) | (cmd_len - 2);
-   dw[1] = prim;
+   dw[1] = topology << GEN8_TOPOLOGY_DW1_TYPE__SHIFT;
 }
 
 static inline void
 gen8_3DSTATE_VF_INSTANCING(struct ilo_builder *builder,
-                           int vb_index, uint32_t step_rate)
+                           const struct ilo_state_vf *vf,
+                           uint32_t attr)
 {
    const uint8_t cmd_len = 3;
    uint32_t *dw;
@@ -410,16 +246,20 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_INSTANCING) | (cmd_len - 2);
-   dw[1] = vb_index;
-   if (step_rate)
-      dw[1] |= GEN8_INSTANCING_DW1_ENABLE;
-   dw[2] = step_rate;
+   dw[1] = attr << GEN8_INSTANCING_DW1_VE_INDEX__SHIFT;
+   dw[2] = 0;
+   /* see vf_set_gen8_3DSTATE_VF_INSTANCING() */
+   if (attr >= vf->internal_ve_count) {
+      attr -= vf->internal_ve_count;
+
+      dw[1] |= vf->user_instancing[attr][0];
+      dw[2] |= vf->user_instancing[attr][1];
+   }
 }
 
 static inline void
 gen8_3DSTATE_VF_SGVS(struct ilo_builder *builder,
-                     bool vid_enable, int vid_ve, int vid_comp,
-                     bool iid_enable, int iid_ve, int iid_comp)
+                     const struct ilo_state_vf *vf)
 {
    const uint8_t cmd_len = 2;
    uint32_t *dw;
@@ -429,29 +269,19 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_VF_SGVS) | (cmd_len - 2);
-   dw[1] = 0;
-
-   if (iid_enable) {
-      dw[1] |= GEN8_SGVS_DW1_IID_ENABLE |
-               vid_comp << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT |
-               vid_ve << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT;
-   }
-
-   if (vid_enable) {
-      dw[1] |= GEN8_SGVS_DW1_VID_ENABLE |
-               vid_comp << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT |
-               vid_ve << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT;
-   }
+   /* see vf_params_set_gen8_3DSTATE_VF_SGVS() */
+   dw[1] = vf->sgvs[0];
 }
 
 static inline void
 gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
-                            const struct ilo_ve_state *ve,
-                            const struct ilo_vb_state *vb)
+                            const struct ilo_state_vf *vf,
+                            const struct ilo_state_vertex_buffer *vb,
+                            unsigned vb_count)
 {
    uint8_t cmd_len;
    uint32_t *dw;
-   unsigned pos, hw_idx;
+   unsigned pos, i;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
@@ -460,67 +290,56 @@
     *
     *     "From 1 to 33 VBs can be specified..."
     */
-   assert(ve->vb_count <= 33);
+   assert(vb_count <= 33);
 
-   if (!ve->vb_count)
+   if (!vb_count)
       return;
 
-   cmd_len = 1 + 4 * ve->vb_count;
+   cmd_len = 1 + 4 * vb_count;
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
    dw++;
    pos++;
 
-   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
-      const unsigned instance_divisor = ve->instance_divisors[hw_idx];
-      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
-      const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
+   for (i = 0; i < vb_count; i++) {
+      const struct ilo_state_vertex_buffer *b = &vb[i];
 
-      dw[0] = hw_idx << GEN6_VB_DW0_INDEX__SHIFT;
+      /* see vertex_buffer_set_gen8_vertex_buffer_state() */
+      dw[0] = b->vb[0] |
+              i << GEN6_VB_DW0_INDEX__SHIFT;
 
       if (ilo_dev_gen(builder->dev) >= ILO_GEN(8))
          dw[0] |= builder->mocs << GEN8_VB_DW0_MOCS__SHIFT;
       else
          dw[0] |= builder->mocs << GEN6_VB_DW0_MOCS__SHIFT;
 
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
-         dw[0] |= GEN7_VB_DW0_ADDR_MODIFIED;
-
-      if (instance_divisor)
-         dw[0] |= GEN6_VB_DW0_ACCESS_INSTANCEDATA;
-      else
-         dw[0] |= GEN6_VB_DW0_ACCESS_VERTEXDATA;
-
-      /* use null vb if there is no buffer or the stride is out of range */
-      if (!cso->buffer || cso->stride > 2048) {
-         dw[0] |= GEN6_VB_DW0_IS_NULL;
-         dw[1] = 0;
-         dw[2] = 0;
-         dw[3] = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ?
-            0 : instance_divisor;
-
-         continue;
-      }
-
-      dw[0] |= cso->stride << GEN6_VB_DW0_PITCH__SHIFT;
+      dw[1] = 0;
+      dw[2] = 0;
+      dw[3] = 0;
 
       if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-         const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
-         const uint32_t start_offset = cso->buffer_offset;
+         if (b->vma) {
+            ilo_builder_batch_reloc64(builder, pos + 1, b->vma->bo,
+                  b->vma->bo_offset + b->vb[1], 0);
+         }
 
-         ilo_builder_batch_reloc64(builder, pos + 1,
-               buf->bo, start_offset, 0);
-         dw[3] = buf->bo_size;
+         dw[3] |= b->vb[2];
       } else {
-         const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
-         const uint32_t start_offset = cso->buffer_offset;
-         const uint32_t end_offset = buf->bo_size - 1;
+         const int8_t elem = vf->vb_to_first_elem[i];
 
-         dw[3] = instance_divisor;
+         /* see vf_set_gen6_vertex_buffer_state() */
+         if (elem >= 0) {
+            dw[0] |= vf->user_instancing[elem][0];
+            dw[3] |= vf->user_instancing[elem][1];
+         }
 
-         ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
-         ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
+         if (b->vma) {
+            ilo_builder_batch_reloc(builder, pos + 1, b->vma->bo,
+                  b->vma->bo_offset + b->vb[1], 0);
+            ilo_builder_batch_reloc(builder, pos + 2, b->vma->bo,
+                  b->vma->bo_offset + b->vb[2], 0);
+         }
       }
 
       dw += 4;
@@ -563,248 +382,192 @@
 
 static inline void
 gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
-                             const struct ilo_ve_state *ve)
+                             const struct ilo_state_vf *vf)
 {
    uint8_t cmd_len;
    uint32_t *dw;
-   unsigned i;
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 92:
-    *
-    *    "At least one VERTEX_ELEMENT_STATE structure must be included."
-    *
-    * From the Sandy Bridge PRM, volume 2 part 1, page 93:
-    *
-    *     "Up to 34 (DevSNB+) vertex elements are supported."
-    */
-   assert(ve->count + ve->prepend_nosrc_cso >= 1);
-   assert(ve->count + ve->prepend_nosrc_cso <= 34);
-
-   STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
+   cmd_len = 1 + 2 * (vf->internal_ve_count + vf->user_ve_count);
 
-   cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso);
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2);
    dw++;
 
-   if (ve->prepend_nosrc_cso) {
-      memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload));
-      dw += 2;
-   }
-
-   for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) {
-      memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload));
-      dw += 2;
+   /*
+    * see vf_params_set_gen6_internal_ve() and
+    * vf_set_gen6_3DSTATE_VERTEX_ELEMENTS()
+    */
+   if (vf->internal_ve_count) {
+      memcpy(dw, vf->internal_ve,
+            sizeof(vf->internal_ve[0]) * vf->internal_ve_count);
+      dw += 2 * vf->internal_ve_count;
    }
 
-   if (ve->last_cso_edgeflag)
-      memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload));
+   memcpy(dw, vf->user_ve, sizeof(vf->user_ve[0]) * vf->user_ve_count);
 }
 
 static inline void
 gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
-                          const struct ilo_ib_state *ib,
-                          bool enable_cut_index)
+                          const struct ilo_state_vf *vf,
+                          const struct ilo_state_index_buffer *ib)
 {
    const uint8_t cmd_len = 3;
-   struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
-   uint32_t start_offset, end_offset;
-   int format;
-   uint32_t *dw;
+   uint32_t dw0, *dw;
    unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
-   if (!buf)
-      return;
-
-   /* this is moved to the new 3DSTATE_VF */
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5))
-      assert(!enable_cut_index);
-
-   switch (ib->hw_index_size) {
-   case 4:
-      format = GEN6_IB_DW0_FORMAT_DWORD;
-      break;
-   case 2:
-      format = GEN6_IB_DW0_FORMAT_WORD;
-      break;
-   case 1:
-      format = GEN6_IB_DW0_FORMAT_BYTE;
-      break;
-   default:
-      assert(!"unknown index size");
-      format = GEN6_IB_DW0_FORMAT_BYTE;
-      break;
-   }
+   dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) |
+         builder->mocs << GEN6_IB_DW0_MOCS__SHIFT;
 
    /*
-    * set start_offset to 0 here and adjust pipe_draw_info::start with
-    * ib->draw_start_offset in 3DPRIMITIVE
+    * see index_buffer_set_gen8_3DSTATE_INDEX_BUFFER() and
+    * vf_params_set_gen6_3dstate_index_buffer()
     */
-   start_offset = 0;
-   end_offset = buf->bo_size;
-
-   /* end_offset must also be aligned and is inclusive */
-   end_offset -= (end_offset % ib->hw_index_size);
-   end_offset--;
+   dw0 |= ib->ib[0];
+   if (ilo_dev_gen(builder->dev) <= ILO_GEN(7))
+      dw0 |= vf->cut[0];
 
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2) |
-           builder->mocs << GEN6_IB_DW0_MOCS__SHIFT |
-           format;
-   if (enable_cut_index)
-      dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
-
-   ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
-   ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
+   dw[0] = dw0;
+   if (ib->vma) {
+      ilo_builder_batch_reloc(builder, pos + 1, ib->vma->bo,
+            ib->vma->bo_offset + ib->ib[1], 0);
+      ilo_builder_batch_reloc(builder, pos + 2, ib->vma->bo,
+            ib->vma->bo_offset + ib->ib[2], 0);
+   } else {
+      dw[1] = 0;
+      dw[2] = 0;
+   }
 }
 
 static inline void
 gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
-                          const struct ilo_ib_state *ib)
+                          const struct ilo_state_vf *vf,
+                          const struct ilo_state_index_buffer *ib)
 {
    const uint8_t cmd_len = 5;
-   struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
-   int format;
    uint32_t *dw;
    unsigned pos;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   if (!buf)
-      return;
-
-   switch (ib->hw_index_size) {
-   case 4:
-      format = GEN8_IB_DW1_FORMAT_DWORD;
-      break;
-   case 2:
-      format = GEN8_IB_DW1_FORMAT_WORD;
-      break;
-   case 1:
-      format = GEN8_IB_DW1_FORMAT_BYTE;
-      break;
-   default:
-      assert(!"unknown index size");
-      format = GEN8_IB_DW1_FORMAT_BYTE;
-      break;
-   }
-
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2);
-   dw[1] = format |
+   /* see index_buffer_set_gen8_3DSTATE_INDEX_BUFFER() */
+   dw[1] = ib->ib[0] |
            builder->mocs << GEN8_IB_DW1_MOCS__SHIFT;
-   dw[4] = buf->bo_size;
 
-   /* ignore ib->offset here in favor of adjusting 3DPRIMITIVE */
-   ilo_builder_batch_reloc64(builder, pos + 2, buf->bo, 0, 0);
+   if (ib->vma) {
+      ilo_builder_batch_reloc64(builder, pos + 2, ib->vma->bo,
+            ib->vma->bo_offset + ib->ib[1], 0);
+   } else {
+      dw[2] = 0;
+      dw[3] = 0;
+   }
+
+   dw[4] = ib->ib[2];
 }
 
 static inline void
 gen6_3DSTATE_VS(struct ilo_builder *builder,
-                const struct ilo_shader_state *vs)
+                const struct ilo_state_vs *vs,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 6;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
-   cso = ilo_shader_get_kernel_cso(vs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(vs);
-   dw[2] = dw2;
-   dw[3] = 0; /* scratch */
-   dw[4] = dw4;
-   dw[5] = dw5;
+   dw[1] = kernel_offset;
+   /* see vs_set_gen6_3DSTATE_VS() */
+   dw[2] = vs->vs[0];
+   dw[3] = vs->vs[1];
+   dw[4] = vs->vs[2];
+   dw[5] = vs->vs[3];
 }
 
 static inline void
 gen8_3DSTATE_VS(struct ilo_builder *builder,
-                const struct ilo_shader_state *vs,
-                uint32_t clip_plane_enable)
+                const struct ilo_state_vs *vs,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 9;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw3, dw6, dw7, dw8, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   cso = ilo_shader_get_kernel_cso(vs);
-   dw3 = cso->payload[0];
-   dw6 = cso->payload[1];
-   dw7 = cso->payload[2];
-   dw8 = clip_plane_enable << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(vs);
+   dw[1] = kernel_offset;
    dw[2] = 0;
-   dw[3] = dw3;
-   dw[4] = 0; /* scratch */
+   /* see vs_set_gen6_3DSTATE_VS() */
+   dw[3] = vs->vs[0];
+   dw[4] = vs->vs[1];
    dw[5] = 0;
-   dw[6] = dw6;
-   dw[7] = dw7;
-   dw[8] = dw8;
+   dw[6] = vs->vs[2];
+   dw[7] = vs->vs[3];
+   dw[8] = vs->vs[4];
 }
 
 static inline void
-gen6_disable_3DSTATE_VS(struct ilo_builder *builder)
+gen7_3DSTATE_HS(struct ilo_builder *builder,
+                const struct ilo_state_hs *hs,
+                uint32_t kernel_offset)
 {
-   const uint8_t cmd_len = 6;
+   const uint8_t cmd_len = 7;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
-   dw[4] = 0;
-   dw[5] = 0;
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
+   /* see hs_set_gen7_3DSTATE_HS() */
+   dw[1] = hs->hs[0];
+   dw[2] = hs->hs[1];
+   dw[3] = kernel_offset;
+   dw[4] = hs->hs[2];
+   dw[5] = hs->hs[3];
+   dw[6] = 0;
 }
 
 static inline void
-gen7_disable_3DSTATE_HS(struct ilo_builder *builder)
+gen8_3DSTATE_HS(struct ilo_builder *builder,
+                const struct ilo_state_hs *hs,
+                uint32_t kernel_offset)
 {
-   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 7;
+   const uint8_t cmd_len = 9;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
+   ILO_DEV_ASSERT(builder->dev, 8, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
+   /* see hs_set_gen7_3DSTATE_HS() */
+   dw[1] = hs->hs[0];
+   dw[2] = hs->hs[1];
+   dw[3] = kernel_offset;
    dw[4] = 0;
-   dw[5] = 0;
+   dw[5] = hs->hs[2];
    dw[6] = 0;
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[7] = 0;
-      dw[8] = 0;
-   }
+   dw[7] = hs->hs[3];
+   dw[8] = 0;
 }
 
 static inline void
-gen7_3DSTATE_TE(struct ilo_builder *builder)
+gen7_3DSTATE_TE(struct ilo_builder *builder,
+                const struct ilo_state_ds *ds)
 {
    const uint8_t cmd_len = 4;
    uint32_t *dw;
@@ -814,108 +577,61 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
+   /* see ds_set_gen7_3DSTATE_TE() */
+   dw[1] = ds->te[0];
+   dw[2] = ds->te[1];
+   dw[3] = ds->te[2];
 }
 
 static inline void
-gen7_disable_3DSTATE_DS(struct ilo_builder *builder)
+gen7_3DSTATE_DS(struct ilo_builder *builder,
+                const struct ilo_state_ds *ds,
+                uint32_t kernel_offset)
 {
-   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 9 : 6;
+   const uint8_t cmd_len = 6;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
-   dw[4] = 0;
-   dw[5] = 0;
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[6] = 0;
-      dw[7] = 0;
-      dw[8] = 0;
-   }
-}
-
-static inline void
-gen6_3DSTATE_GS(struct ilo_builder *builder,
-                const struct ilo_shader_state *gs)
-{
-   const uint8_t cmd_len = 7;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5, dw6, *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 6);
-
-   cso = ilo_shader_get_kernel_cso(gs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-   dw6 = cso->payload[3];
-
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
-
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(gs);
-   dw[2] = dw2;
-   dw[3] = 0; /* scratch */
-   dw[4] = dw4;
-   dw[5] = dw5;
-   dw[6] = dw6;
+   /* see ds_set_gen7_3DSTATE_DS() */
+   dw[1] = kernel_offset;
+   dw[2] = ds->ds[0];
+   dw[3] = ds->ds[1];
+   dw[4] = ds->ds[2];
+   dw[5] = ds->ds[3];
 }
 
 static inline void
-gen6_so_3DSTATE_GS(struct ilo_builder *builder,
-                   const struct ilo_shader_state *vs,
-                   int verts_per_prim)
+gen8_3DSTATE_DS(struct ilo_builder *builder,
+                const struct ilo_state_ds *ds,
+                uint32_t kernel_offset)
 {
-   const uint8_t cmd_len = 7;
-   struct ilo_shader_cso cso;
-   enum ilo_kernel_param param;
-   uint32_t dw2, dw4, dw5, dw6, *dw;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 6);
-
-   assert(ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO));
-
-   switch (verts_per_prim) {
-   case 1:
-      param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
-      break;
-   case 2:
-      param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
-      break;
-   default:
-      param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
-      break;
-   }
+   const uint8_t cmd_len = 9;
+   uint32_t *dw;
 
-   /* cannot use VS's CSO */
-   ilo_gpe_init_gs_cso(builder->dev, vs, &cso);
-   dw2 = cso.payload[0];
-   dw4 = cso.payload[1];
-   dw5 = cso.payload[2];
-   dw6 = cso.payload[3];
+   ILO_DEV_ASSERT(builder->dev, 8, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
-   dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(vs) +
-           ilo_shader_get_kernel_param(vs, param);
-   dw[2] = dw2;
-   dw[3] = 0;
-   dw[4] = dw4;
-   dw[5] = dw5;
-   dw[6] = dw6;
+   dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
+   /* see ds_set_gen7_3DSTATE_DS() */
+   dw[1] = kernel_offset;
+   dw[2] = 0;
+   dw[3] = ds->ds[0];
+   dw[4] = ds->ds[1];
+   dw[5] = 0;
+   dw[6] = ds->ds[2];
+   dw[7] = ds->ds[3];
+   dw[8] = ds->ds[4];
 }
 
 static inline void
-gen6_disable_3DSTATE_GS(struct ilo_builder *builder)
+gen6_3DSTATE_GS(struct ilo_builder *builder,
+                const struct ilo_state_gs *gs,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 7;
    uint32_t *dw;
@@ -925,13 +641,13 @@
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
-   dw[1] = 0;
-   dw[2] = 0;
-   dw[3] = 0;
-   /* honor the valid range of URB read length */
-   dw[4] = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
-   dw[5] = GEN6_GS_DW5_STATISTICS;
-   dw[6] = 0;
+   dw[1] = kernel_offset;
+   /* see gs_set_gen6_3DSTATE_GS() */
+   dw[2] = gs->gs[0];
+   dw[3] = gs->gs[1];
+   dw[4] = gs->gs[2];
+   dw[5] = gs->gs[3];
+   dw[6] = gs->gs[4];
 }
 
 static inline void
@@ -960,183 +676,90 @@
 
 static inline void
 gen7_3DSTATE_GS(struct ilo_builder *builder,
-                const struct ilo_shader_state *gs)
+                const struct ilo_state_gs *gs,
+                uint32_t kernel_offset)
 {
    const uint8_t cmd_len = 7;
-   const struct ilo_shader_cso *cso;
-   uint32_t dw2, dw4, dw5, *dw;
+   uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   cso = ilo_shader_get_kernel_cso(gs);
-   dw2 = cso->payload[0];
-   dw4 = cso->payload[1];
-   dw5 = cso->payload[2];
-
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
-   dw[1] = ilo_shader_get_kernel_offset(gs);
-   dw[2] = dw2;
-   dw[3] = 0; /* scratch */
-   dw[4] = dw4;
-   dw[5] = dw5;
+   dw[1] = kernel_offset;
+   /* see gs_set_gen7_3DSTATE_GS() */
+   dw[2] = gs->gs[0];
+   dw[3] = gs->gs[1];
+   dw[4] = gs->gs[2];
+   dw[5] = gs->gs[3];
    dw[6] = 0;
 }
 
 static inline void
-gen7_disable_3DSTATE_GS(struct ilo_builder *builder)
+gen8_3DSTATE_GS(struct ilo_builder *builder,
+                const struct ilo_state_gs *gs,
+                uint32_t kernel_offset)
 {
-   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 10 : 7;
+   const uint8_t cmd_len = 10;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
+   ILO_DEV_ASSERT(builder->dev, 8, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
-   dw[1] = 0;
+   dw[1] = kernel_offset;
    dw[2] = 0;
-   dw[3] = 0;
-   dw[4] = 0;
-
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[7] = GEN8_GS_DW7_STATISTICS;
-      dw[8] = 0;
-      dw[9] = 0;
-   } else {
-      dw[5] = GEN7_GS_DW5_STATISTICS;
-      dw[6] = 0;
-   }
+   /* see gs_set_gen7_3DSTATE_GS() */
+   dw[3] = gs->gs[0];
+   dw[4] = gs->gs[1];
+   dw[5] = 0;
+   dw[6] = gs->gs[2];
+   dw[7] = gs->gs[3];
+   dw[8] = 0;
+   dw[9] = gs->gs[4];
 }
 
 static inline void
 gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
-                       int render_stream,
-                       bool render_disable,
-                       int vertex_attrib_count,
-                       const int *buf_strides)
+                       const struct ilo_state_sol *sol)
 {
    const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
    uint32_t *dw;
-   int buf_mask;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2);
-
-   dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
-   if (render_disable)
-      dw[1] |= GEN7_SO_DW1_RENDER_DISABLE;
-
-   if (buf_strides) {
-      buf_mask = ((bool) buf_strides[3]) << 3 |
-                 ((bool) buf_strides[2]) << 2 |
-                 ((bool) buf_strides[1]) << 1 |
-                 ((bool) buf_strides[0]);
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-         dw[3] = buf_strides[1] << 16 | buf_strides[0];
-         dw[4] = buf_strides[3] << 16 | buf_strides[1];
-      }
-   } else {
-      buf_mask = 0;
-   }
-
-   if (buf_mask) {
-      int read_len;
-
-      dw[1] |= GEN7_SO_DW1_SO_ENABLE |
-               GEN7_SO_DW1_STATISTICS;
-      /* API_OPENGL */
-      if (true)
-         dw[1] |= GEN7_SO_DW1_REORDER_TRAILING;
-      if (ilo_dev_gen(builder->dev) < ILO_GEN(8))
-         dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
-
-      read_len = (vertex_attrib_count + 1) / 2;
-      if (!read_len)
-         read_len = 1;
-
-      dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
-   } else {
-      dw[2] = 0;
+   /* see sol_set_gen7_3DSTATE_STREAMOUT() */
+   dw[1] = sol->streamout[0];
+   dw[2] = sol->streamout[1];
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+      dw[3] = sol->strides[1] << GEN8_SO_DW3_BUFFER1_PITCH__SHIFT |
+              sol->strides[0] << GEN8_SO_DW3_BUFFER0_PITCH__SHIFT;
+      dw[4] = sol->strides[3] << GEN8_SO_DW4_BUFFER3_PITCH__SHIFT |
+              sol->strides[2] << GEN8_SO_DW4_BUFFER2_PITCH__SHIFT;
    }
 }
 
 static inline void
 gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
-                          const struct pipe_stream_output_info *so_info)
+                          const struct ilo_state_sol *sol)
 {
    /*
     * Note that "DWord Length" has 9 bits for this command and the type of
     * cmd_len cannot be uint8_t.
     */
    uint16_t cmd_len;
-   struct {
-      int buf_selects;
-      int decl_count;
-      uint16_t decls[128];
-   } streams[4];
-   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
-   int hw_decl_count, i;
+   int cmd_decl_count;
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
 
-   memset(streams, 0, sizeof(streams));
-   memset(buf_offsets, 0, sizeof(buf_offsets));
-
-   for (i = 0; i < so_info->num_outputs; i++) {
-      unsigned decl, st, buf, reg, mask;
-
-      st = so_info->output[i].stream;
-      buf = so_info->output[i].output_buffer;
-
-      /* pad with holes */
-      while (buf_offsets[buf] < so_info->output[i].dst_offset) {
-         int num_dwords;
-
-         num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
-         if (num_dwords > 4)
-            num_dwords = 4;
-
-         decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-                GEN7_SO_DECL_HOLE_FLAG |
-                ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
-         assert(streams[st].decl_count < Elements(streams[st].decls));
-         streams[st].decls[streams[st].decl_count++] = decl;
-         buf_offsets[buf] += num_dwords;
-      }
-      assert(buf_offsets[buf] == so_info->output[i].dst_offset);
-
-      reg = so_info->output[i].register_index;
-      mask = ((1 << so_info->output[i].num_components) - 1) <<
-         so_info->output[i].start_component;
-
-      decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-             reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
-             mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
-      assert(streams[st].decl_count < Elements(streams[st].decls));
-
-      streams[st].buf_selects |= 1 << buf;
-      streams[st].decls[streams[st].decl_count++] = decl;
-      buf_offsets[buf] += so_info->output[i].num_components;
-   }
-
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
-      hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
-                           streams[2].decl_count, streams[3].decl_count);
+      cmd_decl_count = sol->decl_count;
    } else {
       /*
        * From the Ivy Bridge PRM, volume 2 part 1, page 201:
@@ -1145,100 +768,98 @@
        *      whenever this command is issued. The "Num Entries [n]" fields
        *      still contain the actual numbers of valid decls."
        */
-      hw_decl_count = 128;
+      cmd_decl_count = 128;
    }
 
-   cmd_len = 3 + 2 * hw_decl_count;
+   cmd_len = 3 + 2 * cmd_decl_count;
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
-   dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
-           streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
-           streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
-           streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
-   dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
-           streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
-           streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
-           streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
-   dw += 3;
-
-   for (i = 0; i < hw_decl_count; i++) {
-      dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
-      dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
-      dw += 2;
+   /* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */
+   dw[1] = sol->so_decl[0];
+   dw[2] = sol->so_decl[1];
+   memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count);
+
+   /* zero the decl slots past decl_count; size is per-decl times the gap */
+   if (sol->decl_count < cmd_decl_count)
+      memset(&dw[3 + 2 * sol->decl_count], 0,
+            sizeof(sol->decl[0]) * (cmd_decl_count - sol->decl_count));
 }
 
 static inline void
-gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index, int stride,
-                       const struct pipe_stream_output_target *so_target)
+gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
+                       const struct ilo_state_sol *sol,
+                       const struct ilo_state_sol_buffer *sb,
+                       uint8_t buffer)
 {
-   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4;
-   struct ilo_buffer *buf;
-   int start, end;
+   const uint8_t cmd_len = 4;
    uint32_t *dw;
    unsigned pos;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
-
-   buf = ilo_buffer(so_target->buffer);
+   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
 
-   /* DWord-aligned */
-   assert(stride % 4 == 0);
-   assert(so_target->buffer_offset % 4 == 0);
-
-   stride &= ~3;
-   start = so_target->buffer_offset & ~3;
-   end = (start + so_target->buffer_size) & ~3;
+   assert(buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT);
 
    pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2);
-   dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
-           stride;
-
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[1] |= builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
-
-      dw[4] = end - start;
-      dw[5] = 0;
-      dw[6] = 0;
-      dw[7] = 0;
-
-      ilo_builder_batch_reloc64(builder, pos + 2,
-            buf->bo, start, INTEL_RELOC_WRITE);
+   /* see sol_buffer_set_gen7_3dstate_so_buffer() */
+   dw[1] = buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT |
+           builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT |
+           sol->strides[buffer] << GEN7_SO_BUF_DW1_PITCH__SHIFT;
+
+   if (sb->vma) {
+      ilo_builder_batch_reloc(builder, pos + 2, sb->vma->bo,
+            sb->vma->bo_offset + sb->so_buf[0], INTEL_RELOC_WRITE);
+      ilo_builder_batch_reloc(builder, pos + 3, sb->vma->bo,
+            sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
    } else {
-      dw[1] |= builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT;
-
-      ilo_builder_batch_reloc(builder, pos + 2,
-            buf->bo, start, INTEL_RELOC_WRITE);
-      ilo_builder_batch_reloc(builder, pos + 3,
-            buf->bo, end, INTEL_RELOC_WRITE);
+      dw[2] = 0;
+      dw[3] = 0;
    }
 }
 
 static inline void
-gen7_disable_3DSTATE_SO_BUFFER(struct ilo_builder *builder, int index)
+gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
+                       const struct ilo_state_sol *sol,
+                       const struct ilo_state_sol_buffer *sb,
+                       uint8_t buffer)
 {
-   const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 8 : 4;
+   const uint8_t cmd_len = 8;
    uint32_t *dw;
+   unsigned pos;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 8);
+   ILO_DEV_ASSERT(builder->dev, 8, 8);
 
-   ilo_builder_batch_pointer(builder, cmd_len, &dw);
+   pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | (cmd_len - 2);
-   dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT;
-   dw[2] = 0;
-   dw[3] = 0;
+   /* see sol_buffer_set_gen8_3dstate_so_buffer() */
+   dw[1] = sb->so_buf[0] |
+           buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT |
+           builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
+
+   if (sb->vma) {
+      ilo_builder_batch_reloc64(builder, pos + 2, sb->vma->bo,
+            sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
+   } else {
+      dw[2] = 0;
+      dw[3] = 0;
+   }
 
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-      dw[4] = 0;
+   dw[4] = sb->so_buf[2];
+
+   if (sb->write_offset_vma) {
+      ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_vma->bo,
+            sb->write_offset_vma->bo_offset + sizeof(uint32_t) * buffer,
+            INTEL_RELOC_WRITE);
+   } else {
       dw[5] = 0;
       dw[6] = 0;
-      dw[7] = 0;
    }
+
+   dw[7] = sb->so_buf[3];
 }
 
 static inline void
@@ -1627,8 +1248,7 @@
 
 static inline uint32_t
 gen6_SURFACE_STATE(struct ilo_builder *builder,
-                   const struct ilo_view_surface *surf,
-                   bool for_render)
+                   const struct ilo_state_surface *surf)
 {
    int state_align, state_len;
    uint32_t state_offset, *dw;
@@ -1641,16 +1261,17 @@
 
       state_offset = ilo_builder_surface_pointer(builder,
             ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
-      memcpy(dw, surf->payload, state_len << 2);
+      memcpy(dw, surf->surface, state_len << 2);
 
-      if (surf->bo) {
+      if (surf->vma) {
          const uint32_t mocs = (surf->scanout) ?
             (GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs;
 
          dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT;
 
-         ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo,
-               surf->payload[8], (for_render) ? INTEL_RELOC_WRITE : 0);
+         ilo_builder_surface_reloc64(builder, state_offset, 8, surf->vma->bo,
+               surf->vma->bo_offset + surf->surface[8],
+               (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    } else {
       state_align = 32;
@@ -1658,17 +1279,18 @@
 
       state_offset = ilo_builder_surface_pointer(builder,
             ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
-      memcpy(dw, surf->payload, state_len << 2);
+      memcpy(dw, surf->surface, state_len << 2);
 
-      if (surf->bo) {
+      if (surf->vma) {
          /*
           * For scanouts, we should not enable caching in LLC.  Since we only
           * enable that on Gen8+, we are fine here.
           */
          dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT;
 
-         ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
-               surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
+         ilo_builder_surface_reloc(builder, state_offset, 1, surf->vma->bo,
+               surf->vma->bo_offset + surf->surface[1],
+               (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
       }
    }
 
@@ -1676,55 +1298,13 @@
 }
 
 static inline uint32_t
-gen6_so_SURFACE_STATE(struct ilo_builder *builder,
-                      const struct pipe_stream_output_target *so,
-                      const struct pipe_stream_output_info *so_info,
-                      int so_index)
-{
-   struct ilo_buffer *buf = ilo_buffer(so->buffer);
-   unsigned bo_offset, struct_size;
-   enum pipe_format elem_format;
-   struct ilo_view_surface surf;
-
-   ILO_DEV_ASSERT(builder->dev, 6, 6);
-
-   bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
-   struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
-
-   switch (so_info->output[so_index].num_components) {
-   case 1:
-      elem_format = PIPE_FORMAT_R32_FLOAT;
-      break;
-   case 2:
-      elem_format = PIPE_FORMAT_R32G32_FLOAT;
-      break;
-   case 3:
-      elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
-      break;
-   case 4:
-      elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-      break;
-   default:
-      assert(!"unexpected SO components length");
-      elem_format = PIPE_FORMAT_R32_FLOAT;
-      break;
-   }
-
-   ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset,
-         so->buffer_size, struct_size, elem_format, false, true, &surf);
-
-   return gen6_SURFACE_STATE(builder, &surf, false);
-}
-
-static inline uint32_t
 gen6_SAMPLER_STATE(struct ilo_builder *builder,
-                   const struct ilo_sampler_cso * const *samplers,
-                   const struct pipe_sampler_view * const *views,
+                   const struct ilo_state_sampler *samplers,
                    const uint32_t *sampler_border_colors,
-                   int num_samplers)
+                   int sampler_count)
 {
    const int state_align = 32;
-   const int state_len = 4 * num_samplers;
+   const int state_len = 4 * sampler_count;
    uint32_t state_offset, *dw;
    int i;
 
@@ -1735,9 +1315,9 @@
     *
     *     "The sampler state is stored as an array of up to 16 elements..."
     */
-   assert(num_samplers <= 16);
+   assert(sampler_count <= 16);
 
-   if (!num_samplers)
+   if (!sampler_count)
       return 0;
 
    /*
@@ -1749,86 +1329,19 @@
     *
     * It also applies to other shader stages.
     */
-   ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4)));
+   ilo_builder_dynamic_pad_top(builder, 4 * (4 - (sampler_count % 4)));
 
    state_offset = ilo_builder_dynamic_pointer(builder,
          ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);
 
-   for (i = 0; i < num_samplers; i++) {
-      const struct ilo_sampler_cso *sampler = samplers[i];
-      const struct pipe_sampler_view *view = views[i];
-      const uint32_t border_color = sampler_border_colors[i];
-      uint32_t dw_filter, dw_wrap;
-
-      /* there may be holes */
-      if (!sampler || !view) {
-         /* disabled sampler */
-         dw[0] = 1 << 31;
-         dw[1] = 0;
-         dw[2] = 0;
-         dw[3] = 0;
-         dw += 4;
-
-         continue;
-      }
-
-      /* determine filter and wrap modes */
-      switch (view->texture->target) {
-      case PIPE_TEXTURE_1D:
-         dw_filter = (sampler->anisotropic) ?
-            sampler->dw_filter_aniso : sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap_1d;
-         break;
-      case PIPE_TEXTURE_3D:
-         /*
-          * From the Sandy Bridge PRM, volume 4 part 1, page 103:
-          *
-          *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
-          *      surfaces of type SURFTYPE_3D."
-          */
-         dw_filter = sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap;
-         break;
-      case PIPE_TEXTURE_CUBE:
-         dw_filter = (sampler->anisotropic) ?
-            sampler->dw_filter_aniso : sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap_cube;
-         break;
-      default:
-         dw_filter = (sampler->anisotropic) ?
-            sampler->dw_filter_aniso : sampler->dw_filter;
-         dw_wrap = sampler->dw_wrap;
-         break;
-      }
+   for (i = 0; i < sampler_count; i++) {
+      /* see sampler_set_gen6_SAMPLER_STATE() */
+      dw[0] = samplers[i].sampler[0];
+      dw[1] = samplers[i].sampler[1];
+      dw[3] = samplers[i].sampler[2];
 
-      dw[0] = sampler->payload[0];
-      dw[1] = sampler->payload[1];
-      assert(!(border_color & 0x1f));
-      dw[2] = border_color;
-      dw[3] = sampler->payload[2];
-
-      dw[0] |= dw_filter;
-
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
-         dw[3] |= dw_wrap;
-      }
-      else {
-         /*
-          * From the Sandy Bridge PRM, volume 4 part 1, page 21:
-          *
-          *     "[DevSNB] Errata: Incorrect behavior is observed in cases
-          *      where the min and mag mode filters are different and
-          *      SurfMinLOD is nonzero. The determination of MagMode uses the
-          *      following equation instead of the one in the above
-          *      pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
-          *
-          * As a way to work around that, we set Base to
-          * view->u.tex.first_level.
-          */
-         dw[0] |= view->u.tex.first_level << 22;
-
-         dw[1] |= dw_wrap;
-      }
+      assert(!(sampler_border_colors[i] & 0x1f));
+      dw[2] = sampler_border_colors[i];
 
       dw += 4;
    }
@@ -1838,7 +1351,7 @@
 
 static inline uint32_t
 gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
-                                const struct ilo_sampler_cso *sampler)
+                                const struct ilo_state_sampler_border *border)
 {
    const int state_align =
       (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 64 : 32;
@@ -1846,11 +1359,12 @@
 
    ILO_DEV_ASSERT(builder->dev, 6, 8);
 
-   assert(Elements(sampler->payload) >= 3 + state_len);
-
-   /* see ilo_gpe_init_sampler_cso() */
+   /*
+    * see border_set_gen6_SAMPLER_BORDER_COLOR_STATE() and
+    * border_set_gen7_SAMPLER_BORDER_COLOR_STATE()
+    */
    return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
-         state_align, state_len, &sampler->payload[3]);
+         state_align, state_len, border->color);
 }
 
 static inline uint32_t
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder.c	2015-09-16 14:36:09.000000000 +0000
@@ -333,7 +333,7 @@
 {
    int i;
 
-   memset(builder, 0, sizeof(*builder));
+   assert(ilo_is_zeroed(builder, sizeof(*builder)));
 
    builder->dev = dev;
    builder->winsys = winsys;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_decode.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_decode.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_decode.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_decode.c	2015-09-16 14:36:09.000000000 +0000
@@ -319,7 +319,7 @@
               "stencil ref %d, bf stencil ref %d\n",
 	      GEN_EXTRACT(dw, GEN6_CC_DW0_ALPHATEST) ? "FLOAT32" : "UNORM8",
 	      (bool) (dw & GEN6_CC_DW0_ROUND_DISABLE_DISABLE),
-	      GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL0_REF),
+	      GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL_REF),
 	      GEN_EXTRACT(dw, GEN6_CC_DW0_STENCIL1_REF));
 
    writer_dw(builder, which, item->offset, 1, "CC\n");
@@ -347,13 +347,13 @@
    dw = writer_dw(builder, which, item->offset, 0, "D_S");
    ilo_printf("stencil %sable, func %d, write %sable\n",
          (dw & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) ? "en" : "dis",
-         GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL0_FUNC),
+         GEN_EXTRACT(dw, GEN6_ZS_DW0_STENCIL_FUNC),
          (dw & GEN6_ZS_DW0_STENCIL_WRITE_ENABLE) ? "en" : "dis");
 
    dw = writer_dw(builder, which, item->offset, 1, "D_S");
    ilo_printf("stencil test mask 0x%x, write mask 0x%x\n",
-         GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_VALUEMASK),
-         GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL0_WRITEMASK));
+         GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL_TEST_MASK),
+         GEN_EXTRACT(dw, GEN6_ZS_DW1_STENCIL_WRITE_MASK));
 
    dw = writer_dw(builder, which, item->offset, 2, "D_S");
    ilo_printf("depth test %sable, func %d, write %sable\n",
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_media.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_media.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_builder_media.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_builder_media.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,57 +29,30 @@
 #define ILO_BUILDER_MEDIA_H
 
 #include "genhw/genhw.h"
-#include "../ilo_shader.h"
 #include "intel_winsys.h"
 
 #include "ilo_core.h"
 #include "ilo_dev.h"
+#include "ilo_state_compute.h"
 #include "ilo_builder.h"
 
-struct gen6_idrt_data {
-   const struct ilo_shader_state *cs;
-
-   uint32_t sampler_offset;
-   uint32_t binding_table_offset;
-
-   unsigned curbe_size;
-   unsigned thread_group_size;
-};
-
 static inline void
 gen6_MEDIA_VFE_STATE(struct ilo_builder *builder,
-                     unsigned curbe_alloc, bool use_slm)
+                     const struct ilo_state_compute *compute)
 {
    const uint8_t cmd_len = 8;
-   const unsigned idrt_alloc =
-      ((ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) ? 64 : 32) * 32;
-   int max_threads;
    uint32_t *dw;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
-
-   max_threads = builder->dev->thread_count;
-
-   curbe_alloc = align(curbe_alloc, 32);
-   assert(idrt_alloc + curbe_alloc <= builder->dev->urb_size / (use_slm + 1));
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) | (cmd_len - 2);
-   dw[1] = 0; /* scratch */
-
-   dw[2] = (max_threads - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
-           0 << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
-           GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
-           GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
-   if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
-      dw[2] |= GEN7_VFE_DW2_GPGPU_MODE;
-
+   /* see compute_set_gen6_MEDIA_VFE_STATE() */
+   dw[1] = compute->vfe[0];
+   dw[2] = compute->vfe[1];
    dw[3] = 0;
-
-   dw[4] = 0 << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
-           (curbe_alloc / 32);
-
+   dw[4] = compute->vfe[2];
    dw[5] = 0;
    dw[6] = 0;
    dw[7] = 0;
@@ -194,8 +167,10 @@
 
 static inline uint32_t
 gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_builder *builder,
-                               const struct gen6_idrt_data *data,
-                               int idrt_count)
+                               const struct ilo_state_compute *compute,
+                               const uint32_t *kernel_offsets,
+                               const uint32_t *sampler_offsets,
+                               const uint32_t *binding_table_offsets)
 {
    /*
     * From the Sandy Bridge PRM, volume 2 part 2, page 34:
@@ -211,61 +186,26 @@
     *      aligned address of the Interface Descriptor data."
     */
    const int state_align = 32;
-   const int state_len = (32 / 4) * idrt_count;
+   const int state_len = (32 / 4) * compute->idrt_count;
    uint32_t state_offset, *dw;
    int i;
 
-   ILO_DEV_ASSERT(builder->dev, 7, 7.5);
+   ILO_DEV_ASSERT(builder->dev, 6, 7.5);
 
    state_offset = ilo_builder_dynamic_pointer(builder,
          ILO_BUILDER_ITEM_INTERFACE_DESCRIPTOR, state_align, state_len, &dw);
 
-   for (i = 0; i < idrt_count; i++) {
-      const struct gen6_idrt_data *idrt = &data[i];
-      const struct ilo_shader_state *cs = idrt->cs;
-      unsigned sampler_count, bt_size, slm_size;
-
-      sampler_count =
-         ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
-      assert(sampler_count <= 16);
-      sampler_count = (sampler_count + 3) / 4;
-
-      bt_size =
-         ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
-      if (bt_size > 31)
-         bt_size = 31;
-
-      slm_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
-
-      assert(idrt->curbe_size / 32 <= 63);
-
-      dw[0] = ilo_shader_get_kernel_offset(idrt->cs);
+   for (i = 0; i < compute->idrt_count; i++) {
+      /* see compute_set_gen6_INTERFACE_DESCRIPTOR_DATA() */
+      dw[0] = compute->idrt[i][0] + kernel_offsets[i];
       dw[1] = 0;
-      dw[2] = idrt->sampler_offset |
-              sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
-      dw[3] = idrt->binding_table_offset |
-              bt_size << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
-
-      dw[4] = (idrt->curbe_size / 32) << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
-              0 << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
-
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
-         dw[5] = GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
-
-         if (slm_size) {
-            assert(slm_size <= 64 * 1024);
-            slm_size = util_next_power_of_two((slm_size + 4095) / 4096);
-
-            dw[5] |= GEN7_IDRT_DW5_BARRIER_ENABLE |
-                     slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT |
-                     idrt->thread_group_size <<
-                        GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
-         }
-      } else {
-         dw[5] = 0;
-      }
-
-      dw[6] = 0;
+      dw[2] = compute->idrt[i][1] |
+              sampler_offsets[i];
+      dw[3] = compute->idrt[i][2] |
+              binding_table_offsets[i];
+      dw[4] = compute->idrt[i][3];
+      dw[5] = compute->idrt[i][4];
+      dw[6] = compute->idrt[i][5];
       dw[7] = 0;
 
       dw += 8;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_core.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_core.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_core.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_core.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,18 +29,9 @@
 #define ILO_CORE_H
 
 #include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
 
 #include "util/u_debug.h"
-#include "util/list.h"
-#include "util/u_format.h"
-#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
-#include "util/u_pointer.h"
-
-#define ILO_PRIM_RECTANGLES PIPE_PRIM_MAX
-#define ILO_PRIM_MAX (PIPE_PRIM_MAX + 1)
 
 #endif /* ILO_CORE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_debug.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_debug.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_debug.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_debug.h	2015-09-16 14:36:09.000000000 +0000
@@ -100,4 +100,29 @@
 #endif
 }
 
+/**
+ * Return true when all \p size bytes starting at \p ptr are zero.
+ *
+ * The scan is only performed when DEBUG is defined; otherwise the function
+ * unconditionally returns true.
+ */
+static inline bool
+ilo_is_zeroed(const void *ptr, size_t size)
+{
+#ifdef DEBUG
+   const char *bytes = (const char *) ptr;
+   size_t i;
+
+   /* index with i: testing *ptr alone would only ever look at byte 0 */
+   for (i = 0; i < size; i++) {
+      if (bytes[i] != 0)
+         return false;
+   }
+
+   return true;
+#else
+   return true;
+#endif
+}
+
 #endif /* ILO_DEBUG_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_dev.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_dev.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_dev.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_dev.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,14 +32,15 @@
 #include "ilo_dev.h"
 
 /**
- * Initialize the \p dev from \p winsys.  \p winsys is considered owned by \p
- * dev and will be destroyed in \p ilo_dev_cleanup().
+ * Initialize the \p dev from \p winsys.
  */
 bool
 ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys)
 {
    const struct intel_winsys_info *info;
 
+   assert(ilo_is_zeroed(dev, sizeof(*dev)));
+
    info = intel_winsys_get_info(winsys);
 
    dev->winsys = winsys;
@@ -178,9 +179,3 @@
 
    return true;
 }
-
-void
-ilo_dev_cleanup(struct ilo_dev *dev)
-{
-   intel_winsys_destroy(dev->winsys);
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_dev.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_dev.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_dev.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_dev.h	2015-09-16 14:36:09.000000000 +0000
@@ -63,9 +63,6 @@
 bool
 ilo_dev_init(struct ilo_dev *dev, struct intel_winsys *winsys);
 
-void
-ilo_dev_cleanup(struct ilo_dev *dev);
-
 static inline int
 ilo_dev_gen(const struct ilo_dev *dev)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_fence.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_fence.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_fence.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_fence.h	1970-01-01 00:00:00.000000000 +0000
@@ -1,73 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2013 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#ifndef ILO_FENCE_H
-#define ILO_FENCE_H
-
-#include "intel_winsys.h"
-
-#include "ilo_core.h"
-#include "ilo_dev.h"
-
-struct ilo_fence {
-   struct intel_bo *seq_bo;
-};
-
-static inline void
-ilo_fence_init(struct ilo_fence *fence, const struct ilo_dev *dev)
-{
-   /* no-op */
-}
-
-static inline void
-ilo_fence_cleanup(struct ilo_fence *fence)
-{
-   intel_bo_unref(fence->seq_bo);
-}
-
-/**
- * Set the sequence bo for waiting.  The fence is considered signaled when
- * there is no sequence bo.
- */
-static inline void
-ilo_fence_set_seq_bo(struct ilo_fence *fence, struct intel_bo *seq_bo)
-{
-   intel_bo_unref(fence->seq_bo);
-   fence->seq_bo = intel_bo_ref(seq_bo);
-}
-
-/**
- * Wait for the fence to be signaled or until \p timeout nanoseconds has
- * passed.  It will wait indefinitely when \p timeout is negative.
- */
-static inline bool
-ilo_fence_wait(struct ilo_fence *fence, int64_t timeout)
-{
-   return (!fence->seq_bo || intel_bo_wait(fence->seq_bo, timeout) == 0);
-}
-
-#endif /* ILO_FENCE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_format.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_format.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_format.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_format.c	1970-01-01 00:00:00.000000000 +0000
@@ -1,755 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2013 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#include "genhw/genhw.h"
-#include "ilo_format.h"
-
-struct ilo_vf_cap {
-   int vertex_element;
-};
-
-struct ilo_sol_cap {
-   int buffer;
-};
-
-struct ilo_sampler_cap {
-   int sampling;
-   int filtering;
-   int shadow_map;
-   int chroma_key;
-};
-
-struct ilo_dp_cap {
-   int rt_write;
-   int rt_write_blending;
-   int typed_write;
-   int media_color_processing;
-};
-
-/*
- * This table is based on:
- *
- *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- *  - the Ivy Bridge PRM, volume 2 part 1, page 97-99
- *  - the Haswell PRM, volume 7, page 467-470
- */
-static const struct ilo_vf_cap ilo_vf_caps[] = {
-#define CAP(vertex_element) { ILO_GEN(vertex_element) }
-   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_UNORM]       = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_SNORM]       = CAP(  1),
-   [GEN6_FORMAT_R64G64_FLOAT]             = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_SSCALED]     = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_USCALED]     = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_SFIXED]      = CAP(7.5),
-   [GEN6_FORMAT_R32G32B32_FLOAT]          = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_SINT]           = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_UINT]           = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_UNORM]          = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_SNORM]          = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_SSCALED]        = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_USCALED]        = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_SFIXED]         = CAP(7.5),
-   [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1),
-   [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1),
-   [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1),
-   [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1),
-   [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1),
-   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1),
-   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1),
-   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1),
-   [GEN6_FORMAT_R32G32_UNORM]             = CAP(  1),
-   [GEN6_FORMAT_R32G32_SNORM]             = CAP(  1),
-   [GEN6_FORMAT_R64_FLOAT]                = CAP(  1),
-   [GEN6_FORMAT_R16G16B16A16_SSCALED]     = CAP(  1),
-   [GEN6_FORMAT_R16G16B16A16_USCALED]     = CAP(  1),
-   [GEN6_FORMAT_R32G32_SSCALED]           = CAP(  1),
-   [GEN6_FORMAT_R32G32_USCALED]           = CAP(  1),
-   [GEN6_FORMAT_R32G32_SFIXED]            = CAP(7.5),
-   [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1),
-   [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1),
-   [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1),
-   [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP(  1),
-   [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1),
-   [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1),
-   [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1),
-   [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1),
-   [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1),
-   [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1),
-   [GEN6_FORMAT_R16G16_SINT]              = CAP(  1),
-   [GEN6_FORMAT_R16G16_UINT]              = CAP(  1),
-   [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1),
-   [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(7.5),
-   [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1),
-   [GEN6_FORMAT_R32_SINT]                 = CAP(  1),
-   [GEN6_FORMAT_R32_UINT]                 = CAP(  1),
-   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1),
-   [GEN6_FORMAT_R32_UNORM]                = CAP(  1),
-   [GEN6_FORMAT_R32_SNORM]                = CAP(  1),
-   [GEN6_FORMAT_R10G10B10X2_USCALED]      = CAP(  1),
-   [GEN6_FORMAT_R8G8B8A8_SSCALED]         = CAP(  1),
-   [GEN6_FORMAT_R8G8B8A8_USCALED]         = CAP(  1),
-   [GEN6_FORMAT_R16G16_SSCALED]           = CAP(  1),
-   [GEN6_FORMAT_R16G16_USCALED]           = CAP(  1),
-   [GEN6_FORMAT_R32_SSCALED]              = CAP(  1),
-   [GEN6_FORMAT_R32_USCALED]              = CAP(  1),
-   [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1),
-   [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1),
-   [GEN6_FORMAT_R8G8_SINT]                = CAP(  1),
-   [GEN6_FORMAT_R8G8_UINT]                = CAP(  1),
-   [GEN6_FORMAT_R16_UNORM]                = CAP(  1),
-   [GEN6_FORMAT_R16_SNORM]                = CAP(  1),
-   [GEN6_FORMAT_R16_SINT]                 = CAP(  1),
-   [GEN6_FORMAT_R16_UINT]                 = CAP(  1),
-   [GEN6_FORMAT_R16_FLOAT]                = CAP(  1),
-   [GEN6_FORMAT_R8G8_SSCALED]             = CAP(  1),
-   [GEN6_FORMAT_R8G8_USCALED]             = CAP(  1),
-   [GEN6_FORMAT_R16_SSCALED]              = CAP(  1),
-   [GEN6_FORMAT_R16_USCALED]              = CAP(  1),
-   [GEN6_FORMAT_R8_UNORM]                 = CAP(  1),
-   [GEN6_FORMAT_R8_SNORM]                 = CAP(  1),
-   [GEN6_FORMAT_R8_SINT]                  = CAP(  1),
-   [GEN6_FORMAT_R8_UINT]                  = CAP(  1),
-   [GEN6_FORMAT_R8_SSCALED]               = CAP(  1),
-   [GEN6_FORMAT_R8_USCALED]               = CAP(  1),
-   [GEN6_FORMAT_R8G8B8_UNORM]             = CAP(  1),
-   [GEN6_FORMAT_R8G8B8_SNORM]             = CAP(  1),
-   [GEN6_FORMAT_R8G8B8_SSCALED]           = CAP(  1),
-   [GEN6_FORMAT_R8G8B8_USCALED]           = CAP(  1),
-   [GEN6_FORMAT_R64G64B64A64_FLOAT]       = CAP(  1),
-   [GEN6_FORMAT_R64G64B64_FLOAT]          = CAP(  1),
-   [GEN6_FORMAT_R16G16B16_FLOAT]          = CAP(  6),
-   [GEN6_FORMAT_R16G16B16_UNORM]          = CAP(  1),
-   [GEN6_FORMAT_R16G16B16_SNORM]          = CAP(  1),
-   [GEN6_FORMAT_R16G16B16_SSCALED]        = CAP(  1),
-   [GEN6_FORMAT_R16G16B16_USCALED]        = CAP(  1),
-   [GEN6_FORMAT_R16G16B16_UINT]           = CAP(7.5),
-   [GEN6_FORMAT_R16G16B16_SINT]           = CAP(7.5),
-   [GEN6_FORMAT_R32_SFIXED]               = CAP(7.5),
-   [GEN6_FORMAT_R10G10B10A2_SNORM]        = CAP(7.5),
-   [GEN6_FORMAT_R10G10B10A2_USCALED]      = CAP(7.5),
-   [GEN6_FORMAT_R10G10B10A2_SSCALED]      = CAP(7.5),
-   [GEN6_FORMAT_R10G10B10A2_SINT]         = CAP(7.5),
-   [GEN6_FORMAT_B10G10R10A2_SNORM]        = CAP(7.5),
-   [GEN6_FORMAT_B10G10R10A2_USCALED]      = CAP(7.5),
-   [GEN6_FORMAT_B10G10R10A2_SSCALED]      = CAP(7.5),
-   [GEN6_FORMAT_B10G10R10A2_UINT]         = CAP(7.5),
-   [GEN6_FORMAT_B10G10R10A2_SINT]         = CAP(7.5),
-   [GEN6_FORMAT_R8G8B8_UINT]              = CAP(7.5),
-   [GEN6_FORMAT_R8G8B8_SINT]              = CAP(7.5),
-#undef CAP
-};
-
-/*
- * This table is based on:
- *
- *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- *  - the Ivy Bridge PRM, volume 2 part 1, page 195
- *  - the Haswell PRM, volume 7, page 535
- */
-static const struct ilo_sol_cap ilo_sol_caps[] = {
-#define CAP(buffer) { ILO_GEN(buffer) }
-   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1),
-   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_FLOAT]          = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_SINT]           = CAP(  1),
-   [GEN6_FORMAT_R32G32B32_UINT]           = CAP(  1),
-   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1),
-   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1),
-   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1),
-   [GEN6_FORMAT_R32_SINT]                 = CAP(  1),
-   [GEN6_FORMAT_R32_UINT]                 = CAP(  1),
-   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1),
-#undef CAP
-};
-
-/*
- * This table is based on:
- *
- *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- *  - the Ivy Bridge PRM, volume 4 part 1, page 84-87
- */
-static const struct ilo_sampler_cap ilo_sampler_caps[] = {
-#define CAP(sampling, filtering, shadow_map, chroma_key) \
-   { ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) }
-   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R32G32B32X32_FLOAT]       = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_R32G32B32_FLOAT]          = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_R32G32B32_SINT]           = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R32G32B32_UINT]           = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_X32_TYPELESS_G8X24_UINT]  = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_L32A32_FLOAT]             = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_R16G16B16X16_UNORM]       = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R16G16B16X16_FLOAT]       = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_A32X32_FLOAT]             = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_L32X32_FLOAT]             = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_I32X32_FLOAT]             = CAP(  1,   5,   0,   0),
-   [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R16G16_SINT]              = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16G16_UINT]              = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R32_SINT]                 = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R32_UINT]                 = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_R24_UNORM_X8_TYPELESS]    = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_X24_TYPELESS_G8_UINT]     = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_L16A16_UNORM]             = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_I24X8_UNORM]              = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_L24X8_UNORM]              = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_A24X8_UNORM]              = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_I32_FLOAT]                = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_L32_FLOAT]                = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_A32_FLOAT]                = CAP(  1,   5,   1,   0),
-   [GEN6_FORMAT_B8G8R8X8_UNORM]           = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_B8G8R8X8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8B8X8_UNORM]           = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8B8X8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R9G9B9E5_SHAREDEXP]       = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B10G10R10X2_UNORM]        = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_L16A16_FLOAT]             = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B5G6R5_UNORM]             = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_B5G6R5_UNORM_SRGB]        = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B5G5R5A1_UNORM]           = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B4G4R4A4_UNORM]           = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_R8G8_SINT]                = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R8G8_UINT]                = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16_UNORM]                = CAP(  1,   1,   1,   0),
-   [GEN6_FORMAT_R16_SNORM]                = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R16_SINT]                 = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16_UINT]                 = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R16_FLOAT]                = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_A8P8_UNORM_PALETTE0]      = CAP(  5,   5,   0,   0),
-   [GEN6_FORMAT_A8P8_UNORM_PALETTE1]      = CAP(  5,   5,   0,   0),
-   [GEN6_FORMAT_I16_UNORM]                = CAP(  1,   1,   1,   0),
-   [GEN6_FORMAT_L16_UNORM]                = CAP(  1,   1,   1,   0),
-   [GEN6_FORMAT_A16_UNORM]                = CAP(  1,   1,   1,   0),
-   [GEN6_FORMAT_L8A8_UNORM]               = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_I16_FLOAT]                = CAP(  1,   1,   1,   0),
-   [GEN6_FORMAT_L16_FLOAT]                = CAP(  1,   1,   1,   0),
-   [GEN6_FORMAT_A16_FLOAT]                = CAP(  1,   1,   1,   0),
-   [GEN6_FORMAT_L8A8_UNORM_SRGB]          = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_R5G5_SNORM_B6_UNORM]      = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_P8A8_UNORM_PALETTE0]      = CAP(  5,   5,   0,   0),
-   [GEN6_FORMAT_P8A8_UNORM_PALETTE1]      = CAP(  5,   5,   0,   0),
-   [GEN6_FORMAT_R8_UNORM]                 = CAP(  1,   1,   0, 4.5),
-   [GEN6_FORMAT_R8_SNORM]                 = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8_SINT]                  = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_R8_UINT]                  = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_A8_UNORM]                 = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_I8_UNORM]                 = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_L8_UNORM]                 = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_P4A4_UNORM_PALETTE0]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_A4P4_UNORM_PALETTE0]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_P8_UNORM_PALETTE0]        = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_L8_UNORM_SRGB]            = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_P8_UNORM_PALETTE1]        = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_P4A4_UNORM_PALETTE1]      = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_A4P4_UNORM_PALETTE1]      = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_DXT1_RGB_SRGB]            = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_R1_UNORM]                 = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_YCRCB_NORMAL]             = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_YCRCB_SWAPUVY]            = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_P2_UNORM_PALETTE0]        = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_P2_UNORM_PALETTE1]        = CAP(4.5, 4.5,   0,   0),
-   [GEN6_FORMAT_BC1_UNORM]                = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_BC2_UNORM]                = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_BC3_UNORM]                = CAP(  1,   1,   0,   1),
-   [GEN6_FORMAT_BC4_UNORM]                = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_BC5_UNORM]                = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_BC1_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_BC2_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_BC3_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_MONO8]                    = CAP(  1,   0,   0,   0),
-   [GEN6_FORMAT_YCRCB_SWAPUV]             = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_YCRCB_SWAPY]              = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_DXT1_RGB]                 = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_FXT1]                     = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_BC4_SNORM]                = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_BC5_SNORM]                = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R16G16B16_FLOAT]          = CAP(  5,   5,   0,   0),
-   [GEN6_FORMAT_BC6H_SF16]                = CAP(  7,   7,   0,   0),
-   [GEN6_FORMAT_BC7_UNORM]                = CAP(  7,   7,   0,   0),
-   [GEN6_FORMAT_BC7_UNORM_SRGB]           = CAP(  7,   7,   0,   0),
-   [GEN6_FORMAT_BC6H_UF16]                = CAP(  7,   7,   0,   0),
-#undef CAP
-};
-
-/*
- * This table is based on:
- *
- *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
- *  - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278
- *  - the Haswell PRM, volume 7, page 262-264
- */
-static const struct ilo_dp_cap ilo_dp_caps[] = {
-#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \
-   { ILO_GEN(rt_write), ILO_GEN(rt_write_blending), ILO_GEN(typed_write), ILO_GEN(media_color_processing) }
-   [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1, 4.5,   7,   6),
-   [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1,   6,   7,   0),
-   [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_R32G32_SINT]              = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R32G32_UINT]              = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1,   1,   7,   6),
-   [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1,   1,   7,   6),
-   [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB]   = CAP(  0,   0,   0,   6),
-   [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1,   1,   7,   6),
-   [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   6),
-   [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1,   6,   7,   0),
-   [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1, 4.5,   7,   0),
-   [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1,   6,   7,   0),
-   [GEN6_FORMAT_R16G16_SINT]              = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16G16_UINT]              = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(  1,   1,   7,   6),
-   [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   6),
-   [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_R32_SINT]                 = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R32_UINT]                 = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R32_FLOAT]                = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_B8G8R8X8_UNORM]           = CAP(  0,   0,   0,   6),
-   [GEN6_FORMAT_B5G6R5_UNORM]             = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_B5G6R5_UNORM_SRGB]        = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B5G5R5A1_UNORM]           = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_B4G4R4A4_UNORM]           = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1,   6,   7,   0),
-   [GEN6_FORMAT_R8G8_SINT]                = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R8G8_UINT]                = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16_UNORM]                = CAP(  1, 4.5,   7,   7),
-   [GEN6_FORMAT_R16_SNORM]                = CAP(  1,   6,   7,   0),
-   [GEN6_FORMAT_R16_SINT]                 = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16_UINT]                 = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R16_FLOAT]                = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_B5G5R5X1_UNORM]           = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_B5G5R5X1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
-   [GEN6_FORMAT_R8_UNORM]                 = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_R8_SNORM]                 = CAP(  1,   6,   7,   0),
-   [GEN6_FORMAT_R8_SINT]                  = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_R8_UINT]                  = CAP(  1,   0,   7,   0),
-   [GEN6_FORMAT_A8_UNORM]                 = CAP(  1,   1,   7,   0),
-   [GEN6_FORMAT_YCRCB_NORMAL]             = CAP(  1,   0,   0,   6),
-   [GEN6_FORMAT_YCRCB_SWAPUVY]            = CAP(  1,   0,   0,   6),
-   [GEN6_FORMAT_YCRCB_SWAPUV]             = CAP(  1,   0,   0,   6),
-   [GEN6_FORMAT_YCRCB_SWAPY]              = CAP(  1,   0,   0,   6),
-#undef CAP
-};
-
-bool
-ilo_format_support_vb(const struct ilo_dev *dev,
-                      enum pipe_format format)
-{
-   const int idx = ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
-   const struct ilo_vf_cap *cap = (idx >= 0 && idx < Elements(ilo_vf_caps)) ?
-      &ilo_vf_caps[idx] : NULL;
-
-   return (cap && cap->vertex_element &&
-         ilo_dev_gen(dev) >= cap->vertex_element);
-}
-
-bool
-ilo_format_support_sol(const struct ilo_dev *dev,
-                       enum pipe_format format)
-{
-   const int idx = ilo_format_translate(dev, format, PIPE_BIND_STREAM_OUTPUT);
-   const struct ilo_sol_cap *cap = (idx >= 0 && idx < Elements(ilo_sol_caps)) ?
-      &ilo_sol_caps[idx] : NULL;
-
-   return (cap && cap->buffer && ilo_dev_gen(dev) >= cap->buffer);
-}
-
-bool
-ilo_format_support_sampler(const struct ilo_dev *dev,
-                           enum pipe_format format)
-{
-   const int idx = ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW);
-   const struct ilo_sampler_cap *cap = (idx >= 0 &&
-         idx < Elements(ilo_sampler_caps)) ? &ilo_sampler_caps[idx] : NULL;
-
-   if (!cap || !cap->sampling)
-      return false;
-
-   assert(!cap->filtering || cap->filtering >= cap->sampling);
-
-   if (util_format_is_pure_integer(format))
-      return (ilo_dev_gen(dev) >= cap->sampling);
-   else if (cap->filtering)
-      return (ilo_dev_gen(dev) >= cap->filtering);
-   else
-      return false;
-}
-
-bool
-ilo_format_support_rt(const struct ilo_dev *dev,
-                      enum pipe_format format)
-{
-   const int idx = ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET);
-   const struct ilo_dp_cap *cap = (idx >= 0 && idx < Elements(ilo_dp_caps)) ?
-      &ilo_dp_caps[idx] : NULL;
-
-   if (!cap || !cap->rt_write)
-      return false;
-
-   assert(!cap->rt_write_blending || cap->rt_write_blending >= cap->rt_write);
-
-   if (util_format_is_pure_integer(format))
-      return (ilo_dev_gen(dev) >= cap->rt_write);
-   else if (cap->rt_write_blending)
-      return (ilo_dev_gen(dev) >= cap->rt_write_blending);
-   else
-      return false;
-}
-
-bool
-ilo_format_support_zs(const struct ilo_dev *dev,
-                      enum pipe_format format)
-{
-   switch (format) {
-   case PIPE_FORMAT_Z16_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z32_FLOAT:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-      return true;
-   case PIPE_FORMAT_S8_UINT:
-      /* TODO separate stencil */
-   default:
-      return false;
-   }
-}
-
-/**
- * Translate a color (non-depth/stencil) pipe format to the matching hardware
- * format.  Return -1 on errors.
- */
-int
-ilo_format_translate_color(const struct ilo_dev *dev,
-                           enum pipe_format format)
-{
-   static const int format_mapping[PIPE_FORMAT_COUNT] = {
-      [PIPE_FORMAT_NONE]                  = 0,
-      [PIPE_FORMAT_B8G8R8A8_UNORM]        = GEN6_FORMAT_B8G8R8A8_UNORM,
-      [PIPE_FORMAT_B8G8R8X8_UNORM]        = GEN6_FORMAT_B8G8R8X8_UNORM,
-      [PIPE_FORMAT_A8R8G8B8_UNORM]        = 0,
-      [PIPE_FORMAT_X8R8G8B8_UNORM]        = 0,
-      [PIPE_FORMAT_B5G5R5A1_UNORM]        = GEN6_FORMAT_B5G5R5A1_UNORM,
-      [PIPE_FORMAT_B4G4R4A4_UNORM]        = GEN6_FORMAT_B4G4R4A4_UNORM,
-      [PIPE_FORMAT_B5G6R5_UNORM]          = GEN6_FORMAT_B5G6R5_UNORM,
-      [PIPE_FORMAT_R10G10B10A2_UNORM]     = GEN6_FORMAT_R10G10B10A2_UNORM,
-      [PIPE_FORMAT_L8_UNORM]              = GEN6_FORMAT_L8_UNORM,
-      [PIPE_FORMAT_A8_UNORM]              = GEN6_FORMAT_A8_UNORM,
-      [PIPE_FORMAT_I8_UNORM]              = GEN6_FORMAT_I8_UNORM,
-      [PIPE_FORMAT_L8A8_UNORM]            = GEN6_FORMAT_L8A8_UNORM,
-      [PIPE_FORMAT_L16_UNORM]             = GEN6_FORMAT_L16_UNORM,
-      [PIPE_FORMAT_UYVY]                  = GEN6_FORMAT_YCRCB_SWAPUVY,
-      [PIPE_FORMAT_YUYV]                  = GEN6_FORMAT_YCRCB_NORMAL,
-      [PIPE_FORMAT_Z16_UNORM]             = 0,
-      [PIPE_FORMAT_Z32_UNORM]             = 0,
-      [PIPE_FORMAT_Z32_FLOAT]             = 0,
-      [PIPE_FORMAT_Z24_UNORM_S8_UINT]     = 0,
-      [PIPE_FORMAT_S8_UINT_Z24_UNORM]     = 0,
-      [PIPE_FORMAT_Z24X8_UNORM]           = 0,
-      [PIPE_FORMAT_X8Z24_UNORM]           = 0,
-      [PIPE_FORMAT_S8_UINT]               = 0,
-      [PIPE_FORMAT_R64_FLOAT]             = GEN6_FORMAT_R64_FLOAT,
-      [PIPE_FORMAT_R64G64_FLOAT]          = GEN6_FORMAT_R64G64_FLOAT,
-      [PIPE_FORMAT_R64G64B64_FLOAT]       = GEN6_FORMAT_R64G64B64_FLOAT,
-      [PIPE_FORMAT_R64G64B64A64_FLOAT]    = GEN6_FORMAT_R64G64B64A64_FLOAT,
-      [PIPE_FORMAT_R32_FLOAT]             = GEN6_FORMAT_R32_FLOAT,
-      [PIPE_FORMAT_R32G32_FLOAT]          = GEN6_FORMAT_R32G32_FLOAT,
-      [PIPE_FORMAT_R32G32B32_FLOAT]       = GEN6_FORMAT_R32G32B32_FLOAT,
-      [PIPE_FORMAT_R32G32B32A32_FLOAT]    = GEN6_FORMAT_R32G32B32A32_FLOAT,
-      [PIPE_FORMAT_R32_UNORM]             = GEN6_FORMAT_R32_UNORM,
-      [PIPE_FORMAT_R32G32_UNORM]          = GEN6_FORMAT_R32G32_UNORM,
-      [PIPE_FORMAT_R32G32B32_UNORM]       = GEN6_FORMAT_R32G32B32_UNORM,
-      [PIPE_FORMAT_R32G32B32A32_UNORM]    = GEN6_FORMAT_R32G32B32A32_UNORM,
-      [PIPE_FORMAT_R32_USCALED]           = GEN6_FORMAT_R32_USCALED,
-      [PIPE_FORMAT_R32G32_USCALED]        = GEN6_FORMAT_R32G32_USCALED,
-      [PIPE_FORMAT_R32G32B32_USCALED]     = GEN6_FORMAT_R32G32B32_USCALED,
-      [PIPE_FORMAT_R32G32B32A32_USCALED]  = GEN6_FORMAT_R32G32B32A32_USCALED,
-      [PIPE_FORMAT_R32_SNORM]             = GEN6_FORMAT_R32_SNORM,
-      [PIPE_FORMAT_R32G32_SNORM]          = GEN6_FORMAT_R32G32_SNORM,
-      [PIPE_FORMAT_R32G32B32_SNORM]       = GEN6_FORMAT_R32G32B32_SNORM,
-      [PIPE_FORMAT_R32G32B32A32_SNORM]    = GEN6_FORMAT_R32G32B32A32_SNORM,
-      [PIPE_FORMAT_R32_SSCALED]           = GEN6_FORMAT_R32_SSCALED,
-      [PIPE_FORMAT_R32G32_SSCALED]        = GEN6_FORMAT_R32G32_SSCALED,
-      [PIPE_FORMAT_R32G32B32_SSCALED]     = GEN6_FORMAT_R32G32B32_SSCALED,
-      [PIPE_FORMAT_R32G32B32A32_SSCALED]  = GEN6_FORMAT_R32G32B32A32_SSCALED,
-      [PIPE_FORMAT_R16_UNORM]             = GEN6_FORMAT_R16_UNORM,
-      [PIPE_FORMAT_R16G16_UNORM]          = GEN6_FORMAT_R16G16_UNORM,
-      [PIPE_FORMAT_R16G16B16_UNORM]       = GEN6_FORMAT_R16G16B16_UNORM,
-      [PIPE_FORMAT_R16G16B16A16_UNORM]    = GEN6_FORMAT_R16G16B16A16_UNORM,
-      [PIPE_FORMAT_R16_USCALED]           = GEN6_FORMAT_R16_USCALED,
-      [PIPE_FORMAT_R16G16_USCALED]        = GEN6_FORMAT_R16G16_USCALED,
-      [PIPE_FORMAT_R16G16B16_USCALED]     = GEN6_FORMAT_R16G16B16_USCALED,
-      [PIPE_FORMAT_R16G16B16A16_USCALED]  = GEN6_FORMAT_R16G16B16A16_USCALED,
-      [PIPE_FORMAT_R16_SNORM]             = GEN6_FORMAT_R16_SNORM,
-      [PIPE_FORMAT_R16G16_SNORM]          = GEN6_FORMAT_R16G16_SNORM,
-      [PIPE_FORMAT_R16G16B16_SNORM]       = GEN6_FORMAT_R16G16B16_SNORM,
-      [PIPE_FORMAT_R16G16B16A16_SNORM]    = GEN6_FORMAT_R16G16B16A16_SNORM,
-      [PIPE_FORMAT_R16_SSCALED]           = GEN6_FORMAT_R16_SSCALED,
-      [PIPE_FORMAT_R16G16_SSCALED]        = GEN6_FORMAT_R16G16_SSCALED,
-      [PIPE_FORMAT_R16G16B16_SSCALED]     = GEN6_FORMAT_R16G16B16_SSCALED,
-      [PIPE_FORMAT_R16G16B16A16_SSCALED]  = GEN6_FORMAT_R16G16B16A16_SSCALED,
-      [PIPE_FORMAT_R8_UNORM]              = GEN6_FORMAT_R8_UNORM,
-      [PIPE_FORMAT_R8G8_UNORM]            = GEN6_FORMAT_R8G8_UNORM,
-      [PIPE_FORMAT_R8G8B8_UNORM]          = GEN6_FORMAT_R8G8B8_UNORM,
-      [PIPE_FORMAT_R8G8B8A8_UNORM]        = GEN6_FORMAT_R8G8B8A8_UNORM,
-      [PIPE_FORMAT_X8B8G8R8_UNORM]        = 0,
-      [PIPE_FORMAT_R8_USCALED]            = GEN6_FORMAT_R8_USCALED,
-      [PIPE_FORMAT_R8G8_USCALED]          = GEN6_FORMAT_R8G8_USCALED,
-      [PIPE_FORMAT_R8G8B8_USCALED]        = GEN6_FORMAT_R8G8B8_USCALED,
-      [PIPE_FORMAT_R8G8B8A8_USCALED]      = GEN6_FORMAT_R8G8B8A8_USCALED,
-      [PIPE_FORMAT_R8_SNORM]              = GEN6_FORMAT_R8_SNORM,
-      [PIPE_FORMAT_R8G8_SNORM]            = GEN6_FORMAT_R8G8_SNORM,
-      [PIPE_FORMAT_R8G8B8_SNORM]          = GEN6_FORMAT_R8G8B8_SNORM,
-      [PIPE_FORMAT_R8G8B8A8_SNORM]        = GEN6_FORMAT_R8G8B8A8_SNORM,
-      [PIPE_FORMAT_R8_SSCALED]            = GEN6_FORMAT_R8_SSCALED,
-      [PIPE_FORMAT_R8G8_SSCALED]          = GEN6_FORMAT_R8G8_SSCALED,
-      [PIPE_FORMAT_R8G8B8_SSCALED]        = GEN6_FORMAT_R8G8B8_SSCALED,
-      [PIPE_FORMAT_R8G8B8A8_SSCALED]      = GEN6_FORMAT_R8G8B8A8_SSCALED,
-      [PIPE_FORMAT_R32_FIXED]             = GEN6_FORMAT_R32_SFIXED,
-      [PIPE_FORMAT_R32G32_FIXED]          = GEN6_FORMAT_R32G32_SFIXED,
-      [PIPE_FORMAT_R32G32B32_FIXED]       = GEN6_FORMAT_R32G32B32_SFIXED,
-      [PIPE_FORMAT_R32G32B32A32_FIXED]    = GEN6_FORMAT_R32G32B32A32_SFIXED,
-      [PIPE_FORMAT_R16_FLOAT]             = GEN6_FORMAT_R16_FLOAT,
-      [PIPE_FORMAT_R16G16_FLOAT]          = GEN6_FORMAT_R16G16_FLOAT,
-      [PIPE_FORMAT_R16G16B16_FLOAT]       = GEN6_FORMAT_R16G16B16_FLOAT,
-      [PIPE_FORMAT_R16G16B16A16_FLOAT]    = GEN6_FORMAT_R16G16B16A16_FLOAT,
-      [PIPE_FORMAT_L8_SRGB]               = GEN6_FORMAT_L8_UNORM_SRGB,
-      [PIPE_FORMAT_L8A8_SRGB]             = GEN6_FORMAT_L8A8_UNORM_SRGB,
-      [PIPE_FORMAT_R8G8B8_SRGB]           = GEN6_FORMAT_R8G8B8_UNORM_SRGB,
-      [PIPE_FORMAT_A8B8G8R8_SRGB]         = 0,
-      [PIPE_FORMAT_X8B8G8R8_SRGB]         = 0,
-      [PIPE_FORMAT_B8G8R8A8_SRGB]         = GEN6_FORMAT_B8G8R8A8_UNORM_SRGB,
-      [PIPE_FORMAT_B8G8R8X8_SRGB]         = GEN6_FORMAT_B8G8R8X8_UNORM_SRGB,
-      [PIPE_FORMAT_A8R8G8B8_SRGB]         = 0,
-      [PIPE_FORMAT_X8R8G8B8_SRGB]         = 0,
-      [PIPE_FORMAT_R8G8B8A8_SRGB]         = GEN6_FORMAT_R8G8B8A8_UNORM_SRGB,
-      [PIPE_FORMAT_DXT1_RGB]              = GEN6_FORMAT_DXT1_RGB,
-      [PIPE_FORMAT_DXT1_RGBA]             = GEN6_FORMAT_BC1_UNORM,
-      [PIPE_FORMAT_DXT3_RGBA]             = GEN6_FORMAT_BC2_UNORM,
-      [PIPE_FORMAT_DXT5_RGBA]             = GEN6_FORMAT_BC3_UNORM,
-      [PIPE_FORMAT_DXT1_SRGB]             = GEN6_FORMAT_DXT1_RGB_SRGB,
-      [PIPE_FORMAT_DXT1_SRGBA]            = GEN6_FORMAT_BC1_UNORM_SRGB,
-      [PIPE_FORMAT_DXT3_SRGBA]            = GEN6_FORMAT_BC2_UNORM_SRGB,
-      [PIPE_FORMAT_DXT5_SRGBA]            = GEN6_FORMAT_BC3_UNORM_SRGB,
-      [PIPE_FORMAT_RGTC1_UNORM]           = GEN6_FORMAT_BC4_UNORM,
-      [PIPE_FORMAT_RGTC1_SNORM]           = GEN6_FORMAT_BC4_SNORM,
-      [PIPE_FORMAT_RGTC2_UNORM]           = GEN6_FORMAT_BC5_UNORM,
-      [PIPE_FORMAT_RGTC2_SNORM]           = GEN6_FORMAT_BC5_SNORM,
-      [PIPE_FORMAT_R8G8_B8G8_UNORM]       = 0,
-      [PIPE_FORMAT_G8R8_G8B8_UNORM]       = 0,
-      [PIPE_FORMAT_R8SG8SB8UX8U_NORM]     = 0,
-      [PIPE_FORMAT_R5SG5SB6U_NORM]        = 0,
-      [PIPE_FORMAT_A8B8G8R8_UNORM]        = 0,
-      [PIPE_FORMAT_B5G5R5X1_UNORM]        = GEN6_FORMAT_B5G5R5X1_UNORM,
-      [PIPE_FORMAT_R10G10B10A2_USCALED]   = GEN6_FORMAT_R10G10B10A2_USCALED,
-      [PIPE_FORMAT_R11G11B10_FLOAT]       = GEN6_FORMAT_R11G11B10_FLOAT,
-      [PIPE_FORMAT_R9G9B9E5_FLOAT]        = GEN6_FORMAT_R9G9B9E5_SHAREDEXP,
-      [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT]  = 0,
-      [PIPE_FORMAT_R1_UNORM]              = GEN6_FORMAT_R1_UNORM,
-      [PIPE_FORMAT_R10G10B10X2_USCALED]   = GEN6_FORMAT_R10G10B10X2_USCALED,
-      [PIPE_FORMAT_R10G10B10X2_SNORM]     = 0,
-      [PIPE_FORMAT_L4A4_UNORM]            = 0,
-      [PIPE_FORMAT_B10G10R10A2_UNORM]     = GEN6_FORMAT_B10G10R10A2_UNORM,
-      [PIPE_FORMAT_R10SG10SB10SA2U_NORM]  = 0,
-      [PIPE_FORMAT_R8G8Bx_SNORM]          = 0,
-      [PIPE_FORMAT_R8G8B8X8_UNORM]        = GEN6_FORMAT_R8G8B8X8_UNORM,
-      [PIPE_FORMAT_B4G4R4X4_UNORM]        = 0,
-      [PIPE_FORMAT_X24S8_UINT]            = 0,
-      [PIPE_FORMAT_S8X24_UINT]            = 0,
-      [PIPE_FORMAT_X32_S8X24_UINT]        = 0,
-      [PIPE_FORMAT_B2G3R3_UNORM]          = 0,
-      [PIPE_FORMAT_L16A16_UNORM]          = GEN6_FORMAT_L16A16_UNORM,
-      [PIPE_FORMAT_A16_UNORM]             = GEN6_FORMAT_A16_UNORM,
-      [PIPE_FORMAT_I16_UNORM]             = GEN6_FORMAT_I16_UNORM,
-      [PIPE_FORMAT_LATC1_UNORM]           = 0,
-      [PIPE_FORMAT_LATC1_SNORM]           = 0,
-      [PIPE_FORMAT_LATC2_UNORM]           = 0,
-      [PIPE_FORMAT_LATC2_SNORM]           = 0,
-      [PIPE_FORMAT_A8_SNORM]              = 0,
-      [PIPE_FORMAT_L8_SNORM]              = 0,
-      [PIPE_FORMAT_L8A8_SNORM]            = 0,
-      [PIPE_FORMAT_I8_SNORM]              = 0,
-      [PIPE_FORMAT_A16_SNORM]             = 0,
-      [PIPE_FORMAT_L16_SNORM]             = 0,
-      [PIPE_FORMAT_L16A16_SNORM]          = 0,
-      [PIPE_FORMAT_I16_SNORM]             = 0,
-      [PIPE_FORMAT_A16_FLOAT]             = GEN6_FORMAT_A16_FLOAT,
-      [PIPE_FORMAT_L16_FLOAT]             = GEN6_FORMAT_L16_FLOAT,
-      [PIPE_FORMAT_L16A16_FLOAT]          = GEN6_FORMAT_L16A16_FLOAT,
-      [PIPE_FORMAT_I16_FLOAT]             = GEN6_FORMAT_I16_FLOAT,
-      [PIPE_FORMAT_A32_FLOAT]             = GEN6_FORMAT_A32_FLOAT,
-      [PIPE_FORMAT_L32_FLOAT]             = GEN6_FORMAT_L32_FLOAT,
-      [PIPE_FORMAT_L32A32_FLOAT]          = GEN6_FORMAT_L32A32_FLOAT,
-      [PIPE_FORMAT_I32_FLOAT]             = GEN6_FORMAT_I32_FLOAT,
-      [PIPE_FORMAT_YV12]                  = 0,
-      [PIPE_FORMAT_YV16]                  = 0,
-      [PIPE_FORMAT_IYUV]                  = 0,
-      [PIPE_FORMAT_NV12]                  = 0,
-      [PIPE_FORMAT_NV21]                  = 0,
-      [PIPE_FORMAT_A4R4_UNORM]            = 0,
-      [PIPE_FORMAT_R4A4_UNORM]            = 0,
-      [PIPE_FORMAT_R8A8_UNORM]            = 0,
-      [PIPE_FORMAT_A8R8_UNORM]            = 0,
-      [PIPE_FORMAT_R10G10B10A2_SSCALED]   = GEN6_FORMAT_R10G10B10A2_SSCALED,
-      [PIPE_FORMAT_R10G10B10A2_SNORM]     = GEN6_FORMAT_R10G10B10A2_SNORM,
-      [PIPE_FORMAT_B10G10R10A2_USCALED]   = GEN6_FORMAT_B10G10R10A2_USCALED,
-      [PIPE_FORMAT_B10G10R10A2_SSCALED]   = GEN6_FORMAT_B10G10R10A2_SSCALED,
-      [PIPE_FORMAT_B10G10R10A2_SNORM]     = GEN6_FORMAT_B10G10R10A2_SNORM,
-      [PIPE_FORMAT_R8_UINT]               = GEN6_FORMAT_R8_UINT,
-      [PIPE_FORMAT_R8G8_UINT]             = GEN6_FORMAT_R8G8_UINT,
-      [PIPE_FORMAT_R8G8B8_UINT]           = GEN6_FORMAT_R8G8B8_UINT,
-      [PIPE_FORMAT_R8G8B8A8_UINT]         = GEN6_FORMAT_R8G8B8A8_UINT,
-      [PIPE_FORMAT_R8_SINT]               = GEN6_FORMAT_R8_SINT,
-      [PIPE_FORMAT_R8G8_SINT]             = GEN6_FORMAT_R8G8_SINT,
-      [PIPE_FORMAT_R8G8B8_SINT]           = GEN6_FORMAT_R8G8B8_SINT,
-      [PIPE_FORMAT_R8G8B8A8_SINT]         = GEN6_FORMAT_R8G8B8A8_SINT,
-      [PIPE_FORMAT_R16_UINT]              = GEN6_FORMAT_R16_UINT,
-      [PIPE_FORMAT_R16G16_UINT]           = GEN6_FORMAT_R16G16_UINT,
-      [PIPE_FORMAT_R16G16B16_UINT]        = GEN6_FORMAT_R16G16B16_UINT,
-      [PIPE_FORMAT_R16G16B16A16_UINT]     = GEN6_FORMAT_R16G16B16A16_UINT,
-      [PIPE_FORMAT_R16_SINT]              = GEN6_FORMAT_R16_SINT,
-      [PIPE_FORMAT_R16G16_SINT]           = GEN6_FORMAT_R16G16_SINT,
-      [PIPE_FORMAT_R16G16B16_SINT]        = GEN6_FORMAT_R16G16B16_SINT,
-      [PIPE_FORMAT_R16G16B16A16_SINT]     = GEN6_FORMAT_R16G16B16A16_SINT,
-      [PIPE_FORMAT_R32_UINT]              = GEN6_FORMAT_R32_UINT,
-      [PIPE_FORMAT_R32G32_UINT]           = GEN6_FORMAT_R32G32_UINT,
-      [PIPE_FORMAT_R32G32B32_UINT]        = GEN6_FORMAT_R32G32B32_UINT,
-      [PIPE_FORMAT_R32G32B32A32_UINT]     = GEN6_FORMAT_R32G32B32A32_UINT,
-      [PIPE_FORMAT_R32_SINT]              = GEN6_FORMAT_R32_SINT,
-      [PIPE_FORMAT_R32G32_SINT]           = GEN6_FORMAT_R32G32_SINT,
-      [PIPE_FORMAT_R32G32B32_SINT]        = GEN6_FORMAT_R32G32B32_SINT,
-      [PIPE_FORMAT_R32G32B32A32_SINT]     = GEN6_FORMAT_R32G32B32A32_SINT,
-      [PIPE_FORMAT_A8_UINT]               = 0,
-      [PIPE_FORMAT_I8_UINT]               = GEN6_FORMAT_I8_UINT,
-      [PIPE_FORMAT_L8_UINT]               = GEN6_FORMAT_L8_UINT,
-      [PIPE_FORMAT_L8A8_UINT]             = GEN6_FORMAT_L8A8_UINT,
-      [PIPE_FORMAT_A8_SINT]               = 0,
-      [PIPE_FORMAT_I8_SINT]               = GEN6_FORMAT_I8_SINT,
-      [PIPE_FORMAT_L8_SINT]               = GEN6_FORMAT_L8_SINT,
-      [PIPE_FORMAT_L8A8_SINT]             = GEN6_FORMAT_L8A8_SINT,
-      [PIPE_FORMAT_A16_UINT]              = 0,
-      [PIPE_FORMAT_I16_UINT]              = 0,
-      [PIPE_FORMAT_L16_UINT]              = 0,
-      [PIPE_FORMAT_L16A16_UINT]           = 0,
-      [PIPE_FORMAT_A16_SINT]              = 0,
-      [PIPE_FORMAT_I16_SINT]              = 0,
-      [PIPE_FORMAT_L16_SINT]              = 0,
-      [PIPE_FORMAT_L16A16_SINT]           = 0,
-      [PIPE_FORMAT_A32_UINT]              = 0,
-      [PIPE_FORMAT_I32_UINT]              = 0,
-      [PIPE_FORMAT_L32_UINT]              = 0,
-      [PIPE_FORMAT_L32A32_UINT]           = 0,
-      [PIPE_FORMAT_A32_SINT]              = 0,
-      [PIPE_FORMAT_I32_SINT]              = 0,
-      [PIPE_FORMAT_L32_SINT]              = 0,
-      [PIPE_FORMAT_L32A32_SINT]           = 0,
-      [PIPE_FORMAT_B10G10R10A2_UINT]      = GEN6_FORMAT_B10G10R10A2_UINT,
-      [PIPE_FORMAT_ETC1_RGB8]             = GEN6_FORMAT_ETC1_RGB8,
-      [PIPE_FORMAT_R8G8_R8B8_UNORM]       = 0,
-      [PIPE_FORMAT_G8R8_B8R8_UNORM]       = 0,
-      [PIPE_FORMAT_R8G8B8X8_SNORM]        = 0,
-      [PIPE_FORMAT_R8G8B8X8_SRGB]         = 0,
-      [PIPE_FORMAT_R8G8B8X8_UINT]         = 0,
-      [PIPE_FORMAT_R8G8B8X8_SINT]         = 0,
-      [PIPE_FORMAT_B10G10R10X2_UNORM]     = GEN6_FORMAT_B10G10R10X2_UNORM,
-      [PIPE_FORMAT_R16G16B16X16_UNORM]    = GEN6_FORMAT_R16G16B16X16_UNORM,
-      [PIPE_FORMAT_R16G16B16X16_SNORM]    = 0,
-      [PIPE_FORMAT_R16G16B16X16_FLOAT]    = GEN6_FORMAT_R16G16B16X16_FLOAT,
-      [PIPE_FORMAT_R16G16B16X16_UINT]     = 0,
-      [PIPE_FORMAT_R16G16B16X16_SINT]     = 0,
-      [PIPE_FORMAT_R32G32B32X32_FLOAT]    = GEN6_FORMAT_R32G32B32X32_FLOAT,
-      [PIPE_FORMAT_R32G32B32X32_UINT]     = 0,
-      [PIPE_FORMAT_R32G32B32X32_SINT]     = 0,
-      [PIPE_FORMAT_R8A8_SNORM]            = 0,
-      [PIPE_FORMAT_R16A16_UNORM]          = 0,
-      [PIPE_FORMAT_R16A16_SNORM]          = 0,
-      [PIPE_FORMAT_R16A16_FLOAT]          = 0,
-      [PIPE_FORMAT_R32A32_FLOAT]          = 0,
-      [PIPE_FORMAT_R8A8_UINT]             = 0,
-      [PIPE_FORMAT_R8A8_SINT]             = 0,
-      [PIPE_FORMAT_R16A16_UINT]           = 0,
-      [PIPE_FORMAT_R16A16_SINT]           = 0,
-      [PIPE_FORMAT_R32A32_UINT]           = 0,
-      [PIPE_FORMAT_R32A32_SINT]           = 0,
-      [PIPE_FORMAT_R10G10B10A2_UINT]      = GEN6_FORMAT_R10G10B10A2_UINT,
-      [PIPE_FORMAT_B5G6R5_SRGB]           = GEN6_FORMAT_B5G6R5_UNORM_SRGB,
-   };
-   int sfmt = format_mapping[format];
-
-   /* GEN6_FORMAT_R32G32B32A32_FLOAT happens to be 0 */
-   if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT)
-      sfmt = -1;
-
-   return sfmt;
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_format.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_format.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_format.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_format.h	1970-01-01 00:00:00.000000000 +0000
@@ -1,168 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2013 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#ifndef ILO_FORMAT_H
-#define ILO_FORMAT_H
-
-#include "genhw/genhw.h"
-#include "ilo_core.h"
-#include "ilo_dev.h"
-
-bool
-ilo_format_support_vb(const struct ilo_dev *dev,
-                      enum pipe_format format);
-
-bool
-ilo_format_support_sol(const struct ilo_dev *dev,
-                       enum pipe_format format);
-
-bool
-ilo_format_support_sampler(const struct ilo_dev *dev,
-                           enum pipe_format format);
-
-bool
-ilo_format_support_rt(const struct ilo_dev *dev,
-                      enum pipe_format format);
-
-bool
-ilo_format_support_zs(const struct ilo_dev *dev,
-                      enum pipe_format format);
-
-int
-ilo_format_translate_color(const struct ilo_dev *dev,
-                           enum pipe_format format);
-
-/**
- * Translate a pipe format to a hardware surface format suitable for
- * the given purpose.  Return -1 on errors.
- *
- * This is an inline function not only for performance reasons.  There are
- * caveats that the callers should be aware of before calling this function.
- */
-static inline int
-ilo_format_translate(const struct ilo_dev *dev,
-                     enum pipe_format format, unsigned bind)
-{
-   switch (bind) {
-   case PIPE_BIND_RENDER_TARGET:
-      /*
-       * Some RGBX formats are not supported as render target formats.  But we
-       * can use their RGBA counterparts and force the destination alpha to be
-       * one when blending is enabled.
-       */
-      switch (format) {
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
-         return GEN6_FORMAT_B8G8R8A8_UNORM;
-      default:
-         return ilo_format_translate_color(dev, format);
-      }
-      break;
-   case PIPE_BIND_SAMPLER_VIEW:
-      /*
-       * For depth formats, we want the depth values to be returned as R
-       * values.  But we assume in many places that the depth values are
-       * returned as I values (util_make_fragment_tex_shader_writedepth() is
-       * one such example).  We have to live with that at least for now.
-       *
-       * For ETC1 format, the texture data will be decompressed before being
-       * written to the bo.  See tex_staging_sys_convert_write().
-       */
-      switch (format) {
-      case PIPE_FORMAT_Z16_UNORM:
-         return GEN6_FORMAT_I16_UNORM;
-      case PIPE_FORMAT_Z32_FLOAT:
-         return GEN6_FORMAT_I32_FLOAT;
-      case PIPE_FORMAT_Z24X8_UNORM:
-      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-         return GEN6_FORMAT_I24X8_UNORM;
-      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-         return GEN6_FORMAT_I32X32_FLOAT;
-      case PIPE_FORMAT_ETC1_RGB8:
-         return GEN6_FORMAT_R8G8B8X8_UNORM;
-      default:
-         return ilo_format_translate_color(dev, format);
-      }
-      break;
-   case PIPE_BIND_VERTEX_BUFFER:
-      if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
-         return ilo_format_translate_color(dev, format);
-
-      /*
-       * Some 3-component formats are not supported as vertex element formats.
-       * But since we move between vertices using vb->stride, we should be
-       * good to use their 4-component counterparts if we force the W
-       * component to be one.  The only exception is that the vb boundary
-       * check for the last vertex may fail.
-       */
-      switch (format) {
-      case PIPE_FORMAT_R16G16B16_FLOAT:
-         return GEN6_FORMAT_R16G16B16A16_FLOAT;
-      case PIPE_FORMAT_R16G16B16_UINT:
-         return GEN6_FORMAT_R16G16B16A16_UINT;
-      case PIPE_FORMAT_R16G16B16_SINT:
-         return GEN6_FORMAT_R16G16B16A16_SINT;
-      case PIPE_FORMAT_R8G8B8_UINT:
-         return GEN6_FORMAT_R8G8B8A8_UINT;
-      case PIPE_FORMAT_R8G8B8_SINT:
-         return GEN6_FORMAT_R8G8B8A8_SINT;
-      default:
-         return ilo_format_translate_color(dev, format);
-      }
-      break;
-   case PIPE_BIND_STREAM_OUTPUT:
-      return ilo_format_translate_color(dev, format);
-      break;
-   default:
-      assert(!"cannot translate format");
-      break;
-   }
-
-   return -1;
-}
-
-static inline int
-ilo_format_translate_render(const struct ilo_dev *dev,
-                            enum pipe_format format)
-{
-   return ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET);
-}
-
-static inline int
-ilo_format_translate_texture(const struct ilo_dev *dev,
-                             enum pipe_format format)
-{
-   return ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW);
-}
-
-static inline int
-ilo_format_translate_vertex(const struct ilo_dev *dev,
-                            enum pipe_format format)
-{
-   return ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
-}
-
-#endif /* ILO_FORMAT_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_image.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_image.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_image.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_image.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,269 +40,356 @@
                         IMAGE_TILING_W)
 };
 
-struct ilo_image_params {
-   const struct ilo_dev *dev;
-   const struct pipe_resource *templ;
-   unsigned valid_tilings;
-
-   bool compressed;
-
-   unsigned h0, h1;
-   unsigned max_x, max_y;
+struct ilo_image_layout {
+   enum ilo_image_walk_type walk;
+   bool interleaved_samples;
+
+   uint8_t valid_tilings;
+   enum gen_surface_tiling tiling;
+
+   enum ilo_image_aux_type aux;
+
+   int align_i;
+   int align_j;
+
+   struct ilo_image_lod *lods;
+   int walk_layer_h0;
+   int walk_layer_h1;
+   int walk_layer_height;
+   int monolithic_width;
+   int monolithic_height;
 };
 
-static void
-img_get_slice_size(const struct ilo_image *img,
-                   const struct ilo_image_params *params,
-                   unsigned level, unsigned *width, unsigned *height)
-{
-   const struct pipe_resource *templ = params->templ;
-   unsigned w, h;
-
-   w = u_minify(img->width0, level);
-   h = u_minify(img->height0, level);
-
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 114:
-    *
-    *     "The dimensions of the mip maps are first determined by applying the
-    *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
-    *      if necessary, they are padded out to compression block boundaries."
-    */
-   w = align(w, img->block_width);
-   h = align(h, img->block_height);
+static enum ilo_image_walk_type
+image_get_gen6_walk(const struct ilo_dev *dev,
+                    const struct ilo_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* TODO we want LODs to be page-aligned */
+   if (info->type == GEN6_SURFTYPE_3D)
+      return ILO_IMAGE_WALK_3D;
 
    /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 111:
-    *
-    *     "If the surface is multisampled (4x), these values must be adjusted
-    *      as follows before proceeding:
-    *
-    *        W_L = ceiling(W_L / 2) * 4
-    *        H_L = ceiling(H_L / 2) * 4"
-    *
-    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
-    *
-    *     "If the surface is multisampled and it is a depth or stencil surface
-    *      or Multisampled Surface StorageFormat in SURFACE_STATE is
-    *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
-    *      proceeding:
-    *
-    *        #samples  W_L =                    H_L =
-    *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
-    *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
-    *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
-    *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
-    *
-    * For interleaved samples (4x), where pixels
-    *
-    *   (x, y  ) (x+1, y  )
-    *   (x, y+1) (x+1, y+1)
-    *
-    * would be is occupied by
+    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
     *
-    *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
-    *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
-    *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
-    *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
+    *     "The separate stencil buffer does not support mip mapping, thus the
+    *      storage for LODs other than LOD 0 is not needed. The following
+    *      QPitch equation applies only to the separate stencil buffer:
     *
-    * Thus the need to
+    *        QPitch = h_0"
     *
-    *   w = align(w, 2) * 2;
-    *   y = align(y, 2) * 2;
+    * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
+    * when bound.
     */
-   if (img->interleaved_samples) {
-      switch (templ->nr_samples) {
-      case 0:
-      case 1:
-         break;
-      case 2:
-         w = align(w, 2) * 2;
-         break;
-      case 4:
-         w = align(w, 2) * 2;
-         h = align(h, 2) * 2;
-         break;
-      case 8:
-         w = align(w, 2) * 4;
-         h = align(h, 2) * 2;
-         break;
-      case 16:
-         w = align(w, 2) * 4;
-         h = align(h, 2) * 4;
-         break;
-      default:
-         assert(!"unsupported sample count");
-         break;
-      }
-   }
+   if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
+      return ILO_IMAGE_WALK_LOD;
+
+   /* compact spacing is not supported otherwise */
+   return ILO_IMAGE_WALK_LAYER;
+}
+
+static enum ilo_image_walk_type
+image_get_gen7_walk(const struct ilo_dev *dev,
+                    const struct ilo_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (info->type == GEN6_SURFTYPE_3D)
+      return ILO_IMAGE_WALK_3D;
 
    /*
-    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
+    * From the Ivy Bridge PRM, volume 1 part 1, page 111:
     *
-    *     "For separate stencil buffer, the width must be mutiplied by 2 and
-    *      height divided by 2..."
+    *     "note that the depth buffer and stencil buffer have an implied value
+    *      of ARYSPC_FULL"
     *
-    * To make things easier (for transfer), we will just double the stencil
-    * stride in 3DSTATE_STENCIL_BUFFER.
+    * From the Ivy Bridge PRM, volume 4 part 1, page 66:
+    *
+    *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
+    *      Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
+    *      Spacing) must be set to ARYSPC_LOD0."
     */
-   w = align(w, img->align_i);
-   h = align(h, img->align_j);
-
-   *width = w;
-   *height = h;
+   if (info->sample_count > 1)
+      assert(info->level_count == 1);
+   return (info->bind_zs || info->level_count > 1) ?
+      ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
 }
 
-static unsigned
-img_get_num_layers(const struct ilo_image *img,
-                   const struct ilo_image_params *params)
+static bool
+image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
+                                   const struct ilo_image_info *info)
 {
-   const struct pipe_resource *templ = params->templ;
-   unsigned num_layers = templ->array_size;
-
-   /* samples of the same index are stored in a layer */
-   if (templ->nr_samples > 1 && !img->interleaved_samples)
-      num_layers *= templ->nr_samples;
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   return num_layers;
+   /*
+    * Gen6 supports only interleaved samples.  It is not explicitly stated,
+    * but on Gen7+, render targets are expected to be UMS/CMS (samples
+    * non-interleaved) and depth/stencil buffers are expected to be IMS
+    * (samples interleaved).
+    *
+    * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
+    */
+   return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
 }
 
-static void
-img_init_layer_height(struct ilo_image *img,
-                      struct ilo_image_params *params)
+static uint8_t
+image_get_gen6_valid_tilings(const struct ilo_dev *dev,
+                             const struct ilo_image_info *info)
 {
-   const struct pipe_resource *templ = params->templ;
-   unsigned num_layers;
+   uint8_t valid_tilings = IMAGE_TILING_ALL;
 
-   if (img->walk != ILO_IMAGE_WALK_LAYER)
-      return;
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   num_layers = img_get_num_layers(img, params);
-   if (num_layers <= 1)
-      return;
+   if (info->valid_tilings)
+      valid_tilings &= info->valid_tilings;
 
    /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
-    *
-    *     "The following equation is used for surface formats other than
-    *      compressed textures:
-    *
-    *        QPitch = (h0 + h1 + 11j)"
-    *
-    *     "The equation for compressed textures (BC* and FXT1 surface formats)
-    *      follows:
-    *
-    *        QPitch = (h0 + h1 + 11j) / 4"
+    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
     *
-    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
-    *      value calculated in the equation above, for every other odd Surface
-    *      Height starting from 1 i.e. 1,5,9,13"
+    *     "Display/Overlay   Y-Major not supported.
+    *                        X-Major required for Async Flips"
+    */
+   if (unlikely(info->bind_scanout))
+      valid_tilings &= IMAGE_TILING_X;
+
+   /*
+    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
     *
-    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
+    *     "The cursor surface address must be 4K byte aligned. The cursor must
+    *      be in linear memory, it cannot be tiled."
+    */
+   if (unlikely(info->bind_cursor))
+      valid_tilings &= IMAGE_TILING_NONE;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
     *
-    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
-    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
+    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
+    *      Depth Buffer is not supported."
     *
-    *        QPitch = (h0 + h1 + 12j)
-    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
+    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
     *
-    *      (There are many typos or missing words here...)"
+    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
     *
-    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
-    * the base address.  The PRM divides QPitch by 4 for compressed formats
-    * because the block height for those formats are 4, and it wants QPitch to
-    * mean the number of memory rows, as opposed to texel rows, between
-    * slices.  Since we use texel rows everywhere, we do not need to divide
-    * QPitch by 4.
+    *     "W-Major Tile Format is used for separate stencil."
     */
-   img->walk_layer_height = params->h0 + params->h1 +
-      ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
+   if (info->bind_zs) {
+      if (info->format == GEN6_FORMAT_R8_UINT)
+         valid_tilings &= IMAGE_TILING_W;
+      else
+         valid_tilings &= IMAGE_TILING_Y;
+   }
 
-   if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
-       img->height0 % 4 == 1)
-      img->walk_layer_height += 4;
+   if (info->bind_surface_sampler ||
+       info->bind_surface_dp_render ||
+       info->bind_surface_dp_typed) {
+      /*
+       * From the Haswell PRM, volume 2d, page 233:
+       *
+       *     "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
+       *      (Tiled Surface) must be TRUE."
+       */
+      if (info->sample_count > 1)
+         valid_tilings &= ~IMAGE_TILING_NONE;
 
-   params->max_y += img->walk_layer_height * (num_layers - 1);
+      if (ilo_dev_gen(dev) < ILO_GEN(8))
+         valid_tilings &= ~IMAGE_TILING_W;
+   }
+
+   if (info->bind_surface_dp_render) {
+      /*
+       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
+       *
+       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
+       *      either TileX or Linear."
+       *
+       * From the Haswell PRM, volume 5, page 32:
+       *
+       *     "NOTE: 128 BPP format color buffer (render target) supports
+       *      Linear, TiledX and TiledY."
+       */
+      if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16)
+         valid_tilings &= ~IMAGE_TILING_Y;
+
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+       *
+       *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
+       *      for all tiled Y Render Target surfaces."
+       *
+       *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
+       *
+       * R32G32B32_FLOAT is not renderable and we only need an assert() here.
+       */
+      if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
+         assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
+   }
+
+   return valid_tilings;
 }
 
-static void
-img_init_lods(struct ilo_image *img,
-              struct ilo_image_params *params)
+/* Estimate the total size of the image from its dimensions and counts. */
+static uint64_t
+image_get_gen6_estimated_size(const struct ilo_dev *dev,
+                              const struct ilo_image_info *info)
+{
+   /* padding not considered */
+   /* widen to 64 bits first so the products cannot wrap in a narrower type */
+   const uint64_t slice_size = (uint64_t) info->width * info->height *
+      info->block_size / (info->block_width * info->block_height);
+   const uint64_t slice_count =
+      (uint64_t) info->depth * info->array_size * info->sample_count;
+   const uint64_t estimated_size = slice_size * slice_count;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* with more than one level, scale the base estimate by 4/3 */
+   return (info->level_count == 1) ? estimated_size : estimated_size * 4 / 3;
+}
+
+/* Choose one tiling out of the valid_tilings bitmask for this image. */
+static enum gen_surface_tiling
+image_get_gen6_tiling(const struct ilo_dev *dev,
+                      const struct ilo_image_info *info,
+                      uint8_t valid_tilings)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* a single candidate leaves nothing to decide */
+   if (valid_tilings == IMAGE_TILING_NONE)
+      return GEN6_TILING_NONE;
+   if (valid_tilings == IMAGE_TILING_X)
+      return GEN6_TILING_X;
+   if (valid_tilings == IMAGE_TILING_Y)
+      return GEN6_TILING_Y;
+   if (valid_tilings == IMAGE_TILING_W)
+      return GEN8_TILING_W;
+
+   /*
+    * X-tiling has the property that vertically adjacent pixels are usually in
+    * the same page.  When the image size is less than a page, the image
+    * height is 1, or when the image is not accessed in blocks, there is no
+    * reason to tile.
+    *
+    * Y-tiling is similar, where vertically adjacent pixels are usually in the
+    * same cacheline.
+    */
+   if (valid_tilings & IMAGE_TILING_NONE) {
+      const bool accessed_in_blocks = info->bind_surface_sampler ||
+                                      info->bind_surface_dp_render ||
+                                      info->bind_surface_dp_typed;
+      const uint64_t size = image_get_gen6_estimated_size(dev, info);
+
+      if (info->height == 1 || !accessed_in_blocks ||
+          size <= 64 || size > info->prefer_linear_threshold)
+         return GEN6_TILING_NONE;
+
+      /* at or below 2048 bytes, X-tiling is no longer a candidate */
+      if (size <= 2048)
+         valid_tilings &= ~IMAGE_TILING_X;
+   }
+
+   /* of what remains, Y is preferred over X, and X over linear */
+   if (valid_tilings & IMAGE_TILING_Y)
+      return GEN6_TILING_Y;
+   if (valid_tilings & IMAGE_TILING_X)
+      return GEN6_TILING_X;
+
+   return GEN6_TILING_NONE;
+}
+
+static bool
+image_get_gen6_hiz_enable(const struct ilo_dev *dev,
+                          const struct ilo_image_info *info)
 {
-   const struct pipe_resource *templ = params->templ;
-   unsigned cur_x, cur_y;
-   unsigned lv;
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   cur_x = 0;
-   cur_y = 0;
-   for (lv = 0; lv <= templ->last_level; lv++) {
-      unsigned lod_w, lod_h;
+   /* depth buffer? */
+   if (!info->bind_zs ||
+       info->format == GEN6_FORMAT_R8_UINT ||
+       info->interleaved_stencil)
+      return false;
 
-      img_get_slice_size(img, params, lv, &lod_w, &lod_h);
+   /* we want to be able to force 8x4 alignments */
+   if (info->type == GEN6_SURFTYPE_1D)
+      return false;
 
-      img->lods[lv].x = cur_x;
-      img->lods[lv].y = cur_y;
-      img->lods[lv].slice_width = lod_w;
-      img->lods[lv].slice_height = lod_h;
+   if (info->aux_disable)
+      return false;
 
-      switch (img->walk) {
-      case ILO_IMAGE_WALK_LAYER:
-         /* MIPLAYOUT_BELOW */
-         if (lv == 1)
-            cur_x += lod_w;
-         else
-            cur_y += lod_h;
-         break;
-      case ILO_IMAGE_WALK_LOD:
-         lod_h *= img_get_num_layers(img, params);
-         if (lv == 1)
-            cur_x += lod_w;
-         else
-            cur_y += lod_h;
+   if (ilo_debug & ILO_DEBUG_NOHIZ)
+      return false;
 
-         /* every LOD begins at tile boundaries */
-         if (templ->last_level > 0) {
-            assert(img->format == PIPE_FORMAT_S8_UINT);
-            cur_x = align(cur_x, 64);
-            cur_y = align(cur_y, 64);
-         }
-         break;
-      case ILO_IMAGE_WALK_3D:
-         {
-            const unsigned num_slices = u_minify(templ->depth0, lv);
-            const unsigned num_slices_per_row = 1 << lv;
-            const unsigned num_rows =
-               (num_slices + num_slices_per_row - 1) / num_slices_per_row;
+   return true;
+}
 
-            lod_w *= num_slices_per_row;
-            lod_h *= num_rows;
+static bool
+image_get_gen7_mcs_enable(const struct ilo_dev *dev,
+                          const struct ilo_image_info *info,
+                          enum gen_surface_tiling tiling)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
 
-            cur_y += lod_h;
-         }
-         break;
-      }
+   if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
+      return false;
 
-      if (params->max_x < img->lods[lv].x + lod_w)
-         params->max_x = img->lods[lv].x + lod_w;
-      if (params->max_y < img->lods[lv].y + lod_h)
-         params->max_y = img->lods[lv].y + lod_h;
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 77:
+    *
+    *     "For Render Target and Sampling Engine Surfaces:If the surface is
+    *      multisampled (Number of Multisamples any value other than
+    *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
+    *
+    *     "This field must be set to 0 for all SINT MSRTs when all RT channels
+    *      are not written"
+    */
+   if (info->sample_count > 1) {
+      if (ilo_dev_gen(dev) < ILO_GEN(8))
+         assert(!info->is_integer);
+      return true;
    }
 
-   if (img->walk == ILO_IMAGE_WALK_LAYER) {
-      params->h0 = img->lods[0].slice_height;
+   if (info->aux_disable)
+      return false;
 
-      if (templ->last_level > 0)
-         params->h1 = img->lods[1].slice_height;
-      else
-         img_get_slice_size(img, params, 1, &cur_x, &params->h1);
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 326:
+    *
+    *     "When MCS is buffer is used for color clear of non-multisampler
+    *      render target, the following restrictions apply.
+    *      - Support is limited to tiled render targets.
+    *      - Support is for non-mip-mapped and non-array surface types only.
+    *      - Clear is supported only on the full RT; i.e., no partial clear or
+    *        overlapping clears.
+    *      - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
+    *        64bpp and 128bpp.
+    *      ..."
+    *
+    * How about SURFTYPE_3D?
+    */
+   if (!info->bind_surface_dp_render ||
+       tiling == GEN6_TILING_NONE ||
+       info->level_count > 1 ||
+       info->array_size > 1)
+      return false;
+
+   switch (info->block_size) {
+   case 4:
+   case 8:
+   case 16:
+      return true;
+   default:
+      return false;
    }
 }
 
 static void
-img_init_alignments(struct ilo_image *img,
-                    const struct ilo_image_params *params)
+image_get_gen6_alignments(const struct ilo_dev *dev,
+                          const struct ilo_image_info *info,
+                          int *align_i, int *align_j)
 {
-   const struct pipe_resource *templ = params->templ;
+   ILO_DEV_ASSERT(dev, 6, 6);
 
    /*
     * From the Sandy Bridge PRM, volume 1 part 1, page 113:
@@ -335,13 +422,33 @@
     *
     *                                  align_i        align_j
     *   compressed formats             block width    block height
-    *   PIPE_FORMAT_S8_UINT            4              2
+    *   GEN6_FORMAT_R8_UINT            4              2
     *   other depth/stencil formats    4              4
     *   4x multisampled                4              4
     *   bpp 96                         4              2
     *   others                         4              2 or 4
     */
 
+   *align_i = (info->compressed) ? info->block_width : 4;
+   if (info->compressed) {
+      *align_j = info->block_height;
+   } else if (info->bind_zs) {
+      *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
+   } else {
+      *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
+   }
+}
+
+static void
+image_get_gen7_alignments(const struct ilo_dev *dev,
+                          const struct ilo_image_info *info,
+                          enum gen_surface_tiling tiling,
+                          int *align_i, int *align_j)
+{
+   int i, j;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
    /*
     * From the Ivy Bridge PRM, volume 1 part 1, page 110:
     *
@@ -383,458 +490,301 @@
     *
     *                                  align_i        align_j
     *  compressed formats              block width    block height
-    *  PIPE_FORMAT_Z16_UNORM           8              4
-    *  PIPE_FORMAT_S8_UINT             8              8
+    *  GEN6_FORMAT_R16_UNORM           8              4
+    *  GEN6_FORMAT_R8_UINT             8              8
     *  other depth/stencil formats     4              4
     *  2x or 4x multisampled           4 or 8         4
     *  tiled Y                         4 or 8         4 (if rt)
-    *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
+    *  GEN6_FORMAT_R32G32B32_FLOAT     4 or 8         2
     *  others                          4 or 8         2 or 4
     */
-
-   if (params->compressed) {
-      /* this happens to be the case */
-      img->align_i = img->block_width;
-      img->align_j = img->block_height;
-   } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
-      if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
-         switch (img->format) {
-         case PIPE_FORMAT_Z16_UNORM:
-            img->align_i = 8;
-            img->align_j = 4;
-            break;
-         case PIPE_FORMAT_S8_UINT:
-            img->align_i = 8;
-            img->align_j = 8;
-            break;
-         default:
-            img->align_i = 4;
-            img->align_j = 4;
-            break;
-         }
-      } else {
-         switch (img->format) {
-         case PIPE_FORMAT_S8_UINT:
-            img->align_i = 4;
-            img->align_j = 2;
-            break;
-         default:
-            img->align_i = 4;
-            img->align_j = 4;
-            break;
-         }
+   if (info->compressed) {
+      i = info->block_width;
+      j = info->block_height;
+   } else if (info->bind_zs) {
+      switch (info->format) {
+      case GEN6_FORMAT_R16_UNORM:
+         i = 8;
+         j = 4;
+         break;
+      case GEN6_FORMAT_R8_UINT:
+         i = 8;
+         j = 8;
+         break;
+      default:
+         i = 4;
+         j = 4;
+         break;
       }
    } else {
       const bool valign_4 =
-         (templ->nr_samples > 1) ||
-         (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
-         (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
-          img->tiling == GEN6_TILING_Y &&
-          (templ->bind & PIPE_BIND_RENDER_TARGET));
-
-      if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
-          ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
-         assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
+         (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) ||
+          (tiling == GEN6_TILING_Y && info->bind_surface_dp_render));
 
-      img->align_i = 4;
-      img->align_j = (valign_4) ? 4 : 2;
-   }
+      if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4)
+         assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
 
-   /*
-    * the fact that align i and j are multiples of block width and height
-    * respectively is what makes the size of the bo a multiple of the block
-    * size, slices start at block boundaries, and many of the computations
-    * work.
-    */
-   assert(img->align_i % img->block_width == 0);
-   assert(img->align_j % img->block_height == 0);
+      i = 4;
+      j = (valign_4) ? 4 : 2;
+   }
 
-   /* make sure align() works */
-   assert(util_is_power_of_two(img->align_i) &&
-          util_is_power_of_two(img->align_j));
-   assert(util_is_power_of_two(img->block_width) &&
-          util_is_power_of_two(img->block_height));
+   *align_i = i;
+   *align_j = j;
 }
 
-static void
-img_init_tiling(struct ilo_image *img,
-                const struct ilo_image_params *params)
+static bool
+image_init_gen6_hardware_layout(const struct ilo_dev *dev,
+                                const struct ilo_image_info *info,
+                                struct ilo_image_layout *layout)
 {
-   const struct pipe_resource *templ = params->templ;
-   unsigned preferred_tilings = params->valid_tilings;
-
-   /* no fencing nor BLT support */
-   if (preferred_tilings & ~IMAGE_TILING_W)
-      preferred_tilings &= ~IMAGE_TILING_W;
-
-   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
-      /*
-       * heuristically set a minimum width/height for enabling tiling
-       */
-      if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X))
-         preferred_tilings &= ~IMAGE_TILING_X;
-
-      if ((img->width0 < 32 || img->height0 < 16) &&
-          (img->width0 < 16 || img->height0 < 32) &&
-          (preferred_tilings & ~IMAGE_TILING_Y))
-         preferred_tilings &= ~IMAGE_TILING_Y;
-   } else {
-      /* force linear if we are not sure where the texture is bound to */
-      if (preferred_tilings & IMAGE_TILING_NONE)
-         preferred_tilings &= IMAGE_TILING_NONE;
-   }
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   /* prefer tiled over linear */
-   if (preferred_tilings & IMAGE_TILING_Y)
-      img->tiling = GEN6_TILING_Y;
-   else if (preferred_tilings & IMAGE_TILING_X)
-      img->tiling = GEN6_TILING_X;
-   else if (preferred_tilings & IMAGE_TILING_W)
-      img->tiling = GEN8_TILING_W;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      layout->walk = image_get_gen7_walk(dev, info);
    else
-      img->tiling = GEN6_TILING_NONE;
-}
+      layout->walk = image_get_gen6_walk(dev, info);
 
-static void
-img_init_walk_gen7(struct ilo_image *img,
-                   const struct ilo_image_params *params)
-{
-   const struct pipe_resource *templ = params->templ;
+   layout->interleaved_samples =
+      image_get_gen6_interleaved_samples(dev, info);
 
-   /*
-    * It is not explicitly states, but render targets are expected to be
-    * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
-    * to be IMS (samples interleaved).
-    *
-    * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
-    */
-   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
-      /*
-       * From the Ivy Bridge PRM, volume 1 part 1, page 111:
-       *
-       *     "note that the depth buffer and stencil buffer have an implied
-       *      value of ARYSPC_FULL"
-       */
-      img->walk = (templ->target == PIPE_TEXTURE_3D) ?
-         ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
+   layout->valid_tilings = image_get_gen6_valid_tilings(dev, info);
+   if (!layout->valid_tilings)
+      return false;
 
-      img->interleaved_samples = true;
-   } else {
-      /*
-       * From the Ivy Bridge PRM, volume 4 part 1, page 66:
-       *
-       *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number
-       *      of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
-       *      Array Spacing) must be set to ARYSPC_LOD0."
-       *
-       * As multisampled resources are not mipmapped, we never use
-       * ARYSPC_FULL for them.
-       */
-      if (templ->nr_samples > 1)
-         assert(templ->last_level == 0);
+   layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);
 
-      img->walk =
-         (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
-         (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
-         ILO_IMAGE_WALK_LOD;
+   if (image_get_gen6_hiz_enable(dev, info))
+      layout->aux = ILO_IMAGE_AUX_HIZ;
+   else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
+            image_get_gen7_mcs_enable(dev, info, layout->tiling))
+      layout->aux = ILO_IMAGE_AUX_MCS;
+   else
+      layout->aux = ILO_IMAGE_AUX_NONE;
 
-      img->interleaved_samples = false;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      image_get_gen7_alignments(dev, info, layout->tiling,
+            &layout->align_i, &layout->align_j);
+   } else {
+      image_get_gen6_alignments(dev, info,
+            &layout->align_i, &layout->align_j);
    }
+
+   return true;
 }
 
-static void
-img_init_walk_gen6(struct ilo_image *img,
-                   const struct ilo_image_params *params)
-{
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
-    *
-    *     "The separate stencil buffer does not support mip mapping, thus the
-    *      storage for LODs other than LOD 0 is not needed. The following
-    *      QPitch equation applies only to the separate stencil buffer:
-    *
-    *        QPitch = h_0"
-    *
-    * GEN6 does not support compact spacing otherwise.
-    */
-   img->walk =
-      (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
-      (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
-      ILO_IMAGE_WALK_LAYER;
+static bool
+image_init_gen6_transfer_layout(const struct ilo_dev *dev,
+                                const struct ilo_image_info *info,
+                                struct ilo_image_layout *layout)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* we can define our own layout to save space */
+   layout->walk = ILO_IMAGE_WALK_LOD;
+   layout->interleaved_samples = false;
+   layout->valid_tilings = IMAGE_TILING_NONE;
+   layout->tiling = GEN6_TILING_NONE;
+   layout->aux = ILO_IMAGE_AUX_NONE;
+   layout->align_i = info->block_width;
+   layout->align_j = info->block_height;
 
-   /* GEN6 supports only interleaved samples */
-   img->interleaved_samples = true;
+   return true;
 }
 
 static void
-img_init_walk(struct ilo_image *img,
-              const struct ilo_image_params *params)
+image_get_gen6_slice_size(const struct ilo_dev *dev,
+                          const struct ilo_image_info *info,
+                          const struct ilo_image_layout *layout,
+                          uint8_t level,
+                          int *width, int *height)
 {
-   if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
-      img_init_walk_gen7(img, params);
-   else
-      img_init_walk_gen6(img, params);
-}
+   int w, h;
 
-static unsigned
-img_get_valid_tilings(const struct ilo_image *img,
-                      const struct ilo_image_params *params)
-{
-   const struct pipe_resource *templ = params->templ;
-   const enum pipe_format format = img->format;
-   unsigned valid_tilings = params->valid_tilings;
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   /*
-    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
-    *
-    *     "Display/Overlay   Y-Major not supported.
-    *                        X-Major required for Async Flips"
-    */
-   if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
-      valid_tilings &= IMAGE_TILING_X;
+   w = u_minify(info->width, level);
+   h = u_minify(info->height, level);
 
    /*
-    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
+    * From the Sandy Bridge PRM, volume 1 part 1, page 114:
     *
-    *     "The cursor surface address must be 4K byte aligned. The cursor must
-    *      be in linear memory, it cannot be tiled."
+    *     "The dimensions of the mip maps are first determined by applying the
+    *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
+    *      if necessary, they are padded out to compression block boundaries."
     */
-   if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
-      valid_tilings &= IMAGE_TILING_NONE;
+   w = align(w, info->block_width);
+   h = align(h, info->block_height);
 
    /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
+    * From the Sandy Bridge PRM, volume 1 part 1, page 111:
     *
-    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
-    *      Depth Buffer is not supported."
+    *     "If the surface is multisampled (4x), these values must be adjusted
+    *      as follows before proceeding:
     *
-    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
+    *        W_L = ceiling(W_L / 2) * 4
+    *        H_L = ceiling(H_L / 2) * 4"
     *
-    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
+    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
     *
-    *     "W-Major Tile Format is used for separate stencil."
+    *     "If the surface is multisampled and it is a depth or stencil surface
+    *      or Multisampled Surface StorageFormat in SURFACE_STATE is
+    *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
+    *      proceeding:
+    *
+    *        #samples  W_L =                    H_L =
+    *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
+    *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
+    *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
+    *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
+    *
+    * For interleaved samples (4x), where pixels
+    *
+    *   (x, y  ) (x+1, y  )
+    *   (x, y+1) (x+1, y+1)
+    *
+    * would be occupied by
+    *
+    *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
+    *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
+    *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
+    *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
+    *
+    * Thus the need to
+    *
+    *   w = align(w, 2) * 2;
+    *   h = align(h, 2) * 2;
     */
-   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
-      switch (format) {
-      case PIPE_FORMAT_S8_UINT:
-         valid_tilings &= IMAGE_TILING_W;
+   if (layout->interleaved_samples) {
+      switch (info->sample_count) {
+      case 1:
          break;
-      default:
-         valid_tilings &= IMAGE_TILING_Y;
+      case 2:
+         w = align(w, 2) * 2;
          break;
-      }
-   }
-
-   if (templ->bind & PIPE_BIND_RENDER_TARGET) {
-      /*
-       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
-       *
-       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
-       *      either TileX or Linear."
-       *
-       * From the Haswell PRM, volume 5, page 32:
-       *
-       *     "NOTE: 128 BPP format color buffer (render target) supports
-       *      Linear, TiledX and TiledY."
-       */
-      if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
-         valid_tilings &= ~IMAGE_TILING_Y;
-
-      /*
-       * From the Ivy Bridge PRM, volume 4 part 1, page 63:
-       *
-       *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
-       *      for all tiled Y Render Target surfaces."
-       *
-       *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
-       */
-      if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
-          ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
-          img->format == PIPE_FORMAT_R32G32B32_FLOAT)
-         valid_tilings &= ~IMAGE_TILING_Y;
-
-      valid_tilings &= ~IMAGE_TILING_W;
-   }
-
-   if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
-      if (ilo_dev_gen(params->dev) < ILO_GEN(8))
-         valid_tilings &= ~IMAGE_TILING_W;
+      case 4:
+         w = align(w, 2) * 2;
+         h = align(h, 2) * 2;
+         break;
+      case 8:
+         w = align(w, 2) * 4;
+         h = align(h, 2) * 2;
+         break;
+      case 16:
+         w = align(w, 2) * 4;
+         h = align(h, 2) * 4;
+         break;
+      default:
+         assert(!"unsupported sample count");
+         break;
+      }
    }
 
-   /* no conflicting binding flags */
-   assert(valid_tilings);
-
-   return valid_tilings;
-}
-
-static void
-img_init_size_and_format(struct ilo_image *img,
-                         struct ilo_image_params *params)
-{
-   const struct pipe_resource *templ = params->templ;
-   enum pipe_format format = templ->format;
-   bool require_separate_stencil = false;
-
-   img->width0 = templ->width0;
-   img->height0 = templ->height0;
-   img->depth0 = templ->depth0;
-   img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
-
    /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+    * From the Ivy Bridge PRM, volume 1 part 1, page 108:
     *
-    *     "This field (Separate Stencil Buffer Enable) must be set to the same
-    *      value (enabled or disabled) as Hierarchical Depth Buffer Enable."
+    *     "For separate stencil buffer, the width must be mutiplied by 2 and
+    *      height divided by 2..."
     *
-    * GEN7+ requires separate stencil buffers.
+    * To make things easier (for transfer), we will just double the stencil
+    * stride in 3DSTATE_STENCIL_BUFFER.
     */
-   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
-      if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
-         require_separate_stencil = true;
-      else
-         require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
-   }
-
-   switch (format) {
-   case PIPE_FORMAT_ETC1_RGB8:
-      format = PIPE_FORMAT_R8G8B8X8_UNORM;
-      break;
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-      if (require_separate_stencil) {
-         format = PIPE_FORMAT_Z24X8_UNORM;
-         img->separate_stencil = true;
-      }
-      break;
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-      if (require_separate_stencil) {
-         format = PIPE_FORMAT_Z32_FLOAT;
-         img->separate_stencil = true;
-      }
-      break;
-   default:
-      break;
-   }
+   w = align(w, layout->align_i);
+   h = align(h, layout->align_j);
 
-   img->format = format;
-   img->block_width = util_format_get_blockwidth(format);
-   img->block_height = util_format_get_blockheight(format);
-   img->block_size = util_format_get_blocksize(format);
-
-   params->valid_tilings = img_get_valid_tilings(img, params);
-   params->compressed = util_format_is_compressed(img->format);
+   *width = w;
+   *height = h;
 }
 
-static bool
-img_want_mcs(const struct ilo_image *img,
-             const struct ilo_image_params *params)
+static int
+image_get_gen6_layer_count(const struct ilo_dev *dev,
+                           const struct ilo_image_info *info,
+                           const struct ilo_image_layout *layout)
 {
-   const struct pipe_resource *templ = params->templ;
-   bool want_mcs = false;
-
-   /* MCS is for RT on GEN7+ */
-   if (ilo_dev_gen(params->dev) < ILO_GEN(7))
-      return false;
+   int count = info->array_size;
 
-   if (templ->target != PIPE_TEXTURE_2D ||
-       !(templ->bind & PIPE_BIND_RENDER_TARGET))
-      return false;
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 77:
-    *
-    *     "For Render Target and Sampling Engine Surfaces:If the surface is
-    *      multisampled (Number of Multisamples any value other than
-    *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
-    *
-    *     "This field must be set to 0 for all SINT MSRTs when all RT channels
-    *      are not written"
-    */
-   if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
-      want_mcs = true;
-   } else if (templ->nr_samples <= 1) {
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 326:
-       *
-       *     "When MCS is buffer is used for color clear of non-multisampler
-       *      render target, the following restrictions apply.
-       *      - Support is limited to tiled render targets.
-       *      - Support is for non-mip-mapped and non-array surface types
-       *        only.
-       *      - Clear is supported only on the full RT; i.e., no partial clear
-       *        or overlapping clears.
-       *      - MCS buffer for non-MSRT is supported only for RT formats
-       *        32bpp, 64bpp and 128bpp.
-       *      ..."
-       */
-      if (img->tiling != GEN6_TILING_NONE &&
-          templ->last_level == 0 && templ->array_size == 1) {
-         switch (img->block_size) {
-         case 4:
-         case 8:
-         case 16:
-            want_mcs = true;
-            break;
-         default:
-            break;
-         }
-      }
-   }
+   /* samples of the same index are stored in a layer */
+   if (!layout->interleaved_samples)
+      count *= info->sample_count;
 
-   return want_mcs;
+   return count;
 }
 
-static bool
-img_want_hiz(const struct ilo_image *img,
-             const struct ilo_image_params *params)
+static void
+image_get_gen6_walk_layer_heights(const struct ilo_dev *dev,
+                                  const struct ilo_image_info *info,
+                                  struct ilo_image_layout *layout)
 {
-   const struct pipe_resource *templ = params->templ;
-   const struct util_format_description *desc =
-      util_format_description(templ->format);
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   if (ilo_debug & ILO_DEBUG_NOHIZ)
-      return false;
-
-   if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
-      return false;
+   layout->walk_layer_h0 = layout->lods[0].slice_height;
 
-   if (!util_format_has_depth(desc))
-      return false;
+   if (info->level_count > 1) {
+      layout->walk_layer_h1 = layout->lods[1].slice_height;
+   } else {
+      int dummy;
+      image_get_gen6_slice_size(dev, info, layout, 1,
+            &dummy, &layout->walk_layer_h1);
+   }
 
-   /* no point in having HiZ */
-   if (templ->usage == PIPE_USAGE_STAGING)
-      return false;
+   if (image_get_gen6_layer_count(dev, info, layout) == 1) {
+      layout->walk_layer_height = 0;
+      return;
+   }
 
    /*
-    * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
-    * for every level.  This is generally fine except on GEN6, where HiZ and
-    * separate stencil are enabled and disabled at the same time.  When the
-    * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
-    * can result in incompatible formats.
-    */
-   if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
-       templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
-       templ->last_level)
-      return false;
-
-   return true;
-}
+    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
+    *
+    *     "The following equation is used for surface formats other than
+    *      compressed textures:
+    *
+    *        QPitch = (h0 + h1 + 11j)"
+    *
+    *     "The equation for compressed textures (BC* and FXT1 surface formats)
+    *      follows:
+    *
+    *        QPitch = (h0 + h1 + 11j) / 4"
+    *
+    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
+    *      value calculated in the equation above, for every other odd Surface
+    *      Height starting from 1 i.e. 1,5,9,13"
+    *
+    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
+    *
+    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
+    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
+    *
+    *        QPitch = (h0 + h1 + 12j)
+    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
+    *
+    *      (There are many typos or missing words here...)"
+    *
+    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
+    * the base address.  The PRM divides QPitch by 4 for compressed formats
+    * because the block height for those formats are 4, and it wants QPitch to
+    * mean the number of memory rows, as opposed to texel rows, between
+    * slices.  Since we use texel rows everywhere, we do not need to divide
+    * QPitch by 4.
+    */
+   layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 +
+      ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
 
-static void
-img_init_aux(struct ilo_image *img,
-             const struct ilo_image_params *params)
-{
-   if (img_want_hiz(img, params))
-      img->aux.type = ILO_IMAGE_AUX_HIZ;
-   else if (img_want_mcs(img, params))
-      img->aux.type = ILO_IMAGE_AUX_MCS;
+   if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 &&
+       info->height % 4 == 1)
+      layout->walk_layer_height += 4;
 }
 
 static void
-img_align(struct ilo_image *img, struct ilo_image_params *params)
+image_get_gen6_monolithic_size(const struct ilo_dev *dev,
+                               const struct ilo_image_info *info,
+                               struct ilo_image_layout *layout,
+                               int max_x, int max_y)
 {
-   const struct pipe_resource *templ = params->templ;
    int align_w = 1, align_h = 1, pad_h = 0;
 
+   ILO_DEV_ASSERT(dev, 6, 8);
+
    /*
     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
     *
@@ -857,15 +807,15 @@
     *      padding purposes. The value of 4 for j still applies for mip level
     *      alignment and QPitch calculation."
     */
-   if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
-      align_w = MAX2(align_w, img->align_i);
-      align_h = MAX2(align_h, img->align_j);
+   if (info->bind_surface_sampler) {
+      align_w = MAX2(align_w, layout->align_i);
+      align_h = MAX2(align_h, layout->align_j);
 
-      if (templ->target == PIPE_TEXTURE_CUBE)
+      if (info->type == GEN6_SURFTYPE_CUBE)
          pad_h += 2;
 
-      if (params->compressed)
-         align_h = MAX2(align_h, img->align_j * 2);
+      if (info->compressed)
+         align_h = MAX2(align_h, layout->align_j * 2);
    }
 
    /*
@@ -874,149 +824,288 @@
     *     "If the surface contains an odd number of rows of data, a final row
     *      below the surface must be allocated."
     */
-   if (templ->bind & PIPE_BIND_RENDER_TARGET)
+   if (info->bind_surface_dp_render)
       align_h = MAX2(align_h, 2);
 
    /*
     * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
     * for unaligned non-mipmapped and non-array images.
     */
-   if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
-       templ->last_level == 0 &&
-       templ->array_size == 1 &&
-       templ->depth0 == 1) {
+   if (layout->aux == ILO_IMAGE_AUX_HIZ &&
+       info->level_count == 1 && info->array_size == 1 && info->depth == 1) {
       align_w = MAX2(align_w, 8);
       align_h = MAX2(align_h, 4);
    }
 
-   params->max_x = align(params->max_x, align_w);
-   params->max_y = align(params->max_y + pad_h, align_h);
+   layout->monolithic_width = align(max_x, align_w);
+   layout->monolithic_height = align(max_y + pad_h, align_h);
 }
 
-/* note that this may force the texture to be linear */
 static void
-img_calculate_bo_size(struct ilo_image *img,
-                      const struct ilo_image_params *params)
-{
-   assert(params->max_x % img->block_width == 0);
-   assert(params->max_y % img->block_height == 0);
-   assert(img->walk_layer_height % img->block_height == 0);
-
-   img->bo_stride =
-      (params->max_x / img->block_width) * img->block_size;
-   img->bo_height = params->max_y / img->block_height;
-
-   while (true) {
-      unsigned w = img->bo_stride, h = img->bo_height;
-      unsigned align_w, align_h;
+image_get_gen6_lods(const struct ilo_dev *dev,
+                    const struct ilo_image_info *info,
+                    struct ilo_image_layout *layout)
+{
+   const int layer_count = image_get_gen6_layer_count(dev, info, layout);
+   int cur_x, cur_y, max_x, max_y;
+   uint8_t lv;
 
-      /*
-       * From the Haswell PRM, volume 5, page 163:
-       *
-       *     "For linear surfaces, additional padding of 64 bytes is required
-       *      at the bottom of the surface. This is in addition to the padding
-       *      required above."
-       */
-      if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
-          (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
-          img->tiling == GEN6_TILING_NONE)
-         h += (64 + img->bo_stride - 1) / img->bo_stride;
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-      /*
-       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-       *
-       *     "- For linear render target surfaces, the pitch must be a
-       *        multiple of the element size for non-YUV surface formats.
-       *        Pitch must be a multiple of 2 * element size for YUV surface
-       *        formats.
-       *      - For other linear surfaces, the pitch can be any multiple of
-       *        bytes.
-       *      - For tiled surfaces, the pitch must be a multiple of the tile
-       *        width."
-       *
-       * Different requirements may exist when the bo is used in different
-       * places, but our alignments here should be good enough that we do not
-       * need to check params->templ->bind.
-       */
-      switch (img->tiling) {
-      case GEN6_TILING_X:
-         align_w = 512;
-         align_h = 8;
+   cur_x = 0;
+   cur_y = 0;
+   max_x = 0;
+   max_y = 0;
+   for (lv = 0; lv < info->level_count; lv++) {
+      int slice_w, slice_h, lod_w, lod_h;
+
+      image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h);
+
+      layout->lods[lv].x = cur_x;
+      layout->lods[lv].y = cur_y;
+      layout->lods[lv].slice_width = slice_w;
+      layout->lods[lv].slice_height = slice_h;
+
+      switch (layout->walk) {
+      case ILO_IMAGE_WALK_LAYER:
+         lod_w = slice_w;
+         lod_h = slice_h;
+
+         /* MIPLAYOUT_BELOW */
+         if (lv == 1)
+            cur_x += lod_w;
+         else
+            cur_y += lod_h;
          break;
-      case GEN6_TILING_Y:
-         align_w = 128;
-         align_h = 32;
+      case ILO_IMAGE_WALK_LOD:
+         lod_w = slice_w;
+         lod_h = slice_h * layer_count;
+
+         if (lv == 1)
+            cur_x += lod_w;
+         else
+            cur_y += lod_h;
+
+         /* every LOD begins at tile boundaries */
+         if (info->level_count > 1) {
+            assert(info->format == GEN6_FORMAT_R8_UINT);
+            cur_x = align(cur_x, 64);
+            cur_y = align(cur_y, 64);
+         }
          break;
-      case GEN8_TILING_W:
-         /*
-          * From the Sandy Bridge PRM, volume 1 part 2, page 22:
-          *
-          *     "A 4KB tile is subdivided into 8-high by 8-wide array of
-          *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
-          *      bytes."
-          */
-         align_w = 64;
-         align_h = 64;
+      case ILO_IMAGE_WALK_3D:
+         {
+            const int slice_count = u_minify(info->depth, lv);
+            const int slice_count_per_row = 1 << lv;
+            const int row_count =
+               (slice_count + slice_count_per_row - 1) / slice_count_per_row;
+
+            lod_w = slice_w * slice_count_per_row;
+            lod_h = slice_h * row_count;
+         }
+
+         cur_y += lod_h;
          break;
       default:
-         assert(img->tiling == GEN6_TILING_NONE);
-         /* some good enough values */
-         align_w = 64;
-         align_h = 2;
+         assert(!"unknown walk type");
+         lod_w = 0;
+         lod_h = 0;
          break;
       }
 
-      w = align(w, align_w);
-      h = align(h, align_h);
+      if (max_x < layout->lods[lv].x + lod_w)
+         max_x = layout->lods[lv].x + lod_w;
+      if (max_y < layout->lods[lv].y + lod_h)
+         max_y = layout->lods[lv].y + lod_h;
+   }
 
-      /* make sure the bo is mappable */
-      if (img->tiling != GEN6_TILING_NONE) {
-         /*
-          * Usually only the first 256MB of the GTT is mappable.
-          *
-          * See also how intel_context::max_gtt_map_object_size is calculated.
-          */
-         const size_t mappable_gtt_size = 256 * 1024 * 1024;
-
-         /*
-          * Be conservative.  We may be able to switch from VALIGN_4 to
-          * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
-          */
-         if (mappable_gtt_size / w / 4 < h) {
-            if (params->valid_tilings & IMAGE_TILING_NONE) {
-               img->tiling = GEN6_TILING_NONE;
-               /* MCS support for non-MSRTs is limited to tiled RTs */
-               if (img->aux.type == ILO_IMAGE_AUX_MCS &&
-                   params->templ->nr_samples <= 1)
-                  img->aux.type = ILO_IMAGE_AUX_NONE;
-
-               continue;
-            } else {
-               ilo_warn("cannot force texture to be linear\n");
-            }
-         }
-      }
+   if (layout->walk == ILO_IMAGE_WALK_LAYER) {
+      image_get_gen6_walk_layer_heights(dev, info, layout);
+      if (layer_count > 1)
+         max_y += layout->walk_layer_height * (layer_count - 1);
+   } else {
+      layout->walk_layer_h0 = 0;
+      layout->walk_layer_h1 = 0;
+      layout->walk_layer_height = 0;
+   }
+
+   image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
+}
+
+static bool
+image_bind_gpu(const struct ilo_image_info *info)
+{
+   return (info->bind_surface_sampler ||
+           info->bind_surface_dp_render ||
+           info->bind_surface_dp_typed ||
+           info->bind_zs ||
+           info->bind_scanout ||
+           info->bind_cursor);
+}
+
+static bool
+image_validate_gen6(const struct ilo_dev *dev,
+                    const struct ilo_image_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 314:
+    *
+    *     "The separate stencil buffer is always enabled, thus the field in
+    *      3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
+    *      buffer has been removed Surface formats with interleaved depth and
+    *      stencil are no longer supported"
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs)
+      assert(!info->interleaved_stencil);
+
+   return true;
+}
+
+static bool
+image_get_gen6_layout(const struct ilo_dev *dev,
+                      const struct ilo_image_info *info,
+                      struct ilo_image_layout *layout)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!image_validate_gen6(dev, info))
+      return false;
+
+   if (image_bind_gpu(info) || info->level_count > 1) {
+      if (!image_init_gen6_hardware_layout(dev, info, layout))
+         return false;
+   } else {
+      if (!image_init_gen6_transfer_layout(dev, info, layout))
+         return false;
+   }
+
+   /*
+    * the fact that align i and j are multiples of block width and height
+    * respectively is what makes the size of the bo a multiple of the block
+    * size, slices start at block boundaries, and many of the computations
+    * work.
+    */
+   assert(layout->align_i % info->block_width == 0);
+   assert(layout->align_j % info->block_height == 0);
+
+   /* make sure align() works */
+   assert(util_is_power_of_two(layout->align_i) &&
+          util_is_power_of_two(layout->align_j));
+   assert(util_is_power_of_two(info->block_width) &&
+          util_is_power_of_two(info->block_height));
+
+   image_get_gen6_lods(dev, info, layout);
+
+   assert(layout->walk_layer_height % info->block_height == 0);
+   assert(layout->monolithic_width % info->block_width == 0);
+   assert(layout->monolithic_height % info->block_height == 0);
 
-      img->bo_stride = w;
-      img->bo_height = h;
+   return true;
+}
+
+static bool
+image_set_gen6_bo_size(struct ilo_image *img,
+                       const struct ilo_dev *dev,
+                       const struct ilo_image_info *info,
+                       const struct ilo_image_layout *layout)
+{
+   int stride, height;
+   int align_w, align_h;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   stride = (layout->monolithic_width / info->block_width) * info->block_size;
+   height = layout->monolithic_height / info->block_height;
+
+   /*
+    * From the Haswell PRM, volume 5, page 163:
+    *
+    *     "For linear surfaces, additional padding of 64 bytes is required
+    *      at the bottom of the surface. This is in addition to the padding
+    *      required above."
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
+       layout->tiling == GEN6_TILING_NONE)
+      height += (64 + stride - 1) / stride;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+    *
+    *     "- For linear render target surfaces, the pitch must be a multiple
+    *        of the element size for non-YUV surface formats.  Pitch must be a
+    *        multiple of 2 * element size for YUV surface formats.
+    *
+    *      - For other linear surfaces, the pitch can be any multiple of
+    *        bytes.
+    *      - For tiled surfaces, the pitch must be a multiple of the tile
+    *        width."
+    *
+    * Different requirements may exist when the image is used in different
+    * places, but our alignments here should be good enough that we do not
+    * need to check info->bind_x.
+    */
+   switch (layout->tiling) {
+   case GEN6_TILING_X:
+      align_w = 512;
+      align_h = 8;
+      break;
+   case GEN6_TILING_Y:
+      align_w = 128;
+      align_h = 32;
+      break;
+   case GEN8_TILING_W:
+      /*
+       * From the Sandy Bridge PRM, volume 1 part 2, page 22:
+       *
+       *     "A 4KB tile is subdivided into 8-high by 8-wide array of
+       *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
+       *      bytes."
+       */
+      align_w = 64;
+      align_h = 64;
+      break;
+   default:
+      assert(layout->tiling == GEN6_TILING_NONE);
+      /* some good enough values */
+      align_w = 64;
+      align_h = 2;
       break;
    }
+
+   if (info->force_bo_stride) {
+      if (info->force_bo_stride % align_w || info->force_bo_stride < stride)
+         return false;
+
+      img->bo_stride = info->force_bo_stride;
+   } else {
+      img->bo_stride = align(stride, align_w);
+   }
+
+   img->bo_height = align(height, align_h);
+
+   return true;
 }
 
-static void
-img_calculate_hiz_size(struct ilo_image *img,
-                       const struct ilo_image_params *params)
+static bool
+image_set_gen6_hiz(struct ilo_image *img,
+                   const struct ilo_dev *dev,
+                   const struct ilo_image_info *info,
+                   const struct ilo_image_layout *layout)
 {
-   const struct pipe_resource *templ = params->templ;
-   const unsigned hz_align_j = 8;
+   const int hz_align_j = 8;
    enum ilo_image_walk_type hz_walk;
-   unsigned hz_width, hz_height, lv;
-   unsigned hz_clear_w, hz_clear_h;
+   int hz_width, hz_height;
+   int hz_clear_w, hz_clear_h;
+   uint8_t lv;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
 
-   assert(img->aux.type == ILO_IMAGE_AUX_HIZ);
+   assert(layout->aux == ILO_IMAGE_AUX_HIZ);
 
-   assert(img->walk == ILO_IMAGE_WALK_LAYER ||
-          img->walk == ILO_IMAGE_WALK_3D);
+   assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
+          layout->walk == ILO_IMAGE_WALK_3D);
 
    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 312:
@@ -1029,8 +1118,8 @@
     *
     * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
     */
-   if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
-      hz_walk = img->walk;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      hz_walk = layout->walk;
    else
       hz_walk = ILO_IMAGE_WALK_LOD;
 
@@ -1044,16 +1133,16 @@
    switch (hz_walk) {
    case ILO_IMAGE_WALK_LAYER:
       {
-         const unsigned h0 = align(params->h0, hz_align_j);
-         const unsigned h1 = align(params->h1, hz_align_j);
-         const unsigned htail =
-            ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
-         const unsigned hz_qpitch = h0 + h1 + htail;
+         const int h0 = align(layout->walk_layer_h0, hz_align_j);
+         const int h1 = align(layout->walk_layer_h1, hz_align_j);
+         const int htail =
+            ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
+         const int hz_qpitch = h0 + h1 + htail;
 
-         hz_width = align(img->lods[0].slice_width, 16);
+         hz_width = align(layout->lods[0].slice_width, 16);
 
-         hz_height = hz_qpitch * templ->array_size / 2;
-         if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
+         hz_height = hz_qpitch * info->array_size / 2;
+         if (ilo_dev_gen(dev) >= ILO_GEN(7))
             hz_height = align(hz_height, 8);
 
          img->aux.walk_layer_height = hz_qpitch;
@@ -1061,27 +1150,27 @@
       break;
    case ILO_IMAGE_WALK_LOD:
       {
-         unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
-         unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
-         unsigned cur_tx, cur_ty;
+         int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
+         int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
+         int cur_tx, cur_ty;
 
          /* figure out the tile offsets of LODs */
          hz_width = 0;
          hz_height = 0;
          cur_tx = 0;
          cur_ty = 0;
-         for (lv = 0; lv <= templ->last_level; lv++) {
-            unsigned tw, th;
+         for (lv = 0; lv < info->level_count; lv++) {
+            int tw, th;
 
             lod_tx[lv] = cur_tx;
             lod_ty[lv] = cur_ty;
 
-            tw = align(img->lods[lv].slice_width, 16);
-            th = align(img->lods[lv].slice_height, hz_align_j) *
-               templ->array_size / 2;
+            tw = align(layout->lods[lv].slice_width, 16);
+            th = align(layout->lods[lv].slice_height, hz_align_j) *
+               info->array_size / 2;
             /* convert to Y-tiles */
-            tw = align(tw, 128) / 128;
-            th = align(th, 32) / 32;
+            tw = (tw + 127) / 128;
+            th = (th + 31) / 32;
 
             if (hz_width < cur_tx + tw)
                hz_width = cur_tx + tw;
@@ -1095,22 +1184,23 @@
          }
 
          /* convert tile offsets to memory offsets */
-         for (lv = 0; lv <= templ->last_level; lv++) {
+         for (lv = 0; lv < info->level_count; lv++) {
             img->aux.walk_lod_offsets[lv] =
                (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
          }
+
          hz_width *= 128;
          hz_height *= 32;
       }
       break;
    case ILO_IMAGE_WALK_3D:
-      hz_width = align(img->lods[0].slice_width, 16);
+      hz_width = align(layout->lods[0].slice_width, 16);
 
       hz_height = 0;
-      for (lv = 0; lv <= templ->last_level; lv++) {
-         const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
+      for (lv = 0; lv < info->level_count; lv++) {
+         const int h = align(layout->lods[lv].slice_height, hz_align_j);
          /* according to the formula, slices are packed together vertically */
-         hz_height += h * u_minify(templ->depth0, lv);
+         hz_height += h * u_minify(info->depth, lv);
       }
       hz_height /= 2;
       break;
@@ -1129,8 +1219,7 @@
     */
    hz_clear_w = 8;
    hz_clear_h = 4;
-   switch (templ->nr_samples) {
-   case 0:
+   switch (info->sample_count) {
    case 1:
    default:
       break;
@@ -1151,33 +1240,38 @@
       break;
    }
 
-   for (lv = 0; lv <= templ->last_level; lv++) {
-      if (u_minify(img->width0, lv) % hz_clear_w ||
-          u_minify(img->height0, lv) % hz_clear_h)
+   for (lv = 0; lv < info->level_count; lv++) {
+      if (u_minify(info->width, lv) % hz_clear_w ||
+          u_minify(info->height, lv) % hz_clear_h)
          break;
       img->aux.enables |= 1 << lv;
    }
 
-   /* we padded to allow this in img_align() */
-   if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
+   /* we padded to allow this in image_get_gen6_monolithic_size() */
+   if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
       img->aux.enables |= 0x1;
 
    /* align to Y-tile */
    img->aux.bo_stride = align(hz_width, 128);
    img->aux.bo_height = align(hz_height, 32);
+
+   return true;
 }
 
-static void
-img_calculate_mcs_size(struct ilo_image *img,
-                       const struct ilo_image_params *params)
+static bool
+image_set_gen7_mcs(struct ilo_image *img,
+                   const struct ilo_dev *dev,
+                   const struct ilo_image_info *info,
+                   const struct ilo_image_layout *layout)
 {
-   const struct pipe_resource *templ = params->templ;
    int mcs_width, mcs_height, mcs_cpp;
    int downscale_x, downscale_y;
 
-   assert(img->aux.type == ILO_IMAGE_AUX_MCS);
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   assert(layout->aux == ILO_IMAGE_AUX_MCS);
 
-   if (templ->nr_samples > 1) {
+   if (info->sample_count > 1) {
       /*
        * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
        * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
@@ -1191,7 +1285,7 @@
        * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
        * pixel block in the RT.
        */
-      switch (templ->nr_samples) {
+      switch (info->sample_count) {
       case 2:
       case 4:
          downscale_x = 8;
@@ -1210,7 +1304,7 @@
          break;
       default:
          assert(!"unsupported sample count");
-         return;
+         return false;
          break;
       }
 
@@ -1219,8 +1313,8 @@
        * clear rectangle cannot be masked.  The scale-down clear rectangle
        * thus must be aligned to 2x2, and we need to pad.
        */
-      mcs_width = align(img->width0, downscale_x * 2);
-      mcs_height = align(img->height0, downscale_y * 2);
+      mcs_width = align(info->width, downscale_x * 2);
+      mcs_height = align(info->height, downscale_y * 2);
    } else {
       /*
        * From the Ivy Bridge PRM, volume 2 part 1, page 327:
@@ -1255,18 +1349,18 @@
        * anything except for the size of the allocated MCS.  Let's see if we
        * hit out-of-bound access.
        */
-      switch (img->tiling) {
+      switch (layout->tiling) {
       case GEN6_TILING_X:
-         downscale_x = 64 / img->block_size;
+         downscale_x = 64 / info->block_size;
          downscale_y = 2;
          break;
       case GEN6_TILING_Y:
-         downscale_x = 32 / img->block_size;
+         downscale_x = 32 / info->block_size;
          downscale_y = 4;
          break;
       default:
          assert(!"unsupported tiling mode");
-         return;
+         return false;
          break;
       }
 
@@ -1283,155 +1377,75 @@
        * The scaled-down clear rectangle must be aligned to 4x4 instead of
        * 2x2, and we need to pad.
        */
-      mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
-      mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
+      mcs_width = align(info->width, downscale_x * 4) / downscale_x;
+      mcs_height = align(info->height, downscale_y * 4) / downscale_y;
       mcs_cpp = 16; /* an OWord */
    }
 
-   img->aux.enables = (1 << (templ->last_level + 1)) - 1;
+   img->aux.enables = (1 << info->level_count) - 1;
    /* align to Y-tile */
    img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
    img->aux.bo_height = align(mcs_height, 32);
-}
-
-static void
-img_init(struct ilo_image *img,
-         struct ilo_image_params *params)
-{
-   /* there are hard dependencies between every function here */
 
-   img_init_aux(img, params);
-   img_init_size_and_format(img, params);
-   img_init_walk(img, params);
-   img_init_tiling(img, params);
-   img_init_alignments(img, params);
-   img_init_lods(img, params);
-   img_init_layer_height(img, params);
-
-   img_align(img, params);
-   img_calculate_bo_size(img, params);
-
-   img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);
-
-   switch (img->aux.type) {
-   case ILO_IMAGE_AUX_HIZ:
-      img_calculate_hiz_size(img, params);
-      break;
-   case ILO_IMAGE_AUX_MCS:
-      img_calculate_mcs_size(img, params);
-      break;
-   default:
-      break;
-   }
+   return true;
 }
 
-/**
- * The texutre is for transfer only.  We can define our own layout to save
- * space.
- */
-static void
-img_init_for_transfer(struct ilo_image *img,
-                      const struct ilo_dev *dev,
-                      const struct pipe_resource *templ)
+bool
+ilo_image_init(struct ilo_image *img,
+               const struct ilo_dev *dev,
+               const struct ilo_image_info *info)
 {
-   const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
-      templ->depth0 : templ->array_size;
-   unsigned layer_width, layer_height;
-
-   assert(templ->last_level == 0);
-   assert(templ->nr_samples <= 1);
-
-   img->aux.type = ILO_IMAGE_AUX_NONE;
-
-   img->width0 = templ->width0;
-   img->height0 = templ->height0;
-   img->depth0 = templ->depth0;
-   img->sample_count = 1;
-
-   img->format = templ->format;
-   img->block_width = util_format_get_blockwidth(templ->format);
-   img->block_height = util_format_get_blockheight(templ->format);
-   img->block_size = util_format_get_blocksize(templ->format);
-
-   img->walk = ILO_IMAGE_WALK_LOD;
-
-   img->tiling = GEN6_TILING_NONE;
-
-   img->align_i = img->block_width;
-   img->align_j = img->block_height;
+   struct ilo_image_layout layout;
 
-   assert(util_is_power_of_two(img->block_width) &&
-          util_is_power_of_two(img->block_height));
+   assert(ilo_is_zeroed(img, sizeof(*img)));
 
-   /* use packed layout */
-   layer_width = align(templ->width0, img->align_i);
-   layer_height = align(templ->height0, img->align_j);
+   memset(&layout, 0, sizeof(layout));
+   layout.lods = img->lods;
 
-   img->lods[0].slice_width = layer_width;
-   img->lods[0].slice_height = layer_height;
-
-   img->bo_stride = (layer_width / img->block_width) * img->block_size;
-   img->bo_stride = align(img->bo_stride, 64);
+   if (!image_get_gen6_layout(dev, info, &layout))
+      return false;
 
-   img->bo_height = (layer_height / img->block_height) * num_layers;
-}
+   img->type = info->type;
 
-/**
- * Initialize the image.  Callers should zero-initialize \p img first.
- */
-void ilo_image_init(struct ilo_image *img,
-                    const struct ilo_dev *dev,
-                    const struct pipe_resource *templ)
-{
-   struct ilo_image_params params;
-   bool transfer_only;
+   img->format = info->format;
+   img->block_width = info->block_width;
+   img->block_height = info->block_height;
+   img->block_size = info->block_size;
 
-   /* use transfer layout when the texture is never bound to GPU */
-   transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
-                                     PIPE_BIND_TRANSFER_READ));
-   if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
-      img_init_for_transfer(img, dev, templ);
-      return;
-   }
+   img->width0 = info->width;
+   img->height0 = info->height;
+   img->depth0 = info->depth;
+   img->array_size = info->array_size;
+   img->level_count = info->level_count;
+   img->sample_count = info->sample_count;
 
-   memset(&params, 0, sizeof(params));
-   params.dev = dev;
-   params.templ = templ;
-   params.valid_tilings = IMAGE_TILING_ALL;
+   img->walk = layout.walk;
+   img->interleaved_samples = layout.interleaved_samples;
 
-   img_init(img, &params);
-}
+   img->tiling = layout.tiling;
 
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
-                            const struct ilo_dev *dev,
-                            const struct pipe_resource *templ,
-                            enum gen_surface_tiling tiling,
-                            unsigned bo_stride)
-{
-   struct ilo_image_params params;
-
-   if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
-       (tiling == GEN6_TILING_Y && bo_stride % 128) ||
-       (tiling == GEN8_TILING_W && bo_stride % 64))
-      return false;
+   img->aux.type = layout.aux;
 
-   memset(&params, 0, sizeof(params));
-   params.dev = dev;
-   params.templ = templ;
-   params.valid_tilings = 1 << tiling;
+   img->align_i = layout.align_i;
+   img->align_j = layout.align_j;
 
-   img_init(img, &params);
+   img->walk_layer_height = layout.walk_layer_height;
 
-   assert(img->tiling == tiling);
-   if (img->bo_stride > bo_stride)
+   if (!image_set_gen6_bo_size(img, dev, info, &layout))
       return false;
 
-   img->bo_stride = bo_stride;
+   img->scanout = info->bind_scanout;
 
-   /* assume imported RTs are also scanouts */
-   if (!img->scanout)
-      img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
+   /* aux setup may reject the image (e.g., unsupported MCS sample count or
+    * tiling); report that instead of returning a half-initialized image */
+   switch (layout.aux) {
+   case ILO_IMAGE_AUX_HIZ:
+      return image_set_gen6_hiz(img, dev, info, &layout);
+   case ILO_IMAGE_AUX_MCS:
+      return image_set_gen7_mcs(img, dev, info, &layout);
+   default:
+      break;
+   }
 
    return true;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_image.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_image.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_image.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_image.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,11 +29,17 @@
 #define ILO_IMAGE_H
 
 #include "genhw/genhw.h"
-#include "intel_winsys.h"
 
 #include "ilo_core.h"
 #include "ilo_dev.h"
 
+/*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 75:
+ *
+ *     "(MIP Count / LOD) representing [1,15] MIP levels"
+ */
+#define ILO_IMAGE_MAX_LEVEL_COUNT 15
+
 enum ilo_image_aux_type {
    ILO_IMAGE_AUX_NONE,
    ILO_IMAGE_AUX_HIZ,
@@ -68,6 +74,49 @@
    ILO_IMAGE_WALK_3D,
 };
 
+struct ilo_image_info {
+   enum gen_surface_type type;
+
+   enum gen_surface_format format;
+   bool interleaved_stencil;
+   bool is_integer;
+   /* width, height and size of pixel blocks */
+   bool compressed;
+   unsigned block_width;
+   unsigned block_height;
+   unsigned block_size;
+
+   /* image size */
+   uint16_t width;
+   uint16_t height;
+   uint16_t depth;
+   uint16_t array_size;
+   uint8_t level_count;
+   uint8_t sample_count;
+
+   /* disable optional aux */
+   bool aux_disable;
+
+   /* tilings to consider, if any bit is set */
+   uint8_t valid_tilings;
+
+   /*
+    * prefer GEN6_TILING_NONE when the (estimated) image size exceeds the
+    * threshold
+    */
+   uint32_t prefer_linear_threshold;
+
+   /* force a stride when non-zero */
+   uint32_t force_bo_stride;
+
+   bool bind_surface_sampler;
+   bool bind_surface_dp_render;
+   bool bind_surface_dp_typed;
+   bool bind_zs;
+   bool bind_scanout;
+   bool bind_cursor;
+};
+
 /*
  * When the walk type is ILO_IMAGE_WALK_LAYER, there is only a slice in each
  * LOD and this is used to describe LODs in the first array layer.  Otherwise,
@@ -88,13 +137,18 @@
  * Texture layout.
  */
 struct ilo_image {
+   enum gen_surface_type type;
+
+   enum gen_surface_format format;
+   bool interleaved_stencil;
+
    /* size, format, etc for programming hardware states */
    unsigned width0;
    unsigned height0;
    unsigned depth0;
+   unsigned array_size;
+   unsigned level_count;
    unsigned sample_count;
-   enum pipe_format format;
-   bool separate_stencil;
 
    /*
     * width, height, and size of pixel blocks for conversion between pixel
@@ -113,7 +167,7 @@
    unsigned align_i;
    unsigned align_j;
 
-   struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS];
+   struct ilo_image_lod lods[ILO_IMAGE_MAX_LEVEL_COUNT];
 
    /* physical layer height for ILO_IMAGE_WALK_LAYER */
    unsigned walk_layer_height;
@@ -125,8 +179,6 @@
 
    bool scanout;
 
-   struct intel_bo *bo;
-
    struct {
       enum ilo_image_aux_type type;
 
@@ -134,55 +186,23 @@
       unsigned enables;
 
       /* LOD offsets for ILO_IMAGE_WALK_LOD */
-      unsigned walk_lod_offsets[PIPE_MAX_TEXTURE_LEVELS];
+      unsigned walk_lod_offsets[ILO_IMAGE_MAX_LEVEL_COUNT];
 
       unsigned walk_layer_height;
       unsigned bo_stride;
       unsigned bo_height;
-
-      struct intel_bo *bo;
    } aux;
 };
 
-struct pipe_resource;
-
-void
+bool
 ilo_image_init(struct ilo_image *img,
                const struct ilo_dev *dev,
-               const struct pipe_resource *templ);
-
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
-                            const struct ilo_dev *dev,
-                            const struct pipe_resource *templ,
-                            enum gen_surface_tiling tiling,
-                            unsigned bo_stride);
-
-static inline void
-ilo_image_cleanup(struct ilo_image *img)
-{
-   intel_bo_unref(img->bo);
-   intel_bo_unref(img->aux.bo);
-}
-
-static inline void
-ilo_image_set_bo(struct ilo_image *img, struct intel_bo *bo)
-{
-   intel_bo_unref(img->bo);
-   img->bo = intel_bo_ref(bo);
-}
-
-static inline void
-ilo_image_set_aux_bo(struct ilo_image *img, struct intel_bo *bo)
-{
-   intel_bo_unref(img->aux.bo);
-   img->aux.bo = intel_bo_ref(bo);
-}
+               const struct ilo_image_info *info);
 
 static inline bool
 ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level)
 {
-   return (img->aux.bo && (img->aux.enables & (1 << level)));
+   return (img->aux.enables & (1 << level));
 }
 
 /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c	1970-01-01 00:00:00.000000000 +0000
@@ -1,2222 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#include "genhw/genhw.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-
-#include "ilo_format.h"
-#include "ilo_image.h"
-#include "ilo_state_3d.h"
-#include "../ilo_shader.h"
-
-static void
-rasterizer_init_clip(const struct ilo_dev *dev,
-                     const struct pipe_rasterizer_state *state,
-                     struct ilo_rasterizer_clip *clip)
-{
-   uint32_t dw1, dw2, dw3;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   dw1 = GEN6_CLIP_DW1_STATISTICS;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
-       *
-       *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
-       *      enabled only for the cases where the incoming primitive topology
-       *      into the clipper guaranteed to be Trilist."
-       *
-       * What does this mean?
-       */
-      dw1 |= 0 << 19 |
-             GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
-
-      if (ilo_dev_gen(dev) < ILO_GEN(8)) {
-         if (state->front_ccw)
-            dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
-
-         switch (state->cull_face) {
-         case PIPE_FACE_NONE:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE;
-            break;
-         case PIPE_FACE_FRONT:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT;
-            break;
-         case PIPE_FACE_BACK:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK;
-            break;
-         case PIPE_FACE_FRONT_AND_BACK:
-            dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH;
-            break;
-         }
-      }
-   }
-
-   dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
-         GEN6_CLIP_DW2_XY_TEST_ENABLE |
-         state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
-         GEN6_CLIP_DW2_CLIPMODE_NORMAL;
-
-   if (state->clip_halfz)
-      dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
-   else
-      dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;
-
-   if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip)
-      dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
-
-   if (state->flatshade_first) {
-      dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
-             0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
-             1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
-   }
-   else {
-      dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
-             1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
-             2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
-   }
-
-   dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
-         0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT;
-
-   clip->payload[0] = dw1;
-   clip->payload[1] = dw2;
-   clip->payload[2] = dw3;
-
-   clip->can_enable_guardband = true;
-
-   /*
-    * There are several reasons that guard band test should be disabled
-    *
-    *  - GL wide points (to avoid partially visibie object)
-    *  - GL wide or AA lines (to avoid partially visibie object)
-    */
-   if (state->point_size_per_vertex || state->point_size > 1.0f)
-      clip->can_enable_guardband = false;
-   if (state->line_smooth || state->line_width > 1.0f)
-      clip->can_enable_guardband = false;
-}
-
-static void
-rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev,
-                                     const struct pipe_rasterizer_state *state,
-                                     struct ilo_rasterizer_sf *sf)
-{
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /*
-    * Scale the constant term.  The minimum representable value used by the HW
-    * is not large enouch to be the minimum resolvable difference.
-    */
-   sf->dw_depth_offset_const = fui(state->offset_units * 2.0f);
-   sf->dw_depth_offset_scale = fui(state->offset_scale);
-   sf->dw_depth_offset_clamp = fui(state->offset_clamp);
-}
-
-static void
-rasterizer_init_sf_gen6(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_sf *sf)
-{
-   int line_width, point_width;
-   uint32_t dw1, dw2, dw3;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
-    *
-    *     "This bit (Statistics Enable) should be set whenever clipping is
-    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
-    *      should be cleared if clipping is disabled or Statistics Enable in
-    *      CLIP_STATE is clear."
-    */
-   dw1 = GEN7_SF_DW1_STATISTICS |
-         GEN7_SF_DW1_VIEWPORT_ENABLE;
-
-   /* XXX GEN6 path seems to work fine for GEN7 */
-   if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 258:
-       *
-       *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
-       *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
-       *      Depth Offset Enable Point) should be set whenever non zero depth
-       *      bias (Slope, Bias) values are used. Setting this bit may have
-       *      some degradation of performance for some workloads."
-       */
-      if (state->offset_tri || state->offset_line || state->offset_point) {
-         /* XXX need to scale offset_const according to the depth format */
-         dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;
-
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
-                GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
-                GEN7_SF_DW1_DEPTH_OFFSET_POINT;
-      }
-   } else {
-      if (state->offset_tri)
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
-      if (state->offset_line)
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
-      if (state->offset_point)
-         dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
-   }
-
-   switch (state->fill_front) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
-      break;
-   }
-
-   switch (state->fill_back) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
-      break;
-   }
-
-   if (state->front_ccw)
-      dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;
-
-   dw2 = 0;
-
-   if (state->line_smooth) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
-       *
-       *     "This field (Anti-aliasing Enable) must be disabled if any of the
-       *      render targets have integer (UINT or SINT) surface format."
-       *
-       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
-       *
-       *     "This field (Hierarchical Depth Buffer Enable) must be disabled
-       *      if Anti-aliasing Enable in 3DSTATE_SF is enabled.
-       *
-       * TODO We do not check those yet.
-       */
-      dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
-             GEN7_SF_DW2_AA_LINE_CAP_1_0;
-   }
-
-   switch (state->cull_face) {
-   case PIPE_FACE_NONE:
-      dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
-      break;
-   case PIPE_FACE_FRONT:
-      dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
-      break;
-   case PIPE_FACE_BACK:
-      dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
-      break;
-   case PIPE_FACE_FRONT_AND_BACK:
-      dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
-      break;
-   }
-
-   /*
-    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
-    * pixels in the minor direction.  We have to make the lines slightly
-    * thicker, 0.5 pixel on both sides, so that they intersect that many
-    * pixels are considered into the lines.
-    *
-    * Line width is in U3.7.
-    */
-   line_width = (int)
-      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
-   line_width = CLAMP(line_width, 0, 1023);
-
-   /* use GIQ rules */
-   if (line_width == 128 && !state->line_smooth)
-      line_width = 0;
-
-   dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-
-   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable)
-      dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
-
-   if (state->scissor)
-      dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
-
-   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
-         GEN7_SF_DW3_SUBPIXEL_8BITS;
-
-   if (state->line_last_pixel)
-      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
-   if (state->flatshade_first) {
-      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   } else {
-      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   }
-
-   if (!state->point_size_per_vertex)
-      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
-   /* in U8.3 */
-   point_width = (int) (state->point_size * 8.0f + 0.5f);
-   point_width = CLAMP(point_width, 1, 2047);
-
-   dw3 |= point_width;
-
-   STATIC_ASSERT(Elements(sf->payload) >= 3);
-   sf->payload[0] = dw1;
-   sf->payload[1] = dw2;
-   sf->payload[2] = dw3;
-
-   if (state->multisample) {
-      sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
-       *
-       *     "Software must not program a value of 0.0 when running in
-       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
-       *      when multisampling rasterization is enabled."
-       */
-      if (!line_width) {
-         line_width = 128; /* 1.0f */
-
-         sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-      }
-   } else {
-      sf->dw_msaa = 0;
-   }
-
-   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
-   /* 3DSTATE_RASTER is Gen8+ only */
-   sf->dw_raster = 0;
-}
-
-static uint32_t
-rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev,
-                              const struct pipe_rasterizer_state *state)
-{
-   uint32_t dw = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->front_ccw)
-      dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW;
-
-   switch (state->cull_face) {
-   case PIPE_FACE_NONE:
-      dw |= GEN8_RASTER_DW1_CULLMODE_NONE;
-      break;
-   case PIPE_FACE_FRONT:
-      dw |= GEN8_RASTER_DW1_CULLMODE_FRONT;
-      break;
-   case PIPE_FACE_BACK:
-      dw |= GEN8_RASTER_DW1_CULLMODE_BACK;
-      break;
-   case PIPE_FACE_FRONT_AND_BACK:
-      dw |= GEN8_RASTER_DW1_CULLMODE_BOTH;
-      break;
-   }
-
-   if (state->point_smooth)
-      dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
-
-   if (state->multisample)
-      dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
-
-   if (state->offset_tri)
-      dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
-   if (state->offset_line)
-      dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
-   if (state->offset_point)
-      dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
-
-   switch (state->fill_front) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw |= GEN8_RASTER_DW1_FRONTFACE_POINT;
-      break;
-   }
-
-   switch (state->fill_back) {
-   case PIPE_POLYGON_MODE_FILL:
-      dw |= GEN8_RASTER_DW1_BACKFACE_SOLID;
-      break;
-   case PIPE_POLYGON_MODE_LINE:
-      dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME;
-      break;
-   case PIPE_POLYGON_MODE_POINT:
-      dw |= GEN8_RASTER_DW1_BACKFACE_POINT;
-      break;
-   }
-
-   if (state->line_smooth)
-      dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
-
-   if (state->scissor)
-      dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
-
-   if (state->depth_clip)
-      dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
-
-   return dw;
-}
-
-static void
-rasterizer_init_sf_gen8(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_sf *sf)
-{
-   int line_width, point_width;
-   uint32_t dw1, dw2, dw3;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   /* in U3.7 */
-   line_width = (int)
-      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
-   line_width = CLAMP(line_width, 0, 1023);
-
-   /* use GIQ rules */
-   if (line_width == 128 && !state->line_smooth)
-      line_width = 0;
-
-   /* in U8.3 */
-   point_width = (int) (state->point_size * 8.0f + 0.5f);
-   point_width = CLAMP(point_width, 1, 2047);
-
-   dw1 = GEN7_SF_DW1_STATISTICS |
-         GEN7_SF_DW1_VIEWPORT_ENABLE;
-
-   dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-   if (state->line_smooth)
-      dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0;
-
-   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
-         GEN7_SF_DW3_SUBPIXEL_8BITS |
-         point_width;
-
-   if (state->line_last_pixel)
-      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
-   if (state->flatshade_first) {
-      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   } else {
-      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
-             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
-             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
-   }
-
-   if (!state->point_size_per_vertex)
-      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
-   dw3 |= point_width;
-
-   STATIC_ASSERT(Elements(sf->payload) >= 3);
-   sf->payload[0] = dw1;
-   sf->payload[1] = dw2;
-   sf->payload[2] = dw3;
-
-   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
-
-   sf->dw_msaa = 0;
-   sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state);
-}
-
-static void
-rasterizer_init_wm_gen6(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_wm *wm)
-{
-   uint32_t dw5, dw6;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   /* only the FF unit states are set, as in GEN7 */
-
-   dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
-
-   /* same value as in 3DSTATE_SF */
-   if (state->line_smooth)
-      dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
-
-   if (state->poly_stipple_enable)
-      dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
-   if (state->line_stipple_enable)
-      dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
-
-   /*
-    * assertion that makes sure
-    *
-    *   dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
-    *
-    * is valid
-    */
-   STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
-                 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);
-   dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL;
-
-   if (state->bottom_edge_rule)
-      dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
-
-   wm->dw_msaa_rast =
-      (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0;
-   wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
-
-   STATIC_ASSERT(Elements(wm->payload) >= 2);
-   wm->payload[0] = dw5;
-   wm->payload[1] = dw6;
-}
-
-static void
-rasterizer_init_wm_gen7(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_wm *wm)
-{
-   uint32_t dw1, dw2;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   /*
-    * assertion that makes sure
-    *
-    *   dw1 |= wm->dw_msaa_rast;
-    *   dw2 |= wm->dw_msaa_disp;
-    *
-    * is valid
-    */
-   STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
-                 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);
-   dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
-         GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
-   dw2 = 0;
-
-   /* same value as in 3DSTATE_SF */
-   if (state->line_smooth)
-      dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
-   if (state->poly_stipple_enable)
-      dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
-   if (state->line_stipple_enable)
-      dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
-   if (state->bottom_edge_rule)
-      dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
-   wm->dw_msaa_rast =
-      (state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0;
-   wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
-
-   STATIC_ASSERT(Elements(wm->payload) >= 2);
-   wm->payload[0] = dw1;
-   wm->payload[1] = dw2;
-}
-
-static uint32_t
-rasterizer_get_wm_gen8(const struct ilo_dev *dev,
-                       const struct pipe_rasterizer_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   dw = GEN7_WM_DW1_ZW_INTERP_PIXEL |
-        GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
-
-   /* same value as in 3DSTATE_SF */
-   if (state->line_smooth)
-      dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
-   if (state->poly_stipple_enable)
-      dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
-   if (state->line_stipple_enable)
-      dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
-   if (state->bottom_edge_rule)
-      dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
-   return dw;
-}
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_state *rasterizer)
-{
-   rasterizer_init_clip(dev, state, &rasterizer->clip);
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      memset(&rasterizer->wm, 0, sizeof(rasterizer->wm));
-      rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state);
-
-      rasterizer_init_sf_gen8(dev, state, &rasterizer->sf);
-   } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      rasterizer_init_wm_gen7(dev, state, &rasterizer->wm);
-      rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
-   } else {
-      rasterizer_init_wm_gen6(dev, state, &rasterizer->wm);
-      rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
-   }
-}
-
-static void
-fs_init_cso_gen6(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{
-   int start_grf, input_count, sampler_count, interps, max_threads;
-   uint32_t dw2, dw4, dw5, dw6;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-   interps = ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
-
-   /* see brwCreateContext() */
-   max_threads = (dev->gt == 2) ? 80 : 40;
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
-         0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
-         0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
-
-   dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
-    *      PS kernel or color calculator has the ability to kill (discard)
-    *      pixels or samples, other than due to depth or stencil testing.
-    *      This bit is required to be ENABLED in the following situations:
-    *
-    *      The API pixel shader program contains "killpix" or "discard"
-    *      instructions, or other code in the pixel shader kernel that can
-    *      cause the final pixel mask to differ from the pixel mask received
-    *      on dispatch.
-    *
-    *      A sampler with chroma key enabled with kill pixel mode is used by
-    *      the pixel shader.
-    *
-    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
-    *      enabled.
-    *
-    *      The pixel shader kernel generates and outputs oMask.
-    *
-    *      Note: As ClipDistance clipping is fully supported in hardware and
-    *      therefore not via PS instructions, there should be no need to
-    *      ENABLE this bit due to ClipDistance clipping."
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
-    *
-    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
-    *      field must be set to disabled."
-    *
-    * TODO This is not checked yet.
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw5 |= GEN6_WM_DW5_PS_USE_W;
-
-   /*
-    * TODO set this bit only when
-    *
-    *  a) fs writes colors and color is not masked, or
-    *  b) fs writes depth, or
-    *  c) fs or cc kills
-    */
-   if (true)
-      dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
-
-   dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
-         GEN6_WM_DW6_PS_POSOFFSET_NONE |
-         interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-   cso->payload[3] = dw6;
-}
-
-static uint32_t
-fs_get_wm_gen7(const struct ilo_dev *dev,
-               const struct ilo_shader_state *fs)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   dw = ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
-      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-
-   /*
-    * TODO set this bit only when
-    *
-    *  a) fs writes colors and color is not masked, or
-    *  b) fs writes depth, or
-    *  c) fs or cc kills
-    */
-   dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
-
-   /*
-    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
-    *
-    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
-    *      the PS kernel or color calculator has the ability to kill
-    *      (discard) pixels or samples, other than due to depth or stencil
-    *      testing. This bit is required to be ENABLED in the following
-    *      situations:
-    *
-    *      - The API pixel shader program contains "killpix" or "discard"
-    *        instructions, or other code in the pixel shader kernel that
-    *        can cause the final pixel mask to differ from the pixel mask
-    *        received on dispatch.
-    *
-    *      - A sampler with chroma key enabled with kill pixel mode is used
-    *        by the pixel shader.
-    *
-    *      - Any render target has Alpha Test Enable or AlphaToCoverage
-    *        Enable enabled.
-    *
-    *      - The pixel shader kernel generates and outputs oMask.
-    *
-    *      Note: As ClipDistance clipping is fully supported in hardware
-    *      and therefore not via PS instructions, there should be no need
-    *      to ENABLE this bit due to ClipDistance clipping."
-    */
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw |= GEN7_WM_DW1_PS_KILL_PIXEL;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw |= GEN7_WM_DW1_PSCDEPTH_ON;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw |= GEN7_WM_DW1_PS_USE_DEPTH;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw |= GEN7_WM_DW1_PS_USE_W;
-
-   return dw;
-}
-
-static void
-fs_init_cso_gen7(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{
-   int start_grf, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
-
-   /* see brwCreateContext() */
-   switch (ilo_dev_gen(dev)) {
-   case ILO_GEN(7.5):
-      max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
-      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
-      dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
-      break;
-   case ILO_GEN(7):
-   default:
-      max_threads = (dev->gt == 2) ? 172 : 48;
-      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-      break;
-   }
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
-      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
-   dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
-         0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
-         0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-   cso->payload[3] = fs_get_wm_gen7(dev, fs);
-}
-
-static uint32_t
-fs_get_psx_gen8(const struct ilo_dev *dev,
-                const struct ilo_shader_state *fs)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   dw = GEN8_PSX_DW1_DISPATCH_ENABLE;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
-      dw |= GEN8_PSX_DW1_KILL_PIXEL;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
-      dw |= GEN8_PSX_DW1_PSCDEPTH_ON;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
-      dw |= GEN8_PSX_DW1_USE_DEPTH;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
-      dw |= GEN8_PSX_DW1_USE_W;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
-      dw |= GEN8_PSX_DW1_ATTR_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-fs_get_wm_gen8(const struct ilo_dev *dev,
-               const struct ilo_shader_state *fs)
-{
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   return ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
-      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-}
-
-static void
-fs_init_cso_gen8(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{
-   int start_grf, sampler_count;
-   uint32_t dw3, dw6, dw7;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   /* always 64? */
-   dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT |
-         GEN8_PS_DW6_POSOFFSET_NONE;
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
-
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
-
-   dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
-         0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
-         0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 5);
-   cso->payload[0] = dw3;
-   cso->payload[1] = dw6;
-   cso->payload[2] = dw7;
-   cso->payload[3] = fs_get_psx_gen8(dev, fs);
-   cso->payload[4] = fs_get_wm_gen8(dev, fs);
-}
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *fs,
-                    struct ilo_shader_cso *cso)
-{
-   if (ilo_dev_gen(dev) >= ILO_GEN(8))
-      fs_init_cso_gen8(dev, fs, cso);
-   else if (ilo_dev_gen(dev) >= ILO_GEN(7))
-      fs_init_cso_gen7(dev, fs, cso);
-   else
-      fs_init_cso_gen6(dev, fs, cso);
-}
-
-struct ilo_zs_surface_info {
-   int surface_type;
-   int format;
-
-   struct {
-      struct intel_bo *bo;
-      unsigned stride;
-      unsigned qpitch;
-      enum gen_surface_tiling tiling;
-      uint32_t offset;
-   } zs, stencil, hiz;
-
-   unsigned width, height, depth;
-   unsigned lod, first_layer, num_layers;
-};
-
-static void
-zs_init_info_null(const struct ilo_dev *dev,
-                  struct ilo_zs_surface_info *info)
-{
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   memset(info, 0, sizeof(*info));
-
-   info->surface_type = GEN6_SURFTYPE_NULL;
-   info->format = GEN6_ZFORMAT_D32_FLOAT;
-   info->width = 1;
-   info->height = 1;
-   info->depth = 1;
-   info->num_layers = 1;
-}
-
-static void
-zs_init_info(const struct ilo_dev *dev,
-             const struct ilo_image *img,
-             const struct ilo_image *s8_img,
-             enum pipe_texture_target target,
-             enum pipe_format format, unsigned level,
-             unsigned first_layer, unsigned num_layers,
-             struct ilo_zs_surface_info *info)
-{
-   bool separate_stencil;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   memset(info, 0, sizeof(*info));
-
-   info->surface_type = ilo_gpe_gen6_translate_texture(target);
-
-   if (info->surface_type == GEN6_SURFTYPE_CUBE) {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
-       *
-       *     "For Other Surfaces (Cube Surfaces):
-       *      This field (Minimum Array Element) is ignored."
-       *
-       *     "For Other Surfaces (Cube Surfaces):
-       *      This field (Render Target View Extent) is ignored."
-       *
-       * As such, we cannot set first_layer and num_layers on cube surfaces.
-       * To work around that, treat it as a 2D surface.
-       */
-      info->surface_type = GEN6_SURFTYPE_2D;
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      separate_stencil = true;
-   } else {
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
-       *
-       *     "This field (Separate Stencil Buffer Enable) must be set to the
-       *      same value (enabled or disabled) as Hierarchical Depth Buffer
-       *      Enable."
-       */
-      separate_stencil = ilo_image_can_enable_aux(img, level);
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
-    *
-    *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
-    *      Surface Format of the depth buffer cannot be
-    *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
-    *      requires the separate stencil buffer."
-    *
-    * From the Ironlake PRM, volume 2 part 1, page 330:
-    *
-    *     "If this field (Separate Stencil Buffer Enable) is disabled, the
-    *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
-    *
-    * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
-    * is indeed used, the depth values output by the fragment shaders will
-    * be different when read back.
-    *
-    * As for GEN7+, separate_stencil is always true.
-    */
-   switch (format) {
-   case PIPE_FORMAT_Z16_UNORM:
-      info->format = GEN6_ZFORMAT_D16_UNORM;
-      break;
-   case PIPE_FORMAT_Z32_FLOAT:
-      info->format = GEN6_ZFORMAT_D32_FLOAT;
-      break;
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-      info->format = (separate_stencil) ?
-         GEN6_ZFORMAT_D24_UNORM_X8_UINT :
-         GEN6_ZFORMAT_D24_UNORM_S8_UINT;
-      break;
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-      info->format = (separate_stencil) ?
-         GEN6_ZFORMAT_D32_FLOAT :
-         GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
-      break;
-   case PIPE_FORMAT_S8_UINT:
-      if (separate_stencil) {
-         info->format = GEN6_ZFORMAT_D32_FLOAT;
-         break;
-      }
-      /* fall through */
-   default:
-      assert(!"unsupported depth/stencil format");
-      zs_init_info_null(dev, info);
-      return;
-      break;
-   }
-
-   if (format != PIPE_FORMAT_S8_UINT) {
-      info->zs.bo = img->bo;
-      info->zs.stride = img->bo_stride;
-
-      assert(img->walk_layer_height % 4 == 0);
-      info->zs.qpitch = img->walk_layer_height / 4;
-
-      info->zs.tiling = img->tiling;
-      info->zs.offset = 0;
-   }
-
-   if (s8_img || format == PIPE_FORMAT_S8_UINT) {
-      info->stencil.bo = s8_img->bo;
-
-      /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 329:
-       *
-       *     "The pitch must be set to 2x the value computed based on width,
-       *       as the stencil buffer is stored with two rows interleaved."
-       *
-       * For GEN7, we still dobule the stride because we did not double the
-       * slice widths when initializing the layout.
-       */
-      info->stencil.stride = s8_img->bo_stride * 2;
-
-      assert(s8_img->walk_layer_height % 4 == 0);
-      info->stencil.qpitch = s8_img->walk_layer_height / 4;
-
-      info->stencil.tiling = s8_img->tiling;
-
-      if (ilo_dev_gen(dev) == ILO_GEN(6)) {
-         unsigned x, y;
-
-         assert(s8_img->walk == ILO_IMAGE_WALK_LOD);
-
-         /* offset to the level */
-         ilo_image_get_slice_pos(s8_img, level, 0, &x, &y);
-         ilo_image_pos_to_mem(s8_img, x, y, &x, &y);
-         info->stencil.offset = ilo_image_mem_to_raw(s8_img, x, y);
-      }
-   }
-
-   if (ilo_image_can_enable_aux(img, level)) {
-      info->hiz.bo = img->aux.bo;
-      info->hiz.stride = img->aux.bo_stride;
-
-      assert(img->aux.walk_layer_height % 4 == 0);
-      info->hiz.qpitch = img->aux.walk_layer_height / 4;
-
-      info->hiz.tiling = GEN6_TILING_Y;
-
-      /* offset to the level */
-      if (ilo_dev_gen(dev) == ILO_GEN(6))
-         info->hiz.offset = img->aux.walk_lod_offsets[level];
-   }
-
-   info->width = img->width0;
-   info->height = img->height0;
-   info->depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
-
-   info->lod = level;
-   info->first_layer = first_layer;
-   info->num_layers = num_layers;
-}
-
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
-                        const struct ilo_image *img,
-                        const struct ilo_image *s8_img,
-                        enum pipe_texture_target target,
-                        enum pipe_format format, unsigned level,
-                        unsigned first_layer, unsigned num_layers,
-                        struct ilo_zs_surface *zs)
-{
-   const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
-   const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
-   struct ilo_zs_surface_info info;
-   uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
-   int align_w = 8, align_h = 4;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   if (img) {
-      zs_init_info(dev, img, s8_img, target, format,
-            level, first_layer, num_layers, &info);
-
-      switch (img->sample_count) {
-      case 2:
-         align_w /= 2;
-         break;
-      case 4:
-         align_w /= 2;
-         align_h /= 2;
-         break;
-      case 8:
-         align_w /= 4;
-         align_h /= 2;
-         break;
-      case 16:
-         align_w /= 4;
-         align_h /= 4;
-         break;
-      default:
-         break;
-      }
-   } else {
-      zs_init_info_null(dev, &info);
-   }
-
-   switch (info.surface_type) {
-   case GEN6_SURFTYPE_NULL:
-      break;
-   case GEN6_SURFTYPE_1D:
-      assert(info.width <= max_2d_size && info.height == 1 &&
-             info.depth <= max_array_size);
-      assert(info.first_layer < max_array_size - 1 &&
-             info.num_layers <= max_array_size);
-      break;
-   case GEN6_SURFTYPE_2D:
-      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
-             info.depth <= max_array_size);
-      assert(info.first_layer < max_array_size - 1 &&
-             info.num_layers <= max_array_size);
-      break;
-   case GEN6_SURFTYPE_3D:
-      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
-      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
-      break;
-   case GEN6_SURFTYPE_CUBE:
-      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
-             info.depth == 1);
-      assert(info.first_layer == 0 && info.num_layers == 1);
-      assert(info.width == info.height);
-      break;
-   default:
-      assert(!"unexpected depth surface type");
-      break;
-   }
-
-   dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT |
-         info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
-
-   if (info.zs.bo) {
-      /* required for GEN6+ */
-      assert(info.zs.tiling == GEN6_TILING_Y);
-      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
-            info.zs.stride % 128 == 0);
-      assert(info.width <= info.zs.stride);
-
-      dw1 |= (info.zs.stride - 1);
-      dw2 = info.zs.offset;
-   } else {
-      dw2 = 0;
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      if (info.zs.bo)
-         dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
-
-      if (info.stencil.bo)
-         dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
-
-      if (info.hiz.bo)
-         dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
-
-      dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
-            (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
-            info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
-      zs->dw_aligned_8x4 =
-         (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
-         (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
-         info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
-      dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT |
-            info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
-
-      dw5 = 0;
-
-      dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
-
-      if (ilo_dev_gen(dev) >= ILO_GEN(8))
-         dw6 |= info.zs.qpitch;
-   } else {
-      /* always Y-tiled */
-      dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT;
-
-      if (info.hiz.bo) {
-         dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
-                GEN6_DEPTH_DW1_SEPARATE_STENCIL;
-      }
-
-      dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
-            (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
-            info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
-            GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
-      zs->dw_aligned_8x4 =
-         (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
-         (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
-         info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
-         GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
-      dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT |
-            info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
-            (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
-
-      dw5 = 0;
-
-      dw6 = 0;
-   }
-
-   STATIC_ASSERT(Elements(zs->payload) >= 12);
-
-   zs->payload[0] = dw1;
-   zs->payload[1] = dw2;
-   zs->payload[2] = dw3;
-   zs->payload[3] = dw4;
-   zs->payload[4] = dw5;
-   zs->payload[5] = dw6;
-
-   /* do not increment reference count */
-   zs->bo = info.zs.bo;
-
-   /* separate stencil */
-   if (info.stencil.bo) {
-      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
-             info.stencil.stride % 128 == 0);
-
-      dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
-      if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
-         dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
-
-      dw2 = info.stencil.offset;
-      dw4 = info.stencil.qpitch;
-   } else {
-      dw1 = 0;
-      dw2 = 0;
-      dw4 = 0;
-   }
-
-   zs->payload[6] = dw1;
-   zs->payload[7] = dw2;
-   zs->payload[8] = dw4;
-   /* do not increment reference count */
-   zs->separate_s8_bo = info.stencil.bo;
-
-   /* hiz */
-   if (info.hiz.bo) {
-      dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
-      dw2 = info.hiz.offset;
-      dw4 = info.hiz.qpitch;
-   } else {
-      dw1 = 0;
-      dw2 = 0;
-      dw4 = 0;
-   }
-
-   zs->payload[9] = dw1;
-   zs->payload[10] = dw2;
-   zs->payload[11] = dw4;
-   /* do not increment reference count */
-   zs->hiz_bo = info.hiz.bo;
-}
-
-static void
-viewport_get_guardband(const struct ilo_dev *dev,
-                       int center_x, int center_y,
-                       int *min_gbx, int *max_gbx,
-                       int *min_gby, int *max_gby)
-{
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
-    *
-    *     "Per-Device Guardband Extents
-    *
-    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
-    *       - Maximum Post-Clamp Delta (X or Y): 16K"
-    *
-    *     "In addition, in order to be correctly rendered, objects must have a
-    *      screenspace bounding box not exceeding 8K in the X or Y direction.
-    *      This additional restriction must also be comprehended by software,
-    *      i.e., enforced by use of clipping."
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
-    *
-    *     "Per-Device Guardband Extents
-    *
-    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
-    *       - Maximum Post-Clamp Delta (X or Y): N/A"
-    *
-    *     "In addition, in order to be correctly rendered, objects must have a
-    *      screenspace bounding box not exceeding 8K in the X or Y direction.
-    *      This additional restriction must also be comprehended by software,
-    *      i.e., enforced by use of clipping."
-    *
-    * Combined, the bounding box of any object can not exceed 8K in both
-    * width and height.
-    *
-    * Below we set the guardband as a squre of length 8K, centered at where
-    * the viewport is.  This makes sure all objects passing the GB test are
-    * valid to the renderer, and those failing the XY clipping have a
-    * better chance of passing the GB test.
-    */
-   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
-   const int half_len = 8192 / 2;
-
-   /* make sure the guardband is within the valid range */
-   if (center_x - half_len < -max_extent)
-      center_x = -max_extent + half_len;
-   else if (center_x + half_len > max_extent - 1)
-      center_x = max_extent - half_len;
-
-   if (center_y - half_len < -max_extent)
-      center_y = -max_extent + half_len;
-   else if (center_y + half_len > max_extent - 1)
-      center_y = max_extent - half_len;
-
-   *min_gbx = (float) (center_x - half_len);
-   *max_gbx = (float) (center_x + half_len);
-   *min_gby = (float) (center_y - half_len);
-   *max_gby = (float) (center_y + half_len);
-}
-
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
-                         const struct pipe_viewport_state *state,
-                         struct ilo_viewport_cso *vp)
-{
-   const float scale_x = fabs(state->scale[0]);
-   const float scale_y = fabs(state->scale[1]);
-   const float scale_z = fabs(state->scale[2]);
-   int min_gbx, max_gbx, min_gby, max_gby;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   viewport_get_guardband(dev,
-         (int) state->translate[0],
-         (int) state->translate[1],
-         &min_gbx, &max_gbx, &min_gby, &max_gby);
-
-   /* matrix form */
-   vp->m00 = state->scale[0];
-   vp->m11 = state->scale[1];
-   vp->m22 = state->scale[2];
-   vp->m30 = state->translate[0];
-   vp->m31 = state->translate[1];
-   vp->m32 = state->translate[2];
-
-   /* guardband in NDC space */
-   vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
-   vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
-   vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
-   vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
-
-   /* viewport in screen space */
-   vp->min_x = scale_x * -1.0f + state->translate[0];
-   vp->max_x = scale_x *  1.0f + state->translate[0];
-   vp->min_y = scale_y * -1.0f + state->translate[1];
-   vp->max_y = scale_y *  1.0f + state->translate[1];
-   vp->min_z = scale_z * -1.0f + state->translate[2];
-   vp->max_z = scale_z *  1.0f + state->translate[2];
-}
-
-/**
- * Translate a pipe logicop to the matching hardware logicop.
- */
-static int
-gen6_translate_pipe_logicop(unsigned logicop)
-{
-   switch (logicop) {
-   case PIPE_LOGICOP_CLEAR:         return GEN6_LOGICOP_CLEAR;
-   case PIPE_LOGICOP_NOR:           return GEN6_LOGICOP_NOR;
-   case PIPE_LOGICOP_AND_INVERTED:  return GEN6_LOGICOP_AND_INVERTED;
-   case PIPE_LOGICOP_COPY_INVERTED: return GEN6_LOGICOP_COPY_INVERTED;
-   case PIPE_LOGICOP_AND_REVERSE:   return GEN6_LOGICOP_AND_REVERSE;
-   case PIPE_LOGICOP_INVERT:        return GEN6_LOGICOP_INVERT;
-   case PIPE_LOGICOP_XOR:           return GEN6_LOGICOP_XOR;
-   case PIPE_LOGICOP_NAND:          return GEN6_LOGICOP_NAND;
-   case PIPE_LOGICOP_AND:           return GEN6_LOGICOP_AND;
-   case PIPE_LOGICOP_EQUIV:         return GEN6_LOGICOP_EQUIV;
-   case PIPE_LOGICOP_NOOP:          return GEN6_LOGICOP_NOOP;
-   case PIPE_LOGICOP_OR_INVERTED:   return GEN6_LOGICOP_OR_INVERTED;
-   case PIPE_LOGICOP_COPY:          return GEN6_LOGICOP_COPY;
-   case PIPE_LOGICOP_OR_REVERSE:    return GEN6_LOGICOP_OR_REVERSE;
-   case PIPE_LOGICOP_OR:            return GEN6_LOGICOP_OR;
-   case PIPE_LOGICOP_SET:           return GEN6_LOGICOP_SET;
-   default:
-      assert(!"unknown logicop function");
-      return GEN6_LOGICOP_CLEAR;
-   }
-}
-
-/**
- * Translate a pipe blend function to the matching hardware blend function.
- */
-static int
-gen6_translate_pipe_blend(unsigned blend)
-{
-   switch (blend) {
-   case PIPE_BLEND_ADD:                return GEN6_BLENDFUNCTION_ADD;
-   case PIPE_BLEND_SUBTRACT:           return GEN6_BLENDFUNCTION_SUBTRACT;
-   case PIPE_BLEND_REVERSE_SUBTRACT:   return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
-   case PIPE_BLEND_MIN:                return GEN6_BLENDFUNCTION_MIN;
-   case PIPE_BLEND_MAX:                return GEN6_BLENDFUNCTION_MAX;
-   default:
-      assert(!"unknown blend function");
-      return GEN6_BLENDFUNCTION_ADD;
-   };
-}
-
-/**
- * Translate a pipe blend factor to the matching hardware blend factor.
- */
-static int
-gen6_translate_pipe_blendfactor(unsigned blendfactor)
-{
-   switch (blendfactor) {
-   case PIPE_BLENDFACTOR_ONE:                return GEN6_BLENDFACTOR_ONE;
-   case PIPE_BLENDFACTOR_SRC_COLOR:          return GEN6_BLENDFACTOR_SRC_COLOR;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:          return GEN6_BLENDFACTOR_SRC_ALPHA;
-   case PIPE_BLENDFACTOR_DST_ALPHA:          return GEN6_BLENDFACTOR_DST_ALPHA;
-   case PIPE_BLENDFACTOR_DST_COLOR:          return GEN6_BLENDFACTOR_DST_COLOR;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
-   case PIPE_BLENDFACTOR_CONST_COLOR:        return GEN6_BLENDFACTOR_CONST_COLOR;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:        return GEN6_BLENDFACTOR_CONST_ALPHA;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:         return GEN6_BLENDFACTOR_SRC1_COLOR;
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:         return GEN6_BLENDFACTOR_SRC1_ALPHA;
-   case PIPE_BLENDFACTOR_ZERO:               return GEN6_BLENDFACTOR_ZERO;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return GEN6_BLENDFACTOR_INV_SRC_COLOR;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return GEN6_BLENDFACTOR_INV_SRC_ALPHA;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return GEN6_BLENDFACTOR_INV_DST_ALPHA;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:      return GEN6_BLENDFACTOR_INV_DST_COLOR;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return GEN6_BLENDFACTOR_INV_CONST_COLOR;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return GEN6_BLENDFACTOR_INV_CONST_ALPHA;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:     return GEN6_BLENDFACTOR_INV_SRC1_COLOR;
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:     return GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
-   default:
-      assert(!"unknown blend factor");
-      return GEN6_BLENDFACTOR_ONE;
-   };
-}
-
-/**
- * Translate a pipe stencil op to the matching hardware stencil op.
- */
-static int
-gen6_translate_pipe_stencil_op(unsigned stencil_op)
-{
-   switch (stencil_op) {
-   case PIPE_STENCIL_OP_KEEP:       return GEN6_STENCILOP_KEEP;
-   case PIPE_STENCIL_OP_ZERO:       return GEN6_STENCILOP_ZERO;
-   case PIPE_STENCIL_OP_REPLACE:    return GEN6_STENCILOP_REPLACE;
-   case PIPE_STENCIL_OP_INCR:       return GEN6_STENCILOP_INCRSAT;
-   case PIPE_STENCIL_OP_DECR:       return GEN6_STENCILOP_DECRSAT;
-   case PIPE_STENCIL_OP_INCR_WRAP:  return GEN6_STENCILOP_INCR;
-   case PIPE_STENCIL_OP_DECR_WRAP:  return GEN6_STENCILOP_DECR;
-   case PIPE_STENCIL_OP_INVERT:     return GEN6_STENCILOP_INVERT;
-   default:
-      assert(!"unknown stencil op");
-      return GEN6_STENCILOP_KEEP;
-   }
-}
-
-static int
-gen6_blend_factor_dst_alpha_forced_one(int factor)
-{
-   switch (factor) {
-   case GEN6_BLENDFACTOR_DST_ALPHA:
-      return GEN6_BLENDFACTOR_ONE;
-   case GEN6_BLENDFACTOR_INV_DST_ALPHA:
-   case GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      return GEN6_BLENDFACTOR_ZERO;
-   default:
-      return factor;
-   }
-}
-
-static uint32_t
-blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev,
-                               const struct pipe_rt_blend_state *rt,
-                               bool dst_alpha_forced_one)
-{
-   int rgb_src, rgb_dst, a_src, a_dst;
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!rt->blend_enable)
-      return 0;
-
-   rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
-   rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
-   a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
-   a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
-   if (dst_alpha_forced_one) {
-      rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
-      rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
-      a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
-      a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
-   }
-
-   dw = GEN6_RT_DW0_BLEND_ENABLE |
-        gen6_translate_pipe_blend(rt->alpha_func) << 26 |
-        a_src << 20 |
-        a_dst << 15 |
-        gen6_translate_pipe_blend(rt->rgb_func) << 11 |
-        rgb_src << 5 |
-        rgb_dst;
-
-   if (rt->rgb_func != rt->alpha_func ||
-       rgb_src != a_src || rgb_dst != a_dst)
-      dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev,
-                               const struct pipe_rt_blend_state *rt,
-                               bool dst_alpha_forced_one,
-                               bool *independent_alpha)
-{
-   int rgb_src, rgb_dst, a_src, a_dst;
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!rt->blend_enable) {
-      *independent_alpha = false;
-      return 0;
-   }
-
-   rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
-   rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
-   a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
-   a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
-   if (dst_alpha_forced_one) {
-      rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
-      rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
-      a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
-      a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
-   }
-
-   dw = GEN8_RT_DW0_BLEND_ENABLE |
-        rgb_src << 26 |
-        rgb_dst << 21 |
-        gen6_translate_pipe_blend(rt->rgb_func) << 18 |
-        a_src << 13 |
-        a_dst << 8 |
-        gen6_translate_pipe_blend(rt->alpha_func) << 5;
-
-   *independent_alpha = (rt->rgb_func != rt->alpha_func ||
-                         rgb_src != a_src ||
-                         rgb_dst != a_dst);
-
-   return dw;
-}
-
-static void
-blend_init_cso_gen6(const struct ilo_dev *dev,
-                    const struct pipe_blend_state *state,
-                    struct ilo_blend_state *blend,
-                    unsigned index)
-{
-   const struct pipe_rt_blend_state *rt = &state->rt[index];
-   struct ilo_blend_cso *cso = &blend->cso[index];
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   cso->payload[0] = 0;
-   cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
-                     GEN6_RT_DW1_PRE_BLEND_CLAMP |
-                     GEN6_RT_DW1_POST_BLEND_CLAMP;
-
-   if (!(rt->colormask & PIPE_MASK_A))
-      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_A;
-   if (!(rt->colormask & PIPE_MASK_R))
-      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_R;
-   if (!(rt->colormask & PIPE_MASK_G))
-      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_G;
-   if (!(rt->colormask & PIPE_MASK_B))
-      cso->payload[1] |= GEN6_RT_DW1_WRITE_DISABLE_B;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
-    *
-    *     "Color Buffer Blending and Logic Ops must not be enabled
-    *      simultaneously, or behavior is UNDEFINED."
-    *
-    * Since state->logicop_enable takes precedence over rt->blend_enable,
-    * no special care is needed.
-    */
-   if (state->logicop_enable) {
-      cso->dw_blend = 0;
-      cso->dw_blend_dst_alpha_forced_one = 0;
-   } else {
-      cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false);
-      cso->dw_blend_dst_alpha_forced_one =
-         blend_get_rt_blend_enable_gen6(dev, rt, true);
-   }
-}
-
-static bool
-blend_init_cso_gen8(const struct ilo_dev *dev,
-                    const struct pipe_blend_state *state,
-                    struct ilo_blend_state *blend,
-                    unsigned index)
-{
-   const struct pipe_rt_blend_state *rt = &state->rt[index];
-   struct ilo_blend_cso *cso = &blend->cso[index];
-   bool independent_alpha = false;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   cso->payload[0] = 0;
-   cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
-                     GEN8_RT_DW1_PRE_BLEND_CLAMP |
-                     GEN8_RT_DW1_POST_BLEND_CLAMP;
-
-   if (!(rt->colormask & PIPE_MASK_A))
-      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_A;
-   if (!(rt->colormask & PIPE_MASK_R))
-      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_R;
-   if (!(rt->colormask & PIPE_MASK_G))
-      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_G;
-   if (!(rt->colormask & PIPE_MASK_B))
-      cso->payload[0] |= GEN8_RT_DW0_WRITE_DISABLE_B;
-
-   if (state->logicop_enable) {
-      cso->dw_blend = 0;
-      cso->dw_blend_dst_alpha_forced_one = 0;
-   } else {
-      bool tmp[2];
-
-      cso->dw_blend = blend_get_rt_blend_enable_gen8(dev, rt, false, &tmp[0]);
-      cso->dw_blend_dst_alpha_forced_one =
-         blend_get_rt_blend_enable_gen8(dev, rt, true, &tmp[1]);
-
-      if (tmp[0] || tmp[1])
-         independent_alpha = true;
-   }
-
-   return independent_alpha;
-}
-
-static uint32_t
-blend_get_logicop_enable_gen6(const struct ilo_dev *dev,
-                              const struct pipe_blend_state *state)
-{
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!state->logicop_enable)
-      return 0;
-
-   return GEN6_RT_DW1_LOGICOP_ENABLE |
-          gen6_translate_pipe_logicop(state->logicop_func) << 18;
-}
-
-static uint32_t
-blend_get_logicop_enable_gen8(const struct ilo_dev *dev,
-                              const struct pipe_blend_state *state)
-{
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!state->logicop_enable)
-      return 0;
-
-   return GEN8_RT_DW1_LOGICOP_ENABLE |
-          gen6_translate_pipe_logicop(state->logicop_func) << 27;
-}
-
-static uint32_t
-blend_get_alpha_mod_gen6(const struct ilo_dev *dev,
-                         const struct pipe_blend_state *state,
-                         bool dual_blend)
-{
-   uint32_t dw = 0;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (state->alpha_to_coverage) {
-      dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
-      if (ilo_dev_gen(dev) >= ILO_GEN(7))
-         dw |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
-   }
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
-    *
-    *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
-    *      must be disabled."
-    */
-   if (state->alpha_to_one && !dual_blend)
-      dw |= GEN6_RT_DW1_ALPHA_TO_ONE;
-
-   return dw;
-}
-
-static uint32_t
-blend_get_alpha_mod_gen8(const struct ilo_dev *dev,
-                         const struct pipe_blend_state *state,
-                         bool dual_blend)
-{
-   uint32_t dw = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->alpha_to_coverage) {
-      dw |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE |
-            GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
-   }
-
-   if (state->alpha_to_one && !dual_blend)
-      dw |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
-
-   return dw;
-}
-
-static uint32_t
-blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0)
-{
-   int rgb_src, rgb_dst, a_src, a_dst;
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE))
-      return 0;
-
-   a_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR);
-   a_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR);
-   rgb_src = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR);
-   rgb_dst = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR);
-
-   dw = GEN8_PS_BLEND_DW1_BLEND_ENABLE;
-   dw |= GEN_SHIFT32(a_src, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR);
-   dw |= GEN_SHIFT32(a_dst, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR);
-   dw |= GEN_SHIFT32(rgb_src, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR);
-   dw |= GEN_SHIFT32(rgb_dst, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR);
-
-   if (a_src != rgb_src || a_dst != rgb_dst)
-      dw |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
-
-   return dw;
-}
-
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
-                   const struct pipe_blend_state *state,
-                   struct ilo_blend_state *blend)
-{
-   unsigned i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   blend->dual_blend = (util_blend_state_is_dual(state, 0) &&
-                        state->rt[0].blend_enable &&
-                        !state->logicop_enable);
-   blend->alpha_to_coverage = state->alpha_to_coverage;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      bool independent_alpha;
-
-      blend->dw_alpha_mod =
-         blend_get_alpha_mod_gen8(dev, state, blend->dual_blend);
-      blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state);
-      blend->dw_shared = (state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0;
-
-      independent_alpha = blend_init_cso_gen8(dev, state, blend, 0);
-      if (independent_alpha)
-         blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
-
-      blend->dw_ps_blend = blend_get_ps_blend_gen8(dev,
-            blend->cso[0].dw_blend);
-      blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev,
-            blend->cso[0].dw_blend_dst_alpha_forced_one);
-
-      if (state->independent_blend_enable) {
-         for (i = 1; i < Elements(blend->cso); i++) {
-            independent_alpha = blend_init_cso_gen8(dev, state, blend, i);
-            if (independent_alpha)
-               blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
-         }
-      } else {
-         for (i = 1; i < Elements(blend->cso); i++)
-            blend->cso[i] = blend->cso[0];
-      }
-   } else {
-      blend->dw_alpha_mod =
-         blend_get_alpha_mod_gen6(dev, state, blend->dual_blend);
-      blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state);
-      blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0;
-
-      blend->dw_ps_blend = 0;
-      blend->dw_ps_blend_dst_alpha_forced_one = 0;
-
-      blend_init_cso_gen6(dev, state, blend, 0);
-      if (state->independent_blend_enable) {
-         for (i = 1; i < Elements(blend->cso); i++)
-            blend_init_cso_gen6(dev, state, blend, i);
-      } else {
-         for (i = 1; i < Elements(blend->cso); i++)
-            blend->cso[i] = blend->cso[0];
-      }
-   }
-}
-
-/**
- * Translate a pipe DSA test function to the matching hardware compare
- * function.
- */
-static int
-gen6_translate_dsa_func(unsigned func)
-{
-   switch (func) {
-   case PIPE_FUNC_NEVER:      return GEN6_COMPAREFUNCTION_NEVER;
-   case PIPE_FUNC_LESS:       return GEN6_COMPAREFUNCTION_LESS;
-   case PIPE_FUNC_EQUAL:      return GEN6_COMPAREFUNCTION_EQUAL;
-   case PIPE_FUNC_LEQUAL:     return GEN6_COMPAREFUNCTION_LEQUAL;
-   case PIPE_FUNC_GREATER:    return GEN6_COMPAREFUNCTION_GREATER;
-   case PIPE_FUNC_NOTEQUAL:   return GEN6_COMPAREFUNCTION_NOTEQUAL;
-   case PIPE_FUNC_GEQUAL:     return GEN6_COMPAREFUNCTION_GEQUAL;
-   case PIPE_FUNC_ALWAYS:     return GEN6_COMPAREFUNCTION_ALWAYS;
-   default:
-      assert(!"unknown depth/stencil/alpha test function");
-      return GEN6_COMPAREFUNCTION_NEVER;
-   }
-}
-
-static uint32_t
-dsa_get_stencil_enable_gen6(const struct ilo_dev *dev,
-                            const struct pipe_stencil_state *stencil0,
-                            const struct pipe_stencil_state *stencil1)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!stencil0->enabled)
-      return 0;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
-    *
-    *     "If the Depth Buffer is either undefined or does not have a surface
-    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
-    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
-    *
-    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
-    *
-    *     "This field (Stencil Test Enable) cannot be enabled if
-    *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
-    *
-    * TODO We do not check these yet.
-    */
-   dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE |
-        gen6_translate_dsa_func(stencil0->func) << 28 |
-        gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
-        gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
-        gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
-   if (stencil0->writemask)
-      dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
-
-   if (stencil1->enabled) {
-      dw |= GEN6_ZS_DW0_STENCIL1_ENABLE |
-            gen6_translate_dsa_func(stencil1->func) << 12 |
-            gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
-            gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
-            gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
-      if (stencil1->writemask)
-         dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
-   }
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_stencil_enable_gen8(const struct ilo_dev *dev,
-                            const struct pipe_stencil_state *stencil0,
-                            const struct pipe_stencil_state *stencil1)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!stencil0->enabled)
-      return 0;
-
-   dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 |
-        gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 |
-        gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 |
-        gen6_translate_dsa_func(stencil0->func) << 8 |
-        GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
-   if (stencil0->writemask)
-      dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
-
-   if (stencil1->enabled) {
-      dw |= gen6_translate_dsa_func(stencil1->func) << 20 |
-            gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 |
-            gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 |
-            gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 |
-            GEN8_ZS_DW1_STENCIL1_ENABLE;
-      if (stencil1->writemask)
-         dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
-   }
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen6(const struct ilo_dev *dev,
-                          const struct pipe_depth_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 360:
-    *
-    *     "Enabling the Depth Test function without defining a Depth Buffer is
-    *      UNDEFINED."
-    *
-    * From the Sandy Bridge PRM, volume 2 part 1, page 375:
-    *
-    *     "A Depth Buffer must be defined before enabling writes to it, or
-    *      operation is UNDEFINED."
-    *
-    * TODO We do not check these yet.
-    */
-   if (state->enabled) {
-      dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
-           gen6_translate_dsa_func(state->func) << 27;
-   } else {
-      dw = GEN6_COMPAREFUNCTION_ALWAYS << 27;
-   }
-
-   if (state->writemask)
-      dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen8(const struct ilo_dev *dev,
-                          const struct pipe_depth_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->enabled) {
-      dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE |
-           gen6_translate_dsa_func(state->func) << 5;
-   } else {
-      dw = GEN6_COMPAREFUNCTION_ALWAYS << 5;
-   }
-
-   if (state->writemask)
-      dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen6(const struct ilo_dev *dev,
-                          const struct pipe_alpha_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!state->enabled)
-      return 0;
-
-   /* this will be ORed to BLEND_STATE */
-   dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE |
-        gen6_translate_dsa_func(state->func) << 13;
-
-   return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen8(const struct ilo_dev *dev,
-                          const struct pipe_alpha_state *state)
-{
-   uint32_t dw;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!state->enabled)
-      return 0;
-
-   /* this will be ORed to BLEND_STATE */
-   dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
-        gen6_translate_dsa_func(state->func) << 24;
-
-   return dw;
-}
-
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
-                 const struct pipe_depth_stencil_alpha_state *state,
-                 struct ilo_dsa_state *dsa)
-{
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   STATIC_ASSERT(Elements(dsa->payload) >= 3);
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev,
-            &state->stencil[0], &state->stencil[1]);
-      const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth);
-
-      assert(!(dw_stencil & dw_depth));
-      dsa->payload[0] = dw_stencil | dw_depth;
-
-      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha);
-      dsa->dw_ps_blend_alpha = (state->alpha.enabled) ?
-         GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0;
-   } else {
-      dsa->payload[0] = dsa_get_stencil_enable_gen6(dev,
-            &state->stencil[0], &state->stencil[1]);
-      dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth);
-
-      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha);
-      dsa->dw_ps_blend_alpha = 0;
-   }
-
-   dsa->payload[1] = state->stencil[0].valuemask << 24 |
-                     state->stencil[0].writemask << 16 |
-                     state->stencil[1].valuemask << 8 |
-                     state->stencil[1].writemask;
-
-   dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value);
-}
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
-                    unsigned start_slot,
-                    unsigned num_states,
-                    const struct pipe_scissor_state *states,
-                    struct ilo_scissor_state *scissor)
-{
-   unsigned i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   for (i = 0; i < num_states; i++) {
-      uint16_t min_x, min_y, max_x, max_y;
-
-      /* both max and min are inclusive in SCISSOR_RECT */
-      if (states[i].minx < states[i].maxx &&
-          states[i].miny < states[i].maxy) {
-         min_x = states[i].minx;
-         min_y = states[i].miny;
-         max_x = states[i].maxx - 1;
-         max_y = states[i].maxy - 1;
-      }
-      else {
-         /* we have to make min greater than max */
-         min_x = 1;
-         min_y = 1;
-         max_x = 0;
-         max_y = 0;
-      }
-
-      scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
-      scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
-   }
-
-   if (!start_slot && num_states)
-      scissor->scissor0 = states[0];
-}
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
-                         struct ilo_scissor_state *scissor)
-{
-   unsigned i;
-
-   for (i = 0; i < Elements(scissor->payload); i += 2) {
-      scissor->payload[i + 0] = 1 << 16 | 1;
-      scissor->payload[i + 1] = 0;
-   }
-}
-
-static void
-fb_set_blend_caps(const struct ilo_dev *dev,
-                  enum pipe_format format,
-                  struct ilo_fb_blend_caps *caps)
-{
-   const struct util_format_description *desc =
-      util_format_description(format);
-   const int ch = util_format_get_first_non_void_channel(format);
-
-   memset(caps, 0, sizeof(*caps));
-
-   if (format == PIPE_FORMAT_NONE || desc->is_mixed)
-      return;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
-    *
-    *     "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
-    *      variants), otherwise Logic Ops must be DISABLED."
-    *
-    * According to the classic driver, this is lifted on Gen8+.
-    */
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      caps->can_logicop = true;
-   } else {
-      caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized &&
-            desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
-            desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
-   }
-
-   /* no blending for pure integer formats */
-   caps->can_blend = !util_format_is_pure_integer(format);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 382:
-    *
-    *     "Alpha Test can only be enabled if Pixel Shader outputs a float
-    *      alpha value."
-    */
-   caps->can_alpha_test = !util_format_is_pure_integer(format);
-
-   caps->dst_alpha_forced_one =
-      (ilo_format_translate_render(dev, format) !=
-       ilo_format_translate_color(dev, format));
-
-   /* sanity check */
-   if (caps->dst_alpha_forced_one) {
-      enum pipe_format render_format;
-
-      switch (format) {
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
-         render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
-         break;
-      default:
-         render_format = PIPE_FORMAT_NONE;
-         break;
-      }
-
-      assert(ilo_format_translate_render(dev, format) ==
-             ilo_format_translate_color(dev, render_format));
-   }
-}
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
-               const struct pipe_framebuffer_state *state,
-               struct ilo_fb_state *fb)
-{
-   const struct pipe_surface *first_surf = NULL;
-   int i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   util_copy_framebuffer_state(&fb->state, state);
-
-   ilo_gpe_init_view_surface_null(dev,
-         (state->width) ? state->width : 1,
-         (state->height) ? state->height : 1,
-         1, 0, &fb->null_rt);
-
-   for (i = 0; i < state->nr_cbufs; i++) {
-      if (state->cbufs[i]) {
-         fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);
-
-         if (!first_surf)
-            first_surf = state->cbufs[i];
-      } else {
-         fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
-      }
-   }
-
-   if (!first_surf && state->zsbuf)
-      first_surf = state->zsbuf;
-
-   fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
-   if (!fb->num_samples)
-      fb->num_samples = 1;
-
-   /*
-    * The PRMs list several restrictions when the framebuffer has more than
-    * one surface.  It seems they are actually lifted on GEN6+.
-    */
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_3d.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_3d.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_3d.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_3d.h	1970-01-01 00:00:00.000000000 +0000
@@ -1,427 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#ifndef ILO_STATE_3D_H
-#define ILO_STATE_3D_H
-
-#include "genhw/genhw.h"
-#include "pipe/p_state.h"
-
-#include "ilo_core.h"
-#include "ilo_dev.h"
-
-/**
- * \see brw_context.h
- */
-#define ILO_MAX_DRAW_BUFFERS    8
-#define ILO_MAX_CONST_BUFFERS   (1 + 12)
-#define ILO_MAX_SAMPLER_VIEWS   16
-#define ILO_MAX_SAMPLERS        16
-#define ILO_MAX_SO_BINDINGS     64
-#define ILO_MAX_SO_BUFFERS      4
-#define ILO_MAX_VIEWPORTS       1
-
-#define ILO_MAX_SURFACES        256
-
-struct intel_bo;
-struct ilo_buffer;
-struct ilo_image;
-struct ilo_shader_state;
-
-struct ilo_vb_state {
-   struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
-   uint32_t enabled_mask;
-};
-
-struct ilo_ib_state {
-   struct pipe_resource *buffer;
-   const void *user_buffer;
-   unsigned offset;
-   unsigned index_size;
-
-   /* these are not valid until the state is finalized */
-   struct pipe_resource *hw_resource;
-   unsigned hw_index_size;
-   /* an offset to be added to pipe_draw_info::start */
-   int64_t draw_start_offset;
-};
-
-struct ilo_ve_cso {
-   /* VERTEX_ELEMENT_STATE */
-   uint32_t payload[2];
-};
-
-struct ilo_ve_state {
-   struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
-   unsigned count;
-
-   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
-   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
-   unsigned vb_count;
-
-   /* these are not valid until the state is finalized */
-   struct ilo_ve_cso edgeflag_cso;
-   bool last_cso_edgeflag;
-
-   struct ilo_ve_cso nosrc_cso;
-   bool prepend_nosrc_cso;
-};
-
-struct ilo_so_state {
-   struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
-   unsigned count;
-   unsigned append_bitmask;
-
-   bool enabled;
-};
-
-struct ilo_viewport_cso {
-   /* matrix form */
-   float m00, m11, m22, m30, m31, m32;
-
-   /* guardband in NDC space */
-   float min_gbx, min_gby, max_gbx, max_gby;
-
-   /* viewport in screen space */
-   float min_x, min_y, min_z;
-   float max_x, max_y, max_z;
-};
-
-struct ilo_viewport_state {
-   struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
-   unsigned count;
-
-   struct pipe_viewport_state viewport0;
-};
-
-struct ilo_scissor_state {
-   /* SCISSOR_RECT */
-   uint32_t payload[ILO_MAX_VIEWPORTS * 2];
-
-   struct pipe_scissor_state scissor0;
-};
-
-struct ilo_rasterizer_clip {
-   /* 3DSTATE_CLIP */
-   uint32_t payload[3];
-
-   uint32_t can_enable_guardband;
-};
-
-struct ilo_rasterizer_sf {
-   /* 3DSTATE_SF */
-   uint32_t payload[3];
-   uint32_t dw_msaa;
-
-   /* Global Depth Offset Constant/Scale/Clamp */
-   uint32_t dw_depth_offset_const;
-   uint32_t dw_depth_offset_scale;
-   uint32_t dw_depth_offset_clamp;
-
-   /* Gen8+ 3DSTATE_RASTER */
-   uint32_t dw_raster;
-};
-
-struct ilo_rasterizer_wm {
-   /* 3DSTATE_WM */
-   uint32_t payload[2];
-   uint32_t dw_msaa_rast;
-   uint32_t dw_msaa_disp;
-};
-
-struct ilo_rasterizer_state {
-   struct pipe_rasterizer_state state;
-
-   struct ilo_rasterizer_clip clip;
-   struct ilo_rasterizer_sf sf;
-   struct ilo_rasterizer_wm wm;
-};
-
-struct ilo_dsa_state {
-   /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */
-   uint32_t payload[3];
-
-   uint32_t dw_blend_alpha;
-   uint32_t dw_ps_blend_alpha;
-   ubyte alpha_ref;
-};
-
-struct ilo_blend_cso {
-   /* BLEND_STATE */
-   uint32_t payload[2];
-
-   uint32_t dw_blend;
-   uint32_t dw_blend_dst_alpha_forced_one;
-};
-
-struct ilo_blend_state {
-   struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];
-
-   bool dual_blend;
-   bool alpha_to_coverage;
-
-   uint32_t dw_shared;
-   uint32_t dw_alpha_mod;
-   uint32_t dw_logicop;
-
-   /* a part of 3DSTATE_PS_BLEND */
-   uint32_t dw_ps_blend;
-   uint32_t dw_ps_blend_dst_alpha_forced_one;
-};
-
-struct ilo_sampler_cso {
-   /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
-   uint32_t payload[15];
-
-   uint32_t dw_filter;
-   uint32_t dw_filter_aniso;
-   uint32_t dw_wrap;
-   uint32_t dw_wrap_1d;
-   uint32_t dw_wrap_cube;
-
-   bool anisotropic;
-   bool saturate_r;
-   bool saturate_s;
-   bool saturate_t;
-};
-
-struct ilo_sampler_state {
-   const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
-};
-
-struct ilo_view_surface {
-   /* SURFACE_STATE */
-   uint32_t payload[13];
-   struct intel_bo *bo;
-
-   uint32_t scanout;
-};
-
-struct ilo_view_cso {
-   struct pipe_sampler_view base;
-
-   struct ilo_view_surface surface;
-};
-
-struct ilo_view_state {
-   struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
-   unsigned count;
-};
-
-struct ilo_cbuf_cso {
-   struct pipe_resource *resource;
-   struct ilo_view_surface surface;
-
-   /*
-    * this CSO is not so constant because user buffer needs to be uploaded in
-    * finalize_constant_buffers()
-    */
-   const void *user_buffer;
-   unsigned user_buffer_size;
-};
-
-struct ilo_cbuf_state {
-   struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
-   uint32_t enabled_mask;
-};
-
-struct ilo_resource_state {
-   struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
-   unsigned count;
-};
-
-struct ilo_surface_cso {
-   struct pipe_surface base;
-
-   bool is_rt;
-   union {
-      struct ilo_view_surface rt;
-      struct ilo_zs_surface {
-         uint32_t payload[12];
-         uint32_t dw_aligned_8x4;
-
-         struct intel_bo *bo;
-         struct intel_bo *hiz_bo;
-         struct intel_bo *separate_s8_bo;
-      } zs;
-   } u;
-};
-
-struct ilo_fb_state {
-   struct pipe_framebuffer_state state;
-
-   struct ilo_view_surface null_rt;
-   struct ilo_zs_surface null_zs;
-
-   struct ilo_fb_blend_caps {
-      bool can_logicop;
-      bool can_blend;
-      bool can_alpha_test;
-      bool dst_alpha_forced_one;
-   } blend_caps[PIPE_MAX_COLOR_BUFS];
-
-   unsigned num_samples;
-};
-
-struct ilo_shader_cso {
-   uint32_t payload[5];
-};
-
-/**
- * Translate a pipe texture target to the matching hardware surface type.
- */
-static inline int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
-{
-   switch (target) {
-   case PIPE_BUFFER:
-      return GEN6_SURFTYPE_BUFFER;
-   case PIPE_TEXTURE_1D:
-   case PIPE_TEXTURE_1D_ARRAY:
-      return GEN6_SURFTYPE_1D;
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_RECT:
-   case PIPE_TEXTURE_2D_ARRAY:
-      return GEN6_SURFTYPE_2D;
-   case PIPE_TEXTURE_3D:
-      return GEN6_SURFTYPE_3D;
-   case PIPE_TEXTURE_CUBE:
-   case PIPE_TEXTURE_CUBE_ARRAY:
-      return GEN6_SURFTYPE_CUBE;
-   default:
-      assert(!"unknown texture target");
-      return GEN6_SURFTYPE_BUFFER;
-   }
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
-                unsigned num_states,
-                const struct pipe_vertex_element *states,
-                struct ilo_ve_state *ve);
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
-                        struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
-                      int comp0, int comp1, int comp2, int comp3,
-                      struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
-                         const struct pipe_viewport_state *state,
-                         struct ilo_viewport_cso *vp);
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
-                    unsigned start_slot,
-                    unsigned num_states,
-                    const struct pipe_scissor_state *states,
-                    struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
-                         struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
-                        const struct pipe_rasterizer_state *state,
-                        struct ilo_rasterizer_state *rasterizer);
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
-                 const struct pipe_depth_stencil_alpha_state *state,
-                 struct ilo_dsa_state *dsa);
-
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
-                   const struct pipe_blend_state *state,
-                   struct ilo_blend_state *blend);
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
-                         const struct pipe_sampler_state *state,
-                         struct ilo_sampler_cso *sampler);
-
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
-                               unsigned width, unsigned height,
-                               unsigned depth, unsigned level,
-                               struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
-                                     const struct ilo_buffer *buf,
-                                     unsigned offset, unsigned size,
-                                     unsigned struct_size,
-                                     enum pipe_format elem_format,
-                                     bool is_rt, bool render_cache_rw,
-                                     struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
-                                    const struct ilo_image *img,
-                                    enum pipe_texture_target target,
-                                    enum pipe_format format,
-                                    unsigned first_level,
-                                    unsigned num_levels,
-                                    unsigned first_layer,
-                                    unsigned num_layers,
-                                    bool is_rt,
-                                    struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
-                        const struct ilo_image *img,
-                        const struct ilo_image *s8_img,
-                        enum pipe_texture_target target,
-                        enum pipe_format format, unsigned level,
-                        unsigned first_layer, unsigned num_layers,
-                        struct ilo_zs_surface *zs);
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *vs,
-                    struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *gs,
-                    struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *fs,
-                    struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
-               const struct pipe_framebuffer_state *state,
-               struct ilo_fb_state *fb);
-
-#endif /* ILO_STATE_3D_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_3d_top.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_3d_top.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_3d_top.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_3d_top.c	1970-01-01 00:00:00.000000000 +0000
@@ -1,1716 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#include "genhw/genhw.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-#include "util/u_resource.h"
-
-#include "ilo_buffer.h"
-#include "ilo_format.h"
-#include "ilo_image.h"
-#include "ilo_state_3d.h"
-#include "../ilo_shader.h"
-
-static void
-ve_init_cso(const struct ilo_dev *dev,
-            const struct pipe_vertex_element *state,
-            unsigned vb_index,
-            struct ilo_ve_cso *cso)
-{
-   int comp[4] = {
-      GEN6_VFCOMP_STORE_SRC,
-      GEN6_VFCOMP_STORE_SRC,
-      GEN6_VFCOMP_STORE_SRC,
-      GEN6_VFCOMP_STORE_SRC,
-   };
-   int format;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   switch (util_format_get_nr_components(state->src_format)) {
-   case 1: comp[1] = GEN6_VFCOMP_STORE_0;
-   case 2: comp[2] = GEN6_VFCOMP_STORE_0;
-   case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
-                     GEN6_VFCOMP_STORE_1_INT :
-                     GEN6_VFCOMP_STORE_1_FP;
-   }
-
-   format = ilo_format_translate_vertex(dev, state->src_format);
-
-   STATIC_ASSERT(Elements(cso->payload) >= 2);
-   cso->payload[0] =
-      vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
-      GEN6_VE_DW0_VALID |
-      format << GEN6_VE_DW0_FORMAT__SHIFT |
-      state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
-
-   cso->payload[1] =
-         comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
-         comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
-         comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
-         comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
-                unsigned num_states,
-                const struct pipe_vertex_element *states,
-                struct ilo_ve_state *ve)
-{
-   unsigned i;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   ve->count = num_states;
-   ve->vb_count = 0;
-
-   for (i = 0; i < num_states; i++) {
-      const unsigned pipe_idx = states[i].vertex_buffer_index;
-      const unsigned instance_divisor = states[i].instance_divisor;
-      unsigned hw_idx;
-
-      /*
-       * map the pipe vb to the hardware vb, which has a fixed instance
-       * divisor
-       */
-      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
-         if (ve->vb_mapping[hw_idx] == pipe_idx &&
-             ve->instance_divisors[hw_idx] == instance_divisor)
-            break;
-      }
-
-      /* create one if there is no matching hardware vb */
-      if (hw_idx >= ve->vb_count) {
-         hw_idx = ve->vb_count++;
-
-         ve->vb_mapping[hw_idx] = pipe_idx;
-         ve->instance_divisors[hw_idx] = instance_divisor;
-      }
-
-      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
-   }
-}
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
-                        struct ilo_ve_cso *cso)
-{
-   int format;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
-    *
-    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
-    *        valid VERTEX_ELEMENT structure.
-    *
-    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
-    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
-    *
-    *      - The Source Element Format must be set to the UINT format.
-    *
-    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
-    *        primitives.  Software may elect to convert QUADLIST primitives
-    *        to some set of corresponding edge-flag-supported primitive
-    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
-    */
-   cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
-
-   /*
-    * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
-    * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
-    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
-    *
-    * Since all the hardware cares about is whether the flags are zero or not,
-    * we can treat them as the corresponding _UINT formats.
-    */
-   format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
-   cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
-
-   switch (format) {
-   case GEN6_FORMAT_R32_FLOAT:
-      format = GEN6_FORMAT_R32_UINT;
-      break;
-   case GEN6_FORMAT_R8_USCALED:
-      format = GEN6_FORMAT_R8_UINT;
-      break;
-   default:
-      break;
-   }
-
-   cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT);
-
-   cso->payload[1] =
-         GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
-         GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
-         GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
-         GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
-                          int comp0, int comp1, int comp2, int comp3,
-                          struct ilo_ve_cso *cso)
-{
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   STATIC_ASSERT(Elements(cso->payload) >= 2);
-
-   assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
-          comp1 != GEN6_VFCOMP_STORE_SRC &&
-          comp2 != GEN6_VFCOMP_STORE_SRC &&
-          comp3 != GEN6_VFCOMP_STORE_SRC);
-
-   cso->payload[0] = GEN6_VE_DW0_VALID;
-   cso->payload[1] =
-         comp0 << GEN6_VE_DW1_COMP0__SHIFT |
-         comp1 << GEN6_VE_DW1_COMP1__SHIFT |
-         comp2 << GEN6_VE_DW1_COMP2__SHIFT |
-         comp3 << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *vs,
-                    struct ilo_shader_cso *cso)
-{
-   int start_grf, vue_read_len, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
-   vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
-   sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 135:
-    *
-    *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
-    *      128-bit vertex elements to be passed into the payload for each
-    *      vertex."
-    *
-    *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
-    *      data to be read and passed to the thread."
-    */
-   vue_read_len = (vue_read_len + 1) / 2;
-   if (!vue_read_len)
-      vue_read_len = 1;
-
-   max_threads = dev->thread_count;
-   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
-      max_threads *= 2;
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
-         vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
-         0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
-
-   dw5 = GEN6_VS_DW5_STATISTICS |
-         GEN6_VS_DW5_VS_ENABLE;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
-      dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
-   else
-      dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 3);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-}
-
-static void
-gs_init_cso_gen6(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *gs,
-                 struct ilo_shader_cso *cso)
-{
-   int start_grf, vue_read_len, max_threads;
-   uint32_t dw2, dw4, dw5, dw6;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
-      start_grf = ilo_shader_get_kernel_param(gs,
-            ILO_KERNEL_URB_DATA_START_REG);
-
-      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
-   }
-   else {
-      start_grf = ilo_shader_get_kernel_param(gs,
-            ILO_KERNEL_VS_GEN6_SO_START_REG);
-
-      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
-    *
-    *     "Specifies the amount of URB data read and passed in the thread
-    *      payload for each Vertex URB entry, in 256-bit register increments.
-    *
-    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
-    *      0 indicating no Vertex URB data to be read and passed to the
-    *      thread."
-    */
-   vue_read_len = (vue_read_len + 1) / 2;
-   if (!vue_read_len)
-      vue_read_len = 1;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
-    *
-    *     "Maximum Number of Threads valid range is [0,27] when Rendering
-    *      Enabled bit is set."
-    *
-    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
-    *
-    *     "Programming Note: If the GS stage is enabled, software must always
-    *      allocate at least one GS URB Entry. This is true even if the GS
-    *      thread never needs to output vertices to the pipeline, e.g., when
-    *      only performing stream output. This is an artifact of the need to
-    *      pass the GS thread an initial destination URB handle."
-    *
-    * As such, we always enable rendering, and limit the number of threads.
-    */
-   if (dev->gt == 2) {
-      /* maximum is 60, but limited to 28 */
-      max_threads = 28;
-   }
-   else {
-      /* maximum is 24, but limited to 21 (see brwCreateContext()) */
-      max_threads = 21;
-   }
-
-   dw2 = GEN6_THREADDISP_SPF;
-
-   dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
-         0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
-         start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
-
-   dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
-         GEN6_GS_DW5_STATISTICS |
-         GEN6_GS_DW5_SO_STATISTICS |
-         GEN6_GS_DW5_RENDER_ENABLE;
-
-   /*
-    * we cannot make use of GEN6_GS_REORDER because it will reorder
-    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
-    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
-    * (2N+2, 2N+1, 2N+3)).
-    */
-   dw6 = GEN6_GS_DW6_GS_ENABLE;
-
-   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
-      dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
-
-   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
-      const uint32_t svbi_post_inc =
-         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
-
-      dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
-      if (svbi_post_inc) {
-         dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
-                svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
-      }
-   }
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-   cso->payload[3] = dw6;
-}
-
-static void
-gs_init_cso_gen7(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *gs,
-                 struct ilo_shader_cso *cso)
-{
-   int start_grf, vue_read_len, sampler_count, max_threads;
-   uint32_t dw2, dw4, dw5;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
-   vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
-   sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
-
-   /* in pairs */
-   vue_read_len = (vue_read_len + 1) / 2;
-
-   switch (ilo_dev_gen(dev)) {
-   case ILO_GEN(7.5):
-      max_threads = (dev->gt >= 2) ? 256 : 70;
-      break;
-   case ILO_GEN(7):
-      max_threads = (dev->gt == 2) ? 128 : 36;
-      break;
-   default:
-      max_threads = 1;
-      break;
-   }
-
-   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
-   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
-         GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
-         0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
-         start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
-
-   dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
-         GEN7_GS_DW5_STATISTICS |
-         GEN7_GS_DW5_GS_ENABLE;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 3);
-   cso->payload[0] = dw2;
-   cso->payload[1] = dw4;
-   cso->payload[2] = dw5;
-}
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *gs,
-                    struct ilo_shader_cso *cso)
-{
-   if (ilo_dev_gen(dev) >= ILO_GEN(7))
-      gs_init_cso_gen7(dev, gs, cso);
-   else
-      gs_init_cso_gen6(dev, gs, cso);
-}
-
-static void
-view_init_null_gen6(const struct ilo_dev *dev,
-                    unsigned width, unsigned height,
-                    unsigned depth, unsigned level,
-                    struct ilo_view_surface *surf)
-{
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   assert(width >= 1 && height >= 1 && depth >= 1);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 71:
-    *
-    *     "A null surface will be used in instances where an actual surface is
-    *      not bound. When a write message is generated to a null surface, no
-    *      actual surface is written to. When a read message (including any
-    *      sampling engine message) is generated to a null surface, the result
-    *      is all zeros. Note that a null surface type is allowed to be used
-    *      with all messages, even if it is not specificially indicated as
-    *      supported. All of the remaining fields in surface state are ignored
-    *      for null surfaces, with the following exceptions:
-    *
-    *        * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
-    *          depth buffer's corresponding state for all render target
-    *          surfaces, including null.
-    *        * Surface Format must be R8G8B8A8_UNORM."
-    *
-    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
-    *
-    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
-    *      true"
-    */
-
-   STATIC_ASSERT(Elements(surf->payload) >= 6);
-   dw = surf->payload;
-
-   dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
-           GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
-
-   dw[1] = 0;
-
-   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
-           (width  - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
-           level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
-   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
-           GEN6_TILING_X;
-
-   dw[4] = 0;
-   dw[5] = 0;
-}
-
-static void
-view_init_for_buffer_gen6(const struct ilo_dev *dev,
-                          const struct ilo_buffer *buf,
-                          unsigned offset, unsigned size,
-                          unsigned struct_size,
-                          enum pipe_format elem_format,
-                          bool is_rt, bool render_cache_rw,
-                          struct ilo_view_surface *surf)
-{
-   const int elem_size = util_format_get_blocksize(elem_format);
-   int width, height, depth, pitch;
-   int surface_format, num_entries;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   /*
-    * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
-    * structure in a buffer.
-    */
-
-   surface_format = ilo_format_translate_color(dev, elem_format);
-
-   num_entries = size / struct_size;
-   /* see if there is enough space to fit another element */
-   if (size % struct_size >= elem_size)
-      num_entries++;
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
-    *
-    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
-    *      Address) specifies the base address of first element of the
-    *      surface. The surface is interpreted as a simple array of that
-    *      single element type. The address must be naturally-aligned to the
-    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
-    *      must be 16-byte aligned).
-    *
-    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
-    *      the base address of the first element of the surface, computed in
-    *      software by adding the surface base address to the byte offset of
-    *      the element in the buffer."
-    */
-   if (is_rt)
-      assert(offset % elem_size == 0);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
-    *
-    *     "For buffer surfaces, the number of entries in the buffer ranges
-    *      from 1 to 2^27."
-    */
-   assert(num_entries >= 1 && num_entries <= 1 << 27);
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-    *
-    *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
-    *      indicates the size of the structure."
-    */
-   pitch = struct_size;
-
-   pitch--;
-   num_entries--;
-   /* bits [6:0] */
-   width  = (num_entries & 0x0000007f);
-   /* bits [19:7] */
-   height = (num_entries & 0x000fff80) >> 7;
-   /* bits [26:20] */
-   depth  = (num_entries & 0x07f00000) >> 20;
-
-   STATIC_ASSERT(Elements(surf->payload) >= 6);
-   dw = surf->payload;
-
-   dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
-   if (render_cache_rw)
-      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
-   dw[1] = offset;
-
-   dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
-           width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
-
-   dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
-           pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
-
-   dw[4] = 0;
-   dw[5] = 0;
-}
-
-static void
-view_init_for_image_gen6(const struct ilo_dev *dev,
-                         const struct ilo_image *img,
-                         enum pipe_texture_target target,
-                         enum pipe_format format,
-                         unsigned first_level,
-                         unsigned num_levels,
-                         unsigned first_layer,
-                         unsigned num_layers,
-                         bool is_rt,
-                         struct ilo_view_surface *surf)
-{
-   int surface_type, surface_format;
-   int width, height, depth, pitch, lod;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   surface_type = ilo_gpe_gen6_translate_texture(target);
-   assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
-   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
-      format = PIPE_FORMAT_Z32_FLOAT;
-
-   if (is_rt)
-      surface_format = ilo_format_translate_render(dev, format);
-   else
-      surface_format = ilo_format_translate_texture(dev, format);
-   assert(surface_format >= 0);
-
-   width = img->width0;
-   height = img->height0;
-   depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
-   pitch = img->bo_stride;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE) {
-      /*
-       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-       *
-       *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
-       *      range of this field (Depth) is [0,84], indicating the number of
-       *      cube array elements (equal to the number of underlying 2D array
-       *      elements divided by 6). For other surfaces, this field must be
-       *      zero."
-       *
-       * When is_rt is true, we treat the texture as a 2D one to avoid the
-       * restriction.
-       */
-      if (is_rt) {
-         surface_type = GEN6_SURFTYPE_2D;
-      }
-      else {
-         assert(num_layers % 6 == 0);
-         depth = num_layers / 6;
-      }
-   }
-
-   /* sanity check the size */
-   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
-   switch (surface_type) {
-   case GEN6_SURFTYPE_1D:
-      assert(width <= 8192 && height == 1 && depth <= 512);
-      assert(first_layer < 512 && num_layers <= 512);
-      break;
-   case GEN6_SURFTYPE_2D:
-      assert(width <= 8192 && height <= 8192 && depth <= 512);
-      assert(first_layer < 512 && num_layers <= 512);
-      break;
-   case GEN6_SURFTYPE_3D:
-      assert(width <= 2048 && height <= 2048 && depth <= 2048);
-      assert(first_layer < 2048 && num_layers <= 512);
-      if (!is_rt)
-         assert(first_layer == 0);
-      break;
-   case GEN6_SURFTYPE_CUBE:
-      assert(width <= 8192 && height <= 8192 && depth <= 85);
-      assert(width == height);
-      assert(first_layer < 512 && num_layers <= 512);
-      if (is_rt)
-         assert(first_layer == 0);
-      break;
-   default:
-      assert(!"unexpected surface type");
-      break;
-   }
-
-   /* non-full array spacing is supported only on GEN7+ */
-   assert(img->walk != ILO_IMAGE_WALK_LOD);
-   /* non-interleaved samples are supported only on GEN7+ */
-   if (img->sample_count > 1)
-      assert(img->interleaved_samples);
-
-   if (is_rt) {
-      assert(num_levels == 1);
-      lod = first_level;
-   }
-   else {
-      lod = num_levels - 1;
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
-    *
-    *     "Linear render target surface base addresses must be element-size
-    *      aligned, for non-YUV surface formats, or a multiple of 2
-    *      element-sizes for YUV surface formats. Other linear surfaces have
-    *      no alignment requirements (byte alignment is sufficient.)"
-    *
-    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
-    *
-    *     "For linear render target surfaces, the pitch must be a multiple
-    *      of the element size for non-YUV surface formats. Pitch must be a
-    *      multiple of 2 * element size for YUV surface formats."
-    *
-    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
-    *
-    *     "For linear surfaces, this field (X Offset) must be zero"
-    */
-   if (img->tiling == GEN6_TILING_NONE) {
-      if (is_rt) {
-         const int elem_size = util_format_get_blocksize(format);
-         assert(pitch % elem_size == 0);
-      }
-   }
-
-   STATIC_ASSERT(Elements(surf->payload) >= 6);
-   dw = surf->payload;
-
-   dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
-           GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
-      dw[0] |= 1 << 9 |
-               GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
-   }
-
-   if (is_rt)
-      dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
-   dw[1] = 0;
-
-   dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
-           (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
-           lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
-   assert(img->tiling != GEN8_TILING_W);
-   dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
-           (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
-           img->tiling;
-
-   dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
-           first_layer << 17 |
-           (num_layers - 1) << 8 |
-           ((img->sample_count > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
-                                      GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
-
-   dw[5] = 0;
-
-   assert(img->align_j == 2 || img->align_j == 4);
-   if (img->align_j == 4)
-      dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
-}
-
-static void
-view_init_null_gen7(const struct ilo_dev *dev,
-                    unsigned width, unsigned height,
-                    unsigned depth, unsigned level,
-                    struct ilo_view_surface *surf)
-{
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 7, 8);
-
-   assert(width >= 1 && height >= 1 && depth >= 1);
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 62:
-    *
-    *     "A null surface is used in instances where an actual surface is not
-    *      bound. When a write message is generated to a null surface, no
-    *      actual surface is written to. When a read message (including any
-    *      sampling engine message) is generated to a null surface, the result
-    *      is all zeros.  Note that a null surface type is allowed to be used
-    *      with all messages, even if it is not specificially indicated as
-    *      supported. All of the remaining fields in surface state are ignored
-    *      for null surfaces, with the following exceptions:
-    *
-    *      * Width, Height, Depth, LOD, and Render Target View Extent fields
-    *        must match the depth buffer's corresponding state for all render
-    *        target surfaces, including null.
-    *      * All sampling engine and data port messages support null surfaces
-    *        with the above behavior, even if not mentioned as specifically
-    *        supported, except for the following:
-    *        * Data Port Media Block Read/Write messages.
-    *      * The Surface Type of a surface used as a render target (accessed
-    *        via the Data Port's Render Target Write message) must be the same
-    *        as the Surface Type of all other render targets and of the depth
-    *        buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
-    *        buffer or render targets are SURFTYPE_NULL."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 65:
-    *
-    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
-    *      true"
-    */
-
-   STATIC_ASSERT(Elements(surf->payload) >= 13);
-   dw = surf->payload;
-
-   dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
-           GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8))
-      dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT;
-   else
-      dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
-
-   dw[1] = 0;
-
-   dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
-           GEN_SHIFT32(width  - 1, GEN7_SURFACE_DW2_WIDTH);
-
-   dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
-
-   dw[4] = 0;
-   dw[5] = level;
-
-   dw[6] = 0;
-   dw[7] = 0;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8))
-      memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
-}
-
-static void
-view_init_for_buffer_gen7(const struct ilo_dev *dev,
-                          const struct ilo_buffer *buf,
-                          unsigned offset, unsigned size,
-                          unsigned struct_size,
-                          enum pipe_format elem_format,
-                          bool is_rt, bool render_cache_rw,
-                          struct ilo_view_surface *surf)
-{
-   const bool typed = (elem_format != PIPE_FORMAT_NONE);
-   const bool structured = (!typed && struct_size > 1);
-   const int elem_size = (typed) ?
-      util_format_get_blocksize(elem_format) : 1;
-   int width, height, depth, pitch;
-   int surface_type, surface_format, num_entries;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 7, 8);
-
-   surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
-
-   surface_format = (typed) ?
-      ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
-
-   num_entries = size / struct_size;
-   /* see if there is enough space to fit another element */
-   if (size % struct_size >= elem_size && !structured)
-      num_entries++;
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
-    *
-    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
-    *      Address) specifies the base address of first element of the
-    *      surface. The surface is interpreted as a simple array of that
-    *      single element type. The address must be naturally-aligned to the
-    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
-    *      must be 16-byte aligned)
-    *
-    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
-    *      the base address of the first element of the surface, computed in
-    *      software by adding the surface base address to the byte offset of
-    *      the element in the buffer."
-    */
-   if (is_rt)
-      assert(offset % elem_size == 0);
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
-    *
-    *     "For typed buffer and structured buffer surfaces, the number of
-    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
-    *      surfaces, the number of entries in the buffer is the number of
-    *      bytes which can range from 1 to 2^30."
-    */
-   assert(num_entries >= 1 &&
-          num_entries <= 1 << ((typed || structured) ? 27 : 30));
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
-    *
-    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
-    *      11 if the Surface Format is RAW (the size of the buffer must be a
-    *      multiple of 4 bytes)."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
-    *
-    *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
-    *      field (Surface Pitch) indicates the size of the structure."
-    *
-    *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
-    *      must be a multiple of 4 bytes."
-    */
-   if (structured)
-      assert(struct_size % 4 == 0);
-   else if (!typed)
-      assert(num_entries % 4 == 0);
-
-   pitch = struct_size;
-
-   pitch--;
-   num_entries--;
-   /* bits [6:0] */
-   width  = (num_entries & 0x0000007f);
-   /* bits [20:7] */
-   height = (num_entries & 0x001fff80) >> 7;
-   /* bits [30:21] */
-   depth  = (num_entries & 0x7fe00000) >> 21;
-   /* limit to [26:21] */
-   if (typed || structured)
-      depth &= 0x3f;
-
-   STATIC_ASSERT(Elements(surf->payload) >= 13);
-   dw = surf->payload;
-
-   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-   if (render_cache_rw)
-      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      dw[8] = offset;
-      memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
-   } else {
-      dw[1] = offset;
-   }
-
-   dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
-           GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
-
-   dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
-           pitch;
-
-   dw[4] = 0;
-   dw[5] = 0;
-
-   dw[6] = 0;
-   dw[7] = 0;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
-      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
-               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
-               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
-               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
-   }
-}
-
-static void
-view_init_for_image_gen7(const struct ilo_dev *dev,
-                         const struct ilo_image *img,
-                         enum pipe_texture_target target,
-                         enum pipe_format format,
-                         unsigned first_level,
-                         unsigned num_levels,
-                         unsigned first_layer,
-                         unsigned num_layers,
-                         bool is_rt,
-                         struct ilo_view_surface *surf)
-{
-   int surface_type, surface_format;
-   int width, height, depth, pitch, lod;
-   uint32_t *dw;
-
-   ILO_DEV_ASSERT(dev, 7, 8);
-
-   surface_type = ilo_gpe_gen6_translate_texture(target);
-   assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
-   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && img->separate_stencil)
-      format = PIPE_FORMAT_Z32_FLOAT;
-
-   if (is_rt)
-      surface_format = ilo_format_translate_render(dev, format);
-   else
-      surface_format = ilo_format_translate_texture(dev, format);
-   assert(surface_format >= 0);
-
-   width = img->width0;
-   height = img->height0;
-   depth = (target == PIPE_TEXTURE_3D) ? img->depth0 : num_layers;
-   pitch = img->bo_stride;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE) {
-      /*
-       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
-       *
-       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
-       *      this field is [0,340], indicating the number of cube array
-       *      elements (equal to the number of underlying 2D array elements
-       *      divided by 6). For other surfaces, this field must be zero."
-       *
-       * When is_rt is true, we treat the texture as a 2D one to avoid the
-       * restriction.
-       */
-      if (is_rt) {
-         surface_type = GEN6_SURFTYPE_2D;
-      }
-      else {
-         assert(num_layers % 6 == 0);
-         depth = num_layers / 6;
-      }
-   }
-
-   /* sanity check the size */
-   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
-   assert(first_layer < 2048 && num_layers <= 2048);
-   switch (surface_type) {
-   case GEN6_SURFTYPE_1D:
-      assert(width <= 16384 && height == 1 && depth <= 2048);
-      break;
-   case GEN6_SURFTYPE_2D:
-      assert(width <= 16384 && height <= 16384 && depth <= 2048);
-      break;
-   case GEN6_SURFTYPE_3D:
-      assert(width <= 2048 && height <= 2048 && depth <= 2048);
-      if (!is_rt)
-         assert(first_layer == 0);
-      break;
-   case GEN6_SURFTYPE_CUBE:
-      assert(width <= 16384 && height <= 16384 && depth <= 86);
-      assert(width == height);
-      if (is_rt)
-         assert(first_layer == 0);
-      break;
-   default:
-      assert(!"unexpected surface type");
-      break;
-   }
-
-   if (is_rt) {
-      assert(num_levels == 1);
-      lod = first_level;
-   }
-   else {
-      lod = num_levels - 1;
-   }
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
-    *
-    *     "The Base Address for linear render target surfaces and surfaces
-    *      accessed with the typed surface read/write data port messages must
-    *      be element-size aligned, for non-YUV surface formats, or a multiple
-    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
-    *      have no alignment requirements (byte alignment is sufficient)."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
-    *
-    *     "For linear render target surfaces and surfaces accessed with the
-    *      typed data port messages, the pitch must be a multiple of the
-    *      element size for non-YUV surface formats. Pitch must be a multiple
-    *      of 2 * element size for YUV surface formats. For linear surfaces
-    *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
-    *      of 4 bytes.For other linear surfaces, the pitch can be any multiple
-    *      of bytes."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 74:
-    *
-    *     "For linear surfaces, this field (X Offset) must be zero."
-    */
-   if (img->tiling == GEN6_TILING_NONE) {
-      if (is_rt) {
-         const int elem_size = util_format_get_blocksize(format);
-         assert(pitch % elem_size == 0);
-      }
-   }
-
-   STATIC_ASSERT(Elements(surf->payload) >= 13);
-   dw = surf->payload;
-
-   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
-           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
-   /*
-    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
-    *
-    *     "If this field (Surface Array) is enabled, the Surface Type must be
-    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
-    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
-    *      SURFTYPE_CUBE, the Depth field must be set to zero."
-    *
-    * For non-3D sampler surfaces, resinfo (the sampler message) always
-    * returns zero for the number of layers when this field is not set.
-    */
-   if (surface_type != GEN6_SURFTYPE_3D) {
-      switch (target) {
-      case PIPE_TEXTURE_1D_ARRAY:
-      case PIPE_TEXTURE_2D_ARRAY:
-      case PIPE_TEXTURE_CUBE_ARRAY:
-         dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
-         break;
-      default:
-         assert(depth == 1);
-         break;
-      }
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      switch (img->align_j) {
-      case 4:
-         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
-         break;
-      case 8:
-         dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
-         break;
-      case 16:
-         dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
-         break;
-      default:
-         assert(!"unsupported valign");
-         break;
-      }
-
-      switch (img->align_i) {
-      case 4:
-         dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
-         break;
-      case 8:
-         dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
-         break;
-      case 16:
-         dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
-         break;
-      default:
-         assert(!"unsupported halign");
-         break;
-      }
-
-      dw[0] |= img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
-   } else {
-      assert(img->align_i == 4 || img->align_i == 8);
-      assert(img->align_j == 2 || img->align_j == 4);
-
-      if (img->align_j == 4)
-         dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
-
-      if (img->align_i == 8)
-         dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
-
-      assert(img->tiling != GEN8_TILING_W);
-      dw[0] |= img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
-
-      if (img->walk == ILO_IMAGE_WALK_LOD)
-         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
-      else
-         dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
-   }
-
-   if (is_rt)
-      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
-   if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
-      dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
-      assert(img->walk_layer_height % 4 == 0);
-      dw[1] = img->walk_layer_height / 4;
-   } else {
-      dw[1] = 0;
-   }
-
-   dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
-           GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
-
-   dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
-           (pitch - 1);
-
-   dw[4] = first_layer << 18 |
-           (num_layers - 1) << 7;
-
-   /*
-    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
-    * means the samples are interleaved.  The layouts are the same when the
-    * number of samples is 1.
-    */
-   if (img->interleaved_samples && img->sample_count > 1) {
-      assert(!is_rt);
-      dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
-   }
-   else {
-      dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
-   }
-
-   switch (img->sample_count) {
-   case 0:
-   case 1:
-   default:
-      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
-      break;
-   case 2:
-      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
-      break;
-   case 4:
-      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
-      break;
-   case 8:
-      dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
-      break;
-   case 16:
-      dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
-      break;
-   }
-
-   dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
-           lod;
-
-   dw[6] = 0;
-   dw[7] = 0;
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
-      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
-               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
-               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
-               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(8))
-      memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
-}
-
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
-                               unsigned width, unsigned height,
-                               unsigned depth, unsigned level,
-                               struct ilo_view_surface *surf)
-{
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      view_init_null_gen7(dev,
-            width, height, depth, level, surf);
-   } else {
-      view_init_null_gen6(dev,
-            width, height, depth, level, surf);
-   }
-
-   surf->bo = NULL;
-   surf->scanout = false;
-}
-
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
-                                     const struct ilo_buffer *buf,
-                                     unsigned offset, unsigned size,
-                                     unsigned struct_size,
-                                     enum pipe_format elem_format,
-                                     bool is_rt, bool render_cache_rw,
-                                     struct ilo_view_surface *surf)
-{
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      view_init_for_buffer_gen7(dev, buf, offset, size,
-            struct_size, elem_format, is_rt, render_cache_rw, surf);
-   } else {
-      view_init_for_buffer_gen6(dev, buf, offset, size,
-            struct_size, elem_format, is_rt, render_cache_rw, surf);
-   }
-
-   /* do not increment reference count */
-   surf->bo = buf->bo;
-   surf->scanout = false;
-}
-
-void
-ilo_gpe_init_view_surface_for_image(const struct ilo_dev *dev,
-                                    const struct ilo_image *img,
-                                    enum pipe_texture_target target,
-                                    enum pipe_format format,
-                                    unsigned first_level,
-                                    unsigned num_levels,
-                                    unsigned first_layer,
-                                    unsigned num_layers,
-                                    bool is_rt,
-                                    struct ilo_view_surface *surf)
-{
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      view_init_for_image_gen7(dev, img, target, format,
-            first_level, num_levels, first_layer, num_layers,
-            is_rt, surf);
-   } else {
-      view_init_for_image_gen6(dev, img, target, format,
-            first_level, num_levels, first_layer, num_layers,
-            is_rt, surf);
-   }
-
-   surf->scanout = img->scanout;
-   /* do not increment reference count */
-   surf->bo = img->bo;
-}
-
-static void
-sampler_init_border_color_gen6(const struct ilo_dev *dev,
-                               const union pipe_color_union *color,
-                               uint32_t *dw, int num_dwords)
-{
-   float rgba[4] = {
-      color->f[0], color->f[1], color->f[2], color->f[3],
-   };
-
-   ILO_DEV_ASSERT(dev, 6, 6);
-
-   assert(num_dwords >= 12);
-
-   /*
-    * This state is not documented in the Sandy Bridge PRM, but in the
-    * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
-    */
-
-   /* IEEE_FP */
-   dw[1] = fui(rgba[0]);
-   dw[2] = fui(rgba[1]);
-   dw[3] = fui(rgba[2]);
-   dw[4] = fui(rgba[3]);
-
-   /* FLOAT_16 */
-   dw[5] = util_float_to_half(rgba[0]) |
-           util_float_to_half(rgba[1]) << 16;
-   dw[6] = util_float_to_half(rgba[2]) |
-           util_float_to_half(rgba[3]) << 16;
-
-   /* clamp to [-1.0f, 1.0f] */
-   rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
-   rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
-   rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
-   rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
-
-   /* SNORM16 */
-   dw[9] =  (int16_t) util_iround(rgba[0] * 32767.0f) |
-            (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
-   dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
-            (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
-
-   /* SNORM8 */
-   dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
-            (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
-            (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
-            (int8_t) util_iround(rgba[3] * 127.0f) << 24;
-
-   /* clamp to [0.0f, 1.0f] */
-   rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
-   rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
-   rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
-   rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
-
-   /* UNORM8 */
-   dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
-           (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
-           (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
-           (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
-
-   /* UNORM16 */
-   dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
-           (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
-   dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
-           (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
-}
-
-/**
- * Translate a pipe texture mipfilter to the matching hardware mipfilter.
- */
-static int
-gen6_translate_tex_mipfilter(unsigned filter)
-{
-   switch (filter) {
-   case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST;
-   case PIPE_TEX_MIPFILTER_LINEAR:  return GEN6_MIPFILTER_LINEAR;
-   case PIPE_TEX_MIPFILTER_NONE:    return GEN6_MIPFILTER_NONE;
-   default:
-      assert(!"unknown mipfilter");
-      return GEN6_MIPFILTER_NONE;
-   }
-}
-
-/**
- * Translate a pipe texture filter to the matching hardware mapfilter.
- */
-static int
-gen6_translate_tex_filter(unsigned filter)
-{
-   switch (filter) {
-   case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST;
-   case PIPE_TEX_FILTER_LINEAR:  return GEN6_MAPFILTER_LINEAR;
-   default:
-      assert(!"unknown sampler filter");
-      return GEN6_MAPFILTER_NEAREST;
-   }
-}
-
-/**
- * Translate a pipe texture coordinate wrapping mode to the matching hardware
- * wrapping mode.
- */
-static int
-gen6_translate_tex_wrap(unsigned wrap)
-{
-   switch (wrap) {
-   case PIPE_TEX_WRAP_CLAMP:              return GEN8_TEXCOORDMODE_HALF_BORDER;
-   case PIPE_TEX_WRAP_REPEAT:             return GEN6_TEXCOORDMODE_WRAP;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:      return GEN6_TEXCOORDMODE_CLAMP;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:    return GEN6_TEXCOORDMODE_CLAMP_BORDER;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:      return GEN6_TEXCOORDMODE_MIRROR;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-   default:
-      assert(!"unknown sampler wrap mode");
-      return GEN6_TEXCOORDMODE_WRAP;
-   }
-}
-
-/**
- * Translate a pipe shadow compare function to the matching hardware shadow
- * function.
- */
-static int
-gen6_translate_shadow_func(unsigned func)
-{
-   /*
-    * For PIPE_FUNC_x, the reference value is on the left-hand side of the
-    * comparison, and 1.0 is returned when the comparison is true.
-    *
-    * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of
-    * the comparison, and 0.0 is returned when the comparison is true.
-    */
-   switch (func) {
-   case PIPE_FUNC_NEVER:      return GEN6_COMPAREFUNCTION_ALWAYS;
-   case PIPE_FUNC_LESS:       return GEN6_COMPAREFUNCTION_LEQUAL;
-   case PIPE_FUNC_EQUAL:      return GEN6_COMPAREFUNCTION_NOTEQUAL;
-   case PIPE_FUNC_LEQUAL:     return GEN6_COMPAREFUNCTION_LESS;
-   case PIPE_FUNC_GREATER:    return GEN6_COMPAREFUNCTION_GEQUAL;
-   case PIPE_FUNC_NOTEQUAL:   return GEN6_COMPAREFUNCTION_EQUAL;
-   case PIPE_FUNC_GEQUAL:     return GEN6_COMPAREFUNCTION_GREATER;
-   case PIPE_FUNC_ALWAYS:     return GEN6_COMPAREFUNCTION_NEVER;
-   default:
-      assert(!"unknown shadow compare function");
-      return GEN6_COMPAREFUNCTION_NEVER;
-   }
-}
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
-                         const struct pipe_sampler_state *state,
-                         struct ilo_sampler_cso *sampler)
-{
-   int mip_filter, min_filter, mag_filter, max_aniso;
-   int lod_bias, max_lod, min_lod;
-   int wrap_s, wrap_t, wrap_r, wrap_cube;
-   uint32_t dw0, dw1, dw3;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   memset(sampler, 0, sizeof(*sampler));
-
-   mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
-   min_filter = gen6_translate_tex_filter(state->min_img_filter);
-   mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
-
-   sampler->anisotropic = state->max_anisotropy;
-
-   if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
-      max_aniso = state->max_anisotropy / 2 - 1;
-   else if (state->max_anisotropy > 16)
-      max_aniso = GEN6_ANISORATIO_16;
-   else
-      max_aniso = GEN6_ANISORATIO_2;
-
-   /*
-    *
-    * Here is how the hardware calculate per-pixel LOD, from my reading of the
-    * PRMs:
-    *
-    *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
-    *     other ways.  The number of texels is measured using level
-    *     SurfMinLod.
-    *  2) Bias is added to LOD.
-    *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
-    *     compared with Base to determine whether magnification or
-    *     minification is needed.  (if preclamp is disabled, LOD is compared
-    *     with Base before clamping)
-    *  4) If magnification is needed, or no mipmapping is requested, LOD is
-    *     set to floor(MinLod).
-    *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
-    *
-    * With Gallium interface, Base is always zero and
-    * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
-    */
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      const float scale = 256.0f;
-
-      /* [-16.0, 16.0) in S4.8 */
-      lod_bias = (int)
-         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
-      lod_bias &= 0x1fff;
-
-      /* [0.0, 14.0] in U4.8 */
-      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
-      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
-   }
-   else {
-      const float scale = 64.0f;
-
-      /* [-16.0, 16.0) in S4.6 */
-      lod_bias = (int)
-         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
-      lod_bias &= 0x7ff;
-
-      /* [0.0, 13.0] in U4.6 */
-      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
-      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
-   }
-
-   /*
-    * We want LOD to be clamped to determine magnification/minification, and
-    * get set to zero when it is magnification or when mipmapping is disabled.
-    * The hardware would set LOD to floor(MinLod) and that is a problem when
-    * MinLod is greater than or equal to 1.0f.
-    *
-    * With Base being zero, it is always minification when MinLod is non-zero.
-    * To achieve our goal, we just need to set MinLod to zero and set
-    * MagFilter to MinFilter when mipmapping is disabled.
-    */
-   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
-      min_lod = 0;
-      mag_filter = min_filter;
-   }
-
-   /* determine wrap s/t/r */
-   wrap_s = gen6_translate_tex_wrap(state->wrap_s);
-   wrap_t = gen6_translate_tex_wrap(state->wrap_t);
-   wrap_r = gen6_translate_tex_wrap(state->wrap_r);
-   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
-      /*
-       * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
-       * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering,
-       * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
-       * additionally clamping the texture coordinates to [0.0, 1.0].
-       *
-       * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8.  The
-       * clamping has to be taken care of in the shaders.  There are two
-       * filters here, but let the minification one has a say.
-       */
-      const bool clamp_is_to_edge =
-         (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
-
-      if (clamp_is_to_edge) {
-         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
-            wrap_s = GEN6_TEXCOORDMODE_CLAMP;
-         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
-            wrap_t = GEN6_TEXCOORDMODE_CLAMP;
-         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
-            wrap_r = GEN6_TEXCOORDMODE_CLAMP;
-      } else {
-         if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
-            wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-            sampler->saturate_s = true;
-         }
-         if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
-            wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-            sampler->saturate_t = true;
-         }
-         if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
-            wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
-            sampler->saturate_r = true;
-         }
-      }
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 4 part 1, page 107:
-    *
-    *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
-    *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
-    *      must have the same Address Control mode."
-    *
-    * From the Ivy Bridge PRM, volume 4 part 1, page 96:
-    *
-    *     "This field (Cube Surface Control Mode) must be set to
-    *      CUBECTRLMODE_PROGRAMMED"
-    *
-    * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
-    * map filtering.
-    */
-   if (state->seamless_cube_map &&
-       (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
-        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
-      wrap_cube = GEN6_TEXCOORDMODE_CUBE;
-   }
-   else {
-      wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
-   }
-
-   if (!state->normalized_coords) {
-      /*
-       * From the Ivy Bridge PRM, volume 4 part 1, page 98:
-       *
-       *     "The following state must be set as indicated if this field
-       *      (Non-normalized Coordinate Enable) is enabled:
-       *
-       *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
-       *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
-       *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
-       *      - Mag Mode Filter must be MAPFILTER_NEAREST or
-       *        MAPFILTER_LINEAR.
-       *      - Min Mode Filter must be MAPFILTER_NEAREST or
-       *        MAPFILTER_LINEAR.
-       *      - Mip Mode Filter must be MIPFILTER_NONE.
-       *      - Min LOD must be 0.
-       *      - Max LOD must be 0.
-       *      - MIP Count must be 0.
-       *      - Surface Min LOD must be 0.
-       *      - Texture LOD Bias must be 0."
-       */
-      assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
-             wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-      assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
-             wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-      assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
-             wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-
-      assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
-             mag_filter == GEN6_MAPFILTER_LINEAR);
-      assert(min_filter == GEN6_MAPFILTER_NEAREST ||
-             min_filter == GEN6_MAPFILTER_LINEAR);
-
-      /* work around a bug in util_blitter */
-      mip_filter = GEN6_MIPFILTER_NONE;
-
-      assert(mip_filter == GEN6_MIPFILTER_NONE);
-   }
-
-   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
-      dw0 = 1 << 28 |
-            mip_filter << 20 |
-            lod_bias << 1;
-
-      sampler->dw_filter = mag_filter << 17 |
-                           min_filter << 14;
-
-      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
-                                 GEN6_MAPFILTER_ANISOTROPIC << 14 |
-                                 1;
-
-      dw1 = min_lod << 20 |
-            max_lod << 8;
-
-      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
-         dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
-
-      dw3 = max_aniso << 19;
-
-      /* round the coordinates for linear filtering */
-      if (min_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
-      }
-      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
-      }
-
-      if (!state->normalized_coords)
-         dw3 |= 1 << 10;
-
-      sampler->dw_wrap = wrap_s << 6 |
-                         wrap_t << 3 |
-                         wrap_r;
-
-      /*
-       * As noted in the classic i965 driver, the HW may still reference
-       * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
-       * mode
-       */
-      sampler->dw_wrap_1d = wrap_s << 6 |
-                            GEN6_TEXCOORDMODE_WRAP << 3 |
-                            GEN6_TEXCOORDMODE_WRAP;
-
-      sampler->dw_wrap_cube = wrap_cube << 6 |
-                              wrap_cube << 3 |
-                              wrap_cube;
-
-      STATIC_ASSERT(Elements(sampler->payload) >= 7);
-
-      sampler->payload[0] = dw0;
-      sampler->payload[1] = dw1;
-      sampler->payload[2] = dw3;
-
-      memcpy(&sampler->payload[3],
-            state->border_color.ui, sizeof(state->border_color.ui));
-   }
-   else {
-      dw0 = 1 << 28 |
-            mip_filter << 20 |
-            lod_bias << 3;
-
-      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
-         dw0 |= gen6_translate_shadow_func(state->compare_func);
-
-      sampler->dw_filter = (min_filter != mag_filter) << 27 |
-                           mag_filter << 17 |
-                           min_filter << 14;
-
-      sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
-                                 GEN6_MAPFILTER_ANISOTROPIC << 14;
-
-      dw1 = min_lod << 22 |
-            max_lod << 12;
-
-      sampler->dw_wrap = wrap_s << 6 |
-                         wrap_t << 3 |
-                         wrap_r;
-
-      sampler->dw_wrap_1d = wrap_s << 6 |
-                            GEN6_TEXCOORDMODE_WRAP << 3 |
-                            GEN6_TEXCOORDMODE_WRAP;
-
-      sampler->dw_wrap_cube = wrap_cube << 6 |
-                              wrap_cube << 3 |
-                              wrap_cube;
-
-      dw3 = max_aniso << 19;
-
-      /* round the coordinates for linear filtering */
-      if (min_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_V_MIN_ROUND |
-                 GEN6_SAMPLER_DW3_R_MIN_ROUND);
-      }
-      if (mag_filter != GEN6_MAPFILTER_NEAREST) {
-         dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_V_MAG_ROUND |
-                 GEN6_SAMPLER_DW3_R_MAG_ROUND);
-      }
-
-      if (!state->normalized_coords)
-         dw3 |= 1;
-
-      STATIC_ASSERT(Elements(sampler->payload) >= 15);
-
-      sampler->payload[0] = dw0;
-      sampler->payload[1] = dw1;
-      sampler->payload[2] = dw3;
-
-      sampler_init_border_color_gen6(dev,
-            &state->border_color, &sampler->payload[3], 12);
-   }
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_cc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_cc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_cc.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_cc.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,890 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_cc.h"
+
+/**
+ * Check that the stencil test is enabled only when a stencil buffer exists.
+ */
+static bool
+cc_validate_gen6_stencil(const struct ilo_dev *dev,
+                         const struct ilo_state_cc_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
+    *
+    *     "If the Depth Buffer is either undefined or does not have a surface
+    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
+    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
+    *
+    *     "This field (Stencil Test Enable) cannot be enabled if Surface
+    *      Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
+    */
+   assert(!info->stencil.test_enable || info->stencil.cv_has_buffer);
+
+   return true;
+}
+
+/** Check that depth test/write is enabled only when a depth buffer exists. */
+static bool
+cc_validate_gen6_depth(const struct ilo_dev *dev,
+                       const struct ilo_state_cc_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 360:
+    *
+    *     "Enabling the Depth Test function without defining a Depth Buffer is
+    *      UNDEFINED."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 375:
+    *
+    *     "A Depth Buffer must be defined before enabling writes to it, or
+    *      operation is UNDEFINED."
+    */
+   /* testing against or writing to the depth buffer both require one */
+   if (info->depth.test_enable || info->depth.write_enable)
+      assert(info->depth.cv_has_buffer);
+
+   return true;
+}
+
+/**
+ * Pack DEPTH_STENCIL_STATE for Gen6 through Gen7.5 into cc->ds[0..2].
+ *
+ * Returns false when stencil or depth validation fails, true otherwise.
+ */
+static bool
+cc_set_gen6_DEPTH_STENCIL_STATE(struct ilo_state_cc *cc,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_cc_info *info)
+{
+   const struct ilo_state_cc_stencil_info *stencil = &info->stencil;
+   const struct ilo_state_cc_depth_info *depth = &info->depth;
+   const struct ilo_state_cc_params_info *params = &info->params;
+   uint32_t dw0 = 0, dw1 = 0, dw2;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (!cc_validate_gen6_stencil(dev, info))
+      return false;
+   if (!cc_validate_gen6_depth(dev, info))
+      return false;
+
+   if (stencil->test_enable) {
+      /* single-sided stencil applies the front-face state to back faces */
+      const bool twosided = stencil->twosided_enable;
+      const struct ilo_state_cc_stencil_op_info *front = &stencil->front;
+      const struct ilo_state_cc_stencil_op_info *back =
+         (twosided) ? &stencil->back : &stencil->front;
+      const struct ilo_state_cc_stencil_params_info *front_p =
+         &params->stencil_front;
+      const struct ilo_state_cc_stencil_params_info *back_p =
+         (twosided) ? &params->stencil_back : &params->stencil_front;
+
+      dw0 |= GEN6_ZS_DW0_STENCIL_TEST_ENABLE;
+      if (twosided)
+         dw0 |= GEN6_ZS_DW0_STENCIL1_ENABLE;
+
+      dw0 |= front->test_func << GEN6_ZS_DW0_STENCIL_FUNC__SHIFT |
+             front->fail_op << GEN6_ZS_DW0_STENCIL_FAIL_OP__SHIFT |
+             front->zfail_op << GEN6_ZS_DW0_STENCIL_ZFAIL_OP__SHIFT |
+             front->zpass_op << GEN6_ZS_DW0_STENCIL_ZPASS_OP__SHIFT |
+             back->test_func << GEN6_ZS_DW0_STENCIL1_FUNC__SHIFT |
+             back->fail_op << GEN6_ZS_DW0_STENCIL1_FAIL_OP__SHIFT |
+             back->zfail_op << GEN6_ZS_DW0_STENCIL1_ZFAIL_OP__SHIFT |
+             back->zpass_op << GEN6_ZS_DW0_STENCIL1_ZPASS_OP__SHIFT;
+
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 363:
+       *
+       *     "If this field (Stencil Buffer Write Enable) is enabled, Stencil
+       *      Test Enable must also be enabled."
+       *
+       * This is different from depth write enable, which is independent from
+       * depth test enable.
+       */
+      if (front_p->write_mask || back_p->write_mask)
+         dw0 |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+
+      dw1 |= front_p->test_mask << GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT |
+             front_p->write_mask << GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT |
+             back_p->test_mask << GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT |
+             back_p->write_mask << GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT;
+   }
+
+   /* the depth function is programmed as ALWAYS when the test is off */
+   if (depth->test_enable) {
+      dw2 = GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
+            depth->test_func << GEN6_ZS_DW2_DEPTH_FUNC__SHIFT;
+   } else {
+      dw2 = GEN6_COMPAREFUNCTION_ALWAYS << GEN6_ZS_DW2_DEPTH_FUNC__SHIFT;
+   }
+
+   /* independent from depth->test_enable */
+   if (depth->write_enable)
+      dw2 |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(cc->ds) >= 3);
+   cc->ds[0] = dw0;
+   cc->ds[1] = dw1;
+   cc->ds[2] = dw2;
+
+   return true;
+}
+
+/**
+ * Pack the 3DSTATE_WM_DEPTH_STENCIL payload for Gen8 into cc->ds[0..1].
+ *
+ * Returns false when stencil or depth validation fails, true otherwise.
+ */
+static bool
+cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL(struct ilo_state_cc *cc,
+                                     const struct ilo_dev *dev,
+                                     const struct ilo_state_cc_info *info)
+{
+   const struct ilo_state_cc_stencil_info *stencil = &info->stencil;
+   const struct ilo_state_cc_depth_info *depth = &info->depth;
+   const struct ilo_state_cc_params_info *params = &info->params;
+   uint32_t dw1 = 0, dw2 = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!cc_validate_gen6_stencil(dev, info))
+      return false;
+   if (!cc_validate_gen6_depth(dev, info))
+      return false;
+
+   if (stencil->test_enable) {
+      /* single-sided stencil applies the front-face state to back faces */
+      const bool twosided = stencil->twosided_enable;
+      const struct ilo_state_cc_stencil_op_info *front = &stencil->front;
+      const struct ilo_state_cc_stencil_op_info *back =
+         (twosided) ? &stencil->back : &stencil->front;
+      const struct ilo_state_cc_stencil_params_info *front_p =
+         &params->stencil_front;
+      const struct ilo_state_cc_stencil_params_info *back_p =
+         (twosided) ? &params->stencil_back : &params->stencil_front;
+
+      dw1 |= GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
+      if (twosided)
+         dw1 |= GEN8_ZS_DW1_STENCIL1_ENABLE;
+
+      dw1 |= front->test_func << GEN8_ZS_DW1_STENCIL_FUNC__SHIFT |
+             front->fail_op << GEN8_ZS_DW1_STENCIL_FAIL_OP__SHIFT |
+             front->zfail_op << GEN8_ZS_DW1_STENCIL_ZFAIL_OP__SHIFT |
+             front->zpass_op << GEN8_ZS_DW1_STENCIL_ZPASS_OP__SHIFT |
+             back->test_func << GEN8_ZS_DW1_STENCIL1_FUNC__SHIFT |
+             back->fail_op << GEN8_ZS_DW1_STENCIL1_FAIL_OP__SHIFT |
+             back->zfail_op << GEN8_ZS_DW1_STENCIL1_ZFAIL_OP__SHIFT |
+             back->zpass_op << GEN8_ZS_DW1_STENCIL1_ZPASS_OP__SHIFT;
+
+      if (front_p->write_mask || back_p->write_mask)
+         dw1 |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+
+      dw2 |= front_p->test_mask << GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT |
+             front_p->write_mask << GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT |
+             back_p->test_mask << GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT |
+             back_p->write_mask << GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT;
+   }
+
+   if (depth->test_enable) {
+      dw1 |= GEN8_ZS_DW1_DEPTH_TEST_ENABLE |
+             depth->test_func << GEN8_ZS_DW1_DEPTH_FUNC__SHIFT;
+   } else {
+      dw1 |= GEN6_COMPAREFUNCTION_ALWAYS << GEN8_ZS_DW1_DEPTH_FUNC__SHIFT;
+   }
+
+   if (depth->write_enable)
+      dw1 |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(cc->ds) >= 2);
+   cc->ds[0] = dw1;
+   cc->ds[1] = dw2;
+
+   return true;
+}
+
+/**
+ * Tell whether \p factor is one of the SRC1 blend factors.
+ *
+ * Those are SRC1_COLOR, SRC1_ALPHA, INV_SRC1_COLOR, and INV_SRC1_ALPHA.
+ */
+static bool
+is_dual_source_blend_factor(enum gen_blend_factor factor)
+{
+   return (factor == GEN6_BLENDFACTOR_SRC1_COLOR ||
+           factor == GEN6_BLENDFACTOR_SRC1_ALPHA ||
+           factor == GEN6_BLENDFACTOR_INV_SRC1_COLOR ||
+           factor == GEN6_BLENDFACTOR_INV_SRC1_ALPHA);
+}
+
+/** Tell whether RT 0 uses SRC1 blend factors; the other RTs must agree. */
+static bool
+cc_get_gen6_dual_source_blending(const struct ilo_dev *dev,
+                                 const struct ilo_state_cc_info *info)
+{
+   const struct ilo_state_cc_blend_info *blend = &info->blend;
+   bool dual_source_blending = false;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 356:
+    *
+    *     "Dual Source Blending: When using "Dual Source" Render Target
+    *      Write messages, the Source1 pixel color+alpha passed in the
+    *      message can be selected as a src/dst blend factor. See Color
+    *      Buffer Blending.  In single-source mode, those blend factor
+    *      selections are invalid. If SRC1 is included in a src/dst blend
+    *      factor and a DualSource RT Write message is not utilized,
+    *      results are UNDEFINED. (This reflects the same restriction in DX
+    *      APIs, where undefined results are produced if "o1" is not
+    *      written by a PS - there are no default values defined). If SRC1
+    *      is not included in a src/dst blend factor, dual source blending
+    *      must be disabled."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 356:
+    *
+    *     "The single source message will not cause a write to the render
+    *      target if Dual Source Blend Enable in 3DSTATE_WM is enabled."
+    *
+    *     "The dual source message will revert to a single source message
+    *      using source 0 if Dual Source Blend Enable in 3DSTATE_WM is
+    *      disabled."
+    *
+    * Dual source blending must be enabled or disabled universally.
+    */
+   for (i = 0; i < blend->rt_count; i++) {
+      const struct ilo_state_cc_blend_rt_info *rt = &blend->rt[i];
+      const bool uses_src1 =
+         is_dual_source_blend_factor(rt->rgb_src) ||
+         is_dual_source_blend_factor(rt->rgb_dst) ||
+         is_dual_source_blend_factor(rt->a_src) ||
+         is_dual_source_blend_factor(rt->a_dst);
+      if (i == 0)
+         dual_source_blending = uses_src1;
+      else
+         assert(dual_source_blending == uses_src1);
+   }
+
+   return dual_source_blending;
+}
+
+/** Assert the PRM restrictions on alpha test, AlphaToCoverage and AlphaToOne. */
+static bool
+cc_validate_gen6_alpha(const struct ilo_dev *dev,
+                       const struct ilo_state_cc_info *info)
+{
+   const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+    *
+    *     "Alpha values from the pixel shader are treated as FLOAT32 format
+    *      for computing the AlphaToCoverage Mask."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
+    *
+    *     "If set (AlphaToCoverage Enable), Source0 Alpha is converted to a
+    *      temporary 1/2/4-bit coverage mask and the mask bit corresponding to
+    *      the sample# ANDed with the sample mask bit. If set, sample coverage
+    *      is computed based on src0 alpha value. Value of 0 disables all
+    *      samples and value of 1 enables all samples for that pixel. The same
+    *      coverage needs to apply to all the RTs in MRT case. Further, any
+    *      value of src0 alpha between 0 and 1 monotonically increases the
+    *      number of enabled pixels.
+    *
+    *      The same coverage needs to be applied to all the RTs in MRT case."
+    *
+    *     "If set (AlphaToOne Enable), Source0 Alpha is set to 1.0f after
+    *      (possibly) being used to generate the AlphaToCoverage coverage
+    *      mask.
+    *
+    *      The same coverage needs to be applied to all the RTs in MRT case.
+    *
+    *      If Dual Source Blending is enabled, this bit must be disabled."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+    *
+    *     "Alpha Test can only be enabled if Pixel Shader outputs a float
+    *      alpha value.
+    *
+    *      Alpha Test is applied independently on each render target by
+    *      comparing that render target's alpha value against the alpha
+    *      reference value. If the alpha test fails, the corresponding pixel
+    *      write will be supressed only for that render target. The
+    *      depth/stencil update will occur if alpha test passes for any render
+    *      target."
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 194:
+    *
+    *     "Multiple render targets are supported with the single source and
+    *      replicate data messages. Each render target is accessed with a
+    *      separate Render Target Write message, each with a different surface
+    *      indicated (different binding table index). The depth buffer is
+    *      written only by the message(s) to the last render target, indicated
+    *      by the Last Render Target Select bit set to clear the pixel
+    *      scoreboard bits."
+    *
+    * When AlphaToCoverage/AlphaToOne/AlphaTest is enabled, it is
+    * required/desirable for the RT write messages to set "Source0 Alpha
+    * Present to RenderTarget" in the MRT case.  It is also required/desirable
+    * for the alpha values to be FLOAT32.
+    */
+   if (alpha->alpha_to_coverage || alpha->alpha_to_one || alpha->test_enable)
+      assert(alpha->cv_float_source0_alpha);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 356:
+    *     "[DevSNB]: When NumSamples = 1, AlphaToCoverage and AlphaTo
+    *      Coverage Dither both must be disabled."
+    * So on Gen6, AlphaToCoverage requires a sample count other than one.
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(6) && alpha->alpha_to_coverage)
+      assert(!alpha->cv_sample_count_one);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
+    *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
+    *      must be disabled."
+    */
+   if (alpha->alpha_to_one)
+      assert(!cc_get_gen6_dual_source_blending(dev, info));
+
+   return true;
+}
+
+static bool
+cc_validate_gen6_blend(const struct ilo_dev *dev,
+                       const struct ilo_state_cc_info *info)
+{
+   /* assert-only check: the RT count must not exceed the supported maximum */
+   const uint8_t rt_count = info->blend.rt_count;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   assert(rt_count <= ILO_STATE_CC_BLEND_MAX_RT_COUNT);
+
+   return true;
+}
+
+static enum gen_blend_factor
+get_dst_alpha_one_blend_factor(enum gen_blend_factor factor, bool is_rgb)
+{
+   /* replace the factors listed here; is_rgb only matters for SATURATE */
+   if (factor == GEN6_BLENDFACTOR_DST_ALPHA)
+      return GEN6_BLENDFACTOR_ONE;
+   if (factor == GEN6_BLENDFACTOR_INV_DST_ALPHA)
+      return GEN6_BLENDFACTOR_ZERO;
+   if (factor == GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE)
+      return (is_rgb) ? GEN6_BLENDFACTOR_ZERO : GEN6_BLENDFACTOR_ONE;
+
+   /* all other factors are returned unchanged */
+   return factor;
+}
+
+static void
+cc_get_gen6_effective_rt(const struct ilo_dev *dev,
+                         const struct ilo_state_cc_info *info,
+                         uint8_t rt_index,
+                         struct ilo_state_cc_blend_rt_info *dst)
+{
+   const struct ilo_state_cc_blend_rt_info *rt = &info->blend.rt[rt_index];
+   /* Check RT rt_index and store the values to program for it in *dst. */
+   if (rt->logicop_enable || rt->blend_enable ||
+       rt->argb_write_disables != 0xf)
+      assert(rt->cv_has_buffer);   /* an RT that is used must have a buffer */
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+    *
+    *     "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
+    *      variants), otherwise Logic Ops must be DISABLED."
+    *
+    * From the Broadwell PRM, volume 7, page 671:
+    *
+    *     "Logic Ops are supported on all blendable render targets and render
+    *      targets with *INT formats."
+    */
+   if (ilo_dev_gen(dev) < ILO_GEN(8) && rt->logicop_enable)
+      assert(rt->cv_is_unorm);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 361:
+    *
+    *     "Only certain surface formats support Color Buffer Blending.  Refer
+    *      to the Surface Format tables in Sampling Engine. Blending must be
+    *      disabled on a RenderTarget if blending is not supported."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+    *
+    *     "Color Buffer Blending and Logic Ops must not be enabled
+    *      simultaneously, or behavior is UNDEFINED."
+    */
+   if (rt->blend_enable)
+      assert(!rt->cv_is_integer && !rt->logicop_enable);
+
+   *dst = *rt;   /* start from the caller's values; factors may be replaced */
+   if (rt->blend_enable) {
+      /* 0x0 is reserved in enum gen_blend_factor */
+      assert(rt->rgb_src && rt->rgb_dst && rt->a_src && rt->a_dst);
+
+      if (rt->force_dst_alpha_one) {
+         dst->rgb_src = get_dst_alpha_one_blend_factor(rt->rgb_src, true);
+         dst->rgb_dst = get_dst_alpha_one_blend_factor(rt->rgb_dst, true);
+         dst->a_src = get_dst_alpha_one_blend_factor(rt->a_src, false);
+         dst->a_dst = get_dst_alpha_one_blend_factor(rt->a_dst, false);
+         dst->force_dst_alpha_one = false;   /* now folded into the factors */
+      }
+   } else {   /* blending off: fall back to ONE/ZERO/ADD for color and alpha */
+      dst->rgb_src = GEN6_BLENDFACTOR_ONE;
+      dst->rgb_dst = GEN6_BLENDFACTOR_ZERO;
+      dst->rgb_func = GEN6_BLENDFUNCTION_ADD;
+      dst->a_src = dst->rgb_src;
+      dst->a_dst = dst->rgb_dst;
+      dst->a_func = dst->rgb_func;
+   }
+}
+
+static bool
+cc_set_gen6_BLEND_STATE(struct ilo_state_cc *cc,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_cc_info *info)
+{
+   const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+   const struct ilo_state_cc_blend_info *blend = &info->blend;
+   uint32_t dw_rt[2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT], dw1_invariant;
+   uint32_t dw0, dw1;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+   /* Packs two dwords per RT (Gen6-7.5 BLEND_STATE) and stores them in cc. */
+   if (!cc_validate_gen6_alpha(dev, info) ||
+       !cc_validate_gen6_blend(dev, info))
+      return false;
+
+   /*
+    * According to the Sandy Bridge PRM, volume 2 part 1, page 360, pre-blend
+    * and post-blend color clamps must be enabled in most cases.  For the
+    * other cases, they are either desirable or ignored.  We can enable them
+    * unconditionally.
+    */
+   dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
+         GEN6_RT_DW1_PRE_BLEND_CLAMP |
+         GEN6_RT_DW1_POST_BLEND_CLAMP;
+
+   if (alpha->alpha_to_coverage) {
+      dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 379:
+       *
+       *     "[DevSNB]: This bit (AlphaToCoverage Dither Enable) must be
+       *      disabled."
+       */
+      if (ilo_dev_gen(dev) >= ILO_GEN(7))   /* so dither only on Gen7+ */
+         dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
+   }
+
+   if (alpha->alpha_to_one)
+      dw1 |= GEN6_RT_DW1_ALPHA_TO_ONE;
+
+   if (alpha->test_enable) {
+      dw1 |= GEN6_RT_DW1_ALPHA_TEST_ENABLE |
+             alpha->test_func << GEN6_RT_DW1_ALPHA_TEST_FUNC__SHIFT;
+   } else {
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 371:
+       *
+       *     "When Alpha Test is disabled, Alpha Test Function must be
+       *      COMPAREFUNCTION_ALWAYS."
+       */
+      dw1 |= GEN6_COMPAREFUNCTION_ALWAYS <<
+         GEN6_RT_DW1_ALPHA_TEST_FUNC__SHIFT;
+   }
+
+   if (blend->dither_enable)
+      dw1 |= GEN6_RT_DW1_DITHER_ENABLE;
+
+   dw1_invariant = dw1;   /* the DW1 bits that are the same for every RT */
+
+   for (i = 0; i < blend->rt_count; i++) {
+      struct ilo_state_cc_blend_rt_info rt;
+
+      cc_get_gen6_effective_rt(dev, info, i, &rt);
+
+      /* 0x0 is reserved for blend factors and we have to set them all */
+      dw0 = rt.a_func << GEN6_RT_DW0_ALPHA_FUNC__SHIFT |
+            rt.a_src << GEN6_RT_DW0_SRC_ALPHA_FACTOR__SHIFT |
+            rt.a_dst << GEN6_RT_DW0_DST_ALPHA_FACTOR__SHIFT |
+            rt.rgb_func << GEN6_RT_DW0_COLOR_FUNC__SHIFT |
+            rt.rgb_src << GEN6_RT_DW0_SRC_COLOR_FACTOR__SHIFT |
+            rt.rgb_dst << GEN6_RT_DW0_DST_COLOR_FACTOR__SHIFT;
+
+      if (rt.blend_enable) {
+         dw0 |= GEN6_RT_DW0_BLEND_ENABLE;
+
+         if (rt.a_src != rt.rgb_src ||
+             rt.a_dst != rt.rgb_dst ||
+             rt.a_func != rt.rgb_func)
+            dw0 |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
+      }
+
+      dw1 = dw1_invariant |
+            rt.argb_write_disables << GEN6_RT_DW1_WRITE_DISABLES__SHIFT;
+
+      if (rt.logicop_enable) {
+         dw1 |= GEN6_RT_DW1_LOGICOP_ENABLE |
+                rt.logicop_func << GEN6_RT_DW1_LOGICOP_FUNC__SHIFT;
+      }
+
+      dw_rt[2 * i + 0] = dw0;
+      dw_rt[2 * i + 1] = dw1;
+   }
+
+   /* only the first rt_count pairs of dw_rt were written and are copied */
+   STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= ARRAY_SIZE(dw_rt));
+   memcpy(&cc->blend[0], dw_rt, sizeof(uint32_t) * 2 * blend->rt_count);
+   cc->blend_state_count = info->blend.rt_count;
+
+   return true;
+}
+
+static bool
+cc_set_gen8_BLEND_STATE(struct ilo_state_cc *cc,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_cc_info *info)
+{
+   const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+   const struct ilo_state_cc_blend_info *blend = &info->blend;
+   uint32_t dw_rt[2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT], dw0, dw1;
+   bool indep_alpha_enable;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+   /* Gen8 BLEND_STATE: one common dword followed by two dwords per RT. */
+   if (!cc_validate_gen6_alpha(dev, info) ||
+       !cc_validate_gen6_blend(dev, info))
+      return false;
+
+   indep_alpha_enable = false;   /* set when any RT blends alpha separately */
+   for (i = 0; i < blend->rt_count; i++) {
+      struct ilo_state_cc_blend_rt_info rt;
+
+      cc_get_gen6_effective_rt(dev, info, i, &rt);
+
+      dw0 = rt.rgb_src << GEN8_RT_DW0_SRC_COLOR_FACTOR__SHIFT |
+            rt.rgb_dst << GEN8_RT_DW0_DST_COLOR_FACTOR__SHIFT |
+            rt.rgb_func << GEN8_RT_DW0_COLOR_FUNC__SHIFT |
+            rt.a_src << GEN8_RT_DW0_SRC_ALPHA_FACTOR__SHIFT |
+            rt.a_dst << GEN8_RT_DW0_DST_ALPHA_FACTOR__SHIFT |
+            rt.a_func << GEN8_RT_DW0_ALPHA_FUNC__SHIFT |
+            rt.argb_write_disables << GEN8_RT_DW0_WRITE_DISABLES__SHIFT;
+
+      if (rt.blend_enable) {
+         dw0 |= GEN8_RT_DW0_BLEND_ENABLE;
+
+         if (rt.a_src != rt.rgb_src ||
+             rt.a_dst != rt.rgb_dst ||
+             rt.a_func != rt.rgb_func)
+            indep_alpha_enable = true;
+      }
+
+      dw1 = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
+            GEN8_RT_DW1_PRE_BLEND_CLAMP |
+            GEN8_RT_DW1_POST_BLEND_CLAMP;
+
+      if (rt.logicop_enable) {
+         dw1 |= GEN8_RT_DW1_LOGICOP_ENABLE |
+                rt.logicop_func << GEN8_RT_DW1_LOGICOP_FUNC__SHIFT;
+      }
+
+      dw_rt[2 * i + 0] = dw0;
+      dw_rt[2 * i + 1] = dw1;
+   }
+
+   dw0 = 0;   /* from here on dw0 is the common, RT-independent dword */
+
+   if (alpha->alpha_to_coverage) {
+      dw0 |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE |
+             GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
+   }
+
+   if (indep_alpha_enable)
+      dw0 |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
+
+   if (alpha->alpha_to_one)
+      dw0 |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
+
+   if (alpha->test_enable) {
+      dw0 |= GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
+             alpha->test_func << GEN8_BLEND_DW0_ALPHA_TEST_FUNC__SHIFT;
+   } else {
+      dw0 |= GEN6_COMPAREFUNCTION_ALWAYS <<
+         GEN8_BLEND_DW0_ALPHA_TEST_FUNC__SHIFT;
+   }
+
+   if (blend->dither_enable)
+      dw0 |= GEN8_BLEND_DW0_DITHER_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= 2 + ARRAY_SIZE(dw_rt));
+   cc->blend[1] = dw0;   /* blend[0] is written by the 3DSTATE_PS_BLEND setter */
+   memcpy(&cc->blend[2], dw_rt, sizeof(uint32_t) * 2 * blend->rt_count);
+   cc->blend_state_count = info->blend.rt_count;
+
+   return true;
+}
+
+static bool
+cc_set_gen8_3DSTATE_PS_BLEND(struct ilo_state_cc *cc,
+                             const struct ilo_dev *dev,
+                             const struct ilo_state_cc_info *info)
+{
+   const struct ilo_state_cc_alpha_info *alpha = &info->alpha;
+   const struct ilo_state_cc_blend_info *blend = &info->blend;
+   uint32_t dw1 = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* pack DW1 into cc->blend[0]; the blend fields are taken from RT 0 only */
+   if (alpha->alpha_to_coverage)
+      dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TO_COVERAGE;
+
+   if (alpha->test_enable)
+      dw1 |= GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE;
+
+   if (blend->rt_count) {
+      struct ilo_state_cc_blend_rt_info rt0;
+      uint8_t i;
+
+      cc_get_gen6_effective_rt(dev, info, 0, &rt0);
+
+      /* 0x0 is reserved for blend factors and we have to set them all */
+      dw1 |= rt0.a_src << GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR__SHIFT |
+             rt0.a_dst << GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR__SHIFT |
+             rt0.rgb_src << GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR__SHIFT |
+             rt0.rgb_dst << GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR__SHIFT;
+
+      for (i = 0; i < blend->rt_count; i++) {
+         if (blend->rt[i].argb_write_disables != 0xf) {
+            dw1 |= GEN8_PS_BLEND_DW1_WRITABLE_RT;
+            break;
+         }
+      }
+
+      if (rt0.blend_enable) {
+         dw1 |= GEN8_PS_BLEND_DW1_BLEND_ENABLE;
+         /* same test as BLEND_STATE: a differing function counts as well */
+         if (rt0.a_src != rt0.rgb_src || rt0.a_dst != rt0.rgb_dst ||
+             rt0.a_func != rt0.rgb_func)
+            dw1 |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
+      }
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(cc->blend) >= 1);
+   cc->blend[0] = dw1;
+
+   return true;
+}
+
+static bool
+cc_params_set_gen6_COLOR_CALC_STATE(struct ilo_state_cc *cc,
+                                    const struct ilo_dev *dev,
+                                    const struct ilo_state_cc_params_info *params)
+{
+   int chan;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(ARRAY_SIZE(cc->cc) >= 6);
+
+   /* dword 0: both stencil reference values plus the FLOAT32 alpha flag */
+   cc->cc[0] = params->stencil_front.test_ref << GEN6_CC_DW0_STENCIL_REF__SHIFT |
+               params->stencil_back.test_ref << GEN6_CC_DW0_STENCIL1_REF__SHIFT |
+               GEN6_CC_DW0_ALPHATEST_FLOAT32;
+   cc->cc[1] = fui(params->alpha_ref);
+
+   /* dwords 2..5: the four blend_rgba floats, each passed through fui() */
+   for (chan = 0; chan < 4; chan++)
+      cc->cc[2 + chan] = fui(params->blend_rgba[chan]);
+
+   return true;
+}
+
+bool
+ilo_state_cc_init(struct ilo_state_cc *cc,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_cc_info *info)
+{
+   assert(ilo_is_zeroed(cc, sizeof(*cc)));   /* expects a zero-filled cc */
+   return ilo_state_cc_set_info(cc, dev, info);   /* rest is a plain set_info */
+}
+
+bool
+ilo_state_cc_set_info(struct ilo_state_cc *cc,
+                      const struct ilo_dev *dev,
+                      const struct ilo_state_cc_info *info)
+{
+   bool ret = true;   /* stays true only if every setter below succeeds */
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      ret &= cc_set_gen8_3DSTATE_WM_DEPTH_STENCIL(cc, dev, info);
+      ret &= cc_set_gen8_BLEND_STATE(cc, dev, info);
+      ret &= cc_set_gen8_3DSTATE_PS_BLEND(cc, dev, info);
+   } else {
+      ret &= cc_set_gen6_DEPTH_STENCIL_STATE(cc, dev, info);
+      ret &= cc_set_gen6_BLEND_STATE(cc, dev, info);
+   }
+   /* COLOR_CALC_STATE is packed the same way on every supported Gen */
+   ret &= cc_params_set_gen6_COLOR_CALC_STATE(cc, dev, &info->params);
+
+   assert(ret);   /* a failure is treated as a caller bug in debug builds */
+
+   return ret;
+}
+
+bool
+ilo_state_cc_set_params(struct ilo_state_cc *cc,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_cc_params_info *params)
+{
+   /* modify stencil masks; a no-op if the stored dwords have stencil off */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      uint32_t dw1 = cc->ds[0];
+      uint32_t dw2 = cc->ds[1];
+
+      if (dw1 & GEN8_ZS_DW1_STENCIL_TEST_ENABLE) {
+         const bool twosided_enable = (dw1 & GEN8_ZS_DW1_STENCIL1_ENABLE);
+         const struct ilo_state_cc_stencil_params_info *front_p =
+            &params->stencil_front;
+         const struct ilo_state_cc_stencil_params_info *back_p =
+            (twosided_enable) ? &params->stencil_back :
+                                &params->stencil_front;   /* front used twice */
+
+         if (front_p->write_mask || back_p->write_mask)
+            dw1 |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+         else
+            dw1 &= ~GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+
+         dw2 =
+            front_p->test_mask << GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT |
+            front_p->write_mask << GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT |
+            back_p->test_mask << GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT |
+            back_p->write_mask << GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT;
+      }
+
+      cc->ds[0] = dw1;
+      cc->ds[1] = dw2;
+   } else {   /* same logic with the Gen6 dword layout: ds[0]=DW0, ds[1]=DW1 */
+      uint32_t dw0 = cc->ds[0];
+      uint32_t dw1 = cc->ds[1];
+
+      if (dw0 & GEN6_ZS_DW0_STENCIL_TEST_ENABLE) {
+         const bool twosided_enable = (dw0 & GEN6_ZS_DW0_STENCIL1_ENABLE);
+         const struct ilo_state_cc_stencil_params_info *front_p =
+            &params->stencil_front;
+         const struct ilo_state_cc_stencil_params_info *back_p =
+            (twosided_enable) ? &params->stencil_back :
+                                &params->stencil_front;
+
+         if (front_p->write_mask || back_p->write_mask)
+            dw0 |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+         else
+            dw0 &= ~GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+
+         dw1 =
+            front_p->test_mask << GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT |
+            front_p->write_mask << GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT |
+            back_p->test_mask << GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT |
+            back_p->write_mask << GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT;
+      }
+
+      cc->ds[0] = dw0;
+      cc->ds[1] = dw1;
+   }
+
+   /* modify COLOR_CALC_STATE (its return value is not checked here) */
+   cc_params_set_gen6_COLOR_CALC_STATE(cc, dev, params);
+
+   return true;
+}
+
+void
+ilo_state_cc_full_delta(const struct ilo_state_cc *cc,
+                        const struct ilo_dev *dev,
+                        struct ilo_state_cc_delta *delta)
+{
+   /* these two states are reported on every Gen; cc itself is not read */
+   uint32_t all = ILO_STATE_CC_BLEND_STATE | ILO_STATE_CC_COLOR_CALC_STATE;
+
+   if (ilo_dev_gen(dev) < ILO_GEN(8))
+      all |= ILO_STATE_CC_DEPTH_STENCIL_STATE;
+   else
+      all |= ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL |
+             ILO_STATE_CC_3DSTATE_PS_BLEND;
+   delta->dirty = all;
+}
+
+void
+ilo_state_cc_get_delta(const struct ilo_state_cc *cc,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_cc *old,
+                       struct ilo_state_cc_delta *delta)
+{
+   const bool resized = cc->blend_state_count != old->blend_state_count;
+
+   delta->dirty = 0;   /* then OR in one bit per state whose dwords differ */
+   if (memcmp(cc->ds, old->ds, sizeof(cc->ds))) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         delta->dirty |= ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL;
+      else
+         delta->dirty |= ILO_STATE_CC_DEPTH_STENCIL_STATE;
+   }
+   /* memcmp only spans cc's entry count, so a resize is dirty by itself */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      if (cc->blend[0] != old->blend[0])   /* blend[0] is 3DSTATE_PS_BLEND */
+         delta->dirty |= ILO_STATE_CC_3DSTATE_PS_BLEND;
+      if (resized || memcmp(&cc->blend[1], &old->blend[1],
+               sizeof(uint32_t) * (1 + 2 * cc->blend_state_count)))
+         delta->dirty |= ILO_STATE_CC_BLEND_STATE;
+   } else if (resized || memcmp(cc->blend, old->blend,
+            sizeof(uint32_t) * 2 * cc->blend_state_count)) {
+      delta->dirty |= ILO_STATE_CC_BLEND_STATE;
+   }
+
+   if (memcmp(cc->cc, old->cc, sizeof(cc->cc)))
+      delta->dirty |= ILO_STATE_CC_COLOR_CALC_STATE;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_cc.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_cc.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_cc.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_cc.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,199 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_CC_H
+#define ILO_STATE_CC_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 38:
+ *
+ *     "Render Target Index. Specifies the render target index that will be
+ *      used to select blend state from BLEND_STATE.
+ *      Format = U3"
+ */
+#define ILO_STATE_CC_BLEND_MAX_RT_COUNT 8
+
+enum ilo_state_cc_dirty_bits {
+   ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL           = (1 << 0), /* Gen8 only */
+   ILO_STATE_CC_3DSTATE_PS_BLEND                   = (1 << 1), /* Gen8 only */
+   ILO_STATE_CC_DEPTH_STENCIL_STATE                = (1 << 2), /* before Gen8 */
+   ILO_STATE_CC_BLEND_STATE                        = (1 << 3), /* all Gens */
+   ILO_STATE_CC_COLOR_CALC_STATE                   = (1 << 4), /* all Gens */
+};
+
+/**
+ * AlphaCoverage and AlphaTest.  The cv_ fields are only read by asserts.
+ */
+struct ilo_state_cc_alpha_info {
+   bool cv_sample_count_one;      /* asserted on Gen6 with alpha_to_coverage */
+   bool cv_float_source0_alpha;   /* asserted when any feature below is on */
+
+   bool alpha_to_coverage;
+   bool alpha_to_one;
+
+   bool test_enable;
+   enum gen_compare_function test_func;   /* ignored unless test_enable */
+};
+
+struct ilo_state_cc_stencil_op_info {   /* type of stencil_info.front/.back */
+   enum gen_compare_function test_func;
+   enum gen_stencil_op fail_op;    /* presumably: stencil test fails */
+   enum gen_stencil_op zfail_op;   /* presumably: depth test fails */
+   enum gen_stencil_op zpass_op;   /* presumably: both pass - TODO confirm */
+};
+
+/**
+ * StencilTest.  NOTE(review): the code reading this is not in this chunk.
+ */
+struct ilo_state_cc_stencil_info {
+   bool cv_has_buffer;   /* presumably a validation-only input, as elsewhere */
+
+   bool test_enable;
+   bool twosided_enable;   /* presumably selects whether back is used */
+
+   struct ilo_state_cc_stencil_op_info front;
+   struct ilo_state_cc_stencil_op_info back;
+};
+
+/**
+ * DepthTest.  NOTE(review): the code reading this is not in this chunk.
+ */
+struct ilo_state_cc_depth_info {
+   bool cv_has_buffer;   /* presumably a validation-only input, as elsewhere */
+
+   bool test_enable;
+   /* independent from test_enable */
+   bool write_enable;
+
+   enum gen_compare_function test_func;
+};
+
+struct ilo_state_cc_blend_rt_info {
+   bool cv_has_buffer;   /* asserted if the RT blends, logic-ops or writes */
+   bool cv_is_unorm;     /* asserted before Gen8 when logicop_enable is set */
+   bool cv_is_integer;   /* asserted to be false when blend_enable is set */
+
+   uint8_t argb_write_disables;   /* 0xf is treated as "not writable" */
+
+   bool logicop_enable;   /* must not be combined with blend_enable */
+   enum gen_logic_op logicop_func;
+
+   bool blend_enable;          /* if false, ONE/ZERO/ADD is programmed */
+   bool force_dst_alpha_one;   /* remaps DST_ALPHA-based factors when blending */
+   enum gen_blend_factor rgb_src;   /* factors: 0x0 is reserved */
+   enum gen_blend_factor rgb_dst;
+   enum gen_blend_function rgb_func;
+   enum gen_blend_factor a_src;
+   enum gen_blend_factor a_dst;
+   enum gen_blend_function a_func;
+};
+
+/**
+ * ColorBufferBlending, Dithering, and LogicOps.
+ */
+struct ilo_state_cc_blend_info {
+   const struct ilo_state_cc_blend_rt_info *rt;   /* indexed 0..rt_count-1 */
+   uint8_t rt_count;   /* at most ILO_STATE_CC_BLEND_MAX_RT_COUNT */
+
+   bool dither_enable;
+};
+
+struct ilo_state_cc_stencil_params_info {
+   uint8_t test_ref;     /* packed into dword 0 of COLOR_CALC_STATE */
+   uint8_t test_mask;    /* packed into the depth/stencil dwords */
+   uint8_t write_mask;   /* likewise; non-zero turns stencil writes on */
+};
+
+/**
+ * CC parameters; can be updated alone with ilo_state_cc_set_params().
+ */
+struct ilo_state_cc_params_info {
+   float alpha_ref;   /* stored in COLOR_CALC_STATE dword 1 */
+
+   struct ilo_state_cc_stencil_params_info stencil_front;
+   struct ilo_state_cc_stencil_params_info stencil_back;
+
+   float blend_rgba[4];   /* stored in COLOR_CALC_STATE dwords 2..5 */
+};
+
+/**
+ * Pixel processing.  This is the full input of ilo_state_cc_set_info().
+ */
+struct ilo_state_cc_info {
+   struct ilo_state_cc_alpha_info alpha;
+   struct ilo_state_cc_stencil_info stencil;
+   struct ilo_state_cc_depth_info depth;
+   struct ilo_state_cc_blend_info blend;
+
+   struct ilo_state_cc_params_info params;   /* also applied by set_info */
+};
+
+struct ilo_state_cc {
+   uint32_t ds[3];   /* depth/stencil dwords; ds[0..1] updated by set_params */
+   /* Gen8: [0] PS_BLEND, [1] common dword, [2..] RT pairs; else RT pairs */
+   uint8_t blend_state_count;   /* copy of info->blend.rt_count */
+   uint32_t blend[1 + 1 + 2 * ILO_STATE_CC_BLEND_MAX_RT_COUNT];
+
+   uint32_t cc[6];   /* COLOR_CALC_STATE: refs, alpha ref, blend RGBA */
+};
+
+struct ilo_state_cc_delta {
+   uint32_t dirty;   /* bitwise OR of enum ilo_state_cc_dirty_bits values */
+};
+
+bool
+ilo_state_cc_init(struct ilo_state_cc *cc,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_cc_info *info);
+
+bool
+ilo_state_cc_set_info(struct ilo_state_cc *cc,
+                      const struct ilo_dev *dev,
+                      const struct ilo_state_cc_info *info);
+
+bool
+ilo_state_cc_set_params(struct ilo_state_cc *cc,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_cc_params_info *params);
+
+void
+ilo_state_cc_full_delta(const struct ilo_state_cc *cc,
+                        const struct ilo_dev *dev,
+                        struct ilo_state_cc_delta *delta);
+
+void
+ilo_state_cc_get_delta(const struct ilo_state_cc *cc,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_cc *old,
+                       struct ilo_state_cc_delta *delta);
+
+#endif /* ILO_STATE_CC_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_compute.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_compute.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_compute.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_compute.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,435 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_compute.h"
+
+struct compute_urb_configuration {
+   int idrt_entry_count;    /* entries reserved for interface descriptors */
+   int curbe_entry_count;   /* CURBE allocation, in 32-byte (256-bit) units */
+
+   int urb_entry_count;     /* 1 on Gen8, 0 before */
+   /* in 256-bit register increments */
+   int urb_entry_size;
+};
+
+static int
+get_gen6_rob_entry_count(const struct ilo_dev *dev)
+{
+   const bool is_gt2 = (dev->gt == 2);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+    *
+    *     "ROB has 64KB of storage; 2048 entries."
+    *
+    * The valid ranges of "CURBE Allocation Size" further suggest that
+    * interface entries and CURBE data live in the ROB, and that the ROB
+    * should be 16KB, or 512 entries, on Gen7 GT1.
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) || is_gt2)
+      return 2048;
+
+   return (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 512 : 1024;
+}
+
+static int
+get_gen6_idrt_entry_count(const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Haswell PRM, volume 7, page 836:
+    *
+    *     "The first 64 URB entries are reserved for the interface
+    *      description..."
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+      return 64;
+
+   /* Ivy Bridge PRM, volume 2 part 2, page 21: "The first 32 URB entries
+    * are reserved for the interface descriptor..." */
+   return 32;
+}
+
+static int
+get_gen6_curbe_entry_count(const struct ilo_dev *dev, uint32_t curbe_size)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 21:
+    *
+    *     "(CURBE Allocation Size) Specifies the total length allocated for
+    *      CURBE, in 256-bit register increments."
+    */
+   const int entry_count = (curbe_size + 31) / 32;   /* bytes, rounded up */
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* interface descriptor entries plus CURBE entries must fit in the ROB */
+   assert(get_gen6_idrt_entry_count(dev) + entry_count <=
+         get_gen6_rob_entry_count(dev));
+
+   return entry_count;
+}
+
+static bool
+compute_get_gen6_urb_configuration(const struct ilo_dev *dev,
+                                   const struct ilo_state_compute_info *info,
+                                   struct compute_urb_configuration *urb)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* fills every field of *urb; always returns true (checks are asserts) */
+   urb->idrt_entry_count = get_gen6_idrt_entry_count(dev);
+   urb->curbe_entry_count =
+      get_gen6_curbe_entry_count(dev, info->curbe_alloc_size);
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 451:
+    *
+    *     "Please note that 0 is not allowed for this field (Number of URB
+    *      Entries)."
+    */
+   urb->urb_entry_count = (ilo_dev_gen(dev) >= ILO_GEN(8)) ? 1 : 0;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 52:
+    *
+    *     "(URB Entry Allocation Size) Specifies the length of each URB entry
+    *      used by the unit, in 256-bit register increments - 1."
+    */
+   urb->urb_entry_size = 1;   /* actual size; the "- 1" is applied on packing */
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 22:
+    *
+    *     "MEDIA_VFE_STATE specifies the amount of CURBE space, the URB handle
+    *      size and the number of URB handles. The driver must ensure that
+    *      ((URB_handle_size * URB_num_handle) - CURBE - 32) <=
+    *      URB_allocation_in_L3."
+    */
+   assert(urb->idrt_entry_count + urb->curbe_entry_count +
+         urb->urb_entry_count * urb->urb_entry_size <=
+         info->cv_urb_alloc_size / 32);   /* bytes -> 256-bit entries */
+
+   return true;
+}
+
+static int
+compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
+                                    const struct ilo_state_compute_interface_info *interface)
+{
+   const int per_thread_read = (interface->curbe_read_length + 31) / 32;
+   const int cross_thread_read =
+      (interface->cross_thread_curbe_read_length + 31) / 32;
+   /* both lengths above are rounded up to whole 32-byte units */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(interface->curbe_read_offset % 32 == 0);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 60:
+    *
+    *     "(Constant URB Entry Read Length) [0,63]"
+    */
+   assert(per_thread_read <= 63);
+
+   /* From the Haswell PRM, volume 2d, page 199:
+    *
+    *     "(Cross-Thread Constant Data Read Length) [0,127]"
+    */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+      assert(cross_thread_read <= 127);
+   else
+      assert(!cross_thread_read);   /* must be zero before Gen7.5 */
+   /* end = offset + cross-thread part + per-thread part for every thread */
+   if (per_thread_read || cross_thread_read) {
+      return interface->curbe_read_offset / 32 + cross_thread_read +
+         per_thread_read * interface->thread_group_size;
+   } else {
+      return 0;   /* nothing is read, so the offset does not count either */
+   }
+}
+
+static bool
+compute_validate_gen6(const struct ilo_dev *dev,
+                      const struct ilo_state_compute_info *info,
+                      const struct compute_urb_configuration *urb)
+{
+   int min_curbe_entry_count;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* all checks are asserts; the function itself always returns true */
+   assert(info->interface_count <= urb->idrt_entry_count);
+
+   min_curbe_entry_count = 0;   /* becomes the largest read end found */
+   for (i = 0; i < info->interface_count; i++) {
+      const int read_end =
+         compute_interface_get_gen6_read_end(dev, &info->interfaces[i]);
+
+      if (min_curbe_entry_count < read_end)
+         min_curbe_entry_count = read_end;
+   }
+   /* the CURBE allocation must cover the furthest read of any interface */
+   assert(min_curbe_entry_count <= urb->curbe_entry_count);
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 452:
+    *
+    *     "CURBE Allocation Size should be 0 for GPGPU workloads that uses
+    *      indirect instead of CURBE."
+    */
+   if (!min_curbe_entry_count)
+      assert(!urb->curbe_entry_count);
+
+   return true;
+}
+
+static uint8_t
+compute_get_gen6_scratch_space(const struct ilo_dev *dev,
+                               const struct ilo_state_compute_info *info)
+{
+   uint32_t scratch_size = 0;   /* largest scratch_size of all interfaces */
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < info->interface_count; i++) {
+      if (scratch_size < info->interfaces[i].scratch_size)
+         scratch_size = info->interfaces[i].scratch_size;
+   }
+   /* the return value is shifted into MEDIA_VFE_STATE DW1 by the caller */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      assert(scratch_size <= 2 * 1024 * 1024);
+
+      /* next power of two, starting from 1KB */
+      return (scratch_size > 1024) ?
+         (util_last_bit(scratch_size - 1) - 10): 0;
+   } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      assert(scratch_size <= 2 * 1024 * 1024);
+
+      /* next power of two, starting from 2KB */
+      return (scratch_size > 2048) ?
+         (util_last_bit(scratch_size - 1) - 11): 0;
+   } else {
+      assert(scratch_size <= 12 * 1024);
+      /* linear: size in KB rounded up, minus one (0 for anything <= 1KB) */
+      return (scratch_size > 1024) ?
+         (scratch_size - 1) / 1024 : 0;
+   }
+}
+
+static bool
+compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_compute_info *info)
+{
+   struct compute_urb_configuration urb;
+   uint8_t scratch_space;
+   /* only these three dwords of MEDIA_VFE_STATE are built and stored */
+   uint32_t dw1, dw2, dw4;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!compute_get_gen6_urb_configuration(dev, info, &urb) ||
+       !compute_validate_gen6(dev, info, &urb))
+      return false;
+
+   scratch_space = compute_get_gen6_scratch_space(dev, info);
+
+   dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
+   dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
+         urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
+         GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
+         GEN6_VFE_DW2_BYPASS_GATEWAY_CONTROL;
+   /* the GPGPU mode bit is set on Gen7 and Gen7.5 only */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
+      dw2 |= GEN7_VFE_DW2_GPGPU_MODE;
+
+   assert(urb.urb_entry_size);   /* non-zero, as 1 is subtracted just below */
+
+   dw4 = (urb.urb_entry_size - 1) << GEN6_VFE_DW4_URB_ENTRY_SIZE__SHIFT |
+         urb.curbe_entry_count << GEN6_VFE_DW4_CURBE_SIZE__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(compute->vfe) >= 3);
+   compute->vfe[0] = dw1;   /* vfe[] holds DW1, DW2 and DW4, in that order */
+   compute->vfe[1] = dw2;
+   compute->vfe[2] = dw4;
+
+   return true;
+}
+
+static uint8_t
+compute_interface_get_gen6_sampler_count(const struct ilo_dev *dev,
+                                         const struct ilo_state_compute_interface_info *interface)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);   /* result: count / 4 rounded up, capped at 4 */
+   return (interface->sampler_count > 12) ? 4 :
+      (interface->sampler_count + 3) / 4;
+}
+
+static uint8_t
+compute_interface_get_gen6_surface_count(const struct ilo_dev *dev,
+                                         const struct ilo_state_compute_interface_info *interface)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   return (interface->surface_count > 31) ? 31 : interface->surface_count; /* cap at 31 */
+}
+
+static uint8_t
+compute_interface_get_gen7_slm_size(const struct ilo_dev *dev,
+                                    const struct ilo_state_compute_interface_info *interface)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 2, page 61:
+    *
+    *     "The amount is specified in 4k blocks, but only powers of 2 are
+    *      allowed: 0, 4k, 8k, 16k, 32k and 64k per half-slice."
+    */
+   assert(interface->slm_size <= 64 * 1024);
+   /* keep 0 as 0; NOTE(review): the helper presumably rounds 0 up to 1 (4k) */
+   return (interface->slm_size) ?
+      util_next_power_of_two((interface->slm_size + 4095) / 4096) : 0;
+}
+
+static bool
+compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(struct ilo_state_compute *compute,
+                                           const struct ilo_dev *dev,
+                                           const struct ilo_state_compute_info *info)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* encode one 6-dword descriptor per interface into compute->idrt[i] */
+   for (i = 0; i < info->interface_count; i++) {
+      const struct ilo_state_compute_interface_info *interface =
+         &info->interfaces[i];
+      uint16_t read_offset, per_thread_read_len, cross_thread_read_len;
+      uint8_t sampler_count, surface_count;
+      uint32_t dw0, dw2, dw3, dw4, dw5, dw6;
+      /* kernel_offset must be a multiple of 64; a group needs >= 1 thread */
+      assert(interface->kernel_offset % 64 == 0);
+      assert(interface->thread_group_size);
+      /* CURBE offset/lengths are programmed in units of 32, lengths rounded up */
+      read_offset = interface->curbe_read_offset / 32;
+      per_thread_read_len = (interface->curbe_read_length + 31) / 32;
+      cross_thread_read_len =
+         (interface->cross_thread_curbe_read_length + 31) / 32;
+
+      sampler_count =
+         compute_interface_get_gen6_sampler_count(dev, interface);
+      surface_count =
+         compute_interface_get_gen6_surface_count(dev, interface);
+
+      dw0 = interface->kernel_offset;
+      dw2 = sampler_count << GEN6_IDRT_DW2_SAMPLER_COUNT__SHIFT;
+      dw3 = surface_count << GEN6_IDRT_DW3_BINDING_TABLE_SIZE__SHIFT;
+      dw4 = per_thread_read_len << GEN6_IDRT_DW4_CURBE_READ_LEN__SHIFT |
+            read_offset << GEN6_IDRT_DW4_CURBE_READ_OFFSET__SHIFT;
+      /* dw5 and dw6 remain zero before Gen7 */
+      dw5 = 0;
+      dw6 = 0;
+      if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+         const uint8_t slm_size =
+            compute_interface_get_gen7_slm_size(dev, interface);
+
+         dw5 |= GEN7_IDRT_DW5_ROUNDING_MODE_RTNE;
+         /* a non-zero SLM size also enables the barrier */
+         if (slm_size) {
+            dw5 |= GEN7_IDRT_DW5_BARRIER_ENABLE |
+                   slm_size << GEN7_IDRT_DW5_SLM_SIZE__SHIFT;
+         }
+
+         /*
+          * From the Haswell PRM, volume 2d, page 199:
+          *
+          *     "(Number of Threads in GPGPU Thread Group) Specifies the
+          *      number of threads that are in this thread group.  Used to
+          *      program the barrier for the number of messages to expect. The
+          *      minimum value is 0 (which will disable the barrier), while
+          *      the maximum value is the number of threads in a subslice for
+          *      local barriers."
+          *
+          * From the Broadwell PRM, volume 2d, page 183:
+          *
+          *     "(Number of Threads in GPGPU Thread Group) Specifies the
+          *      number of threads that are in this thread group.  The minimum
+          *      value is 1, while the maximum value is the number of threads
+          *      in a subslice for local barriers. See vol1b Configurations
+          *      for the number of threads per subslice for different
+          *      products.  The maximum value for global barriers is limited
+          *      by the number of threads in the system, or by 511, whichever
+          *      is lower. This field should not be set to 0 even if the
+          *      barrier is disabled, since an accurate value is needed for
+          *      proper pre-emption."
+          */
+         if (slm_size || ilo_dev_gen(dev) >= ILO_GEN(8)) {
+            dw5 |= interface->thread_group_size <<
+               GEN7_IDRT_DW5_THREAD_GROUP_SIZE__SHIFT;
+         }
+         /* the cross-thread CURBE read length is only programmed on Gen7.5+ */
+         if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+            dw6 |= cross_thread_read_len <<
+               GEN75_IDRT_DW6_CROSS_THREAD_CURBE_READ_LEN__SHIFT;
+         }
+      }
+      /* the stored dwords are DW0 and DW2-DW6; no DW1 value is kept here */
+      STATIC_ASSERT(ARRAY_SIZE(compute->idrt[i]) >= 6);
+      compute->idrt[i][0] = dw0;
+      compute->idrt[i][1] = dw2;
+      compute->idrt[i][2] = dw3;
+      compute->idrt[i][3] = dw4;
+      compute->idrt[i][4] = dw5;
+      compute->idrt[i][5] = dw6;
+   }
+
+   return true;
+}
+
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_compute_info *info)
+{
+   bool ret = true;
+
+   /* the state object and the caller-provided storage must start zeroed */
+   assert(ilo_is_zeroed(compute, sizeof(*compute)));
+   assert(ilo_is_zeroed(info->data, info->data_size));
+   assert(ilo_state_compute_data_size(dev, info->interface_count) <=
+         info->data_size);
+   /* idrt aliases info->data; idrt_count tells users how many entries it has */
+   compute->idrt = (uint32_t (*)[6]) info->data;
+   compute->idrt_count = info->interface_count;
+
+   ret &= compute_set_gen6_MEDIA_VFE_STATE(compute, dev, info);
+   ret &= compute_set_gen6_INTERFACE_DESCRIPTOR_DATA(compute, dev, info);
+   assert(ret);
+   return ret;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_compute.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_compute.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_compute.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_compute.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,92 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_COMPUTE_H
+#define ILO_STATE_COMPUTE_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Haswell PRM, volume 7, page 836:
+ *
+ *     "The first 64 URB entries are reserved for the interface
+ *      description..."
+ */
+#define ILO_STATE_COMPUTE_MAX_INTERFACE_COUNT 64
+
+struct ilo_state_compute_interface_info {
+   /* usually 0 unless there are multiple interfaces; must be a multiple of 64 */
+   uint32_t kernel_offset;
+   /* NOTE(review): presumably the per-thread scratch size in bytes -- confirm */
+   uint32_t scratch_size;
+   /* encoded in groups of four (at most 4 groups) and capped at 31, respectively */
+   uint8_t sampler_count;
+   uint8_t surface_count;
+   /* threads per group (non-zero) and SLM size in bytes (at most 64KB) */
+   uint16_t thread_group_size;
+   uint32_t slm_size;
+   /* CURBE read offset and lengths; all are programmed in units of 32 */
+   uint16_t curbe_read_offset;
+   uint16_t curbe_read_length;
+   uint16_t cross_thread_curbe_read_length;
+};
+
+struct ilo_state_compute_info {
+   void *data;       /* zeroed storage that ilo_state_compute::idrt will point to */
+   size_t data_size; /* at least ilo_state_compute_data_size(dev, interface_count) */
+
+   const struct ilo_state_compute_interface_info *interfaces;
+   uint8_t interface_count; /* number of entries in interfaces */
+
+   uint32_t cv_urb_alloc_size;
+   uint32_t curbe_alloc_size;
+};
+
+struct ilo_state_compute {
+   uint32_t vfe[3]; /* MEDIA_VFE_STATE DW1, DW2 and DW4 */
+   /* interface descriptors, 6 dwords each, stored in ilo_state_compute_info::data */
+   uint32_t (*idrt)[6];
+   uint8_t idrt_count;
+};
+
+static inline size_t
+ilo_state_compute_data_size(const struct ilo_dev *dev,
+                            uint8_t interface_count)
+{
+   const struct ilo_state_compute *const state = NULL; /* only used by sizeof */
+   return interface_count * sizeof(*state->idrt);
+}
+
+bool
+ilo_state_compute_init(struct ilo_state_compute *compute,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_compute_info *info);
+
+#endif /* ILO_STATE_COMPUTE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_raster.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_raster.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_raster.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_raster.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1252 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_raster.h"
+
+static bool
+raster_validate_gen6_clip(const struct ilo_dev *dev,
+                          const struct ilo_state_raster_info *info)
+{
+   /* sanity-checks info->clip with assertions only; always returns true */
+   const struct ilo_state_raster_clip_info *clip_info = &info->clip;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* at least one viewport is required */
+   assert(clip_info->viewport_count != 0);
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 188:
+    *
+    *     ""Clip Distance Cull Test Enable Bitmask" and "Clip Distance Clip
+    *      Test Enable Bitmask" should not have overlapping bits in the mask,
+    *      else the results are undefined."
+    */
+   assert((clip_info->user_cull_enables & clip_info->user_clip_enables) == 0);
+   /* near and far Z tests cannot be toggled independently before Gen9 */
+   assert(ilo_dev_gen(dev) >= ILO_GEN(9) ||
+          clip_info->z_near_enable == clip_info->z_far_enable);
+   return true;
+}
+
+static bool
+raster_set_gen6_3DSTATE_CLIP(struct ilo_state_raster *rs,
+                             const struct ilo_dev *dev,
+                             const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_clip_info *clip = &info->clip;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* packs 3DSTATE_CLIP DW1-DW3 into rs->clip[0..2] after validating info->clip */
+   if (!raster_validate_gen6_clip(dev, info))
+      return false;
+
+   dw1 = clip->user_cull_enables << GEN6_CLIP_DW1_UCP_CULL_ENABLES__SHIFT;
+
+   if (clip->stats_enable)
+      dw1 |= GEN6_CLIP_DW1_STATISTICS;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
+       *
+       *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
+       *      enabled only for the cases where the incoming primitive topology
+       *      into the clipper guaranteed to be Trilist."
+       *
+       * What does this mean?
+       */
+      dw1 |= GEN7_CLIP_DW1_SUBPIXEL_8BITS |
+             GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
+
+      if (ilo_dev_gen(dev) <= ILO_GEN(7.5)) {
+         dw1 |= tri->front_winding << GEN7_CLIP_DW1_FRONT_WINDING__SHIFT |
+                tri->cull_mode << GEN7_CLIP_DW1_CULL_MODE__SHIFT;
+      }
+   }
+
+   dw2 = clip->user_clip_enables << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
+         GEN6_CLIPMODE_NORMAL << GEN6_CLIP_DW2_CLIP_MODE__SHIFT;
+
+   if (clip->clip_enable)
+      dw2 |= GEN6_CLIP_DW2_CLIP_ENABLE;
+
+   if (clip->z_near_zero)
+      dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
+   else
+      dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;
+
+   if (clip->xy_test_enable)
+      dw2 |= GEN6_CLIP_DW2_XY_TEST_ENABLE;
+   /* Gen8 programs its Z test enable in raster_set_gen8_3DSTATE_RASTER() */
+   if (ilo_dev_gen(dev) < ILO_GEN(8) && clip->z_near_enable)
+      dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
+
+   if (clip->gb_test_enable)
+      dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
+
+   if (scan->barycentric_interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
+                                    GEN6_INTERP_NONPERSPECTIVE_CENTROID |
+                                    GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
+      dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
+
+   if (setup->first_vertex_provoking) {
+      dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
+             0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
+             1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
+             1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
+             2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+   }
+   /* fixed point width limits; MAX_VPINDEX is the viewport count minus one */
+   dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
+         0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT |
+         (clip->viewport_count - 1) << GEN6_CLIP_DW3_MAX_VPINDEX__SHIFT;
+
+   if (clip->force_rtaindex_zero)
+      dw3 |= GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->clip) >= 3);
+   rs->clip[0] = dw1;
+   rs->clip[1] = dw2;
+   rs->clip[2] = dw3;
+
+   return true;
+}
+
+static bool
+raster_params_is_gen6_line_aa_allowed(const struct ilo_dev *dev,
+                                      const struct ilo_state_raster_params_info *params)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Tell whether line anti-aliasing may be enabled.  It is ruled out by
+    * either of the two restrictions below.
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+    *
+    *     "This field (Anti-aliasing Enable) must be disabled if any of the
+    *      render targets have integer (UINT or SINT) surface format."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 321:
+    *
+    *     "[DevSNB+]: This field (Hierarchical Depth Buffer Enable) must be
+    *      disabled if Anti-aliasing Enable in 3DSTATE_SF is enabled."
+    *
+    * The second restriction is only applied on Gen6.
+    */
+
+   return !(params->any_integer_rt ||
+            (ilo_dev_gen(dev) == ILO_GEN(6) && params->hiz_enable));
+}
+
+static void
+raster_get_gen6_effective_line(const struct ilo_dev *dev,
+                               const struct ilo_state_raster_info *info,
+                               struct ilo_state_raster_line_info *line)
+{
+   const bool msaa_lines = info->setup.line_msaa_enable;
+
+   /* start from the requested state, then clear the enables that cannot apply */
+   *line = info->line;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+    *
+    *     "This field (Anti-aliasing Enable) is ignored when Multisample
+    *      Rasterization Mode is MSRASTMODE_ON_xx."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+    *
+    *     "Setting a Line Width of 0.0 specifies the rasterization of the
+    *      "thinnest" (one-pixel-wide), non-antialiased lines. Note that
+    *      this effectively overrides the effect of AAEnable (though the
+    *      AAEnable state variable is not modified). Lines rendered with
+    *      zero Line Width are rasterized using GIQ (Grid Intersection
+    *      Quantization) rules as specified by the GDI and Direct3D APIs."
+    *
+    *     "Software must not program a value of 0.0 when running in
+    *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
+    *      when multisampling rasterization is enabled."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 294:
+    *
+    *     "Line stipple, controlled via the Line Stipple Enable state variable
+    *      in WM_STATE, discards certain pixels that are produced by non-AA
+    *      line rasterization."
+    */
+   if (msaa_lines ||
+       !raster_params_is_gen6_line_aa_allowed(dev, &info->params))
+      line->aa_enable = false;
+   if (line->aa_enable || msaa_lines) {
+      line->giq_last_pixel = false;
+      line->giq_enable = false;
+      line->stipple_enable = false;
+   }
+}
+
+static bool
+raster_validate_gen8_raster(const struct ilo_dev *dev,
+                            const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 249:
+    *
+    *     "This setting (SOLID) is required when rendering rectangle
+    *      (RECTLIST) objects."
+    */
+   if (!(tri->fill_mode_front == GEN6_FILLMODE_SOLID &&
+         tri->fill_mode_back == GEN6_FILLMODE_SOLID))
+      assert(!setup->cv_is_rectangle);
+   /* a violation only trips the assertion; the result is always true */
+   return true;
+}
+
+static enum gen_msrast_mode
+raster_setup_get_gen6_msrast_mode(const struct ilo_dev *dev,
+                                  const struct ilo_state_raster_setup_info *setup)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* msaa_enable picks PATTERN vs. PIXEL; line_msaa_enable picks ON vs. OFF */
+   if (setup->msaa_enable) {
+      return (setup->line_msaa_enable) ? GEN6_MSRASTMODE_ON_PATTERN :
+                                         GEN6_MSRASTMODE_OFF_PATTERN;
+   }
+   return (setup->line_msaa_enable) ? GEN6_MSRASTMODE_ON_PIXEL :
+                                      GEN6_MSRASTMODE_OFF_PIXEL;
+}
+
+static int
+get_gen6_line_width(const struct ilo_dev *dev, float fwidth,
+                    bool line_aa_enable, bool line_giq_enable)
+{
+   /* U3.7 fixed point: scale by 128 and add 0.5 before truncating */
+   int width = (int) (fwidth * 128.0f + 0.5f);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
+    * pixels in the minor direction.  We have to make the lines slightly
+    * thicker, 0.5 pixel on both sides, so that they intersect that many
+    * pixels.
+    */
+   if (line_aa_enable)
+      width = width + 128;
+
+   /* stay non-zero and within the 10 bits of U3.7 */
+   width = CLAMP(width, 1, 1023);
+
+   /* with GIQ requested, a width of exactly 1.0 is programmed as 0 */
+   if (line_giq_enable && width == 128)
+      return 0;
+
+   return width;
+}
+
+static int
+get_gen6_point_width(const struct ilo_dev *dev, float fwidth)
+{
+   int fixed;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* U8.3 fixed point: scale by 8 and add 0.5 before truncating */
+   fixed = (int) (fwidth * 8.0f + 0.5f);
+
+   /* keep the result non-zero and within the 11 bits of U8.3 */
+   return CLAMP(fixed, 1, 2047);
+}
+
+static bool
+raster_set_gen7_3DSTATE_SF(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_clip_info *clip = &info->clip;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_point_info *point = &info->point;
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_params_info *params = &info->params;
+   const enum gen_msrast_mode msrast =
+      raster_setup_get_gen6_msrast_mode(dev, setup);
+   const int line_width = get_gen6_line_width(dev, params->line_width,
+         line->aa_enable, line->giq_enable);
+   const int point_width = get_gen6_point_width(dev, params->point_width);
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+   /* packs 3DSTATE_SF DW1-DW3 into rs->sf[0..2]; also fills rs->raster[0..3] */
+   if (!raster_validate_gen8_raster(dev, info))
+      return false;
+
+   dw1 = tri->fill_mode_front << GEN7_SF_DW1_FILL_MODE_FRONT__SHIFT |
+         tri->fill_mode_back << GEN7_SF_DW1_FILL_MODE_BACK__SHIFT |
+         tri->front_winding << GEN7_SF_DW1_FRONT_WINDING__SHIFT;
+   /* the depth offset format is only programmed on Gen7 and Gen7.5 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5)) {
+      enum gen_depth_format format;
+
+      /* do it here as we want 0x0 to be valid */
+      switch (tri->depth_offset_format) {
+      case GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT:
+         format = GEN6_ZFORMAT_D32_FLOAT;
+         break;
+      case GEN6_ZFORMAT_D24_UNORM_S8_UINT:
+         format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+         break;
+      default:
+         format = tri->depth_offset_format;
+         break;
+      }
+
+      dw1 |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "This bit (Statistics Enable) should be set whenever clipping is
+    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
+    *      should be cleared if clipping is disabled or Statistics Enable in
+    *      CLIP_STATE is clear."
+    */
+   if (clip->stats_enable && clip->clip_enable)
+      dw1 |= GEN7_SF_DW1_STATISTICS;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 258:
+    *
+    *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
+    *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
+    *      Depth Offset Enable Point) should be set whenever non zero depth
+    *      bias (Slope, Bias) values are used. Setting this bit may have some
+    *      degradation of performance for some workloads."
+    *
+    * But it seems fine to ignore that.
+    */
+   if (tri->depth_offset_solid)
+      dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
+   if (tri->depth_offset_wireframe)
+      dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
+   if (tri->depth_offset_point)
+      dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+
+   if (setup->viewport_transform)
+      dw1 |= GEN7_SF_DW1_VIEWPORT_TRANSFORM;
+
+   dw2 = tri->cull_mode << GEN7_SF_DW2_CULL_MODE__SHIFT |
+         line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT |
+         GEN7_SF_DW2_AA_LINE_CAP_1_0 |
+         msrast << GEN7_SF_DW2_MSRASTMODE__SHIFT;
+
+   if (line->aa_enable)
+      dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE;
+
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && line->stipple_enable)
+      dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
+
+   if (setup->scissor_enable)
+      dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
+
+   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+         GEN7_SF_DW3_SUBPIXEL_8BITS;
+
+   /* this has no effect when line_width != 0 */
+   if (line->giq_last_pixel)
+      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   if (setup->first_vertex_provoking) {
+      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+
+   /* setup->point_aa_enable is ignored */
+   if (!point->programmable_width) {
+      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH |
+             point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->sf) >= 3);
+   rs->sf[0] = dw1;
+   rs->sf[1] = dw2;
+   rs->sf[2] = dw3;
+   /* raster[0] is left zero here; raster[1..3] carry the depth offset params */
+   STATIC_ASSERT(ARRAY_SIZE(rs->raster) >= 4);
+   rs->raster[0] = 0;
+   rs->raster[1] = fui(params->depth_offset_const);
+   rs->raster[2] = fui(params->depth_offset_scale);
+   rs->raster[3] = fui(params->depth_offset_clamp);
+   /* remember the effective line AA/GIQ enables that were programmed */
+   rs->line_aa_enable = line->aa_enable;
+   rs->line_giq_enable = line->giq_enable;
+
+   return true;
+}
+
+static bool
+raster_set_gen8_3DSTATE_SF(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_clip_info *clip = &info->clip;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_point_info *point = &info->point;
+   const struct ilo_state_raster_params_info *params = &info->params;
+   const int line_width = get_gen6_line_width(dev, params->line_width,
+         line->aa_enable, line->giq_enable);
+   const int point_width = get_gen6_point_width(dev, params->point_width);
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+   /* packs the Gen8 3DSTATE_SF DW1-DW3 into rs->sf[0..2] */
+   dw1 = 0;
+
+   if (clip->stats_enable && clip->clip_enable)
+      dw1 |= GEN7_SF_DW1_STATISTICS;
+
+   if (setup->viewport_transform)
+      dw1 |= GEN7_SF_DW1_VIEWPORT_TRANSFORM;
+
+   dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT |
+         GEN7_SF_DW2_AA_LINE_CAP_1_0;
+
+   dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+         GEN7_SF_DW3_SUBPIXEL_8BITS;
+
+   /* this has no effect when line_width != 0 */
+   if (line->giq_last_pixel)
+      dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   if (setup->first_vertex_provoking) {
+      dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+             1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+             2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+   /* the state's point width is only used when it is not programmable */
+   if (!point->programmable_width) {
+      dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH |
+             point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->sf) >= 3);
+   rs->sf[0] = dw1;
+   rs->sf[1] = dw2;
+   rs->sf[2] = dw3;
+
+   return true;
+}
+
+static bool
+raster_set_gen8_3DSTATE_RASTER(struct ilo_state_raster *rs,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_raster_info *info,
+                               const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_clip_info *clip = &info->clip;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_point_info *point = &info->point;
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_params_info *params = &info->params;
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+   /* packs 3DSTATE_RASTER DW1 and the depth offset params into rs->raster[0..3] */
+   if (!raster_validate_gen8_raster(dev, info))
+      return false;
+
+   dw1 = tri->front_winding << GEN8_RASTER_DW1_FRONT_WINDING__SHIFT |
+         tri->cull_mode << GEN8_RASTER_DW1_CULL_MODE__SHIFT |
+         tri->fill_mode_front << GEN8_RASTER_DW1_FILL_MODE_FRONT__SHIFT |
+         tri->fill_mode_back << GEN8_RASTER_DW1_FILL_MODE_BACK__SHIFT;
+
+   if (point->aa_enable)
+      dw1 |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
+
+   /* where should line_msaa_enable be set? */
+   if (setup->msaa_enable)
+      dw1 |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
+
+   if (tri->depth_offset_solid)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
+   if (tri->depth_offset_wireframe)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
+   if (tri->depth_offset_point)
+      dw1 |= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
+
+   if (line->aa_enable)
+      dw1 |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
+
+   if (setup->scissor_enable)
+      dw1 |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
+   /* NOTE(review): Gen9 branch looks unreachable given ILO_DEV_ASSERT(dev, 8, 8) */
+   if (ilo_dev_gen(dev) >= ILO_GEN(9)) {
+      if (clip->z_far_enable)
+         dw1 |= GEN9_RASTER_DW1_Z_TEST_FAR_ENABLE;
+      if (clip->z_near_enable)
+         dw1 |= GEN9_RASTER_DW1_Z_TEST_NEAR_ENABLE;
+   } else {
+      if (clip->z_near_enable)
+         dw1 |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->raster) >= 4);
+   rs->raster[0] = dw1;
+   rs->raster[1] = fui(params->depth_offset_const);
+   rs->raster[2] = fui(params->depth_offset_scale);
+   rs->raster[3] = fui(params->depth_offset_clamp);
+   /* remember the effective line AA/GIQ enables that were programmed */
+   rs->line_aa_enable = line->aa_enable;
+   rs->line_giq_enable = line->giq_enable;
+
+   return true;
+}
+
+static enum gen_sample_count
+get_gen6_sample_count(const struct ilo_dev *dev, uint8_t sample_count)
+{
+   enum gen_sample_count c;
+   int min_gen;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* map the count to its hardware enum and to the first Gen that supports it */
+   switch (sample_count) {
+   case 1:
+      c = GEN6_NUMSAMPLES_1;
+      min_gen = ILO_GEN(6);
+      break;
+   case 2:
+      c = GEN8_NUMSAMPLES_2;
+      min_gen = ILO_GEN(8);
+      break;
+   case 4:
+      c = GEN6_NUMSAMPLES_4;
+      min_gen = ILO_GEN(6);
+      break;
+   case 8:
+      c = GEN7_NUMSAMPLES_8;
+      min_gen = ILO_GEN(7);
+      break;
+   case 16:
+      c = GEN8_NUMSAMPLES_16;
+      min_gen = ILO_GEN(8);
+      break;
+   default:
+      assert(!"unexpected sample count");
+      c = GEN6_NUMSAMPLES_1;
+      min_gen = ILO_GEN(6); /* keep the check below from reading garbage */
+      break;
+   }
+
+   assert(ilo_dev_gen(dev) >= min_gen);
+   return c;
+}
+
+static bool
+raster_set_gen8_3DSTATE_MULTISAMPLE(struct ilo_state_raster *rs,
+                                    const struct ilo_dev *dev,
+                                    const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* DW1 carries the encoded sample count and the pixel location */
+   dw1 = get_gen6_sample_count(dev, scan->sample_count) <<
+      GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__SHIFT;
+   dw1 |= scan->pixloc << GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__SHIFT;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 307:
+    *
+    *     "Setting Multisample Rasterization Mode to MSRASTMODE_xxx_PATTERN
+    *      when Number of Multisamples == NUMSAMPLES_1 is UNDEFINED."
+    */
+   if (setup->msaa_enable)
+      assert(scan->sample_count > 1);
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->sample) >= 1);
+   rs->sample[0] = dw1;
+
+   return true;
+}
+
+static bool
+raster_set_gen6_3DSTATE_SAMPLE_MASK(struct ilo_state_raster *rs,
+                                    const struct ilo_dev *dev,
+                                    const struct ilo_state_raster_info *info)
+{
+   uint32_t valid_bits, dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
+    *
+    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
+    *      (Sample Mask) must be zero.
+    *
+    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
+    *      must be zero."
+    */
+   valid_bits = (1 << info->scan.sample_count) - 1;
+   dw1 = (info->scan.sample_mask & valid_bits) <<
+      GEN6_SAMPLE_MASK_DW1_VAL__SHIFT;
+   /* rs->sample[1] keeps only the mask bits valid for the sample count */
+   STATIC_ASSERT(ARRAY_SIZE(rs->sample) >= 2);
+   rs->sample[1] = dw1;
+
+   return true;
+}
+
+static bool
+raster_validate_gen6_wm(const struct ilo_dev *dev,
+                        const struct ilo_state_raster_info *info)
+{
+   /* assertion-only checks of info->scan; the result is always true */
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (scan->earlyz_control != GEN7_EDSC_NORMAL)
+      assert(ilo_dev_gen(dev) != ILO_GEN(6));
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 272:
+    *
+    *     "This bit (Statistics Enable) must be disabled if either of these
+    *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
+    *      Enable or Depth Buffer Resolve Enable."
+    */
+   if (scan->stats_enable)
+      assert(scan->earlyz_op == ILO_STATE_RASTER_EARLYZ_NORMAL);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 273:
+    *
+    *     "If this field (Depth Buffer Resolve Enable) is enabled, the Depth
+    *      Buffer Clear and Hierarchical Depth Buffer Resolve Enable fields
+    *      must both be disabled."
+    *
+    *     "If this field (Hierarchical Depth Buffer Resolve Enable) is
+    *      enabled, the Depth Buffer Clear and Depth Buffer Resolve Enable
+    *      fields must both be disabled."
+    *
+    * This is guaranteed.
+    */
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 314-315:
+    *
+    *     "Stencil buffer clear can be performed at the same time by enabling
+    *      Stencil Buffer Write Enable."
+    *
+    *     "Note also that stencil buffer clear can be performed without depth
+    *      buffer clear."
+    */
+   if (scan->earlyz_op != ILO_STATE_RASTER_EARLYZ_NORMAL &&
+       scan->earlyz_op != ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR)
+      assert(!scan->earlyz_stencil_clear);
+
+   return true;
+}
+
+static bool
+raster_set_gen6_3dstate_wm(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const enum gen_msrast_mode msrast =
+      raster_setup_get_gen6_msrast_mode(dev, setup);
+   /* only scan conversion states are set, as in Gen8+ */
+   uint32_t dw4, dw5, dw6;
+   /* Gen6 only: packs the 3DSTATE_WM dwords DW4-DW6 into rs->wm[0..2] */
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   if (!raster_validate_gen6_wm(dev, info))
+      return false;
+
+   dw4 = 0;
+
+   if (scan->stats_enable)
+      dw4 |= GEN6_WM_DW4_STATISTICS;
+   /* one early-Z bit at most; a stencil clear alone also uses DEPTH_CLEAR */
+   switch (scan->earlyz_op) {
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+      dw4 |= GEN6_WM_DW4_DEPTH_CLEAR;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
+      dw4 |= GEN6_WM_DW4_DEPTH_RESOLVE;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE:
+      dw4 |= GEN6_WM_DW4_HIZ_RESOLVE;
+      break;
+   default:
+      if (scan->earlyz_stencil_clear)
+         dw4 |= GEN6_WM_DW4_DEPTH_CLEAR;
+      break;
+   }
+
+   dw5 = GEN6_WM_DW5_AA_LINE_CAP_1_0 | /* same as in 3DSTATE_SF */
+         GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
+   /* line stipple comes from the line argument, not from info->line */
+   if (tri->poly_stipple_enable)
+      dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
+   if (line->stipple_enable)
+      dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
+
+   dw6 = scan->zw_interp << GEN6_WM_DW6_ZW_INTERP__SHIFT |
+         scan->barycentric_interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT |
+         GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT |
+         msrast << GEN6_WM_DW6_MSRASTMODE__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 3);
+   rs->wm[0] = dw4;
+   rs->wm[1] = dw5;
+   rs->wm[2] = dw6;
+
+   return true;
+}
+
+static bool
+raster_set_gen8_3DSTATE_WM(struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster_info *info,
+                           const struct ilo_state_raster_line_info *line)
+{
+   const struct ilo_state_raster_tri_info *tri = &info->tri;
+   const struct ilo_state_raster_setup_info *setup = &info->setup;
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const enum gen_msrast_mode msrast =
+      raster_setup_get_gen6_msrast_mode(dev, setup);
+   uint32_t dw1;
+   /* Gen7 and Gen8: the whole state fits in one dword, kept in rs->wm[0] */
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!raster_validate_gen6_wm(dev, info))
+      return false;
+
+   dw1 = scan->earlyz_control << GEN7_WM_DW1_EDSC__SHIFT |
+         scan->zw_interp << GEN7_WM_DW1_ZW_INTERP__SHIFT |
+         scan->barycentric_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT |
+         GEN7_WM_DW1_AA_LINE_CAP_1_0 | /* same as in 3DSTATE_SF */
+         GEN7_WM_DW1_AA_LINE_WIDTH_2_0 |
+         GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
+
+   if (scan->stats_enable)
+      dw1 |= GEN7_WM_DW1_STATISTICS;
+   /* the early-Z op bits are only written to this dword before Gen8 */
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      switch (scan->earlyz_op) {
+      case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+         dw1 |= GEN7_WM_DW1_DEPTH_CLEAR;
+         break;
+      case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
+         dw1 |= GEN7_WM_DW1_DEPTH_RESOLVE;
+         break;
+      case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE:
+         dw1 |= GEN7_WM_DW1_HIZ_RESOLVE;
+         break;
+      default:
+         if (scan->earlyz_stencil_clear)
+            dw1 |= GEN7_WM_DW1_DEPTH_CLEAR;
+         break;
+      }
+   }
+
+   if (tri->poly_stipple_enable)
+      dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
+   if (line->stipple_enable)
+      dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
+   /* likewise, the multisample rasterization mode is only written before Gen8 */
+   if (ilo_dev_gen(dev) < ILO_GEN(8))
+      dw1 |= msrast << GEN7_WM_DW1_MSRASTMODE__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 1);
+   rs->wm[0] = dw1;
+
+   return true;
+}
+
+static bool
+raster_set_gen8_3dstate_wm_hz_op(struct ilo_state_raster *rs,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_raster_info *info)
+{
+   const struct ilo_state_raster_scan_info *scan = &info->scan;
+   const enum gen_sample_count count =
+      get_gen6_sample_count(dev, scan->sample_count);
+   const uint32_t mask = (1 << scan->sample_count) - 1;
+   uint32_t dw1, dw4;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   dw1 = count << GEN8_WM_HZ_DW1_NUM_SAMPLES__SHIFT;
+   /* stencil clear has its own bit and is independent of the depth op below */
+   if (scan->earlyz_stencil_clear)
+      dw1 |= GEN8_WM_HZ_DW1_STENCIL_CLEAR;
+
+   switch (scan->earlyz_op) {
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+      dw1 |= GEN8_WM_HZ_DW1_DEPTH_CLEAR;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
+      dw1 |= GEN8_WM_HZ_DW1_DEPTH_RESOLVE;
+      break;
+   case ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE:
+      dw1 |= GEN8_WM_HZ_DW1_HIZ_RESOLVE;
+      break;
+   default:
+      break;
+   }
+   /* only the low sample_count bits of the sample mask are kept */
+   dw4 = (scan->sample_mask & mask) << GEN8_WM_HZ_DW4_SAMPLE_MASK__SHIFT;
+   /* rs->wm[0] is not touched here; DW1 and DW4 go to the next two slots */
+   STATIC_ASSERT(ARRAY_SIZE(rs->wm) >= 3);
+   rs->wm[1] = dw1;
+   rs->wm[2] = dw4;
+
+   return true;
+}
+
+static bool
+sample_pattern_get_gen6_packed_offsets(const struct ilo_dev *dev,
+                                       uint8_t sample_count,
+                                       const struct ilo_state_sample_pattern_offset_info *in,
+                                       uint8_t *out)
+{
+   uint8_t farthest = 0, idx;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* pack every sample as (x << 4 | y), checking the ordering on the way */
+   for (idx = 0; idx < sample_count; idx++) {
+      const int8_t dx = (int8_t) in[idx].x - 8;
+      const int8_t dy = (int8_t) in[idx].y - 8;
+      /* squared distance of this sample from (8, 8) */
+      const uint8_t dist_sq = dx * dx + dy * dy;
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 305:
+       *
+       *     "Programming Note: When programming the sample offsets (for
+       *      NUMSAMPLES_4 or _8 and MSRASTMODE_xxx_PATTERN), the order of the
+       *      samples 0 to 3 (or 7 for 8X) must have monotonically increasing
+       *      distance from the pixel center. This is required to get the
+       *      correct centroid computation in the device."
+       */
+      assert(dist_sq >= farthest);
+      farthest = dist_sq;
+
+      assert(in[idx].x < 16);
+      assert(in[idx].y < 16);
+
+      out[idx] = in[idx].x << 4 | in[idx].y;
+   }
+
+   return true;
+}
+
+static bool
+line_stipple_set_gen6_3DSTATE_LINE_STIPPLE(struct ilo_state_line_stipple *stipple,
+                                           const struct ilo_dev *dev,
+                                           const struct ilo_state_line_stipple_info *info)
+{
+   uint32_t dw1, dw2;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* repeat_count is the divisor below and must lie within [1, 256] */
+   assert(info->repeat_count >= 1 && info->repeat_count <= 256);
+
+   dw1 = info->pattern;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /* in U1.16 */
+      const uint32_t inverse = 65536 / info->repeat_count;
+      dw2 = inverse << GEN7_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
+            info->repeat_count << GEN6_LINE_STIPPLE_DW2_REPEAT_COUNT__SHIFT;
+   } else {
+      /* in U1.13 */
+      const uint16_t inverse = 8192 / info->repeat_count;
+      dw2 = inverse << GEN6_LINE_STIPPLE_DW2_INVERSE_REPEAT_COUNT__SHIFT |
+            info->repeat_count << GEN6_LINE_STIPPLE_DW2_REPEAT_COUNT__SHIFT;
+   }
+   /* DW1 is the pattern; DW2 combines the repeat count and its reciprocal */
+   STATIC_ASSERT(ARRAY_SIZE(stipple->stipple) >= 2);
+   stipple->stipple[0] = dw1;
+   stipple->stipple[1] = dw2;
+
+   return true;
+}
+
+static bool
+sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(struct ilo_state_sample_pattern *pattern,
+                                               const struct ilo_dev *dev,
+                                               const struct ilo_state_sample_pattern_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_1x) >= 1);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_2x) >= 2);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_4x) >= 4);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_8x) >= 8);
+   STATIC_ASSERT(ARRAY_SIZE(pattern->pattern_16x) >= 16);
+   /* pack all five patterns; && stops at the first helper that fails */
+   return (sample_pattern_get_gen6_packed_offsets(dev, 1,
+              info->pattern_1x, pattern->pattern_1x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 2,
+              info->pattern_2x, pattern->pattern_2x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 4,
+              info->pattern_4x, pattern->pattern_4x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 8,
+              info->pattern_8x, pattern->pattern_8x) &&
+           sample_pattern_get_gen6_packed_offsets(dev, 16,
+              info->pattern_16x, pattern->pattern_16x));
+
+}
+
+static bool
+poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_state_poly_stipple *stipple,
+                                                   const struct ilo_dev *dev,
+                                                   const struct ilo_state_poly_stipple_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* the pattern dwords are copied verbatim, sized by the source array */
+   STATIC_ASSERT(ARRAY_SIZE(stipple->stipple) >= 32);
+   memcpy(stipple->stipple, info->pattern, sizeof(info->pattern));
+
+   return true;
+}
+
+bool
+ilo_state_raster_init(struct ilo_state_raster *rs,
+                      const struct ilo_dev *dev,
+                      const struct ilo_state_raster_info *info)
+{
+   assert(ilo_is_zeroed(rs, sizeof(*rs))); /* caller must zero rs first */
+   return ilo_state_raster_set_info(rs, dev, info);
+}
+
+bool
+ilo_state_raster_init_for_rectlist(struct ilo_state_raster *rs,
+                                   const struct ilo_dev *dev,
+                                   uint8_t sample_count,
+                                   enum ilo_state_raster_earlyz_op earlyz_op,
+                                   bool earlyz_stencil_clear)
+{
+   struct ilo_state_raster_info info;
+   struct ilo_state_raster_scan_info *scan = &info.scan;
+   /* every field not assigned below stays zero */
+   memset(&info, 0, sizeof(info));
+   info.clip.viewport_count = 1;
+   info.setup.cv_is_rectangle = true;
+   info.setup.msaa_enable = (sample_count > 1);
+   scan->sample_count = sample_count;
+   scan->sample_mask = ~0u;
+   scan->earlyz_op = earlyz_op;
+   scan->earlyz_stencil_clear = earlyz_stencil_clear;
+
+   return ilo_state_raster_init(rs, dev, &info);
+}
+
+bool
+ilo_state_raster_set_info(struct ilo_state_raster *rs,
+                          const struct ilo_dev *dev,
+                          const struct ilo_state_raster_info *info)
+{
+   struct ilo_state_raster_line_info line;
+   bool ret = true;
+   /* every packer runs even after a failure; ret only accumulates results */
+   ret &= raster_set_gen6_3DSTATE_CLIP(rs, dev, info);
+   /* line is filled here and handed to the SF, RASTER and WM packers below */
+   raster_get_gen6_effective_line(dev, info, &line);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      ret &= raster_set_gen8_3DSTATE_SF(rs, dev, info, &line);
+      ret &= raster_set_gen8_3DSTATE_RASTER(rs, dev, info, &line);
+   } else {
+      ret &= raster_set_gen7_3DSTATE_SF(rs, dev, info, &line);
+   }
+
+   ret &= raster_set_gen8_3DSTATE_MULTISAMPLE(rs, dev, info);
+   ret &= raster_set_gen6_3DSTATE_SAMPLE_MASK(rs, dev, info);
+   /* Gen7+ share one WM packer; Gen8 additionally packs 3DSTATE_WM_HZ_OP */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      ret &= raster_set_gen8_3DSTATE_WM(rs, dev, info, &line);
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         ret &= raster_set_gen8_3dstate_wm_hz_op(rs, dev, info);
+   } else {
+      ret &= raster_set_gen6_3dstate_wm(rs, dev, info, &line);
+   }
+
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_raster_set_params(struct ilo_state_raster *rs,
+                            const struct ilo_dev *dev,
+                            const struct ilo_state_raster_params_info *params)
+{
+   const bool line_aa_enable = (rs->line_aa_enable &&
+         raster_params_is_gen6_line_aa_allowed(dev, params));
+   const int line_width = get_gen6_line_width(dev, params->line_width,
+         line_aa_enable, rs->line_giq_enable);
+   /* patches the dwords already stored in rs; always returns true */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* modify line AA enable (RASTER DW1 on Gen8+, SF DW2 otherwise) */
+   if (rs->line_aa_enable) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+         if (line_aa_enable)
+            rs->raster[0] |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
+         else
+            rs->raster[0] &= ~GEN8_RASTER_DW1_AA_LINE_ENABLE;
+      } else {
+         if (line_aa_enable)
+            rs->sf[1] |= GEN7_SF_DW2_AA_LINE_ENABLE;
+         else
+            rs->sf[1] &= ~GEN7_SF_DW2_AA_LINE_ENABLE;
+      }
+   }
+
+   /* modify line width */
+   rs->sf[1] = (rs->sf[1] & ~GEN7_SF_DW2_LINE_WIDTH__MASK) |
+               line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+
+   /* modify point width, only when USE_POINT_WIDTH is set in SF DW3 */
+   if (rs->sf[2] & GEN7_SF_DW3_USE_POINT_WIDTH) {
+      const int point_width = get_gen6_point_width(dev, params->point_width);
+
+      rs->sf[2] = (rs->sf[2] & ~GEN7_SF_DW3_POINT_WIDTH__MASK) |
+                  point_width << GEN7_SF_DW3_POINT_WIDTH__SHIFT;
+   }
+
+   /* modify depth offset (stored on every generation, not just Gen8+) */
+   rs->raster[1] = fui(params->depth_offset_const);
+   rs->raster[2] = fui(params->depth_offset_scale);
+   rs->raster[3] = fui(params->depth_offset_clamp);
+
+   return true;
+}
+
+void
+ilo_state_raster_full_delta(const struct ilo_state_raster *rs,
+                            const struct ilo_dev *dev,
+                            struct ilo_state_raster_delta *delta)
+{
+   uint32_t dirty = ILO_STATE_RASTER_3DSTATE_CLIP |
+                    ILO_STATE_RASTER_3DSTATE_SF |
+                    ILO_STATE_RASTER_3DSTATE_MULTISAMPLE |
+                    ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK |
+                    ILO_STATE_RASTER_3DSTATE_WM |
+                    ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS;
+   /* 3DSTATE_RASTER and 3DSTATE_WM_HZ_OP are only flagged on Gen8+ */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      dirty |= ILO_STATE_RASTER_3DSTATE_RASTER |
+               ILO_STATE_RASTER_3DSTATE_WM_HZ_OP;
+   delta->dirty = dirty;
+}
+
+void
+ilo_state_raster_get_delta(const struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster *old,
+                           struct ilo_state_raster_delta *delta)
+{
+   delta->dirty = 0;
+   /* flag each command whose dwords differ; AA_LINE_PARAMETERS is never set */
+   if (memcmp(rs->clip, old->clip, sizeof(rs->clip)))
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_CLIP;
+
+   if (memcmp(rs->sf, old->sf, sizeof(rs->sf)))
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF;
+   /* before Gen8 a change in rs->raster is reported as 3DSTATE_SF instead */
+   if (memcmp(rs->raster, old->raster, sizeof(rs->raster))) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         delta->dirty |= ILO_STATE_RASTER_3DSTATE_RASTER;
+      else
+         delta->dirty |= ILO_STATE_RASTER_3DSTATE_SF;
+   }
+   /* rs->sample backs both sample commands, so they are flagged together */
+   if (memcmp(rs->sample, old->sample, sizeof(rs->sample))) {
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_MULTISAMPLE |
+                      ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK;
+   }
+
+   if (memcmp(rs->wm, old->wm, sizeof(rs->wm))) {
+      delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM;
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         delta->dirty |= ILO_STATE_RASTER_3DSTATE_WM_HZ_OP;
+   }
+}
+
+bool
+ilo_state_sample_pattern_init(struct ilo_state_sample_pattern *pattern,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_sample_pattern_info *info)
+{
+   /* a single packer fills the whole state; its result is the overall result */
+   const bool ok =
+      sample_pattern_set_gen8_3DSTATE_SAMPLE_PATTERN(pattern, dev, info);
+
+   assert(ok);
+
+   return ok;
+}
+
+bool
+ilo_state_sample_pattern_init_default(struct ilo_state_sample_pattern *pattern,
+                                      const struct ilo_dev *dev)
+{
+   static const struct ilo_state_sample_pattern_info default_info = {
+      .pattern_1x = { /* entries are { x, y } in U0.4 */
+         {  8,  8 },
+      },
+
+      .pattern_2x = {
+         {  4,  4 }, { 12, 12 },
+      },
+
+      .pattern_4x = {
+         {  6,  2 }, { 14,  6 }, {  2, 10 }, { 10, 14 },
+      },
+
+      /* \see brw_multisample_positions_8x */
+      .pattern_8x = {
+         {  7,  9 }, {  9, 13 }, { 11,  3 }, { 13, 11 },
+         {  1,  7 }, {  5,  1 }, { 15,  5 }, {  3, 15 },
+      },
+
+      .pattern_16x = {
+         {  8, 10 }, { 11,  8 }, {  5,  6 }, {  6,  4 },
+         { 12, 11 }, { 13,  9 }, { 14,  7 }, { 10,  2 },
+         {  4, 13 }, {  3,  3 }, {  7,  1 }, { 15,  5 },
+         {  1, 12 }, {  9,  0 }, {  2, 14 }, {  0, 15 },
+      },
+   };
+   /* the table above is packed exactly like a caller-supplied info */
+   return ilo_state_sample_pattern_init(pattern, dev, &default_info);
+}
+
+const uint8_t *
+ilo_state_sample_pattern_get_packed_offsets(const struct ilo_state_sample_pattern *pattern,
+                                            const struct ilo_dev *dev,
+                                            uint8_t sample_count)
+{
+   /* each supported sample count has its own packed array */
+   if (sample_count == 1)  return pattern->pattern_1x;
+   if (sample_count == 2)  return pattern->pattern_2x;
+   if (sample_count == 4)  return pattern->pattern_4x;
+   if (sample_count == 8)  return pattern->pattern_8x;
+   if (sample_count == 16) return pattern->pattern_16x;
+
+   /* any other count trips the assert and yields NULL */
+   assert(!"unknown sample count");
+   return NULL;
+}
+
+void
+ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *pattern,
+                                    const struct ilo_dev *dev,
+                                    uint8_t sample_count, uint8_t sample_index,
+                                    uint8_t *x, uint8_t *y)
+{
+   const uint8_t *packed =
+      ilo_state_sample_pattern_get_packed_offsets(pattern, dev, sample_count);
+   /* sample_count must be a supported count and sample_index must be below it */
+   assert(sample_index < sample_count);
+   /* unpack one byte: x is stored in bits 7:4, y in bits 3:0 */
+   *x = (packed[sample_index] >> 4) & 0xf;
+   *y = packed[sample_index] & 0xf;
+}
+
+/**
+ * Store the line stipple described by info.  No need to initialize first.
+ */
+bool
+ilo_state_line_stipple_set_info(struct ilo_state_line_stipple *stipple,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_line_stipple_info *info)
+{
+   bool ok;
+
+   ok = line_stipple_set_gen6_3DSTATE_LINE_STIPPLE(stipple, dev, info);
+
+   /* debug builds stop here if the packer ever fails */
+   assert(ok);
+
+   return ok;
+}
+
+/**
+ * Store the polygon stipple described by info.  No need to initialize first.
+ */
+bool
+ilo_state_poly_stipple_set_info(struct ilo_state_poly_stipple *stipple,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_poly_stipple_info *info)
+{
+   bool ok;
+
+   ok = poly_stipple_set_gen6_3DSTATE_POLY_STIPPLE_PATTERN(stipple, dev, info);
+
+   /* debug builds stop here if the packer ever fails */
+   assert(ok);
+
+   return ok;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_raster.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_raster.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_raster.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_raster.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,301 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_RASTER_H
+#define ILO_STATE_RASTER_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+enum ilo_state_raster_dirty_bits { /* flags of ilo_state_raster_delta::dirty */
+   ILO_STATE_RASTER_3DSTATE_CLIP                   = (1 << 0),
+   ILO_STATE_RASTER_3DSTATE_SF                     = (1 << 1),
+   ILO_STATE_RASTER_3DSTATE_RASTER                 = (1 << 2), /* Gen8+ */
+   ILO_STATE_RASTER_3DSTATE_MULTISAMPLE            = (1 << 3),
+   ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK            = (1 << 4),
+   ILO_STATE_RASTER_3DSTATE_WM                     = (1 << 5),
+   ILO_STATE_RASTER_3DSTATE_WM_HZ_OP               = (1 << 6), /* Gen8+ */
+   ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS     = (1 << 7),
+};
+
+enum ilo_state_raster_earlyz_op {
+   ILO_STATE_RASTER_EARLYZ_NORMAL, /* no depth clear or resolve op */
+   ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR,
+   ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE,
+   ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE,
+};
+
+/**
+ * VUE readback, VertexClipTest, ClipDetermination, and primitive output.
+ */
+struct ilo_state_raster_clip_info {
+   bool clip_enable;
+   /* CL_INVOCATION_COUNT and CL_PRIMITIVES_COUNT */
+   bool stats_enable;
+
+   uint8_t viewport_count; /* ilo_state_raster_init_for_rectlist() uses 1 */
+   bool force_rtaindex_zero;
+
+   /* these should be mutually exclusive */
+   uint8_t user_cull_enables;
+   uint8_t user_clip_enables;
+
+   bool gb_test_enable;
+   bool xy_test_enable;
+
+   /* far/near must be enabled together prior to Gen9 */
+   bool z_far_enable;
+   bool z_near_enable;
+   bool z_near_zero;
+};
+
+/**
+ * Primitive assembly, viewport transformation, scissoring, MSAA, etc.
+ */
+struct ilo_state_raster_setup_info {
+   bool cv_is_rectangle; /* set by ilo_state_raster_init_for_rectlist() */
+
+   bool first_vertex_provoking;
+   bool viewport_transform;
+
+   bool scissor_enable;
+
+   /* MSAA enables for lines and non-lines */
+   bool msaa_enable;
+   bool line_msaa_enable;
+};
+
+/**
+ * 3DOBJ_POINT rasterization rules.
+ */
+struct ilo_state_raster_point_info {
+   /* ignored when msaa_enable is set */
+   bool aa_enable;
+   /* NOTE(review): presumably the opposite of USE_POINT_WIDTH in SF -- confirm */
+   bool programmable_width;
+};
+
+/**
+ * 3DOBJ_LINE rasterization rules.
+ */
+struct ilo_state_raster_line_info {
+   /* ignored when line_msaa_enable is set */
+   bool aa_enable;
+
+   /* ignored when line_msaa_enable or aa_enable is set */
+   bool stipple_enable; /* read by the 3DSTATE_WM packers */
+   bool giq_enable;
+   bool giq_last_pixel;
+};
+
+/**
+ * 3DOBJ_TRIANGLE rasterization rules.
+ */
+struct ilo_state_raster_tri_info {
+   enum gen_front_winding front_winding;
+   enum gen_cull_mode cull_mode;
+   enum gen_fill_mode fill_mode_front;
+   enum gen_fill_mode fill_mode_back;
+
+   enum gen_depth_format depth_offset_format;
+   bool depth_offset_solid;
+   bool depth_offset_wireframe;
+   bool depth_offset_point;
+
+   bool poly_stipple_enable; /* read by the 3DSTATE_WM packers */
+};
+
+/**
+ * Scan conversion.
+ */
+struct ilo_state_raster_scan_info {
+   /* PS_DEPTH_COUNT and PS_INVOCATION_COUNT */
+   bool stats_enable; /* must be off unless earlyz_op is NORMAL */
+
+   uint8_t sample_count;
+
+   /* pixel location for non-MSAA or 1x-MSAA */
+   enum gen_pixel_location pixloc;
+
+   uint32_t sample_mask; /* only the low sample_count bits are packed */
+
+   /* interpolations */
+   enum gen_zw_interp zw_interp;
+   uint8_t barycentric_interps;
+
+   /* earlyz_control is Gen7+ only; the two fields after it apply to Gen6 too */
+   enum gen_edsc_mode earlyz_control;
+   enum ilo_state_raster_earlyz_op earlyz_op;
+   bool earlyz_stencil_clear; /* only with earlyz_op NORMAL or DEPTH_CLEAR */
+};
+
+/**
+ * Raster parameters.
+ */
+struct ilo_state_raster_params_info {
+   bool any_integer_rt;
+   bool hiz_enable;
+
+   float point_width; /* applied only when SF has USE_POINT_WIDTH set */
+   float line_width;
+
+   /* const term will be scaled by 'r' */
+   float depth_offset_const; /* stored in ilo_state_raster::raster[1] */
+   float depth_offset_scale; /* stored in ilo_state_raster::raster[2] */
+   float depth_offset_clamp; /* stored in ilo_state_raster::raster[3] */
+};
+
+struct ilo_state_raster_info { /* input of ilo_state_raster_set_info() */
+   struct ilo_state_raster_clip_info clip;
+   struct ilo_state_raster_setup_info setup;
+   struct ilo_state_raster_point_info point;
+   struct ilo_state_raster_line_info line;
+   struct ilo_state_raster_tri_info tri;
+   struct ilo_state_raster_scan_info scan;
+
+   struct ilo_state_raster_params_info params;
+};
+
+struct ilo_state_raster {
+   uint32_t clip[3];
+   uint32_t sf[3]; /* [1] carries the line width, [2] the point width */
+   uint32_t raster[4]; /* [1..3]: depth offset const, scale, clamp */
+   uint32_t sample[2]; /* [1]: 3DSTATE_SAMPLE_MASK dword */
+   uint32_t wm[3]; /* Gen6: WM DW4-6; Gen7+: [0] WM DW1; Gen8: [1..2] WM_HZ_OP */
+
+   bool line_aa_enable;
+   bool line_giq_enable;
+};
+
+struct ilo_state_raster_delta {
+   uint32_t dirty; /* OR of enum ilo_state_raster_dirty_bits */
+};
+
+struct ilo_state_sample_pattern_offset_info {
+   /* in U0.4; both values must be below 16 */
+   uint8_t x;
+   uint8_t y;
+};
+
+struct ilo_state_sample_pattern_info { /* samples ordered by distance from (8, 8) */
+   struct ilo_state_sample_pattern_offset_info pattern_1x[1];
+   struct ilo_state_sample_pattern_offset_info pattern_2x[2];
+   struct ilo_state_sample_pattern_offset_info pattern_4x[4];
+   struct ilo_state_sample_pattern_offset_info pattern_8x[8];
+   struct ilo_state_sample_pattern_offset_info pattern_16x[16];
+};
+
+struct ilo_state_sample_pattern { /* one byte per sample: x << 4 | y */
+   uint8_t pattern_1x[1];
+   uint8_t pattern_2x[2];
+   uint8_t pattern_4x[4];
+   uint8_t pattern_8x[8];
+   uint8_t pattern_16x[16];
+};
+
+struct ilo_state_line_stipple_info {
+   uint16_t pattern;
+   uint16_t repeat_count; /* must be within [1, 256] */
+};
+
+struct ilo_state_line_stipple {
+   uint32_t stipple[2]; /* 3DSTATE_LINE_STIPPLE DW1 and DW2 */
+};
+
+struct ilo_state_poly_stipple_info {
+   uint32_t pattern[32]; /* copied verbatim into ilo_state_poly_stipple */
+};
+
+struct ilo_state_poly_stipple {
+   uint32_t stipple[32]; /* copy of ilo_state_poly_stipple_info::pattern */
+};
+
+bool
+ilo_state_raster_init(struct ilo_state_raster *rs,
+                      const struct ilo_dev *dev,
+                      const struct ilo_state_raster_info *info);
+/* init with an info built from the sample count and early-Z op alone */
+bool
+ilo_state_raster_init_for_rectlist(struct ilo_state_raster *rs,
+                                   const struct ilo_dev *dev,
+                                   uint8_t sample_count,
+                                   enum ilo_state_raster_earlyz_op earlyz_op,
+                                   bool earlyz_stencil_clear);
+/* run every command packer that applies to the device generation */
+bool
+ilo_state_raster_set_info(struct ilo_state_raster *rs,
+                          const struct ilo_dev *dev,
+                          const struct ilo_state_raster_info *info);
+/* patch line AA, line width, point width and depth offset in place */
+bool
+ilo_state_raster_set_params(struct ilo_state_raster *rs,
+                            const struct ilo_dev *dev,
+                            const struct ilo_state_raster_params_info *params);
+/* flag every command, adding the Gen8-only ones on Gen8+ */
+void
+ilo_state_raster_full_delta(const struct ilo_state_raster *rs,
+                            const struct ilo_dev *dev,
+                            struct ilo_state_raster_delta *delta);
+/* flag only the commands whose dwords differ between rs and old */
+void
+ilo_state_raster_get_delta(const struct ilo_state_raster *rs,
+                           const struct ilo_dev *dev,
+                           const struct ilo_state_raster *old,
+                           struct ilo_state_raster_delta *delta);
+/* pack the { x, y } offsets of all five patterns */
+bool
+ilo_state_sample_pattern_init(struct ilo_state_sample_pattern *pattern,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_sample_pattern_info *info);
+/* same as above, using the built-in default patterns */
+bool
+ilo_state_sample_pattern_init_default(struct ilo_state_sample_pattern *pattern,
+                                      const struct ilo_dev *dev);
+/* packed array for sample_count (1, 2, 4, 8 or 16), NULL otherwise */
+const uint8_t *
+ilo_state_sample_pattern_get_packed_offsets(const struct ilo_state_sample_pattern *pattern,
+                                            const struct ilo_dev *dev,
+                                            uint8_t sample_count);
+/* unpack the x and y offsets of one sample */
+void
+ilo_state_sample_pattern_get_offset(const struct ilo_state_sample_pattern *pattern,
+                                    const struct ilo_dev *dev,
+                                    uint8_t sample_count, uint8_t sample_index,
+                                    uint8_t *x, uint8_t *y);
+bool
+ilo_state_line_stipple_set_info(struct ilo_state_line_stipple *stipple,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_line_stipple_info *info);
+/* store the polygon stipple pattern from info */
+bool
+ilo_state_poly_stipple_set_info(struct ilo_state_poly_stipple *stipple,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_poly_stipple_info *info);
+
+#endif /* ILO_STATE_RASTER_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sampler.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sampler.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sampler.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sampler.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,742 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "util/u_half.h"
+
+#include "ilo_debug.h"
+#include "ilo_state_surface.h"
+#include "ilo_state_sampler.h"
+
+static bool
+sampler_validate_gen6_non_normalized(const struct ilo_dev *dev,
+                                     const struct ilo_state_sampler_info *info)
+{
+   const enum gen_texcoord_mode addr_ctrls[3] = {
+      info->tcx_ctrl, info->tcy_ctrl, info->tcz_ctrl,
+   };
+   int i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 98:
+    *
+    *     "The following state must be set as indicated if this field
+    *      (Non-normalized Coordinate Enable) is enabled:
+    *
+    *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
+    *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
+    *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
+    *      - Mag Mode Filter must be MAPFILTER_NEAREST or
+    *        MAPFILTER_LINEAR.
+    *      - Min Mode Filter must be MAPFILTER_NEAREST or
+    *        MAPFILTER_LINEAR.
+    *      - Mip Mode Filter must be MIPFILTER_NONE.
+    *      - Min LOD must be 0.
+    *      - Max LOD must be 0.
+    *      - MIP Count must be 0.
+    *      - Surface Min LOD must be 0.
+    *      - Texture LOD Bias must be 0."
+    */
+   for (i = 0; i < 3; i++) {
+      switch (addr_ctrls[i]) {
+      case GEN6_TEXCOORDMODE_CLAMP:
+      case GEN6_TEXCOORDMODE_CLAMP_BORDER:
+      case GEN8_TEXCOORDMODE_HALF_BORDER:
+         break;
+      default:
+         assert(!"bad non-normalized coordinate wrap mode");
+         break;
+      }
+   }
+
+   assert(info->mip_filter == GEN6_MIPFILTER_NONE);
+
+   assert((info->min_filter == GEN6_MAPFILTER_NEAREST ||
+           info->min_filter == GEN6_MAPFILTER_LINEAR) &&
+          (info->mag_filter == GEN6_MAPFILTER_NEAREST ||
+           info->mag_filter == GEN6_MAPFILTER_LINEAR));
+
+   assert(info->min_lod == 0.0f &&
+          info->max_lod == 0.0f &&
+          info->lod_bias == 0.0f);
+
+   return true;
+}
+
+static bool
+sampler_validate_gen6_sampler(const struct ilo_dev *dev,
+                              const struct ilo_state_sampler_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (info->non_normalized &&
+       !sampler_validate_gen6_non_normalized(dev, info))
+      return false;
+
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+       assert(info->tcx_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER &&
+              info->tcy_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER &&
+              info->tcz_ctrl != GEN8_TEXCOORDMODE_HALF_BORDER);
+   }
+
+   return true;
+}
+
+static uint32_t
+sampler_get_gen6_integer_filters(const struct ilo_dev *dev,
+                                 const struct ilo_state_sampler_info *info)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+    *
+    *     "MIPFILTER_LINEAR is not supported for surface formats that do not
+    *      support "Sampling Engine Filtering" as indicated in the Surface
+    *      Formats table unless using the sample_c message type."
+    *
+    *     "Only MAPFILTER_NEAREST is supported for surface formats that do not
+    *      support "Sampling Engine Filtering" as indicated in the Surface
+    *      Formats table unless using the sample_c message type."
+    */
+   const enum gen_mip_filter mip_filter =
+      (info->mip_filter == GEN6_MIPFILTER_LINEAR) ?
+      GEN6_MIPFILTER_NEAREST : info->mip_filter;
+   const enum gen_map_filter min_filter = GEN6_MAPFILTER_NEAREST;
+   const enum gen_map_filter mag_filter = GEN6_MAPFILTER_NEAREST;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT |
+          mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT |
+          min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT;
+}
+
+static uint32_t
+sampler_get_gen6_3d_filters(const struct ilo_dev *dev,
+                            const struct ilo_state_sampler_info *info)
+{
+   const enum gen_mip_filter mip_filter = info->mip_filter;
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 103:
+    *
+    *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
+    *      surfaces of type SURFTYPE_3D."
+    */
+   const enum gen_map_filter min_filter =
+      (info->min_filter == GEN6_MAPFILTER_NEAREST ||
+       info->min_filter == GEN6_MAPFILTER_LINEAR) ?
+      info->min_filter : GEN6_MAPFILTER_LINEAR;
+   const enum gen_map_filter mag_filter =
+      (info->mag_filter == GEN6_MAPFILTER_NEAREST ||
+       info->mag_filter == GEN6_MAPFILTER_LINEAR) ?
+       info->mag_filter : GEN6_MAPFILTER_LINEAR;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT |
+          mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT |
+          min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT;
+}
+
+static uint32_t
+get_gen6_addr_controls(const struct ilo_dev *dev,
+                       enum gen_texcoord_mode tcx_ctrl,
+                       enum gen_texcoord_mode tcy_ctrl,
+                       enum gen_texcoord_mode tcz_ctrl)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* pack the U/V/R wrap modes; Gen6 and Gen7+ use separate shift macros */
+   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
+      return tcx_ctrl << GEN6_SAMPLER_DW1_U_WRAP__SHIFT |
+             tcy_ctrl << GEN6_SAMPLER_DW1_V_WRAP__SHIFT |
+             tcz_ctrl << GEN6_SAMPLER_DW1_R_WRAP__SHIFT;
+   }
+   return tcx_ctrl << GEN7_SAMPLER_DW3_U_WRAP__SHIFT |
+          tcy_ctrl << GEN7_SAMPLER_DW3_V_WRAP__SHIFT |
+          tcz_ctrl << GEN7_SAMPLER_DW3_R_WRAP__SHIFT;
+}
+
+static uint32_t
+sampler_get_gen6_1d_addr_controls(const struct ilo_dev *dev,
+                                  const struct ilo_state_sampler_info *info)
+{
+   const enum gen_texcoord_mode tcx_ctrl =
+      (info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcx_ctrl;
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 100:
+    *
+    *     "If this field (TCY Address Control Mode) is set to
+    *      TEXCOORDMODE_CLAMP_BORDER or TEXCOORDMODE_HALF_BORDER and a 1D
+    *      surface is sampled, incorrect blending with the border color in the
+    *      vertical direction may occur."
+    */
+   const enum gen_texcoord_mode tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+   const enum gen_texcoord_mode tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl);
+}
+
+static uint32_t
+sampler_get_gen6_2d_3d_addr_controls(const struct ilo_dev *dev,
+                                     const struct ilo_state_sampler_info *info)
+{
+   const enum gen_texcoord_mode tcx_ctrl =
+      (info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcx_ctrl;
+   const enum gen_texcoord_mode tcy_ctrl =
+      (info->tcy_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcy_ctrl;
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 108:
+    *
+    *     "[DevSNB]: if this field (TCZ Address Control Mode) is set to
+    *      TEXCOORDMODE_CLAMP_BORDER samples outside the map will clamp to 0
+    *      instead of boarder color"
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 100:
+    *
+    *     "If this field is set to TEXCOORDMODE_CLAMP_BORDER for 3D maps on
+    *      formats without an alpha channel, samples straddling the map in the
+    *      Z direction may have their alpha channels off by 1."
+    *
+    * Do we want to do something here?
+    */
+   const enum gen_texcoord_mode tcz_ctrl =
+      (info->tcz_ctrl == GEN6_TEXCOORDMODE_CUBE) ?
+      GEN6_TEXCOORDMODE_CLAMP : info->tcz_ctrl;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl);
+}
+
+static uint32_t
+sampler_get_gen6_cube_addr_controls(const struct ilo_dev *dev,
+                                    const struct ilo_state_sampler_info *info)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 99:
+    *
+    *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
+    *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
+    *      must have the same Address Control mode.
+    *
+    *      When TEXCOORDMODE_CUBE is not used accessing a cube map, the map's
+    *      Cube Face Enable field must be programmed to 111111b (all faces
+    *      enabled)."
+    *
+    * From the Haswell PRM, volume 2d, page 278:
+    *
+    *     "When using cube map texture coordinates, each TC component must
+    *      have the same Address Control Mode.
+    *
+    *      When TEXCOORDMODE_CUBE is not used accessing a cube map, the map's
+    *      Cube Face Enable field must be programmed to 111111b (all faces
+    *      enabled)."
+    *
+    * We always enable all cube faces and only need to make sure all address
+    * control modes are the same.
+    */
+   const enum gen_texcoord_mode tcx_ctrl =
+      (ilo_dev_gen(dev) >= ILO_GEN(7.5) ||
+       info->tcx_ctrl == GEN6_TEXCOORDMODE_CUBE ||
+       info->tcx_ctrl == GEN6_TEXCOORDMODE_CLAMP) ?
+      info->tcx_ctrl : GEN6_TEXCOORDMODE_CLAMP;
+   const enum gen_texcoord_mode tcy_ctrl = tcx_ctrl;
+   const enum gen_texcoord_mode tcz_ctrl = tcx_ctrl;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return get_gen6_addr_controls(dev, tcx_ctrl, tcy_ctrl, tcz_ctrl);
+}
+
+static uint16_t
+get_gen6_lod_bias(const struct ilo_dev *dev, float bias)
+{
+   /* the field is S4.8 on Gen7+ and S4.6 on Gen6, covering [-16.0, 16.0) */
+   const int frac_bits = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 8 : 6;
+   const int field_mask = (1 << (1 + 4 + frac_bits)) - 1;
+   const int fixed_max = (16 << frac_bits) - 1;
+   const float limit = 16.0f;
+   float clamped;
+   int fixed;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* limit the bias to [-16.0, 16.0] before the conversion */
+   clamped = (bias > limit) ? limit : (bias < -limit) ? -limit : bias;
+
+   /* 16.0 itself is not representable; use the largest positive value */
+   fixed = (int) (clamped * (float) (1 << frac_bits));
+   if (fixed > fixed_max)
+      fixed = fixed_max;
+
+   /* keep only the sign, integer, and fraction bits */
+   return (fixed & field_mask);
+}
+
+static uint16_t
+get_gen6_lod_clamp(const struct ilo_dev *dev, float clamp)
+{
+   /* Gen7+ takes U4.8 limited to 14.0; Gen6 takes U4.6 limited to 13.0 */
+   const bool gen7_plus = (ilo_dev_gen(dev) >= ILO_GEN(7));
+   const int frac_bits = (gen7_plus) ? 8 : 6;
+   const float upper = (gen7_plus) ? 14.0f : 13.0f;
+   float lod;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* bring the requested value into [0.0, upper] */
+   lod = (clamp > upper) ? upper : (clamp < 0.0f) ? 0.0f : clamp;
+
+   /* convert to fixed point */
+   return (int) (lod * (float) (1 << frac_bits));
+}
+
+static bool
+sampler_set_gen6_SAMPLER_STATE(struct ilo_state_sampler *sampler,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_sampler_info *info)
+{
+   uint16_t lod_bias, max_lod, min_lod;
+   uint32_t dw0, dw1, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!sampler_validate_gen6_sampler(dev, info))
+      return false;
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 15:
+    *
+    *     "The per-pixel LOD is computed in an implementation-dependent manner
+    *      and approximates the log2 of the texel/pixel ratio at the given
+    *      pixel. The computation is typically based on the differential
+    *      texel-space distances associated with a one-pixel differential
+    *      distance along the screen x- and y-axes. These texel-space
+    *      distances are computed by evaluating neighboring pixel texture
+    *      coordinates, these coordinates being in units of texels on the base
+    *      MIP level (multiplied by the corresponding surface size in
+    *      texels)."
+    *
+    * Judging from the LOD computation pseudocode on page 16-18, the "base MIP
+    * level" should be given by SurfMinLod.  To summarize, for the "sample"
+    * message,
+    *
+    *   1) LOD is set to log2(texel/pixel ratio).  The number of texels is
+    *      measured against level SurfMinLod.
+    *   2) Bias is added to LOD.
+    *   3) if pre-clamp is enabled, LOD is clamped to [MinLod, MaxLod] first
+    *   4) LOD is compared with Base to determine whether magnification or
+    *      minification is needed.
+    *   5) If magnification is needed, or no mipmapping is requested, LOD is
+    *      set to floor(MinLod).
+    *   6) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
+    *
+    * As an example, we could set SurfMinLod to GL_TEXTURE_BASE_LEVEL and Base
+    * to 0 to match GL.  But GL expects LOD to be set to 0, instead of
+    * floor(MinLod), in 5).  Since this is only an issue when MinLod is
+    * greater than or equal to one, and, with Base being 0, a non-zero MinLod
+    * implies minification, we only need to deal with the case when mipmapping
+    * is disabled.  We can thus do:
+    *
+    *   if (MipFilter == MIPFILTER_NONE && MinLod) {
+    *     MinLod = 0;
+    *     MagFilter = MinFilter;
+    *   }
+    */
+
+   lod_bias = get_gen6_lod_bias(dev, info->lod_bias);
+   min_lod = get_gen6_lod_clamp(dev, info->min_lod);
+   max_lod = get_gen6_lod_clamp(dev, info->max_lod);
+
+   dw0 = GEN6_SAMPLER_DW0_LOD_PRECLAMP_ENABLE |
+         0 << GEN6_SAMPLER_DW0_BASE_LOD__SHIFT |
+         info->mip_filter << GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT |
+         info->mag_filter << GEN6_SAMPLER_DW0_MAG_FILTER__SHIFT |
+         info->min_filter << GEN6_SAMPLER_DW0_MIN_FILTER__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dw0 |= GEN7_SAMPLER_DW0_BORDER_COLOR_MODE_DX10_OGL |
+             lod_bias << GEN7_SAMPLER_DW0_LOD_BIAS__SHIFT;
+
+      if (info->min_filter == GEN6_MAPFILTER_ANISOTROPIC ||
+          info->mag_filter == GEN6_MAPFILTER_ANISOTROPIC)
+         dw0 |= GEN7_SAMPLER_DW0_ANISO_ALGO_EWA;
+   } else {
+      dw0 |= lod_bias << GEN6_SAMPLER_DW0_LOD_BIAS__SHIFT |
+             info->shadow_func << GEN6_SAMPLER_DW0_SHADOW_FUNC__SHIFT;
+
+      /*
+       * From the Sandy Bridge PRM, volume 4 part 1, page 102:
+       *
+       *     "(Min and Mag State Not Equal) Must be set to 1 if any of the
+       *      following are true:
+       *
+       *      - Mag Mode Filter and Min Mode Filter are not the same
+       *      - Address Rounding Enable: U address mag filter and U address
+       *        min filter are not the same
+       *      - Address Rounding Enable: V address mag filter and V address
+       *        min filter are not the same
+       *      - Address Rounding Enable: R address mag filter and R address
+       *        min filter are not the same"
+       *
+       * We set address rounding for U, V, and R uniformly.  Only need to
+       * check the filters.
+       */
+      if (info->min_filter != info->mag_filter)
+         dw0 |= GEN6_SAMPLER_DW0_MIN_MAG_NOT_EQUAL;
+   }
+
+   dw1 = 0;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 96:
+       *
+       *     "This field (Cube Surface Control Mode) must be set to
+       *      CUBECTRLMODE_PROGRAMMED"
+       */
+      dw1 |= min_lod << GEN7_SAMPLER_DW1_MIN_LOD__SHIFT |
+             max_lod << GEN7_SAMPLER_DW1_MAX_LOD__SHIFT |
+             info->shadow_func << GEN7_SAMPLER_DW1_SHADOW_FUNC__SHIFT |
+             GEN7_SAMPLER_DW1_CUBECTRLMODE_PROGRAMMED;
+   } else {
+      dw1 |= min_lod << GEN6_SAMPLER_DW1_MIN_LOD__SHIFT |
+             max_lod << GEN6_SAMPLER_DW1_MAX_LOD__SHIFT |
+             GEN6_SAMPLER_DW1_CUBECTRLMODE_PROGRAMMED |
+             info->tcx_ctrl << GEN6_SAMPLER_DW1_U_WRAP__SHIFT |
+             info->tcy_ctrl << GEN6_SAMPLER_DW1_V_WRAP__SHIFT |
+             info->tcz_ctrl << GEN6_SAMPLER_DW1_R_WRAP__SHIFT;
+   }
+
+   dw3 = info->max_anisotropy << GEN6_SAMPLER_DW3_MAX_ANISO__SHIFT;
+
+   /* round the coordinates for linear filtering */
+   if (info->min_filter != GEN6_MAPFILTER_NEAREST) {
+      dw3 |= GEN6_SAMPLER_DW3_U_MIN_ROUND |
+             GEN6_SAMPLER_DW3_V_MIN_ROUND |
+             GEN6_SAMPLER_DW3_R_MIN_ROUND;
+   }
+   if (info->mag_filter != GEN6_MAPFILTER_NEAREST) {
+      dw3 |= GEN6_SAMPLER_DW3_U_MAG_ROUND |
+             GEN6_SAMPLER_DW3_V_MAG_ROUND |
+             GEN6_SAMPLER_DW3_R_MAG_ROUND;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dw3 |= GEN7_SAMPLER_DW3_TRIQUAL_FULL |
+             info->tcx_ctrl << GEN7_SAMPLER_DW3_U_WRAP__SHIFT |
+             info->tcy_ctrl << GEN7_SAMPLER_DW3_V_WRAP__SHIFT |
+             info->tcz_ctrl << GEN7_SAMPLER_DW3_R_WRAP__SHIFT;
+
+      if (info->non_normalized)
+         dw3 |= GEN7_SAMPLER_DW3_NON_NORMALIZED_COORD;
+   } else {
+      if (info->non_normalized)
+         dw3 |= GEN6_SAMPLER_DW3_NON_NORMALIZED_COORD;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(sampler->sampler) >= 3);
+   sampler->sampler[0] = dw0;
+   sampler->sampler[1] = dw1;
+   sampler->sampler[2] = dw3;
+
+   sampler->filter_integer = sampler_get_gen6_integer_filters(dev, info);
+   sampler->filter_3d = sampler_get_gen6_3d_filters(dev, info);
+   sampler->addr_ctrl_1d = sampler_get_gen6_1d_addr_controls(dev, info);
+   sampler->addr_ctrl_2d_3d = sampler_get_gen6_2d_3d_addr_controls(dev, info);
+   sampler->addr_ctrl_cube = sampler_get_gen6_cube_addr_controls(dev, info);
+
+   sampler->non_normalized = info->non_normalized;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 21:
+    *
+    *     "[DevSNB] Errata: Incorrect behavior is observed in cases where the
+    *      min and mag mode filters are different and SurfMinLOD is nonzero.
+    *      The determination of MagMode uses the following equation instead of
+    *      the one in the above pseudocode:
+    *
+    *      MagMode = (LOD + SurfMinLOD - Base <= 0)"
+    *
+    * As a way to work around that, request Base to be set to SurfMinLod.
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(6) &&
+       info->min_filter != info->mag_filter)
+      sampler->base_to_surf_min_lod = true;
+
+   return true;
+}
+
+/**
+ * Set \p border to the Gen6 SAMPLER_BORDER_COLOR_STATE for \p info->rgba.f.
+ */
+static bool
+sampler_border_set_gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_state_sampler_border *border,
+                                                   const struct ilo_dev *dev,
+                                                   const struct ilo_state_sampler_border_info *info)
+{
+   uint32_t dw[12];
+   float rgba[4];
+   int i;
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 117:
+    *
+    *     "For ([DevSNB]), if border color is used, all formats must be
+    *      provided.  Hardware will choose the appropriate format based on
+    *      Surface Format and Texture Border Color Mode. The values
+    *      represented by each format should be the same (other than being
+    *      subject to range-based clamping and precision) to avoid unexpected
+    *      behavior."
+    *
+    * XXX We do not honor info->is_integer yet.
+    */
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* make a copy so that we can clamp for SNORM and UNORM */
+   memcpy(rgba, info->rgba.f, sizeof(rgba));
+
+   /* IEEE_FP */
+   dw[1] = fui(rgba[0]);
+   dw[2] = fui(rgba[1]);
+   dw[3] = fui(rgba[2]);
+   dw[4] = fui(rgba[3]);
+
+   /* FLOAT_16 */
+   dw[5] = (uint32_t) util_float_to_half(rgba[0]) |
+           (uint32_t) util_float_to_half(rgba[1]) << 16;
+   dw[6] = (uint32_t) util_float_to_half(rgba[2]) |
+           (uint32_t) util_float_to_half(rgba[3]) << 16;
+
+   /* clamp to [-1.0f, 1.0f] */
+   for (i = 0; i < 4; i++)
+      rgba[i] = CLAMP(rgba[i], -1.0f, 1.0f);
+
+   /* SNORM16; mask so that negative values do not sign-extend when OR'ed */
+   dw[9] =  ((uint32_t) util_iround(rgba[0] * 32767.0f) & 0xffff) |
+            ((uint32_t) util_iround(rgba[1] * 32767.0f) & 0xffff) << 16;
+   dw[10] = ((uint32_t) util_iround(rgba[2] * 32767.0f) & 0xffff) |
+            ((uint32_t) util_iround(rgba[3] * 32767.0f) & 0xffff) << 16;
+
+   /* SNORM8; masked for the same reason */
+   dw[11] = ((uint32_t) util_iround(rgba[0] * 127.0f) & 0xff) |
+            ((uint32_t) util_iround(rgba[1] * 127.0f) & 0xff) << 8 |
+            ((uint32_t) util_iround(rgba[2] * 127.0f) & 0xff) << 16 |
+            ((uint32_t) util_iround(rgba[3] * 127.0f) & 0xff) << 24;
+
+   /* clamp to [0.0f, 1.0f] */
+   for (i = 0; i < 4; i++)
+      rgba[i] = CLAMP(rgba[i], 0.0f, 1.0f);
+
+   /* UNORM8 */
+   dw[0] = ((uint32_t) util_iround(rgba[0] * 255.0f) & 0xff) |
+           ((uint32_t) util_iround(rgba[1] * 255.0f) & 0xff) << 8 |
+           ((uint32_t) util_iround(rgba[2] * 255.0f) & 0xff) << 16 |
+           ((uint32_t) util_iround(rgba[3] * 255.0f) & 0xff) << 24;
+
+   /* UNORM16 */
+   dw[7] = ((uint32_t) util_iround(rgba[0] * 65535.0f) & 0xffff) |
+           ((uint32_t) util_iround(rgba[1] * 65535.0f) & 0xffff) << 16;
+   dw[8] = ((uint32_t) util_iround(rgba[2] * 65535.0f) & 0xffff) |
+           ((uint32_t) util_iround(rgba[3] * 65535.0f) & 0xffff) << 16;
+
+   STATIC_ASSERT(ARRAY_SIZE(border->color) >= 12);
+   memcpy(border->color, dw, sizeof(dw));
+
+   return true;
+}
+
+static bool
+sampler_border_set_gen7_SAMPLER_BORDER_COLOR_STATE(struct ilo_state_sampler_border *border,
+                                                   const struct ilo_dev *dev,
+                                                   const struct ilo_state_sampler_border_info *info)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 116:
+    *
+    *     "In DX10/OGL mode, the format of the border color is
+    *      R32G32B32A32_FLOAT, regardless of the surface format chosen."
+    *
+    * From the Haswell PRM, volume 2d, page 240:
+    *
+    *     "So, SW will have to program the table in SAMPLER_BORDER_COLOR_STATE
+    *      at offsets DWORD16 to 19, as per the integer surface format type."
+    *
+    * From the Broadwell PRM, volume 2d, page 297:
+    *
+    *     "DX10/OGL mode: the format of the border color depends on the format
+    *      of the surface being sampled. If the map format is UINT, then the
+    *      border color format is R32G32B32A32_UINT. If the map format is
+    *      SINT, then the border color format is R32G32B32A32_SINT. Otherwise,
+    *      the border color format is R32G32B32A32_FLOAT."
+    *
+    * XXX every Gen is different
+    */
+
+   STATIC_ASSERT(ARRAY_SIZE(border->color) >= 4);
+   memcpy(border->color, info->rgba.f, sizeof(info->rgba.f));
+
+   return true;
+}
+
+bool
+ilo_state_sampler_init(struct ilo_state_sampler *sampler,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_sampler_info *info)
+{
+   bool ret;
+
+   /* the sampler is expected to be zeroed by the caller */
+   assert(ilo_is_zeroed(sampler, sizeof(*sampler)));
+
+   ret = sampler_set_gen6_SAMPLER_STATE(sampler, dev, info);
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_sampler_init_disabled(struct ilo_state_sampler *sampler,
+                                const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(ilo_is_zeroed(sampler, sizeof(*sampler)));
+
+   sampler->sampler[0] = GEN6_SAMPLER_DW0_DISABLE;
+   sampler->sampler[1] = 0;
+   sampler->sampler[2] = 0;
+
+   return true;
+}
+
+/**
+ * Modify \p sampler to work with \p surf.  There will be loss of information.
+ * Callers should make a copy of the original sampler first.
+ */
+bool
+ilo_state_sampler_set_surface(struct ilo_state_sampler *sampler,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_surface *surf)
+{
+   uint32_t addr_ctrl;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (sampler->non_normalized) {
+      /* see sampler_validate_gen6_non_normalized() */
+      assert(surf->type == GEN6_SURFTYPE_2D ||
+             surf->type == GEN6_SURFTYPE_3D);
+      assert(!surf->min_lod && !surf->mip_count);
+   }
+
+   if (sampler->base_to_surf_min_lod) {
+      const uint8_t base = surf->min_lod << GEN6_SAMPLER_DW0_BASE_LOD__RADIX;
+
+      sampler->sampler[0] =
+         (sampler->sampler[0] & ~GEN6_SAMPLER_DW0_BASE_LOD__MASK) |
+         base << GEN6_SAMPLER_DW0_BASE_LOD__SHIFT;
+   }
+
+   if (surf->is_integer || surf->type == GEN6_SURFTYPE_3D) {
+      const uint32_t mask = (GEN6_SAMPLER_DW0_MIP_FILTER__MASK |
+                             GEN6_SAMPLER_DW0_MIN_FILTER__MASK |
+                             GEN6_SAMPLER_DW0_MAG_FILTER__MASK);
+      const uint32_t filter = (surf->is_integer) ?
+         sampler->filter_integer : sampler->filter_3d;
+
+      assert((filter & mask) == filter);
+      sampler->sampler[0] = (sampler->sampler[0] & ~mask) |
+                            filter;
+   }
+
+   switch (surf->type) {
+   case GEN6_SURFTYPE_1D:
+      addr_ctrl = sampler->addr_ctrl_1d;
+      break;
+   case GEN6_SURFTYPE_2D:
+   case GEN6_SURFTYPE_3D:
+      addr_ctrl = sampler->addr_ctrl_2d_3d;
+      break;
+   case GEN6_SURFTYPE_CUBE:
+      addr_ctrl = sampler->addr_ctrl_cube;
+      break;
+   default:
+      assert(!"unexpected surface type");
+      addr_ctrl = 0;
+      break;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      const uint32_t mask = (GEN7_SAMPLER_DW3_U_WRAP__MASK |
+                             GEN7_SAMPLER_DW3_V_WRAP__MASK |
+                             GEN7_SAMPLER_DW3_R_WRAP__MASK);
+
+      assert((addr_ctrl & mask) == addr_ctrl);
+      sampler->sampler[2] = (sampler->sampler[2] & ~mask) |
+                            addr_ctrl;
+   } else {
+      const uint32_t mask = (GEN6_SAMPLER_DW1_U_WRAP__MASK |
+                             GEN6_SAMPLER_DW1_V_WRAP__MASK |
+                             GEN6_SAMPLER_DW1_R_WRAP__MASK);
+
+      assert((addr_ctrl & mask) == addr_ctrl);
+      sampler->sampler[1] = (sampler->sampler[1] & ~mask) |
+                            addr_ctrl;
+   }
+
+   return true;
+}
+
+bool
+ilo_state_sampler_border_init(struct ilo_state_sampler_border *border,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_sampler_border_info *info)
+{
+   /* Gen6 has its own SAMPLER_BORDER_COLOR_STATE setter */
+   const bool use_gen7 = (ilo_dev_gen(dev) >= ILO_GEN(7));
+   bool ret;
+
+   /* build the border color state with the setter matching the Gen */
+   ret = (use_gen7) ?
+      sampler_border_set_gen7_SAMPLER_BORDER_COLOR_STATE(border, dev, info) :
+      sampler_border_set_gen6_SAMPLER_BORDER_COLOR_STATE(border, dev, info);
+
+   /* the setters are not expected to fail */
+   assert(ret);
+
+   return ret;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sampler.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sampler.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sampler.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sampler.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,103 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_SAMPLER_H
+#define ILO_STATE_SAMPLER_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+struct ilo_state_surface;
+
+struct ilo_state_sampler_info {
+   bool non_normalized;
+
+   float lod_bias;
+   float min_lod;
+   float max_lod;
+
+   enum gen_mip_filter mip_filter;
+   enum gen_map_filter min_filter;
+   enum gen_map_filter mag_filter;
+   enum gen_aniso_ratio max_anisotropy;
+
+   enum gen_texcoord_mode tcx_ctrl;
+   enum gen_texcoord_mode tcy_ctrl;
+   enum gen_texcoord_mode tcz_ctrl;
+
+   enum gen_prefilter_op shadow_func;
+};
+
+struct ilo_state_sampler_border_info {
+   union {
+      float f[4];
+      uint32_t ui[4];
+   } rgba;
+
+   bool is_integer;
+};
+
+struct ilo_state_sampler {
+   uint32_t sampler[3];
+
+   uint32_t filter_integer;
+   uint32_t filter_3d;
+
+   uint32_t addr_ctrl_1d;
+   uint32_t addr_ctrl_2d_3d;
+   uint32_t addr_ctrl_cube;
+
+   bool non_normalized;
+   bool base_to_surf_min_lod;
+};
+
+struct ilo_state_sampler_border {
+   uint32_t color[12];
+};
+
+bool
+ilo_state_sampler_init(struct ilo_state_sampler *sampler,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_sampler_info *info);
+
+bool
+ilo_state_sampler_init_disabled(struct ilo_state_sampler *sampler,
+                                const struct ilo_dev *dev);
+
+bool
+ilo_state_sampler_set_surface(struct ilo_state_sampler *sampler,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_surface *surf);
+
+bool
+ilo_state_sampler_border_init(struct ilo_state_sampler_border *border,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_sampler_border_info *info);
+
+#endif /* ILO_STATE_SAMPLER_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sbe.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sbe.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sbe.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sbe.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,350 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_sbe.h"
+
+static bool /* checks info against the hardware limits with asserts only; always returns true */
+sbe_validate_gen8(const struct ilo_dev *dev,
+                  const struct ilo_state_sbe_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(info->attr_count <= ILO_STATE_SBE_MAX_ATTR_COUNT);
+
+   assert(info->vue_read_base + info->vue_read_count <=
+         info->cv_vue_attr_count);   /* the read window may not extend past cv_vue_attr_count */
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "(Vertex URB Entry Read Length)
+    *      Format: U5
+    *      Range [1,16]
+    *
+    *      Specifies the amount of URB data read for each Vertex URB entry, in
+    *      256-bit register increments.
+    *
+    *      Programming Notes
+    *      It is UNDEFINED to set this field to 0 indicating no Vertex URB
+    *      data to be read."
+    *
+    *     "(Vertex URB Entry Read Offset)
+    *      Format: U6
+    *      Range [0,63]
+    *
+    *      Specifies the offset (in 256-bit units) at which Vertex URB data is
+    *      to be read from the URB."
+    */
+   assert(info->vue_read_base % 2 == 0 && info->vue_read_base <= 126);   /* halved into the U6 offset later, hence even and at most 63 * 2 */
+   assert(info->vue_read_count <= 32);   /* halved (rounding up) into the read length later, hence at most 16 * 2 */
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+    *
+    *     "This field (Point Sprite Texture Coordinate Enable) must be
+    *      programmed to 0 when non-point primitives are rendered."
+    */
+   if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->point_sprite_enables)
+      assert(info->cv_is_point);   /* only enforced before Gen7.5 */
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 246:
+    *
+    *     "(Number of SF Output Attributes) 33-48: Specifies 17-32 attributes
+    *      (# attributes = field value - 16). Swizzling performed on
+    *      Attributes 16-31 (as required) only. Attributes 0-15 passed through
+    *      unmodified.
+    *
+    *      Note :
+    *
+    *      Attribute n Component Override and Constant Source states apply to
+    *      Attributes 16-31 (as required) instead of Attributes 0-15. E.g.,
+    *      this allows an Attribute 16-31 component to be overridden with the
+    *      PrimitiveID value.
+    *
+    *      Attribute n WrapShortest Enables still apply to Attributes 0-15.
+    *
+    *      Attribute n Swizzle Select and Attribute n Source Attribute states
+    *      are ignored and none of the swizzling functions available through
+    *      these controls are performed."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 247:
+    *
+    *     "This bit (Attribute Swizzle Enable) controls the use of the
+    *      Attribute n Swizzle Select and Attribute n Source Attribute fields
+    *      only. If ENABLED, those fields are used as described below. If
+    *      DISABLED, attributes are copied from their corresponding source
+    *      attributes, for the purposes of Swizzle Select only.
+    *
+    *      Note that the following fields are unaffected by this bit, and are
+    *      therefore always used to control their respective fields:
+    *      Attribute n Component Override X/Y/Z/W
+    *      Attribute n Constant Source
+    *      Attribute n WrapShortest Enables"
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 264:
+    *
+    *     "When Attribute Swizzle Enable is ENABLED, this bit (Attribute
+    *      Swizzle Control Mode) controls whether attributes 0-15 or 16-31 are
+    *      subject to the following swizzle controls:
+    *
+    *      - Attribute n Component Override X/Y/Z/W
+    *      - Attribute n Constant Source
+    *      - Attribute n Swizzle Select
+    *      - Attribute n Source Attribute
+    *      - Attribute n Wrap Shortest Enables"
+    *
+    *     "SWIZ_16_31... Only valid when 16 or more attributes are output."
+    */
+   assert(info->swizzle_count <= ILO_STATE_SBE_MAX_SWIZZLE_COUNT);
+   if (info->swizzle_16_31) {
+      assert(ilo_dev_gen(dev) >= ILO_GEN(7) &&
+             info->swizzle_enable &&
+             info->attr_count > 16);   /* swizzling 16-31 needs Gen7+, swizzling enabled, and more than 16 attributes */
+   }
+
+   return true;   /* violations are caught by the asserts above, never reported through the return value */
+}
+
+static uint8_t
+sbe_get_gen8_min_read_count(const struct ilo_dev *dev,
+                            const struct ilo_state_sbe_info *info)
+{
+   uint8_t count;
+   uint8_t idx;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Start with what the non-swizzled attributes need.  Nothing is needed
+    * on their behalf when swizzling is enabled and there are at least as
+    * many swizzles as attributes.  Only the first 16 are needed when the
+    * swizzles apply to attributes 16-31 and cover all of them.  Otherwise
+    * all attr_count attributes are needed.
+    */
+   if (info->swizzle_enable && info->swizzle_count >= info->attr_count)
+      count = 0;
+   else if (info->swizzle_16_31 && info->swizzle_count + 16 == info->attr_count)
+      count = 16;
+   else
+      count = info->attr_count;
+
+   /* without swizzling there are no explicit source attributes to consider */
+   if (!info->swizzle_enable)
+      return count;
+
+   for (idx = 0; idx < info->swizzle_count; idx++) {
+      const struct ilo_state_sbe_swizzle_info *entry = &info->swizzles[idx];
+      /* the two FACING selects are counted as needing one more attribute */
+      const bool facing = (entry->attr_select == GEN6_INPUTATTR_FACING ||
+                           entry->attr_select == GEN6_INPUTATTR_FACING_W);
+      const int needed = entry->attr + facing + 1;
+
+      /* grow the count until it covers the highest source attribute */
+      if (count < needed)
+         count = needed;
+   }
+
+   return count;
+}
+
+static uint8_t /* returns the URB read length in pairs of attributes; the result is never 0 */
+sbe_get_gen8_read_length(const struct ilo_dev *dev,
+                         const struct ilo_state_sbe_info *info)
+{
+   uint8_t read_len;   /* counted in attributes until it is halved below */
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "(Vertex URB Entry Read Length)
+    *      This field should be set to the minimum length required to read the
+    *      maximum source attribute. The maximum source attribute is indicated
+    *      by the maximum value of the enabled Attribute # Source Attribute if
+    *      Attribute Swizzle Enable is set, Number of Output Attributes -1 if
+    *      enable is not set.
+    *      read_length = ceiling((max_source_attr+1)/2)
+    *
+    *      [errata] Corruption/Hang possible if length programmed larger than
+    *      recommended"
+    */
+   if (info->has_min_read_count) {
+      read_len = info->vue_read_count;   /* the caller's count is used as-is and only cross-checked in debug builds */
+      assert(read_len == sbe_get_gen8_min_read_count(dev, info));
+   } else {
+      read_len = sbe_get_gen8_min_read_count(dev, info);   /* derive the minimum; it must fit in what the caller allows */
+      assert(read_len <= info->vue_read_count);
+   }
+
+   /*
+    * In pairs.  URB entries are aligned to 1024-bits or 512-bits.  There is
+    * no need to worry about reading past entries.
+    */
+   read_len = (read_len + 1) / 2;   /* round up to whole pairs */
+   if (!read_len)
+      read_len = 1;   /* read at least one pair instead of returning 0 */
+
+   return read_len;
+}
+
+static bool /* packs info into the three dwords stored in sbe->sbe; returns false only if validation does */
+sbe_set_gen8_3DSTATE_SBE(struct ilo_state_sbe *sbe,
+                         const struct ilo_dev *dev,
+                         const struct ilo_state_sbe_info *info)
+{
+   uint8_t vue_read_offset, vue_read_len;   /* both in pairs of attributes */
+   uint8_t attr_count;
+   uint32_t dw1, dw2, dw3;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!sbe_validate_gen8(dev, info))
+      return false;
+
+   vue_read_offset = info->vue_read_base / 2;   /* validated to be even, so nothing is lost */
+   vue_read_len = sbe_get_gen8_read_length(dev, info);
+
+   attr_count = info->attr_count;
+   if (ilo_dev_gen(dev) == ILO_GEN(6) && info->swizzle_16_31)   /* NOTE(review): sbe_validate_gen8() asserts Gen7+ when swizzle_16_31 is set, so this only runs with asserts compiled out -- confirm intent */
+      attr_count += 16;
+
+   dw1 = attr_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
+         vue_read_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {   /* Gen8 uses its own offset shift and also sets the two USE_URB_READ_* bits */
+      dw1 |= GEN8_SBE_DW1_USE_URB_READ_LEN |
+             GEN8_SBE_DW1_USE_URB_READ_OFFSET |
+             vue_read_offset << GEN8_SBE_DW1_URB_READ_OFFSET__SHIFT;
+   } else {
+      dw1 |= vue_read_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->swizzle_16_31)
+      dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_16_31;
+
+   if (info->swizzle_enable)
+      dw1 |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
+
+   dw1 |= (info->point_sprite_origin_lower_left) ?
+      GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT :
+      GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
+
+   dw2 = info->point_sprite_enables;   /* copied through unmodified */
+   dw3 = info->const_interp_enables;   /* copied through unmodified */
+
+   STATIC_ASSERT(ARRAY_SIZE(sbe->sbe) >= 3);
+   sbe->sbe[0] = dw1;
+   sbe->sbe[1] = dw2;
+   sbe->sbe[2] = dw3;
+
+   return true;
+}
+
+static bool /* packs the swizzle table into sbe->swiz; always returns true */
+sbe_set_gen8_3DSTATE_SBE_SWIZ(struct ilo_state_sbe *sbe,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_sbe_info *info)
+{
+   uint16_t swiz[ILO_STATE_SBE_MAX_SWIZZLE_COUNT];   /* one 16-bit entry per swizzle slot */
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < info->swizzle_count; i++) {   /* entries supplied by the caller */
+      const struct ilo_state_sbe_swizzle_info *swizzle = &info->swizzles[i];
+
+      /* U5 */
+      assert(swizzle->attr < 32);
+      swiz[i] = swizzle->attr_select << GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT |
+                swizzle->attr << GEN8_SBE_SWIZ_SRC_ATTR__SHIFT;
+
+      if (swizzle->force_zeros) {   /* override all four components with the 0000 constant */
+         swiz[i] |= GEN8_SBE_SWIZ_OVERRIDE_W |
+                    GEN8_SBE_SWIZ_OVERRIDE_Z |
+                    GEN8_SBE_SWIZ_OVERRIDE_Y |
+                    GEN8_SBE_SWIZ_OVERRIDE_X |
+                    GEN8_SBE_SWIZ_CONST_0000;
+      }
+   }
+
+   for (; i < ARRAY_SIZE(swiz); i++) {   /* remaining slots: normal select with source attribute i */
+      swiz[i] = GEN6_INPUTATTR_NORMAL << GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT |
+                i << GEN8_SBE_SWIZ_SRC_ATTR__SHIFT;
+   }
+
+   STATIC_ASSERT(sizeof(sbe->swiz) == sizeof(swiz));   /* the 16-bit entries exactly fill the destination array */
+   memcpy(sbe->swiz, swiz, sizeof(swiz));
+
+   return true;
+}
+
+bool /* first-time setup of *sbe from *info; returns the result of ilo_state_sbe_set_info() */
+ilo_state_sbe_init(struct ilo_state_sbe *sbe,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_sbe_info *info)
+{
+   assert(ilo_is_zeroed(sbe, sizeof(*sbe)));   /* the caller must hand in zeroed storage (checked in debug builds only) */
+   return ilo_state_sbe_set_info(sbe, dev, info);
+}
+
+bool
+ilo_state_sbe_init_for_rectlist(struct ilo_state_sbe *sbe,
+                                const struct ilo_dev *dev,
+                                uint8_t read_base,
+                                uint8_t read_count)
+{
+   struct ilo_state_sbe_info rectlist;
+
+   memset(&rectlist, 0, sizeof(rectlist));             /* every field not set below stays zero */
+   rectlist.vue_read_base = read_base;
+   rectlist.vue_read_count = read_count;
+   rectlist.attr_count = read_count;                   /* as many outputs as attributes read */
+   rectlist.cv_vue_attr_count = read_base + read_count;
+   rectlist.has_min_read_count = true;                 /* read_count is used as given */
+
+   return ilo_state_sbe_set_info(sbe, dev, &rectlist);
+}
+
+bool
+ilo_state_sbe_set_info(struct ilo_state_sbe *sbe,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_sbe_info *info)
+{
+   /* Repack both sbe->sbe and sbe->swiz from info; false if either step fails. */
+   bool ret = true;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   ret &= sbe_set_gen8_3DSTATE_SBE(sbe, dev, info);
+   ret &= sbe_set_gen8_3DSTATE_SBE_SWIZ(sbe, dev, info);
+   assert(ret);
+
+   /* propagate the result so that builds without asserts still report failure */
+   return ret;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sbe.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sbe.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sbe.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sbe.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,103 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_SBE_H
+#define ILO_STATE_SBE_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 264:
+ *
+ *     "Number of SF Output Attributes sets the number of attributes that will
+ *      be output from the SF stage, not including position. This can be used
+ *      to specify up to 32, and may differ from the number of input
+ *      attributes."
+ *
+ *     "The first or last set of 16 attributes can be swizzled according to
+ *      certain state fields."
+ */
+#define ILO_STATE_SBE_MAX_ATTR_COUNT 32      /* upper bound asserted on ilo_state_sbe_info::attr_count */
+#define ILO_STATE_SBE_MAX_SWIZZLE_COUNT 16   /* upper bound asserted on ilo_state_sbe_info::swizzle_count */
+
+struct ilo_state_sbe_swizzle_info {
+   /* select an attribute from read ones */
+   enum gen_inputattr_select attr_select;
+   uint8_t attr;        /* source attribute index; asserted to be below 32 when packed */
+
+   bool force_zeros;    /* when set, all four components are overridden with the 0000 constant */
+};
+
+struct ilo_state_sbe_info {
+   uint8_t attr_count;          /* number of output attributes; at most ILO_STATE_SBE_MAX_ATTR_COUNT */
+
+   /* which VUE attributes to read */
+   uint8_t cv_vue_attr_count;   /* vue_read_base + vue_read_count may not exceed this */
+   uint8_t vue_read_base;       /* in attributes; must be even and at most 126 */
+   uint8_t vue_read_count;      /* in attributes; at most 32 */
+   bool has_min_read_count;     /* vue_read_count is already the minimum and is used as-is */
+
+   bool cv_is_point;            /* must be set when point_sprite_enables is non-zero before Gen7.5 */
+   bool point_sprite_origin_lower_left;
+   /* force sprite coordinates to the four corner vertices of the point */
+   uint32_t point_sprite_enables;
+
+   /* force attr at the provoking vertex to a0 and zero to a1/a2 */
+   uint32_t const_interp_enables;
+
+   bool swizzle_enable;
+   /* swizzle attribute 16 to 31 instead; Gen7+ only */
+   bool swizzle_16_31;          /* also requires swizzle_enable and attr_count > 16 */
+   uint8_t swizzle_count;       /* entries in swizzles; at most ILO_STATE_SBE_MAX_SWIZZLE_COUNT */
+   const struct ilo_state_sbe_swizzle_info *swizzles;
+};
+
+struct ilo_state_sbe {
+   uint32_t sbe[3];     /* the three packed dwords (dw1, dw2, dw3) written by ilo_state_sbe_set_info() */
+   uint32_t swiz[8];    /* sixteen 16-bit swizzle entries copied in as raw bytes, two per dword */
+};
+
+bool /* first-time setup: asserts that *sbe is zeroed, then packs *info into it */
+ilo_state_sbe_init(struct ilo_state_sbe *sbe,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_sbe_info *info);
+
+bool /* setup with no swizzling: read_count attributes are read starting at read_base and passed through */
+ilo_state_sbe_init_for_rectlist(struct ilo_state_sbe *sbe,
+                                const struct ilo_dev *dev,
+                                uint8_t read_base,
+                                uint8_t read_count);
+
+bool /* (re)packs *info into sbe->sbe and sbe->swiz; invalid info is caught by asserts */
+ilo_state_sbe_set_info(struct ilo_state_sbe *sbe,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_sbe_info *info);
+
+#endif /* ILO_STATE_SBE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_shader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_shader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_shader.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_shader.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,737 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_shader.h"
+
+enum vertex_stage {   /* the stages that go through vertex_get_gen6_ff() in this file */
+   STAGE_VS,
+   STAGE_HS,
+   STAGE_DS,
+   STAGE_GS,
+};
+
+struct vertex_ff {   /* values derived by vertex_get_gen6_ff(), ready to be shifted into command dwords */
+   uint8_t grf_start;
+   uint8_t scratch_space;      /* encoded size: 0 for up to 1KB, otherwise log2 of the next power of two minus 10 */
+
+   uint8_t sampler_count;      /* in groups of four, rounded up; 4 when there are more than 12 samplers */
+   uint8_t surface_count;
+   bool has_uav;
+
+   uint8_t vue_read_offset;    /* read base divided by two */
+   uint8_t vue_read_len;       /* read count divided by two, rounded up */
+
+   uint8_t user_clip_enables;
+};
+
+static bool /* asserts that the kernel parameters fit the hardware fields; always returns true */
+vertex_validate_gen6_kernel(const struct ilo_dev *dev,
+                            enum vertex_stage stage,
+                            const struct ilo_state_shader_kernel_info *kernel)
+{
+   /*
+    * "Dispatch GRF Start Register for URB Data" is U4 for GS and U5 for
+    * others.
+    */
+   const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;   /* exclusive bound */
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 134:
+    *
+    *     "(Per-Thread Scratch Space)
+    *      Range    [0,11] indicating [1K Bytes, 2M Bytes]"
+    */
+   const uint32_t max_scratch_size = 2 * 1024 * 1024;   /* inclusive bound, in bytes */
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* we do not want to save it, so a non-zero kernel offset is rejected here */
+   assert(!kernel->offset);
+
+   assert(kernel->grf_start < max_grf_start);
+   assert(kernel->scratch_size <= max_scratch_size);
+
+   return true;
+}
+
+static bool /* asserts that the URB read window fits the hardware fields; always returns true */
+vertex_validate_gen6_urb(const struct ilo_dev *dev,
+                         enum vertex_stage stage,
+                         const struct ilo_state_shader_urb_info *urb)
+{
+   /* "Vertex/Patch URB Entry Read Offset" is U6, in pairs */
+   const uint8_t max_read_base = 63 * 2;   /* expressed in attributes */
+   /*
+    * "Vertex/Patch URB Entry Read Length" is limited to 64 for DS and U6 for
+    * others, in pairs
+    */
+   const uint8_t max_read_count = ((stage == STAGE_DS) ? 64 : 63) * 2;   /* expressed in attributes */
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(urb->read_base + urb->read_count <= urb->cv_input_attr_count);   /* the window may not extend past cv_input_attr_count */
+
+   assert(urb->read_base % 2 == 0 && urb->read_base <= max_read_base);   /* even, because it is halved into the offset field */
+
+   /*
+    * There is no need to worry about reading past entries, as URB entries are
+    * aligned to 1024-bits (Gen6) or 512-bits (Gen7+).
+    */
+   assert(urb->read_count <= max_read_count);
+
+   return true;
+}
+
+static bool /* validates kernel and urb, then fills *ff with field-ready values; false only if validation fails */
+vertex_get_gen6_ff(const struct ilo_dev *dev,
+                   enum vertex_stage stage,
+                   const struct ilo_state_shader_kernel_info *kernel,
+                   const struct ilo_state_shader_resource_info *resource,
+                   const struct ilo_state_shader_urb_info *urb,
+                   struct vertex_ff *ff)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
+       !vertex_validate_gen6_urb(dev, stage, urb))
+      return false;
+
+   ff->grf_start = kernel->grf_start;
+   /* next power of two, starting from 1KB */
+   ff->scratch_space = (kernel->scratch_size > 1024) ?
+      (util_last_bit(kernel->scratch_size - 1) - 10): 0;   /* anything up to 1KB encodes as 0 */
+
+   ff->sampler_count = (resource->sampler_count <= 12) ?
+      (resource->sampler_count + 3) / 4 : 4;   /* groups of four, rounded up, capped at 4 */
+   ff->surface_count = resource->surface_count;
+   ff->has_uav = resource->has_uav;
+
+   ff->vue_read_offset = urb->read_base / 2;       /* attributes to pairs; the base was validated to be even */
+   ff->vue_read_len = (urb->read_count + 1) / 2;   /* attributes to pairs, rounded up */
+
+   /* need to read something unless VUE handles are included */
+   switch (stage) {
+   case STAGE_VS:
+      if (!ff->vue_read_len)
+         ff->vue_read_len = 1;
+
+      /* one GRF per attribute */
+      assert(kernel->grf_start + urb->read_count * 2 <= 128);   /* NOTE(review): the check doubles read_count although the comment says one GRF per attribute -- confirm */
+      break;
+   case STAGE_GS:
+      if (ilo_dev_gen(dev) == ILO_GEN(6) && !ff->vue_read_len)   /* only Gen6 GS is forced to a non-zero length */
+         ff->vue_read_len = 1;
+      break;
+   default:   /* HS and DS keep a zero read length */
+      break;
+   }
+
+   ff->user_clip_enables = urb->user_clip_enables;
+
+   return true;
+}
+
+static uint16_t
+vs_get_gen6_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_vs_info *info)
+{
+   uint16_t max_threads;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Pick the thread limit for 3DSTATE_VS:
+    *
+    *   Gen8:    504
+    *   Gen7.5:  280 on GT2 and up, 70 otherwise
+    *   others:  dev->thread_count
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(8))
+      max_threads = 504;
+   else if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      max_threads = (dev->gt < 2) ? 70 : 280;
+   else
+      max_threads = dev->thread_count;
+
+   /* callers program the limit minus one */
+   return max_threads - 1;
+}
+
+static bool /* packs info into vs->vs[0..3], plus vs->vs[4] on Gen8; false only if vertex_get_gen6_ff() fails */
+vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_vs_info *info)
+{
+   struct vertex_ff ff;
+   uint16_t thread_count;   /* already reduced by one by vs_get_gen6_thread_count() */
+   uint32_t dw2, dw3, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
+            &info->resource, &info->urb, &ff))
+      return false;
+
+   thread_count = vs_get_gen6_thread_count(dev, info);
+
+   dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+   if (false)   /* never taken: the alternate FP mode bit is left clear */
+      dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+      dw2 |= GEN75_THREADDISP_ACCESS_UAV;
+
+   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
+         ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
+         ff.vue_read_offset << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
+
+   dw5 = 0;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))   /* the thread-count shift differs from Gen7.5 on */
+      dw5 |= thread_count << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+   else
+      dw5 |= thread_count << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+
+   if (info->stats_enable)
+      dw5 |= GEN6_VS_DW5_STATISTICS;
+   if (info->dispatch_enable)
+      dw5 |= GEN6_VS_DW5_VS_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(vs->vs) >= 5);
+   vs->vs[0] = dw2;
+   vs->vs[1] = dw3;
+   vs->vs[2] = dw4;
+   vs->vs[3] = dw5;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))   /* on earlier gens vs->vs[4] is left untouched */
+      vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
+
+   return true;
+}
+
+static uint16_t
+hs_get_gen7_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_hs_info *info)
+{
+   uint16_t max_threads;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * Pick the thread limit for 3DSTATE_HS:
+    *
+    *   Gen8:    504
+    *   Gen7.5:  256 on GT2 and up, 70 otherwise
+    *   others:  dev->thread_count
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(8))
+      max_threads = 504;
+   else if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      max_threads = (dev->gt < 2) ? 70 : 256;
+   else
+      max_threads = dev->thread_count;
+
+   return max_threads - 1;   /* callers program the limit minus one */
+}
+
+static bool /* packs info into hs->hs[0..3]; false only if vertex_get_gen6_ff() fails */
+hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_hs_info *info)
+{
+   struct vertex_ff ff;
+   uint16_t thread_count;   /* already reduced by one by hs_get_gen7_thread_count() */
+   uint32_t dw1, dw2, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
+            &info->resource, &info->urb, &ff))
+      return false;
+
+   thread_count = hs_get_gen7_thread_count(dev, info);
+
+   dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))   /* the thread-count shift differs from Gen7.5 on */
+      dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
+   else
+      dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
+
+   dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;   /* the instance count field is always written as 0 */
+
+   if (info->dispatch_enable)
+      dw2 |= GEN7_HS_DW2_HS_ENABLE;
+   if (info->stats_enable)
+      dw2 |= GEN7_HS_DW2_STATISTICS;
+
+   dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |   /* set unconditionally */
+         ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
+         ff.vue_read_len << GEN7_HS_DW5_URB_READ_LEN__SHIFT |
+         ff.vue_read_offset << GEN7_HS_DW5_URB_READ_OFFSET__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+      dw5 |= GEN75_HS_DW5_ACCESS_UAV;
+
+   STATIC_ASSERT(ARRAY_SIZE(hs->hs) >= 4);
+   hs->hs[0] = dw1;
+   hs->hs[1] = dw2;
+   hs->hs[2] = dw4;
+   hs->hs[3] = dw5;
+
+   return true;
+}
+
+static bool /* fills ds->te[0..2]; always returns true */
+ds_set_gen7_3DSTATE_TE(struct ilo_state_ds *ds,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_ds_info *info)
+{
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   dw1 = 0;   /* stays 0 when dispatch is disabled */
+
+   if (info->dispatch_enable) {
+      dw1 |= GEN7_TE_DW1_MODE_HW |
+             GEN7_TE_DW1_TE_ENABLE;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(ds->te) >= 3);
+   ds->te[0] = dw1;
+   ds->te[1] = fui(63.0f);   /* NOTE(review): fixed constants, presumably the maximum tessellation factors -- confirm against the 3DSTATE_TE layout */
+   ds->te[2] = fui(64.0f);
+
+   return true;
+}
+
+static uint16_t
+ds_get_gen7_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_ds_info *info)
+{
+   uint16_t max_threads;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * Pick the thread limit for 3DSTATE_DS:
+    *
+    *   Gen8:    504
+    *   Gen7.5:  280 on GT2 and up, 70 otherwise
+    *   others:  dev->thread_count
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(8))
+      max_threads = 504;
+   else if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      max_threads = (dev->gt < 2) ? 70 : 280;
+   else
+      max_threads = dev->thread_count;
+
+   return max_threads - 1;   /* callers program the limit minus one */
+}
+
+static bool /* packs info into ds->ds[0..3], plus ds->ds[4] on Gen8; false only if vertex_get_gen6_ff() fails */
+ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_ds_info *info)
+{
+   struct vertex_ff ff;
+   uint16_t thread_count;   /* already reduced by one by ds_get_gen7_thread_count() */
+   uint32_t dw2, dw3, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
+            &info->resource, &info->urb, &ff))
+      return false;
+
+   thread_count = ds_get_gen7_thread_count(dev, info);
+
+   dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+      dw2 |= GEN75_THREADDISP_ACCESS_UAV;
+
+   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
+         ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
+         ff.vue_read_offset << GEN7_DS_DW4_URB_READ_OFFSET__SHIFT;
+
+   dw5 = 0;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))   /* the thread-count shift differs from Gen7.5 on */
+      dw5 |= thread_count << GEN75_DS_DW5_MAX_THREADS__SHIFT;
+   else
+      dw5 |= thread_count << GEN7_DS_DW5_MAX_THREADS__SHIFT;
+
+   if (info->stats_enable)
+      dw5 |= GEN7_DS_DW5_STATISTICS;
+   if (info->dispatch_enable)
+      dw5 |= GEN7_DS_DW5_DS_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(ds->ds) >= 5);
+   ds->ds[0] = dw2;
+   ds->ds[1] = dw3;
+   ds->ds[2] = dw4;
+   ds->ds[3] = dw5;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))   /* on earlier gens ds->ds[4] is left untouched */
+      ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
+
+   return true;
+}
+
+static bool /* vertex_get_gen6_ff() for the GS stage plus GS-only output size asserts; false only if that call fails */
+gs_get_gen6_ff(const struct ilo_dev *dev,
+               const struct ilo_state_gs_info *info,
+               struct vertex_ff *ff)
+{
+   const struct ilo_state_shader_urb_info *urb = &info->urb;
+   const struct ilo_state_gs_sol_info *sol = &info->sol;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
+            &info->resource, &info->urb, ff))
+      return false;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 168-169:
+    *
+    *     "[0,62] indicating [1,63] 16B units"
+    *
+    *     "Programming Restrictions: The vertex size must be programmed as a
+    *      multiple of 32B units with the following exception: Rendering is
+    *      disabled (as per SOL stage state) and the vertex size output by the
+    *      GS thread is 16B.
+    *
+    *      If rendering is enabled (as per SOL state) the vertex size must be
+    *      programmed as a multiple of 32B units. In other words, the only
+    *      time software can program a vertex size with an odd number of 16B
+    *      units is when rendering is disabled."
+    */
+   assert(urb->output_attr_count <= 63);   /* at most 63 output attributes */
+   if (!sol->render_disable)
+      assert(urb->output_attr_count % 2 == 0);   /* an even attribute count is required while rendering is enabled */
+
+   return true;
+}
+
+static uint16_t /* returns the GS thread limit minus one, ready for the MAX_THREADS field */
+gs_get_gen6_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_gs_info *info)
+{
+   const struct ilo_state_gs_sol_info *sol = &info->sol;
+   uint16_t thread_count;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* Maximum Number of Threads of 3DSTATE_GS */
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+      thread_count = 504;
+      break;
+   case ILO_GEN(7.5):
+      thread_count = (dev->gt >= 2) ? 256 : 70;
+      break;
+   case ILO_GEN(7):
+   case ILO_GEN(6):
+   default:
+      thread_count = dev->thread_count;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 154:
+       *
+       *     "Maximum Number of Threads valid range is [0,27] when Rendering
+       *      Enabled bit is set."
+       *
+       * According to the classic driver, [0, 20] for GT1.
+       */
+      if (!sol->render_disable)   /* NOTE(review): this also applies on Gen7, which shares the case, although the quote is from the Sandy Bridge PRM -- confirm */
+         thread_count = (dev->gt == 2) ? 27 : 20;   /* NOTE(review): after the "- 1" below the field holds 26 or 19, not 27 or 20 -- confirm which was intended */
+      break;
+   }
+
+   return thread_count - 1;
+}
+
+static bool /* Gen6-only packing of info into gs->gs[0..4]; false only if gs_get_gen6_ff() fails */
+gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_gs_info *info)
+{
+   const struct ilo_state_gs_sol_info *sol = &info->sol;
+   struct vertex_ff ff;
+   uint16_t thread_count;   /* already reduced by one by gs_get_gen6_thread_count() */
+   uint32_t dw2, dw3, dw4, dw5, dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   if (!gs_get_gen6_ff(dev, info, &ff))
+      return false;
+
+   thread_count = gs_get_gen6_thread_count(dev, info);
+
+   dw2 = GEN6_THREADDISP_SPF |   /* set unconditionally */
+         ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+   dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
+         ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
+         ff.grf_start << GEN6_GS_DW4_URB_GRF_START__SHIFT;
+
+   dw5 = thread_count << GEN6_GS_DW5_MAX_THREADS__SHIFT;
+
+   if (info->stats_enable)
+      dw5 |= GEN6_GS_DW5_STATISTICS;
+   if (sol->stats_enable)
+      dw5 |= GEN6_GS_DW5_SO_STATISTICS;
+   if (!sol->render_disable)   /* the hardware bit is an enable, the info field a disable */
+      dw5 |= GEN6_GS_DW5_RENDER_ENABLE;
+
+   dw6 = 0;
+
+   /* GEN7_REORDER_TRAILING is handled by the kernel */
+   if (sol->tristrip_reorder == GEN7_REORDER_LEADING)
+      dw6 |= GEN6_GS_DW6_REORDER_LEADING_ENABLE;
+
+   if (sol->sol_enable) {
+      dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
+
+      if (sol->svbi_post_inc) {   /* post-increment is enabled only for a non-zero increment */
+         dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
+                sol->svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
+      }
+   }
+
+   if (info->dispatch_enable)
+      dw6 |= GEN6_GS_DW6_GS_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
+   gs->gs[0] = dw2;
+   gs->gs[1] = dw3;
+   gs->gs[2] = dw4;
+   gs->gs[3] = dw5;
+   gs->gs[4] = dw6;
+
+   return true;
+}
+
+static uint8_t
+gs_get_gen7_vertex_size(const struct ilo_dev *dev,
+                        const struct ilo_state_gs_info *info)
+{
+   uint8_t attr_count;
+   /* the size is the output attribute count minus one, never below zero */
+   ILO_DEV_ASSERT(dev, 7, 8);
+   attr_count = info->urb.output_attr_count;
+   return (attr_count > 0) ? attr_count - 1 : 0;
+}
+
+static bool
+gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_gs_info *info)
+{
+   struct vertex_ff ff;
+   uint32_t dispatch_dw, scratch_dw, urb_dw, control_dw;
+   uint16_t max_threads;
+   uint8_t output_size;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!gs_get_gen6_ff(dev, info, &ff))
+      return false;
+
+   max_threads = gs_get_gen6_thread_count(dev, info);
+   output_size = gs_get_gen7_vertex_size(dev, info);
+
+   /* DW2: sampler and binding table counts, plus UAV access on Gen7.5+ */
+   dispatch_dw = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+   dispatch_dw |= ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
+      dispatch_dw |= GEN75_THREADDISP_ACCESS_UAV;
+
+   /* DW3: per-thread scratch space */
+   scratch_dw = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   /* DW4: output size and topology, URB read window, starting GRF */
+   urb_dw = GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
+            output_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
+            0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
+            ff.vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
+            ff.vue_read_offset << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
+            ff.grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT;
+
+   /* DW5: the thread count has its own shift macro from Gen7.5 on */
+   control_dw = max_threads << ((ilo_dev_gen(dev) >= ILO_GEN(7.5)) ?
+      GEN75_GS_DW5_MAX_THREADS__SHIFT : GEN7_GS_DW5_MAX_THREADS__SHIFT);
+
+   if (info->stats_enable)
+      control_dw |= GEN7_GS_DW5_STATISTICS;
+   if (info->dispatch_enable)
+      control_dw |= GEN7_GS_DW5_GS_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
+   gs->gs[0] = dispatch_dw;
+   gs->gs[1] = scratch_dw;
+   gs->gs[2] = urb_dw;
+   gs->gs[3] = control_dw;
+
+   /* the fifth slot is written on Gen8+ only and carries the clip enables */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
+
+   return true;
+}
+
+bool
+ilo_state_vs_init(struct ilo_state_vs *vs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_vs_info *info)
+{
+   bool ok;
+
+   /* the caller must hand in a zero-filled state object */
+   assert(ilo_is_zeroed(vs, sizeof(*vs)));
+
+   ok = vs_set_gen6_3DSTATE_VS(vs, dev, info);
+   assert(ok);
+
+   return ok;
+}
+
+bool
+ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
+                           const struct ilo_dev *dev)
+{
+   struct ilo_state_vs_info zeroed;
+
+   /* an all-zero info leaves dispatch_enable and every other field off */
+   memset(&zeroed, 0, sizeof zeroed);
+   return ilo_state_vs_init(vs, dev, &zeroed);
+}
+
+bool
+ilo_state_hs_init(struct ilo_state_hs *hs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_hs_info *info)
+{
+   bool ok = true;
+
+   /* the caller must hand in a zero-filled state object */
+   assert(ilo_is_zeroed(hs, sizeof(*hs)));
+
+   /* nothing is written for devices older than Gen7 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      ok = hs_set_gen7_3DSTATE_HS(hs, dev, info);
+   assert(ok);
+   return ok;
+}
+
+bool
+ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
+                           const struct ilo_dev *dev)
+{
+   struct ilo_state_hs_info zeroed;
+
+   /* an all-zero info leaves dispatch_enable and every other field off */
+   memset(&zeroed, 0, sizeof zeroed);
+   return ilo_state_hs_init(hs, dev, &zeroed);
+}
+
+bool
+ilo_state_ds_init(struct ilo_state_ds *ds,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_ds_info *info)
+{
+   bool te_ok = true, ds_ok = true;
+
+   /* the caller must hand in a zero-filled state object */
+   assert(ilo_is_zeroed(ds, sizeof(*ds)));
+
+   /* on Gen7+ both setters always run, even when the first one fails */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      te_ok = ds_set_gen7_3DSTATE_TE(ds, dev, info);
+      ds_ok = ds_set_gen7_3DSTATE_DS(ds, dev, info);
+   }
+   assert(te_ok && ds_ok);
+   return te_ok && ds_ok;
+}
+
+bool
+ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
+                           const struct ilo_dev *dev)
+{
+   struct ilo_state_ds_info zeroed;
+
+   /* an all-zero info leaves dispatch_enable and every other field off */
+   memset(&zeroed, 0, sizeof zeroed);
+   return ilo_state_ds_init(ds, dev, &zeroed);
+}
+
+bool
+ilo_state_gs_init(struct ilo_state_gs *gs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_gs_info *info)
+{
+   bool ok;
+
+   /* the caller must hand in a zero-filled state object */
+   assert(ilo_is_zeroed(gs, sizeof(*gs)));
+
+   /* Gen6 has its own packing routine; newer gens share the Gen7 one */
+   ok = (ilo_dev_gen(dev) >= ILO_GEN(7)) ?
+      gs_set_gen7_3DSTATE_GS(gs, dev, info) :
+      gs_set_gen6_3DSTATE_GS(gs, dev, info);
+
+   assert(ok);
+   return ok;
+}
+
+bool
+ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
+                           const struct ilo_dev *dev)
+{
+   struct ilo_state_gs_info zeroed;
+
+   /* an all-zero info leaves dispatch_enable and every other field off */
+   memset(&zeroed, 0, sizeof zeroed);
+   return ilo_state_gs_init(gs, dev, &zeroed);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_shader.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,256 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_SHADER_H
+#define ILO_STATE_SHADER_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/**
+ * Kernel information: where one compiled kernel starts and what it needs.
+ */
+struct ilo_state_shader_kernel_info {
+   /* usually 0 unless the shader has multiple kernels */
+   uint32_t offset;
+   /* dispatch GRF start register; the PS path asserts it is below 128 */
+   uint8_t grf_start;
+   uint8_t pcb_attr_count; /* non-zero enables push constants in the PS path */
+   /* per-thread scratch size in bytes; the PS path asserts at most 2MB */
+   uint32_t scratch_size;
+};
+
+/**
+ * Shader resources: sampler, surface, and UAV usage of one shader stage.
+ */
+struct ilo_state_shader_resource_info {
+   /* for prefetches */
+   uint8_t sampler_count;
+   uint8_t surface_count;
+   /* when true, the GS and PS paths set their ACCESS_UAV bit on Gen7.5+ */
+   bool has_uav;
+};
+
+/**
+ * URB inputs/outputs of one shader stage.
+ */
+struct ilo_state_shader_urb_info {
+   uint8_t cv_input_attr_count;
+   /* NOTE(review): presumably feed the URB read offset/length -- confirm */
+   uint8_t read_base;
+   uint8_t read_count;
+   /* the Gen7+ GS output size is this count minus one (0 stays 0) */
+   uint8_t output_attr_count;
+   /* NOTE(review): presumably bitmasks of user cull/clip planes -- confirm */
+   uint8_t user_cull_enables;
+   uint8_t user_clip_enables;
+};
+
+struct ilo_state_vs_info {
+   struct ilo_state_shader_kernel_info kernel;
+   struct ilo_state_shader_resource_info resource;
+   struct ilo_state_shader_urb_info urb;
+   /* input of ilo_state_vs_init(); the _disabled variant passes all zeros */
+   bool dispatch_enable;
+   bool stats_enable;
+};
+
+struct ilo_state_hs_info {
+   struct ilo_state_shader_kernel_info kernel;
+   struct ilo_state_shader_resource_info resource;
+   struct ilo_state_shader_urb_info urb;
+   /* input of ilo_state_hs_init(); the _disabled variant passes all zeros */
+   bool dispatch_enable;
+   bool stats_enable;
+};
+
+struct ilo_state_ds_info {
+   struct ilo_state_shader_kernel_info kernel;
+   struct ilo_state_shader_resource_info resource;
+   struct ilo_state_shader_urb_info urb;
+   /* input of ilo_state_ds_init(); the _disabled variant passes all zeros */
+   bool dispatch_enable;
+   bool stats_enable;
+};
+
+/**
+ * Stream output.  Must be consistent with ilo_state_sol_info.
+ */
+struct ilo_state_gs_sol_info {
+   bool sol_enable;     /* Gen6: enables the SVBI payload */
+   bool stats_enable;   /* Gen6: enables SO statistics */
+   bool render_disable; /* Gen6: when false, the render enable bit is set */
+   /* Gen6: a non-zero value enables SVBI post-increment with this value */
+   uint16_t svbi_post_inc;
+   /* Gen6: only GEN7_REORDER_LEADING sets a bit in the packed state */
+   enum gen_reorder_mode tristrip_reorder;
+};
+
+struct ilo_state_gs_info {
+   struct ilo_state_shader_kernel_info kernel;
+   struct ilo_state_shader_resource_info resource;
+   struct ilo_state_shader_urb_info urb;
+   /* stream output controls, packed by gs_set_gen6_3DSTATE_GS() */
+   struct ilo_state_gs_sol_info sol;
+   /* set the GS enable and statistics bits; both off in an all-zero info */
+   bool dispatch_enable;
+   bool stats_enable;
+};
+
+struct ilo_state_ps_io_info {
+   /* inputs */
+   enum gen_position_offset posoffset; /* SAMPLE needs per-sample dispatch */
+   uint8_t attr_count;
+   bool use_z;
+   bool use_w;
+   bool use_coverage_mask;             /* asserted to be false on Gen6 */
+   /* GEN7_PSCDEPTH_OFF means the PS does not compute depth */
+   /* outputs */
+   enum gen_pscdepth_mode pscdepth;
+   bool has_rt_write;
+   bool write_pixel_mask;              /* either mask write marks the PS ... */
+   bool write_omask;                   /* ... as one that may kill pixels */
+};
+
+struct ilo_state_ps_params_info {
+   /* compatibility with raster states */
+   uint32_t sample_mask;       /* Gen7.5 3DSTATE_PS uses the low 8 bits */
+   bool earlyz_control_psexec; /* forces PS dispatch to be required */
+   /* alpha_may_kill feeds kill-pixel; has_writeable_rt gates RT writes */
+   /* compatibility with cc states */
+   bool alpha_may_kill;
+   bool dual_source_blending;
+   bool has_writeable_rt;
+};
+
+struct ilo_state_ps_info {
+   struct ilo_state_shader_kernel_info kernel_8;
+   struct ilo_state_shader_kernel_info kernel_16;
+   struct ilo_state_shader_kernel_info kernel_32;
+   struct ilo_state_shader_resource_info resource;
+   /* io must be all-zero when valid_kernels is 0 (asserted on validation) */
+   struct ilo_state_ps_io_info io;
+   struct ilo_state_ps_params_info params;
+
+   /* bitmask of GEN6_PS_DISPATCH_x */
+   uint8_t valid_kernels;
+   bool per_sample_dispatch;
+   bool sample_count_one;
+   bool cv_per_sample_interp; /* asserted off without per_sample_dispatch */
+   bool cv_has_earlyz_op;     /* asserted off when any kernel is valid */
+   /* either one removes the 8-pixel dispatch mode */
+   bool rt_clear_enable;
+   bool rt_resolve_enable;
+   /* on Gen6, asserted to be set whenever the PS computes depth */
+   bool cv_has_depth_buffer;
+};
+
+struct ilo_state_vs {
+   uint32_t vs[5]; /* presumably filled by ilo_state_vs_init() -- confirm */
+};
+
+struct ilo_state_hs {
+   uint32_t hs[4]; /* presumably filled by ilo_state_hs_init() -- confirm */
+};
+
+struct ilo_state_ds {
+   uint32_t te[3]; /* presumably the 3DSTATE_TE part -- confirm */
+   uint32_t ds[5]; /* presumably the 3DSTATE_DS part -- confirm */
+};
+
+struct ilo_state_gs {
+   uint32_t gs[5]; /* gs[0..3] hold DW2..DW5; gs[4] is DW6 on Gen6 */
+};
+
+struct ilo_state_ps {
+   uint32_t ps[8];            /* packed dwords; layout depends on the gen */
+   /* NOTE(review): presumably a copy of pixel_ff's conds -- confirm */
+   struct ilo_state_ps_dispatch_conds {
+      bool ps_valid;          /* at least one kernel is valid */
+      /* the values below describe what the shader itself may do */
+      bool has_rt_write;
+      bool write_odepth;      /* pscdepth is not GEN7_PSCDEPTH_OFF */
+      bool write_ostencil;    /* ps_get_gen6_ff() always stores false */
+      bool has_uav_write;     /* taken from the resource info's has_uav */
+      bool ps_may_kill;       /* write_pixel_mask or write_omask is set */
+   } conds;
+};
+
+bool
+ilo_state_vs_init(struct ilo_state_vs *vs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_vs_info *info);
+/* vs/hs/ds/gs *_init_disabled() run the matching *_init() on a zeroed info */
+bool
+ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
+                           const struct ilo_dev *dev);
+
+bool
+ilo_state_hs_init(struct ilo_state_hs *hs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_hs_info *info);
+
+bool
+ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
+                           const struct ilo_dev *dev);
+
+/* ilo_state_hs_init() and ilo_state_ds_init() only write state on Gen7+ */
+bool
+ilo_state_ds_init(struct ilo_state_ds *ds,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_ds_info *info);
+
+bool
+ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
+                           const struct ilo_dev *dev);
+/* picks the Gen6 or the Gen7+ 3DSTATE_GS packing from the device gen */
+bool
+ilo_state_gs_init(struct ilo_state_gs *gs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_gs_info *info);
+
+bool
+ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
+                           const struct ilo_dev *dev);
+
+bool
+ilo_state_ps_init(struct ilo_state_ps *ps,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_ps_info *info);
+
+bool
+ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
+                           const struct ilo_dev *dev);
+
+bool
+ilo_state_ps_set_params(struct ilo_state_ps *ps,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_ps_params_info *params);
+
+#endif /* ILO_STATE_SHADER_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,771 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_shader.h"
+
+struct pixel_ff {
+   uint8_t dispatch_modes;  /* GEN6_PS_DISPATCH_x bits left after filtering */
+   /* with several modes: slot 0 is the 8-, slot 1 the 32-, slot 2 the 16-wide */
+   uint32_t kernel_offsets[3];
+   uint8_t grf_starts[3];
+   bool pcb_enable;         /* an enabled kernel has a non-zero PCB count */
+   uint8_t scratch_space;   /* power-of-two encoding, 0 being 1KB */
+   /* sampler_count is in groups of four and capped at 4 */
+   uint8_t sampler_count;
+   uint8_t surface_count;
+   bool has_uav;
+   /* value returned by ps_get_gen6_thread_count() */
+   uint16_t thread_count;
+   /* shader-side conditions combined with the params info below */
+   struct ilo_state_ps_dispatch_conds conds;
+   /* derived or copied from ilo_state_ps_params_info */
+   bool kill_pixel;
+   bool dispatch_enable;
+   bool dual_source_blending;
+   uint32_t sample_mask;
+};
+
+static bool
+ps_kernel_validate_gen6(const struct ilo_dev *dev,
+                        const struct ilo_state_shader_kernel_info *kernel)
+{
+   /*
+    * "Dispatch GRF Start Register for Constant/Setup Data" is a U7 field.
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 271:
+    *     "(Per-Thread Scratch Space)
+    *      Range  [0,11] indicating [1k bytes, 2M bytes] in powers of two"
+    */
+   const uint8_t grf_start_end = 1 << 7;
+   const uint32_t scratch_size_limit = 2u << 20;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* "Kernel Start Pointer" is 64-byte aligned */
+   assert((kernel->offset & 63) == 0);
+   assert(kernel->grf_start < grf_start_end);
+   assert(kernel->scratch_size <= scratch_size_limit);
+
+   /* violations are reported by the asserts above only */
+   return true;
+}
+
+static bool
+ps_validate_gen6(const struct ilo_dev *dev,
+                 const struct ilo_state_ps_info *info)
+{
+   const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
+   const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
+   const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
+   const struct ilo_state_ps_io_info *io = &info->io;
+   /* info is only inspected here; most checks are asserts */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!ps_kernel_validate_gen6(dev, kernel_8) ||
+       !ps_kernel_validate_gen6(dev, kernel_16) ||
+       !ps_kernel_validate_gen6(dev, kernel_32))
+      return false;
+
+   /* use_coverage_mask is unsupported on Gen6 */
+   if (ilo_dev_gen(dev) == ILO_GEN(6))
+      assert(!io->use_coverage_mask);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
+    *      field must be set to disabled."
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF)
+      assert(info->cv_has_depth_buffer);
+
+   if (!info->per_sample_dispatch) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 281:
+       *
+       *     "MSDISPMODE_PERSAMPLE is required in order to select
+       *      POSOFFSET_SAMPLE."
+       */
+      assert(io->posoffset != GEN6_POSOFFSET_SAMPLE);
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 282:
+       *
+       *     "MSDISPMODE_PERSAMPLE is required in order to select
+       *      INTERP_SAMPLE."
+       *
+       * From the Sandy Bridge PRM, volume 2 part 1, page 283:
+       *
+       *     "MSDISPMODE_PERSAMPLE is required in order to select Perspective
+       *      Sample or Non-perspective Sample barycentric coordinates."
+       */
+      assert(!info->cv_per_sample_interp);
+   }
+
+   /*
+    * Early-z op restrictions.
+    * From the Sandy Bridge PRM, volume 2 part 1, page 314:
+    *
+    *     "Pixel Shader Dispatch, Alpha... must all be disabled."
+    *
+    * Simply disallow any valid kernel when there is early-z op.  Also, when
+    * there is no valid kernel, io should be zeroed.
+    */
+   if (info->valid_kernels)
+      assert(!info->cv_has_earlyz_op);
+   else
+      assert(ilo_is_zeroed(io, sizeof(*io)));
+
+   return true;
+}
+
+static uint8_t
+ps_get_gen6_dispatch_modes(const struct ilo_dev *dev,
+                           const struct ilo_state_ps_info *info)
+{
+   const struct ilo_state_ps_io_info *io = &info->io;
+   uint8_t dispatch_modes = info->valid_kernels;
+   /* start from the kernels the shader offers, then mask out invalid modes */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!dispatch_modes)
+      return 0; /* no valid kernel, so there is nothing to filter */
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 334:
+    *
+    *     "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
+    *      computed depth."
+    *
+    *     "Valid on all products, except when in non-1x PERSAMPLE mode
+    *      (applies to [DevSNB+] only)"
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 239:
+    *
+    *     "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
+    *      is PERPIXEL, Message Type for Render Target Write must be SIMD8.
+    *
+    *      Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
+    *      type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
+    *
+    * It is really hard to follow what combinations are valid on what
+    * platforms.  Judging from the restrictions on RT write messages on Gen6,
+    * oDepth and oMask related issues should be Gen6-specific.  PERSAMPLE
+    * issue should be universal, and disallows multiple dispatch modes.
+    */
+   if (ilo_dev_gen(dev) == ILO_GEN(6)) {
+      if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch)
+         dispatch_modes &= GEN6_PS_DISPATCH_8;
+      if (io->write_omask)
+         dispatch_modes &= ~GEN6_PS_DISPATCH_8;
+   }
+   if (info->per_sample_dispatch && !info->sample_count_one) {
+      /* prefer 32 over 16 over 8 */
+      if (dispatch_modes & GEN6_PS_DISPATCH_32)
+         dispatch_modes &= GEN6_PS_DISPATCH_32;
+      else if (dispatch_modes & GEN6_PS_DISPATCH_16)
+         dispatch_modes &= GEN6_PS_DISPATCH_16;
+      else
+         dispatch_modes &= GEN6_PS_DISPATCH_8;
+   }
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 149:
+    *
+    *     "When Render Target Fast Clear Enable is ENABLED or Render Target
+    *      Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
+    *      Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
+    */
+   if (info->rt_clear_enable || info->rt_resolve_enable)
+      dispatch_modes &= ~GEN6_PS_DISPATCH_8;
+   /* fires when the filtering above removed every mode the shader offers */
+   assert(dispatch_modes);
+
+   return dispatch_modes;
+}
+
+static uint16_t
+ps_get_gen6_thread_count(const struct ilo_dev *dev,
+                         const struct ilo_state_ps_info *info)
+{
+   uint16_t thread_count;
+   /* picks a per-gen/GT maximum and returns it minus one; info is unused */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* Maximum Number of Threads of 3DSTATE_PS */
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+      /* scaled automatically; ends up as 62 after the final decrement */
+      thread_count = 64 - 1;
+      break;
+   case ILO_GEN(7.5):
+      thread_count = (dev->gt == 3) ? 408 :
+                     (dev->gt == 2) ? 204 : 102;
+      break;
+   case ILO_GEN(7):
+      thread_count = (dev->gt == 2) ? 172 : 48;
+      break;
+   case ILO_GEN(6):
+   default:
+      /* from the classic driver instead of the PRM */
+      thread_count = (dev->gt == 2) ? 80 : 40;
+      break;
+   }
+
+   return thread_count - 1;
+}
+
+static bool
+ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev,
+                              const struct ilo_state_ps_params_info *params,
+                              const struct ilo_state_ps_dispatch_conds *conds)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* true when the shader (conds) or the alpha state (params) may discard */
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
+    *      PS kernel or color calculator has the ability to kill (discard)
+    *      pixels or samples, other than due to depth or stencil testing.
+    *      This bit is required to be ENABLED in the following situations:
+    *
+    *      The API pixel shader program contains "killpix" or "discard"
+    *      instructions, or other code in the pixel shader kernel that can
+    *      cause the final pixel mask to differ from the pixel mask received
+    *      on dispatch.
+    *
+    *      A sampler with chroma key enabled with kill pixel mode is used by
+    *      the pixel shader.
+    *
+    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
+    *      enabled.
+    *
+    *      The pixel shader kernel generates and outputs oMask.
+    *
+    *      Note: As ClipDistance clipping is fully supported in hardware and
+    *      therefore not via PS instructions, there should be no need to
+    *      ENABLE this bit due to ClipDistance clipping."
+    */
+   return (conds->ps_may_kill || params->alpha_may_kill);
+}
+
+static bool
+ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev,
+                                   const struct ilo_state_ps_params_info *params,
+                                   const struct ilo_state_ps_dispatch_conds *conds)
+{
+   /*
+    * We want to skip dispatching when EarlyZ suffices.  The conditions that
+    * require dispatching are
+    *
+    *  - PS writes RTs and RTs are writeable
+    *  - PS changes depth value and depth test/write is enabled
+    *  - PS changes stencil value and stencil test is enabled
+    *  - PS writes UAVs
+    *  - PS or CC kills pixels
+    *  - EDSC is PSEXEC, and depth test/write or stencil test is enabled
+    */
+   bool dispatch_required =
+      ((conds->has_rt_write && params->has_writeable_rt) ||
+       conds->write_odepth ||
+       conds->write_ostencil ||
+       conds->has_uav_write ||
+       ps_params_get_gen6_kill_pixel(dev, params, conds) ||
+       params->earlyz_control_psexec);
+   /* NOTE(review): depth/stencil enables from the list above are not checked */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 280:
+    *
+    *     "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
+    *      set."
+    */
+   if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec)
+      dispatch_required = true; /* NOTE(review): already implied by the OR above */
+
+   /* assert it is valid to dispatch */
+   if (dispatch_required)
+      assert(conds->ps_valid);
+
+   return dispatch_required;
+}
+
+static bool
+ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
+                       const struct ilo_state_ps_info *info,
+                       struct pixel_ff *ff)
+{
+   const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
+   const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
+   const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
+   uint32_t scratch_size;
+   /* fills the dispatch modes, kernel slots, PCB and scratch fields of ff */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);
+
+   /* initialize kernel offsets and GRF starts; one mode uses slot 0 only */
+   if (util_is_power_of_two(ff->dispatch_modes)) {
+      if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
+         ff->kernel_offsets[0] = kernel_8->offset;
+         ff->grf_starts[0] = kernel_8->grf_start;
+      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
+         ff->kernel_offsets[0] = kernel_16->offset;
+         ff->grf_starts[0] = kernel_16->grf_start;
+      } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
+         ff->kernel_offsets[0] = kernel_32->offset;
+         ff->grf_starts[0] = kernel_32->grf_start;
+      }
+   } else { /* several modes: the slots are 8, 32 and 16, in that order */
+      ff->kernel_offsets[0] = kernel_8->offset;
+      ff->kernel_offsets[1] = kernel_32->offset;
+      ff->kernel_offsets[2] = kernel_16->offset;
+
+      ff->grf_starts[0] = kernel_8->grf_start;
+      ff->grf_starts[1] = kernel_32->grf_start;
+      ff->grf_starts[2] = kernel_16->grf_start;
+   }
+
+   /* we do not want to save it */
+   assert(ff->kernel_offsets[0] == 0);
+   /* push constants are on if any enabled kernel has a non-zero PCB count */
+   ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
+                      kernel_8->pcb_attr_count) ||
+                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
+                      kernel_16->pcb_attr_count) ||
+                     ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
+                      kernel_32->pcb_attr_count));
+   /* the scratch size is the largest one among the enabled kernels */
+   scratch_size = 0;
+   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
+       scratch_size < kernel_8->scratch_size)
+      scratch_size = kernel_8->scratch_size;
+   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
+       scratch_size < kernel_16->scratch_size)
+      scratch_size = kernel_16->scratch_size;
+   if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
+       scratch_size < kernel_32->scratch_size)
+      scratch_size = kernel_32->scratch_size;
+
+   /* next power of two, starting from 1KB */
+   ff->scratch_space = (scratch_size > 1024) ?
+      (util_last_bit(scratch_size - 1) - 10): 0;
+
+   /* GPU hangs on Haswell if none of the dispatch mode bits is set */
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
+      ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
+
+   return true;
+}
+
+static bool
+ps_get_gen6_ff(const struct ilo_dev *dev,
+               const struct ilo_state_ps_info *info,
+               struct pixel_ff *ff)
+{
+   const struct ilo_state_shader_resource_info *resource = &info->resource;
+   const struct ilo_state_ps_io_info *io = &info->io;
+   const struct ilo_state_ps_params_info *params = &info->params;
+   /* derives every pixel_ff field from info; ff is cleared first */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   memset(ff, 0, sizeof(*ff));
+   /* validation runs first; the kernel fields are only filled if it passes */
+   if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
+      return false;
+   /* samplers are counted in groups of four; more than 12 is stored as 4 */
+   ff->sampler_count = (resource->sampler_count <= 12) ?
+      (resource->sampler_count + 3) / 4 : 4;
+   ff->surface_count = resource->surface_count;
+   ff->has_uav = resource->has_uav;
+
+   ff->thread_count = ps_get_gen6_thread_count(dev, info);
+   /* shader-side conditions used for the kill-pixel and dispatch decisions */
+   ff->conds.ps_valid = (info->valid_kernels != 0x0);
+   ff->conds.has_rt_write = io->has_rt_write;
+   ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF);
+   ff->conds.write_ostencil = false;
+   ff->conds.has_uav_write = resource->has_uav;
+   ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask);
+   /* fields that also depend on the params info */
+   ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds);
+   ff->dispatch_enable =
+      ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds);
+   ff->dual_source_blending = params->dual_source_blending;
+   ff->sample_mask = params->sample_mask;
+
+   return true;
+}
+
+static bool
+ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_ps_info *info,
+                       const struct pixel_ff *ff)
+{
+   const struct ilo_state_ps_io_info *io = &info->io;
+   uint32_t dw2, dw3, dw4, dw5, dw6;
+   /* Gen6 only: pack DW2..DW6 and kernel offsets 1 and 2 into ps->ps[0..6] */
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+   /* placeholder: the alternate FP mode is never selected */
+   if (false)
+      dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+   dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
+         ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
+         ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
+
+   dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT |
+         ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
+
+   if (ff->kill_pixel)
+      dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
+
+   if (io->pscdepth != GEN7_PSCDEPTH_OFF)
+      dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
+   if (io->use_z)
+      dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
+
+   if (ff->dispatch_enable)
+      dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+
+   if (io->write_omask)
+      dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK;
+   if (io->use_w)
+      dw5 |= GEN6_WM_DW5_PS_USE_W;
+
+   if (ff->dual_source_blending)
+      dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
+
+   dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
+         io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;
+
+   dw6 |= (info->per_sample_dispatch) ?
+      GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
+   /* ps[0..4] hold DW2..DW6; ps[5..6] the offsets of kernel slots 1 and 2 */
+   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7);
+   ps->ps[0] = dw2;
+   ps->ps[1] = dw3;
+   ps->ps[2] = dw4;
+   ps->ps[3] = dw5;
+   ps->ps[4] = dw6;
+   ps->ps[5] = ff->kernel_offsets[1];
+   ps->ps[6] = ff->kernel_offsets[2];
+
+   return true;
+}
+
+static bool
+ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_ps_info *info,
+                       const struct pixel_ff *ff)
+{
+   const struct ilo_state_ps_io_info *io = &info->io;
+   uint32_t wm_ctrl, wm_msaa;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   /* DW1: computed-depth mode plus the dispatch, kill and payload flags */
+   wm_ctrl = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT;
+   wm_ctrl |= (ff->dispatch_enable) ? GEN7_WM_DW1_PS_DISPATCH_ENABLE : 0;
+   wm_ctrl |= (ff->kill_pixel) ? GEN7_WM_DW1_PS_KILL_PIXEL : 0;
+   wm_ctrl |= (io->use_z) ? GEN7_WM_DW1_PS_USE_DEPTH : 0;
+   wm_ctrl |= (io->use_w) ? GEN7_WM_DW1_PS_USE_W : 0;
+   wm_ctrl |= (io->use_coverage_mask) ? GEN7_WM_DW1_PS_USE_COVERAGE_MASK : 0;
+
+   /* DW2: multisample dispatch mode */
+   if (info->per_sample_dispatch)
+      wm_msaa = GEN7_WM_DW2_MSDISPMODE_PERSAMPLE;
+   else
+      wm_msaa = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
+
+   /*
+    * Only the first two slots are written here; ps_set_gen7_3DSTATE_PS()
+    * fills slots 2..7 of the same array.
+    */
+   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2);
+   ps->ps[0] = wm_ctrl;
+   ps->ps[1] = wm_msaa;
+   return true;
+}
+
+static bool
+ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_ps_info *info,
+                       const struct pixel_ff *ff)
+{
+   const struct ilo_state_ps_io_info *io = &info->io;
+   uint32_t dw2, dw3, dw4, dw5;
+   /* Gen7/7.5: pack DW2..DW5 and kernel offsets 1 and 2 into ps->ps[2..7] */
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+   /* placeholder: the alternate FP mode is never selected */
+   if (false)
+      dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+   dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
+         ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
+   /* Gen7.5 has its own thread count shift and also takes 8 sample mask bits */
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
+      dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT |
+             (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+   } else {
+      dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+   }
+
+   if (ff->pcb_enable)
+      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
+   if (io->attr_count)
+      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
+   if (io->write_omask)
+      dw4 |= GEN7_PS_DW4_COMPUTE_OMASK;
+   if (info->rt_clear_enable)
+      dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR;
+   if (ff->dual_source_blending)
+      dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+   if (info->rt_resolve_enable)
+      dw4 |= GEN7_PS_DW4_RT_RESOLVE;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav)
+      dw4 |= GEN75_PS_DW4_ACCESS_UAV;
+
+   dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
+         ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
+         ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
+   /* slots 0 and 1 are left for ps_set_gen7_3dstate_wm() */
+   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8);
+   ps->ps[2] = dw2;
+   ps->ps[3] = dw3;
+   ps->ps[4] = dw4;
+   ps->ps[5] = dw5;
+   ps->ps[6] = ff->kernel_offsets[1];
+   ps->ps[7] = ff->kernel_offsets[2];
+
+   return true;
+}
+
+static bool
+ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_ps_info *info,
+                       const struct pixel_ff *ff)
+{
+   const struct ilo_state_ps_io_info *io = &info->io;
+   uint32_t dw3, dw4, dw6, dw7;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   dw3 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
+         ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
+
+   if (false) /* never taken: the alternate FP mode bit stays clear */
+      dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
+
+   dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+
+   dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
+         io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
+         ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
+
+   if (ff->pcb_enable)
+      dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
+
+   if (info->rt_clear_enable)
+      dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR;
+   if (info->rt_resolve_enable)
+      dw6 |= GEN8_PS_DW6_RT_RESOLVE;
+
+   dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
+         ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
+         ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6);
+   ps->ps[0] = dw3; /* command dwords are stored densely in ps[0..5] */
+   ps->ps[1] = dw4;
+   ps->ps[2] = dw6;
+   ps->ps[3] = dw7;
+   ps->ps[4] = ff->kernel_offsets[1]; /* NOTE(review): overwritten by ps_set_gen8_3DSTATE_PS_EXTRA() */
+   ps->ps[5] = ff->kernel_offsets[2];
+
+   return true;
+}
+
+static bool
+ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps,
+                             const struct ilo_dev *dev,
+                             const struct ilo_state_ps_info *info,
+                             const struct pixel_ff *ff)
+{
+   const struct ilo_state_ps_io_info *io = &info->io;
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT;
+
+   if (info->valid_kernels)
+      dw1 |= GEN8_PSX_DW1_VALID;
+   if (!io->has_rt_write)
+      dw1 |= GEN8_PSX_DW1_UAV_ONLY;
+   if (io->write_omask)
+      dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK;
+   if (io->write_pixel_mask)
+      dw1 |= GEN8_PSX_DW1_KILL_PIXEL;
+
+   if (io->use_z)
+      dw1 |= GEN8_PSX_DW1_USE_DEPTH;
+   if (io->use_w)
+      dw1 |= GEN8_PSX_DW1_USE_W;
+   if (io->attr_count)
+      dw1 |= GEN8_PSX_DW1_ATTR_ENABLE;
+
+   if (info->per_sample_dispatch)
+      dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
+   if (ff->has_uav)
+      dw1 |= GEN8_PSX_DW1_ACCESS_UAV;
+   if (io->use_coverage_mask)
+      dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK;
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 151:
+    *
+    *     "When this bit (Pixel Shader Valid) clear the rest of this command
+    *      should also be clear."
+    */
+   if (!info->valid_kernels)
+      dw1 = 0; /* discards every bit accumulated above */
+
+   STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5);
+   ps->ps[4] = dw1; /* NOTE(review): clobbers kernel_offsets[1] stored by ps_set_gen8_3DSTATE_PS(); confirm */
+
+   return true;
+}
+
+bool
+ilo_state_ps_init(struct ilo_state_ps *ps,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_ps_info *info)
+{
+   struct pixel_ff ff;
+   bool ret = true;
+
+   assert(ilo_is_zeroed(ps, sizeof(*ps))); /* caller must hand in a zeroed ps */
+
+   ret &= ps_get_gen6_ff(dev, info, &ff);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) { /* order matters: PS_EXTRA writes ps[4] after PS */
+      ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff);
+      ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff);
+   } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff);
+      ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff);
+   } else {
+      ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
+   }
+
+   /* save conditions; ilo_state_ps_set_params() passes &ps->conds to its helpers */
+   ps->conds = ff.conds;
+
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
+                           const struct ilo_dev *dev)
+{
+   struct ilo_state_ps_info info;
+
+   memset(&info, 0, sizeof(info)); /* all-zero info, so info.valid_kernels is 0 */
+
+   return ilo_state_ps_init(ps, dev, &info);
+}
+
+bool
+ilo_state_ps_set_params(struct ilo_state_ps *ps,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_ps_params_info *params)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* modify sample mask (only the Gen7.5 path keeps one in ps[4]) */
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
+      ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) |
+         (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+   }
+
+   /* modify dispatch enable, pixel kill, and dual source blending (pre-Gen8 only) */
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(7)) { /* Gen7/7.5: WM bits in ps[0], PS bits in ps[4] */
+         if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
+            ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
+         else
+            ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE;
+
+         if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
+            ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL;
+         else
+            ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL;
+
+         if (params->dual_source_blending)
+            ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+         else
+            ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND;
+      } else { /* Gen6: all three bits are in ps[3] */
+         if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
+            ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+         else
+            ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+
+         if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
+            ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL;
+         else
+            ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL;
+
+         if (params->dual_source_blending)
+            ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
+         else
+            ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
+      }
+   }
+
+   return true;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sol.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sol.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sol.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sol.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,467 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_vma.h"
+#include "ilo_state_sol.h"
+
+static bool
+sol_stream_validate_gen7(const struct ilo_dev *dev,
+                         const struct ilo_state_sol_stream_info *stream)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   assert(stream->vue_read_base + stream->vue_read_count <=
+         stream->cv_vue_attr_count);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 200:
+    *
+    *     "(Stream 0 Vertex Read Offset)
+    *      Format: U1 count of 256-bit units
+    *
+    *      Specifies amount of data to skip over before reading back Stream 0
+    *      vertex data. Must be zero if the GS is enabled and the Output
+    *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
+    *      unit)."
+    *
+    *     "(Stream 0 Vertex Read Length)
+    *      Format: U5-1 count of 256-bit units
+    *
+    *      Specifies amount of vertex data to read back for Stream 0 vertices,
+    *      starting at the Stream 0 Vertex Read Offset location. Maximum
+    *      readback is 17 256-bit units (34 128-bit vertex attributes). Read
+    *      data past the end of the valid vertex data has undefined contents,
+    *      and therefore shouldn't be used to source stream out data.  Must be
+    *      zero (i.e., read length = 256b) if the GS is enabled and the Output
+    *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
+    *      unit)."
+    */
+   assert(stream->vue_read_base == 0 || stream->vue_read_base == 2); /* halved by the caller into a U1 */
+   assert(stream->vue_read_count <= 34);
+
+   assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT);
+
+   for (i = 0; i < stream->decl_count; i++) {
+      const struct ilo_state_sol_decl_info *decl = &stream->decls[i];
+
+      assert(decl->is_hole || decl->attr < stream->vue_read_count);
+
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 205:
+       *
+       *     "There is only enough internal storage for the 128-bit vertex
+       *      header and 32 128-bit vertex attributes."
+       */
+      assert(decl->attr < 33); /* checked for holes as well */
+
+      assert(decl->component_base < 4 &&
+             decl->component_base + decl->component_count <= 4);
+      assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT);
+   }
+
+   return true; /* violations are reported through assert() only */
+}
+
+static bool
+sol_validate_gen7(const struct ilo_dev *dev,
+                  const struct ilo_state_sol_info *info)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 198:
+    *
+    *     "This bit (Render Stream Select) is used even if SO Function Enable
+    *      is DISABLED."
+    *
+    * From the Haswell PRM, volume 2b, page 796:
+    *
+    *     "SO Function Enable must also be ENABLED in order for this field
+    *      (Render Stream Select) to select a stream for rendering. When SO
+    *      Function Enable is DISABLED and Rendering Disable is cleared (i.e.,
+    *      rendering is enabled), StreamID is ignored downstream of the SO
+    *      stage, allowing any stream to be rendered."
+    *
+    * We want Gen7 behavior, but we have to require users to follow Gen7.5
+    * behavior: info->sol_enable must be set for info->render_stream to work.
+    */
+
+   for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
+      if (!sol_stream_validate_gen7(dev, &info->streams[i]))
+         return false;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 208:
+    *
+    *     "(Surface Pitch)
+    *      [0,2048]  Must be 0 or a multiple of 4 Bytes."
+    */
+   for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) {
+      assert(info->buffer_strides[i] <= 2048 &&
+             info->buffer_strides[i] % 4 == 0);
+   }
+
+   return true;
+}
+
+static bool
+sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *sol,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_sol_info *info)
+{
+   struct {
+      uint8_t offset;
+      uint8_t len;
+   } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT];
+   uint8_t i;
+   uint32_t dw1, dw2;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!sol_validate_gen7(dev, info))
+      return false;
+
+   for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
+      const struct ilo_state_sol_stream_info *stream = &info->streams[i];
+
+      vue_read[i].offset = stream->vue_read_base / 2; /* attributes -> pairs */
+      /*
+       * In pairs minus 1.  URB entries are aligned to 512-bits.  There is no
+       * need to worry about reading past entries.
+       */
+      vue_read[i].len = (stream->vue_read_count + 1) / 2;
+      if (vue_read[i].len) /* a count of 0 stays 0 instead of wrapping */
+         vue_read[i].len--;
+   }
+
+   dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
+         info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT;
+
+   if (info->sol_enable)
+      dw1 |= GEN7_SO_DW1_SO_ENABLE;
+
+   if (info->render_disable)
+      dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
+
+   if (info->stats_enable)
+      dw1 |= GEN7_SO_DW1_STATISTICS;
+
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) { /* pre-Gen8: buffer i is enabled iff its stride is non-zero */
+      const uint8_t buffer_enables = ((bool) info->buffer_strides[3]) << 3 |
+                                     ((bool) info->buffer_strides[2]) << 2 |
+                                     ((bool) info->buffer_strides[1]) << 1 |
+                                     ((bool) info->buffer_strides[0]);
+      dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
+   }
+
+   dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
+         vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
+         vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
+         vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
+         vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
+         vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
+         vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
+         vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(sol->streamout) >= 2);
+   sol->streamout[0] = dw1;
+   sol->streamout[1] = dw2;
+
+   memcpy(sol->strides, info->buffer_strides, sizeof(sol->strides)); /* strides are kept on every gen */
+
+   return true;
+}
+
+static bool
+sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *sol,
+                                  const struct ilo_dev *dev,
+                                  const struct ilo_state_sol_info *info,
+                                  uint8_t max_decl_count)
+{
+   uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT];
+   uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT];
+   uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT];
+   uint32_t dw1, dw2;
+   uint8_t i, j;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count); /* only the entries copied out below */
+
+   for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
+      const struct ilo_state_sol_stream_info *stream = &info->streams[i];
+
+      assert(stream->decl_count <= max_decl_count);
+      decl_counts[i] = stream->decl_count;
+      buffer_selects[i] = 0;
+
+      for (j = 0; j < stream->decl_count; j++) {
+         const struct ilo_state_sol_decl_info *decl = &stream->decls[j];
+         const uint8_t mask = ((1 << decl->component_count) - 1) <<
+            decl->component_base;
+         uint16_t val;
+
+         val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
+               mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
+
+         if (decl->is_hole) /* holes carry no register index */
+            val |= GEN7_SO_DECL_HOLE_FLAG;
+         else
+            val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT;
+
+         decl_list[j] |= (uint64_t) val << (16 * i); /* entry j: one 16-bit decl per stream */
+         buffer_selects[i] |= 1 << decl->buffer;
+      }
+   }
+
+   dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
+         buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
+         buffer_selects[1] << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
+         buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
+   dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
+         decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
+         decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
+         decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
+
+   STATIC_ASSERT(ARRAY_SIZE(sol->so_decl) >= 2);
+   sol->so_decl[0] = dw1;
+   sol->so_decl[1] = dw2;
+
+   STATIC_ASSERT(ARRAY_SIZE(sol->decl[0]) == 2); /* each uint64_t entry is copied as 2 dwords */
+   memcpy(sol->decl, decl_list, sizeof(sol->decl[0]) * max_decl_count);
+   sol->decl_count = max_decl_count;
+
+   return true;
+}
+
+static bool
+sol_buffer_validate_gen7(const struct ilo_dev *dev,
+                         const struct ilo_state_sol_buffer_info *info)
+{
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 208:
+    *
+    *     "(Surface Base Address) This field specifies the starting DWord
+    *      address..."
+    */
+   assert(info->offset % 4 == 0);
+
+   if (info->vma) { /* a NULL vma skips the size checks */
+      assert(info->vma->vm_alignment % 4 == 0);
+      assert(info->size && info->offset + info->size <= info->vma->vm_size);
+   }
+
+   /* write offset load/save is Gen8+ only and requires write_offset_vma */
+   if (info->write_offset_load || info->write_offset_save) {
+      assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma);
+      assert(info->write_offset_offset + sizeof(uint32_t) <=
+            info->write_offset_vma->vm_size);
+   }
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 206:
+    *
+    *     "This field (Stream Offset) specifies the Offset in stream output
+    *      buffer to start at, or whether to append to the end of an existing
+    *      buffer. The Offset must be DWORD aligned."
+    */
+   if (info->write_offset_imm_enable) {
+      assert(info->write_offset_load); /* an immediate offset implies a load */
+      assert(info->write_offset_imm % 4 == 0);
+   }
+
+   return true;
+}
+
+static uint32_t
+sol_buffer_get_gen6_size(const struct ilo_dev *dev,
+                         const struct ilo_state_sol_buffer_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 208:
+    *
+    *     "(Surface End Address) This field specifies the ending DWord
+    *      address..."
+    */
+   return (info->vma) ? info->size & ~3 : 0; /* rounded down to 4 bytes; 0 without a vma */
+}
+
+static bool
+sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
+                                      const struct ilo_dev *dev,
+                                      const struct ilo_state_sol_buffer_info *info)
+{
+   const uint32_t size = sol_buffer_get_gen6_size(dev, info);
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   if (!sol_buffer_validate_gen7(dev, info))
+      return false;
+
+   STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 2);
+   sb->so_buf[0] = info->offset; /* start offset */
+   sb->so_buf[1] = (size) ? info->offset + size : 0; /* end offset; 0 when size is 0 */
+
+   return true;
+}
+
+static bool
+sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
+                                      const struct ilo_dev *dev,
+                                      const struct ilo_state_sol_buffer_info *info)
+{
+   const uint32_t size = sol_buffer_get_gen6_size(dev, info);
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!sol_buffer_validate_gen7(dev, info))
+      return false;
+
+   dw1 = 0;
+
+   if (info->vma) /* the buffer is enabled iff a vma is given */
+      dw1 |= GEN8_SO_BUF_DW1_ENABLE;
+   if (info->write_offset_load)
+      dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
+   if (info->write_offset_save)
+      dw1 |= GEN8_SO_BUF_DW1_OFFSET_ENABLE;
+
+   STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 4);
+   sb->so_buf[0] = dw1;
+   sb->so_buf[1] = info->offset;
+
+   /*
+    * From the Broadwell PRM, volume 2b, page 205:
+    *
+    *     "This field (Surface Size) specifies the size of buffer in number
+    *      DWords minus 1 of the buffer in Graphics Memory."
+    */
+   sb->so_buf[2] = (size) ? size / 4 - 1 : 0;
+
+   /* load from imm or sb->write_offset_vma (~0u when no immediate is given) */
+   sb->so_buf[3] = (info->write_offset_imm_enable) ?
+      info->write_offset_imm : ~0u;
+
+   return true;
+}
+
+bool
+ilo_state_sol_init(struct ilo_state_sol *sol,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_sol_info *info)
+{
+   bool ret = true;
+
+   assert(ilo_is_zeroed(sol, sizeof(*sol)));
+   assert(ilo_is_zeroed(info->data, info->data_size)); /* caller-provided storage must be zeroed */
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) { /* sol is left untouched before Gen7 */
+      uint8_t max_decl_count, i;
+
+      max_decl_count = info->streams[0].decl_count;
+      for (i = 1; i < ARRAY_SIZE(info->streams); i++) {
+         if (max_decl_count < info->streams[i].decl_count)
+            max_decl_count = info->streams[i].decl_count;
+      }
+
+      assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size);
+      sol->decl = (uint32_t (*)[2]) info->data; /* decl entries live in info->data */
+
+      ret &= sol_set_gen7_3DSTATE_STREAMOUT(sol, dev, info);
+      ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol, dev, info, max_decl_count);
+   }
+
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
+                            const struct ilo_dev *dev,
+                            bool render_disable)
+{
+   struct ilo_state_sol_info info;
+
+   memset(&info, 0, sizeof(info)); /* sol_enable off, no decls, NULL data of size 0 */
+   info.render_disable = render_disable;
+
+   return ilo_state_sol_init(sol, dev, &info);
+}
+
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                          uint32_t *alignment)
+{
+   /* DWord aligned without padding; dev is not consulted */
+   *alignment = 4;
+   return size;
+}
+
+bool
+ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
+                          const struct ilo_dev *dev,
+                          const struct ilo_state_sol_buffer_info *info)
+{
+   bool ret = true;
+
+   assert(ilo_is_zeroed(sb, sizeof(*sb))); /* caller must hand in a zeroed sb */
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      ret &= sol_buffer_set_gen8_3dstate_so_buffer(sb, dev, info);
+   else /* the Gen7 setter asserts Gen7..7.5 */
+      ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
+
+   sb->vma = info->vma; /* pointers are stored as-is */
+   sb->write_offset_vma = info->write_offset_vma;
+
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb,
+                                   const struct ilo_dev *dev)
+{
+   struct ilo_state_sol_buffer_info info;
+
+   memset(&info, 0, sizeof(info)); /* NULL vma, zero offset and size */
+
+   return ilo_state_sol_buffer_init(sb, dev, &info);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sol.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sol.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_sol.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_sol.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,166 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_SOL_H
+#define ILO_STATE_SOL_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 193:
+ *
+ *     "Incoming topologies are tagged with a 2-bit StreamID."
+ */
+#define ILO_STATE_SOL_MAX_STREAM_COUNT 4
+
+/*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 195:
+ *
+ *     "Up to four SO buffers are supported."
+ */
+#define ILO_STATE_SOL_MAX_BUFFER_COUNT 4
+
+/*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 201:
+ *
+ *     "All 128 decls..."
+ */
+#define ILO_STATE_SOL_MAX_DECL_COUNT 128
+
+/**
+ * Output a vertex attribute.
+ */
+struct ilo_state_sol_decl_info {
+   /* index into the read attributes; asserted < vue_read_count unless is_hole */
+   uint8_t attr;
+   bool is_hole;
+
+   /* which components to write; base < 4 and base + count <= 4 are asserted */
+   uint8_t component_base;
+   uint8_t component_count;
+
+   /* destination buffer, asserted < ILO_STATE_SOL_MAX_BUFFER_COUNT */
+   uint8_t buffer;
+};
+
+struct ilo_state_sol_stream_info {
+   /* which VUE attributes to read; base + count <= cv_vue_attr_count is asserted */
+   uint8_t cv_vue_attr_count;
+   uint8_t vue_read_base;
+   uint8_t vue_read_count;
+
+   uint8_t decl_count; /* number of entries in decls */
+   const struct ilo_state_sol_decl_info *decls;
+};
+
+struct ilo_state_sol_info {
+   void *data; /* zeroed storage for decls; see ilo_state_sol_data_size() */
+   size_t data_size;
+
+   bool sol_enable;
+   bool stats_enable;
+   enum gen_reorder_mode tristrip_reorder;
+
+   bool render_disable;
+   /* ignored when SOL is disabled */
+   uint8_t render_stream;
+
+   /* a buffer is disabled when its stride is zero */
+   uint16_t buffer_strides[ILO_STATE_SOL_MAX_BUFFER_COUNT];
+
+   struct ilo_state_sol_stream_info streams[ILO_STATE_SOL_MAX_STREAM_COUNT];
+};
+
+struct ilo_state_sol {
+   uint32_t streamout[2];
+   uint16_t strides[4]; /* copied from ilo_state_sol_info::buffer_strides */
+
+   uint32_t so_decl[2];
+   uint32_t (*decl)[2]; /* set to ilo_state_sol_info::data by ilo_state_sol_init() */
+   uint8_t decl_count;
+};
+
+struct ilo_vma;
+
+struct ilo_state_sol_buffer_info {
+   const struct ilo_vma *vma; /* NULL leaves the buffer disabled */
+   uint32_t offset; /* asserted to be a multiple of 4 */
+   uint32_t size;
+
+   /* Gen8+ only; at least sizeof(uint32_t) bytes */
+   const struct ilo_vma *write_offset_vma;
+   uint32_t write_offset_offset;
+
+   bool write_offset_load;
+   bool write_offset_save;
+
+   bool write_offset_imm_enable; /* asserted to imply write_offset_load */
+   uint32_t write_offset_imm;
+};
+
+struct ilo_state_sol_buffer {
+   uint32_t so_buf[5]; /* ilo_state_sol.c fills [0..1] on Gen7/7.5 and [0..3] on Gen8 */
+
+   const struct ilo_vma *vma;
+   const struct ilo_vma *write_offset_vma;
+};
+
+static inline size_t
+ilo_state_sol_data_size(const struct ilo_dev *dev, uint8_t max_decl_count)
+{
+   const struct ilo_state_sol *so = NULL; /* only named inside sizeof; never dereferenced */
+   return (ilo_dev_gen(dev) >= ILO_GEN(7)) ?
+      sizeof(so->decl[0]) * max_decl_count : 0; /* one decl entry per count; 0 before Gen7 */
+}
+
+bool
+ilo_state_sol_init(struct ilo_state_sol *sol,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_sol_info *info);
+
+bool
+ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
+                            const struct ilo_dev *dev,
+                            bool render_disable);
+
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                          uint32_t *alignment);
+
+bool
+ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
+                          const struct ilo_dev *dev,
+                          const struct ilo_state_sol_buffer_info *info);
+
+bool
+ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb,
+                                   const struct ilo_dev *dev);
+
+#endif /* ILO_STATE_SOL_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_surface.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1274 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_image.h"
+#include "ilo_vma.h"
+#include "ilo_state_surface.h"
+
+static bool
+surface_set_gen6_null_SURFACE_STATE(struct ilo_state_surface *surf,
+                                    const struct ilo_dev *dev)
+{
+   /* Packs a six-dword Gen6 null surface into surf; always returns true. */
+   uint32_t type_format, tiling;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 71:
+    *
+    *     "All of the remaining fields in surface state are ignored for null
+    *      surfaces, with the following exceptions:
+    *
+    *        - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
+    *          depth buffer's corresponding state for all render target
+    *          surfaces, including null.
+    *        - Surface Format must be R8G8B8A8_UNORM."
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
+    *
+    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must
+    *      be true"
+    *
+    * Note that we ignore the first exception for all surface types.
+    */
+   type_format = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
+                 GEN6_FORMAT_R8G8B8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+   tiling = GEN6_TILING_X << GEN6_SURFACE_DW3_TILING__SHIFT;
+
+   /* DW0 and DW3 carry the required fields; the other dwords stay zero */
+   surf->surface[0] = type_format;
+   surf->surface[3] = tiling;
+   surf->surface[1] = surf->surface[2] = 0;
+   surf->surface[4] = surf->surface[5] = 0;
+
+   return true;
+}
+
+static bool
+surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf,
+                                    const struct ilo_dev *dev)
+{
+   /* Writes 13 dwords on Gen8+ and 8 otherwise; always returns true. */
+   const bool is_gen8 = (ilo_dev_gen(dev) >= ILO_GEN(8));
+   const int tiling_shift = (is_gen8) ?
+      GEN8_SURFACE_DW0_TILING__SHIFT : GEN7_SURFACE_DW0_TILING__SHIFT;
+   const int dw_count = (is_gen8) ? 13 : 8;
+   uint32_t header;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13);
+
+   /* DW0 carries type, format, and tiling; the remaining dwords are zeroed */
+   header = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
+            GEN6_FORMAT_R8G8B8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+   header |= GEN6_TILING_X << tiling_shift;
+   surf->surface[0] = header;
+   memset(&surf->surface[1], 0, sizeof(uint32_t) * (dw_count - 1));
+   return true;
+}
+
+static uint32_t
+surface_get_gen6_buffer_offset_alignment(const struct ilo_dev *dev,
+                                         const struct ilo_state_surface_buffer_info *info)
+{
+   /* Returns the byte alignment that info->access demands of the base address. */
+   uint32_t alignment;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+    *
+    *     "The Base Address for linear render target surfaces and surfaces
+    *      accessed with the typed surface read/write data port messages must
+    *      be element-size aligned, for non-YUV surface formats, or a multiple
+    *      of 2 element-sizes for YUV surface formats.  Other linear surfaces
+    *      have no alignment requirements (byte alignment is sufficient)."
+    *
+    *     "Certain message types used to access surfaces have more stringent
+    *      alignment requirements. Please refer to the specific message
+    *      documentation for additional restrictions."
+    */
+   switch (info->access) {
+   case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+      /* byte alignment is sufficient */
+      alignment = 1;
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+      /* the base address must be aligned to the element size */
+      alignment = info->format_size;
+
+      assert(info->struct_size % alignment == 0);
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+      /*
+       * The PRM is silent on Untyped* messages; DWord alignment of the base
+       * address is assumed here.
+       */
+      alignment = 4;
+
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+       *
+       *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
+       *      pitch must be a multiple of 4 bytes."
+       */
+      /* the pitch rule is only enforced when the struct size exceeds one byte */
+      assert(info->struct_size <= 1 || info->struct_size % alignment == 0);
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
+       *
+       *     "the surface base address must be OWord aligned"
+       *
+       * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord
+       * Dual Block Read/Write.
+       *
+       * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
+       *
+       *     "The surface base address must be DWord aligned"
+       *
+       * for DWord Scattered Read/Write and Byte Scattered Read/Write.
+       */
+      alignment = 4;
+      if (info->format_size > 4)
+         alignment = 16;
+
+      /*
+       * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, 237, and
+       * 246:
+       *
+       *     "the surface pitch is ignored, the surface is treated as a
+       *      1-dimensional surface. An element size (pitch) of 16 bytes is
+       *      used to determine the size of the buffer for out-of-bounds
+       *      checking if using the surface state model."
+       *
+       * for OWord Block Read/Write, Unaligned OWord Block Read, OWord
+       * Dual Block Read/Write, and DWord Scattered Read/Write.
+       *
+       * From the Ivy Bridge PRM, volume 4 part 1, page 248:
+       *
+       *     "The surface pitch is ignored, the surface is treated as a
+       *      1-dimensional surface. An element size (pitch) of 4 bytes is
+       *      used to determine the size of the buffer for out-of-bounds
+       *      checking if using the surface state model."
+       *
+       * for Byte Scattered Read/Write.
+       *
+       * It is programmable on Gen7.5+.
+       */
+      if (ilo_dev_gen(dev) < ILO_GEN(7.5))
+         assert(info->struct_size == ((info->format_size > 1) ? 16 : 4));
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+      /*
+       * From the Sandy Bridge PRM, volume 4 part 1, page 259:
+       *
+       *     "Both the surface base address and surface pitch must be DWord
+       *      aligned."
+       */
+      alignment = 4;
+
+      assert(info->struct_size % alignment == 0);
+      break;
+   default:
+      assert(!"unknown access");
+      alignment = 1;
+      break;
+   }
+
+   return alignment;
+}
+
+static bool
+surface_validate_gen6_buffer(const struct ilo_dev *dev,
+                             const struct ilo_state_surface_buffer_info *info)
+{
+   /* Checks info's range, struct size, and offset alignment; warns and returns false on failure. */
+   uint32_t alignment;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* subtract rather than add so that a huge offset + size cannot wrap around */
+   if (info->offset > info->vma->vm_size ||
+       info->size > info->vma->vm_size - info->offset) {
+      ilo_warn("invalid buffer range\n");
+      return false;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+    *
+    *     "For surfaces of type SURFTYPE_BUFFER: [0,2047] -> [1B, 2048B]
+    *      For surfaces of type SURFTYPE_STRBUF: [0,2047] -> [1B, 2048B]"
+    */
+   if (!info->struct_size || info->struct_size > 2048) {
+      ilo_warn("invalid buffer struct size\n");
+      return false;
+   }
+
+   alignment = surface_get_gen6_buffer_offset_alignment(dev, info);
+   if (info->offset % alignment || info->vma->vm_alignment % alignment) {
+      ilo_warn("bad buffer offset\n");
+      return false;
+   }
+
+   /* no STRBUF on Gen6; SVB writes are Gen6 only */
+   if (info->format == GEN6_FORMAT_RAW && info->struct_size > 1)
+      assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+   if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB)
+      assert(ilo_dev_gen(dev) == ILO_GEN(6));
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 83:
+    *
+    *     "NOTE: "RAW" is supported only with buffers and structured buffers
+    *      accessed via the untyped surface read/write and untyped atomic
+    *      operation messages, which do not have a column in the table."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 252:
+    *
+    *     "For untyped messages, the Surface Format must be RAW and the
+    *      Surface Type must be SURFTYPE_BUFFER or SURFTYPE_STRBUF."
+    */
+   assert((info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED) ==
+          (info->format == GEN6_FORMAT_RAW));
+   return true;
+}
+
+static bool
+surface_get_gen6_buffer_struct_count(const struct ilo_dev *dev,
+                                     const struct ilo_state_surface_buffer_info *info,
+                                     uint32_t *count)
+{
+   /* Stores the entry count minus one in *count; warns and returns false if out of range. */
+   uint32_t limit, entries;
+   bool raw_bytes;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* whole structs, plus a trailing partial one holding more than format_size bytes */
+   entries = info->size / info->struct_size;
+   if (info->size - info->struct_size * entries > info->format_size)
+      entries++;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
+    *
+    *     "For buffer surfaces, the number of entries in the buffer ranges
+    *      from 1 to 2^27."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+    *
+    *     "For typed buffer and structured buffer surfaces, the number of
+    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
+    *      surfaces, the number of entries in the buffer is the number of
+    *      bytes which can range from 1 to 2^30."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
+    *      11 if the Surface Format is RAW (the size of the buffer must be a
+    *      multiple of 4 bytes)."
+    */
+   raw_bytes = (info->format == GEN6_FORMAT_RAW && info->struct_size == 1);
+   limit = (raw_bytes && ilo_dev_gen(dev) >= ILO_GEN(7)) ? 1 << 30 : 1 << 27;
+   if (raw_bytes)
+      entries &= ~3;
+
+   if (entries == 0 || entries > limit) {
+      ilo_warn("too many or zero buffer structs\n");
+      return false;
+   }
+
+   *count = entries - 1;
+
+   return true;
+}
+
+static bool
+surface_set_gen6_buffer_SURFACE_STATE(struct ilo_state_surface *surf,
+                                     const struct ilo_dev *dev,
+                                     const struct ilo_state_surface_buffer_info *info)
+{
+   /* Packs a Gen6 SURFTYPE_BUFFER state for info into surf; false if info is rejected. */
+   uint32_t type_fmt, base, extent, depth_pitch;
+   uint32_t last_entry;
+   int w, h, d;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6);
+
+   if (!surface_validate_gen6_buffer(dev, info))
+      return false;
+   if (!surface_get_gen6_buffer_struct_count(dev, info, &last_entry))
+      return false;
+
+   /* the (count - 1) value is split across Width, Height, and Depth */
+   w = (last_entry & 0x0000007f);        /* bits [6:0] */
+   h = (last_entry & 0x000fff80) >> 7;   /* bits [19:7] */
+   d = (last_entry & 0x07f00000) >> 20;  /* bits [26:20] */
+
+   type_fmt = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
+              info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+   base = info->offset;
+   extent = h << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+            w << GEN6_SURFACE_DW2_WIDTH__SHIFT;
+   depth_pitch = d << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+                 (info->struct_size - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT;
+
+   surf->surface[0] = type_fmt;
+   surf->surface[1] = base;
+   surf->surface[2] = extent;
+   surf->surface[3] = depth_pitch;
+   surf->surface[4] = 0;
+   surf->surface[5] = 0;
+
+   surf->type = GEN6_SURFTYPE_BUFFER;
+   surf->min_lod = 0;
+   surf->mip_count = 0;
+
+   return true;
+}
+
+static bool
+surface_set_gen7_buffer_SURFACE_STATE(struct ilo_state_surface *surf,
+                                     const struct ilo_dev *dev,
+                                     const struct ilo_state_surface_buffer_info *info)
+{
+   /* Packs a Gen7+ BUFFER or STRBUF state for info into surf; false if info is rejected. */
+   uint32_t type_fmt, base, extent, depth_pitch, swizzle;
+   enum gen_surface_type surf_type;
+   uint32_t last_entry;
+   int w, h, d;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13);
+
+   if (!surface_validate_gen6_buffer(dev, info))
+      return false;
+   if (!surface_get_gen6_buffer_struct_count(dev, info, &last_entry))
+      return false;
+
+   /* only RAW data with a struct size above one byte is a structured buffer */
+   surf_type = (info->format != GEN6_FORMAT_RAW || info->struct_size <= 1) ?
+      GEN6_SURFTYPE_BUFFER : GEN7_SURFTYPE_STRBUF;
+
+   /* the (count - 1) value is split across Width, Height, and Depth */
+   w = (last_entry & 0x0000007f);        /* bits [6:0] */
+   h = (last_entry & 0x001fff80) >> 7;   /* bits [20:7] */
+   d = (last_entry & 0x7fe00000) >> 21;  /* bits [30:21] */
+
+   type_fmt = surf_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+              info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+   /* on Gen8+ the offset is written to DW8 instead of DW1 */
+   base = (ilo_dev_gen(dev) < ILO_GEN(8)) ? info->offset : 0;
+   extent = GEN_SHIFT32(h, GEN7_SURFACE_DW2_HEIGHT) |
+            GEN_SHIFT32(w, GEN7_SURFACE_DW2_WIDTH);
+   depth_pitch = GEN_SHIFT32(d, GEN7_SURFACE_DW3_DEPTH) |
+                 GEN_SHIFT32(info->struct_size - 1, GEN7_SURFACE_DW3_PITCH);
+
+   /* the channel-select bits are programmed on Gen7.5+ only */
+   swizzle = 0;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      swizzle |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
+                 GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+                 GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
+                 GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+   }
+
+   surf->surface[0] = type_fmt;
+   surf->surface[1] = base;
+   surf->surface[2] = extent;
+   surf->surface[3] = depth_pitch;
+   surf->surface[4] = surf->surface[5] = surf->surface[6] = 0;
+   surf->surface[7] = swizzle;
+   /* dwords 8..12 are written on Gen8+ only */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      surf->surface[8] = info->offset;
+      surf->surface[9] = surf->surface[10] = 0;
+      surf->surface[11] = surf->surface[12] = 0;
+   }
+
+   surf->type = surf_type;
+   surf->min_lod = 0;
+   surf->mip_count = 0;
+
+   return true;
+}
+
+static bool
+surface_validate_gen6_image(const struct ilo_dev *dev,
+                            const struct ilo_state_surface_image_info *info)
+{
+   /* Sanity-checks info for an image surface; only W tiling before Gen8 returns false. */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   switch (info->access) {
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+      assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+      break;
+   case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+      break;
+   default:
+      assert(!"unsupported surface access");
+      break;
+   }
+
+   assert(info->img && info->vma);
+
+   if (info->img->tiling != GEN6_TILING_NONE)
+      assert(info->vma->vm_alignment % 4096 == 0);
+
+   if (info->aux_vma) {
+      assert(ilo_image_can_enable_aux(info->img, info->level_base));
+      /* the auxiliary surface is always tiled */
+      assert(info->aux_vma->vm_alignment % 4096 == 0);
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 78:
+    *
+    *     "For surface types other than SURFTYPE_BUFFER, the Width specified
+    *      by this field must be less than or equal to the surface pitch
+    *      (specified in bytes via the Surface Pitch field)."
+    */
+   assert(info->img->bo_stride && info->img->bo_stride <= 512 * 1024 &&
+          info->img->width0 <= info->img->bo_stride);
+
+   /* the view type may differ from the image type only as a 2D view of a cube */
+   if (info->type != info->img->type)
+      assert(info->type == GEN6_SURFTYPE_2D &&
+             info->img->type == GEN6_SURFTYPE_CUBE);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 78:
+    *
+    *     "For cube maps, Width must be set equal to the Height."
+    */
+   if (info->type == GEN6_SURFTYPE_CUBE)
+      assert(info->img->width0 == info->img->height0);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 72:
+    *
+    *     "Tile Walk TILEWALK_YMAJOR is UNDEFINED for render target formats
+    *      that have 128 bits-per-element (BPE)."
+    *
+    *     "If Number of Multisamples is set to a value other than
+    *      MULTISAMPLECOUNT_1, this field cannot be set to the following
+    *      formats:
+    *
+    *      - any format with greater than 64 bits per element
+    *      - any compressed texture format (BC*)
+    *      - any YCRCB* format"
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+    *
+    *      If Number of Multisamples is set to a value other than
+    *      MULTISAMPLECOUNT_1, this field cannot be set to the following
+    *      formats: any format with greater than 64 bits per element, if
+    *      Number of Multisamples is MULTISAMPLECOUNT_8, any compressed
+    *      texture format (BC*), and any YCRCB* format.
+    *
+    * TODO
+    */
+   if (ilo_dev_gen(dev) < ILO_GEN(8) && info->img->tiling == GEN8_TILING_W) {
+      ilo_warn("tiling W is not supported\n");
+      return false;
+   }
+
+   return true;
+}
+
+static void
+surface_get_gen6_image_max_extent(const struct ilo_dev *dev,
+                                  const struct ilo_state_surface_image_info *info,
+                                  uint16_t *max_w, uint16_t *max_h)
+{
+   /* Reports the largest width and height accepted for info->type on this Gen. */
+   const uint16_t gen_limit = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
+   uint16_t w = 1, h = 1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   switch (info->type) {
+   case GEN6_SURFTYPE_1D:
+      w = gen_limit;   /* the height limit stays 1 */
+      break;
+   case GEN6_SURFTYPE_2D:
+   case GEN6_SURFTYPE_CUBE:
+      w = h = gen_limit;
+      break;
+   case GEN6_SURFTYPE_3D:
+      w = h = 2048;
+      break;
+   default:
+      assert(!"invalid surface type");
+      break;
+   }
+
+   *max_w = w;
+   *max_h = h;
+}
+
+static bool
+surface_get_gen6_image_extent(const struct ilo_dev *dev,
+                              const struct ilo_state_surface_image_info *info,
+                              uint16_t *width, uint16_t *height)
+{
+   /* Stores info->img's width0 and height0, each minus one; always returns true. */
+   const uint16_t img_w = info->img->width0;
+   const uint16_t img_h = info->img->height0;
+   uint16_t limit_w, limit_h;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   surface_get_gen6_image_max_extent(dev, info, &limit_w, &limit_h);
+   assert(img_w && img_w <= limit_w);
+   assert(img_h && img_h <= limit_h);
+
+   *width = img_w - 1;
+   *height = img_h - 1;
+   return true;
+}
+
+static bool
+surface_get_gen6_image_slices(const struct ilo_dev *dev,
+                              const struct ilo_state_surface_image_info *info,
+                              uint16_t *depth, uint16_t *min_array_elem,
+                              uint16_t *rt_view_extent)
+{
+   /* Derives Depth, Minimum Array Element, and RT View Extent; warns and returns false if info is rejected. */
+   uint16_t slice_limit, layers;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+    *
+    *     "If this field (Surface Array) is enabled, the Surface Type must be
+    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
+    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
+    *      SURFTYPE_CUBE, the Depth field must be set to zero."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     "This field (Depth) specifies the total number of levels for a
+    *      volume texture or the number of array elements allowed to be
+    *      accessed starting at the Minimum Array Element for arrayed
+    *      surfaces.  If the volume texture is MIP-mapped, this field
+    *      specifies the depth of the base MIP level."
+    *
+    *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of this
+    *      field is [0,340], indicating the number of cube array elements
+    *      (equal to the number of underlying 2D array elements divided by 6).
+    *      For other surfaces, this field must be zero."
+    *
+    *     "Errata: For SURFTYPE_CUBE sampling engine surfaces, the range of
+    *      this field is limited to [0,85].
+    *
+    *      Errata: If Surface Array is enabled, and Depth is between 1024 and
+    *      2047, an incorrect array slice may be accessed if the requested
+    *      array index in the message is greater than or equal to 4096."
+    *
+    * The errata are Gen7-specific, and they limit the number of usable
+    * layers to (86 * 6), about 512.
+    */
+
+   switch (info->type) {
+   case GEN6_SURFTYPE_1D:
+   case GEN6_SURFTYPE_2D:
+   case GEN6_SURFTYPE_CUBE:
+      slice_limit = (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 2048 : 512;
+
+      assert(info->img->array_size <= slice_limit);
+      slice_limit = info->img->array_size;
+
+      layers = info->slice_count;
+      if (info->type == GEN6_SURFTYPE_CUBE) {
+         if (info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
+            if (layers == 0 || layers % 6 != 0) {
+               ilo_warn("invalid cube slice count\n");
+               return false;
+            }
+
+            if (ilo_dev_gen(dev) == ILO_GEN(7) && layers > 86 * 6) {
+               ilo_warn("cube slice count exceeds Gen7 limit\n");
+               return false;
+            }
+         } else {
+            /*
+             * Minimum Array Element and Depth must be 0; Render Target View
+             * Extent is ignored.
+             */
+            if (info->slice_base != 0 || layers != 6) {
+               ilo_warn("no cube RT array support in data port\n");
+               return false;
+            }
+         }
+
+         layers /= 6;
+      }
+
+      if (!info->is_array && layers > 1) {
+         ilo_warn("non-array surface with non-zero depth\n");
+         return false;
+      }
+      break;
+   case GEN6_SURFTYPE_3D:
+      slice_limit = 2048;
+
+      assert(info->img->depth0 <= slice_limit);
+      slice_limit = u_minify(info->img->depth0, info->level_base);
+
+      layers = info->img->depth0;
+
+      if (info->is_array) {
+         ilo_warn("3D surfaces cannot be arrays\n");
+         return false;
+      }
+      break;
+   default:
+      assert(!"invalid surface type");
+      return false;
+   }
+
+   if (info->slice_count == 0 ||
+       info->slice_base + info->slice_count > slice_limit) {
+      ilo_warn("invalid slice range\n");
+      return false;
+   }
+
+   assert(layers);
+   *depth = layers - 1;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 84:
+    *
+    *     "For Sampling Engine and Render Target 1D and 2D Surfaces:
+    *      This field (Minimum Array Element) indicates the minimum array
+    *      element that can be accessed as part of this surface.  This field
+    *      is added to the delivered array index before it is used to address
+    *      the surface.
+    *
+    *      For Render Target 3D Surfaces:
+    *      This field indicates the minimum `R' coordinate on the LOD
+    *      currently being rendered to.  This field is added to the delivered
+    *      array index before it is used to address the surface.
+    *
+    *      For Sampling Engine Cube Surfaces on [DevSNB+] only:
+    *      This field indicates the minimum array element in the underlying 2D
+    *      surface array that can be accessed as part of this surface (the
+    *      cube array index is multipled by 6 to compute this value, although
+    *      this field is not restricted to only multiples of 6). This field is
+    *      added to the delivered array index before it is used to address the
+    *      surface.
+    *
+    *      For Other Surfaces:
+    *      This field must be set to zero."
+    *
+    * On Gen7+, typed surfaces are treated like sampling engine 1D and 2D
+    * surfaces.
+    */
+   *min_array_elem = info->slice_base;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 84:
+    *
+    *     "For Render Target 3D Surfaces:
+    *      This field (Render Target View Extent) indicates the extent of the
+    *      accessible `R' coordinates minus 1 on the LOD currently being
+    *      rendered to.
+    *
+    *      For Render Target 1D and 2D Surfaces:
+    *      This field must be set to the same value as the Depth field.
+    *
+    *      For Other Surfaces:
+    *      This field is ignored."
+    */
+   *rt_view_extent = info->slice_count - 1;
+
+   return true;
+}
+
+static bool
+surface_get_gen6_image_levels(const struct ilo_dev *dev,
+                              const struct ilo_state_surface_image_info *info,
+                              uint8_t *min_lod, uint8_t *mip_count)
+{
+   /* Derives Surface Min LOD and MIP Count / LOD; warns and returns false on a bad level range. */
+   uint8_t level_limit = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 15 : 14;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   assert(info->img->level_count <= level_limit);
+   level_limit = info->img->level_count;
+
+   if (info->level_count == 0 ||
+       info->level_base + info->level_count > level_limit) {
+      ilo_warn("invalid level range\n");
+      return false;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 79:
+    *
+    *     "For Sampling Engine Surfaces:
+    *      This field (MIP Count / LOD) indicates the number of MIP levels
+    *      allowed to be accessed starting at Surface Min LOD, which must be
+    *      less than or equal to the number of MIP levels actually stored in
+    *      memory for this surface.
+    *
+    *      Force the mip map access to be between the mipmap specified by the
+    *      integer bits of the Min LOD and the ceiling of the value specified
+    *      here.
+    *
+    *      For Render Target Surfaces:
+    *      This field defines the MIP level that is currently being rendered
+    *      into. This is the absolute MIP level on the surface and is not
+    *      relative to the Surface Min LOD field, which is ignored for render
+    *      target surfaces.
+    *
+    *      For Other Surfaces:
+    *      This field is reserved : MBZ"
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 83:
+    *
+    *     "For Sampling Engine Surfaces:
+    *
+    *      This field (Surface Min LOD) indicates the most detailed LOD that
+    *      can be accessed as part of this surface.  This field is added to
+    *      the delivered LOD (sample_l, ld, or resinfo message types) before
+    *      it is used to address the surface.
+    *
+    *      For Other Surfaces:
+    *      This field is ignored."
+    *
+    * On Gen7+, typed surfaces are treated like sampling engine surfaces.
+    */
+   if (info->access != ILO_STATE_SURFACE_ACCESS_DP_RENDER) {
+      *min_lod = info->level_base;
+      *mip_count = info->level_count - 1;
+   } else {
+      /* render target: the absolute level goes in MIP Count / LOD */
+      assert(info->level_count == 1);
+      *min_lod = 0;
+      *mip_count = info->level_base;
+   }
+
+   return true;
+}
+
+static bool
+surface_get_gen6_image_sample_count(const struct ilo_dev *dev,
+                                    const struct ilo_state_surface_image_info *info,
+                                    enum gen_sample_count *sample_count)
+{
+   /* Maps info->img->sample_count to the hardware enum in *sample_count; always returns true. */
+   int min_gen;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   switch (info->img->sample_count) {
+   case 1:
+      *sample_count = GEN6_NUMSAMPLES_1;
+      min_gen = ILO_GEN(6);
+      break;
+   case 2:
+      *sample_count = GEN8_NUMSAMPLES_2;
+      min_gen = ILO_GEN(8);
+      break;
+   case 4:
+      *sample_count = GEN6_NUMSAMPLES_4;
+      min_gen = ILO_GEN(6);
+      break;
+   case 8:
+      *sample_count = GEN7_NUMSAMPLES_8;
+      min_gen = ILO_GEN(7);
+      break;
+   case 16:
+      *sample_count = GEN8_NUMSAMPLES_16;
+      min_gen = ILO_GEN(8);
+      break;
+   default:
+      assert(!"invalid sample count");
+      *sample_count = GEN6_NUMSAMPLES_1;
+      min_gen = ILO_GEN(6);   /* matches the fallback above; was left unset before */
+      break;
+   }
+
+   assert(ilo_dev_gen(dev) >= min_gen);
+   return true;
+}
+
+static bool
+surface_get_gen6_image_alignments(const struct ilo_dev *dev,
+                                  const struct ilo_state_surface_image_info *info,
+                                  uint32_t *alignments)
+{
+   /* Collects the HALIGN/VALIGN bits for info->img into *alignments; always returns true. */
+   uint32_t bits = 0;
+   bool invalid = false;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      switch (info->img->align_i) {
+      case 4:
+         bits = GEN8_SURFACE_DW0_HALIGN_4;
+         break;
+      case 8:
+         bits = GEN8_SURFACE_DW0_HALIGN_8;
+         break;
+      case 16:
+         bits = GEN8_SURFACE_DW0_HALIGN_16;
+         break;
+      default:
+         invalid = true;
+         break;
+      }
+
+      switch (info->img->align_j) {
+      case 4:
+         bits |= GEN7_SURFACE_DW0_VALIGN_4;   /* NOTE(review): GEN7_ name in the Gen8 path -- confirm */
+         break;
+      case 8:
+         bits |= GEN8_SURFACE_DW0_VALIGN_8;
+         break;
+      case 16:
+         bits |= GEN8_SURFACE_DW0_VALIGN_16;
+         break;
+      default:
+         invalid = true;
+         break;
+      }
+   } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      switch (info->img->align_i) {
+      case 4:
+         bits = GEN7_SURFACE_DW0_HALIGN_4;
+         break;
+      case 8:
+         bits = GEN7_SURFACE_DW0_HALIGN_8;
+         break;
+      default:
+         invalid = true;
+         break;
+      }
+
+      switch (info->img->align_j) {
+      case 2:
+         bits |= GEN7_SURFACE_DW0_VALIGN_2;
+         break;
+      case 4:
+         bits |= GEN7_SURFACE_DW0_VALIGN_4;
+         break;
+      default:
+         invalid = true;
+         break;
+      }
+   } else {
+      invalid = (info->img->align_i != 4);   /* no HALIGN bits are emitted on Gen6 */
+
+      switch (info->img->align_j) {
+      case 2:
+         bits |= GEN6_SURFACE_DW5_VALIGN_2;
+         break;
+      case 4:
+         bits |= GEN6_SURFACE_DW5_VALIGN_4;
+         break;
+      default:
+         invalid = true;
+         break;
+      }
+   }
+
+   if (invalid)
+      assert(!"invalid HALIGN or VALIGN");
+
+   *alignments = bits;
+
+   return true;
+}
+
+static bool
+surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
+                                     const struct ilo_dev *dev,
+                                     const struct ilo_state_surface_image_info *info)
+{
+   /* Packs a Gen6 image SURFACE_STATE for info into surf; false if any helper rejects info. */
+   uint16_t w, h, d, first_slice, rt_extent;
+   uint8_t lod_min, lod_count;
+   enum gen_sample_count samples;
+   uint32_t align_bits;
+   uint32_t dw0, dw2, dw3, dw4;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 6);
+
+   if (!(surface_validate_gen6_image(dev, info) &&
+         surface_get_gen6_image_extent(dev, info, &w, &h) &&
+         surface_get_gen6_image_slices(dev, info, &d, &first_slice,
+                                       &rt_extent) &&
+         surface_get_gen6_image_levels(dev, info, &lod_min, &lod_count) &&
+         surface_get_gen6_image_sample_count(dev, info, &samples) &&
+         surface_get_gen6_image_alignments(dev, info, &align_bits)))
+      return false;
+
+   /* no ARYSPC_LOD0 */
+   assert(info->img->walk != ILO_IMAGE_WALK_LOD);
+   /* no UMS/CMS */
+   if (info->img->sample_count > 1)
+      assert(info->img->interleaved_samples);
+
+   dw0 = info->type << GEN6_SURFACE_DW0_TYPE__SHIFT |
+         info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
+         GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 74:
+    *
+    *     "CUBE_AVERAGE may only be selected if all of the Cube Face Enable
+    *      fields are equal to one."
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 75-76:
+    *
+    *     "For SURFTYPE_CUBE Surfaces accessed via the Sampling Engine:
+    *      Bits 5:0 of this field (Cube Face Enables) enable the individual
+    *      faces of a cube map.  Enabling a face indicates that the face is
+    *      present in the cube map, while disabling it indicates that that
+    *      face is represented by the texture map's border color. Refer to
+    *      Memory Data Formats for the correlation between faces and the cube
+    *      map memory layout. Note that storage for disabled faces must be
+    *      provided.
+    *
+    *      For other surfaces:
+    *      This field is reserved : MBZ"
+    *
+    *     "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this
+    *      field must be programmed to 111111b (all faces enabled)."
+    */
+   if (info->type == GEN6_SURFTYPE_CUBE &&
+       info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
+      dw0 |= GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE |
+             GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+   }
+
+   dw2 = h << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+         w << GEN6_SURFACE_DW2_WIDTH__SHIFT |
+         lod_count << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+
+   dw3 = d << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+         (info->img->bo_stride - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
+         info->img->tiling << GEN6_SURFACE_DW3_TILING__SHIFT;
+
+   dw4 = lod_min << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
+         first_slice << GEN6_SURFACE_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+         rt_extent << GEN6_SURFACE_DW4_RT_VIEW_EXTENT__SHIFT |
+         samples << GEN6_SURFACE_DW4_MULTISAMPLECOUNT__SHIFT;
+
+   /* DW1 is left zero here; DW5 carries only the alignment bits */
+   surf->surface[0] = dw0;
+   surf->surface[1] = 0;
+   surf->surface[2] = dw2;
+   surf->surface[3] = dw3;
+   surf->surface[4] = dw4;
+   surf->surface[5] = align_bits;
+
+   surf->type = info->type;
+   surf->min_lod = lod_min;
+   surf->mip_count = lod_count;
+
+   return true;
+}
+
+static bool
+surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
+                                     const struct ilo_dev *dev,
+                                     const struct ilo_state_surface_image_info *info)
+{
+   uint16_t width, height, depth, array_base, view_extent;
+   uint8_t min_lod, mip_count;
+   uint32_t alignments;
+   enum gen_sample_count sample_count;
+   uint32_t dw0, dw1, dw2, dw3, dw4, dw5, dw7;
+   /* Encode info as Gen7-Gen8 image SURFACE_STATE dwords in surf->surface[]. */
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!surface_validate_gen6_image(dev, info) ||
+       !surface_get_gen6_image_extent(dev, info, &width, &height) ||
+       !surface_get_gen6_image_slices(dev, info, &depth, &array_base,
+                                      &view_extent) ||
+       !surface_get_gen6_image_levels(dev, info, &min_lod, &mip_count) ||
+       !surface_get_gen6_image_sample_count(dev, info, &sample_count) ||
+       !surface_get_gen6_image_alignments(dev, info, &alignments))
+      return false; /* info was rejected by one of the helpers; surf untouched */
+
+   dw0 = info->type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+         info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
+         alignments;
+
+   if (info->is_array)
+      dw0 |= GEN7_SURFACE_DW0_IS_ARRAY;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) { /* Gen8 has its own tiling shift */
+      dw0 |= info->img->tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
+   } else { /* before Gen8: tiling plus an array spacing mode */
+      dw0 |= info->img->tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
+
+      if (info->img->walk == ILO_IMAGE_WALK_LOD)
+         dw0 |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
+      else
+         dw0 |= GEN7_SURFACE_DW0_ARYSPC_FULL;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
+    *
+    *     "For SURFTYPE_CUBE Surfaces accessed via the Sampling Engine: Bits
+    *      5:0 of this field (Cube Face Enables) enable the individual faces
+    *      of a cube map. Enabling a face indicates that the face is present
+    *      in the cube map, while disabling it indicates that that face is
+    *      represented by the texture map's border color. Refer to Memory Data
+    *      Formats for the correlation between faces and the cube map memory
+    *      layout. Note that storage for disabled faces must be provided. For
+    *      other surfaces this field is reserved and MBZ."
+    *
+    *     "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this
+    *      field must be programmed to 111111b (all faces enabled). This field
+    *      is ignored unless the Surface Type is SURFTYPE_CUBE."
+    */
+   if (info->type == GEN6_SURFTYPE_CUBE &&
+       info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER)
+      dw0 |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; /* enable every face */
+
+   dw1 = 0;
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      assert(info->img->walk_layer_height % 4 == 0); /* stored as height / 4 */
+      dw1 |= info->img->walk_layer_height / 4 <<
+         GEN8_SURFACE_DW1_QPITCH__SHIFT;
+   }
+
+   dw2 = height << GEN7_SURFACE_DW2_HEIGHT__SHIFT |
+         width << GEN7_SURFACE_DW2_WIDTH__SHIFT;
+
+   dw3 = depth << GEN7_SURFACE_DW3_DEPTH__SHIFT |
+         (info->img->bo_stride - 1) << GEN7_SURFACE_DW3_PITCH__SHIFT;
+
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      dw3 |= 0 << GEN75_SURFACE_DW3_INTEGER_SURFACE_FORMAT__SHIFT; /* no-op */
+
+   dw4 = array_base << GEN7_SURFACE_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+         view_extent << GEN7_SURFACE_DW4_RT_VIEW_EXTENT__SHIFT |
+         sample_count << GEN7_SURFACE_DW4_MULTISAMPLECOUNT__SHIFT;
+
+   /*
+    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
+    * means the samples are interleaved.  The layouts are the same when the
+    * number of samples is 1.
+    */
+   if (info->img->interleaved_samples && info->img->sample_count > 1) {
+      assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_RENDER);
+      dw4 |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
+   } else {
+      dw4 |= GEN7_SURFACE_DW4_MSFMT_MSS;
+   }
+
+   dw5 = min_lod << GEN7_SURFACE_DW5_MIN_LOD__SHIFT |
+         mip_count << GEN7_SURFACE_DW5_MIP_COUNT_LOD__SHIFT;
+
+   dw7 = 0;
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { /* each channel selects itself */
+      dw7 |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
+             GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+             GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
+             GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(surf->surface) >= 13);
+   surf->surface[0] = dw0;
+   surf->surface[1] = dw1;
+   surf->surface[2] = dw2;
+   surf->surface[3] = dw3;
+   surf->surface[4] = dw4;
+   surf->surface[5] = dw5;
+   surf->surface[6] = 0;
+   surf->surface[7] = dw7;
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) { /* dwords 8-12 are written as zero */
+      surf->surface[8] = 0;
+      surf->surface[9] = 0;
+      surf->surface[10] = 0;
+      surf->surface[11] = 0;
+      surf->surface[12] = 0;
+   }
+
+   surf->type = info->type;
+   surf->min_lod = min_lod;
+   surf->mip_count = mip_count;
+
+   return true;
+}
+
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+                              enum ilo_state_surface_access access,
+                              uint32_t size, uint32_t *alignment)
+{
+   /* results when the access asks for neither padding nor alignment */
+   uint32_t required_align = 1;
+   uint32_t padded_size = size;
+
+   switch (access) {
+   case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+      /*
+       * From the Sandy Bridge PRM, volume 1 part 1, page 118:
+       *
+       *     "For buffers, which have no inherent "height," padding
+       *      requirements are different. A buffer must be padded to the next
+       *      multiple of 256 array elements, with an additional 16 bytes
+       *      added beyond that to account for the L1 cache line."
+       *
+       * Assuming tightly packed GEN6_FORMAT_R32G32B32A32_FLOAT, the size
+       * needs to be padded to 4096 (= 16 * 256).
+       */
+      padded_size = align(size, 4096) + 16;
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+      /* element-size aligned for worst cases */
+      required_align = 16;
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+      /* OWord aligned, and the size is rounded up to whole OWords */
+      required_align = 16;
+      padded_size = align(size, 16);
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+   case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+      /* DWord aligned (uncertain for untyped, always so for SVB) */
+      required_align = 4;
+      break;
+   default:
+      assert(!"unknown access");
+      break;
+   }
+
+   *alignment = required_align;
+   return padded_size;
+}
+
+/* Set up a zeroed surf as a null surface; it has no vma and is read-only. */
+bool
+ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
+                                const struct ilo_dev *dev)
+{
+   bool ok;
+
+   assert(ilo_is_zeroed(surf, sizeof(*surf)));
+
+   ok = (ilo_dev_gen(dev) >= ILO_GEN(7)) ?
+      surface_set_gen7_null_SURFACE_STATE(surf, dev) :
+      surface_set_gen6_null_SURFACE_STATE(surf, dev);
+
+   surf->vma = NULL;
+   surf->type = GEN6_SURFTYPE_NULL;
+   surf->readonly = true;
+
+   assert(ok);
+
+   return ok;
+}
+
+/* Set up a zeroed surf as the buffer surface that info describes. */
+bool
+ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf,
+                                  const struct ilo_dev *dev,
+                                  const struct ilo_state_surface_buffer_info *info)
+{
+   bool ok;
+
+   assert(ilo_is_zeroed(surf, sizeof(*surf)));
+
+   ok = (ilo_dev_gen(dev) >= ILO_GEN(7)) ?
+      surface_set_gen7_buffer_SURFACE_STATE(surf, dev, info) :
+      surface_set_gen6_buffer_SURFACE_STATE(surf, dev, info);
+
+   surf->vma = info->vma;
+   surf->readonly = info->readonly;
+
+   assert(ok);
+
+   return ok;
+}
+
+/* Set up a zeroed surf as the image surface that info describes. */
+bool
+ilo_state_surface_init_for_image(struct ilo_state_surface *surf,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_surface_image_info *info)
+{
+   bool ok;
+
+   assert(ilo_is_zeroed(surf, sizeof(*surf)));
+
+   ok = (ilo_dev_gen(dev) >= ILO_GEN(7)) ?
+      surface_set_gen7_image_SURFACE_STATE(surf, dev, info) :
+      surface_set_gen6_image_SURFACE_STATE(surf, dev, info);
+
+   surf->vma = info->vma;
+   surf->aux_vma = info->aux_vma;
+
+   surf->is_integer = info->is_integer;
+   surf->readonly = info->readonly;
+   surf->scanout = info->img->scanout;
+
+   assert(ok);
+
+   return ok;
+}
+
+/* Replace the SCS fields of DW7 with rgba[0..3]; always returns true. */
+bool
+ilo_state_surface_set_scs(struct ilo_state_surface *surf,
+                          const struct ilo_dev *dev,
+                          enum gen_surface_scs rgba[4])
+{
+   uint32_t dw7 = surf->surface[7] & ~GEN75_SURFACE_DW7_SCS__MASK;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   assert(ilo_dev_gen(dev) >= ILO_GEN(7.5));
+
+   dw7 |= GEN_SHIFT32(rgba[0], GEN75_SURFACE_DW7_SCS_R) |
+          GEN_SHIFT32(rgba[1], GEN75_SURFACE_DW7_SCS_G) |
+          GEN_SHIFT32(rgba[2], GEN75_SURFACE_DW7_SCS_B) |
+          GEN_SHIFT32(rgba[3], GEN75_SURFACE_DW7_SCS_A);
+   surf->surface[7] = dw7;
+   return true;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_surface_format.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_surface_format.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_surface_format.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_surface_format.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,351 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "genhw/genhw.h"
+#include "ilo_state_surface.h"
+
+static bool
+surface_valid_sampler_format(const struct ilo_dev *dev,
+                             enum ilo_state_surface_access access, /* unused */
+                             enum gen_surface_format format)
+{
+   /*
+    * Sampling engine capabilities per surface format, based on:
+    *
+    *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
+    *  - the Ivy Bridge PRM, volume 4 part 1, page 84-87
+    */
+   static const struct sampler_cap {
+      int sampling;     /* 0: never; else the lowest ILO_GEN() that samples it */
+      int filtering;    /* not read in this function */
+      int shadow_map;   /* not read in this function */
+      int chroma_key;   /* not read in this function */
+   } caps[] = {
+#define CAP(sampling, filtering, shadow_map, chroma_key) \
+      { ILO_GEN(sampling), ILO_GEN(filtering), ILO_GEN(shadow_map), ILO_GEN(chroma_key) }
+      [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R32G32B32X32_FLOAT]       = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_R32G32B32_FLOAT]          = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_R32G32B32_SINT]           = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R32G32B32_UINT]           = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_R32G32_SINT]              = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R32G32_UINT]              = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS] = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_X32_TYPELESS_G8X24_UINT]  = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_L32A32_FLOAT]             = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_R16G16B16X16_UNORM]       = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R16G16B16X16_FLOAT]       = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_A32X32_FLOAT]             = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_L32X32_FLOAT]             = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_I32X32_FLOAT]             = CAP(  1,   5,   0,   0),
+      [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R16G16_SINT]              = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16G16_UINT]              = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R32_SINT]                 = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R32_UINT]                 = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R32_FLOAT]                = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_R24_UNORM_X8_TYPELESS]    = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_X24_TYPELESS_G8_UINT]     = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_L16A16_UNORM]             = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_I24X8_UNORM]              = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_L24X8_UNORM]              = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_A24X8_UNORM]              = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_I32_FLOAT]                = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_L32_FLOAT]                = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_A32_FLOAT]                = CAP(  1,   5,   1,   0),
+      [GEN6_FORMAT_B8G8R8X8_UNORM]           = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_B8G8R8X8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8B8X8_UNORM]           = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8B8X8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R9G9B9E5_SHAREDEXP]       = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B10G10R10X2_UNORM]        = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_L16A16_FLOAT]             = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B5G6R5_UNORM]             = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_B5G6R5_UNORM_SRGB]        = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B5G5R5A1_UNORM]           = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B4G4R4A4_UNORM]           = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_R8G8_SINT]                = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R8G8_UINT]                = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16_UNORM]                = CAP(  1,   1,   1,   0),
+      [GEN6_FORMAT_R16_SNORM]                = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R16_SINT]                 = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16_UINT]                 = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R16_FLOAT]                = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_A8P8_UNORM_PALETTE0]      = CAP(  5,   5,   0,   0),
+      [GEN6_FORMAT_A8P8_UNORM_PALETTE1]      = CAP(  5,   5,   0,   0),
+      [GEN6_FORMAT_I16_UNORM]                = CAP(  1,   1,   1,   0),
+      [GEN6_FORMAT_L16_UNORM]                = CAP(  1,   1,   1,   0),
+      [GEN6_FORMAT_A16_UNORM]                = CAP(  1,   1,   1,   0),
+      [GEN6_FORMAT_L8A8_UNORM]               = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_I16_FLOAT]                = CAP(  1,   1,   1,   0),
+      [GEN6_FORMAT_L16_FLOAT]                = CAP(  1,   1,   1,   0),
+      [GEN6_FORMAT_A16_FLOAT]                = CAP(  1,   1,   1,   0),
+      [GEN6_FORMAT_L8A8_UNORM_SRGB]          = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_R5G5_SNORM_B6_UNORM]      = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_P8A8_UNORM_PALETTE0]      = CAP(  5,   5,   0,   0),
+      [GEN6_FORMAT_P8A8_UNORM_PALETTE1]      = CAP(  5,   5,   0,   0),
+      [GEN6_FORMAT_R8_UNORM]                 = CAP(  1,   1,   0, 4.5),
+      [GEN6_FORMAT_R8_SNORM]                 = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8_SINT]                  = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_R8_UINT]                  = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_A8_UNORM]                 = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_I8_UNORM]                 = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_L8_UNORM]                 = CAP(  1,   1,   0,   1),
+      [GEN6_FORM​AT_P4A4_UNORM_PALETTE0]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_A4P4_UNORM_PALETTE0]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_P8_UNORM_PALETTE0]        = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_L8_UNORM_SRGB]            = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_P8_UNORM_PALETTE1]        = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_P4A4_UNORM_PALETTE1]      = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_A4P4_UNORM_PALETTE1]      = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_DXT1_RGB_SRGB]            = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_R1_UNORM]                 = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_YCRCB_NORMAL]             = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_YCRCB_SWAPUVY]            = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_P2_UNORM_PALETTE0]        = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_P2_UNORM_PALETTE1]        = CAP(4.5, 4.5,   0,   0),
+      [GEN6_FORMAT_BC1_UNORM]                = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_BC2_UNORM]                = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_BC3_UNORM]                = CAP(  1,   1,   0,   1),
+      [GEN6_FORMAT_BC4_UNORM]                = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_BC5_UNORM]                = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_BC1_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_BC2_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_BC3_UNORM_SRGB]           = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_MONO8]                    = CAP(  1,   0,   0,   0),
+      [GEN6_FORMAT_YCRCB_SWAPUV]             = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_YCRCB_SWAPY]              = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_DXT1_RGB]                 = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_FXT1]                     = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_BC4_SNORM]                = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_BC5_SNORM]                = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R16G16B16_FLOAT]          = CAP(  5,   5,   0,   0),
+      [GEN6_FORMAT_BC6H_SF16]                = CAP(  7,   7,   0,   0),
+      [GEN6_FORMAT_BC7_UNORM]                = CAP(  7,   7,   0,   0),
+      [GEN6_FORMAT_BC7_UNORM_SRGB]           = CAP(  7,   7,   0,   0),
+      [GEN6_FORMAT_BC6H_UF16]                = CAP(  7,   7,   0,   0),
+#undef CAP
+   };
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   return (format < ARRAY_SIZE(caps) && caps[format].sampling &&
+           ilo_dev_gen(dev) >= caps[format].sampling); /* gaps are 0: invalid */
+}
+
+static bool
+surface_valid_dp_format(const struct ilo_dev *dev,
+                        enum ilo_state_surface_access access,
+                        enum gen_surface_format format)
+{
+   /*
+    * Data port capabilities per surface format, based on:
+    *
+    *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
+    *  - the Ivy Bridge PRM, volume 4 part 1, page 172, 252-253, and 277-278
+    *  - the Haswell PRM, volume 7, page 262-264
+    */
+   static const struct dp_cap {
+      int rt_write;               /* 0: never; else the lowest ILO_GEN() */
+      int rt_write_blending;      /* not read in this function */
+      int typed_write;            /* 0: never; else the lowest ILO_GEN() */
+      int media_color_processing; /* not read in this function */
+   } caps[] = {
+#define CAP(rt_write, rt_write_blending, typed_write, media_color_processing) \
+      { ILO_GEN(rt_write), ILO_GEN(rt_write_blending), ILO_GEN(typed_write), ILO_GEN(media_color_processing) }
+      [GEN6_FORMAT_R32G32B32A32_FLOAT]       = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_R32G32B32A32_SINT]        = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R32G32B32A32_UINT]        = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16G16B16A16_UNORM]       = CAP(  1, 4.5,   7,   6),
+      [GEN6_FORMAT_R16G16B16A16_SNORM]       = CAP(  1,   6,   7,   0),
+      [GEN6_FORMAT_R16G16B16A16_SINT]        = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16G16B16A16_UINT]        = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16G16B16A16_FLOAT]       = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_R32G32_FLOAT]             = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_R32G32_SINT]              = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R32G32_UINT]              = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_B8G8R8A8_UNORM]           = CAP(  1,   1,   7,   6),
+      [GEN6_FORMAT_B8G8R8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R10G10B10A2_UNORM]        = CAP(  1,   1,   7,   6),
+      [GEN6_FORMAT_R10G10B10A2_UNORM_SRGB]   = CAP(  0,   0,   0,   6),
+      [GEN6_FORMAT_R10G10B10A2_UINT]         = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R8G8B8A8_UNORM]           = CAP(  1,   1,   7,   6),
+      [GEN6_FORMAT_R8G8B8A8_UNORM_SRGB]      = CAP(  1,   1,   0,   6),
+      [GEN6_FORMAT_R8G8B8A8_SNORM]           = CAP(  1,   6,   7,   0),
+      [GEN6_FORMAT_R8G8B8A8_SINT]            = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R8G8B8A8_UINT]            = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16G16_UNORM]             = CAP(  1, 4.5,   7,   0),
+      [GEN6_FORMAT_R16G16_SNORM]             = CAP(  1,   6,   7,   0),
+      [GEN6_FORMAT_R16G16_SINT]              = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16G16_UINT]              = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16G16_FLOAT]             = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_B10G10R10A2_UNORM]        = CAP(  1,   1,   7,   6),
+      [GEN6_FORMAT_B10G10R10A2_UNORM_SRGB]   = CAP(  1,   1,   0,   6),
+      [GEN6_FORMAT_R11G11B10_FLOAT]          = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_R32_SINT]                 = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R32_UINT]                 = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R32_FLOAT]                = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_B8G8R8X8_UNORM]           = CAP(  0,   0,   0,   6),
+      [GEN6_FORMAT_B5G6R5_UNORM]             = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_B5G6R5_UNORM_SRGB]        = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B5G5R5A1_UNORM]           = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_B5G5R5A1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_B4G4R4A4_UNORM]           = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_B4G4R4A4_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8G8_UNORM]               = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_R8G8_SNORM]               = CAP(  1,   6,   7,   0),
+      [GEN6_FORMAT_R8G8_SINT]                = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R8G8_UINT]                = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16_UNORM]                = CAP(  1, 4.5,   7,   7),
+      [GEN6_FORMAT_R16_SNORM]                = CAP(  1,   6,   7,   0),
+      [GEN6_FORMAT_R16_SINT]                 = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16_UINT]                 = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R16_FLOAT]                = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_B5G5R5X1_UNORM]           = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_B5G5R5X1_UNORM_SRGB]      = CAP(  1,   1,   0,   0),
+      [GEN6_FORMAT_R8_UNORM]                 = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_R8_SNORM]                 = CAP(  1,   6,   7,   0),
+      [GEN6_FORMAT_R8_SINT]                  = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_R8_UINT]                  = CAP(  1,   0,   7,   0),
+      [GEN6_FORMAT_A8_UNORM]                 = CAP(  1,   1,   7,   0),
+      [GEN6_FORMAT_YCRCB_NORMAL]             = CAP(  1,   0,   0,   6),
+      [GEN6_FORMAT_YCRCB_SWAPUVY]            = CAP(  1,   0,   0,   6),
+      [GEN6_FORMAT_YCRCB_SWAPUV]             = CAP(  1,   0,   0,   6),
+      [GEN6_FORMAT_YCRCB_SWAPY]              = CAP(  1,   0,   0,   6),
+#undef CAP
+   };
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* bound-check only where caps[] is indexed: RAW has no entry in the */
+   /* table, yet it is the one format the untyped case must accept */
+   switch (access) {
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+      return (format < ARRAY_SIZE(caps) && caps[format].rt_write &&
+              ilo_dev_gen(dev) >= caps[format].rt_write);
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+      return (format < ARRAY_SIZE(caps) && caps[format].typed_write &&
+              ilo_dev_gen(dev) >= caps[format].typed_write);
+   case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+      /* raw only; no table lookup */
+      return (format == GEN6_FORMAT_RAW);
+   case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+      /* ignored, but can it be raw? */
+      assert(format != GEN6_FORMAT_RAW);
+      return true;
+   default:
+      return false;
+   }
+}
+
+static bool
+surface_valid_svb_format(const struct ilo_dev *dev,
+                         enum gen_surface_format format)
+{
+   /*
+    * Formats accepted for SVB access.  This list is based on:
+    *
+    *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
+    *  - the Ivy Bridge PRM, volume 2 part 1, page 195
+    *  - the Haswell PRM, volume 7, page 535
+    */
+   static const enum gen_surface_format svb_formats[] = {
+      GEN6_FORMAT_R32G32B32A32_FLOAT, GEN6_FORMAT_R32G32B32A32_SINT,
+      GEN6_FORMAT_R32G32B32A32_UINT,  GEN6_FORMAT_R32G32B32_FLOAT,
+      GEN6_FORMAT_R32G32B32_SINT,     GEN6_FORMAT_R32G32B32_UINT,
+      GEN6_FORMAT_R32G32_FLOAT,       GEN6_FORMAT_R32G32_SINT,
+      GEN6_FORMAT_R32G32_UINT,        GEN6_FORMAT_R32_SINT,
+      GEN6_FORMAT_R32_UINT,           GEN6_FORMAT_R32_FLOAT,
+   };
+   unsigned int i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* a linear scan is enough for a dozen entries */
+   for (i = 0; i < ARRAY_SIZE(svb_formats); i++) {
+      if (format == svb_formats[i])
+         return true;
+   }
+
+   return false;
+}
+
+/*
+ * Report whether format may be used with the given access.  The question
+ * is handed to the sampler, data port, or SVB check that matches access;
+ * an access value not listed below is never valid.
+ */
+bool
+ilo_state_surface_valid_format(const struct ilo_dev *dev,
+                               enum ilo_state_surface_access access,
+                               enum gen_surface_format format)
+{
+   switch (access) {
+   case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+      return surface_valid_sampler_format(dev, access, format);
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+   case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+   case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+      /* these four share one checker, which switches on access again */
+      return surface_valid_dp_format(dev, access, format);
+   case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+      return surface_valid_svb_format(dev, format);
+   default:
+      /* unknown access */
+      return false;
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_surface.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_surface.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_surface.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_surface.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,128 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_SURFACE_H
+#define ILO_STATE_SURFACE_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+enum ilo_state_surface_access {
+   ILO_STATE_SURFACE_ACCESS_SAMPLER,      /* sampling engine surfaces */
+   ILO_STATE_SURFACE_ACCESS_DP_RENDER,    /* render target surfaces */
+   ILO_STATE_SURFACE_ACCESS_DP_TYPED,     /* typed surfaces */
+   ILO_STATE_SURFACE_ACCESS_DP_UNTYPED,   /* untyped surfaces */
+   ILO_STATE_SURFACE_ACCESS_DP_DATA,      /* validated like other DP_* */
+   ILO_STATE_SURFACE_ACCESS_DP_SVB,       /* validated separately */
+};
+
+struct ilo_vma;
+struct ilo_image;
+
+struct ilo_state_surface_buffer_info {  /* input to ..._init_for_buffer() */
+   const struct ilo_vma *vma;
+   uint32_t offset;
+   uint32_t size;
+
+   enum ilo_state_surface_access access;
+
+   /* format_size may be less than, equal to, or greater than struct_size */
+   enum gen_surface_format format;
+   uint8_t format_size;
+
+   bool readonly;
+   uint16_t struct_size;
+};
+
+struct ilo_state_surface_image_info {   /* input to ..._init_for_image() */
+   const struct ilo_image *img;
+   uint8_t level_base;
+   uint8_t level_count;
+   uint16_t slice_base;
+   uint16_t slice_count;
+
+   const struct ilo_vma *vma;
+   const struct ilo_vma *aux_vma;
+
+   enum ilo_state_surface_access access;
+
+   enum gen_surface_type type;
+
+   enum gen_surface_format format;
+   bool is_integer;
+
+   bool readonly;
+   bool is_array;
+};
+
+struct ilo_state_surface {   /* set via ilo_state_surface_init_for_*() */
+   uint32_t surface[13];     /* NOTE(review): presumably SURFACE_STATE dwords */
+
+   const struct ilo_vma *vma;
+   const struct ilo_vma *aux_vma;
+
+   enum gen_surface_type type;
+   uint8_t min_lod;
+   uint8_t mip_count;
+   bool is_integer;
+
+   bool readonly;
+   bool scanout;
+};
+
+bool
+ilo_state_surface_valid_format(const struct ilo_dev *dev,
+                               enum ilo_state_surface_access access,
+                               enum gen_surface_format format);
+
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+                              enum ilo_state_surface_access access,
+                              uint32_t size, uint32_t *alignment);
+
+bool
+ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
+                                const struct ilo_dev *dev);
+
+bool
+ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf,
+                                  const struct ilo_dev *dev,
+                                  const struct ilo_state_surface_buffer_info *info);
+
+bool
+ilo_state_surface_init_for_image(struct ilo_state_surface *surf,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_surface_image_info *info);
+
+bool
+ilo_state_surface_set_scs(struct ilo_state_surface *surf,
+                          const struct ilo_dev *dev,
+                          enum gen_surface_scs rgba[4]);
+
+#endif /* ILO_STATE_SURFACE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_urb.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_urb.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_urb.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_urb.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,769 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_urb.h"
+
+struct urb_configuration {
+   uint8_t vs_pcb_alloc_kb;     /* per-stage push constant space, in KB */
+   uint8_t hs_pcb_alloc_kb;
+   uint8_t ds_pcb_alloc_kb;
+   uint8_t gs_pcb_alloc_kb;
+   uint8_t ps_pcb_alloc_kb;
+
+   uint8_t urb_offset_8kb;      /* start of the stage URB space, in 8KB */
+
+   uint8_t vs_urb_alloc_8kb;    /* per-stage URB space, in 8KB units */
+   uint8_t hs_urb_alloc_8kb;
+   uint8_t ds_urb_alloc_8kb;
+   uint8_t gs_urb_alloc_8kb;
+
+   uint8_t vs_entry_rows;       /* URB rows taken by one entry (>= 1) */
+   uint8_t hs_entry_rows;
+   uint8_t ds_entry_rows;
+   uint8_t gs_entry_rows;
+
+   int vs_entry_count;          /* number of URB entries per stage */
+   int hs_entry_count;
+   int ds_entry_count;
+   int gs_entry_count;
+};
+
+static void
+urb_alloc_gen7_pcb(const struct ilo_dev *dev,
+                   const struct ilo_state_urb_info *info,
+                   struct urb_configuration *conf)
+{
+   /*
+    * From the Haswell PRM, volume 2b, page 940:
+    *
+    *     "[0,16] (0KB - 16KB) Increments of 1KB DevHSW:GT1, DevHSW:GT2
+    *      [0,32] (0KB - 32KB) Increments of 2KB DevHSW:GT3"
+    */
+   const bool uses_2kb_units =
+      (ilo_dev_gen(dev) >= ILO_GEN(8) ||
+       (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 3));
+   const uint8_t unit_kb = (uses_2kb_units) ? 2 : 1;
+   const bool tess_consts = (info->hs_const_data || info->ds_const_data);
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /* Fixed splits of 16 units; workloads and reconfig costs are unknown. */
+   if (tess_consts) {
+      conf->vs_pcb_alloc_kb = unit_kb * 4;
+      conf->hs_pcb_alloc_kb = unit_kb * 3;
+      conf->ds_pcb_alloc_kb = unit_kb * 3;
+      conf->gs_pcb_alloc_kb = unit_kb * 3;
+      conf->ps_pcb_alloc_kb = unit_kb * 3;
+   } else if (info->gs_const_data) {
+      conf->vs_pcb_alloc_kb = unit_kb * 6;
+      conf->gs_pcb_alloc_kb = unit_kb * 5;
+      conf->ps_pcb_alloc_kb = unit_kb * 5;
+   } else {
+      conf->vs_pcb_alloc_kb = unit_kb * 8;
+      conf->ps_pcb_alloc_kb = unit_kb * 8;
+   }
+
+   /* stage URB allocations start right after the 16 units of PCB space */
+   conf->urb_offset_8kb = unit_kb * 16 / 8;
+}
+
+static void
+urb_alloc_gen6_urb(const struct ilo_dev *dev,
+                   const struct ilo_state_urb_info *info,
+                   struct urb_configuration *conf)
+{
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+    *
+    *     "(VS URB Starting Address) Offset from the start of the URB memory
+    *      where VS starts its allocation, specified in multiples of 8 KB."
+    *
+    * Same for other stages, hence everything here is in 8KB units.
+    */
+   const int avail_8kb = dev->urb_size / 8192 - conf->urb_offset_8kb;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
+    *
+    *     "Programming Note: If the GS stage is enabled, software must always
+    *      allocate at least one GS URB Entry. This is true even if the GS
+    *      thread never needs to output vertices to the urb, e.g., when only
+    *      performing stream output. This is an artifact of the need to pass
+    *      the GS thread an initial destination URB handle."
+    */
+   const bool gen6_needs_gs =
+      (ilo_dev_gen(dev) == ILO_GEN(6) && info->gs_enable);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (info->hs_entry_size || info->ds_entry_size) {
+      const int quarter_8kb = avail_8kb / 4;
+      conf->vs_urb_alloc_8kb = quarter_8kb;
+      conf->hs_urb_alloc_8kb = quarter_8kb;
+      conf->ds_urb_alloc_8kb = quarter_8kb;
+      conf->gs_urb_alloc_8kb = quarter_8kb;
+      if (avail_8kb % 4) {
+         assert(avail_8kb % 2 == 0);   /* so exactly two units are left */
+         conf->vs_urb_alloc_8kb += 1;
+         conf->gs_urb_alloc_8kb += 1;
+      }
+   } else if (info->gs_entry_size || gen6_needs_gs) {
+      assert(avail_8kb % 2 == 0);
+      conf->vs_urb_alloc_8kb = avail_8kb / 2;
+      conf->gs_urb_alloc_8kb = avail_8kb / 2;
+   } else {
+      conf->vs_urb_alloc_8kb = avail_8kb;
+   }
+}
+
+static bool
+urb_init_gen6_vs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * Size the Gen6 VS URB entries (rows per entry and entry count).  From
+    * the Sandy Bridge PRM, volume 2 part 1, page 28:
+    *     "(VS URB Entry Allocation Size)
+    *      Range [0,4] = [1,5] 1024-bit URB rows"
+    *
+    *     "(VS Number of URB Entries)
+    *      Range [24,256] in multiples of 4
+    *            [24, 128] in multiples of 4[DevSNBGT1]"
+    */
+   /* NOTE(review): the quote caps GT1 at 128 entries, not 252 -- confirm */
+   const int max_entry_count = (dev->gt == 2) ? 256 : 252;
+   const int row_size = 1024 / 8;
+   int entry_size, row_count, entry_count;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* VE and VS share the same VUE for each vertex */
+   entry_size = info->vs_entry_size;
+   if (entry_size < info->ve_entry_size)
+      entry_size = info->ve_entry_size;
+
+   row_count = (entry_size + row_size - 1) / row_size;
+   if (row_count > 5)
+      return false;
+   else if (!row_count)
+      row_count++;
+
+   entry_count = conf->vs_urb_alloc_8kb * 8192 / (row_size * row_count);
+   if (entry_count > max_entry_count)
+      entry_count = max_entry_count;
+   entry_count &= ~3;
+
+   conf->vs_entry_rows = row_count;
+   conf->vs_entry_count = entry_count;
+
+   /* fail, rather than only assert, below the 24-entry hardware minimum */
+   return (entry_count >= 24);
+}
+
+static bool
+urb_init_gen6_gs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * Size the Gen6 GS URB entries (rows per entry and entry count).  From
+    * the Sandy Bridge PRM, volume 2 part 1, page 29:
+    *     "(GS Number of URB Entries)
+    *      Range [0,256] in multiples of 4
+    *            [0, 254] in multiples of 4[DevSNBGT1]"
+    *
+    *     "(GS URB Entry Allocation Size)
+    *      Range [0,4] = [1,5] 1024-bit URB rows"
+    */
+   const int max_entry_count = (dev->gt == 2) ? 256 : 252;
+   const int row_size = 1024 / 8;
+   int row_count, entry_count;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   row_count = (info->gs_entry_size + row_size - 1) / row_size;
+   if (row_count > 5)
+      return false;
+   else if (!row_count)
+      row_count++;   /* a zero-sized entry still takes one row */
+
+   entry_count = conf->gs_urb_alloc_8kb * 8192 / (row_size * row_count);
+   if (entry_count > max_entry_count)
+      entry_count = max_entry_count;
+   entry_count &= ~3;   /* round down to a multiple of 4 */
+
+   conf->gs_entry_rows = row_count;
+   conf->gs_entry_count = entry_count;
+
+   return true;
+}
+
+static bool
+urb_init_gen7_vs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * Size the Gen7-Gen8 VS URB entries (rows per entry and entry count).
+    * From the Ivy Bridge PRM, volume 2 part 1, page 34-35:
+    *
+    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
+    *      cause performance to decrease due to banking in the URB. Element
+    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
+    *
+    *     "(VS URB Entry Allocation Size)
+    *      Format: U9-1 count of 512-bit units"
+    *
+    *     "(VS Number of URB Entries)
+    *      [32,704]
+    *      [32,512]
+    *
+    *      Programming Restriction: VS Number of URB Entries must be divisible
+    *      by 8 if the VS URB Entry Allocation Size is less than 9 512-bit URB
+    *      entries."2:0" = reserved "000b""
+    *
+    * From the Haswell PRM, volume 2b, page 847:
+    *
+    *     "(VS Number of URB Entries)
+    *      [64,1664] DevHSW:GT3
+    *      [64,1664] DevHSW:GT2
+    *      [32,640]  DevHSW:GT1"
+    */
+   const int row_size = 512 / 8;
+   int row_count, entry_count;
+   int entry_size;
+   int max_entry_count, min_entry_count;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 35:
+    *
+    *     "Programming Restriction: As the VS URB entry serves as both the
+    *      per-vertex input and output of the VS shader, the VS URB Allocation
+    *      Size must be sized to the maximum of the vertex input and output
+    *      structures."
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 42:
+    *
+    *     "If the VS function is enabled, the VF-written VUEs are not required
+    *      to have Vertex Headers, as the VS-incoming vertices are guaranteed
+    *      to be consumed by the VS (i.e., the VS thread is responsible for
+    *      overwriting the input vertex data)."
+    *
+    * VE and VS share the same VUE for each vertex.
+    */
+   entry_size = info->vs_entry_size;
+   if (entry_size < info->ve_entry_size)
+      entry_size = info->ve_entry_size;
+
+   row_count = (entry_size + row_size - 1) / row_size;
+   if (row_count == 5 || !row_count)
+      row_count++;   /* avoid 5 rows (banking, see above); 0 becomes 1 */
+
+   entry_count = conf->vs_urb_alloc_8kb * 8192 / (row_size * row_count);
+   if (row_count < 9)
+      entry_count &= ~7;   /* must be divisible by 8 */
+
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+   case ILO_GEN(7.5):
+      max_entry_count = (dev->gt >= 2) ? 1664 : 640;
+      min_entry_count = (dev->gt >= 2) ? 64 : 32;
+      break;
+   case ILO_GEN(7):
+      max_entry_count = (dev->gt == 2) ? 704 : 512;
+      min_entry_count = 32;
+      break;
+   default:
+      assert(!"unexpected gen");
+      return false;
+      break;
+   }
+
+   if (entry_count > max_entry_count)
+      entry_count = max_entry_count;
+   else if (entry_count < min_entry_count)
+      return false;   /* the VS allocation cannot hold the minimum count */
+
+   conf->vs_entry_rows = row_count;
+   conf->vs_entry_count = entry_count;
+
+   return true;
+}
+
+static bool
+urb_init_gen7_hs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * Size the Gen7-Gen8 HS URB entries (rows per entry and entry count).
+    * From the Ivy Bridge PRM, volume 2 part 1, page 37:
+    *
+    *     "HS Number of URB Entries must be divisible by 8 if the HS URB Entry
+    *      Allocation Size is less than 9 512-bit URB
+    *      entries."2:0" = reserved "000"
+    *
+    *      [0,64]
+    *      [0,32]"
+    *
+    * From the Haswell PRM, volume 2b, page 849:
+    *
+    *     "(HS Number of URB Entries)
+    *      [0,128] DevHSW:GT2
+    *      [0,64]  DevHSW:GT1"
+    */
+   const int row_size = 512 / 8;
+   int row_count, entry_count;
+   int max_entry_count;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   row_count = (info->hs_entry_size + row_size - 1) / row_size;
+   if (!row_count)
+      row_count++;   /* a zero-sized entry still takes one row */
+
+   entry_count = conf->hs_urb_alloc_8kb * 8192 / (row_size * row_count);
+   if (row_count < 9)
+      entry_count &= ~7;   /* must be divisible by 8 */
+
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+   case ILO_GEN(7.5):
+      max_entry_count = (dev->gt >= 2) ? 128 : 64;
+      break;
+   case ILO_GEN(7):
+      max_entry_count = (dev->gt == 2) ? 64 : 32;
+      break;
+   default:
+      assert(!"unexpected gen");
+      return false;
+      break;
+   }
+
+   if (entry_count > max_entry_count)
+      entry_count = max_entry_count;
+   else if (info->hs_entry_size && !entry_count)
+      return false;   /* HS entries are requested but none fit */
+
+   conf->hs_entry_rows = row_count;
+   conf->hs_entry_count = entry_count;
+
+   return true;
+}
+
+static bool
+urb_init_gen7_ds_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * Size the Gen7-Gen8 DS URB entries (rows per entry and entry count).
+    * From the Ivy Bridge PRM, volume 2 part 1, page 38:
+    *
+    *     "(DS URB Entry Allocation Size)
+    *      [0,9]"
+    *
+    *     "(DS Number of URB Entries) If Domain Shader Thread Dispatch is
+    *      Enabled then the minimum number handles that must be allocated is
+    *      138 URB entries.
+    *      "2:0" = reserved "000"
+    *
+    *      [0,448]
+    *      [0,288]
+    *
+    *      DS Number of URB Entries must be divisible by 8 if the DS URB Entry
+    *      Allocation Size is less than 9 512-bit URB entries.If Domain Shader
+    *      Thread Dispatch is Enabled then the minimum number of handles that
+    *      must be allocated is 10 URB entries."
+    *
+    * From the Haswell PRM, volume 2b, page 851:
+    *
+    *     "(DS Number of URB Entries)
+    *      [0,960] DevHSW:GT2
+    *      [0,384] DevHSW:GT1"
+    */
+   const int row_size = 512 / 8;
+   int row_count, entry_count;
+   int max_entry_count;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   row_count = (info->ds_entry_size + row_size - 1) / row_size;
+   if (row_count > 10)
+      return false;   /* the allocation size field is [0,9], i.e. 10 rows */
+   else if (!row_count)
+      row_count++;   /* a zero-sized entry still takes one row */
+
+   entry_count = conf->ds_urb_alloc_8kb * 8192 / (row_size * row_count);
+   if (row_count < 9)
+      entry_count &= ~7;   /* must be divisible by 8 */
+
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+   case ILO_GEN(7.5):
+      max_entry_count = (dev->gt >= 2) ? 960 : 384;
+      break;
+   case ILO_GEN(7):
+      max_entry_count = (dev->gt == 2) ? 448 : 288;
+      break;
+   default:
+      assert(!"unexpected gen");
+      return false;
+      break;
+   }
+
+   if (entry_count > max_entry_count)
+      entry_count = max_entry_count;
+   else if (info->ds_entry_size && entry_count < 10)
+      return false;   /* NOTE(review): the quote says both 138 and 10 */
+
+   conf->ds_entry_rows = row_count;
+   conf->ds_entry_count = entry_count;
+
+   return true;
+}
+
+static bool
+urb_init_gen7_gs_entry(const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info,
+                       struct urb_configuration *conf)
+{
+   /*
+    * Size the Gen7-Gen8 GS URB entries (rows per entry and entry count).
+    * From the Ivy Bridge PRM, volume 2 part 1, page 40:
+    *
+    *     "(GS Number of URB Entries) GS Number of URB Entries must be
+    *      divisible by 8 if the GS URB Entry Allocation Size is less than 9
+    *      512-bit URB entries.
+    *      "2:0" = reserved "000"
+    *
+    *      [0,320]
+    *      [0,192]"
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 171:
+    *
+    *     "(DUAL_INSTANCE and DUAL_OBJECT) The GS must be allocated at least
+    *      two URB handles or behavior is UNDEFINED."
+    *
+    * From the Haswell PRM, volume 2b, page 853:
+    *
+    *     "(GS Number of URB Entries)
+    *      [0,640] DevHSW:GT2
+    *      [0,256] DevHSW:GT1
+    *
+    *      Only if GS is disabled can this field be programmed to 0.  If GS is
+    *      enabled this field shall be programmed to a value greater than 0.
+    *      For GS Dispatch Mode "Single", this field shall be programmed to a
+    *      value greater than or equal to 1. For other GS Dispatch Modes,
+    *      refer to the definition of Dispatch Mode (3DSTATE_GS) for minimum
+    *      values of this field."
+    */
+   const int row_size = 512 / 8;
+   int row_count, entry_count;
+   int max_entry_count;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   row_count = (info->gs_entry_size + row_size - 1) / row_size;
+   if (!row_count)
+      row_count++;   /* a zero-sized entry still takes one row */
+
+   entry_count = conf->gs_urb_alloc_8kb * 8192 / (row_size * row_count);
+   if (row_count < 9)
+      entry_count &= ~7;   /* must be divisible by 8 */
+
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(8):
+   case ILO_GEN(7.5):
+      max_entry_count = (dev->gt >= 2) ? 640 : 256;
+      break;
+   case ILO_GEN(7):
+      max_entry_count = (dev->gt == 2) ? 320 : 192;
+      break;
+   default:
+      assert(!"unexpected gen");
+      return false;
+      break;
+   }
+
+   if (entry_count > max_entry_count)
+      entry_count = max_entry_count;
+   else if (info->gs_entry_size && entry_count < 2)
+      return false;   /* the DUAL_* dispatch modes need two handles */
+
+   conf->gs_entry_rows = row_count;
+   conf->gs_entry_count = entry_count;
+
+   return true;
+}
+
+static bool
+urb_get_gen6_configuration(const struct ilo_dev *dev,
+                           const struct ilo_state_urb_info *info,
+                           struct urb_configuration *conf)
+{
+   const bool is_gen7_plus = (ilo_dev_gen(dev) >= ILO_GEN(7));
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* start from all zeros: fields that are not assigned below stay zero */
+   memset(conf, 0, sizeof(*conf));
+
+   /* PCB space exists on Gen7+ only, and the URB split starts after it */
+   if (is_gen7_plus)
+      urb_alloc_gen7_pcb(dev, info, conf);
+   urb_alloc_gen6_urb(dev, info, conf);
+
+   /* size the entries of every stage; stop at the first one that fails */
+   if (!is_gen7_plus) {
+      return (urb_init_gen6_vs_entry(dev, info, conf) &&
+              urb_init_gen6_gs_entry(dev, info, conf));
+   }
+
+   return (urb_init_gen7_vs_entry(dev, info, conf) &&
+           urb_init_gen7_hs_entry(dev, info, conf) &&
+           urb_init_gen7_ds_entry(dev, info, conf) &&
+           urb_init_gen7_gs_entry(dev, info, conf));
+}
+
+static bool
+urb_set_gen7_3dstate_push_constant_alloc(struct ilo_state_urb *urb,
+                                         const struct ilo_dev *dev,
+                                         const struct ilo_state_urb_info *info,
+                                         const struct urb_configuration *conf)
+{
+   const uint8_t sizes_kb[5] = {
+      conf->vs_pcb_alloc_kb,
+      conf->hs_pcb_alloc_kb,
+      conf->ds_pcb_alloc_kb,
+      conf->gs_pcb_alloc_kb,
+      conf->ps_pcb_alloc_kb,
+   };
+   uint32_t dw1[5];
+   uint8_t next_kb = 0;
+   int stage;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /* pack the non-empty allocations back to back, in VS..PS order */
+   for (stage = 0; stage < 5; stage++) {
+      dw1[stage] = 0;
+      if (!sizes_kb[stage])
+         continue;
+      /* careful for the valid range of offsets */
+      dw1[stage] = next_kb << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT |
+                   sizes_kb[stage] << GEN7_PCB_ALLOC_DW1_SIZE__SHIFT;
+      next_kb += sizes_kb[stage];
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(urb->pcb) >= 5);
+   memcpy(urb->pcb, dw1, sizeof(dw1));
+
+   return true;
+}
+
+static bool
+urb_set_gen6_3DSTATE_URB(struct ilo_state_urb *urb,
+                         const struct ilo_dev *dev,
+                         const struct ilo_state_urb_info *info,
+                         const struct urb_configuration *conf)
+{
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /* the entry sizes are programmed as "rows - 1", so rows must be set */
+   assert(conf->vs_entry_rows && conf->gs_entry_rows);
+
+   STATIC_ASSERT(ARRAY_SIZE(urb->urb) >= 2);
+   /* DW1: VS entry size and entry count */
+   urb->urb[0] =
+      (conf->vs_entry_rows - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
+      conf->vs_entry_count << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
+   /* DW2: GS entry count and entry size */
+   urb->urb[1] =
+      conf->gs_entry_count << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
+      (conf->gs_entry_rows - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
+
+   return true;
+}
+
+static bool
+urb_set_gen7_3dstate_urb(struct ilo_state_urb *urb,
+                         const struct ilo_dev *dev,
+                         const struct ilo_state_urb_info *info,
+                         const struct urb_configuration *conf)
+{
+   /* per-stage inputs, indexed in VS, HS, DS, GS order */
+   const uint8_t alloc_8kb[4] = {
+      conf->vs_urb_alloc_8kb, conf->hs_urb_alloc_8kb,
+      conf->ds_urb_alloc_8kb, conf->gs_urb_alloc_8kb,
+   };
+
+   const uint8_t entry_rows[4] = {
+      conf->vs_entry_rows, conf->hs_entry_rows,
+      conf->ds_entry_rows, conf->gs_entry_rows,
+   };
+
+   const int entry_count[4] = {
+      conf->vs_entry_count, conf->hs_entry_count,
+      conf->ds_entry_count, conf->gs_entry_count,
+   };
+
+   uint8_t next_8kb = conf->urb_offset_8kb;
+   uint32_t dw1[4];
+   int stage;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /*
+    * Lay the stages out back to back, starting at conf->urb_offset_8kb.  A
+    * stage without an allocation gets an all-zero dword and does not
+    * advance the offset.
+    */
+   for (stage = 0; stage < 4; stage++) {
+      dw1[stage] = 0;
+      if (!alloc_8kb[stage])
+         continue;
+
+      assert(entry_rows[stage]);
+
+      /* careful for the valid range of offsets */
+      dw1[stage] =
+         next_8kb << GEN7_URB_DW1_OFFSET__SHIFT |
+         (entry_rows[stage] - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT |
+         entry_count[stage] << GEN7_URB_DW1_ENTRY_COUNT__SHIFT;
+      next_8kb += alloc_8kb[stage];
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(urb->urb) >= 4);
+   memcpy(urb->urb, dw1, sizeof(dw1));
+
+   return true;
+}
+
+bool
+ilo_state_urb_init(struct ilo_state_urb *urb,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_urb_info *info)
+{
+   assert(ilo_is_zeroed(urb, sizeof(*urb)));   /* urb must start out zeroed */
+   return ilo_state_urb_set_info(urb, dev, info);   /* then as set_info */
+}
+
+bool
+ilo_state_urb_init_for_rectlist(struct ilo_state_urb *urb,
+                                const struct ilo_dev *dev,
+                                uint8_t vf_attr_count)
+{
+   struct ilo_state_urb_info info;
+   /* only the VE entry size is set: four dwords per vertex attribute */
+   memset(&info, 0, sizeof(info));
+   info.ve_entry_size = sizeof(uint32_t) * 4 * vf_attr_count;
+
+   return ilo_state_urb_init(urb, dev, &info);
+}
+
+bool
+ilo_state_urb_set_info(struct ilo_state_urb *urb,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info)
+{
+   struct urb_configuration conf;
+   bool ret;
+
+   /* encode only a valid layout; on failure urb is left untouched */
+   ret = urb_get_gen6_configuration(dev, info, &conf);
+   if (ret && ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      ret &= urb_set_gen7_3dstate_push_constant_alloc(urb, dev, info, &conf);
+      ret &= urb_set_gen7_3dstate_urb(urb, dev, info, &conf);
+   } else if (ret) {
+      ret &= urb_set_gen6_3DSTATE_URB(urb, dev, info, &conf);
+   }
+
+   assert(ret);
+   return ret;
+}
+
+void
+ilo_state_urb_full_delta(const struct ilo_state_urb *urb,
+                         const struct ilo_dev *dev,
+                         struct ilo_state_urb_delta *delta)
+{
+   /* every generation dirties the VS and GS URB state */
+   uint32_t dirty = ILO_STATE_URB_3DSTATE_URB_VS |
+                    ILO_STATE_URB_3DSTATE_URB_GS;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      dirty |= ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS |
+               ILO_STATE_URB_3DSTATE_URB_HS |
+               ILO_STATE_URB_3DSTATE_URB_DS;
+   }
+   delta->dirty = dirty;
+}
+
+void
+ilo_state_urb_get_delta(const struct ilo_state_urb *urb,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_urb *old,
+                        struct ilo_state_urb_delta *delta)
+{
+   const bool is_gen7_plus = (ilo_dev_gen(dev) >= ILO_GEN(7));
+   /* before Gen7 only the first two dwords of urb->urb are compared */
+   const size_t urb_cmp_size =
+      (is_gen7_plus) ? sizeof(urb->urb) : sizeof(uint32_t) * 2;
+   uint32_t dirty = 0;
+
+   if (is_gen7_plus && memcmp(urb->pcb, old->pcb, sizeof(urb->pcb))) {
+      dirty |= ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+               ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS;
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
+    *
+    *     "3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
+    *      programmed in order for the programming of this state
+    *      (3DSTATE_URB_VS) to be valid."
+    *
+    * So on Gen7+ all four are dirtied together; before that, only VS and GS.
+    */
+   if (memcmp(urb->urb, old->urb, urb_cmp_size)) {
+      dirty |= ILO_STATE_URB_3DSTATE_URB_VS |
+               ILO_STATE_URB_3DSTATE_URB_GS;
+      if (is_gen7_plus)
+         dirty |= ILO_STATE_URB_3DSTATE_URB_HS |
+                  ILO_STATE_URB_3DSTATE_URB_DS;
+   }
+
+   delta->dirty = dirty;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_urb.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_urb.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_urb.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_urb.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,103 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_URB_H
+#define ILO_STATE_URB_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+enum ilo_state_urb_dirty_bits {   /* bits of ilo_state_urb_delta.dirty */
+   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS = (1 << 0),
+   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS = (1 << 1),
+   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS = (1 << 2),
+   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS = (1 << 3),
+   ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS = (1 << 4),
+   ILO_STATE_URB_3DSTATE_URB_VS                 = (1 << 5),
+   ILO_STATE_URB_3DSTATE_URB_HS                 = (1 << 6),
+   ILO_STATE_URB_3DSTATE_URB_DS                 = (1 << 7),
+   ILO_STATE_URB_3DSTATE_URB_GS                 = (1 << 8),
+};
+
+/**
+ * Per-stage URB entry sizes (in bytes) and flags telling which stages have
+ * constant data extracted from PCBs to threads.
+ */
+struct ilo_state_urb_info {
+   bool gs_enable;         /* on Gen6, forces a GS URB allocation */
+
+   bool vs_const_data;     /* ilo_state_urb.c reads only the HS/DS/GS flags */
+   bool hs_const_data;
+   bool ds_const_data;
+   bool gs_const_data;
+   bool ps_const_data;
+
+   uint16_t ve_entry_size; /* VS entries use the larger of VE and VS sizes */
+   uint16_t vs_entry_size;
+   uint16_t hs_entry_size;
+   uint16_t ds_entry_size;
+   uint16_t gs_entry_size;
+};
+
+struct ilo_state_urb {
+   uint32_t pcb[5];   /* Gen7+: VS, HS, DS, GS, PS push constant allocs */
+   uint32_t urb[4];   /* Gen7+: VS, HS, DS, GS; Gen6: DW1 and DW2 only */
+};
+
+struct ilo_state_urb_delta {
+   uint32_t dirty;   /* mask of enum ilo_state_urb_dirty_bits */
+};
+
+bool
+ilo_state_urb_init(struct ilo_state_urb *urb,
+                   const struct ilo_dev *dev,
+                   const struct ilo_state_urb_info *info);
+
+bool
+ilo_state_urb_init_for_rectlist(struct ilo_state_urb *urb,
+                                const struct ilo_dev *dev,
+                                uint8_t vf_attr_count);
+
+bool
+ilo_state_urb_set_info(struct ilo_state_urb *urb,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_urb_info *info);
+
+void
+ilo_state_urb_full_delta(const struct ilo_state_urb *urb,
+                         const struct ilo_dev *dev,
+                         struct ilo_state_urb_delta *delta);
+
+void
+ilo_state_urb_get_delta(const struct ilo_state_urb *urb,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_urb *old,
+                        struct ilo_state_urb_delta *delta);
+
+#endif /* ILO_STATE_URB_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_vf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_vf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_vf.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_vf.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1000 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_vma.h"
+#include "ilo_state_vf.h"
+
+static bool
+vf_validate_gen6_elements(const struct ilo_dev *dev,
+                          const struct ilo_state_vf_info *info)
+{
+   /*
+    * Debug-only sanity check of info's vertex elements: every check is an
+    * assert() and true is always returned.  Offset bound (exclusive) per PRM:
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 95:
+    *     "(Source Element Offset (in bytes))
+    *      Format: U11
+    *      Range [0,2047]"
+    *
+    * From the Haswell PRM, volume 2d, page 415:
+    *     "(Source Element Offset)
+    *      Format: U12 byte offset
+    *      ...
+    *      [0,4095]"
+    *
+    * From the Broadwell PRM, volume 2d, page 469:
+    *     "(Source Element Offset)
+    *      Format: U12 byte offset
+    *      ...
+    *      [0,2047]"
+    */
+   const uint16_t max_vertex_offset =
+      (ilo_dev_gen(dev) == ILO_GEN(7.5)) ? 4096 : 2048;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(info->element_count <= ILO_STATE_VF_MAX_ELEMENT_COUNT);
+
+   for (i = 0; i < info->element_count; i++) {
+      const struct ilo_state_vf_element_info *elem = &info->elements[i];
+
+      assert(elem->buffer < ILO_STATE_VF_MAX_BUFFER_COUNT);
+      assert(elem->vertex_offset < max_vertex_offset);
+      assert(ilo_state_vf_valid_element_format(dev, elem->format));
+   }
+
+   return true;
+}
+
+static uint32_t
+get_gen6_component_controls(const struct ilo_dev *dev,
+                            enum gen_vf_component comp_x,
+                            enum gen_vf_component comp_y,
+                            enum gen_vf_component comp_z,
+                            enum gen_vf_component comp_w)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* each control lands in its own COMPn field of the returned DW1 */
+   return (comp_w << GEN6_VE_DW1_COMP3__SHIFT) |
+          (comp_z << GEN6_VE_DW1_COMP2__SHIFT) |
+          (comp_y << GEN6_VE_DW1_COMP1__SHIFT) |
+          (comp_x << GEN6_VE_DW1_COMP0__SHIFT);
+}
+
+static bool
+get_gen6_edge_flag_format(const struct ilo_dev *dev,
+                          const struct ilo_state_vf_element_info *elem,
+                          enum gen_surface_format *format)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Pick the UINT format for fetching elem as the edge flag.  *format is
+    * written only on success; on failure false is returned and it is kept.
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+    *
+    *     "The Source Element Format must be set to the UINT format."
+    *
+    * From the Haswell PRM, volume 2d, page 413:
+    *
+    *     "The SourceElementFormat needs to be a single-component format with
+    *      an element which has edge flag enabled."
+    */
+   if (elem->component_count != 1)
+      return false;
+
+   /* format sizes 1, 2, and 4 select the R8, R16, and R32 UINT formats */
+   switch (elem->format_size) {
+   case 1:
+      *format = GEN6_FORMAT_R8_UINT;
+      return true;
+   case 2:
+      *format = GEN6_FORMAT_R16_UINT;
+      return true;
+   case 4:
+      *format = GEN6_FORMAT_R32_UINT;
+      return true;
+   default:
+      return false;
+   }
+}
+
+static bool
+vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_state_vf *vf,
+                                    const struct ilo_dev *dev,
+                                    const struct ilo_state_vf_info *info)
+{
+   enum gen_surface_format edge_flag_format;
+   uint32_t dw0, dw1;   /* also read after the loop, for the last element */
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!vf_validate_gen6_elements(dev, info))
+      return false;
+   /* encode every user element as a (DW0, DW1) pair in vf->user_ve[] */
+   for (i = 0; i < info->element_count; i++) {
+      const struct ilo_state_vf_element_info *elem = &info->elements[i];
+      enum gen_vf_component components[4] = {
+         GEN6_VFCOMP_STORE_0,
+         GEN6_VFCOMP_STORE_0,
+         GEN6_VFCOMP_STORE_0,
+         (elem->is_integer) ? GEN6_VFCOMP_STORE_1_INT :
+                              GEN6_VFCOMP_STORE_1_FP,
+      };
+      /* components missing from the source keep the (0, 0, 0, 1) defaults */
+      switch (elem->component_count) {
+      case 4: components[3] = GEN6_VFCOMP_STORE_SRC; /* fall through */
+      case 3: components[2] = GEN6_VFCOMP_STORE_SRC; /* fall through */
+      case 2: components[1] = GEN6_VFCOMP_STORE_SRC; /* fall through */
+      case 1: components[0] = GEN6_VFCOMP_STORE_SRC; break;
+      default:
+              assert(!"unexpected component count");
+              break;
+      }
+
+      dw0 = elem->buffer << GEN6_VE_DW0_VB_INDEX__SHIFT |
+            GEN6_VE_DW0_VALID |
+            elem->format << GEN6_VE_DW0_FORMAT__SHIFT |
+            elem->vertex_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
+      dw1 = get_gen6_component_controls(dev,
+            components[0], components[1],
+            components[2], components[3]);
+
+      STATIC_ASSERT(ARRAY_SIZE(vf->user_ve[i]) >= 2);
+      vf->user_ve[i][0] = dw0;
+      vf->user_ve[i][1] = dw1;
+   }
+
+   vf->user_ve_count = i;
+   /* dw0/dw1 still describe the last element; they are only read if i > 0 */
+   vf->edge_flag_supported = (i && get_gen6_edge_flag_format(dev,
+         &info->elements[i - 1], &edge_flag_format));
+   if (vf->edge_flag_supported) {
+      const struct ilo_state_vf_element_info *elem = &info->elements[i - 1];
+
+      /* without edge flag enable */
+      vf->last_user_ve[0][0] = dw0;
+      vf->last_user_ve[0][1] = dw1;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+       *
+       *     "This bit (Edge Flag Enable) must only be ENABLED on the last
+       *      valid VERTEX_ELEMENT structure.
+       *
+       *      When set, Component 0 Control must be set to
+       *      VFCOMP_STORE_SRC, and Component 1-3 Control must be set to
+       *      VFCOMP_NOSTORE."
+       */
+      dw0 = elem->buffer << GEN6_VE_DW0_VB_INDEX__SHIFT |
+            GEN6_VE_DW0_VALID |
+            edge_flag_format << GEN6_VE_DW0_FORMAT__SHIFT |
+            GEN6_VE_DW0_EDGE_FLAG_ENABLE |
+            elem->vertex_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
+      dw1 = get_gen6_component_controls(dev, GEN6_VFCOMP_STORE_SRC,
+            GEN6_VFCOMP_NOSTORE, GEN6_VFCOMP_NOSTORE, GEN6_VFCOMP_NOSTORE);
+
+      /* with edge flag enable */
+      vf->last_user_ve[1][0] = dw0;
+      vf->last_user_ve[1][1] = dw1;
+   }
+
+   return true;
+}
+
+static bool
+vf_set_gen6_vertex_buffer_state(struct ilo_state_vf *vf,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_vf_info *info)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+   /* fill with 0xff bytes; a negative entry means no VE uses that VB yet */
+   memset(vf->vb_to_first_elem, -1, sizeof(vf->vb_to_first_elem));
+   /* record per-VE instancing state and the first VE that uses each VB */
+   for (i = 0; i < info->element_count; i++) {
+      const struct ilo_state_vf_element_info *elem = &info->elements[i];
+
+      STATIC_ASSERT(ARRAY_SIZE(vf->user_instancing[i]) >= 2);
+      /* instancing bits only: access type in [0], step rate in [1] */
+      vf->user_instancing[i][0] = (elem->instancing_enable) ?
+         GEN6_VB_DW0_ACCESS_INSTANCEDATA :
+         GEN6_VB_DW0_ACCESS_VERTEXDATA;
+      vf->user_instancing[i][1] = elem->instancing_step_rate;
+
+      /*
+       * Instancing is per VB, not per VE, before Gen8.  Map each VB to its
+       * first VE; later VEs of that VB must match its enable and step rate.
+       */
+      if (vf->vb_to_first_elem[elem->buffer] < 0) {
+         vf->vb_to_first_elem[elem->buffer] = i;
+      } else {
+         const struct ilo_state_vf_element_info *first =
+            &info->elements[vf->vb_to_first_elem[elem->buffer]];
+
+         assert(elem->instancing_enable == first->instancing_enable &&
+                elem->instancing_step_rate == first->instancing_step_rate);
+      }
+   }
+
+   return true;
+}
+
+static bool
+vf_set_gen8_3DSTATE_VF_INSTANCING(struct ilo_state_vf *vf,
+                                  const struct ilo_dev *dev,
+                                  const struct ilo_state_vf_info *info)
+{
+   uint8_t idx;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+   /* per element: the enable bit goes in [0] and the step rate in [1] */
+   for (idx = 0; idx < info->element_count; idx++) {
+      const struct ilo_state_vf_element_info *ve = &info->elements[idx];
+      const uint32_t enable =
+         (ve->instancing_enable) ? GEN8_INSTANCING_DW1_ENABLE : 0;
+
+      STATIC_ASSERT(ARRAY_SIZE(vf->user_instancing[idx]) >= 2);
+      vf->user_instancing[idx][0] = enable;
+      vf->user_instancing[idx][1] = ve->instancing_step_rate;
+   }
+   return true;
+}
+
+static uint32_t
+get_gen6_component_zeros(const struct ilo_dev *dev)
+{
+   /* the same control, STORE_0, is used for all four components */
+   const enum gen_vf_component zero = GEN6_VFCOMP_STORE_0;
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   dw1 = get_gen6_component_controls(dev, zero, zero, zero, zero);
+   return dw1;
+}
+
+static uint32_t
+get_gen6_component_ids(const struct ilo_dev *dev,
+                       bool vertexid, bool instanceid)
+{
+   const enum gen_vf_component zero = GEN6_VFCOMP_STORE_0;
+   /* component 0 may store the vertex ID and component 1 the instance ID */
+   enum gen_vf_component x = (vertexid) ? GEN6_VFCOMP_STORE_VID : zero;
+   enum gen_vf_component y = (instanceid) ? GEN6_VFCOMP_STORE_IID : zero;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+   return get_gen6_component_controls(dev, x, y, zero, zero);
+}
+
+static bool
+vf_params_set_gen6_internal_ve(struct ilo_state_vf *vf,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_vf_params_info *params,
+                               uint8_t user_ve_count)
+{
+   const bool prepend_ids =
+      (params->prepend_vertexid || params->prepend_instanceid);
+   uint8_t internal_ve_count = 0, i;
+   uint32_t dw1[2];   /* DW1 of each internal VE; at most two get used */
+
+   /* rebuild vf->internal_ve[] and vf->internal_ve_count from params */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 92:
+    *
+    *     "- At least one VERTEX_ELEMENT_STATE structure must be included.
+    *
+    *      - Inclusion of partial VERTEX_ELEMENT_STATE structures is
+    *        UNDEFINED.
+    *
+    *      - SW must ensure that at least one vertex element is defined prior
+    *        to issuing a 3DPRIMTIVE command, or operation is UNDEFINED.
+    *
+    *      - There are no "holes" allowed in the destination vertex: NOSTORE
+    *        components must be overwritten by subsequent components unless
+    *        they are the trailing DWords of the vertex.  Software must
+    *        explicitly chose some value (probably 0) to be written into
+    *        DWords that would otherwise be "holes".
+    *
+    *      - ...
+    *
+    *      - [DevILK+] Element[0] must be valid."
+    */
+   if (params->prepend_zeros || (!user_ve_count && !prepend_ids))
+      dw1[internal_ve_count++] = get_gen6_component_zeros(dev);
+
+   if (prepend_ids) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+         /* placeholder for 3DSTATE_VF_SGVS */
+         dw1[internal_ve_count++] = get_gen6_component_zeros(dev);
+      } else {
+         dw1[internal_ve_count++] = get_gen6_component_ids(dev,
+               params->prepend_vertexid, params->prepend_instanceid);
+      }
+   }
+   /* internal VEs set only VALID in DW0; the controls above go in DW1 */
+   for (i = 0; i < internal_ve_count; i++) {
+      STATIC_ASSERT(ARRAY_SIZE(vf->internal_ve[i]) >= 2);
+      vf->internal_ve[i][0] = GEN6_VE_DW0_VALID;
+      vf->internal_ve[i][1] = dw1[i];
+   }
+
+   vf->internal_ve_count = internal_ve_count;
+
+   return true;
+}
+
+static bool
+vf_params_set_gen8_3DSTATE_VF_SGVS(struct ilo_state_vf *vf,
+                                   const struct ilo_dev *dev,
+                                   const struct ilo_state_vf_params_info *params)
+{
+   const uint8_t attr = (params->prepend_zeros) ? 1 : 0;   /* VE index */
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+   /* build vf->sgvs[0]: which IDs are enabled and where they are written */
+   dw1 = 0;
+   /* instance ID goes to component 1 and vertex ID to component 0 of attr */
+   if (params->prepend_instanceid) {
+      dw1 |= GEN8_SGVS_DW1_IID_ENABLE |
+             1 << GEN8_SGVS_DW1_IID_VE_COMP__SHIFT |
+             attr << GEN8_SGVS_DW1_IID_VE_INDEX__SHIFT;
+   }
+
+   if (params->prepend_vertexid) {
+      dw1 |= GEN8_SGVS_DW1_VID_ENABLE |
+             0 << GEN8_SGVS_DW1_VID_VE_COMP__SHIFT |
+             attr << GEN8_SGVS_DW1_VID_VE_INDEX__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(vf->sgvs) >= 1);
+   vf->sgvs[0] = dw1;
+
+   return true;
+}
+
+static uint32_t
+get_gen6_fixed_cut_index(const struct ilo_dev *dev,
+                         enum gen_index_format format)
+{
+   uint32_t all_ones = 0xffffffffu;
+
+   ILO_DEV_ASSERT(dev, 6, 7);
+
+   /* the fixed cut index is all ones, truncated to the index width */
+   if (format == GEN6_INDEX_BYTE)
+      all_ones &= 0xff;
+   else if (format == GEN6_INDEX_WORD)
+      all_ones &= 0xffff;
+   else if (format != GEN6_INDEX_DWORD)
+      assert(!"unknown index format");
+   return all_ones;
+}
+
+static bool
+get_gen6_cut_index_supported(const struct ilo_dev *dev,
+                             enum gen_3dprim_type topology)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * See the Sandy Bridge PRM, volume 2 part 1, page 80 and the Haswell PRM,
+    * volume 7, page 456.  Two topologies never take a cut index, five take
+    * one only on Gen7.5+, and all others always do.
+    */
+   if (topology == GEN6_3DPRIM_RECTLIST ||
+       topology == GEN6_3DPRIM_TRIFAN_NOSTIPPLE)
+      return false;
+
+   if (topology == GEN6_3DPRIM_TRIFAN ||
+       topology == GEN6_3DPRIM_QUADLIST ||
+       topology == GEN6_3DPRIM_QUADSTRIP ||
+       topology == GEN6_3DPRIM_POLYGON ||
+       topology == GEN6_3DPRIM_LINELOOP)
+      return (ilo_dev_gen(dev) >= ILO_GEN(7.5));
+
+   return true;
+}
+
+static bool
+vf_params_set_gen6_3dstate_index_buffer(struct ilo_state_vf *vf,
+                                        const struct ilo_dev *dev,
+                                        const struct ilo_state_vf_params_info *params)
+{
+   const bool cut = params->cut_index_enable;
+   uint32_t dw0;
+
+   ILO_DEV_ASSERT(dev, 6, 7);
+
+   /* only the enable bit is kept; the index must be the fixed value */
+   if (cut) {
+      assert(get_gen6_cut_index_supported(dev, params->cv_topology));
+      assert(params->cut_index ==
+            get_gen6_fixed_cut_index(dev, params->cv_index_format));
+   }
+   dw0 = (cut) ? GEN6_IB_DW0_CUT_INDEX_ENABLE : 0;
+
+   STATIC_ASSERT(ARRAY_SIZE(vf->cut) >= 1);
+   vf->cut[0] = dw0;
+
+   return true;
+}
+
+static bool
+vf_params_set_gen75_3DSTATE_VF(struct ilo_state_vf *vf,
+                               const struct ilo_dev *dev,
+                               const struct ilo_state_vf_params_info *params)
+{
+   const bool cut = params->cut_index_enable;
+
+   ILO_DEV_ASSERT(dev, 7.5, 8);
+
+   /* cut[0] carries the enable bit and cut[1] the cut index itself */
+   if (cut) {
+      assert(get_gen6_cut_index_supported(dev, params->cv_topology));
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(vf->cut) >= 2);
+   vf->cut[0] = (cut) ? GEN75_VF_DW0_CUT_INDEX_ENABLE : 0;
+   vf->cut[1] = params->cut_index;
+
+   return true;
+}
+
+static bool
+vertex_buffer_validate_gen6(const struct ilo_dev *dev,
+                            const struct ilo_state_vertex_buffer_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* debug-only validation: asserts only, always returns true */
+   if (info->vma)
+      assert(info->size && info->size <= info->vma->vm_size &&
+             info->offset <= info->vma->vm_size - info->size); /* no wrap */
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 86:
+    *     "(Buffer Pitch)
+    *      Range  [DevCTG+]: [0,2048] Bytes"
+    */
+   assert(info->stride <= 2048);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 86:
+    *     "64-bit floating point values must be 64-bit aligned in memory, or
+    *      UNPREDICTABLE data will be fetched. When accessing an element
+    *      containing 64-bit floating point values, the Buffer Starting
+    *      Address and Source Element Offset values must add to a 64-bit
+    *      aligned address, and BufferPitch must be a multiple of 64-bits."
+    */
+   if (info->cv_has_double) {
+      if (info->vma)
+         assert(info->vma->vm_alignment % 8 == 0);
+
+      assert(info->stride % 8 == 0);
+      assert((info->offset + info->cv_double_vertex_offset_mod_8) % 8 == 0);
+   }
+
+   return true;
+}
+
+static uint32_t
+vertex_buffer_get_gen6_size(const struct ilo_dev *dev,
+                            const struct ilo_state_vertex_buffer_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   return (!info->vma) ? 0 : info->size;   /* without a VMA the size is 0 */
+}
+
+static bool
+vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
+                                           const struct ilo_dev *dev,
+                                           const struct ilo_state_vertex_buffer_info *info)
+{
+   const uint32_t size = vertex_buffer_get_gen6_size(dev, info);
+   uint32_t dw0;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* pack info into vb->vb[0..2] and remember its VMA in vb->vma */
+   if (!vertex_buffer_validate_gen6(dev, info))
+      return false;
+
+   dw0 = info->stride << GEN6_VB_DW0_PITCH__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      dw0 |= GEN7_VB_DW0_ADDR_MODIFIED;
+   if (!info->vma)
+      dw0 |= GEN6_VB_DW0_IS_NULL;
+
+   STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3);
+   vb->vb[0] = dw0;
+   vb->vb[1] = info->offset;
+   /* vb[2] is the size on Gen8+ and an end address on older gens */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      vb->vb[2] = size;
+   } else {
+      /* address of the last valid byte, or 0 when size is 0 */
+      vb->vb[2] = (size) ? info->offset + size - 1 : 0;
+   }
+
+   vb->vma = info->vma;
+
+   return true;
+}
+
+static uint32_t
+get_index_format_size(enum gen_index_format format)
+{
+   /* size of one index in bytes; unknown formats assert and count as 1 */
+   if (format == GEN6_INDEX_DWORD)
+      return 4;
+   if (format == GEN6_INDEX_WORD)
+      return 2;
+   if (format != GEN6_INDEX_BYTE)
+      assert(!"unknown index format");
+   return 1;
+}
+
+static bool
+index_buffer_validate_gen6(const struct ilo_dev *dev,
+                           const struct ilo_state_index_buffer_info *info)
+{
+   const uint32_t format_size = get_index_format_size(info->format);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /*
+    * Debug-only validation: asserts only, always returns true.
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 79:
+    *     "This field (Buffer Starting Address) contains the size-aligned (as
+    *      specified by Index Format) Graphics Address of the first element of
+    *      interest within the index buffer."
+    */
+   assert(info->offset % format_size == 0);
+
+   if (info->vma) {
+      assert(info->vma->vm_alignment % format_size == 0);
+      assert(info->size && info->size <= info->vma->vm_size);
+      assert(info->offset <= info->vma->vm_size - info->size); /* no wrap */
+   }
+   return true;
+}
+
+static uint32_t
+index_buffer_get_gen6_size(const struct ilo_dev *dev,
+                           const struct ilo_state_index_buffer_info *info)
+{
+   uint32_t bytes;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!info->vma)
+      return 0;
+
+   /* before Gen8, drop the tail that does not hold a whole index */
+   bytes = info->size;
+   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+      const uint32_t rem = bytes % get_index_format_size(info->format);
+      bytes -= rem;
+   }
+   return bytes;
+}
+
+static bool
+index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib,
+                                           const struct ilo_dev *dev,
+                                           const struct ilo_state_index_buffer_info *info)
+{
+   const uint32_t bytes = index_buffer_get_gen6_size(dev, info);
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (!index_buffer_validate_gen6(dev, info))
+      return false;
+
+   STATIC_ASSERT(ARRAY_SIZE(ib->ib) >= 3);
+   ib->ib[1] = info->offset;
+
+   /* ib[2]: size on Gen8+, else address of the last valid byte, or 0 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      ib->ib[0] = info->format << GEN8_IB_DW1_FORMAT__SHIFT;
+      ib->ib[2] = bytes;
+   } else {
+      ib->ib[0] = info->format << GEN6_IB_DW0_FORMAT__SHIFT;
+      ib->ib[2] = (bytes) ? info->offset + bytes - 1 : 0;
+   }
+
+   ib->vma = info->vma;
+
+   return true;
+}
+
+bool
+ilo_state_vf_valid_element_format(const struct ilo_dev *dev,
+                                  enum gen_surface_format format)
+{
+   /*
+    * First gen supporting each format (0 = unsupported).  Based on:
+    *
+    *  - the Sandy Bridge PRM, volume 4 part 1, page 88-97
+    *  - the Ivy Bridge PRM, volume 2 part 1, page 97-99
+    *  - the Haswell PRM, volume 7, page 467-470
+    */
+   static const int vf_element_formats[] = {
+      [GEN6_FORMAT_R32G32B32A32_FLOAT]       = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32A32_SINT]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32A32_UINT]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32A32_UNORM]       = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32A32_SNORM]       = ILO_GEN(  1),
+      [GEN6_FORMAT_R64G64_FLOAT]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32A32_SSCALED]     = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32A32_USCALED]     = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32A32_SFIXED]      = ILO_GEN(7.5),
+      [GEN6_FORMAT_R32G32B32_FLOAT]          = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32_SINT]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32_UINT]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32_UNORM]          = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32_SNORM]          = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32_SSCALED]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32_USCALED]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32B32_SFIXED]         = ILO_GEN(7.5),
+      [GEN6_FORMAT_R16G16B16A16_UNORM]       = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16A16_SNORM]       = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16A16_SINT]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16A16_UINT]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16A16_FLOAT]       = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_FLOAT]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_SINT]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_UINT]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_UNORM]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_SNORM]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R64_FLOAT]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16A16_SSCALED]     = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16A16_USCALED]     = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_SSCALED]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_USCALED]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R32G32_SFIXED]            = ILO_GEN(7.5),
+      [GEN6_FORMAT_B8G8R8A8_UNORM]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R10G10B10A2_UNORM]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R10G10B10A2_UINT]         = ILO_GEN(  1),
+      [GEN6_FORMAT_R10G10B10_SNORM_A2_UNORM] = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8A8_UNORM]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8A8_SNORM]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8A8_SINT]            = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8A8_UINT]            = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16_UNORM]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16_SNORM]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16_SINT]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16_UINT]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16_FLOAT]             = ILO_GEN(  1),
+      [GEN6_FORMAT_B10G10R10A2_UNORM]        = ILO_GEN(7.5),
+      [GEN6_FORMAT_R11G11B10_FLOAT]          = ILO_GEN(  1),
+      [GEN6_FORMAT_R32_SINT]                 = ILO_GEN(  1),
+      [GEN6_FORMAT_R32_UINT]                 = ILO_GEN(  1),
+      [GEN6_FORMAT_R32_FLOAT]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R32_UNORM]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R32_SNORM]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R10G10B10X2_USCALED]      = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8A8_SSCALED]         = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8A8_USCALED]         = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16_SSCALED]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16_USCALED]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R32_SSCALED]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R32_USCALED]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8_UNORM]               = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8_SNORM]               = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8_SINT]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8_UINT]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R16_UNORM]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R16_SNORM]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R16_SINT]                 = ILO_GEN(  1),
+      [GEN6_FORMAT_R16_UINT]                 = ILO_GEN(  1),
+      [GEN6_FORMAT_R16_FLOAT]                = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8_SSCALED]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8_USCALED]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R16_SSCALED]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R16_USCALED]              = ILO_GEN(  1),
+      [GEN6_FORMAT_R8_UNORM]                 = ILO_GEN(  1),
+      [GEN6_FORMAT_R8_SNORM]                 = ILO_GEN(  1),
+      [GEN6_FORMAT_R8_SINT]                  = ILO_GEN(  1),
+      [GEN6_FORMAT_R8_UINT]                  = ILO_GEN(  1),
+      [GEN6_FORMAT_R8_SSCALED]               = ILO_GEN(  1),
+      [GEN6_FORMAT_R8_USCALED]               = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8_UNORM]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8_SNORM]             = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8_SSCALED]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R8G8B8_USCALED]           = ILO_GEN(  1),
+      [GEN6_FORMAT_R64G64B64A64_FLOAT]       = ILO_GEN(  1),
+      [GEN6_FORMAT_R64G64B64_FLOAT]          = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16_FLOAT]          = ILO_GEN(  6),
+      [GEN6_FORMAT_R16G16B16_UNORM]          = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16_SNORM]          = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16_SSCALED]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16_USCALED]        = ILO_GEN(  1),
+      [GEN6_FORMAT_R16G16B16_UINT]           = ILO_GEN(7.5),
+      [GEN6_FORMAT_R16G16B16_SINT]           = ILO_GEN(7.5),
+      [GEN6_FORMAT_R32_SFIXED]               = ILO_GEN(7.5),
+      [GEN6_FORMAT_R10G10B10A2_SNORM]        = ILO_GEN(7.5),
+      [GEN6_FORMAT_R10G10B10A2_USCALED]      = ILO_GEN(7.5),
+      [GEN6_FORMAT_R10G10B10A2_SSCALED]      = ILO_GEN(7.5),
+      [GEN6_FORMAT_R10G10B10A2_SINT]         = ILO_GEN(7.5),
+      [GEN6_FORMAT_B10G10R10A2_SNORM]        = ILO_GEN(7.5),
+      [GEN6_FORMAT_B10G10R10A2_USCALED]      = ILO_GEN(7.5),
+      [GEN6_FORMAT_B10G10R10A2_SSCALED]      = ILO_GEN(7.5),
+      [GEN6_FORMAT_B10G10R10A2_UINT]         = ILO_GEN(7.5),
+      [GEN6_FORMAT_B10G10R10A2_SINT]         = ILO_GEN(7.5),
+      [GEN6_FORMAT_R8G8B8_UINT]              = ILO_GEN(7.5),
+      [GEN6_FORMAT_R8G8B8_SINT]              = ILO_GEN(7.5),
+   };
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* reject out-of-range formats, unlisted (zero) entries, and too-old gens */
+   return (format < ARRAY_SIZE(vf_element_formats) &&
+           vf_element_formats[format] &&
+           ilo_dev_gen(dev) >= vf_element_formats[format]);
+}
+
+bool
+ilo_state_vf_init(struct ilo_state_vf *vf,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_vf_info *info)
+{
+   bool ret = true;
+
+   assert(ilo_is_zeroed(vf, sizeof(*vf)));
+   assert(ilo_is_zeroed(info->data, info->data_size));
+   /* carve info->data into user_ve[] followed by user_instancing[] */
+   assert(ilo_state_vf_data_size(dev, info->element_count) <=
+         info->data_size);
+   vf->user_ve = (uint32_t (*)[2]) info->data;
+   vf->user_instancing =
+      (uint32_t (*)[2]) (vf->user_ve + info->element_count);
+
+   ret &= vf_set_gen6_3DSTATE_VERTEX_ELEMENTS(vf, dev, info);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      ret &= vf_set_gen8_3DSTATE_VF_INSTANCING(vf, dev, info);
+   else
+      ret &= vf_set_gen6_vertex_buffer_state(vf, dev, info);
+   /* params go last: they read user_ve_count and edge_flag_supported */
+   ret &= ilo_state_vf_set_params(vf, dev, &info->params);
+
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_vf_init_for_rectlist(struct ilo_state_vf *vf,
+                               const struct ilo_dev *dev,
+                               void *data, size_t data_size,
+                               const struct ilo_state_vf_element_info *elements,
+                               uint8_t element_count)
+{
+   struct ilo_state_vf_info info;
+
+   /* anything not assigned below stays zeroed */
+   memset(&info, 0, sizeof(info));
+
+   info.elements = elements;
+   info.element_count = element_count;
+   info.data = data;
+   info.data_size = data_size;
+
+   /*
+    * Zeros are prepended for the VUE header,
+    *
+    *   DW0: Reserved: MBZ
+    *   DW1: Render Target Array Index
+    *   DW2: Viewport Index
+    *   DW3: Point Width
+    */
+   info.params.prepend_zeros = true;
+
+   return ilo_state_vf_init(vf, dev, &info);
+}
+
+bool
+ilo_state_vf_set_params(struct ilo_state_vf *vf,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_vf_params_info *params)
+{
+   bool ret = true;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* re-derive all params-dependent state; the user VEs must be set up */
+   ret &= vf_params_set_gen6_internal_ve(vf, dev, params, vf->user_ve_count);
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      ret &= vf_params_set_gen8_3DSTATE_VF_SGVS(vf, dev, params);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+    *
+    *     "Edge flags are supported for the following primitive topology types
+    *      only, otherwise EdgeFlagEnable must not be ENABLED.
+    *
+    *      - 3DPRIM_TRILIST*
+    *      - 3DPRIM_TRISTRIP*
+    *      - 3DPRIM_TRIFAN*
+    *      - 3DPRIM_POLYGON"
+    *
+    *     "[DevSNB]: Edge Flags are not supported for QUADLIST primitives.
+    *      Software may elect to convert QUADLIST primitives to some set of
+    *      corresponding edge-flag-supported primitive types (e.g., POLYGONs)
+    *      prior to submission to the 3D vf."
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 86:
+    *
+    *     "Edge flags are supported for all primitive topology types."
+    *
+    * Both PRMs are confusing...
+    */
+   if (params->last_element_edge_flag) {
+      assert(vf->edge_flag_supported);
+      if (ilo_dev_gen(dev) == ILO_GEN(6))
+         assert(params->cv_topology != GEN6_3DPRIM_QUADLIST);
+   }
+   /* swap in the last user VE built with ([1]) or without ([0]) edge flag */
+   if (vf->edge_flag_supported) {
+      assert(vf->user_ve_count);
+      memcpy(vf->user_ve[vf->user_ve_count - 1],
+            vf->last_user_ve[params->last_element_edge_flag],
+            sizeof(vf->user_ve[vf->user_ve_count - 1]));
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+      ret &= vf_params_set_gen75_3DSTATE_VF(vf, dev, params);
+   else
+      ret &= vf_params_set_gen6_3dstate_index_buffer(vf, dev, params);
+
+   assert(ret);
+
+   return ret;
+}
+
+void
+ilo_state_vf_full_delta(const struct ilo_state_vf *vf,
+                        const struct ilo_dev *dev,
+                        struct ilo_state_vf_delta *delta)
+{
+   uint32_t dirty = ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS;
+
+   /* vf is not consulted; every state that applies to the gen is dirty */
+   /* Gen8+ has VF_SGVS and VF_INSTANCING; older gens use VERTEX_BUFFERS */
+   dirty |= (ilo_dev_gen(dev) >= ILO_GEN(8)) ?
+      (ILO_STATE_VF_3DSTATE_VF_SGVS | ILO_STATE_VF_3DSTATE_VF_INSTANCING) :
+      ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS;
+
+   /* Gen7.5+ has 3DSTATE_VF; older gens use 3DSTATE_INDEX_BUFFER instead */
+   dirty |= (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ?
+      ILO_STATE_VF_3DSTATE_VF : ILO_STATE_VF_3DSTATE_INDEX_BUFFER;
+
+   delta->dirty = dirty;
+}
+
+void
+ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_vf *old,
+                       struct ilo_state_vf_delta *delta)
+{
+   /* no shallow copying: vf and old must own separate element storage */
+   assert(vf->user_ve != old->user_ve &&
+          vf->user_instancing != old->user_instancing);
+
+   delta->dirty = 0;
+   /* counts are compared first, so memcmp() only runs on equal-sized data */
+   if (vf->internal_ve_count != old->internal_ve_count ||
+       vf->user_ve_count != old->user_ve_count ||
+       memcmp(vf->internal_ve, old->internal_ve,
+          sizeof(vf->internal_ve[0]) * vf->internal_ve_count) ||
+       memcmp(vf->user_ve, old->user_ve,
+          sizeof(vf->user_ve[0]) * vf->user_ve_count))
+      delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS;
+   /* instancing changes dirty VF_INSTANCING on Gen8+, else VERTEX_BUFFERS */
+   if (vf->user_ve_count != old->user_ve_count ||
+       memcmp(vf->user_instancing, old->user_instancing,
+          sizeof(vf->user_instancing[0]) * vf->user_ve_count)) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         delta->dirty |= ILO_STATE_VF_3DSTATE_VF_INSTANCING;
+      else
+         delta->dirty |= ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      if (vf->sgvs[0] != old->sgvs[0])
+         delta->dirty |= ILO_STATE_VF_3DSTATE_VF_SGVS;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      if (memcmp(vf->cut, old->cut, sizeof(vf->cut)))
+         delta->dirty |= ILO_STATE_VF_3DSTATE_VF;
+   } else {
+      if (vf->cut[0] != old->cut[0])
+         delta->dirty |= ILO_STATE_VF_3DSTATE_INDEX_BUFFER;
+   }
+}
+
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                             uint32_t *alignment)
+{
+   const uint32_t for_doubles = 8;   /* alignment chosen for doubles */
+   *alignment = for_doubles;
+   return size;                      /* returned as-is, without padding */
+}
+
+/**
+ * Set vb from info.  No need to initialize vb first.
+ */
+bool
+ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_vertex_buffer_info *info)
+{
+   /* the helper validates info, then overwrites vb->vb[0..2] and vb->vma */
+   const bool ok = vertex_buffer_set_gen8_vertex_buffer_state(vb, dev, info);
+
+   /* the helper fails only when validation does; catch it in debug builds */
+   assert(ok);
+
+   return ok;
+}
+
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                            uint32_t *alignment)
+{
+   const uint32_t widest = get_index_format_size(GEN6_INDEX_DWORD);
+   *alignment = widest;   /* the worst case: also fine for WORD and BYTE */
+   return size;           /* returned as-is, without padding */
+}
+
+/**
+ * Set ib from info.  No need to initialize ib first.
+ */
+bool
+ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_index_buffer_info *info)
+{
+   /* the helper validates info, then overwrites ib->ib[0..2] and ib->vma */
+   const bool ok = index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(ib, dev, info);
+
+   /* the helper fails only when validation does; catch it in debug builds */
+   assert(ok);
+
+   return ok;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_vf.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_vf.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_vf.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_vf.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,230 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_VF_H
+#define ILO_STATE_VF_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 93:
+ *
+ *     "Up to 34 (DevSNB+) vertex elements are supported."
+ *
+ *     "Up to 33 VBs are supported"
+ *
+ * Reserve two VEs and one VB for internal use.
+ */
+#define ILO_STATE_VF_MAX_ELEMENT_COUNT (34 - 2)
+#define ILO_STATE_VF_MAX_BUFFER_COUNT (33 - 1)
+
+enum ilo_state_vf_dirty_bits {
+   ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS            = (1 << 0),
+   ILO_STATE_VF_3DSTATE_VF_SGVS                    = (1 << 1),
+   ILO_STATE_VF_3DSTATE_VF_INSTANCING              = (1 << 2),
+   ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS             = (1 << 3),
+   ILO_STATE_VF_3DSTATE_VF                         = (1 << 4),
+   ILO_STATE_VF_3DSTATE_INDEX_BUFFER               = (1 << 5),
+};
+
+/**
+ * Fetch a 128-bit vertex attribute.
+ */
+struct ilo_state_vf_element_info {
+   uint8_t buffer;
+   uint16_t vertex_offset;
+   enum gen_surface_format format;
+
+   uint8_t format_size;
+   uint8_t component_count;
+   bool is_integer;
+
+   /* before Gen8, elements sharing a buffer must agree on these two */
+   bool instancing_enable;
+   uint32_t instancing_step_rate;
+};
+
+/**
+ * VF parameters.
+ */
+struct ilo_state_vf_params_info {
+   enum gen_3dprim_type cv_topology;
+
+   /* prepend an attribute of zeros */
+   bool prepend_zeros;
+
+   /* prepend an attribute of VertexID and/or InstanceID */
+   bool prepend_vertexid;
+   bool prepend_instanceid;
+
+   bool last_element_edge_flag;
+
+   enum gen_index_format cv_index_format;
+   bool cut_index_enable;
+   uint32_t cut_index;
+};
+
+/**
+ * Vertex fetch.
+ */
+struct ilo_state_vf_info {
+   void *data;
+   size_t data_size;
+
+   const struct ilo_state_vf_element_info *elements;
+   uint8_t element_count;
+
+   struct ilo_state_vf_params_info params;
+};
+
+struct ilo_state_vf {
+   uint32_t (*user_ve)[2];
+   uint32_t (*user_instancing)[2];
+   int8_t vb_to_first_elem[ILO_STATE_VF_MAX_BUFFER_COUNT];
+   uint8_t user_ve_count;
+
+   bool edge_flag_supported;
+   uint32_t last_user_ve[2][2];
+
+   /* two VEs are reserved for internal use */
+   uint32_t internal_ve[2][2];
+   uint8_t internal_ve_count;
+
+   uint32_t sgvs[1];
+
+   uint32_t cut[2];
+};
+
+struct ilo_state_vf_delta {
+   uint32_t dirty;
+};
+
+struct ilo_vma;
+
+struct ilo_state_vertex_buffer_info {
+   const struct ilo_vma *vma;
+   uint32_t offset;
+   uint32_t size;
+
+   uint16_t stride;
+
+   /* doubles must be at 64-bit aligned addresses */
+   bool cv_has_double;
+   uint8_t cv_double_vertex_offset_mod_8;
+};
+
+struct ilo_state_vertex_buffer {
+   uint32_t vb[3];
+
+   const struct ilo_vma *vma;
+};
+
+struct ilo_state_index_buffer_info {
+   const struct ilo_vma *vma;
+   uint32_t offset;
+   uint32_t size;
+
+   enum gen_index_format format;
+};
+
+struct ilo_state_index_buffer {
+   uint32_t ib[3];
+
+   const struct ilo_vma *vma;
+};
+
+static inline size_t
+ilo_state_vf_data_size(const struct ilo_dev *dev, uint8_t element_count)
+{
+   const struct ilo_state_vf *vf = NULL; /* only ever used inside sizeof */
+   const size_t unit = sizeof(*vf->user_ve) + sizeof(*vf->user_instancing);
+   return unit * element_count;
+}
+
+bool
+ilo_state_vf_valid_element_format(const struct ilo_dev *dev,
+                                  enum gen_surface_format format);
+
+bool
+ilo_state_vf_init(struct ilo_state_vf *vf,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_vf_info *info);
+
+bool
+ilo_state_vf_init_for_rectlist(struct ilo_state_vf *vf,
+                               const struct ilo_dev *dev,
+                               void *data, size_t data_size,
+                               const struct ilo_state_vf_element_info *elements,
+                               uint8_t element_count);
+
+bool
+ilo_state_vf_set_params(struct ilo_state_vf *vf,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_vf_params_info *params);
+
+/**
+ * Return the number of attributes in the VUE: user VEs plus internal VEs.
+ */
+static inline uint8_t
+ilo_state_vf_get_attr_count(const struct ilo_state_vf *vf)
+{
+   return vf->user_ve_count + vf->internal_ve_count;
+}
+
+void
+ilo_state_vf_full_delta(const struct ilo_state_vf *vf,
+                        const struct ilo_dev *dev,
+                        struct ilo_state_vf_delta *delta);
+
+void
+ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
+                       const struct ilo_dev *dev,
+                       const struct ilo_state_vf *old,
+                       struct ilo_state_vf_delta *delta);
+
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                             uint32_t *alignment);
+
+bool
+ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_vertex_buffer_info *info);
+
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                            uint32_t *alignment);
+
+bool
+ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib,
+                                const struct ilo_dev *dev,
+                                const struct ilo_state_index_buffer_info *info);
+
+#endif /* ILO_STATE_VF_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_viewport.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_viewport.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_viewport.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_viewport.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,378 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_state_viewport.h"
+
+/* Compute the X/Y guardband of a viewport; the results are in NDC space. */
+static void
+viewport_matrix_get_gen6_guardband(const struct ilo_dev *dev,
+                                   const struct ilo_state_viewport_matrix_info *mat,
+                                   float *min_gbx, float *max_gbx,
+                                   float *min_gby, float *max_gby)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
+    *
+    *     "Per-Device Guardband Extents
+    *
+    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
+    *       - Maximum Post-Clamp Delta (X or Y): 16K"
+    *
+    *     "In addition, in order to be correctly rendered, objects must have a
+    *      screenspace bounding box not exceeding 8K in the X or Y direction.
+    *      This additional restriction must also be comprehended by software,
+    *      i.e., enforced by use of clipping."
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "Per-Device Guardband Extents
+    *
+    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
+    *       - Maximum Post-Clamp Delta (X or Y): N/A"
+    *
+    *     "In addition, in order to be correctly rendered, objects must have a
+    *      screenspace bounding box not exceeding 8K in the X or Y direction.
+    *      This additional restriction must also be comprehended by software,
+    *      i.e., enforced by use of clipping."
+    *
+    * Combined, no object's bounding box may exceed 8K in width or height.
+    *
+    * Below we set the guardband as a square of length 8K, centered at where
+    * the viewport is.  This makes sure all objects passing the GB test are
+    * valid to the renderer, and those failing the XY clipping have a
+    * better chance of passing the GB test.
+    */
+   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
+   const int half_len = 8192 / 2;
+   int center_x = (int) mat->translate[0];
+   int center_y = (int) mat->translate[1];
+   float scale_x, scale_y;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* keep the guardband within [-max_extent, max_extent - 1] */
+   if (center_x - half_len < -max_extent)
+      center_x = -max_extent + half_len;
+   else if (center_x + half_len > max_extent - 1)
+      center_x = max_extent - 1 - half_len;
+
+   if (center_y - half_len < -max_extent)
+      center_y = -max_extent + half_len;
+   else if (center_y + half_len > max_extent - 1)
+      center_y = max_extent - 1 - half_len;
+
+   scale_x = fabsf(mat->scale[0]);
+   scale_y = fabsf(mat->scale[1]);
+   /*
+    * From the Haswell PRM, volume 2d, page 292-293:
+    *
+    *     "Note: Minimum allowed value for this field (X/Y Min Clip Guardband)
+    *      is -16384."
+    *
+    *     "Note: Maximum allowed value for this field (X/Y Max Clip Guardband)
+    *      is 16383."
+    *
+    * Avoid small scales.
+    */
+   if (scale_x < 1.0f)
+      scale_x = 1.0f;
+   if (scale_y < 1.0f)
+      scale_y = 1.0f;
+
+   /* in NDC space */
+   *min_gbx = ((float) (center_x - half_len) - mat->translate[0]) / scale_x;
+   *max_gbx = ((float) (center_x + half_len) - mat->translate[0]) / scale_x;
+   *min_gby = ((float) (center_y - half_len) - mat->translate[1]) / scale_y;
+   *max_gby = ((float) (center_y + half_len) - mat->translate[1]) / scale_y;
+}
+
+static void
+viewport_matrix_get_extent(const struct ilo_state_viewport_matrix_info *mat,
+                           int axis, float *min, float *max)
+{
+   const float center = mat->translate[axis];
+   const float half_extent = fabsf(mat->scale[axis]);
+   *min = center - half_extent;
+   *max = center + half_extent;
+}
+
+static bool
+viewport_matrix_set_gen7_SF_CLIP_VIEWPORT(struct ilo_state_viewport *vp,
+                                          const struct ilo_dev *dev,
+                                          const struct ilo_state_viewport_matrix_info *matrices,
+                                          uint8_t count)
+{
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* fill vp->sf_clip[] with one 16-dword entry per viewport matrix */
+   for (i = 0; i < count; i++) {
+      const struct ilo_state_viewport_matrix_info *mat = &matrices[i];
+      float min_gbx, max_gbx, min_gby, max_gby;
+      uint32_t dw[16]; /* built locally, then copied out in one go */
+
+      viewport_matrix_get_gen6_guardband(dev, mat,
+            &min_gbx, &max_gbx, &min_gby, &max_gby);
+
+      dw[0] = fui(mat->scale[0]);
+      dw[1] = fui(mat->scale[1]);
+      dw[2] = fui(mat->scale[2]);
+      dw[3] = fui(mat->translate[0]);
+      dw[4] = fui(mat->translate[1]);
+      dw[5] = fui(mat->translate[2]);
+      dw[6] = 0;
+      dw[7] = 0;
+      /* the guardband computed above */
+      dw[8] = fui(min_gbx);
+      dw[9] = fui(max_gbx);
+      dw[10] = fui(min_gby);
+      dw[11] = fui(max_gby);
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8)) { /* Gen8+: X/Y extents */
+         float min_x, max_x, min_y, max_y;
+
+         viewport_matrix_get_extent(mat, 0, &min_x, &max_x);
+         viewport_matrix_get_extent(mat, 1, &min_y, &max_y);
+
+         dw[12] = fui(min_x);
+         dw[13] = fui(max_x - 1.0f);
+         dw[14] = fui(min_y);
+         dw[15] = fui(max_y - 1.0f);
+      } else { /* before Gen8 the last four dwords are zeroed */
+         dw[12] = 0;
+         dw[13] = 0;
+         dw[14] = 0;
+         dw[15] = 0;
+      }
+
+      STATIC_ASSERT(ARRAY_SIZE(vp->sf_clip[i]) >= 16);
+      memcpy(vp->sf_clip[i], dw, sizeof(dw));
+   }
+
+   return true;
+}
+
+static bool
+viewport_matrix_set_gen6_CC_VIEWPORT(struct ilo_state_viewport *vp,
+                                     const struct ilo_dev *dev,
+                                     const struct ilo_state_viewport_matrix_info *matrices,
+                                     uint8_t count)
+{
+   uint8_t idx;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* each entry holds the min/max extent of a viewport along axis 2 */
+   for (idx = 0; idx < count; idx++) {
+      float z_lo, z_hi;
+
+      viewport_matrix_get_extent(&matrices[idx], 2, &z_lo, &z_hi);
+
+      STATIC_ASSERT(ARRAY_SIZE(vp->cc[idx]) >= 2);
+      vp->cc[idx][0] = fui(z_lo);
+      vp->cc[idx][1] = fui(z_hi);
+   }
+
+   return true;
+}
+
+static bool
+viewport_scissor_set_gen6_SCISSOR_RECT(struct ilo_state_viewport *vp,
+                                       const struct ilo_dev *dev,
+                                       const struct ilo_state_viewport_scissor_info *scissors,
+                                       uint8_t count)
+{
+   const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
+   uint8_t i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* fill vp->scissor[] with one two-dword rectangle per scissor */
+   for (i = 0; i < count; i++) {
+      const struct ilo_state_viewport_scissor_info *scissor = &scissors[i];
+      uint16_t min_x, min_y, max_x, max_y;
+      uint32_t dw0, dw1;
+      /* clamp every coordinate to at most max_size - 1 */
+      min_x = (scissor->min_x < max_size) ? scissor->min_x : max_size - 1;
+      min_y = (scissor->min_y < max_size) ? scissor->min_y : max_size - 1;
+      max_x = (scissor->max_x < max_size) ? scissor->max_x : max_size - 1;
+      max_y = (scissor->max_y < max_size) ? scissor->max_y : max_size - 1;
+
+      dw0 = min_y << GEN6_SCISSOR_DW0_MIN_Y__SHIFT |
+            min_x << GEN6_SCISSOR_DW0_MIN_X__SHIFT;
+      dw1 = max_y << GEN6_SCISSOR_DW1_MAX_Y__SHIFT |
+            max_x << GEN6_SCISSOR_DW1_MAX_X__SHIFT;
+
+      STATIC_ASSERT(ARRAY_SIZE(vp->scissor[i]) >= 2);
+      vp->scissor[i][0] = dw0;
+      vp->scissor[i][1] = dw1;
+   }
+
+   return true;
+}
+
+bool
+ilo_state_viewport_init(struct ilo_state_viewport *vp,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_viewport_info *info)
+{
+   /* how many viewports fit in the storage described by info */
+   const size_t capacity =
+      info->data_size / ilo_state_viewport_data_size(dev, 1);
+
+   assert(ilo_is_zeroed(vp, sizeof(*vp)));
+   assert(ilo_is_zeroed(info->data, info->data_size));
+
+   /* remember the storage; never track more than the maximum count */
+   vp->data = info->data;
+   vp->array_size = (capacity < ILO_STATE_VIEWPORT_MAX_COUNT) ?
+      capacity : ILO_STATE_VIEWPORT_MAX_COUNT;
+
+   return ilo_state_viewport_set_params(vp, dev, &info->params, false);
+}
+
+bool
+ilo_state_viewport_init_data_only(struct ilo_state_viewport *vp,
+                                  const struct ilo_dev *dev,
+                                  void *data, size_t data_size)
+{
+   /* the params stay zeroed (a count of 0); only the storage is described */
+   struct ilo_state_viewport_info storage_only;
+   memset(&storage_only, 0, sizeof(storage_only));
+
+   storage_only.data = data;
+   storage_only.data_size = data_size;
+   return ilo_state_viewport_init(vp, dev, &storage_only);
+}
+
+bool
+ilo_state_viewport_init_for_rectlist(struct ilo_state_viewport *vp,
+                                     const struct ilo_dev *dev,
+                                     void *data, size_t data_size)
+{
+   struct ilo_state_viewport_scissor_info zero_scissor;
+   struct ilo_state_viewport_matrix_info unit_scale;
+   struct ilo_state_viewport_info info;
+
+   /* a single viewport: scale of 1, no translation, all-zero scissor */
+   memset(&unit_scale, 0, sizeof(unit_scale));
+   unit_scale.scale[0] = 1.0f;
+   unit_scale.scale[1] = 1.0f;
+   unit_scale.scale[2] = 1.0f;
+
+   memset(&zero_scissor, 0, sizeof(zero_scissor));
+
+   memset(&info, 0, sizeof(info));
+   info.data = data;
+   info.data_size = data_size;
+   info.params.matrices = &unit_scale;
+   info.params.scissors = &zero_scissor;
+   info.params.count = 1;
+   return ilo_state_viewport_init(vp, dev, &info);
+}
+
+static void
+viewport_set_count(struct ilo_state_viewport *vp,
+                   const struct ilo_dev *dev,
+                   uint8_t count)
+{
+   assert(count <= vp->array_size);
+   /* split vp->data into three back-to-back arrays of count entries each */
+   vp->count = count;
+   vp->sf_clip = (uint32_t (*)[16]) vp->data;
+   vp->cc =      (uint32_t (*)[ 2]) (vp->sf_clip + count);
+   vp->scissor = (uint32_t (*)[ 2]) (vp->cc + count);
+}
+
+bool
+ilo_state_viewport_set_params(struct ilo_state_viewport *vp,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_viewport_params_info *params,
+                              bool scissors_only)
+{
+   const uint8_t count = params->count;
+   bool ok = true;
+
+   if (!scissors_only) {
+      /* the count may change: re-split the storage and redo the matrices */
+      viewport_set_count(vp, dev, count);
+      ok &= viewport_matrix_set_gen7_SF_CLIP_VIEWPORT(vp, dev,
+            params->matrices, count);
+      ok &= viewport_matrix_set_gen6_CC_VIEWPORT(vp, dev,
+            params->matrices, count);
+   } else {
+      assert(vp->count == count);
+   }
+
+   /* the scissor rectangles are rewritten in either mode */
+   ok &= viewport_scissor_set_gen6_SCISSOR_RECT(vp, dev,
+         params->scissors, count);
+
+   assert(ok);
+
+   return ok;
+}
+
+void
+ilo_state_viewport_full_delta(const struct ilo_state_viewport *vp,
+                              const struct ilo_dev *dev,
+                              struct ilo_state_viewport_delta *delta)
+{
+   delta->dirty = ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT;
+   delta->dirty |= ILO_STATE_VIEWPORT_CC_VIEWPORT;
+   delta->dirty |= ILO_STATE_VIEWPORT_SCISSOR_RECT;
+}
+
+void
+ilo_state_viewport_get_delta(const struct ilo_state_viewport *vp,
+                             const struct ilo_dev *dev,
+                             const struct ilo_state_viewport *old,
+                             struct ilo_state_viewport_delta *delta)
+{
+   uint32_t dirty = 0;
+
+   /* no shallow copying */
+   assert(vp->data != old->data);
+
+   /* with differing counts, everything is reported as dirty */
+   if (vp->count != old->count) {
+      ilo_state_viewport_full_delta(vp, dev, delta);
+      return;
+   }
+
+   if (memcmp(vp->sf_clip, old->sf_clip,
+              sizeof(vp->sf_clip[0]) * vp->count))
+      dirty |= ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT;
+   if (memcmp(vp->cc, old->cc,
+              sizeof(vp->cc[0]) * vp->count))
+      dirty |= ILO_STATE_VIEWPORT_CC_VIEWPORT;
+   if (memcmp(vp->scissor, old->scissor,
+              sizeof(vp->scissor[0]) * vp->count))
+      dirty |= ILO_STATE_VIEWPORT_SCISSOR_RECT;
+
+   delta->dirty = dirty;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_viewport.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_viewport.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_viewport.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_viewport.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,132 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_VIEWPORT_H
+#define ILO_STATE_VIEWPORT_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 38:
+ *
+ *     "... 16 sets of viewport (VP) state parameters in the Clip unit's
+ *      VertexClipTest function and in the SF unit's ViewportMapping and
+ *      Scissor functions."
+ */
+#define ILO_STATE_VIEWPORT_MAX_COUNT 16
+
+enum ilo_state_viewport_dirty_bits {
+   ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT             = (1 << 0),
+   ILO_STATE_VIEWPORT_CC_VIEWPORT                  = (1 << 1),
+   ILO_STATE_VIEWPORT_SCISSOR_RECT                 = (1 << 2),
+};
+
+struct ilo_state_viewport_matrix_info {
+   float scale[3];
+   float translate[3];
+};
+
+struct ilo_state_viewport_scissor_info {
+   /* all inclusive */
+   uint16_t min_x;
+   uint16_t min_y;
+   uint16_t max_x;
+   uint16_t max_y;
+};
+
+struct ilo_state_viewport_params_info {
+   const struct ilo_state_viewport_matrix_info *matrices;
+   const struct ilo_state_viewport_scissor_info *scissors;
+   uint8_t count;
+};
+
+struct ilo_state_viewport_info {
+   void *data;
+   size_t data_size;
+
+   struct ilo_state_viewport_params_info params;
+};
+
+struct ilo_state_viewport {
+   void *data;          /* storage passed in through the init info */
+   uint8_t array_size;  /* viewports that fit in data, capped at MAX_COUNT */
+
+   uint8_t count;       /* viewports currently set; <= array_size */
+   uint32_t (*sf_clip)[16]; /* this and the next two point into data */
+   uint32_t (*cc)[2];
+   uint32_t (*scissor)[2];
+};
+
+struct ilo_state_viewport_delta {
+   uint32_t dirty;
+};
+
+static inline size_t
+ilo_state_viewport_data_size(const struct ilo_dev *dev, uint8_t array_size)
+{
+   const struct ilo_state_viewport *vp = NULL; /* only used inside sizeof */
+   const size_t per_viewport =
+      sizeof(*vp->sf_clip) + sizeof(*vp->cc) + sizeof(*vp->scissor);
+   return per_viewport * array_size;
+}
+
+bool
+ilo_state_viewport_init(struct ilo_state_viewport *vp,
+                        const struct ilo_dev *dev,
+                        const struct ilo_state_viewport_info *info);
+
+bool
+ilo_state_viewport_init_data_only(struct ilo_state_viewport *vp,
+                                  const struct ilo_dev *dev,
+                                  void *data, size_t data_size);
+
+bool
+ilo_state_viewport_init_for_rectlist(struct ilo_state_viewport *vp,
+                                     const struct ilo_dev *dev,
+                                     void *data, size_t data_size);
+
+bool
+ilo_state_viewport_set_params(struct ilo_state_viewport *vp,
+                              const struct ilo_dev *dev,
+                              const struct ilo_state_viewport_params_info *params,
+                              bool scissors_only);
+
+void
+ilo_state_viewport_full_delta(const struct ilo_state_viewport *vp,
+                              const struct ilo_dev *dev,
+                              struct ilo_state_viewport_delta *delta);
+
+void
+ilo_state_viewport_get_delta(const struct ilo_state_viewport *vp,
+                             const struct ilo_dev *dev,
+                             const struct ilo_state_viewport *old,
+                             struct ilo_state_viewport_delta *delta);
+
+#endif /* ILO_STATE_VIEWPORT_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_zs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_zs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_zs.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_zs.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,677 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "ilo_debug.h"
+#include "ilo_image.h"
+#include "ilo_vma.h"
+#include "ilo_state_zs.h"
+
+static bool
+zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
+                                      const struct ilo_dev *dev)
+{
+   const enum gen_depth_format format = GEN6_ZFORMAT_D32_FLOAT;
+   uint32_t null_dw1;
+   unsigned i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* before Gen7, the tiling is additionally encoded in the same dword */
+   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
+      null_dw1 = GEN6_SURFTYPE_NULL << GEN6_DEPTH_DW1_TYPE__SHIFT |
+                 GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
+                 format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+   } else {
+      null_dw1 = GEN6_SURFTYPE_NULL << GEN7_DEPTH_DW1_TYPE__SHIFT |
+                 format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5);
+   zs->depth[0] = null_dw1;
+   for (i = 1; i < 5; i++)
+      zs->depth[i] = 0;
+
+   return true;
+}
+
+static bool
+zs_validate_gen6(const struct ilo_dev *dev,
+                 const struct ilo_state_zs_info *info)
+{
+   const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* sanity-check info with asserts; an image and its VMA go together */
+   assert(!info->z_img == !info->z_vma);
+   assert(!info->s_img == !info->s_vma);
+
+   /* all tiled, with every VMA aligned to 4096 bytes */
+   if (info->z_img) {
+      assert(info->z_img->tiling == GEN6_TILING_Y);
+      assert(info->z_vma->vm_alignment % 4096 == 0);
+   }
+   if (info->s_img) {
+      assert(info->s_img->tiling == GEN8_TILING_W);
+      assert(info->s_vma->vm_alignment % 4096 == 0);
+   }
+   if (info->hiz_vma) {
+      assert(info->z_img &&
+             ilo_image_can_enable_aux(info->z_img, info->level));
+      assert(info->hiz_vma->vm_alignment % 4096 == 0);
+   }
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
+    *
+    *     "The stencil buffer has a format of S8_UINT, and shares Surface
+    *      Type, Height, Width, and Depth, Minimum Array Element, Render
+    *      Target View Extent, Depth Coordinate Offset X/Y, LOD, and Depth
+    *      Buffer Object Control State fields of the depth buffer."
+    */
+   if (info->z_img && info->s_img && info->z_img != info->s_img) {
+      assert(info->z_img->type == info->s_img->type &&
+             info->z_img->height0 == info->s_img->height0 &&
+             info->z_img->depth0 == info->s_img->depth0);
+   }
+
+   if (info->type != img->type) {
+      assert(info->type == GEN6_SURFTYPE_2D &&
+             img->type == GEN6_SURFTYPE_CUBE);
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      switch (info->format) {
+      case GEN6_ZFORMAT_D32_FLOAT:
+      case GEN6_ZFORMAT_D24_UNORM_X8_UINT:
+      case GEN6_ZFORMAT_D16_UNORM:
+         break;
+      default:
+         assert(!"unknown depth format");
+         break;
+      }
+   } else {
+      /*
+       * From the Ironlake PRM, volume 2 part 1, page 330:
+       *
+       *     "If this field (Separate Stencil Buffer Enable) is disabled, the
+       *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
+       *
+       * From the Sandy Bridge PRM, volume 2 part 1, page 321:
+       *
+       *     "[DevSNB]: This field (Separate Stencil Buffer Enable) must be
+       *      set to the same value (enabled or disabled) as Hierarchical
+       *      Depth Buffer Enable."
+       */
+      if (info->hiz_vma)
+         assert(info->format != GEN6_ZFORMAT_D24_UNORM_S8_UINT);
+      else
+         assert(info->format != GEN6_ZFORMAT_D24_UNORM_X8_UINT);
+   }
+
+   assert(info->level < img->level_count);
+   assert(img->bo_stride);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 323:
+    *
+    *     "For cube maps, Width must be set equal to Height."
+    */
+   if (info->type == GEN6_SURFTYPE_CUBE)
+      assert(img->width0 == img->height0);
+
+   return true;
+}
+
+static void
+zs_get_gen6_max_extent(const struct ilo_dev *dev,
+                       const struct ilo_state_zs_info *info,
+                       uint16_t *max_w, uint16_t *max_h)
+{
+   /* the limit for 1D, 2D, and cube surfaces is 8K before Gen7, 16K since */
+   const uint16_t max_size = (ilo_dev_gen(dev) < ILO_GEN(7)) ? 8192 : 16384;
+   uint16_t w, h;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (info->type == GEN6_SURFTYPE_1D) {
+      w = max_size;
+      h = 1;
+   } else if (info->type == GEN6_SURFTYPE_2D ||
+              info->type == GEN6_SURFTYPE_CUBE) {
+      w = max_size;
+      h = max_size;
+   } else if (info->type == GEN6_SURFTYPE_3D) {
+      w = 2048;
+      h = 2048;
+   } else {
+      assert(!"invalid surface type");
+      w = 1;
+      h = 1;
+   }
+
+   *max_w = w;
+   *max_h = h;
+}
+
+static void
+get_gen6_hiz_alignments(const struct ilo_dev *dev,
+                        const struct ilo_image *img,
+                        uint16_t *align_w, uint16_t *align_h)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 313:
+    *
+    *     "A rectangle primitive representing the clear area is delivered. The
+    *      primitive must adhere to the following restrictions on size:
+    *
+    *      - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
+    *        aligned to an 8x4 pixel block relative to the upper left corner
+    *        of the depth buffer, and contain an integer number of these pixel
+    *        blocks, and all 8x4 pixels must be lit.
+    *      - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
+    *        aligned to a 4x2 pixel block (8x4 sample block) relative to the
+    *        upper left corner of the depth buffer, and contain an integer
+    *        number of these pixel blocks, and all samples of the 4x2 pixels
+    *        must be lit
+    *      - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
+    *        aligned to a 2x2 pixel block (8x4 sample block) relative to the
+    *        upper left corner of the depth buffer, and contain an integer
+    *        number of these pixel blocks, and all samples of the 2x2 pixels
+    *        must be list."
+    *
+    * Experiments on Gen7.5 show that HiZ resolve also requires the rectangle
+    * to be aligned to 8x4 sample blocks.  But to be on the safe side, we
+    * always require a level to be aligned when HiZ is enabled.
+    */
+   switch (img->sample_count) { /* known counts: blocks of 32 samples */
+   case 1:
+      *align_w = 8;
+      *align_h = 4;
+      break;
+   case 2: /* not listed in the PRM quote above */
+      *align_w = 4;
+      *align_h = 4;
+      break;
+   case 4:
+      *align_w = 4;
+      *align_h = 2;
+      break;
+   case 8:
+      *align_w = 2;
+      *align_h = 2;
+      break;
+   case 16: /* not listed in the PRM quote above */
+      *align_w = 2;
+      *align_h = 1;
+      break;
+   default: /* asserts, then falls back to no alignment requirement */
+      assert(!"unknown sample count");
+      *align_w = 1;
+      *align_h = 1;
+      break;
+   }
+}
+
+static bool
+zs_get_gen6_depth_extent(const struct ilo_dev *dev,
+                         const struct ilo_state_zs_info *info,
+                         uint16_t *width, uint16_t *height)
+{
+   const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img;
+   uint16_t w, h, max_w, max_h;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* start from width0/height0 of the depth image, or of the stencil image */
+   w = img->width0;
+   h = img->height0;
+
+   if (info->hiz_vma) {
+      uint16_t align_w, align_h;
+
+      get_gen6_hiz_alignments(dev, info->z_img, &align_w, &align_h);
+
+      /*
+       * We want to force 8x4 alignment, but we can do so only for level 0 and
+       * only when it is padded.  ilo_image should know all these.
+       */
+      if (info->level)
+         assert(w % align_w == 0 && h % align_h == 0);
+
+      w = align(w, align_w);
+      h = align(h, align_h);
+   }
+
+   zs_get_gen6_max_extent(dev, info, &max_w, &max_h);
+   assert(w && h && w <= max_w && h <= max_h);
+   /* both outputs are stored minus one */
+   *width = w - 1;
+   *height = h - 1;
+
+   return true;
+}
+
+static bool
+zs_get_gen6_depth_slices(const struct ilo_dev *dev,
+                         const struct ilo_state_zs_info *info,
+                         uint16_t *depth, uint16_t *min_array_elem,
+                         uint16_t *rt_view_extent)
+{
+   const struct ilo_image *img = (info->z_img) ? info->z_img : info->s_img;
+   uint16_t max_slice, d;
+   /* compute the Depth, Minimum Array Element, and RT View Extent fields */
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 325:
+    *
+    *     "This field (Depth) specifies the total number of levels for a
+    *      volume texture or the number of array elements allowed to be
+    *      accessed starting at the Minimum Array Element for arrayed
+    *      surfaces. If the volume texture is MIP-mapped, this field specifies
+    *      the depth of the base MIP level."
+    */
+   switch (info->type) {
+   case GEN6_SURFTYPE_1D:
+   case GEN6_SURFTYPE_2D:
+   case GEN6_SURFTYPE_CUBE:
+      max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
+
+      assert(img->array_size <= max_slice);
+      max_slice = img->array_size;
+
+      d = info->slice_count;
+      if (info->type == GEN6_SURFTYPE_CUBE) {
+         /*
+          * Minimum Array Element and Depth must be 0; Render Target View
+          * Extent is ignored.
+          */
+         if (info->slice_base || d != 6) {
+            ilo_warn("no cube array depth buffer\n");
+            return false;
+         }
+
+         d /= 6;
+      }
+      break;
+   case GEN6_SURFTYPE_3D:
+      max_slice = 2048;
+
+      assert(img->depth0 <= max_slice);
+      max_slice = u_minify(img->depth0, info->level);
+
+      d = img->depth0;
+      break;
+   default:
+      assert(!"invalid surface type");
+      return false;
+      break;
+   }
+
+   if (!info->slice_count ||
+       info->slice_base + info->slice_count > max_slice) {
+      ilo_warn("invalid slice range\n");
+      return false;
+   }
+
+   assert(d);
+   *depth = d - 1;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 325:
+    *
+    *     "For 1D and 2D Surfaces:
+    *      This field (Minimum Array Element) indicates the minimum array
+    *      element that can be accessed as part of this surface. The delivered
+    *      array index is added to this field before being used to address the
+    *      surface.
+    *
+    *      For 3D Surfaces:
+    *      This field indicates the minimum `R' coordinate on the LOD
+    *      currently being rendered to.  This field is added to the delivered
+    *      array index before it is used to address the surface.
+    *
+    *      For Other Surfaces:
+    *      This field is ignored."
+    */
+   *min_array_elem = info->slice_base;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 326:
+    *
+    *     "For 3D Surfaces:
+    *      This field (Render Target View Extent) indicates the extent of the
+    *      accessible `R' coordinates minus 1 on the LOD currently being
+    *      rendered to.
+    *
+    *      For 1D and 2D Surfaces:
+    *      This field must be set to the same value as the Depth field.
+    *
+    *      For Other Surfaces:
+    *      This field is ignored."
+    */
+   *rt_view_extent = info->slice_count - 1;
+
+   return true;
+}
+
+static bool
+zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_zs_info *info)
+{
+   uint16_t width, height, depth, array_base, view_extent;
+   uint32_t dw1, dw2, dw3, dw4;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   if (!zs_validate_gen6(dev, info) ||
+       !zs_get_gen6_depth_extent(dev, info, &width, &height) ||
+       !zs_get_gen6_depth_slices(dev, info, &depth, &array_base,
+                                 &view_extent))
+      return false;
+
+   /* info->z_readonly and info->s_readonly are ignored on Gen6 */
+   dw1 = info->type << GEN6_DEPTH_DW1_TYPE__SHIFT |
+         GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
+         info->format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+
+   if (info->z_img)
+      dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT;
+   /* HiZ and separate stencil go together, also when there is no z_img */
+   if (info->hiz_vma || !info->z_img) {
+      dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
+             GEN6_DEPTH_DW1_SEPARATE_STENCIL;
+   }
+
+   dw2 = 0;
+   dw3 = height << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
+         width << GEN6_DEPTH_DW3_WIDTH__SHIFT |
+         info->level << GEN6_DEPTH_DW3_LOD__SHIFT |
+         GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
+   dw4 = depth << GEN6_DEPTH_DW4_DEPTH__SHIFT |
+         array_base << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+         view_extent << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
+   /* depth[4] is zeroed here; only the Gen7+ path stores a dword there */
+   STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5);
+   zs->depth[0] = dw1;
+   zs->depth[1] = dw2;
+   zs->depth[2] = dw3;
+   zs->depth[3] = dw4;
+   zs->depth[4] = 0;
+
+   return true;
+}
+
+static bool
+zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
+                                 const struct ilo_dev *dev,
+                                 const struct ilo_state_zs_info *info)
+{
+   uint16_t width, height, depth;
+   uint16_t array_base, view_extent;
+   uint32_t dw1, dw2, dw3, dw4, dw6;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   if (!zs_validate_gen6(dev, info) ||
+       !zs_get_gen6_depth_extent(dev, info, &width, &height) ||
+       !zs_get_gen6_depth_slices(dev, info, &depth, &array_base,
+                                 &view_extent))
+      return false;
+
+   dw1 = info->type << GEN7_DEPTH_DW1_TYPE__SHIFT |
+         info->format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
+   /* depth write, HiZ, and pitch only apply when there is a depth image */
+   if (info->z_img) {
+      if (!info->z_readonly)
+         dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
+      if (info->hiz_vma)
+         dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
+
+      dw1 |= (info->z_img->bo_stride - 1) << GEN7_DEPTH_DW1_PITCH__SHIFT;
+   }
+
+   if (info->s_img && !info->s_readonly)
+      dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
+
+   dw2 = 0;
+   dw3 = height << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
+         width << GEN7_DEPTH_DW3_WIDTH__SHIFT |
+         info->level << GEN7_DEPTH_DW3_LOD__SHIFT;
+   dw4 = depth << GEN7_DEPTH_DW4_DEPTH__SHIFT |
+         array_base << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
+   dw6 = view_extent << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8) && info->z_img) {
+      assert(info->z_img->walk_layer_height % 4 == 0);
+      /* note that DW is off-by-one for Gen8+ */
+      dw6 |= (info->z_img->walk_layer_height / 4) <<
+         GEN8_DEPTH_DW7_QPITCH__SHIFT;
+   }
+   /* only five dwords are kept: dw1-dw4 and dw6 */
+   STATIC_ASSERT(ARRAY_SIZE(zs->depth) >= 5);
+   zs->depth[0] = dw1;
+   zs->depth[1] = dw2;
+   zs->depth[2] = dw3;
+   zs->depth[3] = dw4;
+   zs->depth[4] = dw6;
+
+   return true;
+}
+
+static bool
+zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(struct ilo_state_zs *zs,
+                                        const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* clear the stencil dwords; the third one is only written on Gen8+ */
+   STATIC_ASSERT(ARRAY_SIZE(zs->stencil) >= 3);
+   zs->stencil[0] = 0;
+   zs->stencil[1] = 0;
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      zs->stencil[2] = 0;
+
+   return true;
+}
+
+static bool
+zs_set_gen6_3DSTATE_STENCIL_BUFFER(struct ilo_state_zs *zs,
+                                   const struct ilo_dev *dev,
+                                   const struct ilo_state_zs_info *info)
+{
+   const struct ilo_image *img = info->s_img;
+   uint32_t dw1, dw2;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(img->bo_stride);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 329:
+    *
+    *     "The pitch must be set to 2x the value computed based on width, as
+    *      the stencil buffer is stored with two rows interleaved."
+    *
+    * For Gen7+, we still double the stride because we did not double the
+    * slice widths when initializing ilo_image.
+    */
+   dw1 = (img->bo_stride * 2 - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+      dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
+
+   dw2 = 0;
+   /* offset to the level as Gen6 does not support mipmapped stencil */
+   if (ilo_dev_gen(dev) == ILO_GEN(6)) {
+      unsigned x, y;
+
+      ilo_image_get_slice_pos(img, info->level, 0, &x, &y);
+      ilo_image_pos_to_mem(img, x, y, &x, &y);
+      dw2 |= ilo_image_mem_to_raw(img, x, y);
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(zs->stencil) >= 3);
+   zs->stencil[0] = dw1;
+   zs->stencil[1] = dw2;
+   /* Gen8+ adds a third dword: QPitch, as walk_layer_height / 4 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      uint32_t dw4;
+
+      assert(img->walk_layer_height % 4 == 0);
+      dw4 = (img->walk_layer_height / 4) << GEN8_STENCIL_DW4_QPITCH__SHIFT;
+
+      zs->stencil[2] = dw4;
+   }
+
+   return true;
+}
+
+static bool
+zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_state_zs *zs,
+                                           const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+   /* clear the HiZ dwords; the third one is only written on Gen8+ */
+   STATIC_ASSERT(ARRAY_SIZE(zs->hiz) >= 3);
+   zs->hiz[0] = 0;
+   zs->hiz[1] = 0;
+   if (ilo_dev_gen(dev) >= ILO_GEN(8))
+      zs->hiz[2] = 0;
+
+   return true;
+}
+
+static bool
+zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_state_zs *zs,
+                                      const struct ilo_dev *dev,
+                                      const struct ilo_state_zs_info *info)
+{
+   const struct ilo_image *img = info->z_img;
+   uint32_t dw1, dw2;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   assert(img->aux.bo_stride);
+   /* the pitch comes from the aux (HiZ) stride and is stored minus one */
+   dw1 = (img->aux.bo_stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
+
+   dw2 = 0;
+   /* offset to the level as Gen6 does not support mipmapped HiZ */
+   if (ilo_dev_gen(dev) == ILO_GEN(6))
+      dw2 |= img->aux.walk_lod_offsets[info->level];
+
+   STATIC_ASSERT(ARRAY_SIZE(zs->hiz) >= 3);
+   zs->hiz[0] = dw1;
+   zs->hiz[1] = dw2;
+   /* Gen8+ adds a third dword: QPitch, as aux.walk_layer_height / 4 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      uint32_t dw4;
+
+      assert(img->aux.walk_layer_height % 4 == 0);
+      dw4 = (img->aux.walk_layer_height / 4) << GEN8_HIZ_DW4_QPITCH__SHIFT;
+
+      zs->hiz[2] = dw4;
+   }
+
+   return true;
+}
+
+bool
+ilo_state_zs_init(struct ilo_state_zs *zs, const struct ilo_dev *dev,
+                  const struct ilo_state_zs_info *info)
+{
+   bool ret = true;
+
+   assert(ilo_is_zeroed(zs, sizeof(*zs)));
+   /* depth, stencil, and HiZ each get either real or null state */
+   if (info->z_img || info->s_img) {
+      if (ilo_dev_gen(dev) >= ILO_GEN(7))
+         ret &= zs_set_gen7_3DSTATE_DEPTH_BUFFER(zs, dev, info);
+      else
+         ret &= zs_set_gen6_3DSTATE_DEPTH_BUFFER(zs, dev, info);
+   } else {
+      ret &= zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(zs, dev);
+   }
+
+   if (info->s_img)
+      ret &= zs_set_gen6_3DSTATE_STENCIL_BUFFER(zs, dev, info);
+   else
+      ret &= zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(zs, dev);
+
+   if (info->z_img && info->hiz_vma)
+      ret &= zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(zs, dev, info);
+   else
+      ret &= zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+   /* keep the VMAs and read-only flags of info alongside the dwords */
+   zs->z_vma = info->z_vma;
+   zs->s_vma = info->s_vma;
+   zs->hiz_vma = info->hiz_vma;
+
+   zs->z_readonly = info->z_readonly;
+   zs->s_readonly = info->s_readonly;
+
+   assert(ret);
+
+   return ret;
+}
+
+bool
+ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
+                           const struct ilo_dev *dev)
+{
+   struct ilo_state_zs_info info;
+   /* an all-zero info has no image and no VMA; only type and format are set */
+   memset(&info, 0, sizeof(info));
+   info.type = GEN6_SURFTYPE_NULL;
+   info.format = GEN6_ZFORMAT_D32_FLOAT;
+
+   return ilo_state_zs_init(zs, dev, &info);
+}
+
+bool
+ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
+                         const struct ilo_dev *dev)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Separate stencil must be disabled simultaneously on Gen6.  We can make
+    * it work when there is no stencil buffer, but it is probably not worth
+    * it.
+    */
+   assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+   /* nothing to do unless zs was initialized with a HiZ VMA */
+   if (zs->hiz_vma) {
+      zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
+      zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+      zs->hiz_vma = NULL;
+   }
+
+   return true;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_zs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_zs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_state_zs.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_state_zs.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,85 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_ZS_H
+#define ILO_STATE_ZS_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+struct ilo_vma;
+struct ilo_image;
+
+struct ilo_state_zs_info {
+   /* depth and stencil images; both optional */
+   const struct ilo_image *z_img;
+   const struct ilo_image *s_img;
+   uint8_t level;          /* programmed as the LOD */
+   uint16_t slice_base;    /* programmed as Minimum Array Element */
+   uint16_t slice_count;   /* must be non-zero when an image is given */
+
+   const struct ilo_vma *z_vma;
+   const struct ilo_vma *s_vma;
+   const struct ilo_vma *hiz_vma;   /* HiZ is set up only with z_img */
+
+   enum gen_surface_type type;
+   enum gen_depth_format format;
+
+   /* ignored prior to Gen7 */
+   bool z_readonly;
+   bool s_readonly;
+};
+
+struct ilo_state_zs {
+   uint32_t depth[5];     /* 3DSTATE_DEPTH_BUFFER dwords */
+   uint32_t stencil[3];   /* 3DSTATE_STENCIL_BUFFER dwords */
+   uint32_t hiz[3];       /* 3DSTATE_HIER_DEPTH_BUFFER dwords */
+
+   const struct ilo_vma *z_vma;     /* copied from ilo_state_zs_info */
+   const struct ilo_vma *s_vma;
+   const struct ilo_vma *hiz_vma;   /* reset by ilo_state_zs_disable_hiz() */
+
+   bool z_readonly;
+   bool s_readonly;
+};
+
+bool
+ilo_state_zs_init(struct ilo_state_zs *zs,
+                  const struct ilo_dev *dev,
+                  const struct ilo_state_zs_info *info);
+
+bool
+ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
+                           const struct ilo_dev *dev);
+
+bool
+ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
+                         const struct ilo_dev *dev);
+
+#endif /* ILO_STATE_ZS_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_vma.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_vma.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/core/ilo_vma.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/core/ilo_vma.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,73 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_VMA_H
+#define ILO_VMA_H
+
+#include "ilo_core.h"
+#include "ilo_debug.h"
+#include "ilo_dev.h"
+
+struct intel_bo;
+
+/**
+ * A virtual memory area.
+ */
+struct ilo_vma {
+   /* address space; set once by ilo_vma_init() */
+   uint32_t vm_size;
+   uint32_t vm_alignment;
+
+   /* backing storage; set by ilo_vma_set_bo() */
+   struct intel_bo *bo;
+   uint32_t bo_offset;   /* must be a multiple of vm_alignment */
+};
+
+static inline bool
+ilo_vma_init(struct ilo_vma *vma, const struct ilo_dev *dev,
+             uint32_t size, uint32_t alignment)
+{
+   assert(ilo_is_zeroed(vma, sizeof(*vma)));
+   assert(size && alignment);
+   /* dev is unused; only the requested size and alignment are recorded */
+   vma->vm_alignment = alignment;
+   vma->vm_size = size;
+
+   return true;
+}
+
+static inline void
+ilo_vma_set_bo(struct ilo_vma *vma, const struct ilo_dev *dev,
+               struct intel_bo *bo, uint32_t offset)
+{
+   assert(offset % vma->vm_alignment == 0);
+   /* dev is unused; record the backing bo and the offset into it */
+   vma->bo = bo;
+   vma->bo_offset = offset;
+}
+
+#endif /* ILO_VMA_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/genhw.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/genhw.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/genhw.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/genhw.h	2015-09-16 14:36:09.000000000 +0000
@@ -1,6 +1,4 @@
 /*
- * Mesa 3-D graphics library
- *
  * Copyright (C) 2014 LunarG, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,8 +23,9 @@
 #ifndef GENHW_H
 #define GENHW_H
 
-#include "pipe/p_compiler.h"
-#include "util/u_debug.h"
+#include <stdbool.h>
+#include <stdint.h>
+#include <assert.h>
 
 #include "gen_regs.xml.h"
 #include "gen_mi.xml.h"
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_mi.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_mi.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_mi.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_mi.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -97,6 +97,9 @@
 #define GEN6_MI_LENGTH__MASK					0x0000003f
 #define GEN6_MI_LENGTH__SHIFT					0
 #define GEN6_MI_NOOP__SIZE					1
+#define GEN6_MI_NOOP_DW0_WRITE_NOPID				(0x1 << 22)
+#define GEN6_MI_NOOP_DW0_VALUE__MASK				0x003fffff
+#define GEN6_MI_NOOP_DW0_VALUE__SHIFT				0
 
 #define GEN75_MI_SET_PREDICATE__SIZE				1
 #define GEN75_MI_SET_PREDICATE_DW0_PREDICATE__MASK		0x00000003
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_regs.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_regs.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_regs.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_regs.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -35,6 +35,8 @@
 #define GEN6_REG_MASK__MASK					0xffff0000
 #define GEN6_REG_MASK__SHIFT					16
 #define GEN6_REG__SIZE						0x400000
+#define GEN6_REG_NOPID						0x2094
+
 #define GEN7_REG_HS_INVOCATION_COUNT				0x2300
 
 #define GEN7_REG_DS_INVOCATION_COUNT				0x2308
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_render_3d.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,7 +32,7 @@
 */
 
 
-enum gen_prim_type {
+enum gen_3dprim_type {
     GEN6_3DPRIM_POINTLIST				      = 0x1,
     GEN6_3DPRIM_LINELIST				      = 0x2,
     GEN6_3DPRIM_LINESTRIP				      = 0x3,
@@ -105,6 +105,12 @@
     GEN8_ALIGNMENT_SURFACE_STATE			      = 0x40,
 };
 
+enum gen_index_format {
+    GEN6_INDEX_BYTE					      = 0x0,
+    GEN6_INDEX_WORD					      = 0x1,
+    GEN6_INDEX_DWORD					      = 0x2,
+};
+
 enum gen_vf_component {
     GEN6_VFCOMP_NOSTORE					      = 0x0,
     GEN6_VFCOMP_STORE_SRC				      = 0x1,
@@ -123,6 +129,87 @@
     GEN6_ZFORMAT_D16_UNORM				      = 0x5,
 };
 
+enum gen_reorder_mode {
+    GEN7_REORDER_LEADING				      = 0x0,
+    GEN7_REORDER_TRAILING				      = 0x1,
+};
+
+enum gen_clip_mode {
+    GEN6_CLIPMODE_NORMAL				      = 0x0,
+    GEN6_CLIPMODE_REJECT_ALL				      = 0x3,
+    GEN6_CLIPMODE_ACCEPT_ALL				      = 0x4,
+};
+
+enum gen_front_winding {
+    GEN6_FRONTWINDING_CW				      = 0x0,
+    GEN6_FRONTWINDING_CCW				      = 0x1,
+};
+
+enum gen_fill_mode {
+    GEN6_FILLMODE_SOLID					      = 0x0,
+    GEN6_FILLMODE_WIREFRAME				      = 0x1,
+    GEN6_FILLMODE_POINT					      = 0x2,
+};
+
+enum gen_cull_mode {
+    GEN6_CULLMODE_BOTH					      = 0x0,
+    GEN6_CULLMODE_NONE					      = 0x1,
+    GEN6_CULLMODE_FRONT					      = 0x2,
+    GEN6_CULLMODE_BACK					      = 0x3,
+};
+
+enum gen_pixel_location {
+    GEN6_PIXLOC_CENTER					      = 0x0,
+    GEN6_PIXLOC_UL_CORNER				      = 0x1,
+};
+
+enum gen_sample_count {
+    GEN6_NUMSAMPLES_1					      = 0x0,
+    GEN8_NUMSAMPLES_2					      = 0x1,
+    GEN6_NUMSAMPLES_4					      = 0x2,
+    GEN7_NUMSAMPLES_8					      = 0x3,
+    GEN8_NUMSAMPLES_16					      = 0x4,
+};
+
+enum gen_inputattr_select {
+    GEN6_INPUTATTR_NORMAL				      = 0x0,
+    GEN6_INPUTATTR_FACING				      = 0x1,
+    GEN6_INPUTATTR_W					      = 0x2,
+    GEN6_INPUTATTR_FACING_W				      = 0x3,
+};
+
+enum gen_zw_interp {
+    GEN6_ZW_INTERP_PIXEL				      = 0x0,
+    GEN6_ZW_INTERP_CENTROID				      = 0x2,
+    GEN6_ZW_INTERP_SAMPLE				      = 0x3,
+};
+
+enum gen_position_offset {
+    GEN6_POSOFFSET_NONE					      = 0x0,
+    GEN6_POSOFFSET_CENTROID				      = 0x2,
+    GEN6_POSOFFSET_SAMPLE				      = 0x3,
+};
+
+enum gen_edsc_mode {
+    GEN7_EDSC_NORMAL					      = 0x0,
+    GEN7_EDSC_PSEXEC					      = 0x1,
+    GEN7_EDSC_PREPS					      = 0x2,
+};
+
+enum gen_pscdepth_mode {
+    GEN7_PSCDEPTH_OFF					      = 0x0,
+    GEN7_PSCDEPTH_ON					      = 0x1,
+    GEN7_PSCDEPTH_ON_GE					      = 0x2,
+    GEN7_PSCDEPTH_ON_LE					      = 0x3,
+};
+
+enum gen_msrast_mode {
+    GEN6_MSRASTMODE_OFF_PIXEL				      = 0x0,
+    GEN6_MSRASTMODE_OFF_PATTERN				      = 0x1,
+    GEN6_MSRASTMODE_ON_PIXEL				      = 0x2,
+    GEN6_MSRASTMODE_ON_PATTERN				      = 0x3,
+};
+
 #define GEN6_INTERP_NONPERSPECTIVE_SAMPLE			(0x1 << 5)
 #define GEN6_INTERP_NONPERSPECTIVE_CENTROID			(0x1 << 4)
 #define GEN6_INTERP_NONPERSPECTIVE_PIXEL			(0x1 << 3)
@@ -285,9 +372,6 @@
 #define GEN6_IB_DW0_CUT_INDEX_ENABLE				(0x1 << 10)
 #define GEN6_IB_DW0_FORMAT__MASK				0x00000300
 #define GEN6_IB_DW0_FORMAT__SHIFT				8
-#define GEN6_IB_DW0_FORMAT_BYTE					(0x0 << 8)
-#define GEN6_IB_DW0_FORMAT_WORD					(0x1 << 8)
-#define GEN6_IB_DW0_FORMAT_DWORD				(0x2 << 8)
 
 
 
@@ -295,9 +379,6 @@
 
 #define GEN8_IB_DW1_FORMAT__MASK				0x00000300
 #define GEN8_IB_DW1_FORMAT__SHIFT				8
-#define GEN8_IB_DW1_FORMAT_BYTE					(0x0 << 8)
-#define GEN8_IB_DW1_FORMAT_WORD					(0x1 << 8)
-#define GEN8_IB_DW1_FORMAT_DWORD				(0x2 << 8)
 #define GEN8_IB_DW1_MOCS__MASK					0x0000007f
 #define GEN8_IB_DW1_MOCS__SHIFT					0
 
@@ -313,8 +394,8 @@
 
 
 #define GEN8_INSTANCING_DW1_ENABLE				(0x1 << 8)
-#define GEN8_INSTANCING_DW1_VB_INDEX__MASK			0x0000003f
-#define GEN8_INSTANCING_DW1_VB_INDEX__SHIFT			0
+#define GEN8_INSTANCING_DW1_VE_INDEX__MASK			0x0000003f
+#define GEN8_INSTANCING_DW1_VE_INDEX__SHIFT			0
 
 
 #define GEN8_3DSTATE_VF_SGVS__SIZE				2
@@ -614,7 +695,7 @@
 #define GEN6_GS_DW5_SO_STATISTICS				(0x1 << 9)
 #define GEN6_GS_DW5_RENDER_ENABLE				(0x1 << 8)
 
-#define GEN6_GS_DW6_REORDER_ENABLE				(0x1 << 30)
+#define GEN6_GS_DW6_REORDER_LEADING_ENABLE			(0x1 << 30)
 #define GEN6_GS_DW6_DISCARD_ADJACENCY				(0x1 << 29)
 #define GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE				(0x1 << 28)
 #define GEN6_GS_DW6_SVBI_POST_INC_ENABLE			(0x1 << 27)
@@ -666,11 +747,9 @@
 #define GEN7_GS_DW5_INVOCATION_INCR__SHIFT			5
 #define GEN7_GS_DW5_INCLUDE_PRIMITIVE_ID			(0x1 << 4)
 #define GEN7_GS_DW5_HINT					(0x1 << 3)
-#define GEN7_GS_DW5_REORDER_ENABLE				(0x1 << 2)
-#define GEN75_GS_DW5_REORDER__MASK				0x00000004
-#define GEN75_GS_DW5_REORDER__SHIFT				2
-#define GEN75_GS_DW5_REORDER_LEADING				(0x0 << 2)
-#define GEN75_GS_DW5_REORDER_TRAILING				(0x1 << 2)
+#define GEN7_GS_DW5_REORDER_LEADING_ENABLE			(0x1 << 2)
+#define GEN75_GS_DW5_REORDER_MODE__MASK				0x00000004
+#define GEN75_GS_DW5_REORDER_MODE__SHIFT			2
 #define GEN7_GS_DW5_DISCARD_ADJACENCY				(0x1 << 1)
 #define GEN7_GS_DW5_GS_ENABLE					(0x1 << 0)
 
@@ -727,10 +806,8 @@
 #define GEN8_GS_DW7_INVOCATION_INCR__SHIFT			5
 #define GEN8_GS_DW7_INCLUDE_PRIMITIVE_ID			(0x1 << 4)
 #define GEN8_GS_DW7_HINT					(0x1 << 3)
-#define GEN8_GS_DW7_REORDER__MASK				0x00000004
-#define GEN8_GS_DW7_REORDER__SHIFT				2
-#define GEN8_GS_DW7_REORDER_LEADING				(0x0 << 2)
-#define GEN8_GS_DW7_REORDER_TRAILING				(0x1 << 2)
+#define GEN8_GS_DW7_REORDER_MODE__MASK				0x00000004
+#define GEN8_GS_DW7_REORDER_MODE__SHIFT				2
 #define GEN8_GS_DW7_DISCARD_ADJACENCY				(0x1 << 1)
 #define GEN8_GS_DW7_GS_ENABLE					(0x1 << 0)
 
@@ -758,10 +835,8 @@
 #define GEN7_SO_DW1_RENDER_DISABLE				(0x1 << 30)
 #define GEN7_SO_DW1_RENDER_STREAM_SELECT__MASK			0x18000000
 #define GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT			27
-#define GEN7_SO_DW1_REORDER__MASK				0x04000000
-#define GEN7_SO_DW1_REORDER__SHIFT				26
-#define GEN7_SO_DW1_REORDER_LEADING				(0x0 << 26)
-#define GEN7_SO_DW1_REORDER_TRAILING				(0x1 << 26)
+#define GEN7_SO_DW1_REORDER_MODE__MASK				0x04000000
+#define GEN7_SO_DW1_REORDER_MODE__SHIFT				26
 #define GEN7_SO_DW1_STATISTICS					(0x1 << 25)
 #define GEN7_SO_DW1_BUFFER_ENABLES__MASK			0x00000f00
 #define GEN7_SO_DW1_BUFFER_ENABLES__SHIFT			8
@@ -862,21 +937,15 @@
 #define GEN6_3DSTATE_CLIP__SIZE					4
 
 
-#define GEN7_CLIP_DW1_FRONTWINDING__MASK			0x00100000
-#define GEN7_CLIP_DW1_FRONTWINDING__SHIFT			20
-#define GEN7_CLIP_DW1_FRONTWINDING_CW				(0x0 << 20)
-#define GEN7_CLIP_DW1_FRONTWINDING_CCW				(0x1 << 20)
+#define GEN7_CLIP_DW1_FRONT_WINDING__MASK			0x00100000
+#define GEN7_CLIP_DW1_FRONT_WINDING__SHIFT			20
 #define GEN7_CLIP_DW1_SUBPIXEL__MASK				0x00080000
 #define GEN7_CLIP_DW1_SUBPIXEL__SHIFT				19
 #define GEN7_CLIP_DW1_SUBPIXEL_8BITS				(0x0 << 19)
 #define GEN7_CLIP_DW1_SUBPIXEL_4BITS				(0x1 << 19)
 #define GEN7_CLIP_DW1_EARLY_CULL_ENABLE				(0x1 << 18)
-#define GEN7_CLIP_DW1_CULLMODE__MASK				0x00030000
-#define GEN7_CLIP_DW1_CULLMODE__SHIFT				16
-#define GEN7_CLIP_DW1_CULLMODE_BOTH				(0x0 << 16)
-#define GEN7_CLIP_DW1_CULLMODE_NONE				(0x1 << 16)
-#define GEN7_CLIP_DW1_CULLMODE_FRONT				(0x2 << 16)
-#define GEN7_CLIP_DW1_CULLMODE_BACK				(0x3 << 16)
+#define GEN7_CLIP_DW1_CULL_MODE__MASK				0x00030000
+#define GEN7_CLIP_DW1_CULL_MODE__SHIFT				16
 #define GEN6_CLIP_DW1_STATISTICS				(0x1 << 10)
 #define GEN6_CLIP_DW1_UCP_CULL_ENABLES__MASK			0x000000ff
 #define GEN6_CLIP_DW1_UCP_CULL_ENABLES__SHIFT			0
@@ -891,11 +960,8 @@
 #define GEN6_CLIP_DW2_GB_TEST_ENABLE				(0x1 << 26)
 #define GEN6_CLIP_DW2_UCP_CLIP_ENABLES__MASK			0x00ff0000
 #define GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT			16
-#define GEN6_CLIP_DW2_CLIPMODE__MASK				0x0000e000
-#define GEN6_CLIP_DW2_CLIPMODE__SHIFT				13
-#define GEN6_CLIP_DW2_CLIPMODE_NORMAL				(0x0 << 13)
-#define GEN6_CLIP_DW2_CLIPMODE_REJECT_ALL			(0x3 << 13)
-#define GEN6_CLIP_DW2_CLIPMODE_ACCEPT_ALL			(0x4 << 13)
+#define GEN6_CLIP_DW2_CLIP_MODE__MASK				0x0000e000
+#define GEN6_CLIP_DW2_CLIP_MODE__SHIFT				13
 #define GEN6_CLIP_DW2_PERSPECTIVE_DIVIDE_DISABLE		(0x1 << 9)
 #define GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE		(0x1 << 8)
 #define GEN6_CLIP_DW2_TRI_PROVOKE__MASK				0x00000030
@@ -911,7 +977,7 @@
 #define GEN6_CLIP_DW3_MAX_POINT_WIDTH__MASK			0x0001ffc0
 #define GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT			6
 #define GEN6_CLIP_DW3_MAX_POINT_WIDTH__RADIX			3
-#define GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO			(0x1 << 5)
+#define GEN6_CLIP_DW3_FORCE_RTAINDEX_ZERO			(0x1 << 5)
 #define GEN6_CLIP_DW3_MAX_VPINDEX__MASK				0x0000000f
 #define GEN6_CLIP_DW3_MAX_VPINDEX__SHIFT			0
 
@@ -927,29 +993,17 @@
 #define GEN7_SF_DW1_DEPTH_OFFSET_SOLID				(0x1 << 9)
 #define GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME			(0x1 << 8)
 #define GEN7_SF_DW1_DEPTH_OFFSET_POINT				(0x1 << 7)
-#define GEN7_SF_DW1_FRONTFACE__MASK				0x00000060
-#define GEN7_SF_DW1_FRONTFACE__SHIFT				5
-#define GEN7_SF_DW1_FRONTFACE_SOLID				(0x0 << 5)
-#define GEN7_SF_DW1_FRONTFACE_WIREFRAME				(0x1 << 5)
-#define GEN7_SF_DW1_FRONTFACE_POINT				(0x2 << 5)
-#define GEN7_SF_DW1_BACKFACE__MASK				0x00000018
-#define GEN7_SF_DW1_BACKFACE__SHIFT				3
-#define GEN7_SF_DW1_BACKFACE_SOLID				(0x0 << 3)
-#define GEN7_SF_DW1_BACKFACE_WIREFRAME				(0x1 << 3)
-#define GEN7_SF_DW1_BACKFACE_POINT				(0x2 << 3)
-#define GEN7_SF_DW1_VIEWPORT_ENABLE				(0x1 << 1)
-#define GEN7_SF_DW1_FRONTWINDING__MASK				0x00000001
-#define GEN7_SF_DW1_FRONTWINDING__SHIFT				0
-#define GEN7_SF_DW1_FRONTWINDING_CW				0x0
-#define GEN7_SF_DW1_FRONTWINDING_CCW				0x1
+#define GEN7_SF_DW1_FILL_MODE_FRONT__MASK			0x00000060
+#define GEN7_SF_DW1_FILL_MODE_FRONT__SHIFT			5
+#define GEN7_SF_DW1_FILL_MODE_BACK__MASK			0x00000018
+#define GEN7_SF_DW1_FILL_MODE_BACK__SHIFT			3
+#define GEN7_SF_DW1_VIEWPORT_TRANSFORM				(0x1 << 1)
+#define GEN7_SF_DW1_FRONT_WINDING__MASK				0x00000001
+#define GEN7_SF_DW1_FRONT_WINDING__SHIFT			0
 
 #define GEN7_SF_DW2_AA_LINE_ENABLE				(0x1 << 31)
-#define GEN7_SF_DW2_CULLMODE__MASK				0x60000000
-#define GEN7_SF_DW2_CULLMODE__SHIFT				29
-#define GEN7_SF_DW2_CULLMODE_BOTH				(0x0 << 29)
-#define GEN7_SF_DW2_CULLMODE_NONE				(0x1 << 29)
-#define GEN7_SF_DW2_CULLMODE_FRONT				(0x2 << 29)
-#define GEN7_SF_DW2_CULLMODE_BACK				(0x3 << 29)
+#define GEN7_SF_DW2_CULL_MODE__MASK				0x60000000
+#define GEN7_SF_DW2_CULL_MODE__SHIFT				29
 #define GEN7_SF_DW2_LINE_WIDTH__MASK				0x0ffc0000
 #define GEN7_SF_DW2_LINE_WIDTH__SHIFT				18
 #define GEN7_SF_DW2_LINE_WIDTH__RADIX				7
@@ -963,10 +1017,6 @@
 #define GEN7_SF_DW2_SCISSOR_ENABLE				(0x1 << 11)
 #define GEN7_SF_DW2_MSRASTMODE__MASK				0x00000300
 #define GEN7_SF_DW2_MSRASTMODE__SHIFT				8
-#define GEN7_SF_DW2_MSRASTMODE_OFF_PIXEL			(0x0 << 8)
-#define GEN7_SF_DW2_MSRASTMODE_OFF_PATTERN			(0x1 << 8)
-#define GEN7_SF_DW2_MSRASTMODE_ON_PIXEL				(0x2 << 8)
-#define GEN7_SF_DW2_MSRASTMODE_ON_PATTERN			(0x3 << 8)
 
 #define GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE			(0x1 << 31)
 #define GEN7_SF_DW3_TRI_PROVOKE__MASK				0x60000000
@@ -1021,14 +1071,10 @@
 #define GEN8_SBE_SWIZ_CONST_0001_FLOAT				(0x1 << 9)
 #define GEN8_SBE_SWIZ_CONST_1111_FLOAT				(0x2 << 9)
 #define GEN8_SBE_SWIZ_CONST_PRIM_ID				(0x3 << 9)
-#define GEN8_SBE_SWIZ_INPUTATTR__MASK				0x000000c0
-#define GEN8_SBE_SWIZ_INPUTATTR__SHIFT				6
-#define GEN8_SBE_SWIZ_INPUTATTR_NORMAL				(0x0 << 6)
-#define GEN8_SBE_SWIZ_INPUTATTR_FACING				(0x1 << 6)
-#define GEN8_SBE_SWIZ_INPUTATTR_W				(0x2 << 6)
-#define GEN8_SBE_SWIZ_INPUTATTR_FACING_W			(0x3 << 6)
-#define GEN8_SBE_SWIZ_URB_ENTRY_OFFSET__MASK			0x0000001f
-#define GEN8_SBE_SWIZ_URB_ENTRY_OFFSET__SHIFT			0
+#define GEN8_SBE_SWIZ_SWIZZLE_SELECT__MASK			0x000000c0
+#define GEN8_SBE_SWIZ_SWIZZLE_SELECT__SHIFT			6
+#define GEN8_SBE_SWIZ_SRC_ATTR__MASK				0x0000001f
+#define GEN8_SBE_SWIZ_SRC_ATTR__SHIFT				0
 
 #define GEN6_3DSTATE_SF__SIZE					20
 
@@ -1080,31 +1126,19 @@
 
 
 #define GEN9_RASTER_DW1_Z_TEST_FAR_ENABLE			(0x1 << 26)
-#define GEN8_RASTER_DW1_FRONTWINDING__MASK			0x00200000
-#define GEN8_RASTER_DW1_FRONTWINDING__SHIFT			21
-#define GEN8_RASTER_DW1_FRONTWINDING_CW				(0x0 << 21)
-#define GEN8_RASTER_DW1_FRONTWINDING_CCW			(0x1 << 21)
-#define GEN8_RASTER_DW1_CULLMODE__MASK				0x00030000
-#define GEN8_RASTER_DW1_CULLMODE__SHIFT				16
-#define GEN8_RASTER_DW1_CULLMODE_BOTH				(0x0 << 16)
-#define GEN8_RASTER_DW1_CULLMODE_NONE				(0x1 << 16)
-#define GEN8_RASTER_DW1_CULLMODE_FRONT				(0x2 << 16)
-#define GEN8_RASTER_DW1_CULLMODE_BACK				(0x3 << 16)
+#define GEN8_RASTER_DW1_FRONT_WINDING__MASK			0x00200000
+#define GEN8_RASTER_DW1_FRONT_WINDING__SHIFT			21
+#define GEN8_RASTER_DW1_CULL_MODE__MASK				0x00030000
+#define GEN8_RASTER_DW1_CULL_MODE__SHIFT			16
 #define GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE			(0x1 << 13)
 #define GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE			(0x1 << 12)
 #define GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID			(0x1 << 9)
 #define GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME			(0x1 << 8)
 #define GEN8_RASTER_DW1_DEPTH_OFFSET_POINT			(0x1 << 7)
-#define GEN8_RASTER_DW1_FRONTFACE__MASK				0x00000060
-#define GEN8_RASTER_DW1_FRONTFACE__SHIFT			5
-#define GEN8_RASTER_DW1_FRONTFACE_SOLID				(0x0 << 5)
-#define GEN8_RASTER_DW1_FRONTFACE_WIREFRAME			(0x1 << 5)
-#define GEN8_RASTER_DW1_FRONTFACE_POINT				(0x2 << 5)
-#define GEN8_RASTER_DW1_BACKFACE__MASK				0x00000018
-#define GEN8_RASTER_DW1_BACKFACE__SHIFT				3
-#define GEN8_RASTER_DW1_BACKFACE_SOLID				(0x0 << 3)
-#define GEN8_RASTER_DW1_BACKFACE_WIREFRAME			(0x1 << 3)
-#define GEN8_RASTER_DW1_BACKFACE_POINT				(0x2 << 3)
+#define GEN8_RASTER_DW1_FILL_MODE_FRONT__MASK			0x00000060
+#define GEN8_RASTER_DW1_FILL_MODE_FRONT__SHIFT			5
+#define GEN8_RASTER_DW1_FILL_MODE_BACK__MASK			0x00000018
+#define GEN8_RASTER_DW1_FILL_MODE_BACK__SHIFT			3
 #define GEN8_RASTER_DW1_AA_LINE_ENABLE				(0x1 << 2)
 #define GEN8_RASTER_DW1_SCISSOR_ENABLE				(0x1 << 1)
 #define GEN8_RASTER_DW1_Z_TEST_ENABLE				(0x1 << 0)
@@ -1164,14 +1198,8 @@
 #define GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT			20
 #define GEN6_WM_DW6_PS_POSOFFSET__MASK				0x000c0000
 #define GEN6_WM_DW6_PS_POSOFFSET__SHIFT				18
-#define GEN6_WM_DW6_PS_POSOFFSET_NONE				(0x0 << 18)
-#define GEN6_WM_DW6_PS_POSOFFSET_CENTROID			(0x2 << 18)
-#define GEN6_WM_DW6_PS_POSOFFSET_SAMPLE				(0x3 << 18)
 #define GEN6_WM_DW6_ZW_INTERP__MASK				0x00030000
 #define GEN6_WM_DW6_ZW_INTERP__SHIFT				16
-#define GEN6_WM_DW6_ZW_INTERP_PIXEL				(0x0 << 16)
-#define GEN6_WM_DW6_ZW_INTERP_CENTROID				(0x2 << 16)
-#define GEN6_WM_DW6_ZW_INTERP_SAMPLE				(0x3 << 16)
 #define GEN6_WM_DW6_BARYCENTRIC_INTERP__MASK			0x0000fc00
 #define GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT			10
 #define GEN6_WM_DW6_POINT_RASTRULE__MASK			0x00000200
@@ -1180,10 +1208,6 @@
 #define GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT			(0x1 << 9)
 #define GEN6_WM_DW6_MSRASTMODE__MASK				0x00000006
 #define GEN6_WM_DW6_MSRASTMODE__SHIFT				1
-#define GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL			(0x0 << 1)
-#define GEN6_WM_DW6_MSRASTMODE_OFF_PATTERN			(0x1 << 1)
-#define GEN6_WM_DW6_MSRASTMODE_ON_PIXEL				(0x2 << 1)
-#define GEN6_WM_DW6_MSRASTMODE_ON_PATTERN			(0x3 << 1)
 #define GEN6_WM_DW6_MSDISPMODE__MASK				0x00000001
 #define GEN6_WM_DW6_MSDISPMODE__SHIFT				0
 #define GEN6_WM_DW6_MSDISPMODE_PERSAMPLE			0x0
@@ -1207,22 +1231,12 @@
 #define GEN7_WM_DW1_PS_KILL_PIXEL				(0x1 << 25)
 #define GEN7_WM_DW1_PSCDEPTH__MASK				0x01800000
 #define GEN7_WM_DW1_PSCDEPTH__SHIFT				23
-#define GEN7_WM_DW1_PSCDEPTH_OFF				(0x0 << 23)
-#define GEN7_WM_DW1_PSCDEPTH_ON					(0x1 << 23)
-#define GEN7_WM_DW1_PSCDEPTH_ON_GE				(0x2 << 23)
-#define GEN7_WM_DW1_PSCDEPTH_ON_LE				(0x3 << 23)
 #define GEN7_WM_DW1_EDSC__MASK					0x00600000
 #define GEN7_WM_DW1_EDSC__SHIFT					21
-#define GEN7_WM_DW1_EDSC_NORMAL					(0x0 << 21)
-#define GEN7_WM_DW1_EDSC_PSEXEC					(0x1 << 21)
-#define GEN7_WM_DW1_EDSC_PREPS					(0x2 << 21)
 #define GEN7_WM_DW1_PS_USE_DEPTH				(0x1 << 20)
 #define GEN7_WM_DW1_PS_USE_W					(0x1 << 19)
 #define GEN7_WM_DW1_ZW_INTERP__MASK				0x00060000
 #define GEN7_WM_DW1_ZW_INTERP__SHIFT				17
-#define GEN7_WM_DW1_ZW_INTERP_PIXEL				(0x0 << 17)
-#define GEN7_WM_DW1_ZW_INTERP_CENTROID				(0x2 << 17)
-#define GEN7_WM_DW1_ZW_INTERP_SAMPLE				(0x3 << 17)
 #define GEN7_WM_DW1_BARYCENTRIC_INTERP__MASK			0x0001f800
 #define GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT			11
 #define GEN7_WM_DW1_PS_USE_COVERAGE_MASK			(0x1 << 10)
@@ -1247,10 +1261,6 @@
 #define GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT			(0x1 << 2)
 #define GEN7_WM_DW1_MSRASTMODE__MASK				0x00000003
 #define GEN7_WM_DW1_MSRASTMODE__SHIFT				0
-#define GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL			0x0
-#define GEN7_WM_DW1_MSRASTMODE_OFF_PATTERN			0x1
-#define GEN7_WM_DW1_MSRASTMODE_ON_PIXEL				0x2
-#define GEN7_WM_DW1_MSRASTMODE_ON_PATTERN			0x3
 
 #define GEN7_WM_DW2_MSDISPMODE__MASK				0x80000000
 #define GEN7_WM_DW2_MSDISPMODE__SHIFT				31
@@ -1265,12 +1275,12 @@
 #define GEN8_3DSTATE_WM_DEPTH_STENCIL__SIZE			4
 
 
-#define GEN8_ZS_DW1_STENCIL0_FAIL_OP__MASK			0xe0000000
-#define GEN8_ZS_DW1_STENCIL0_FAIL_OP__SHIFT			29
-#define GEN8_ZS_DW1_STENCIL0_ZFAIL_OP__MASK			0x1c000000
-#define GEN8_ZS_DW1_STENCIL0_ZFAIL_OP__SHIFT			26
-#define GEN8_ZS_DW1_STENCIL0_ZPASS_OP__MASK			0x03800000
-#define GEN8_ZS_DW1_STENCIL0_ZPASS_OP__SHIFT			23
+#define GEN8_ZS_DW1_STENCIL_FAIL_OP__MASK			0xe0000000
+#define GEN8_ZS_DW1_STENCIL_FAIL_OP__SHIFT			29
+#define GEN8_ZS_DW1_STENCIL_ZFAIL_OP__MASK			0x1c000000
+#define GEN8_ZS_DW1_STENCIL_ZFAIL_OP__SHIFT			26
+#define GEN8_ZS_DW1_STENCIL_ZPASS_OP__MASK			0x03800000
+#define GEN8_ZS_DW1_STENCIL_ZPASS_OP__SHIFT			23
 #define GEN8_ZS_DW1_STENCIL1_FUNC__MASK				0x00700000
 #define GEN8_ZS_DW1_STENCIL1_FUNC__SHIFT			20
 #define GEN8_ZS_DW1_STENCIL1_FAIL_OP__MASK			0x000e0000
@@ -1279,8 +1289,8 @@
 #define GEN8_ZS_DW1_STENCIL1_ZFAIL_OP__SHIFT			14
 #define GEN8_ZS_DW1_STENCIL1_ZPASS_OP__MASK			0x00003800
 #define GEN8_ZS_DW1_STENCIL1_ZPASS_OP__SHIFT			11
-#define GEN8_ZS_DW1_STENCIL0_FUNC__MASK				0x00000700
-#define GEN8_ZS_DW1_STENCIL0_FUNC__SHIFT			8
+#define GEN8_ZS_DW1_STENCIL_FUNC__MASK				0x00000700
+#define GEN8_ZS_DW1_STENCIL_FUNC__SHIFT				8
 #define GEN8_ZS_DW1_DEPTH_FUNC__MASK				0x000000e0
 #define GEN8_ZS_DW1_DEPTH_FUNC__SHIFT				5
 #define GEN8_ZS_DW1_STENCIL1_ENABLE				(0x1 << 4)
@@ -1289,17 +1299,17 @@
 #define GEN8_ZS_DW1_DEPTH_TEST_ENABLE				(0x1 << 1)
 #define GEN8_ZS_DW1_DEPTH_WRITE_ENABLE				(0x1 << 0)
 
-#define GEN8_ZS_DW2_STENCIL0_VALUEMASK__MASK			0xff000000
-#define GEN8_ZS_DW2_STENCIL0_VALUEMASK__SHIFT			24
-#define GEN8_ZS_DW2_STENCIL0_WRITEMASK__MASK			0x00ff0000
-#define GEN8_ZS_DW2_STENCIL0_WRITEMASK__SHIFT			16
-#define GEN8_ZS_DW2_STENCIL1_VALUEMASK__MASK			0x0000ff00
-#define GEN8_ZS_DW2_STENCIL1_VALUEMASK__SHIFT			8
-#define GEN8_ZS_DW2_STENCIL1_WRITEMASK__MASK			0x000000ff
-#define GEN8_ZS_DW2_STENCIL1_WRITEMASK__SHIFT			0
+#define GEN8_ZS_DW2_STENCIL_TEST_MASK__MASK			0xff000000
+#define GEN8_ZS_DW2_STENCIL_TEST_MASK__SHIFT			24
+#define GEN8_ZS_DW2_STENCIL_WRITE_MASK__MASK			0x00ff0000
+#define GEN8_ZS_DW2_STENCIL_WRITE_MASK__SHIFT			16
+#define GEN8_ZS_DW2_STENCIL1_TEST_MASK__MASK			0x0000ff00
+#define GEN8_ZS_DW2_STENCIL1_TEST_MASK__SHIFT			8
+#define GEN8_ZS_DW2_STENCIL1_WRITE_MASK__MASK			0x000000ff
+#define GEN8_ZS_DW2_STENCIL1_WRITE_MASK__SHIFT			0
 
-#define GEN9_ZS_DW3_STENCIL0_REF__MASK				0x0000ff00
-#define GEN9_ZS_DW3_STENCIL0_REF__SHIFT				8
+#define GEN9_ZS_DW3_STENCIL_REF__MASK				0x0000ff00
+#define GEN9_ZS_DW3_STENCIL_REF__SHIFT				8
 #define GEN9_ZS_DW3_STENCIL1_REF__MASK				0x000000ff
 #define GEN9_ZS_DW3_STENCIL1_REF__SHIFT				0
 
@@ -1314,13 +1324,8 @@
 #define GEN8_WM_HZ_DW1_FULL_SURFACE_DEPTH_CLEAR			(0x1 << 25)
 #define GEN8_WM_HZ_DW1_STENCIL_CLEAR_VALUE__MASK		0x00ff0000
 #define GEN8_WM_HZ_DW1_STENCIL_CLEAR_VALUE__SHIFT		16
-#define GEN8_WM_HZ_DW1_NUMSAMPLES__MASK				0x0000e000
-#define GEN8_WM_HZ_DW1_NUMSAMPLES__SHIFT			13
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_1				(0x0 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_2				(0x1 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_4				(0x2 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_8				(0x3 << 13)
-#define GEN8_WM_HZ_DW1_NUMSAMPLES_16				(0x4 << 13)
+#define GEN8_WM_HZ_DW1_NUM_SAMPLES__MASK			0x0000e000
+#define GEN8_WM_HZ_DW1_NUM_SAMPLES__SHIFT			13
 
 #define GEN8_WM_HZ_DW2_RECT_MIN_Y__MASK				0xffff0000
 #define GEN8_WM_HZ_DW2_RECT_MIN_Y__SHIFT			16
@@ -1359,9 +1364,6 @@
 #define GEN75_PS_DW4_ACCESS_UAV					(0x1 << 5)
 #define GEN7_PS_DW4_POSOFFSET__MASK				0x00000018
 #define GEN7_PS_DW4_POSOFFSET__SHIFT				3
-#define GEN7_PS_DW4_POSOFFSET_NONE				(0x0 << 3)
-#define GEN7_PS_DW4_POSOFFSET_CENTROID				(0x2 << 3)
-#define GEN7_PS_DW4_POSOFFSET_SAMPLE				(0x3 << 3)
 #define GEN7_PS_DW4_DISPATCH_MODE__MASK				0x00000007
 #define GEN7_PS_DW4_DISPATCH_MODE__SHIFT			0
 
@@ -1397,9 +1399,6 @@
 #define GEN8_PS_DW6_RT_RESOLVE					(0x1 << 6)
 #define GEN8_PS_DW6_POSOFFSET__MASK				0x00000018
 #define GEN8_PS_DW6_POSOFFSET__SHIFT				3
-#define GEN8_PS_DW6_POSOFFSET_NONE				(0x0 << 3)
-#define GEN8_PS_DW6_POSOFFSET_CENTROID				(0x2 << 3)
-#define GEN8_PS_DW6_POSOFFSET_SAMPLE				(0x3 << 3)
 #define GEN8_PS_DW6_DISPATCH_MODE__MASK				0x00000007
 #define GEN8_PS_DW6_DISPATCH_MODE__SHIFT			0
 
@@ -1423,16 +1422,12 @@
 #define GEN8_3DSTATE_PS_EXTRA__SIZE				2
 
 
-#define GEN8_PSX_DW1_DISPATCH_ENABLE				(0x1 << 31)
+#define GEN8_PSX_DW1_VALID					(0x1 << 31)
 #define GEN8_PSX_DW1_UAV_ONLY					(0x1 << 30)
 #define GEN8_PSX_DW1_COMPUTE_OMASK				(0x1 << 29)
 #define GEN8_PSX_DW1_KILL_PIXEL					(0x1 << 28)
 #define GEN8_PSX_DW1_PSCDEPTH__MASK				0x0c000000
 #define GEN8_PSX_DW1_PSCDEPTH__SHIFT				26
-#define GEN8_PSX_DW1_PSCDEPTH_OFF				(0x0 << 26)
-#define GEN8_PSX_DW1_PSCDEPTH_ON				(0x1 << 26)
-#define GEN8_PSX_DW1_PSCDEPTH_ON_GE				(0x2 << 26)
-#define GEN8_PSX_DW1_PSCDEPTH_ON_LE				(0x3 << 26)
 #define GEN8_PSX_DW1_FORCE_COMPUTE_DEPTH			(0x1 << 25)
 #define GEN8_PSX_DW1_USE_DEPTH					(0x1 << 24)
 #define GEN8_PSX_DW1_USE_W					(0x1 << 23)
@@ -1696,17 +1691,10 @@
 
 
 #define GEN75_MULTISAMPLE_DW1_DX9_MULTISAMPLE_ENABLE		(0x1 << 5)
-#define GEN6_MULTISAMPLE_DW1_PIXLOC__MASK			0x00000010
-#define GEN6_MULTISAMPLE_DW1_PIXLOC__SHIFT			4
-#define GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER			(0x0 << 4)
-#define GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER			(0x1 << 4)
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES__MASK			0x0000000e
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES__SHIFT			1
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1			(0x0 << 1)
-#define GEN8_MULTISAMPLE_DW1_NUMSAMPLES_2			(0x1 << 1)
-#define GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4			(0x2 << 1)
-#define GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8			(0x3 << 1)
-#define GEN8_MULTISAMPLE_DW1_NUMSAMPLES_16			(0x4 << 1)
+#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__MASK		0x00000010
+#define GEN6_MULTISAMPLE_DW1_PIXEL_LOCATION__SHIFT		4
+#define GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__MASK			0x0000000e
+#define GEN6_MULTISAMPLE_DW1_NUM_SAMPLES__SHIFT			1
 
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_render_dynamic.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -84,7 +84,7 @@
     GEN6_BLENDFUNCTION_MAX				      = 0x4,
 };
 
-enum gen_logicop_function {
+enum gen_logic_op {
     GEN6_LOGICOP_CLEAR					      = 0x0,
     GEN6_LOGICOP_NOR					      = 0x1,
     GEN6_LOGICOP_AND_INVERTED				      = 0x2,
@@ -103,20 +103,31 @@
     GEN6_LOGICOP_SET					      = 0xf,
 };
 
-enum gen_sampler_mip_filter {
+enum gen_mip_filter {
     GEN6_MIPFILTER_NONE					      = 0x0,
     GEN6_MIPFILTER_NEAREST				      = 0x1,
     GEN6_MIPFILTER_LINEAR				      = 0x3,
 };
 
-enum gen_sampler_map_filter {
+enum gen_map_filter {
     GEN6_MAPFILTER_NEAREST				      = 0x0,
     GEN6_MAPFILTER_LINEAR				      = 0x1,
     GEN6_MAPFILTER_ANISOTROPIC				      = 0x2,
     GEN6_MAPFILTER_MONO					      = 0x6,
 };
 
-enum gen_sampler_aniso_ratio {
+enum gen_prefilter_op { /* eight comparison selectors encoded 0x0 (ALWAYS) through 0x7 (GEQUAL); NOTE(review): presumably the sampler's shadow pre-filter compare op -- confirm against the PRM */
+    GEN6_PREFILTEROP_ALWAYS				      = 0x0,
+    GEN6_PREFILTEROP_NEVER				      = 0x1,
+    GEN6_PREFILTEROP_LESS				      = 0x2,
+    GEN6_PREFILTEROP_EQUAL				      = 0x3,
+    GEN6_PREFILTEROP_LEQUAL				      = 0x4,
+    GEN6_PREFILTEROP_GREATER				      = 0x5,
+    GEN6_PREFILTEROP_NOTEQUAL				      = 0x6,
+    GEN6_PREFILTEROP_GEQUAL				      = 0x7,
+};
+
+enum gen_aniso_ratio {
     GEN6_ANISORATIO_2					      = 0x0,
     GEN6_ANISORATIO_4					      = 0x1,
     GEN6_ANISORATIO_6					      = 0x2,
@@ -127,7 +138,7 @@
     GEN6_ANISORATIO_16					      = 0x7,
 };
 
-enum gen_sampler_texcoord_mode {
+enum gen_texcoord_mode {
     GEN6_TEXCOORDMODE_WRAP				      = 0x0,
     GEN6_TEXCOORDMODE_MIRROR				      = 0x1,
     GEN6_TEXCOORDMODE_CLAMP				      = 0x2,
@@ -137,15 +148,15 @@
     GEN8_TEXCOORDMODE_HALF_BORDER			      = 0x6,
 };
 
-enum gen_sampler_key_filter {
+enum gen_key_filter {
     GEN6_KEYFILTER_KILL_ON_ANY_MATCH			      = 0x0,
     GEN6_KEYFILTER_REPLACE_BLACK			      = 0x1,
 };
 
 #define GEN6_COLOR_CALC_STATE__SIZE				6
 
-#define GEN6_CC_DW0_STENCIL0_REF__MASK				0xff000000
-#define GEN6_CC_DW0_STENCIL0_REF__SHIFT				24
+#define GEN6_CC_DW0_STENCIL_REF__MASK				0xff000000
+#define GEN6_CC_DW0_STENCIL_REF__SHIFT				24
 #define GEN6_CC_DW0_STENCIL1_REF__MASK				0x00ff0000
 #define GEN6_CC_DW0_STENCIL1_REF__SHIFT				16
 #define GEN6_CC_DW0_ROUND_DISABLE_DISABLE			(0x1 << 15)
@@ -162,14 +173,14 @@
 #define GEN6_DEPTH_STENCIL_STATE__SIZE				3
 
 #define GEN6_ZS_DW0_STENCIL_TEST_ENABLE				(0x1 << 31)
-#define GEN6_ZS_DW0_STENCIL0_FUNC__MASK				0x70000000
-#define GEN6_ZS_DW0_STENCIL0_FUNC__SHIFT			28
-#define GEN6_ZS_DW0_STENCIL0_FAIL_OP__MASK			0x0e000000
-#define GEN6_ZS_DW0_STENCIL0_FAIL_OP__SHIFT			25
-#define GEN6_ZS_DW0_STENCIL0_ZFAIL_OP__MASK			0x01c00000
-#define GEN6_ZS_DW0_STENCIL0_ZFAIL_OP__SHIFT			22
-#define GEN6_ZS_DW0_STENCIL0_ZPASS_OP__MASK			0x00380000
-#define GEN6_ZS_DW0_STENCIL0_ZPASS_OP__SHIFT			19
+#define GEN6_ZS_DW0_STENCIL_FUNC__MASK				0x70000000
+#define GEN6_ZS_DW0_STENCIL_FUNC__SHIFT				28
+#define GEN6_ZS_DW0_STENCIL_FAIL_OP__MASK			0x0e000000
+#define GEN6_ZS_DW0_STENCIL_FAIL_OP__SHIFT			25
+#define GEN6_ZS_DW0_STENCIL_ZFAIL_OP__MASK			0x01c00000
+#define GEN6_ZS_DW0_STENCIL_ZFAIL_OP__SHIFT			22
+#define GEN6_ZS_DW0_STENCIL_ZPASS_OP__MASK			0x00380000
+#define GEN6_ZS_DW0_STENCIL_ZPASS_OP__SHIFT			19
 #define GEN6_ZS_DW0_STENCIL_WRITE_ENABLE			(0x1 << 18)
 #define GEN6_ZS_DW0_STENCIL1_ENABLE				(0x1 << 15)
 #define GEN6_ZS_DW0_STENCIL1_FUNC__MASK				0x00007000
@@ -181,14 +192,14 @@
 #define GEN6_ZS_DW0_STENCIL1_ZPASS_OP__MASK			0x00000038
 #define GEN6_ZS_DW0_STENCIL1_ZPASS_OP__SHIFT			3
 
-#define GEN6_ZS_DW1_STENCIL0_VALUEMASK__MASK			0xff000000
-#define GEN6_ZS_DW1_STENCIL0_VALUEMASK__SHIFT			24
-#define GEN6_ZS_DW1_STENCIL0_WRITEMASK__MASK			0x00ff0000
-#define GEN6_ZS_DW1_STENCIL0_WRITEMASK__SHIFT			16
-#define GEN6_ZS_DW1_STENCIL1_VALUEMASK__MASK			0x0000ff00
-#define GEN6_ZS_DW1_STENCIL1_VALUEMASK__SHIFT			8
-#define GEN6_ZS_DW1_STENCIL1_WRITEMASK__MASK			0x000000ff
-#define GEN6_ZS_DW1_STENCIL1_WRITEMASK__SHIFT			0
+#define GEN6_ZS_DW1_STENCIL_TEST_MASK__MASK			0xff000000
+#define GEN6_ZS_DW1_STENCIL_TEST_MASK__SHIFT			24
+#define GEN6_ZS_DW1_STENCIL_WRITE_MASK__MASK			0x00ff0000
+#define GEN6_ZS_DW1_STENCIL_WRITE_MASK__SHIFT			16
+#define GEN6_ZS_DW1_STENCIL1_TEST_MASK__MASK			0x0000ff00
+#define GEN6_ZS_DW1_STENCIL1_TEST_MASK__SHIFT			8
+#define GEN6_ZS_DW1_STENCIL1_WRITE_MASK__MASK			0x000000ff
+#define GEN6_ZS_DW1_STENCIL1_WRITE_MASK__SHIFT			0
 
 #define GEN6_ZS_DW2_DEPTH_TEST_ENABLE				(0x1 << 31)
 #define GEN6_ZS_DW2_DEPTH_FUNC__MASK				0x38000000
@@ -216,10 +227,12 @@
 #define GEN6_RT_DW1_ALPHA_TO_COVERAGE				(0x1 << 31)
 #define GEN6_RT_DW1_ALPHA_TO_ONE				(0x1 << 30)
 #define GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER			(0x1 << 29)
-#define GEN6_RT_DW1_WRITE_DISABLE_A				(0x1 << 27)
-#define GEN6_RT_DW1_WRITE_DISABLE_R				(0x1 << 26)
-#define GEN6_RT_DW1_WRITE_DISABLE_G				(0x1 << 25)
-#define GEN6_RT_DW1_WRITE_DISABLE_B				(0x1 << 24)
+#define GEN6_RT_DW1_WRITE_DISABLES__MASK			0x0f000000
+#define GEN6_RT_DW1_WRITE_DISABLES__SHIFT			24
+#define GEN6_RT_DW1_WRITE_DISABLES_A				(0x1 << 27)
+#define GEN6_RT_DW1_WRITE_DISABLES_R				(0x1 << 26)
+#define GEN6_RT_DW1_WRITE_DISABLES_G				(0x1 << 25)
+#define GEN6_RT_DW1_WRITE_DISABLES_B				(0x1 << 24)
 #define GEN6_RT_DW1_LOGICOP_ENABLE				(0x1 << 22)
 #define GEN6_RT_DW1_LOGICOP_FUNC__MASK				0x003c0000
 #define GEN6_RT_DW1_LOGICOP_FUNC__SHIFT				18
@@ -267,10 +280,12 @@
 #define GEN8_RT_DW0_DST_ALPHA_FACTOR__SHIFT			8
 #define GEN8_RT_DW0_ALPHA_FUNC__MASK				0x000000e0
 #define GEN8_RT_DW0_ALPHA_FUNC__SHIFT				5
-#define GEN8_RT_DW0_WRITE_DISABLE_A				(0x1 << 3)
-#define GEN8_RT_DW0_WRITE_DISABLE_R				(0x1 << 2)
-#define GEN8_RT_DW0_WRITE_DISABLE_G				(0x1 << 1)
-#define GEN8_RT_DW0_WRITE_DISABLE_B				(0x1 << 0)
+#define GEN8_RT_DW0_WRITE_DISABLES__MASK			0x0000000f
+#define GEN8_RT_DW0_WRITE_DISABLES__SHIFT			0
+#define GEN8_RT_DW0_WRITE_DISABLES_A				(0x1 << 3)
+#define GEN8_RT_DW0_WRITE_DISABLES_R				(0x1 << 2)
+#define GEN8_RT_DW0_WRITE_DISABLES_G				(0x1 << 1)
+#define GEN8_RT_DW0_WRITE_DISABLES_B				(0x1 << 0)
 
 #define GEN8_RT_DW1_LOGICOP_ENABLE				(0x1 << 31)
 #define GEN8_RT_DW1_LOGICOP_FUNC__MASK				0x78000000
@@ -419,6 +434,7 @@
 #define GEN8_SAMPLER_DW0_LOD_PRECLAMP_ENABLE__SHIFT		27
 #define GEN6_SAMPLER_DW0_BASE_LOD__MASK				0x07c00000
 #define GEN6_SAMPLER_DW0_BASE_LOD__SHIFT			22
+#define GEN6_SAMPLER_DW0_BASE_LOD__RADIX			1
 #define GEN6_SAMPLER_DW0_MIP_FILTER__MASK			0x00300000
 #define GEN6_SAMPLER_DW0_MIP_FILTER__SHIFT			20
 #define GEN6_SAMPLER_DW0_MAG_FILTER__MASK			0x000e0000
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/genhw/gen_render_surface.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -299,7 +299,10 @@
 #define GEN6_SURFACE_DW0_MIPLAYOUT__SHIFT			10
 #define GEN6_SURFACE_DW0_MIPLAYOUT_BELOW			(0x0 << 10)
 #define GEN6_SURFACE_DW0_MIPLAYOUT_RIGHT			(0x1 << 10)
-#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE			(0x1 << 9)
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE__MASK		0x00000200
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE__SHIFT		9
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_REPLICATE		(0x0 << 9)
+#define GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE		(0x1 << 9)
 #define GEN6_SURFACE_DW0_RENDER_CACHE_RW			(0x1 << 8)
 #define GEN6_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__MASK	0x000000c0
 #define GEN6_SURFACE_DW0_MEDIA_BOUNDARY_PIXEL_MODE__SHIFT	6
@@ -485,6 +488,8 @@
 #define GEN7_SURFACE_DW7_CC_B__SHIFT				29
 #define GEN7_SURFACE_DW7_CC_A__MASK				0x10000000
 #define GEN7_SURFACE_DW7_CC_A__SHIFT				28
+#define GEN75_SURFACE_DW7_SCS__MASK				0x0fff0000
+#define GEN75_SURFACE_DW7_SCS__SHIFT				16
 #define GEN75_SURFACE_DW7_SCS_R__MASK				0x0e000000
 #define GEN75_SURFACE_DW7_SCS_R__SHIFT				25
 #define GEN75_SURFACE_DW7_SCS_G__MASK				0x01c00000
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter_blt.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter_blt.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter_blt.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter_blt.c	2015-09-16 14:36:09.000000000 +0000
@@ -127,7 +127,7 @@
 
 static bool
 buf_clear_region(struct ilo_blitter *blitter,
-                 struct ilo_buffer *buf, unsigned offset,
+                 struct ilo_buffer_resource *buf, unsigned offset,
                  uint32_t val, unsigned size,
                  enum gen6_blt_mask value_mask,
                  enum gen6_blt_mask write_mask)
@@ -140,8 +140,8 @@
    if (offset % cpp || size % cpp)
       return false;
 
-   dst.bo = buf->bo;
-   dst.offset = offset;
+   dst.bo = buf->vma.bo;
+   dst.offset = buf->vma.bo_offset + offset;
 
    ilo_blitter_blt_begin(blitter, GEN6_COLOR_BLT__SIZE *
          (1 + size / 32764 / gen6_blt_max_scanlines),
@@ -179,25 +179,26 @@
 
 static bool
 buf_copy_region(struct ilo_blitter *blitter,
-                struct ilo_buffer *dst_buf, unsigned dst_offset,
-                struct ilo_buffer *src_buf, unsigned src_offset,
+                struct ilo_buffer_resource *dst_buf, unsigned dst_offset,
+                struct ilo_buffer_resource *src_buf, unsigned src_offset,
                 unsigned size)
 {
    const uint8_t rop = 0xcc; /* SRCCOPY */
    struct ilo_builder *builder = &blitter->ilo->cp->builder;
    struct gen6_blt_bo dst, src;
 
-   dst.bo = dst_buf->bo;
-   dst.offset = dst_offset;
+   dst.bo = dst_buf->vma.bo;
+   dst.offset = dst_buf->vma.bo_offset + dst_offset;
    dst.pitch = 0;
 
-   src.bo = src_buf->bo;
-   src.offset = src_offset;
+   src.bo = src_buf->vma.bo;
+   src.offset = src_buf->vma.bo_offset + src_offset;
    src.pitch = 0;
 
    ilo_blitter_blt_begin(blitter, GEN6_SRC_COPY_BLT__SIZE *
          (1 + size / 32764 / gen6_blt_max_scanlines),
-         dst_buf->bo, GEN6_TILING_NONE, src_buf->bo, GEN6_TILING_NONE);
+         dst_buf->vma.bo, GEN6_TILING_NONE,
+         src_buf->vma.bo, GEN6_TILING_NONE);
 
    while (size) {
       unsigned width, height;
@@ -258,14 +259,14 @@
    if (dst_box->width * cpp > gen6_blt_max_bytes_per_scanline)
       return false;
 
-   dst.bo = dst_tex->image.bo;
-   dst.offset = 0;
+   dst.bo = dst_tex->vma.bo;
+   dst.offset = dst_tex->vma.bo_offset;
    dst.pitch = dst_tex->image.bo_stride;
    dst.tiling = dst_tex->image.tiling;
 
    swctrl = ilo_blitter_blt_begin(blitter,
          GEN6_XY_COLOR_BLT__SIZE * dst_box->depth,
-         dst_tex->image.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
+         dst_tex->vma.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
 
    for (slice = 0; slice < dst_box->depth; slice++) {
       unsigned x, y;
@@ -299,7 +300,7 @@
                 const struct pipe_box *src_box)
 {
    const struct util_format_description *desc =
-      util_format_description(dst_tex->image.format);
+      util_format_description(dst_tex->image_format);
    const unsigned max_extent = 32767; /* INT16_MAX */
    const uint8_t rop = 0xcc; /* SRCCOPY */
    struct ilo_builder *builder = &blitter->ilo->cp->builder;
@@ -347,13 +348,13 @@
       break;
    }
 
-   dst.bo = dst_tex->image.bo;
-   dst.offset = 0;
+   dst.bo = dst_tex->vma.bo;
+   dst.offset = dst_tex->vma.bo_offset;
    dst.pitch = dst_tex->image.bo_stride;
    dst.tiling = dst_tex->image.tiling;
 
-   src.bo = src_tex->image.bo;
-   src.offset = 0;
+   src.bo = src_tex->vma.bo;
+   src.offset = src_tex->vma.bo_offset;
    src.pitch = src_tex->image.bo_stride;
    src.tiling = src_tex->image.tiling;
 
@@ -423,8 +424,8 @@
              src_box->height == 1 &&
              src_box->depth == 1);
 
-      success = buf_copy_region(blitter,
-            ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size);
+      success = buf_copy_region(blitter, ilo_buffer_resource(dst), dst_offset,
+            ilo_buffer_resource(src), src_offset, size);
    }
    else if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) {
       success = tex_copy_region(blitter,
@@ -488,7 +489,7 @@
       if (offset + size > end)
          size = end - offset;
 
-      success = buf_clear_region(blitter, ilo_buffer(rt->texture),
+      success = buf_clear_region(blitter, ilo_buffer_resource(rt->texture),
             offset, packed.ui[0], size, mask, mask);
    }
    else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter.h	2015-09-16 14:36:09.000000000 +0000
@@ -39,12 +39,6 @@
    ILO_BLITTER_USE_FB_STENCIL    = 1 << 4,
 };
 
-enum ilo_blitter_rectlist_op {
-   ILO_BLITTER_RECTLIST_CLEAR_ZS,
-   ILO_BLITTER_RECTLIST_RESOLVE_Z,
-   ILO_BLITTER_RECTLIST_RESOLVE_HIZ,
-};
-
 struct blitter_context;
 struct pipe_resource;
 struct pipe_surface;
@@ -57,30 +51,42 @@
    /*
     * A minimal context with the goal to send RECTLISTs down the pipeline.
     */
-   enum ilo_blitter_rectlist_op op;
+   enum ilo_state_raster_earlyz_op earlyz_op;
+   bool earlyz_stencil_clear;
    uint32_t uses;
 
    bool initialized;
 
    float vertices[3][2];
-   struct ilo_ve_state ve;
-   struct pipe_draw_info draw;
+   struct gen6_3dprimitive_info draw_info;
 
-   struct ilo_viewport_cso viewport;
-   struct ilo_dsa_state dsa;
+   uint32_t vf_data[4];
+   struct ilo_state_vf vf;
 
-   struct {
-      struct pipe_stencil_ref stencil_ref;
-      ubyte alpha_ref;
-      struct pipe_blend_color blend_color;
-   } cc;
+   struct ilo_state_vs vs;
+   struct ilo_state_hs hs;
+   struct ilo_state_ds ds;
+   struct ilo_state_gs gs;
+
+   struct ilo_state_sol sol;
+
+   struct ilo_state_viewport vp;
+   uint32_t vp_data[20];
+
+   struct ilo_state_sbe sbe;
+   struct ilo_state_ps ps;
+   struct ilo_state_cc cc;
 
    uint32_t depth_clear_value;
 
+   struct ilo_state_urb urb;
+
    struct {
       struct ilo_surface_cso dst;
       unsigned width, height;
       unsigned num_samples;
+
+      struct ilo_state_raster rs;
    } fb;
 };
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter_pipe.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter_pipe.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter_pipe.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter_pipe.c	2015-09-16 14:36:09.000000000 +0000
@@ -63,7 +63,7 @@
    util_blitter_save_viewport(b, &vec->viewport.viewport0);
 
    if (scissor_enable)
-      util_blitter_save_scissor(b, &vec->scissor.scissor0);
+      util_blitter_save_scissor(b, &vec->viewport.scissor0);
 
    switch (op) {
    case ILO_BLITTER_PIPE_BLIT:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter_rectlist.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_blitter_rectlist.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_blitter_rectlist.c	2015-09-16 14:36:09.000000000 +0000
@@ -25,7 +25,6 @@
  *    Chia-I Wu <olv@lunarg.com>
  */
 
-#include "core/ilo_state_3d.h"
 #include "util/u_draw.h"
 #include "util/u_pack_color.h"
 
@@ -40,45 +39,48 @@
 static bool
 ilo_blitter_set_invariants(struct ilo_blitter *blitter)
 {
-   struct pipe_vertex_element velem;
-   struct pipe_viewport_state vp;
+   struct ilo_state_vf_element_info elem;
 
    if (blitter->initialized)
       return true;
 
-   /* only vertex X and Y */
-   memset(&velem, 0, sizeof(velem));
-   velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
-   ilo_gpe_init_ve(blitter->ilo->dev, 1, &velem, &blitter->ve);
-
-   /* generate VUE header */
-   ilo_gpe_init_ve_nosrc(blitter->ilo->dev,
-         GEN6_VFCOMP_STORE_0, /* Reserved */
-         GEN6_VFCOMP_STORE_0, /* Render Target Array Index */
-         GEN6_VFCOMP_STORE_0, /* Viewport Index */
-         GEN6_VFCOMP_STORE_0, /* Point Width */
-         &blitter->ve.nosrc_cso);
-   blitter->ve.prepend_nosrc_cso = true;
-
    /* a rectangle has 3 vertices in a RECTLIST */
-   util_draw_init_info(&blitter->draw);
-   blitter->draw.mode = ILO_PRIM_RECTANGLES;
-   blitter->draw.count = 3;
+   blitter->draw_info.topology = GEN6_3DPRIM_RECTLIST;
+   blitter->draw_info.vertex_count = 3;
+   blitter->draw_info.instance_count = 1;
+
+   memset(&elem, 0, sizeof(elem));
+   /* only vertex X and Y */
+   elem.format = GEN6_FORMAT_R32G32_FLOAT;
+   elem.format_size = 8;
+   elem.component_count = 2;
+
+   ilo_state_vf_init_for_rectlist(&blitter->vf, blitter->ilo->dev,
+         blitter->vf_data, sizeof(blitter->vf_data), &elem, 1);
+
+   ilo_state_vs_init_disabled(&blitter->vs, blitter->ilo->dev);
+   ilo_state_hs_init_disabled(&blitter->hs, blitter->ilo->dev);
+   ilo_state_ds_init_disabled(&blitter->ds, blitter->ilo->dev);
+   ilo_state_gs_init_disabled(&blitter->gs, blitter->ilo->dev);
+   ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false);
 
    /**
     * From the Haswell PRM, volume 7, page 615:
     *
     *     "The clear value must be between the min and max depth values
-    *     (inclusive) defined in the CC_VIEWPORT."
+    *      (inclusive) defined in the CC_VIEWPORT."
     *
     * Even though clipping and viewport transformation will be disabled, we
     * still need to set up the viewport states.
     */
-   memset(&vp, 0, sizeof(vp));
-   vp.scale[0] = 1.0f;
-   vp.scale[1] = 1.0f;
-   vp.scale[2] = 1.0f;
-   ilo_gpe_set_viewport_cso(blitter->ilo->dev, &vp, &blitter->viewport);
+   ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev,
+         blitter->vp_data, sizeof(blitter->vp_data));
+
+   ilo_state_sbe_init_for_rectlist(&blitter->sbe, blitter->ilo->dev, 0, 0);
+   ilo_state_ps_init_disabled(&blitter->ps, blitter->ilo->dev);
+
+   ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev,
+         ilo_state_vf_get_attr_count(&blitter->vf));
 
    blitter->initialized = true;
 
@@ -86,10 +88,12 @@
 }
 
 static void
-ilo_blitter_set_op(struct ilo_blitter *blitter,
-                   enum ilo_blitter_rectlist_op op)
+ilo_blitter_set_earlyz_op(struct ilo_blitter *blitter,
+                          enum ilo_state_raster_earlyz_op op,
+                          bool earlyz_stencil_clear)
 {
-   blitter->op = op;
+   blitter->earlyz_op = op; /* only recorded here; ilo_blitter_set_fb_rs() later passes it to the raster state init */
+   blitter->earlyz_stencil_clear = earlyz_stencil_clear; /* stored next to the op and forwarded by the same call */
 }
 
 /**
@@ -117,18 +121,27 @@
 }
 
 static void
-ilo_blitter_set_clear_values(struct ilo_blitter *blitter,
-                             uint32_t depth, ubyte stencil)
+ilo_blitter_set_depth_clear_value(struct ilo_blitter *blitter,
+                                  uint32_t depth)
 {
    blitter->depth_clear_value = depth;
-   blitter->cc.stencil_ref.ref_value[0] = stencil;
 }
 
 static void
-ilo_blitter_set_dsa(struct ilo_blitter *blitter,
-                    const struct pipe_depth_stencil_alpha_state *state)
+ilo_blitter_set_cc(struct ilo_blitter *blitter,
+                   const struct ilo_state_cc_info *info)
+{
+   memset(&blitter->cc, 0, sizeof(blitter->cc));
+   ilo_state_cc_init(&blitter->cc, blitter->ilo->dev, info);
+}
+
+static void
+ilo_blitter_set_fb_rs(struct ilo_blitter *blitter)
 {
-   ilo_gpe_init_dsa(blitter->ilo->dev, state, &blitter->dsa);
+   memset(&blitter->fb.rs, 0, sizeof(blitter->fb.rs));
+   ilo_state_raster_init_for_rectlist(&blitter->fb.rs, blitter->ilo->dev,
+         blitter->fb.num_samples, blitter->earlyz_op,
+         blitter->earlyz_stencil_clear);
 }
 
 static void
@@ -146,6 +159,8 @@
       blitter->fb.num_samples = 1;
 
    memcpy(&blitter->fb.dst, cso, sizeof(*cso));
+
+   ilo_blitter_set_fb_rs(blitter);
 }
 
 static void
@@ -191,9 +206,9 @@
 {
    unsigned align_w, align_h;
 
-   switch (blitter->op) {
-   case ILO_BLITTER_RECTLIST_CLEAR_ZS:
-   case ILO_BLITTER_RECTLIST_RESOLVE_Z:
+   switch (blitter->earlyz_op) {
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR:
+   case ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE:
       break;
    default:
       return;
@@ -303,7 +318,7 @@
     * The truth is when HiZ is enabled, separate stencil is also enabled on
     * all GENs.  The depth buffer format cannot be combined depth/stencil.
     */
-   switch (tex->image.format) {
+   switch (tex->image_format) {
    case PIPE_FORMAT_Z16_UNORM:
       if (ilo_dev_gen(blitter->ilo->dev) == ILO_GEN(6) &&
           tex->base.width0 % 16)
@@ -328,7 +343,7 @@
                               double depth, unsigned stencil)
 {
    struct ilo_texture *tex = ilo_texture(zs->texture);
-   struct pipe_depth_stencil_alpha_state dsa_state;
+   struct ilo_state_cc_info info;
    uint32_t uses, clear_value;
 
    if (!ilo_image_can_enable_aux(&tex->image, zs->u.tex.level))
@@ -340,7 +355,7 @@
    if (ilo_dev_gen(blitter->ilo->dev) >= ILO_GEN(8))
       clear_value = fui(depth);
    else
-      clear_value = util_pack_z(tex->image.format, depth);
+      clear_value = util_pack_z(tex->image_format, depth);
 
    ilo_blit_resolve_surface(blitter->ilo, zs,
          ILO_TEXTURE_RENDER_WRITE | ILO_TEXTURE_CLEAR);
@@ -368,17 +383,20 @@
     *      - [DevSNB] errata: For stencil buffer only clear, the previous
     *        depth clear value must be delivered during the clear."
     */
-   memset(&dsa_state, 0, sizeof(dsa_state));
+   memset(&info, 0, sizeof(info));
 
-   if (clear_flags & PIPE_CLEAR_DEPTH)
-      dsa_state.depth.writemask = true;
+   if (clear_flags & PIPE_CLEAR_DEPTH) {
+      info.depth.cv_has_buffer = true;
+      info.depth.write_enable = true;
+   }
 
    if (clear_flags & PIPE_CLEAR_STENCIL) {
-      dsa_state.stencil[0].enabled = true;
-      dsa_state.stencil[0].func = PIPE_FUNC_ALWAYS;
-      dsa_state.stencil[0].fail_op = PIPE_STENCIL_OP_KEEP;
-      dsa_state.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
-      dsa_state.stencil[0].zfail_op = PIPE_STENCIL_OP_KEEP;
+      info.stencil.cv_has_buffer = true;
+      info.stencil.test_enable = true;
+      info.stencil.front.test_func = GEN6_COMPAREFUNCTION_ALWAYS;
+      info.stencil.front.fail_op = GEN6_STENCILOP_KEEP;
+      info.stencil.front.zfail_op = GEN6_STENCILOP_KEEP;
+      info.stencil.front.zpass_op = GEN6_STENCILOP_REPLACE;
 
       /*
        * From the Ivy Bridge PRM, volume 2 part 1, page 277:
@@ -389,18 +407,21 @@
        *      - DEPTH_STENCIL_STATE::Stencil Test Mask must be 0xFF
        *      - DEPTH_STENCIL_STATE::Back Face Stencil Write Mask must be 0xFF
        *      - DEPTH_STENCIL_STATE::Back Face Stencil Test Mask must be 0xFF"
+       *
+       * Back face masks will be copied from front face masks.
        */
-      dsa_state.stencil[0].valuemask = 0xff;
-      dsa_state.stencil[0].writemask = 0xff;
-      dsa_state.stencil[1].valuemask = 0xff;
-      dsa_state.stencil[1].writemask = 0xff;
+      info.params.stencil_front.test_ref = (uint8_t) stencil;
+      info.params.stencil_front.test_mask = 0xff;
+      info.params.stencil_front.write_mask = 0xff;
    }
 
    ilo_blitter_set_invariants(blitter);
-   ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_CLEAR_ZS);
+   ilo_blitter_set_earlyz_op(blitter,
+         ILO_STATE_RASTER_EARLYZ_DEPTH_CLEAR,
+         clear_flags & PIPE_CLEAR_STENCIL);
 
-   ilo_blitter_set_dsa(blitter, &dsa_state);
-   ilo_blitter_set_clear_values(blitter, clear_value, (ubyte) stencil);
+   ilo_blitter_set_cc(blitter, &info);
+   ilo_blitter_set_depth_clear_value(blitter, clear_value);
    ilo_blitter_set_fb_from_surface(blitter, zs);
 
    uses = ILO_BLITTER_USE_DSA;
@@ -421,7 +442,7 @@
                                unsigned level, unsigned slice)
 {
    struct ilo_texture *tex = ilo_texture(res);
-   struct pipe_depth_stencil_alpha_state dsa_state;
+   struct ilo_state_cc_info info;
    const struct ilo_texture_slice *s =
       ilo_texture_get_slice(tex, level, slice);
 
@@ -435,16 +456,18 @@
     *      to NEVER. Depth Buffer Write Enable must be enabled. Stencil Test
     *      Enable and Stencil Buffer Write Enable must be disabled."
     */
-   memset(&dsa_state, 0, sizeof(dsa_state));
-   dsa_state.depth.writemask = true;
-   dsa_state.depth.enabled = true;
-   dsa_state.depth.func = PIPE_FUNC_NEVER;
+   memset(&info, 0, sizeof(info));
+   info.depth.cv_has_buffer = true;
+   info.depth.test_enable = true;
+   info.depth.write_enable = true;
+   info.depth.test_func = GEN6_COMPAREFUNCTION_NEVER;
 
    ilo_blitter_set_invariants(blitter);
-   ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_Z);
+   ilo_blitter_set_earlyz_op(blitter,
+         ILO_STATE_RASTER_EARLYZ_DEPTH_RESOLVE, false);
 
-   ilo_blitter_set_dsa(blitter, &dsa_state);
-   ilo_blitter_set_clear_values(blitter, s->clear_value, 0);
+   ilo_blitter_set_cc(blitter, &info);
+   ilo_blitter_set_depth_clear_value(blitter, s->clear_value);
    ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice);
    ilo_blitter_set_uses(blitter,
          ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
@@ -458,7 +481,7 @@
                                  unsigned level, unsigned slice)
 {
    struct ilo_texture *tex = ilo_texture(res);
-   struct pipe_depth_stencil_alpha_state dsa_state;
+   struct ilo_state_cc_info info;
 
    if (!ilo_image_can_enable_aux(&tex->image, level))
       return;
@@ -470,13 +493,15 @@
     *      disabled. Depth Buffer Write Enable must be enabled. Stencil Test
     *      Enable and Stencil Buffer Write Enable must be disabled."
     */
-   memset(&dsa_state, 0, sizeof(dsa_state));
-   dsa_state.depth.writemask = true;
+   memset(&info, 0, sizeof(info));
+   info.depth.cv_has_buffer = true;
+   info.depth.write_enable = true;
 
    ilo_blitter_set_invariants(blitter);
-   ilo_blitter_set_op(blitter, ILO_BLITTER_RECTLIST_RESOLVE_HIZ);
+   ilo_blitter_set_earlyz_op(blitter,
+         ILO_STATE_RASTER_EARLYZ_HIZ_RESOLVE, false);
 
-   ilo_blitter_set_dsa(blitter, &dsa_state);
+   ilo_blitter_set_cc(blitter, &info);
    ilo_blitter_set_fb_from_resource(blitter, res, res->format, level, slice);
    ilo_blitter_set_uses(blitter,
          ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_FB_DEPTH);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_common.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_common.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_common.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_common.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,6 +28,14 @@
 #ifndef ILO_COMMON_H
 #define ILO_COMMON_H
 
+#include "pipe/p_format.h"
+#include "pipe/p_defines.h"
+
+#include "util/list.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_pointer.h"
+
 #include "core/ilo_core.h"
 #include "core/ilo_debug.h"
 #include "core/ilo_dev.h"
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -62,6 +62,8 @@
          (flags & PIPE_FLUSH_END_OF_FRAME) ? "frame end" : "user request");
 
    if (f) {
+      struct pipe_screen *screen = pipe->screen;
+      screen->fence_reference(screen, f, NULL);
       *f = ilo_screen_fence_create(pipe->screen, ilo->cp->last_submitted_bo);
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -444,6 +444,7 @@
                          const struct pipe_draw_info *info)
 {
    const struct ilo_ib_state *ib = &ilo->state_vector.ib;
+   const struct ilo_vma *vma;
    union {
       const void *ptr;
       const uint8_t *u8;
@@ -452,12 +453,14 @@
    } u;
 
    /* we will draw with IB mapped */
-   if (ib->buffer) {
-      u.ptr = intel_bo_map(ilo_buffer(ib->buffer)->bo, false);
+   if (ib->state.buffer) {
+      vma = ilo_resource_get_vma(ib->state.buffer);
+      u.ptr = intel_bo_map(vma->bo, false);
       if (u.ptr)
-         u.u8 += ib->offset;
+         u.u8 += vma->bo_offset + ib->state.offset;
    } else {
-      u.ptr = ib->user_buffer;
+      vma = NULL;
+      u.ptr = ib->state.user_buffer;
    }
 
    if (!u.ptr)
@@ -483,7 +486,7 @@
       (pipe)->draw_vbo(pipe, &subinfo);                  \
 } while (0)
 
-   switch (ib->index_size) {
+   switch (ib->state.index_size) {
    case 1:
       DRAW_VBO_WITH_SW_RESTART(&ilo->base, info, u.u8);
       break;
@@ -500,8 +503,8 @@
 
 #undef DRAW_VBO_WITH_SW_RESTART
 
-   if (ib->buffer)
-      intel_bo_unmap(ilo_buffer(ib->buffer)->bo);
+   if (vma)
+      intel_bo_unmap(vma->bo);
 }
 
 static bool
@@ -511,9 +514,9 @@
    /* the restart index is fixed prior to GEN7.5 */
    if (ilo_dev_gen(ilo->dev) < ILO_GEN(7.5)) {
       const unsigned cut_index =
-         (ilo->state_vector.ib.index_size == 1) ? 0xff :
-         (ilo->state_vector.ib.index_size == 2) ? 0xffff :
-         (ilo->state_vector.ib.index_size == 4) ? 0xffffffff : 0;
+         (ilo->state_vector.ib.state.index_size == 1) ? 0xff :
+         (ilo->state_vector.ib.state.index_size == 2) ? 0xffff :
+         (ilo->state_vector.ib.state.index_size == 4) ? 0xffffffff : 0;
 
       if (info->restart_index < cut_index)
          return true;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_format.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_format.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_format.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_format.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,356 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "genhw/genhw.h"
+#include "core/ilo_state_surface.h"
+#include "core/ilo_state_vf.h"
+#include "ilo_format.h"
+
+bool
+ilo_format_support_vb(const struct ilo_dev *dev,
+                      enum pipe_format format)
+{
+   const int idx = ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
+   /* supported only if translation gives idx >= 0 and the VF code accepts it */
+   return (idx >= 0 && ilo_state_vf_valid_element_format(dev, idx));
+}
+
+bool
+ilo_format_support_sol(const struct ilo_dev *dev,
+                       enum pipe_format format)
+{
+   const int idx = ilo_format_translate(dev, format, PIPE_BIND_STREAM_OUTPUT);
+   /* supported only if idx >= 0 and it is valid for DP_SVB surface access */
+   return (idx >= 0 && ilo_state_surface_valid_format(dev,
+            ILO_STATE_SURFACE_ACCESS_DP_SVB, idx));
+}
+
+bool
+ilo_format_support_sampler(const struct ilo_dev *dev,
+                           enum pipe_format format)
+{
+   const int idx = ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW);
+   /* supported only if idx >= 0 and it is valid for SAMPLER surface access */
+   return (idx >= 0 && ilo_state_surface_valid_format(dev,
+            ILO_STATE_SURFACE_ACCESS_SAMPLER, idx));
+}
+
+bool
+ilo_format_support_rt(const struct ilo_dev *dev,
+                      enum pipe_format format)
+{
+   const int idx = ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET);
+   /* supported only if idx >= 0 and it is valid for DP_RENDER surface access */
+   return (idx >= 0 && ilo_state_surface_valid_format(dev,
+            ILO_STATE_SURFACE_ACCESS_DP_RENDER, idx));
+}
+
+bool
+ilo_format_support_zs(const struct ilo_dev *dev,
+                      enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z32_FLOAT:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return true;
+   case PIPE_FORMAT_S8_UINT:
+      /* TODO separate stencil; for now S8_UINT falls through to false */
+   default:
+      return false;
+   }
+}
+
+/**
+ * Translate a color (non-depth/stencil) pipe format to the matching hardware
+ * format.  Return -1 when the format has no entry in the mapping table.
+ */
+int
+ilo_format_translate_color(const struct ilo_dev *dev,
+                           enum pipe_format format)
+{
+   static const int format_mapping[PIPE_FORMAT_COUNT] = {
+      [PIPE_FORMAT_NONE]                  = 0,
+      [PIPE_FORMAT_B8G8R8A8_UNORM]        = GEN6_FORMAT_B8G8R8A8_UNORM,
+      [PIPE_FORMAT_B8G8R8X8_UNORM]        = GEN6_FORMAT_B8G8R8X8_UNORM,
+      [PIPE_FORMAT_A8R8G8B8_UNORM]        = 0,
+      [PIPE_FORMAT_X8R8G8B8_UNORM]        = 0,
+      [PIPE_FORMAT_B5G5R5A1_UNORM]        = GEN6_FORMAT_B5G5R5A1_UNORM,
+      [PIPE_FORMAT_B4G4R4A4_UNORM]        = GEN6_FORMAT_B4G4R4A4_UNORM,
+      [PIPE_FORMAT_B5G6R5_UNORM]          = GEN6_FORMAT_B5G6R5_UNORM,
+      [PIPE_FORMAT_R10G10B10A2_UNORM]     = GEN6_FORMAT_R10G10B10A2_UNORM,
+      [PIPE_FORMAT_L8_UNORM]              = GEN6_FORMAT_L8_UNORM,
+      [PIPE_FORMAT_A8_UNORM]              = GEN6_FORMAT_A8_UNORM,
+      [PIPE_FORMAT_I8_UNORM]              = GEN6_FORMAT_I8_UNORM,
+      [PIPE_FORMAT_L8A8_UNORM]            = GEN6_FORMAT_L8A8_UNORM,
+      [PIPE_FORMAT_L16_UNORM]             = GEN6_FORMAT_L16_UNORM,
+      [PIPE_FORMAT_UYVY]                  = GEN6_FORMAT_YCRCB_SWAPUVY,
+      [PIPE_FORMAT_YUYV]                  = GEN6_FORMAT_YCRCB_NORMAL,
+      [PIPE_FORMAT_Z16_UNORM]             = 0,
+      [PIPE_FORMAT_Z32_UNORM]             = 0,
+      [PIPE_FORMAT_Z32_FLOAT]             = 0,
+      [PIPE_FORMAT_Z24_UNORM_S8_UINT]     = 0,
+      [PIPE_FORMAT_S8_UINT_Z24_UNORM]     = 0,
+      [PIPE_FORMAT_Z24X8_UNORM]           = 0,
+      [PIPE_FORMAT_X8Z24_UNORM]           = 0,
+      [PIPE_FORMAT_S8_UINT]               = 0,
+      [PIPE_FORMAT_R64_FLOAT]             = GEN6_FORMAT_R64_FLOAT,
+      [PIPE_FORMAT_R64G64_FLOAT]          = GEN6_FORMAT_R64G64_FLOAT,
+      [PIPE_FORMAT_R64G64B64_FLOAT]       = GEN6_FORMAT_R64G64B64_FLOAT,
+      [PIPE_FORMAT_R64G64B64A64_FLOAT]    = GEN6_FORMAT_R64G64B64A64_FLOAT,
+      [PIPE_FORMAT_R32_FLOAT]             = GEN6_FORMAT_R32_FLOAT,
+      [PIPE_FORMAT_R32G32_FLOAT]          = GEN6_FORMAT_R32G32_FLOAT,
+      [PIPE_FORMAT_R32G32B32_FLOAT]       = GEN6_FORMAT_R32G32B32_FLOAT,
+      [PIPE_FORMAT_R32G32B32A32_FLOAT]    = GEN6_FORMAT_R32G32B32A32_FLOAT,
+      [PIPE_FORMAT_R32_UNORM]             = GEN6_FORMAT_R32_UNORM,
+      [PIPE_FORMAT_R32G32_UNORM]          = GEN6_FORMAT_R32G32_UNORM,
+      [PIPE_FORMAT_R32G32B32_UNORM]       = GEN6_FORMAT_R32G32B32_UNORM,
+      [PIPE_FORMAT_R32G32B32A32_UNORM]    = GEN6_FORMAT_R32G32B32A32_UNORM,
+      [PIPE_FORMAT_R32_USCALED]           = GEN6_FORMAT_R32_USCALED,
+      [PIPE_FORMAT_R32G32_USCALED]        = GEN6_FORMAT_R32G32_USCALED,
+      [PIPE_FORMAT_R32G32B32_USCALED]     = GEN6_FORMAT_R32G32B32_USCALED,
+      [PIPE_FORMAT_R32G32B32A32_USCALED]  = GEN6_FORMAT_R32G32B32A32_USCALED,
+      [PIPE_FORMAT_R32_SNORM]             = GEN6_FORMAT_R32_SNORM,
+      [PIPE_FORMAT_R32G32_SNORM]          = GEN6_FORMAT_R32G32_SNORM,
+      [PIPE_FORMAT_R32G32B32_SNORM]       = GEN6_FORMAT_R32G32B32_SNORM,
+      [PIPE_FORMAT_R32G32B32A32_SNORM]    = GEN6_FORMAT_R32G32B32A32_SNORM,
+      [PIPE_FORMAT_R32_SSCALED]           = GEN6_FORMAT_R32_SSCALED,
+      [PIPE_FORMAT_R32G32_SSCALED]        = GEN6_FORMAT_R32G32_SSCALED,
+      [PIPE_FORMAT_R32G32B32_SSCALED]     = GEN6_FORMAT_R32G32B32_SSCALED,
+      [PIPE_FORMAT_R32G32B32A32_SSCALED]  = GEN6_FORMAT_R32G32B32A32_SSCALED,
+      [PIPE_FORMAT_R16_UNORM]             = GEN6_FORMAT_R16_UNORM,
+      [PIPE_FORMAT_R16G16_UNORM]          = GEN6_FORMAT_R16G16_UNORM,
+      [PIPE_FORMAT_R16G16B16_UNORM]       = GEN6_FORMAT_R16G16B16_UNORM,
+      [PIPE_FORMAT_R16G16B16A16_UNORM]    = GEN6_FORMAT_R16G16B16A16_UNORM,
+      [PIPE_FORMAT_R16_USCALED]           = GEN6_FORMAT_R16_USCALED,
+      [PIPE_FORMAT_R16G16_USCALED]        = GEN6_FORMAT_R16G16_USCALED,
+      [PIPE_FORMAT_R16G16B16_USCALED]     = GEN6_FORMAT_R16G16B16_USCALED,
+      [PIPE_FORMAT_R16G16B16A16_USCALED]  = GEN6_FORMAT_R16G16B16A16_USCALED,
+      [PIPE_FORMAT_R16_SNORM]             = GEN6_FORMAT_R16_SNORM,
+      [PIPE_FORMAT_R16G16_SNORM]          = GEN6_FORMAT_R16G16_SNORM,
+      [PIPE_FORMAT_R16G16B16_SNORM]       = GEN6_FORMAT_R16G16B16_SNORM,
+      [PIPE_FORMAT_R16G16B16A16_SNORM]    = GEN6_FORMAT_R16G16B16A16_SNORM,
+      [PIPE_FORMAT_R16_SSCALED]           = GEN6_FORMAT_R16_SSCALED,
+      [PIPE_FORMAT_R16G16_SSCALED]        = GEN6_FORMAT_R16G16_SSCALED,
+      [PIPE_FORMAT_R16G16B16_SSCALED]     = GEN6_FORMAT_R16G16B16_SSCALED,
+      [PIPE_FORMAT_R16G16B16A16_SSCALED]  = GEN6_FORMAT_R16G16B16A16_SSCALED,
+      [PIPE_FORMAT_R8_UNORM]              = GEN6_FORMAT_R8_UNORM,
+      [PIPE_FORMAT_R8G8_UNORM]            = GEN6_FORMAT_R8G8_UNORM,
+      [PIPE_FORMAT_R8G8B8_UNORM]          = GEN6_FORMAT_R8G8B8_UNORM,
+      [PIPE_FORMAT_R8G8B8A8_UNORM]        = GEN6_FORMAT_R8G8B8A8_UNORM,
+      [PIPE_FORMAT_X8B8G8R8_UNORM]        = 0,
+      [PIPE_FORMAT_R8_USCALED]            = GEN6_FORMAT_R8_USCALED,
+      [PIPE_FORMAT_R8G8_USCALED]          = GEN6_FORMAT_R8G8_USCALED,
+      [PIPE_FORMAT_R8G8B8_USCALED]        = GEN6_FORMAT_R8G8B8_USCALED,
+      [PIPE_FORMAT_R8G8B8A8_USCALED]      = GEN6_FORMAT_R8G8B8A8_USCALED,
+      [PIPE_FORMAT_R8_SNORM]              = GEN6_FORMAT_R8_SNORM,
+      [PIPE_FORMAT_R8G8_SNORM]            = GEN6_FORMAT_R8G8_SNORM,
+      [PIPE_FORMAT_R8G8B8_SNORM]          = GEN6_FORMAT_R8G8B8_SNORM,
+      [PIPE_FORMAT_R8G8B8A8_SNORM]        = GEN6_FORMAT_R8G8B8A8_SNORM,
+      [PIPE_FORMAT_R8_SSCALED]            = GEN6_FORMAT_R8_SSCALED,
+      [PIPE_FORMAT_R8G8_SSCALED]          = GEN6_FORMAT_R8G8_SSCALED,
+      [PIPE_FORMAT_R8G8B8_SSCALED]        = GEN6_FORMAT_R8G8B8_SSCALED,
+      [PIPE_FORMAT_R8G8B8A8_SSCALED]      = GEN6_FORMAT_R8G8B8A8_SSCALED,
+      [PIPE_FORMAT_R32_FIXED]             = GEN6_FORMAT_R32_SFIXED,
+      [PIPE_FORMAT_R32G32_FIXED]          = GEN6_FORMAT_R32G32_SFIXED,
+      [PIPE_FORMAT_R32G32B32_FIXED]       = GEN6_FORMAT_R32G32B32_SFIXED,
+      [PIPE_FORMAT_R32G32B32A32_FIXED]    = GEN6_FORMAT_R32G32B32A32_SFIXED,
+      [PIPE_FORMAT_R16_FLOAT]             = GEN6_FORMAT_R16_FLOAT,
+      [PIPE_FORMAT_R16G16_FLOAT]          = GEN6_FORMAT_R16G16_FLOAT,
+      [PIPE_FORMAT_R16G16B16_FLOAT]       = GEN6_FORMAT_R16G16B16_FLOAT,
+      [PIPE_FORMAT_R16G16B16A16_FLOAT]    = GEN6_FORMAT_R16G16B16A16_FLOAT,
+      [PIPE_FORMAT_L8_SRGB]               = GEN6_FORMAT_L8_UNORM_SRGB,
+      [PIPE_FORMAT_L8A8_SRGB]             = GEN6_FORMAT_L8A8_UNORM_SRGB,
+      [PIPE_FORMAT_R8G8B8_SRGB]           = GEN6_FORMAT_R8G8B8_UNORM_SRGB,
+      [PIPE_FORMAT_A8B8G8R8_SRGB]         = 0,
+      [PIPE_FORMAT_X8B8G8R8_SRGB]         = 0,
+      [PIPE_FORMAT_B8G8R8A8_SRGB]         = GEN6_FORMAT_B8G8R8A8_UNORM_SRGB,
+      [PIPE_FORMAT_B8G8R8X8_SRGB]         = GEN6_FORMAT_B8G8R8X8_UNORM_SRGB,
+      [PIPE_FORMAT_A8R8G8B8_SRGB]         = 0,
+      [PIPE_FORMAT_X8R8G8B8_SRGB]         = 0,
+      [PIPE_FORMAT_R8G8B8A8_SRGB]         = GEN6_FORMAT_R8G8B8A8_UNORM_SRGB,
+      [PIPE_FORMAT_DXT1_RGB]              = GEN6_FORMAT_DXT1_RGB,
+      [PIPE_FORMAT_DXT1_RGBA]             = GEN6_FORMAT_BC1_UNORM,
+      [PIPE_FORMAT_DXT3_RGBA]             = GEN6_FORMAT_BC2_UNORM,
+      [PIPE_FORMAT_DXT5_RGBA]             = GEN6_FORMAT_BC3_UNORM,
+      [PIPE_FORMAT_DXT1_SRGB]             = GEN6_FORMAT_DXT1_RGB_SRGB,
+      [PIPE_FORMAT_DXT1_SRGBA]            = GEN6_FORMAT_BC1_UNORM_SRGB,
+      [PIPE_FORMAT_DXT3_SRGBA]            = GEN6_FORMAT_BC2_UNORM_SRGB,
+      [PIPE_FORMAT_DXT5_SRGBA]            = GEN6_FORMAT_BC3_UNORM_SRGB,
+      [PIPE_FORMAT_RGTC1_UNORM]           = GEN6_FORMAT_BC4_UNORM,
+      [PIPE_FORMAT_RGTC1_SNORM]           = GEN6_FORMAT_BC4_SNORM,
+      [PIPE_FORMAT_RGTC2_UNORM]           = GEN6_FORMAT_BC5_UNORM,
+      [PIPE_FORMAT_RGTC2_SNORM]           = GEN6_FORMAT_BC5_SNORM,
+      [PIPE_FORMAT_R8G8_B8G8_UNORM]       = 0,
+      [PIPE_FORMAT_G8R8_G8B8_UNORM]       = 0,
+      [PIPE_FORMAT_R8SG8SB8UX8U_NORM]     = 0,
+      [PIPE_FORMAT_R5SG5SB6U_NORM]        = 0,
+      [PIPE_FORMAT_A8B8G8R8_UNORM]        = 0,
+      [PIPE_FORMAT_B5G5R5X1_UNORM]        = GEN6_FORMAT_B5G5R5X1_UNORM,
+      [PIPE_FORMAT_R10G10B10A2_USCALED]   = GEN6_FORMAT_R10G10B10A2_USCALED,
+      [PIPE_FORMAT_R11G11B10_FLOAT]       = GEN6_FORMAT_R11G11B10_FLOAT,
+      [PIPE_FORMAT_R9G9B9E5_FLOAT]        = GEN6_FORMAT_R9G9B9E5_SHAREDEXP,
+      [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT]  = 0,
+      [PIPE_FORMAT_R1_UNORM]              = GEN6_FORMAT_R1_UNORM,
+      [PIPE_FORMAT_R10G10B10X2_USCALED]   = GEN6_FORMAT_R10G10B10X2_USCALED,
+      [PIPE_FORMAT_R10G10B10X2_SNORM]     = 0,
+      [PIPE_FORMAT_L4A4_UNORM]            = 0,
+      [PIPE_FORMAT_B10G10R10A2_UNORM]     = GEN6_FORMAT_B10G10R10A2_UNORM,
+      [PIPE_FORMAT_R10SG10SB10SA2U_NORM]  = 0,
+      [PIPE_FORMAT_R8G8Bx_SNORM]          = 0,
+      [PIPE_FORMAT_R8G8B8X8_UNORM]        = GEN6_FORMAT_R8G8B8X8_UNORM,
+      [PIPE_FORMAT_B4G4R4X4_UNORM]        = 0,
+      [PIPE_FORMAT_X24S8_UINT]            = 0,
+      [PIPE_FORMAT_S8X24_UINT]            = 0,
+      [PIPE_FORMAT_X32_S8X24_UINT]        = 0,
+      [PIPE_FORMAT_B2G3R3_UNORM]          = 0,
+      [PIPE_FORMAT_L16A16_UNORM]          = GEN6_FORMAT_L16A16_UNORM,
+      [PIPE_FORMAT_A16_UNORM]             = GEN6_FORMAT_A16_UNORM,
+      [PIPE_FORMAT_I16_UNORM]             = GEN6_FORMAT_I16_UNORM,
+      [PIPE_FORMAT_LATC1_UNORM]           = 0,
+      [PIPE_FORMAT_LATC1_SNORM]           = 0,
+      [PIPE_FORMAT_LATC2_UNORM]           = 0,
+      [PIPE_FORMAT_LATC2_SNORM]           = 0,
+      [PIPE_FORMAT_A8_SNORM]              = 0,
+      [PIPE_FORMAT_L8_SNORM]              = 0,
+      [PIPE_FORMAT_L8A8_SNORM]            = 0,
+      [PIPE_FORMAT_I8_SNORM]              = 0,
+      [PIPE_FORMAT_A16_SNORM]             = 0,
+      [PIPE_FORMAT_L16_SNORM]             = 0,
+      [PIPE_FORMAT_L16A16_SNORM]          = 0,
+      [PIPE_FORMAT_I16_SNORM]             = 0,
+      [PIPE_FORMAT_A16_FLOAT]             = GEN6_FORMAT_A16_FLOAT,
+      [PIPE_FORMAT_L16_FLOAT]             = GEN6_FORMAT_L16_FLOAT,
+      [PIPE_FORMAT_L16A16_FLOAT]          = GEN6_FORMAT_L16A16_FLOAT,
+      [PIPE_FORMAT_I16_FLOAT]             = GEN6_FORMAT_I16_FLOAT,
+      [PIPE_FORMAT_A32_FLOAT]             = GEN6_FORMAT_A32_FLOAT,
+      [PIPE_FORMAT_L32_FLOAT]             = GEN6_FORMAT_L32_FLOAT,
+      [PIPE_FORMAT_L32A32_FLOAT]          = GEN6_FORMAT_L32A32_FLOAT,
+      [PIPE_FORMAT_I32_FLOAT]             = GEN6_FORMAT_I32_FLOAT,
+      [PIPE_FORMAT_YV12]                  = 0,
+      [PIPE_FORMAT_YV16]                  = 0,
+      [PIPE_FORMAT_IYUV]                  = 0,
+      [PIPE_FORMAT_NV12]                  = 0,
+      [PIPE_FORMAT_NV21]                  = 0,
+      [PIPE_FORMAT_A4R4_UNORM]            = 0,
+      [PIPE_FORMAT_R4A4_UNORM]            = 0,
+      [PIPE_FORMAT_R8A8_UNORM]            = 0,
+      [PIPE_FORMAT_A8R8_UNORM]            = 0,
+      [PIPE_FORMAT_R10G10B10A2_SSCALED]   = GEN6_FORMAT_R10G10B10A2_SSCALED,
+      [PIPE_FORMAT_R10G10B10A2_SNORM]     = GEN6_FORMAT_R10G10B10A2_SNORM,
+      [PIPE_FORMAT_B10G10R10A2_USCALED]   = GEN6_FORMAT_B10G10R10A2_USCALED,
+      [PIPE_FORMAT_B10G10R10A2_SSCALED]   = GEN6_FORMAT_B10G10R10A2_SSCALED,
+      [PIPE_FORMAT_B10G10R10A2_SNORM]     = GEN6_FORMAT_B10G10R10A2_SNORM,
+      [PIPE_FORMAT_R8_UINT]               = GEN6_FORMAT_R8_UINT,
+      [PIPE_FORMAT_R8G8_UINT]             = GEN6_FORMAT_R8G8_UINT,
+      [PIPE_FORMAT_R8G8B8_UINT]           = GEN6_FORMAT_R8G8B8_UINT,
+      [PIPE_FORMAT_R8G8B8A8_UINT]         = GEN6_FORMAT_R8G8B8A8_UINT,
+      [PIPE_FORMAT_R8_SINT]               = GEN6_FORMAT_R8_SINT,
+      [PIPE_FORMAT_R8G8_SINT]             = GEN6_FORMAT_R8G8_SINT,
+      [PIPE_FORMAT_R8G8B8_SINT]           = GEN6_FORMAT_R8G8B8_SINT,
+      [PIPE_FORMAT_R8G8B8A8_SINT]         = GEN6_FORMAT_R8G8B8A8_SINT,
+      [PIPE_FORMAT_R16_UINT]              = GEN6_FORMAT_R16_UINT,
+      [PIPE_FORMAT_R16G16_UINT]           = GEN6_FORMAT_R16G16_UINT,
+      [PIPE_FORMAT_R16G16B16_UINT]        = GEN6_FORMAT_R16G16B16_UINT,
+      [PIPE_FORMAT_R16G16B16A16_UINT]     = GEN6_FORMAT_R16G16B16A16_UINT,
+      [PIPE_FORMAT_R16_SINT]              = GEN6_FORMAT_R16_SINT,
+      [PIPE_FORMAT_R16G16_SINT]           = GEN6_FORMAT_R16G16_SINT,
+      [PIPE_FORMAT_R16G16B16_SINT]        = GEN6_FORMAT_R16G16B16_SINT,
+      [PIPE_FORMAT_R16G16B16A16_SINT]     = GEN6_FORMAT_R16G16B16A16_SINT,
+      [PIPE_FORMAT_R32_UINT]              = GEN6_FORMAT_R32_UINT,
+      [PIPE_FORMAT_R32G32_UINT]           = GEN6_FORMAT_R32G32_UINT,
+      [PIPE_FORMAT_R32G32B32_UINT]        = GEN6_FORMAT_R32G32B32_UINT,
+      [PIPE_FORMAT_R32G32B32A32_UINT]     = GEN6_FORMAT_R32G32B32A32_UINT,
+      [PIPE_FORMAT_R32_SINT]              = GEN6_FORMAT_R32_SINT,
+      [PIPE_FORMAT_R32G32_SINT]           = GEN6_FORMAT_R32G32_SINT,
+      [PIPE_FORMAT_R32G32B32_SINT]        = GEN6_FORMAT_R32G32B32_SINT,
+      [PIPE_FORMAT_R32G32B32A32_SINT]     = GEN6_FORMAT_R32G32B32A32_SINT,
+      [PIPE_FORMAT_A8_UINT]               = 0,
+      [PIPE_FORMAT_I8_UINT]               = GEN6_FORMAT_I8_UINT,
+      [PIPE_FORMAT_L8_UINT]               = GEN6_FORMAT_L8_UINT,
+      [PIPE_FORMAT_L8A8_UINT]             = GEN6_FORMAT_L8A8_UINT,
+      [PIPE_FORMAT_A8_SINT]               = 0,
+      [PIPE_FORMAT_I8_SINT]               = GEN6_FORMAT_I8_SINT,
+      [PIPE_FORMAT_L8_SINT]               = GEN6_FORMAT_L8_SINT,
+      [PIPE_FORMAT_L8A8_SINT]             = GEN6_FORMAT_L8A8_SINT,
+      [PIPE_FORMAT_A16_UINT]              = 0,
+      [PIPE_FORMAT_I16_UINT]              = 0,
+      [PIPE_FORMAT_L16_UINT]              = 0,
+      [PIPE_FORMAT_L16A16_UINT]           = 0,
+      [PIPE_FORMAT_A16_SINT]              = 0,
+      [PIPE_FORMAT_I16_SINT]              = 0,
+      [PIPE_FORMAT_L16_SINT]              = 0,
+      [PIPE_FORMAT_L16A16_SINT]           = 0,
+      [PIPE_FORMAT_A32_UINT]              = 0,
+      [PIPE_FORMAT_I32_UINT]              = 0,
+      [PIPE_FORMAT_L32_UINT]              = 0,
+      [PIPE_FORMAT_L32A32_UINT]           = 0,
+      [PIPE_FORMAT_A32_SINT]              = 0,
+      [PIPE_FORMAT_I32_SINT]              = 0,
+      [PIPE_FORMAT_L32_SINT]              = 0,
+      [PIPE_FORMAT_L32A32_SINT]           = 0,
+      [PIPE_FORMAT_B10G10R10A2_UINT]      = GEN6_FORMAT_B10G10R10A2_UINT,
+      [PIPE_FORMAT_ETC1_RGB8]             = GEN6_FORMAT_ETC1_RGB8,
+      [PIPE_FORMAT_R8G8_R8B8_UNORM]       = 0,
+      [PIPE_FORMAT_G8R8_B8R8_UNORM]       = 0,
+      [PIPE_FORMAT_R8G8B8X8_SNORM]        = 0,
+      [PIPE_FORMAT_R8G8B8X8_SRGB]         = 0,
+      [PIPE_FORMAT_R8G8B8X8_UINT]         = 0,
+      [PIPE_FORMAT_R8G8B8X8_SINT]         = 0,
+      [PIPE_FORMAT_B10G10R10X2_UNORM]     = GEN6_FORMAT_B10G10R10X2_UNORM,
+      [PIPE_FORMAT_R16G16B16X16_UNORM]    = GEN6_FORMAT_R16G16B16X16_UNORM,
+      [PIPE_FORMAT_R16G16B16X16_SNORM]    = 0,
+      [PIPE_FORMAT_R16G16B16X16_FLOAT]    = GEN6_FORMAT_R16G16B16X16_FLOAT,
+      [PIPE_FORMAT_R16G16B16X16_UINT]     = 0,
+      [PIPE_FORMAT_R16G16B16X16_SINT]     = 0,
+      [PIPE_FORMAT_R32G32B32X32_FLOAT]    = GEN6_FORMAT_R32G32B32X32_FLOAT,
+      [PIPE_FORMAT_R32G32B32X32_UINT]     = 0,
+      [PIPE_FORMAT_R32G32B32X32_SINT]     = 0,
+      [PIPE_FORMAT_R8A8_SNORM]            = 0,
+      [PIPE_FORMAT_R16A16_UNORM]          = 0,
+      [PIPE_FORMAT_R16A16_SNORM]          = 0,
+      [PIPE_FORMAT_R16A16_FLOAT]          = 0,
+      [PIPE_FORMAT_R32A32_FLOAT]          = 0,
+      [PIPE_FORMAT_R8A8_UINT]             = 0,
+      [PIPE_FORMAT_R8A8_SINT]             = 0,
+      [PIPE_FORMAT_R16A16_UINT]           = 0,
+      [PIPE_FORMAT_R16A16_SINT]           = 0,
+      [PIPE_FORMAT_R32A32_UINT]           = 0,
+      [PIPE_FORMAT_R32A32_SINT]           = 0,
+      [PIPE_FORMAT_R10G10B10A2_UINT]      = GEN6_FORMAT_R10G10B10A2_UINT,
+      [PIPE_FORMAT_B5G6R5_SRGB]           = GEN6_FORMAT_B5G6R5_UNORM_SRGB,
+   };
+   int sfmt = format_mapping[format];
+
+   /* GEN6_FORMAT_R32G32B32A32_FLOAT happens to be 0 */
+   if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT)
+      sfmt = -1;
+
+   return sfmt;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_format.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_format.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_format.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_format.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,203 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_FORMAT_H
+#define ILO_FORMAT_H
+
+#include "genhw/genhw.h"
+
+#include "ilo_common.h"
+
+bool
+ilo_format_support_vb(const struct ilo_dev *dev,
+                      enum pipe_format format);
+
+bool
+ilo_format_support_sol(const struct ilo_dev *dev,
+                       enum pipe_format format);
+
+bool
+ilo_format_support_sampler(const struct ilo_dev *dev,
+                           enum pipe_format format);
+
+bool
+ilo_format_support_rt(const struct ilo_dev *dev,
+                      enum pipe_format format);
+
+bool
+ilo_format_support_zs(const struct ilo_dev *dev,
+                      enum pipe_format format);
+
+int
+ilo_format_translate_color(const struct ilo_dev *dev,
+                           enum pipe_format format);
+
+/**
+ * Map a gallium pipe format to the hardware surface format to program for
+ * the binding named by bind (one of PIPE_BIND_RENDER_TARGET,
+ * PIPE_BIND_SAMPLER_VIEW, PIPE_BIND_VERTEX_BUFFER, PIPE_BIND_STREAM_OUTPUT).
+ *
+ * A handful of formats are substituted per binding, as described in the
+ * body; everything else is looked up with ilo_format_translate_color().
+ * Returns -1 on errors; an unrecognized bind also trips an assertion.
+ *
+ * This is an inline function not only for performance reasons.  There are
+ * caveats that the callers should be aware of before calling this function.
+ */
+static inline int
+ilo_format_translate(const struct ilo_dev *dev,
+                     enum pipe_format format, unsigned bind)
+{
+   if (bind == PIPE_BIND_RENDER_TARGET) {
+      /*
+       * Some RGBX formats are not supported as render target formats.  But
+       * their RGBA counterparts can be used when the destination alpha is
+       * forced to one whenever blending is enabled.
+       */
+      if (format == PIPE_FORMAT_B8G8R8X8_UNORM)
+         return GEN6_FORMAT_B8G8R8A8_UNORM;
+   } else if (bind == PIPE_BIND_SAMPLER_VIEW) {
+      /*
+       * For depth formats, we want the depth values to be returned as R
+       * values.  But many places assume that the depth values are returned
+       * as I values (util_make_fragment_tex_shader_writedepth() is one such
+       * example), so the I formats are used at least for now.
+       *
+       * For ETC1 format, the texture data will be decompressed before being
+       * written to the bo.  See tex_staging_sys_convert_write().
+       */
+      switch (format) {
+      case PIPE_FORMAT_Z16_UNORM:
+         return GEN6_FORMAT_I16_UNORM;
+      case PIPE_FORMAT_Z32_FLOAT:
+         return GEN6_FORMAT_I32_FLOAT;
+      case PIPE_FORMAT_Z24X8_UNORM:
+      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+         return GEN6_FORMAT_I24X8_UNORM;
+      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+         return GEN6_FORMAT_I32X32_FLOAT;
+      case PIPE_FORMAT_ETC1_RGB8:
+         return GEN6_FORMAT_R8G8B8X8_UNORM;
+      default:
+         break;
+      }
+   } else if (bind == PIPE_BIND_VERTEX_BUFFER) {
+      /*
+       * Some 3-component formats are not supported as vertex element formats.
+       * But since we move between vertices using vb->stride, we should be
+       * good to use their 4-component counterparts if we force the W
+       * component to be one.  The only exception is that the vb boundary
+       * check for the last vertex may fail.
+       *
+       * The substitution is skipped on Gen7.5 and later.
+       */
+      if (ilo_dev_gen(dev) < ILO_GEN(7.5)) {
+         switch (format) {
+         case PIPE_FORMAT_R16G16B16_FLOAT:
+            return GEN6_FORMAT_R16G16B16A16_FLOAT;
+         case PIPE_FORMAT_R16G16B16_UINT:
+            return GEN6_FORMAT_R16G16B16A16_UINT;
+         case PIPE_FORMAT_R16G16B16_SINT:
+            return GEN6_FORMAT_R16G16B16A16_SINT;
+         case PIPE_FORMAT_R8G8B8_UINT:
+            return GEN6_FORMAT_R8G8B8A8_UINT;
+         case PIPE_FORMAT_R8G8B8_SINT:
+            return GEN6_FORMAT_R8G8B8A8_SINT;
+         default:
+            break;
+         }
+      }
+   } else if (bind != PIPE_BIND_STREAM_OUTPUT) {
+      /*
+       * Stream output needs no substitution; any other value is not a
+       * binding this function can translate for.
+       */
+      assert(!"cannot translate format");
+      return -1;
+   }
+
+   /* no special case applied: fall back to the plain color translation */
+   return ilo_format_translate_color(dev, format);
+}
+
+/* Shorthand for ilo_format_translate() with PIPE_BIND_RENDER_TARGET. */
+static inline int
+ilo_format_translate_render(const struct ilo_dev *dev, enum pipe_format format)
+{
+   return ilo_format_translate(dev, format, PIPE_BIND_RENDER_TARGET);
+}
+
+/* Shorthand for ilo_format_translate() with PIPE_BIND_SAMPLER_VIEW. */
+static inline int
+ilo_format_translate_texture(const struct ilo_dev *dev, enum pipe_format format)
+{
+   return ilo_format_translate(dev, format, PIPE_BIND_SAMPLER_VIEW);
+}
+
+/* Shorthand for ilo_format_translate() with PIPE_BIND_VERTEX_BUFFER. */
+static inline int
+ilo_format_translate_vertex(const struct ilo_dev *dev, enum pipe_format format)
+{
+   return ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
+}
+
+/*
+ * Translate a pipe depth/stencil format to the hardware depth format.
+ * Formats not listed for the device's Gen assert and yield D32_FLOAT.
+ */
+static inline enum gen_depth_format
+ilo_format_translate_depth(const struct ilo_dev *dev,
+                           enum pipe_format format)
+{
+   const bool gen7_or_later = (ilo_dev_gen(dev) >= ILO_GEN(7));
+
+   switch (format) {
+   case PIPE_FORMAT_Z32_FLOAT:
+      return GEN6_ZFORMAT_D32_FLOAT;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+   case PIPE_FORMAT_Z16_UNORM:
+      return GEN6_ZFORMAT_D16_UNORM;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      /* combined depth/stencil is only translated before Gen7 */
+      if (!gen7_or_later)
+         return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+      break;
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      if (!gen7_or_later)
+         return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+      break;
+   default:
+      break;
+   }
+
+   /* keep returning a valid enum value when asserts are compiled out */
+   assert(!"unknown depth format");
+   return GEN6_ZFORMAT_D32_FLOAT;
+}
+
+#endif /* ILO_FORMAT_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render.c	2015-09-16 14:36:09.000000000 +0000
@@ -35,76 +35,10 @@
 #include "ilo_query.h"
 #include "ilo_render_gen.h"
 
-/* in S1.3 */
-struct sample_position {
-   int8_t x, y;
-};
-
-static const struct sample_position ilo_sample_pattern_1x[1] = {
-   {  0,  0 },
-};
-
-static const struct sample_position ilo_sample_pattern_2x[2] = {
-   { -4, -4 },
-   {  4,  4 },
-};
-
-static const struct sample_position ilo_sample_pattern_4x[4] = {
-   { -2, -6 },
-   {  6, -2 },
-   { -6,  2 },
-   {  2,  6 },
-};
-
-/* \see brw_multisample_positions_8x */
-static const struct sample_position ilo_sample_pattern_8x[8] = {
-   { -1,  1 },
-   {  1,  5 },
-   {  3, -5 },
-   {  5,  3 },
-   { -7, -1 },
-   { -3, -7 },
-   {  7, -3 },
-   { -5,  7 },
-};
-
-static const struct sample_position ilo_sample_pattern_16x[16] = {
-   {  0,  2 },
-   {  3,  0 },
-   { -3, -2 },
-   { -2, -4 },
-   {  4,  3 },
-   {  5,  1 },
-   {  6, -1 },
-   {  2, -6 },
-   { -4,  5 },
-   { -5, -5 },
-   { -1, -7 },
-   {  7, -3 },
-   { -7,  4 },
-   {  1, -8 },
-   { -6,  6 },
-   { -8,  7 },
-};
-
-static uint8_t
-pack_sample_position(const struct sample_position *pos)
-{
-   return (pos->x + 8) << 4 | (pos->y + 8);
-}
-
-static void
-get_sample_position(const struct sample_position *pos, float *x, float *y)
-{
-   *x = (float) (pos->x + 8) / 16.0f;
-   *y = (float) (pos->y + 8) / 16.0f;
-}
-
 struct ilo_render *
 ilo_render_create(struct ilo_builder *builder)
 {
    struct ilo_render *render;
-   int i;
 
    render = CALLOC_STRUCT(ilo_render);
    if (!render)
@@ -121,29 +55,8 @@
       return NULL;
    }
 
-   /* pack into dwords */
-   render->sample_pattern_1x = pack_sample_position(ilo_sample_pattern_1x);
-   render->sample_pattern_2x =
-      pack_sample_position(&ilo_sample_pattern_2x[1]) << 8 |
-      pack_sample_position(&ilo_sample_pattern_2x[0]);
-   for (i = 0; i < 4; i++) {
-      render->sample_pattern_4x |=
-         pack_sample_position(&ilo_sample_pattern_4x[i]) << (8 * i);
-
-      render->sample_pattern_8x[0] |=
-         pack_sample_position(&ilo_sample_pattern_8x[i]) << (8 * i);
-      render->sample_pattern_8x[1] |=
-         pack_sample_position(&ilo_sample_pattern_8x[i + 4]) << (8 * i);
-
-      render->sample_pattern_16x[0] |=
-         pack_sample_position(&ilo_sample_pattern_16x[i]) << (8 * i);
-      render->sample_pattern_16x[1] |=
-         pack_sample_position(&ilo_sample_pattern_16x[i + 4]) << (8 * i);
-      render->sample_pattern_16x[2] |=
-         pack_sample_position(&ilo_sample_pattern_16x[i + 8]) << (8 * i);
-      render->sample_pattern_16x[3] |=
-         pack_sample_position(&ilo_sample_pattern_16x[i + 12]) << (8 * i);
-   }
+   ilo_state_sample_pattern_init_default(&render->sample_pattern,
+         render->dev);
 
    ilo_render_invalidate_hw(render);
    ilo_render_invalidate_builder(render);
@@ -164,38 +77,13 @@
                                unsigned sample_index,
                                float *x, float *y)
 {
-   const struct sample_position *pattern;
+   uint8_t off_x, off_y;
 
-   switch (sample_count) {
-   case 1:
-      assert(sample_index < Elements(ilo_sample_pattern_1x));
-      pattern = ilo_sample_pattern_1x;
-      break;
-   case 2:
-      assert(sample_index < Elements(ilo_sample_pattern_2x));
-      pattern = ilo_sample_pattern_2x;
-      break;
-   case 4:
-      assert(sample_index < Elements(ilo_sample_pattern_4x));
-      pattern = ilo_sample_pattern_4x;
-      break;
-   case 8:
-      assert(sample_index < Elements(ilo_sample_pattern_8x));
-      pattern = ilo_sample_pattern_8x;
-      break;
-   case 16:
-      assert(sample_index < Elements(ilo_sample_pattern_16x));
-      pattern = ilo_sample_pattern_16x;
-      break;
-   default:
-      assert(!"unknown sample count");
-      *x = 0.5f;
-      *y = 0.5f;
-      return;
-      break;
-   }
+   ilo_state_sample_pattern_get_offset(&render->sample_pattern, render->dev,
+         sample_count, sample_index, &off_x, &off_y);
 
-   get_sample_position(&pattern[sample_index], x, y);
+   *x = (float) off_x / 16.0f;
+   *y = (float) off_y / 16.0f;
 }
 
 void
@@ -446,12 +334,44 @@
       render->instruction_bo_changed = true;
 
       session->prim_changed = true;
-      session->primitive_restart_changed = true;
+
+      ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta);
+      ilo_state_vf_full_delta(&vec->ve->vf, render->dev, &session->vf_delta);
+
+      ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev,
+            &session->rs_delta);
+
+      ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev,
+            &session->vp_delta);
+
+      ilo_state_cc_full_delta(&vec->blend->cc, render->dev,
+            &session->cc_delta);
    } else {
       session->prim_changed =
          (render->state.reduced_prim != session->reduced_prim);
-      session->primitive_restart_changed =
-         (render->state.primitive_restart != vec->draw->primitive_restart);
+
+      ilo_state_urb_get_delta(&vec->urb, render->dev,
+            &render->state.urb, &session->urb_delta);
+
+      if (vec->dirty & ILO_DIRTY_VE) {
+         ilo_state_vf_full_delta(&vec->ve->vf, render->dev,
+               &session->vf_delta);
+      }
+
+      if (vec->dirty & ILO_DIRTY_RASTERIZER) {
+         ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev,
+               &render->state.rs, &session->rs_delta);
+      }
+
+      if (vec->dirty & ILO_DIRTY_VIEWPORT) {
+         ilo_state_viewport_full_delta(&vec->viewport.vp, render->dev,
+               &session->vp_delta);
+      }
+
+      if (vec->dirty & ILO_DIRTY_BLEND) {
+         ilo_state_cc_get_delta(&vec->blend->cc, render->dev,
+               &render->state.cc, &session->cc_delta);
+      }
    }
 }
 
@@ -467,7 +387,10 @@
    render->instruction_bo_changed = false;
 
    render->state.reduced_prim = session->reduced_prim;
-   render->state.primitive_restart = vec->draw->primitive_restart;
+
+   render->state.urb = vec->urb;
+   render->state.rs = vec->rasterizer->rs;
+   render->state.cc = vec->blend->cc;
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_dynamic.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_dynamic.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_dynamic.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_dynamic.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,7 @@
 
 #include "ilo_common.h"
 #include "ilo_blitter.h"
+#include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
 
@@ -42,16 +43,14 @@
 {
    ILO_DEV_ASSERT(r->dev, 6, 6);
 
-   /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */
-   if (DIRTY(VIEWPORT)) {
+   /* CLIP_VIEWPORT, SF_VIEWPORT, and CC_VIEWPORT */
+   if ((session->vp_delta.dirty & (ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT |
+                                   ILO_STATE_VIEWPORT_CC_VIEWPORT)) ||
+       r->state_bo_changed) {
       r->state.CLIP_VIEWPORT = gen6_CLIP_VIEWPORT(r->builder,
-            vec->viewport.cso, vec->viewport.count);
-
-      r->state.SF_VIEWPORT = gen6_SF_VIEWPORT(r->builder,
-            vec->viewport.cso, vec->viewport.count);
-
-      r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder,
-            vec->viewport.cso, vec->viewport.count);
+            &vec->viewport.vp);
+      r->state.SF_VIEWPORT = gen6_SF_VIEWPORT(r->builder, &vec->viewport.vp);
+      r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, &vec->viewport.vp);
 
       session->viewport_changed = true;
    }
@@ -65,12 +64,12 @@
    ILO_DEV_ASSERT(r->dev, 7, 8);
 
    /* SF_CLIP_VIEWPORT and CC_VIEWPORT */
-   if (DIRTY(VIEWPORT)) {
+   if ((session->vp_delta.dirty & (ILO_STATE_VIEWPORT_SF_CLIP_VIEWPORT |
+                                   ILO_STATE_VIEWPORT_CC_VIEWPORT)) ||
+       r->state_bo_changed) {
       r->state.SF_CLIP_VIEWPORT = gen7_SF_CLIP_VIEWPORT(r->builder,
-            vec->viewport.cso, vec->viewport.count);
-
-      r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder,
-            vec->viewport.cso, vec->viewport.count);
+            &vec->viewport.vp);
+      r->state.CC_VIEWPORT = gen6_CC_VIEWPORT(r->builder, &vec->viewport.vp);
 
       session->viewport_changed = true;
    }
@@ -84,10 +83,10 @@
    ILO_DEV_ASSERT(r->dev, 6, 8);
 
    /* SCISSOR_RECT */
-   if (DIRTY(SCISSOR) || DIRTY(VIEWPORT)) {
-      /* there should be as many scissors as there are viewports */
+   if ((session->vp_delta.dirty & ILO_STATE_VIEWPORT_SCISSOR_RECT) ||
+       r->state_bo_changed) {
       r->state.SCISSOR_RECT = gen6_SCISSOR_RECT(r->builder,
-            &vec->scissor, vec->viewport.count);
+            &vec->viewport.vp);
 
       session->scissor_changed = true;
    }
@@ -101,32 +100,30 @@
    ILO_DEV_ASSERT(r->dev, 6, 8);
 
    /* BLEND_STATE */
-   if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) {
-      if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
-         r->state.BLEND_STATE = gen8_BLEND_STATE(r->builder,
-               vec->blend, &vec->fb, vec->dsa);
-      } else {
-         r->state.BLEND_STATE = gen6_BLEND_STATE(r->builder,
-               vec->blend, &vec->fb, vec->dsa);
-      }
+   if ((session->cc_delta.dirty & ILO_STATE_CC_BLEND_STATE) ||
+        r->state_bo_changed) {
+      if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+         r->state.BLEND_STATE = gen8_BLEND_STATE(r->builder, &vec->blend->cc);
+      else
+         r->state.BLEND_STATE = gen6_BLEND_STATE(r->builder, &vec->blend->cc);
 
       session->blend_changed = true;
    }
 
    /* COLOR_CALC_STATE */
-   if (DIRTY(DSA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) {
+   if ((session->cc_delta.dirty & ILO_STATE_CC_COLOR_CALC_STATE) ||
+       r->state_bo_changed) {
       r->state.COLOR_CALC_STATE =
-         gen6_COLOR_CALC_STATE(r->builder, &vec->stencil_ref,
-               vec->dsa->alpha_ref, &vec->blend_color);
-
+         gen6_COLOR_CALC_STATE(r->builder, &vec->blend->cc);
       session->cc_changed = true;
    }
 
    /* DEPTH_STENCIL_STATE */
-   if (ilo_dev_gen(r->dev) < ILO_GEN(8) && DIRTY(DSA)) {
+   if (ilo_dev_gen(r->dev) < ILO_GEN(8) &&
+       ((session->cc_delta.dirty & ILO_STATE_CC_DEPTH_STENCIL_STATE) ||
+        r->state_bo_changed)) {
       r->state.DEPTH_STENCIL_STATE =
-         gen6_DEPTH_STENCIL_STATE(r->builder, vec->dsa);
-
+         gen6_DEPTH_STENCIL_STATE(r->builder, &vec->blend->cc);
       session->dsa_changed = true;
    }
 }
@@ -137,12 +134,11 @@
                                 int shader_type,
                                 struct ilo_render_draw_session *session)
 {
-   const struct ilo_sampler_cso * const *samplers =
-      vec->sampler[shader_type].cso;
-   const struct pipe_sampler_view * const *views =
-      (const struct pipe_sampler_view **) vec->view[shader_type].states;
+   const struct ilo_view_cso * const *views =
+      (const struct ilo_view_cso **) vec->view[shader_type].states;
+   struct ilo_state_sampler samplers[ILO_MAX_SAMPLERS];
    uint32_t *sampler_state, *border_color_state;
-   int sampler_count;
+   int sampler_count, i;
    bool emit_border_color = false;
    bool skip = false;
 
@@ -194,16 +190,28 @@
           sampler_count <= Elements(vec->sampler[shader_type].cso));
 
    if (emit_border_color) {
-      int i;
-
       for (i = 0; i < sampler_count; i++) {
-         border_color_state[i] = (samplers[i]) ?
-            gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0;
+         const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+         border_color_state[i] = (cso) ?
+            gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, &cso->border) : 0;
+      }
+   }
+
+   for (i = 0; i < sampler_count; i++) {
+      const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+      if (cso && views[i]) {
+         samplers[i] = cso->sampler;
+         ilo_state_sampler_set_surface(&samplers[i],
+               r->dev, &views[i]->surface);
+      } else {
+         samplers[i] = vec->disabled_sampler;
       }
    }
 
-   *sampler_state = gen6_SAMPLER_STATE(r->builder,
-         samplers, views, border_color_state, sampler_count);
+   *sampler_state = gen6_SAMPLER_STATE(r->builder, samplers,
+         border_color_state, sampler_count);
 }
 
 static void
@@ -234,13 +242,13 @@
             const struct ilo_cbuf_state *cbuf =
                &vec->cbuf[PIPE_SHADER_VERTEX];
 
-            if (cbuf0_size <= cbuf->cso[0].user_buffer_size) {
+            if (cbuf0_size <= cbuf->cso[0].info.size) {
                memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size);
             } else {
                memcpy(pcb, cbuf->cso[0].user_buffer,
-                     cbuf->cso[0].user_buffer_size);
-               memset(pcb + cbuf->cso[0].user_buffer_size, 0,
-                     cbuf0_size - cbuf->cso[0].user_buffer_size);
+                     cbuf->cso[0].info.size);
+               memset(pcb + cbuf->cso[0].info.size, 0,
+                     cbuf0_size - cbuf->cso[0].info.size);
             }
 
             pcb += cbuf0_size;
@@ -271,13 +279,13 @@
             gen6_push_constant_buffer(r->builder, cbuf0_size, &pcb);
          r->state.wm.PUSH_CONSTANT_BUFFER_size = cbuf0_size;
 
-         if (cbuf0_size <= cbuf->cso[0].user_buffer_size) {
+         if (cbuf0_size <= cbuf->cso[0].info.size) {
             memcpy(pcb, cbuf->cso[0].user_buffer, cbuf0_size);
          } else {
             memcpy(pcb, cbuf->cso[0].user_buffer,
-                  cbuf->cso[0].user_buffer_size);
-            memset(pcb + cbuf->cso[0].user_buffer_size, 0,
-                  cbuf0_size - cbuf->cso[0].user_buffer_size);
+                  cbuf->cso[0].info.size);
+            memset(pcb + cbuf->cso[0].info.size, 0,
+                  cbuf0_size - cbuf->cso[0].info.size);
          }
 
          session->pcb_fs_changed = true;
@@ -441,18 +449,17 @@
 
    if (blitter->uses & ILO_BLITTER_USE_DSA) {
       render->state.DEPTH_STENCIL_STATE =
-         gen6_DEPTH_STENCIL_STATE(render->builder, &blitter->dsa);
+         gen6_DEPTH_STENCIL_STATE(render->builder, &blitter->cc);
    }
 
    if (blitter->uses & ILO_BLITTER_USE_CC) {
       render->state.COLOR_CALC_STATE =
-         gen6_COLOR_CALC_STATE(render->builder, &blitter->cc.stencil_ref,
-               blitter->cc.alpha_ref, &blitter->cc.blend_color);
+         gen6_COLOR_CALC_STATE(render->builder, &blitter->cc);
    }
 
    if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
       render->state.CC_VIEWPORT =
-         gen6_CC_VIEWPORT(render->builder, &blitter->viewport, 1);
+         gen6_CC_VIEWPORT(render->builder, &blitter->vp);
    }
 
    assert(ilo_builder_dynamic_used(render->builder) <= dynamic_used +
@@ -466,10 +473,9 @@
 {
    const unsigned shader_type = PIPE_SHADER_COMPUTE;
    const struct ilo_shader_state *cs = vec->cs;
-   const struct ilo_sampler_cso * const *samplers =
-      vec->sampler[shader_type].cso;
-   const struct pipe_sampler_view * const *views =
-      (const struct pipe_sampler_view **) vec->view[shader_type].states;
+   const struct ilo_view_cso * const *views =
+      (const struct ilo_view_cso **) vec->view[shader_type].states;
+   struct ilo_state_sampler samplers[ILO_MAX_SAMPLERS];
    int sampler_count, i;
 
    ILO_DEV_ASSERT(r->dev, 7, 7.5);
@@ -480,11 +486,25 @@
           sampler_count <= Elements(vec->sampler[shader_type].cso));
 
    for (i = 0; i < sampler_count; i++) {
-      r->state.cs.SAMPLER_BORDER_COLOR_STATE[i] = (samplers[i]) ?
-         gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, samplers[i]) : 0;
+      const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+      r->state.cs.SAMPLER_BORDER_COLOR_STATE[i] = (cso) ?
+         gen6_SAMPLER_BORDER_COLOR_STATE(r->builder, &cso->border) : 0;
+   }
+
+   for (i = 0; i < sampler_count; i++) {
+      const struct ilo_sampler_cso *cso = vec->sampler[shader_type].cso[i];
+
+      if (cso && views[i]) {
+         samplers[i] = cso->sampler;
+         ilo_state_sampler_set_surface(&samplers[i],
+               r->dev, &views[i]->surface);
+      } else {
+         samplers[i] = vec->disabled_sampler;
+      }
    }
 
-   r->state.cs.SAMPLER_STATE = gen6_SAMPLER_STATE(r->builder, samplers, views,
+   r->state.cs.SAMPLER_STATE = gen6_SAMPLER_STATE(r->builder, samplers,
          r->state.cs.SAMPLER_BORDER_COLOR_STATE, sampler_count);
 }
 
@@ -503,20 +523,39 @@
                                    struct ilo_render_launch_grid_session *session)
 {
    const struct ilo_shader_state *cs = vec->cs;
-   struct gen6_idrt_data data;
+   struct ilo_state_compute_interface_info interface;
+   struct ilo_state_compute_info info;
+   uint32_t kernel_offset;
 
    ILO_DEV_ASSERT(r->dev, 7, 7.5);
 
-   memset(&data, 0, sizeof(data));
-
-   data.cs = cs;
-   data.sampler_offset = r->state.cs.SAMPLER_STATE;
-   data.binding_table_offset = r->state.cs.BINDING_TABLE_STATE;
+   memset(&interface, 0, sizeof(interface));
 
-   data.curbe_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
-   data.thread_group_size = session->thread_group_size;
+   interface.sampler_count =
+      ilo_shader_get_kernel_param(cs, ILO_KERNEL_SAMPLER_COUNT);
+   interface.surface_count =
+      ilo_shader_get_kernel_param(cs, ILO_KERNEL_SURFACE_TOTAL_COUNT);
+   interface.thread_group_size = session->thread_group_size;
+   interface.slm_size =
+      ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_LOCAL_SIZE);
+   interface.curbe_read_length = r->state.cs.PUSH_CONSTANT_BUFFER_size;
+
+   memset(&info, 0, sizeof(info));
+   info.data = session->compute_data;
+   info.data_size = sizeof(session->compute_data);
+   info.interfaces = &interface;
+   info.interface_count = 1;
+   info.cv_urb_alloc_size = r->dev->urb_size;
+   info.curbe_alloc_size = r->state.cs.PUSH_CONSTANT_BUFFER_size;
+
+   ilo_state_compute_init(&session->compute, r->dev, &info);
+
+   kernel_offset = ilo_shader_get_kernel_offset(cs);
+
+   session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder,
+         &session->compute, &kernel_offset,
+         &r->state.cs.SAMPLER_STATE, &r->state.cs.BINDING_TABLE_STATE);
 
-   session->idrt = gen6_INTERFACE_DESCRIPTOR_DATA(r->builder, &data, 1);
    session->idrt_size = 32;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen6.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen6.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen6.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen6.c	2015-09-16 14:36:09.000000000 +0000
@@ -29,11 +29,11 @@
 #include "core/ilo_builder_3d.h"
 #include "core/ilo_builder_mi.h"
 #include "core/ilo_builder_render.h"
-#include "util/u_dual_blend.h"
 #include "util/u_prim.h"
 
 #include "ilo_blitter.h"
 #include "ilo_query.h"
+#include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
@@ -330,64 +330,19 @@
                      const struct ilo_state_vector *vec,
                      struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_URB */
-   if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
-      const bool gs_active = (vec->gs || (vec->vs &&
-               ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
-      int vs_entry_size, gs_entry_size;
-      int vs_total_size, gs_total_size;
-
-      vs_entry_size = (vec->vs) ?
-         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
-      /*
-       * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
-       * share VUE handles.  The VUE allocation size must be large enough to
-       * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
-       *
-       * I am not sure if the PRM explicitly states that VF and VS share VUE
-       * handles.  But here is a citation that implies so:
-       *
-       * From the Sandy Bridge PRM, volume 2 part 1, page 44:
-       *
-       *     "Once a FF stage that spawn threads has sufficient input to
-       *      initiate a thread, it must guarantee that it is safe to request
-       *      the thread initiation. For all these FF stages, this check is
-       *      based on :
-       *
-       *      - The availability of output URB entries:
-       *        - VS: As the input URB entries are overwritten with the
-       *          VS-generated output data, output URB availability isn't a
-       *          factor."
-       */
-      if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
-         vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
-
-      gs_entry_size = (vec->gs) ?
-         ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
-         (gs_active) ? vs_entry_size : 0;
-
-      /* in bytes */
-      vs_entry_size *= sizeof(float) * 4;
-      gs_entry_size *= sizeof(float) * 4;
-      vs_total_size = r->dev->urb_size;
-
-      if (gs_active) {
-         vs_total_size /= 2;
-         gs_total_size = vs_total_size;
-      }
-      else {
-         gs_total_size = 0;
-      }
+   const bool gs_active = (vec->gs || (vec->vs &&
+            ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
 
-      gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size,
-            vs_entry_size, gs_entry_size);
+   /* 3DSTATE_URB */
+   if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+                                   ILO_STATE_URB_3DSTATE_URB_GS)) {
+      gen6_3DSTATE_URB(r->builder, &vec->urb);
 
       if (r->state.gs.active && !gs_active)
          gen6_wa_post_3dstate_urb_no_gs(r);
-
-      r->state.gs.active = gs_active;
    }
+
+   r->state.gs.active = gs_active;
 }
 
 static void
@@ -459,33 +414,30 @@
 {
    if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
       /* 3DSTATE_INDEX_BUFFER */
-      if (DIRTY(IB) || r->batch_bo_changed) {
-         gen6_3DSTATE_INDEX_BUFFER(r->builder,
-               &vec->ib, false);
-      }
+      if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+          DIRTY(IB) || r->batch_bo_changed)
+         gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
 
       /* 3DSTATE_VF */
-      if (session->primitive_restart_changed) {
-         gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart,
-               vec->draw->restart_index);
-      }
-   }
-   else {
+      if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF)
+         gen75_3DSTATE_VF(r->builder, &vec->ve->vf);
+   } else {
       /* 3DSTATE_INDEX_BUFFER */
-      if (DIRTY(IB) || session->primitive_restart_changed ||
-          r->batch_bo_changed) {
-         gen6_3DSTATE_INDEX_BUFFER(r->builder,
-               &vec->ib, vec->draw->primitive_restart);
-      }
+      if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+          DIRTY(IB) || r->batch_bo_changed)
+         gen6_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
    }
 
    /* 3DSTATE_VERTEX_BUFFERS */
-   if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed)
-      gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb);
+   if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) ||
+       DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) {
+      gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf,
+            vec->vb.vb, vec->ve->vb_count);
+   }
 
    /* 3DSTATE_VERTEX_ELEMENTS */
-   if (DIRTY(VE))
-      gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve);
+   if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS)
+      gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf);
 }
 
 void
@@ -516,10 +468,17 @@
 
    /* 3DSTATE_VS */
    if (DIRTY(VS) || r->instruction_bo_changed) {
+      const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
+      const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
+
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_3dstate_vs_toggle(r);
 
-      gen6_3DSTATE_VS(r->builder, vec->vs);
+      if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
+          ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
+         gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
+      else
+         gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
    }
 }
 
@@ -535,14 +494,39 @@
    /* 3DSTATE_GS */
    if (DIRTY(GS) || DIRTY(VS) ||
        session->prim_changed || r->instruction_bo_changed) {
+      const union ilo_shader_cso *cso;
+      uint32_t kernel_offset;
+
       if (vec->gs) {
-         gen6_3DSTATE_GS(r->builder, vec->gs);
-      } else if (vec->vs &&
+         cso = ilo_shader_get_kernel_cso(vec->gs);
+         kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
+
+         gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
+      } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
             ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
-         const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
-         gen6_so_3DSTATE_GS(r->builder, vec->vs, verts_per_prim);
+         const int verts_per_prim =
+            u_vertices_per_prim(session->reduced_prim);
+         enum ilo_kernel_param param;
+
+         switch (verts_per_prim) {
+         case 1:
+            param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+            break;
+         case 2:
+            param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+            break;
+         default:
+            param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+            break;
+         }
+
+         cso = ilo_shader_get_kernel_cso(vec->vs);
+         kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
+            ilo_shader_get_kernel_param(vec->vs, param);
+
+         gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
       } else {
-         gen6_disable_3DSTATE_GS(r->builder);
+         gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
       }
    }
 }
@@ -633,30 +617,8 @@
                struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_CLIP */
-   if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) {
-      bool enable_guardband = true;
-      unsigned i;
-
-      /*
-       * Gen8+ has viewport extent test.  Guard band test can be enabled on
-       * prior Gens only when the viewport is larger than the framebuffer,
-       * unless we emulate viewport extent test on them.
-       */
-      if (ilo_dev_gen(r->dev) < ILO_GEN(8)) {
-         for (i = 0; i < vec->viewport.count; i++) {
-            const struct ilo_viewport_cso *vp = &vec->viewport.cso[i];
-
-            if (vp->min_x > 0.0f || vp->max_x < vec->fb.state.width ||
-                vp->min_y > 0.0f || vp->max_y < vec->fb.state.height) {
-               enable_guardband = false;
-               break;
-            }
-         }
-      }
-
-      gen6_3DSTATE_CLIP(r->builder, vec->rasterizer,
-            vec->fs, enable_guardband, 1);
-   }
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_CLIP)
+      gen6_3DSTATE_CLIP(r->builder, &vec->rasterizer->rs);
 }
 
 static void
@@ -665,9 +627,9 @@
              struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_SF */
-   if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(FB)) {
-      gen6_3DSTATE_SF(r->builder, vec->rasterizer, vec->fs,
-            vec->fb.num_samples);
+   if ((session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) || DIRTY(FS)) {
+      const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs);
+      gen6_3DSTATE_SF(r->builder, &vec->rasterizer->rs, sbe);
    }
 }
 
@@ -700,17 +662,17 @@
    }
 
    /* 3DSTATE_WM */
-   if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) ||
-       DIRTY(RASTERIZER) || r->instruction_bo_changed) {
-      const bool dual_blend = vec->blend->dual_blend;
-      const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
-                                vec->blend->alpha_to_coverage);
+   if (DIRTY(FS) ||
+       (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) ||
+       r->instruction_bo_changed) {
+      const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+      const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
 
       if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed)
          gen6_wa_pre_3dstate_wm_max_threads(r);
 
-      gen6_3DSTATE_WM(r->builder, vec->fs,
-            vec->rasterizer, dual_blend, cc_may_kill);
+      gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
+            &cso->ps, kernel_offset);
    }
 }
 
@@ -719,25 +681,23 @@
                          const struct ilo_state_vector *vec,
                          struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
-   if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
-      const uint32_t *pattern;
-
-      pattern = (vec->fb.num_samples > 1) ?
-         &r->sample_pattern_4x : &r->sample_pattern_1x;
+   /* 3DSTATE_MULTISAMPLE */
+   if (DIRTY(FB) || (session->rs_delta.dirty &
+            ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) {
+      const uint8_t sample_count = (vec->fb.num_samples > 1) ? 4 : 1;
 
       if (ilo_dev_gen(r->dev) == ILO_GEN(6)) {
          gen6_wa_pre_non_pipelined(r);
          gen6_wa_pre_3dstate_multisample(r);
       }
 
-      gen6_3DSTATE_MULTISAMPLE(r->builder,
-            vec->fb.num_samples, pattern,
-            vec->rasterizer->state.half_pixel_center);
-
-      gen6_3DSTATE_SAMPLE_MASK(r->builder,
-            (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1);
+      gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs,
+            &r->sample_pattern, sample_count);
    }
+
+   /* 3DSTATE_SAMPLE_MASK */
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK)
+      gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs);
 }
 
 static void
@@ -747,7 +707,7 @@
 {
    /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
    if (DIRTY(FB) || r->batch_bo_changed) {
-      const struct ilo_zs_surface *zs;
+      const struct ilo_state_zs *zs;
       uint32_t clear_params;
 
       if (vec->fb.state.zsbuf) {
@@ -772,7 +732,7 @@
          gen6_wa_pre_depth(r);
       }
 
-      gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
+      gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs);
       gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
       gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
       gen6_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
@@ -790,10 +750,8 @@
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_non_pipelined(r);
 
-      gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder,
-            &vec->poly_stipple);
-
-      gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, 0, 0);
+      gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder, &vec->poly_stipple);
+      gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, &vec->poly_stipple);
    }
 
    /* 3DSTATE_LINE_STIPPLE */
@@ -801,17 +759,16 @@
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_non_pipelined(r);
 
-      gen6_3DSTATE_LINE_STIPPLE(r->builder,
-            vec->rasterizer->state.line_stipple_pattern,
-            vec->rasterizer->state.line_stipple_factor + 1);
+      gen6_3DSTATE_LINE_STIPPLE(r->builder, &vec->line_stipple);
    }
 
    /* 3DSTATE_AA_LINE_PARAMETERS */
-   if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) {
+   if (session->rs_delta.dirty &
+         ILO_STATE_RASTER_3DSTATE_AA_LINE_PARAMETERS) {
       if (ilo_dev_gen(r->dev) == ILO_GEN(6))
          gen6_wa_pre_non_pipelined(r);
 
-      gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder);
+      gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder, &vec->rasterizer->rs);
    }
 }
 
@@ -849,7 +806,7 @@
    gen6_draw_sf_rect(render, vec, session);
    gen6_draw_vf(render, vec, session);
 
-   ilo_render_3dprimitive(render, vec->draw, &vec->ib);
+   ilo_render_3dprimitive(render, &vec->draw_info);
 }
 
 static void
@@ -860,40 +817,23 @@
    gen6_wa_post_3dstate_constant_vs(r);
 
    gen6_wa_pre_3dstate_vs_toggle(r);
-   gen6_disable_3DSTATE_VS(r->builder);
+   gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
 
    gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
-   gen6_disable_3DSTATE_GS(r->builder);
+   gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
 
-   gen6_disable_3DSTATE_CLIP(r->builder);
-   gen6_3DSTATE_SF(r->builder, NULL, NULL, blitter->fb.num_samples);
+   gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
+   gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
 }
 
 static void
 gen6_rectlist_wm(struct ilo_render *r,
                  const struct ilo_blitter *blitter)
 {
-   uint32_t hiz_op;
-
-   switch (blitter->op) {
-   case ILO_BLITTER_RECTLIST_CLEAR_ZS:
-      hiz_op = GEN6_WM_DW4_DEPTH_CLEAR;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_Z:
-      hiz_op = GEN6_WM_DW4_DEPTH_RESOLVE;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
-      hiz_op = GEN6_WM_DW4_HIZ_RESOLVE;
-      break;
-   default:
-      hiz_op = 0;
-      break;
-   }
-
    gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen6_wa_pre_3dstate_wm_max_threads(r);
-   gen6_hiz_3DSTATE_WM(r->builder, hiz_op);
+   gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
 }
 
 static void
@@ -903,10 +843,8 @@
    gen6_wa_pre_depth(r);
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
-                        ILO_BLITTER_USE_FB_STENCIL)) {
-      gen6_3DSTATE_DEPTH_BUFFER(r->builder,
-            &blitter->fb.dst.u.zs, true);
-   }
+                        ILO_BLITTER_USE_FB_STENCIL))
+      gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs);
 
    if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
       gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
@@ -926,16 +864,12 @@
 gen6_rectlist_wm_multisample(struct ilo_render *r,
                              const struct ilo_blitter *blitter)
 {
-   const uint32_t *pattern = (blitter->fb.num_samples > 1) ?
-      &r->sample_pattern_4x : &r->sample_pattern_1x;
+   const uint8_t sample_count = (blitter->fb.num_samples > 1) ? 4 : 1;
 
    gen6_wa_pre_3dstate_multisample(r);
 
-   gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
-         pattern, true);
-
-   gen6_3DSTATE_SAMPLE_MASK(r->builder,
-         (1 << blitter->fb.num_samples) - 1);
+   gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs, &r->sample_pattern, sample_count);
+   gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs);
 }
 
 int
@@ -964,11 +898,9 @@
          session->vb_start, session->vb_end,
          sizeof(blitter->vertices[0]));
 
-   gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
+   gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf);
 
-   gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0,
-         (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float),
-         0);
+   gen6_3DSTATE_URB(r->builder, &blitter->urb);
 
    if (r->state.gs.active) {
       gen6_wa_post_3dstate_urb_no_gs(r);
@@ -994,7 +926,7 @@
    gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
          blitter->fb.width, blitter->fb.height);
 
-   ilo_render_3dprimitive(r, &blitter->draw, NULL);
+   ilo_render_3dprimitive(r, &blitter->draw_info);
 }
 
 int
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen7.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen7.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen7.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen7.c	2015-09-16 14:36:09.000000000 +0000
@@ -28,9 +28,9 @@
 #include "genhw/genhw.h"
 #include "core/ilo_builder_3d.h"
 #include "core/ilo_builder_render.h"
-#include "util/u_dual_blend.h"
 
 #include "ilo_blitter.h"
+#include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
@@ -201,40 +201,17 @@
                      struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_URB_{VS,GS,HS,DS} */
-   if (DIRTY(VE) || DIRTY(VS)) {
-      /* the first 16KB are reserved for VS and PS PCBs */
-      const int offset =
-         (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-          (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-          32768 : 16384;
-      int vs_entry_size, vs_total_size;
-
-      vs_entry_size = (vec->vs) ?
-         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
-
-      /*
-       * From the Ivy Bridge PRM, volume 2 part 1, page 35:
-       *
-       *     "Programming Restriction: As the VS URB entry serves as both the
-       *      per-vertex input and output of the VS shader, the VS URB
-       *      Allocation Size must be sized to the maximum of the vertex input
-       *      and output structures."
-       */
-      if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
-         vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
-
-      vs_entry_size *= sizeof(float) * 4;
-      vs_total_size = r->dev->urb_size - offset;
-
+   if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS |
+                                   ILO_STATE_URB_3DSTATE_URB_HS |
+                                   ILO_STATE_URB_3DSTATE_URB_DS |
+                                   ILO_STATE_URB_3DSTATE_URB_GS)) {
       if (ilo_dev_gen(r->dev) == ILO_GEN(7))
          gen7_wa_pre_vs(r);
 
-      gen7_3DSTATE_URB_VS(r->builder,
-            offset, vs_total_size, vs_entry_size);
-
-      gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
-      gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
-      gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+      gen7_3DSTATE_URB_VS(r->builder, &vec->urb);
+      gen7_3DSTATE_URB_GS(r->builder, &vec->urb);
+      gen7_3DSTATE_URB_HS(r->builder, &vec->urb);
+      gen7_3DSTATE_URB_DS(r->builder, &vec->urb);
    }
 }
 
@@ -244,22 +221,15 @@
                            struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
-   if (r->hw_ctx_changed) {
-      /*
-       * Push constant buffers are only allowed to take up at most the first
-       * 16KB of the URB.  Split the space evenly for VS and FS.
-       */
-      const int max_size =
-         (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-          (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-          32768 : 16384;
-      const int size = max_size / 2;
-      int offset = 0;
-
-      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
-      offset += size;
-
-      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+   if (session->urb_delta.dirty &
+         (ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS |
+          ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS)) {
+      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &vec->urb);
+      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(r->builder, &vec->urb);
+      gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &vec->urb);
 
       if (ilo_dev_gen(r->dev) == ILO_GEN(7))
          gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@@ -344,14 +314,14 @@
    }
 
    /* 3DSTATE_VS */
-   if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
-      if (emit_3dstate_vs || DIRTY(RASTERIZER)) {
-         gen8_3DSTATE_VS(r->builder, vec->vs,
-               vec->rasterizer->state.clip_plane_enable);
-      }
-   } else {
-      if (emit_3dstate_vs)
-         gen6_3DSTATE_VS(r->builder, vec->vs);
+   if (emit_3dstate_vs) {
+      const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->vs);
+      const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
+
+      if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+         gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+      else
+         gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
    }
 }
 
@@ -362,8 +332,15 @@
 {
    /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
    if (r->hw_ctx_changed) {
+      const struct ilo_state_hs *hs = &vec->disabled_hs;
+      const uint32_t kernel_offset = 0;
+
       gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
-      gen7_disable_3DSTATE_HS(r->builder);
+
+      if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+         gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
+      else
+         gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@@ -377,8 +354,10 @@
              struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_TE */
-   if (r->hw_ctx_changed)
-      gen7_3DSTATE_TE(r->builder);
+   if (r->hw_ctx_changed) {
+      const struct ilo_state_ds *ds = &vec->disabled_ds;
+      gen7_3DSTATE_TE(r->builder, ds);
+   }
 }
 
 void
@@ -388,8 +367,15 @@
 {
    /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
    if (r->hw_ctx_changed) {
+      const struct ilo_state_ds *ds = &vec->disabled_ds;
+      const uint32_t kernel_offset = 0;
+
       gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
-      gen7_disable_3DSTATE_DS(r->builder);
+
+      if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+         gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
+      else
+         gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@@ -405,8 +391,15 @@
 {
    /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
    if (r->hw_ctx_changed) {
+      const struct ilo_state_gs *gs = &vec->disabled_gs;
+      const uint32_t kernel_offset = 0;
+
       gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
-      gen7_disable_3DSTATE_GS(r->builder);
+
+      if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+         gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
+      else
+         gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
    }
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@@ -421,7 +414,7 @@
               const struct ilo_state_vector *vec,
               struct ilo_render_draw_session *session)
 {
-   const struct pipe_stream_output_info *so_info;
+   const struct ilo_state_sol *sol;
    const struct ilo_shader_state *shader;
    bool dirty_sh = false;
 
@@ -434,41 +427,54 @@
       dirty_sh = DIRTY(VS);
    }
 
-   so_info = ilo_shader_get_kernel_so_info(shader);
+   sol = ilo_shader_get_kernel_sol(shader);
 
    /* 3DSTATE_SO_BUFFER */
    if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
        vec->so.enabled) {
       int i;
 
-      for (i = 0; i < vec->so.count; i++) {
-         const int stride = so_info->stride[i] * 4; /* in bytes */
-
-         gen7_3DSTATE_SO_BUFFER(r->builder, i, stride, vec->so.states[i]);
+      for (i = 0; i < ILO_STATE_SOL_MAX_BUFFER_COUNT; i++) {
+         const struct pipe_stream_output_target *target =
+            (i < vec->so.count && vec->so.states[i]) ?
+            vec->so.states[i] : NULL;
+         const struct ilo_state_sol_buffer *sb = (target) ?
+            &((const struct ilo_stream_output_target *) target)->sb :
+            &vec->so.dummy_sb;
+
+         if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
+            gen8_3DSTATE_SO_BUFFER(r->builder, sol, sb, i);
+         else
+            gen7_3DSTATE_SO_BUFFER(r->builder, sol, sb, i);
       }
-
-      for (; i < 4; i++)
-         gen7_disable_3DSTATE_SO_BUFFER(r->builder, i);
    }
 
    /* 3DSTATE_SO_DECL_LIST */
    if (dirty_sh && vec->so.enabled)
-      gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
+      gen7_3DSTATE_SO_DECL_LIST(r->builder, sol);
 
-   /* 3DSTATE_STREAMOUT */
-   if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
-      const int output_count = ilo_shader_get_kernel_param(shader,
-            ILO_KERNEL_OUTPUT_COUNT);
-      int buf_strides[4] = { 0, 0, 0, 0 };
-      int i;
-
-      for (i = 0; i < vec->so.count; i++)
-         buf_strides[i] = so_info->stride[i] * 4;
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 196-197:
+    *
+    *     "Anytime the SOL unit MMIO registers or non-pipeline state are
+    *      written, the SOL unit needs to receive a pipeline state update with
+    *      SOL unit dirty state for information programmed in MMIO/NP to get
+    *      loaded into the SOL unit.
+    *
+    *      The SOL unit incorrectly double buffers MMIO/NP registers and only
+    *      moves them into the design for usage when control topology is
+    *      received with the SOL unit dirty state.
+    *
+    *      If the state does not change, need to resend the same state.
+    *
+    *      Because of corruption, software must flush the whole fixed function
+    *      pipeline when 3DSTATE_STREAMOUT changes state."
+    *
+    * The first and fourth paragraphs are gone on Gen7.5+.
+    */
 
-      gen7_3DSTATE_STREAMOUT(r->builder, 0,
-            vec->rasterizer->state.rasterizer_discard,
-            output_count, buf_strides);
-   }
+   /* 3DSTATE_STREAMOUT */
+   gen7_3DSTATE_STREAMOUT(r->builder, sol);
 }
 
 static void
@@ -477,22 +483,17 @@
              struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_SBE */
-   if (DIRTY(RASTERIZER) || DIRTY(FS)) {
-      gen7_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ?
-            vec->rasterizer->state.sprite_coord_mode : 0);
+   if (DIRTY(FS)) {
+      const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs);
+      gen7_3DSTATE_SBE(r->builder, sbe);
    }
 
    /* 3DSTATE_SF */
-   if (DIRTY(RASTERIZER) || DIRTY(FB)) {
-      struct pipe_surface *zs = vec->fb.state.zsbuf;
-
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF) {
       if (ilo_dev_gen(r->dev) == ILO_GEN(7))
          gen7_wa_pre_3dstate_sf_depth_bias(r);
 
-      gen7_3DSTATE_SF(r->builder,
-            (vec->rasterizer) ? &vec->rasterizer->sf : NULL,
-            (zs) ? zs->format : PIPE_FORMAT_NONE,
-            vec->fb.num_samples);
+      gen7_3DSTATE_SF(r->builder, &vec->rasterizer->rs);
    }
 }
 
@@ -501,13 +502,12 @@
              const struct ilo_state_vector *vec,
              struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_WM */
-   if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
-      const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
-                                vec->blend->alpha_to_coverage);
+   const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+   const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
 
-      gen7_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer, cc_may_kill);
-   }
+   /* 3DSTATE_WM */
+   if (DIRTY(FS) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM))
+      gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, &cso->ps);
 
    /* 3DSTATE_BINDING_TABLE_POINTERS_PS */
    if (session->binding_table_fs_changed) {
@@ -530,13 +530,11 @@
    }
 
    /* 3DSTATE_PS */
-   if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) {
-      const bool dual_blend = vec->blend->dual_blend;
-
+   if (DIRTY(FS) || r->instruction_bo_changed) {
       if (r->hw_ctx_changed)
          gen7_wa_pre_3dstate_ps_max_threads(r);
 
-      gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend);
+      gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
    }
 
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -569,7 +567,7 @@
 
    /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
    if (DIRTY(FB) || r->batch_bo_changed) {
-      const struct ilo_zs_surface *zs;
+      const struct ilo_state_zs *zs;
       uint32_t clear_params;
 
       if (vec->fb.state.zsbuf) {
@@ -588,7 +586,7 @@
          clear_params = 0;
       }
 
-      gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
+      gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs);
       gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
       gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
       gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
@@ -600,24 +598,21 @@
                          const struct ilo_state_vector *vec,
                          struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
-   if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
-      const uint32_t *pattern;
+   /* 3DSTATE_MULTISAMPLE */
+   if (DIRTY(FB) || (session->rs_delta.dirty &
+            ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)) {
+      const uint8_t sample_count = (vec->fb.num_samples > 4) ? 8 :
+                                   (vec->fb.num_samples > 1) ? 4 : 1;
 
       gen7_wa_pre_3dstate_multisample(r);
 
-      pattern = (vec->fb.num_samples > 4) ? r->sample_pattern_8x :
-                (vec->fb.num_samples > 1) ? &r->sample_pattern_4x :
-                &r->sample_pattern_1x;
-
-      gen6_3DSTATE_MULTISAMPLE(r->builder,
-            vec->fb.num_samples, pattern,
-            vec->rasterizer->state.half_pixel_center);
-
-      gen7_3DSTATE_SAMPLE_MASK(r->builder,
-            (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1,
-            vec->fb.num_samples);
+      gen6_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs,
+            &r->sample_pattern, sample_count);
    }
+
+   /* 3DSTATE_SAMPLE_MASK */
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK)
+      gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs);
 }
 
 void
@@ -654,28 +649,15 @@
    gen6_draw_sf_rect(render, vec, session);
    gen6_draw_vf(render, vec, session);
 
-   ilo_render_3dprimitive(render, vec->draw, &vec->ib);
+   ilo_render_3dprimitive(render, &vec->draw_info);
 }
 
 static void
 gen7_rectlist_pcb_alloc(struct ilo_render *r,
                         const struct ilo_blitter *blitter)
 {
-   /*
-    * Push constant buffers are only allowed to take up at most the first
-    * 16KB of the URB.  Split the space evenly for VS and FS.
-    */
-   const int max_size =
-      (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-       (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-       32768 : 16384;
-   const int size = max_size / 2;
-   int offset = 0;
-
-   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
-   offset += size;
-
-   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
+   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &blitter->urb);
+   gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &blitter->urb);
 
    if (ilo_dev_gen(r->dev) == ILO_GEN(7))
       gen7_wa_post_3dstate_push_constant_alloc_ps(r);
@@ -685,19 +667,10 @@
 gen7_rectlist_urb(struct ilo_render *r,
                   const struct ilo_blitter *blitter)
 {
-   /* the first 16KB are reserved for VS and PS PCBs */
-   const int offset =
-      (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
-       (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
-       32768 : 16384;
-
-   gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
-         (blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
-         4 * sizeof(float));
-
-   gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
-   gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
-   gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
+   gen7_3DSTATE_URB_VS(r->builder, &blitter->urb);
+   gen7_3DSTATE_URB_GS(r->builder, &blitter->urb);
+   gen7_3DSTATE_URB_HS(r->builder, &blitter->urb);
+   gen7_3DSTATE_URB_DS(r->builder, &blitter->urb);
 }
 
 static void
@@ -705,58 +678,40 @@
                        const struct ilo_blitter *blitter)
 {
    gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
-   gen6_disable_3DSTATE_VS(r->builder);
+   gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
 
    gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
-   gen7_disable_3DSTATE_HS(r->builder);
+   gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
 
-   gen7_3DSTATE_TE(r->builder);
+   gen7_3DSTATE_TE(r->builder, &blitter->ds);
 
    gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
-   gen7_disable_3DSTATE_DS(r->builder);
+   gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
 
    gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
-   gen7_disable_3DSTATE_GS(r->builder);
+   gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
 
-   gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
+   gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
 
-   gen6_disable_3DSTATE_CLIP(r->builder);
+   gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
 
    if (ilo_dev_gen(r->dev) == ILO_GEN(7))
       gen7_wa_pre_3dstate_sf_depth_bias(r);
 
-   gen7_3DSTATE_SF(r->builder, NULL, blitter->fb.dst.base.format,
-         blitter->fb.num_samples);
-   gen7_3DSTATE_SBE(r->builder, NULL, 0);
+   gen7_3DSTATE_SF(r->builder, &blitter->fb.rs);
+   gen7_3DSTATE_SBE(r->builder, &blitter->sbe);
 }
 
 static void
 gen7_rectlist_wm(struct ilo_render *r,
                  const struct ilo_blitter *blitter)
 {
-   uint32_t hiz_op;
-
-   switch (blitter->op) {
-   case ILO_BLITTER_RECTLIST_CLEAR_ZS:
-      hiz_op = GEN7_WM_DW1_DEPTH_CLEAR;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_Z:
-      hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
-      hiz_op = GEN7_WM_DW1_HIZ_RESOLVE;
-      break;
-   default:
-      hiz_op = 0;
-      break;
-   }
-
-   gen7_hiz_3DSTATE_WM(r->builder, hiz_op);
+   gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps);
 
    gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
 
    gen7_wa_pre_3dstate_ps_max_threads(r);
-   gen7_disable_3DSTATE_PS(r->builder);
+   gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
 }
 
 static void
@@ -766,10 +721,8 @@
    gen7_wa_pre_depth(r);
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
-                        ILO_BLITTER_USE_FB_STENCIL)) {
-      gen6_3DSTATE_DEPTH_BUFFER(r->builder,
-            &blitter->fb.dst.u.zs, true);
-   }
+                        ILO_BLITTER_USE_FB_STENCIL))
+      gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs);
 
    if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
       gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
@@ -789,18 +742,15 @@
 gen7_rectlist_wm_multisample(struct ilo_render *r,
                              const struct ilo_blitter *blitter)
 {
-   const uint32_t *pattern =
-      (blitter->fb.num_samples > 4) ? r->sample_pattern_8x :
-      (blitter->fb.num_samples > 1) ? &r->sample_pattern_4x :
-      &r->sample_pattern_1x;
+   const uint8_t sample_count = (blitter->fb.num_samples > 4) ? 8 :
+                                (blitter->fb.num_samples > 1) ? 4 : 1;
 
    gen7_wa_pre_3dstate_multisample(r);
 
-   gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
-         pattern, true);
+   gen6_3DSTATE_MULTISAMPLE(r->builder, &blitter->fb.rs,
+         &r->sample_pattern, sample_count);
 
-   gen7_3DSTATE_SAMPLE_MASK(r->builder,
-         (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples);
+   gen6_3DSTATE_SAMPLE_MASK(r->builder, &blitter->fb.rs);
 }
 
 void
@@ -818,7 +768,7 @@
          session->vb_start, session->vb_end,
          sizeof(blitter->vertices[0]));
 
-   gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
+   gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->vf);
 
    gen7_rectlist_pcb_alloc(r, blitter);
 
@@ -854,7 +804,7 @@
    if (ilo_dev_gen(r->dev) == ILO_GEN(7))
       gen7_wa_post_ps_and_later(r);
 
-   ilo_render_3dprimitive(r, &blitter->draw, NULL);
+   ilo_render_3dprimitive(r, &blitter->draw_info);
 }
 
 int
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen8.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen8.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen8.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen8.c	2015-09-16 14:36:09.000000000 +0000
@@ -28,9 +28,9 @@
 #include "genhw/genhw.h"
 #include "core/ilo_builder_3d.h"
 #include "core/ilo_builder_render.h"
-#include "util/u_dual_blend.h"
 
 #include "ilo_blitter.h"
+#include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
@@ -66,26 +66,20 @@
              struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_RASTER */
-   if (DIRTY(RASTERIZER)) {
-      gen8_3DSTATE_RASTER(r->builder, (vec->rasterizer) ?
-            &vec->rasterizer->sf : NULL);
-   }
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_RASTER)
+      gen8_3DSTATE_RASTER(r->builder, &vec->rasterizer->rs);
 
-   /* 3DSTATE_SBE */
-   if (DIRTY(RASTERIZER) || DIRTY(FS)) {
-      gen8_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ?
-            vec->rasterizer->state.sprite_coord_mode : 0);
-   }
+   /* 3DSTATE_SBE and 3DSTATE_SBE_SWIZ */
+   if (DIRTY(FS)) {
+      const struct ilo_state_sbe *sbe = ilo_shader_get_kernel_sbe(vec->fs);
 
-   /* 3DSTATE_SBE_SWIZ */
-   if (DIRTY(FS))
-      gen8_3DSTATE_SBE_SWIZ(r->builder, vec->fs);
+      gen8_3DSTATE_SBE(r->builder, sbe);
+      gen8_3DSTATE_SBE_SWIZ(r->builder, sbe);
+   }
 
    /* 3DSTATE_SF */
-   if (DIRTY(RASTERIZER)) {
-      gen8_3DSTATE_SF(r->builder, (vec->rasterizer) ?
-            &vec->rasterizer->sf : NULL);
-   }
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SF)
+      gen7_3DSTATE_SF(r->builder, &vec->rasterizer->rs);
 }
 
 static void
@@ -93,12 +87,15 @@
              const struct ilo_state_vector *vec,
              struct ilo_render_draw_session *session)
 {
+   const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs);
+   const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs);
+
    /* 3DSTATE_WM */
-   if (DIRTY(FS) || DIRTY(RASTERIZER))
-      gen8_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer);
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)
+      gen8_3DSTATE_WM(r->builder, &vec->rasterizer->rs);
 
-   if (DIRTY(DSA))
-      gen8_3DSTATE_WM_DEPTH_STENCIL(r->builder, vec->dsa);
+   if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_WM_DEPTH_STENCIL)
+      gen8_3DSTATE_WM_DEPTH_STENCIL(r->builder, &vec->blend->cc);
 
    /* 3DSTATE_WM_HZ_OP and 3DSTATE_WM_CHROMAKEY */
    if (r->hw_ctx_changed) {
@@ -128,18 +125,15 @@
 
    /* 3DSTATE_PS */
    if (DIRTY(FS) || r->instruction_bo_changed)
-      gen8_3DSTATE_PS(r->builder, vec->fs);
+      gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
 
    /* 3DSTATE_PS_EXTRA */
-   if (DIRTY(FS) || DIRTY(DSA) || DIRTY(BLEND)) {
-      const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
-                                vec->blend->alpha_to_coverage);
-      gen8_3DSTATE_PS_EXTRA(r->builder, vec->fs, cc_may_kill, false);
-   }
+   if (DIRTY(FS))
+      gen8_3DSTATE_PS_EXTRA(r->builder, &cso->ps);
 
    /* 3DSTATE_PS_BLEND */
-   if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA))
-      gen8_3DSTATE_PS_BLEND(r->builder, vec->blend, &vec->fb, vec->dsa);
+   if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_PS_BLEND)
+      gen8_3DSTATE_PS_BLEND(r->builder, &vec->blend->cc);
 
    /* 3DSTATE_SCISSOR_STATE_POINTERS */
    if (session->scissor_changed) {
@@ -149,7 +143,7 @@
 
    /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
    if (DIRTY(FB) || r->batch_bo_changed) {
-      const struct ilo_zs_surface *zs;
+      const struct ilo_state_zs *zs;
       uint32_t clear_params;
 
       if (vec->fb.state.zsbuf) {
@@ -170,7 +164,7 @@
 
       gen8_wa_pre_depth(r);
 
-      gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
+      gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs);
       gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
       gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
       gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
@@ -183,14 +177,8 @@
                             struct ilo_render_draw_session *session)
 {
    /* 3DSTATE_SAMPLE_PATTERN */
-   if (r->hw_ctx_changed) {
-      gen8_3DSTATE_SAMPLE_PATTERN(r->builder,
-            &r->sample_pattern_1x,
-            &r->sample_pattern_2x,
-            &r->sample_pattern_4x,
-            r->sample_pattern_8x,
-            r->sample_pattern_16x);
-   }
+   if (r->hw_ctx_changed)
+      gen8_3DSTATE_SAMPLE_PATTERN(r->builder, &r->sample_pattern);
 }
 
 static void
@@ -198,15 +186,13 @@
                          const struct ilo_state_vector *vec,
                          struct ilo_render_draw_session *session)
 {
-   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
-   if (DIRTY(SAMPLE_MASK) || DIRTY(FB) || DIRTY(RASTERIZER)) {
-      gen8_3DSTATE_MULTISAMPLE(r->builder, vec->fb.num_samples,
-            vec->rasterizer->state.half_pixel_center);
-
-      gen7_3DSTATE_SAMPLE_MASK(r->builder,
-            (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1,
-            vec->fb.num_samples);
-   }
+   /* 3DSTATE_MULTISAMPLE */
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_MULTISAMPLE)
+      gen8_3DSTATE_MULTISAMPLE(r->builder, &vec->rasterizer->rs);
+
+   /* 3DSTATE_SAMPLE_MASK */
+   if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_SAMPLE_MASK)
+      gen6_3DSTATE_SAMPLE_MASK(r->builder, &vec->rasterizer->rs);
 }
 
 static void
@@ -214,36 +200,38 @@
              const struct ilo_state_vector *vec,
              struct ilo_render_draw_session *session)
 {
-   int i;
-
    /* 3DSTATE_INDEX_BUFFER */
-   if (DIRTY(IB) || r->batch_bo_changed)
-      gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ib);
+   if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_INDEX_BUFFER) ||
+       DIRTY(IB) || r->batch_bo_changed)
+      gen8_3DSTATE_INDEX_BUFFER(r->builder, &vec->ve->vf, &vec->ib.ib);
 
    /* 3DSTATE_VF */
-   if (session->primitive_restart_changed) {
-      gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart,
-            vec->draw->restart_index);
-   }
+   if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF)
+      gen75_3DSTATE_VF(r->builder, &vec->ve->vf);
 
    /* 3DSTATE_VERTEX_BUFFERS */
-   if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed)
-      gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb);
+   if ((session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_BUFFERS) ||
+       DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed) {
+      gen6_3DSTATE_VERTEX_BUFFERS(r->builder, &vec->ve->vf,
+            vec->vb.vb, vec->ve->vb_count);
+   }
 
    /* 3DSTATE_VERTEX_ELEMENTS */
-   if (DIRTY(VE))
-      gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve);
+   if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VERTEX_ELEMENTS)
+      gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &vec->ve->vf);
+
+   gen8_3DSTATE_VF_TOPOLOGY(r->builder, vec->draw_info.topology);
 
-   gen8_3DSTATE_VF_TOPOLOGY(r->builder, vec->draw->mode);
+   if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_INSTANCING) {
+      const uint8_t attr_count = ilo_state_vf_get_attr_count(&vec->ve->vf);
+      uint8_t i;
 
-   for (i = 0; i < vec->ve->vb_count; i++) {
-      gen8_3DSTATE_VF_INSTANCING(r->builder, i,
-            vec->ve->instance_divisors[i]);
+      for (i = 0; i < attr_count; i++)
+         gen8_3DSTATE_VF_INSTANCING(r->builder, &vec->ve->vf, i);
    }
 
-   gen8_3DSTATE_VF_SGVS(r->builder,
-         false, 0, 0,
-         false, 0, 0);
+   if (session->vf_delta.dirty & ILO_STATE_VF_3DSTATE_VF_SGVS)
+      gen8_3DSTATE_VF_SGVS(r->builder, &vec->ve->vf);
 }
 
 void
@@ -281,7 +269,7 @@
    gen6_draw_sf_rect(render, vec, session);
    gen8_draw_vf(render, vec, session);
 
-   ilo_render_3dprimitive(render, vec->draw, &vec->ib);
+   ilo_render_3dprimitive(render, &vec->draw_info);
 }
 
 int
@@ -365,17 +353,13 @@
                                        const struct ilo_blitter *blitter,
                                        const struct ilo_render_rectlist_session *session)
 {
-   uint32_t op;
-
    ILO_DEV_ASSERT(r->dev, 8, 8);
 
    gen8_wa_pre_depth(r);
 
    if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
-                        ILO_BLITTER_USE_FB_STENCIL)) {
-      gen6_3DSTATE_DEPTH_BUFFER(r->builder,
-            &blitter->fb.dst.u.zs, true);
-   }
+                        ILO_BLITTER_USE_FB_STENCIL))
+      gen6_3DSTATE_DEPTH_BUFFER(r->builder, &blitter->fb.dst.u.zs);
 
    if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
       gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
@@ -393,27 +377,8 @@
    gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
          blitter->fb.width, blitter->fb.height);
 
-   switch (blitter->op) {
-   case ILO_BLITTER_RECTLIST_CLEAR_ZS:
-      op = 0;
-      if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH)
-         op |= GEN8_WM_HZ_DW1_DEPTH_CLEAR;
-      if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL)
-         op |= GEN8_WM_HZ_DW1_STENCIL_CLEAR;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_Z:
-      op = GEN8_WM_HZ_DW1_DEPTH_RESOLVE;
-      break;
-   case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
-      op = GEN8_WM_HZ_DW1_HIZ_RESOLVE;
-      break;
-   default:
-      op = 0;
-      break;
-   }
-
-   gen8_3DSTATE_WM_HZ_OP(r->builder, op, blitter->fb.width,
-         blitter->fb.height, blitter->fb.num_samples);
+   gen8_3DSTATE_WM_HZ_OP(r->builder, &blitter->fb.rs,
+         blitter->fb.width, blitter->fb.height);
 
    ilo_render_pipe_control(r, GEN6_PIPE_CONTROL_WRITE_IMM);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_gen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_gen.h	2015-09-16 14:36:09.000000000 +0000
@@ -31,6 +31,7 @@
 #include "core/ilo_builder.h"
 #include "core/ilo_builder_3d.h"
 #include "core/ilo_builder_render.h"
+#include "core/ilo_state_raster.h"
 
 #include "ilo_common.h"
 #include "ilo_state.h"
@@ -50,11 +51,7 @@
 
    struct intel_bo *workaround_bo;
 
-   uint32_t sample_pattern_1x;
-   uint32_t sample_pattern_2x;
-   uint32_t sample_pattern_4x;
-   uint32_t sample_pattern_8x[2];
-   uint32_t sample_pattern_16x[4];
+   struct ilo_state_sample_pattern sample_pattern;
 
    bool hw_ctx_changed;
 
@@ -85,10 +82,13 @@
        */
       uint32_t deferred_pipe_control_dw1;
 
-      bool primitive_restart;
       int reduced_prim;
       int so_max_vertices;
 
+      struct ilo_state_urb urb;
+      struct ilo_state_raster rs;
+      struct ilo_state_cc cc;
+
       uint32_t SF_VIEWPORT;
       uint32_t CLIP_VIEWPORT;
       uint32_t SF_CLIP_VIEWPORT; /* GEN7+ */
@@ -142,7 +142,12 @@
    int reduced_prim;
 
    bool prim_changed;
-   bool primitive_restart_changed;
+
+   struct ilo_state_urb_delta urb_delta;
+   struct ilo_state_vf_delta vf_delta;
+   struct ilo_state_raster_delta rs_delta;
+   struct ilo_state_viewport_delta vp_delta;
+   struct ilo_state_cc_delta cc_delta;
 
    /* dynamic states */
    bool viewport_changed;
@@ -180,6 +185,9 @@
 
    uint32_t idrt;
    int idrt_size;
+
+   uint32_t compute_data[6];
+   struct ilo_state_compute compute;
 };
 
 int
@@ -381,8 +389,7 @@
  */
 static inline void
 ilo_render_3dprimitive(struct ilo_render *r,
-                       const struct pipe_draw_info *info,
-                       const struct ilo_ib_state *ib)
+                       const struct gen6_3dprimitive_info *info)
 {
    ILO_DEV_ASSERT(r->dev, 6, 8);
 
@@ -391,9 +398,9 @@
 
    /* 3DPRIMITIVE */
    if (ilo_dev_gen(r->dev) >= ILO_GEN(7))
-      gen7_3DPRIMITIVE(r->builder, info, ib);
+      gen7_3DPRIMITIVE(r->builder, info);
    else
-      gen6_3DPRIMITIVE(r->builder, info, ib);
+      gen6_3DPRIMITIVE(r->builder, info);
 
    r->state.current_pipe_control_dw1 = 0;
    assert(!r->state.deferred_pipe_control_dw1);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,9 +43,6 @@
 void
 ilo_render_destroy(struct ilo_render *render);
 
-/**
- * Estimate the size of an action.
- */
 void
 ilo_render_get_sample_position(const struct ilo_render *render,
                                unsigned sample_count,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_media.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_media.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_media.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_media.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,7 @@
 #include "core/ilo_builder_mi.h"
 #include "core/ilo_builder_render.h"
 
+#include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
 
@@ -206,7 +207,7 @@
 
    gen6_state_base_address(render->builder, true);
 
-   gen6_MEDIA_VFE_STATE(render->builder, pcb_size, use_slm);
+   gen6_MEDIA_VFE_STATE(render->builder, &session->compute);
 
    if (pcb_size)
       gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_render_surface.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_render_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -29,11 +29,65 @@
 
 #include "ilo_common.h"
 #include "ilo_blitter.h"
+#include "ilo_resource.h"
+#include "ilo_shader.h"
 #include "ilo_state.h"
 #include "ilo_render_gen.h"
 
 #define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
 
+static inline uint32_t
+gen6_so_SURFACE_STATE(struct ilo_builder *builder,
+                      const struct pipe_stream_output_target *so,
+                      const struct pipe_stream_output_info *so_info,
+                      int so_index)
+{
+   struct ilo_state_surface_buffer_info info;
+   struct ilo_state_surface surf;
+
+   ILO_DEV_ASSERT(builder->dev, 6, 6);
+
+   memset(&info, 0, sizeof(info));
+
+   info.vma = ilo_resource_get_vma(so->buffer);
+   info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+   info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
+
+   info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB;
+
+   switch (so_info->output[so_index].num_components) {
+   case 1:
+      info.format = GEN6_FORMAT_R32_FLOAT;
+      info.format_size = 4;
+      break;
+   case 2:
+      info.format = GEN6_FORMAT_R32G32_FLOAT;
+      info.format_size = 8;
+      break;
+   case 3:
+      info.format = GEN6_FORMAT_R32G32B32_FLOAT;
+      info.format_size = 12;
+      break;
+   case 4:
+      info.format = GEN6_FORMAT_R32G32B32A32_FLOAT;
+      info.format_size = 16;
+      break;
+   default:
+      assert(!"unexpected SO components length");
+      info.format = GEN6_FORMAT_R32_FLOAT;
+      info.format_size = 4;
+      break;
+   }
+
+   info.struct_size =
+      so_info->stride[so_info->output[so_index].output_buffer] * 4;
+
+   memset(&surf, 0, sizeof(surf));
+   ilo_state_surface_init_for_buffer(&surf, builder->dev, &info);
+
+   return gen6_SURFACE_STATE(builder, &surf);
+}
+
 static void
 gen6_emit_draw_surface_rt(struct ilo_render *r,
                           const struct ilo_state_vector *vec,
@@ -64,11 +118,9 @@
             (const struct ilo_surface_cso *) fb->state.cbufs[i];
 
          assert(surface->is_rt);
-         surface_state[i] =
-            gen6_SURFACE_STATE(r->builder, &surface->u.rt, true);
+         surface_state[i] = gen6_SURFACE_STATE(r->builder, &surface->u.rt);
       } else {
-         surface_state[i] =
-            gen6_SURFACE_STATE(r->builder, &fb->null_rt, true);
+         surface_state[i] = gen6_SURFACE_STATE(r->builder, &fb->null_rt);
       }
    }
 }
@@ -173,8 +225,7 @@
          const struct ilo_view_cso *cso =
             (const struct ilo_view_cso *) view->states[i];
 
-         surface_state[i] =
-            gen6_SURFACE_STATE(r->builder, &cso->surface, false);
+         surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface);
       } else {
          surface_state[i] = 0;
       }
@@ -228,12 +279,10 @@
    for (i = 0; i < count; i++) {
       const struct ilo_cbuf_cso *cso = &cbuf->cso[i];
 
-      if (cso->resource) {
-         surface_state[i] = gen6_SURFACE_STATE(r->builder,
-               &cso->surface, false);
-      } else {
+      if (cso->resource)
+         surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface);
+      else
          surface_state[i] = 0;
-      }
    }
 }
 
@@ -406,8 +455,7 @@
          const struct ilo_view_cso *cso =
             (const struct ilo_view_cso *) view->states[i];
 
-         surface_state[i] =
-            gen6_SURFACE_STATE(r->builder, &cso->surface, false);
+         surface_state[i] = gen6_SURFACE_STATE(r->builder, &cso->surface);
       } else {
          surface_state[i] = 0;
       }
@@ -421,7 +469,8 @@
 {
    const struct ilo_shader_state *cs = vec->cs;
    uint32_t *surface_state = r->state.cs.SURFACE_STATE;
-   struct ilo_view_surface view;
+   struct ilo_state_surface_buffer_info info;
+   struct ilo_state_surface surf;
    int base, count;
 
    ILO_DEV_ASSERT(r->dev, 7, 7.5);
@@ -432,15 +481,23 @@
    if (!count)
       return;
 
-   ilo_gpe_init_view_surface_for_buffer(r->dev,
-         ilo_buffer(session->input->buffer),
-         session->input->buffer_offset,
-         session->input->buffer_size,
-         1, PIPE_FORMAT_NONE,
-         false, false, &view);
+   memset(&info, 0, sizeof(info));
+
+   info.vma = ilo_resource_get_vma(session->input->buffer);
+   info.offset = session->input->buffer_offset;
+   info.size = session->input->buffer_size;
+
+   info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
+   info.format = GEN6_FORMAT_RAW;
+   info.format_size = 1;
+   info.struct_size = 1;
+   info.readonly = true;
+
+   memset(&surf, 0, sizeof(surf));
+   ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
 
    assert(count == 1 && session->input->buffer);
-   surface_state[base] = gen6_SURFACE_STATE(r->builder, &view, false);
+   surface_state[base] = gen6_SURFACE_STATE(r->builder, &surf);
 }
 
 static void
@@ -482,15 +539,25 @@
    surface_state += base;
    for (i = 0; i < count; i++) {
       if (i < vec->global_binding.count && bindings[i].resource) {
-         const struct ilo_buffer *buf = ilo_buffer(bindings[i].resource);
-         struct ilo_view_surface view;
+         struct ilo_state_surface_buffer_info info;
+         struct ilo_state_surface surf;
 
          assert(bindings[i].resource->target == PIPE_BUFFER);
 
-         ilo_gpe_init_view_surface_for_buffer(r->dev, buf, 0, buf->bo_size,
-               1, PIPE_FORMAT_NONE, true, true, &view);
-         surface_state[i] =
-            gen6_SURFACE_STATE(r->builder, &view, true);
+         memset(&info, 0, sizeof(info));
+
+         info.vma = ilo_resource_get_vma(bindings[i].resource);
+         info.size = info.vma->vm_size;
+
+         info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
+         info.format = GEN6_FORMAT_RAW;
+         info.format_size = 1;
+         info.struct_size = 1;
+
+         memset(&surf, 0, sizeof(surf));
+         ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
+
+         surface_state[i] = gen6_SURFACE_STATE(r->builder, &surf);
       } else {
          surface_state[i] = 0;
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_resource.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_resource.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_resource.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_resource.c	2015-09-16 14:36:09.000000000 +0000
@@ -25,7 +25,12 @@
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#include "core/ilo_state_vf.h"
+#include "core/ilo_state_sol.h"
+#include "core/ilo_state_surface.h"
+
 #include "ilo_screen.h"
+#include "ilo_format.h"
 #include "ilo_resource.h"
 
 /*
@@ -83,6 +88,134 @@
                           PIPE_BIND_STREAM_OUTPUT)) ? false : true;
 }
 
+static enum gen_surface_type
+get_surface_type(enum pipe_texture_target target)
+{
+   switch (target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      return GEN6_SURFTYPE_1D;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+      return GEN6_SURFTYPE_2D;
+   case PIPE_TEXTURE_3D:
+      return GEN6_SURFTYPE_3D;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return GEN6_SURFTYPE_CUBE;
+   default:
+      assert(!"unknown texture target");
+      return GEN6_SURFTYPE_NULL;
+   }
+}
+
+static enum pipe_format
+resource_get_image_format(const struct pipe_resource *templ,
+                          const struct ilo_dev *dev,
+                          bool *separate_stencil_ret)
+{
+   enum pipe_format format = templ->format;
+   bool separate_stencil;
+
+   /* silently promote ETC1 */
+   if (templ->format == PIPE_FORMAT_ETC1_RGB8)
+      format = PIPE_FORMAT_R8G8B8X8_UNORM;
+
+   /* separate stencil buffers */
+   separate_stencil = false;
+   if ((templ->bind & PIPE_BIND_DEPTH_STENCIL) &&
+       util_format_is_depth_and_stencil(templ->format)) {
+      switch (templ->format) {
+      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+         /* Gen6 requires HiZ to be available for all levels */
+         if (ilo_dev_gen(dev) >= ILO_GEN(7) || templ->last_level == 0) {
+            format = PIPE_FORMAT_Z32_FLOAT;
+            separate_stencil = true;
+         }
+         break;
+      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+         format = PIPE_FORMAT_Z24X8_UNORM;
+         separate_stencil = true;
+         break;
+      default:
+         break;
+      }
+   }
+
+   if (separate_stencil_ret)
+      *separate_stencil_ret = separate_stencil;
+
+   return format;
+}
+
+static inline enum gen_surface_format
+pipe_to_surface_format(const struct ilo_dev *dev, enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+   case PIPE_FORMAT_Z32_FLOAT:
+      return GEN6_FORMAT_R32_FLOAT;
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      return GEN6_FORMAT_R24_UNORM_X8_TYPELESS;
+   case PIPE_FORMAT_Z16_UNORM:
+      return GEN6_FORMAT_R16_UNORM;
+   case PIPE_FORMAT_S8_UINT:
+      return GEN6_FORMAT_R8_UINT;
+   default:
+      return ilo_format_translate_color(dev, format);
+   }
+}
+
+static void
+resource_get_image_info(const struct pipe_resource *templ,
+                        const struct ilo_dev *dev,
+                        enum pipe_format image_format,
+                        struct ilo_image_info *info)
+{
+   memset(info, 0, sizeof(*info));
+
+   info->type = get_surface_type(templ->target);
+
+   info->format = pipe_to_surface_format(dev, image_format);
+   info->interleaved_stencil = util_format_is_depth_and_stencil(image_format);
+   info->is_integer = util_format_is_pure_integer(image_format);
+   info->compressed = util_format_is_compressed(image_format);
+   info->block_width = util_format_get_blockwidth(image_format);
+   info->block_height = util_format_get_blockheight(image_format);
+   info->block_size = util_format_get_blocksize(image_format);
+
+   info->width = templ->width0;
+   info->height = templ->height0;
+   info->depth = templ->depth0;
+   info->array_size = templ->array_size;
+   info->level_count = templ->last_level + 1;
+   info->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
+
+   info->aux_disable = (templ->usage == PIPE_USAGE_STAGING);
+
+   if (templ->bind & PIPE_BIND_LINEAR)
+      info->valid_tilings = 1 << GEN6_TILING_NONE;
+
+   /*
+    * Tiled images must be mapped via GTT to get a linear view.  Prefer linear
+    * images when the image size is greater than one-fourth of the mappable
+    * aperture.
+    */
+   if (templ->bind & (PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_TRANSFER_READ))
+      info->prefer_linear_threshold = dev->aperture_mappable / 4;
+
+   info->bind_surface_sampler = (templ->bind & PIPE_BIND_SAMPLER_VIEW);
+   info->bind_surface_dp_render = (templ->bind & PIPE_BIND_RENDER_TARGET);
+   info->bind_surface_dp_typed = (templ->bind &
+         (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE));
+   info->bind_zs = (templ->bind & PIPE_BIND_DEPTH_STENCIL);
+   info->bind_scanout = (templ->bind & PIPE_BIND_SCANOUT);
+   info->bind_cursor = (templ->bind & PIPE_BIND_CURSOR);
+}
+
 static enum gen_surface_tiling
 winsys_to_surface_tiling(enum intel_tiling_mode tiling)
 {
@@ -178,8 +311,8 @@
    if (!bo)
       return false;
 
-   ilo_image_set_bo(&tex->image, bo);
-   intel_bo_unref(bo);
+   intel_bo_unref(tex->vma.bo);
+   ilo_vma_set_bo(&tex->vma, &is->dev, bo, 0);
 
    return true;
 }
@@ -206,7 +339,7 @@
 
    tex->separate_s8 = ilo_texture(s8);
 
-   assert(tex->separate_s8->image.format == PIPE_FORMAT_S8_UINT);
+   assert(tex->separate_s8->image_format == PIPE_FORMAT_S8_UINT);
 
    return true;
 }
@@ -215,15 +348,16 @@
 tex_create_hiz(struct ilo_texture *tex)
 {
    const struct pipe_resource *templ = &tex->base;
+   const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
    struct ilo_screen *is = ilo_screen(tex->base.screen);
    struct intel_bo *bo;
 
-   bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture",
-         tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+   bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture", size, false);
    if (!bo)
       return false;
 
-   ilo_image_set_aux_bo(&tex->image, bo);
+   ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+   ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
 
    if (tex->imported) {
       unsigned lv;
@@ -246,17 +380,18 @@
 static bool
 tex_create_mcs(struct ilo_texture *tex)
 {
+   const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
    struct ilo_screen *is = ilo_screen(tex->base.screen);
    struct intel_bo *bo;
 
    assert(tex->image.aux.enables == (1 << (tex->base.last_level + 1)) - 1);
 
-   bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture",
-         tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+   bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture", size, false);
    if (!bo)
       return false;
 
-   ilo_image_set_aux_bo(&tex->image, bo);
+   ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+   ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
 
    return true;
 }
@@ -267,7 +402,8 @@
    if (tex->separate_s8)
       tex_destroy(tex->separate_s8);
 
-   ilo_image_cleanup(&tex->image);
+   intel_bo_unref(tex->vma.bo);
+   intel_bo_unref(tex->aux_vma.bo);
 
    tex_free_slices(tex);
    FREE(tex);
@@ -276,23 +412,13 @@
 static bool
 tex_alloc_bos(struct ilo_texture *tex)
 {
-   struct ilo_screen *is = ilo_screen(tex->base.screen);
-
    if (!tex->imported && !tex_create_bo(tex))
       return false;
 
-   /* allocate separate stencil resource */
-   if (tex->image.separate_stencil && !tex_create_separate_stencil(tex))
-      return false;
-
    switch (tex->image.aux.type) {
    case ILO_IMAGE_AUX_HIZ:
-      if (!tex_create_hiz(tex)) {
-         /* Separate Stencil Buffer requires HiZ to be enabled */
-         if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
-             tex->image.separate_stencil)
-            return false;
-      }
+      if (!tex_create_hiz(tex))
+         return false;
       break;
    case ILO_IMAGE_AUX_MCS:
       if (!tex_create_mcs(tex))
@@ -305,9 +431,10 @@
    return true;
 }
 
-static bool
+static struct intel_bo *
 tex_import_handle(struct ilo_texture *tex,
-                  const struct winsys_handle *handle)
+                  const struct winsys_handle *handle,
+                  struct ilo_image_info *info)
 {
    struct ilo_screen *is = ilo_screen(tex->base.screen);
    const struct pipe_resource *templ = &tex->base;
@@ -318,46 +445,94 @@
 
    bo = intel_winsys_import_handle(is->dev.winsys, name, handle,
          tex->image.bo_height, &tiling, &pitch);
-   if (!bo)
-      return false;
+   /* modify image info */
+   if (bo) {
+      const uint8_t valid_tilings = 1 << winsys_to_surface_tiling(tiling);
 
-   if (!ilo_image_init_for_imported(&tex->image, &is->dev, templ,
-            winsys_to_surface_tiling(tiling), pitch)) {
-      ilo_err("failed to import handle for texture\n");
-      intel_bo_unref(bo);
-      return false;
-   }
+      if (info->valid_tilings && !(info->valid_tilings & valid_tilings)) {
+         intel_bo_unref(bo);
+         return NULL;
+      }
 
-   ilo_image_set_bo(&tex->image, bo);
-   intel_bo_unref(bo);
+      info->valid_tilings = valid_tilings;
+      info->force_bo_stride = pitch;
 
-   tex->imported = true;
+      /* assume imported RTs are also scanouts (RT-ness is a bind flag) */
+      if (!info->bind_scanout)
+         info->bind_scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
+   }
 
-   return true;
+   return bo;
 }
 
 static bool
 tex_init_image(struct ilo_texture *tex,
-               const struct winsys_handle *handle)
+               const struct winsys_handle *handle,
+               bool *separate_stencil)
 {
    struct ilo_screen *is = ilo_screen(tex->base.screen);
    const struct pipe_resource *templ = &tex->base;
    struct ilo_image *img = &tex->image;
+   struct intel_bo *imported_bo = NULL; /* set only when a handle is given */
+   struct ilo_image_info info;
+
+   tex->image_format = resource_get_image_format(templ,
+         &is->dev, separate_stencil);
+   resource_get_image_info(templ, &is->dev, tex->image_format, &info);
 
    if (handle) {
-      if (!tex_import_handle(tex, handle))
+      imported_bo = tex_import_handle(tex, handle, &info);
+      if (!imported_bo)
          return false;
-   } else {
-      ilo_image_init(img, &is->dev, templ);
    }
 
-   if (img->bo_height > ilo_max_resource_size / img->bo_stride)
+   if (!ilo_image_init(img, &is->dev, &info)) {
+      intel_bo_unref(imported_bo);
       return false;
+   }
+
+   /*
+    * HiZ requires 8x4 alignment and some levels might need HiZ disabled.  It
+    * is generally fine except on Gen6, where HiZ and separate stencil must be
+    * enabled together.  For PIPE_FORMAT_Z24X8_UNORM with separate stencil, we
+    * can live with stencil values being interleaved for levels where HiZ is
+    * disabled.  But it is not the case for PIPE_FORMAT_Z32_FLOAT with
+    * separate stencil.  If HiZ was disabled for a level, we had to change the
+    * format to PIPE_FORMAT_Z32_FLOAT_S8X24_UINT for the level and that format
+    * had a different bpp.  In other words, HiZ has to be available for all
+    * levels.
+    */
+   if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+       templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+       tex->image_format == PIPE_FORMAT_Z32_FLOAT &&
+       img->aux.enables != (1 << (templ->last_level + 1)) - 1) {
+      tex->image_format = templ->format;
+      info.format = pipe_to_surface_format(&is->dev, tex->image_format);
+      info.interleaved_stencil = true;
+
+      memset(img, 0, sizeof(*img));
+      if (!ilo_image_init(img, &is->dev, &info)) {
+         intel_bo_unref(imported_bo);
+         return false;
+      }
+   }
+
+   if (img->bo_height > ilo_max_resource_size / img->bo_stride ||
+       !ilo_vma_init(&tex->vma, &is->dev, img->bo_stride * img->bo_height,
+          4096)) {
+      intel_bo_unref(imported_bo);
+      return false;
+   }
+
+   if (imported_bo) {
+      ilo_vma_set_bo(&tex->vma, &is->dev, imported_bo, 0);
+      tex->imported = true;
+   }
 
    if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
       /* require on-the-fly tiling/untiling or format conversion */
-      if (img->tiling == GEN8_TILING_W || img->separate_stencil ||
-          img->format != templ->format)
+      if (img->tiling == GEN8_TILING_W || *separate_stencil ||
+          tex->image_format != templ->format)
          return false;
    }
 
@@ -373,6 +548,7 @@
            const struct winsys_handle *handle)
 {
    struct ilo_texture *tex;
+   bool separate_stencil;
 
    tex = CALLOC_STRUCT(ilo_texture);
    if (!tex)
@@ -382,12 +558,13 @@
    tex->base.screen = screen;
    pipe_reference_init(&tex->base.reference, 1);
 
-   if (!tex_init_image(tex, handle)) {
+   if (!tex_init_image(tex, handle, &separate_stencil)) {
       FREE(tex);
       return NULL;
    }
 
-   if (!tex_alloc_bos(tex)) {
+   if (!tex_alloc_bos(tex) ||
+       (separate_stencil && !tex_create_separate_stencil(tex))) {
       tex_destroy(tex);
       return NULL;
    }
@@ -408,7 +585,7 @@
    else
       tiling = surface_to_winsys_tiling(tex->image.tiling);
 
-   err = intel_winsys_export_handle(is->dev.winsys, tex->image.bo, tiling,
+   err = intel_winsys_export_handle(is->dev.winsys, tex->vma.bo, tiling,
          tex->image.bo_stride, tex->image.bo_height, handle);
 
    return !err;
@@ -422,13 +599,12 @@
    const bool cpu_init = resource_get_cpu_init(&buf->base);
    struct intel_bo *bo;
 
-   bo = intel_winsys_alloc_bo(is->dev.winsys, name,
-         buf->buffer.bo_size, cpu_init);
+   bo = intel_winsys_alloc_bo(is->dev.winsys, name, buf->bo_size, cpu_init);
    if (!bo)
       return false;
 
-   ilo_buffer_set_bo(&buf->buffer, bo);
-   intel_bo_unref(bo);
+   intel_bo_unref(buf->vma.bo);
+   ilo_vma_set_bo(&buf->vma, &is->dev, bo, 0);
 
    return true;
 }
@@ -436,7 +612,7 @@
 static void
 buf_destroy(struct ilo_buffer_resource *buf)
 {
-   ilo_buffer_cleanup(&buf->buffer);
+   intel_bo_unref(buf->vma.bo);
    FREE(buf);
 }
 
@@ -445,6 +621,8 @@
 {
    const struct ilo_screen *is = ilo_screen(screen);
    struct ilo_buffer_resource *buf;
+   uint32_t alignment;
+   unsigned size;
 
    buf = CALLOC_STRUCT(ilo_buffer_resource);
    if (!buf)
@@ -454,11 +632,35 @@
    buf->base.screen = screen;
    pipe_reference_init(&buf->base.reference, 1);
 
-   ilo_buffer_init(&buf->buffer, &is->dev,
-         templ->width0, templ->bind, templ->flags);
+   size = templ->width0;
+
+   /*
+    * As noted in ilo_format_translate(), we treat some 3-component formats as
+    * 4-component formats to work around hardware limitations.  Imagine the
+    * case where the vertex buffer holds a single PIPE_FORMAT_R16G16B16_FLOAT
+    * vertex, and buf->bo_size is 6.  The hardware would fail to fetch it at
+    * boundary check because the vertex buffer is expected to hold a
+    * PIPE_FORMAT_R16G16B16A16_FLOAT vertex and that takes at least 8 bytes.
+    *
+    * For the workaround to work, we should add 2 to the bo size.  But that
+    * would waste a page when the bo size is already page aligned.  Let's
+    * round it to page size for now and revisit this when needed.
+    */
+   if ((templ->bind & PIPE_BIND_VERTEX_BUFFER) &&
+       ilo_dev_gen(&is->dev) < ILO_GEN(7.5))
+      size = align(size, 4096);
+
+   if (templ->bind & PIPE_BIND_VERTEX_BUFFER)
+      size = ilo_state_vertex_buffer_size(&is->dev, size, &alignment);
+   if (templ->bind & PIPE_BIND_INDEX_BUFFER)
+      size = ilo_state_index_buffer_size(&is->dev, size, &alignment);
+   if (templ->bind & PIPE_BIND_STREAM_OUTPUT)
+      size = ilo_state_sol_buffer_size(&is->dev, size, &alignment);
+
+   buf->bo_size = size;
+   ilo_vma_init(&buf->vma, &is->dev, buf->bo_size, 4096);
 
-   if (buf->buffer.bo_size < templ->width0 ||
-       buf->buffer.bo_size > ilo_max_resource_size ||
+   if (buf->bo_size < templ->width0 || buf->bo_size > ilo_max_resource_size ||
        !buf_create_bo(buf)) {
       FREE(buf);
       return NULL;
@@ -471,13 +673,30 @@
 ilo_can_create_resource(struct pipe_screen *screen,
                         const struct pipe_resource *templ)
 {
+   struct ilo_screen *is = ilo_screen(screen);
+   enum pipe_format image_format;
+   struct ilo_image_info info;
    struct ilo_image img;
 
    if (templ->target == PIPE_BUFFER)
       return (templ->width0 <= ilo_max_resource_size);
 
+   image_format = resource_get_image_format(templ, &is->dev, NULL);
+   resource_get_image_info(templ, &is->dev, image_format, &info);
+
    memset(&img, 0, sizeof(img));
-   ilo_image_init(&img, &ilo_screen(screen)->dev, templ);
+   ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
+
+   /* as in tex_init_image() */
+   if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+       templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+       image_format == PIPE_FORMAT_Z32_FLOAT &&
+       img.aux.enables != (1 << templ->last_level)) {
+      info.format = pipe_to_surface_format(&is->dev, templ->format);
+      info.interleaved_stencil = true;
+      memset(&img, 0, sizeof(img));
+      ilo_image_init(&img, &ilo_screen(screen)->dev, &info);
+   }
 
    return (img.bo_height <= ilo_max_resource_size / img.bo_stride);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_resource.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_resource.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_resource.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_resource.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,8 +29,8 @@
 #define ILO_RESOURCE_H
 
 #include "core/intel_winsys.h"
-#include "core/ilo_buffer.h"
 #include "core/ilo_image.h"
+#include "core/ilo_vma.h"
 
 #include "ilo_common.h"
 #include "ilo_screen.h"
@@ -92,7 +92,10 @@
 
    bool imported;
 
+   enum pipe_format image_format;
    struct ilo_image image;
+   struct ilo_vma vma;
+   struct ilo_vma aux_vma;
 
    /* XXX thread-safety */
    struct ilo_texture_slice *slices[PIPE_MAX_TEXTURE_LEVELS];
@@ -103,14 +106,15 @@
 struct ilo_buffer_resource {
    struct pipe_resource base;
 
-   struct ilo_buffer buffer;
+   uint32_t bo_size;
+   struct ilo_vma vma;
 };
 
-static inline struct ilo_buffer *
-ilo_buffer(struct pipe_resource *res)
+static inline struct ilo_buffer_resource *
+ilo_buffer_resource(struct pipe_resource *res)
 {
-   return (res && res->target == PIPE_BUFFER) ?
-      &((struct ilo_buffer_resource *) res)->buffer : NULL;
+   return (struct ilo_buffer_resource *)
+      ((res && res->target == PIPE_BUFFER) ? res : NULL);
 }
 
 static inline struct ilo_texture *
@@ -127,13 +131,14 @@
 ilo_resource_rename_bo(struct pipe_resource *res);
 
 /**
- * Return the bo of the resource.
+ * Return the VMA of the resource.
  */
-static inline struct intel_bo *
-ilo_resource_get_bo(struct pipe_resource *res)
+static inline const struct ilo_vma *
+ilo_resource_get_vma(struct pipe_resource *res)
 {
    return (res->target == PIPE_BUFFER) ?
-      ilo_buffer(res)->bo : ilo_texture(res)->image.bo;
+      &((struct ilo_buffer_resource *) res)->vma :
+      &((struct ilo_texture *) res)->vma;
 }
 
 static inline struct ilo_texture_slice *
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,11 +31,10 @@
 #include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 #include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */
-#include "core/ilo_fence.h"
-#include "core/ilo_format.h"
 #include "core/intel_winsys.h"
 
 #include "ilo_context.h"
+#include "ilo_format.h"
 #include "ilo_resource.h"
 #include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */
 #include "ilo_public.h"
@@ -43,8 +42,7 @@
 
 struct pipe_fence_handle {
    struct pipe_reference reference;
-
-   struct ilo_fence fence;
+   struct intel_bo *seqno_bo;
 };
 
 static float
@@ -195,6 +193,7 @@
       uint32_t max_clock_frequency;
       uint32_t max_compute_units;
       uint32_t images_supported;
+      uint32_t subgroup_size;
    } val;
    const void *ptr;
    int size;
@@ -286,6 +285,13 @@
       ptr = &val.images_supported;
       size = sizeof(val.images_supported);
       break;
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+      /* best case is actually SIMD32 */
+      val.subgroup_size = 16;
+
+      ptr = &val.subgroup_size;
+      size = sizeof(val.subgroup_size);
+      break;
    default:
       ptr = NULL;
       size = 0;
@@ -347,7 +353,7 @@
    case PIPE_CAP_INDEP_BLEND_FUNC:
       return true;
    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 2048 : 512;
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7.5)) ? 2048 : 512;
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
@@ -445,6 +451,8 @@
    case PIPE_CAP_TEXTURE_GATHER_SM5:
       return 0;
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
       return true;
    case PIPE_CAP_FAKE_SW_MSAA:
    case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -459,6 +467,8 @@
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -642,7 +652,7 @@
 
    STATIC_ASSERT(&((struct pipe_fence_handle *) NULL)->reference == NULL);
    if (pipe_reference(&old->reference, &fence->reference)) {
-      ilo_fence_cleanup(&old->fence);
+      intel_bo_unref(old->seqno_bo);
       FREE(old);
    }
 }
@@ -655,21 +665,18 @@
    const int64_t wait_timeout = (timeout > INT64_MAX) ? -1 : timeout;
    bool signaled;
 
-   signaled = ilo_fence_wait(&fence->fence, wait_timeout);
+   signaled = (!fence->seqno_bo ||
+         intel_bo_wait(fence->seqno_bo, wait_timeout) == 0);
+
    /* XXX not thread safe */
-   if (signaled)
-      ilo_fence_set_seq_bo(&fence->fence, NULL);
+   if (signaled && fence->seqno_bo) {
+      intel_bo_unref(fence->seqno_bo);
+      fence->seqno_bo = NULL;
+   }
 
    return signaled;
 }
 
-static boolean
-ilo_screen_fence_signalled(struct pipe_screen *screen,
-                           struct pipe_fence_handle *fence)
-{
-   return ilo_screen_fence_finish(screen, fence, 0);
-}
-
 /**
  * Create a fence for \p bo.  When \p bo is not NULL, it must be submitted
  * before waited on or checked.
@@ -677,7 +684,6 @@
 struct pipe_fence_handle *
 ilo_screen_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
 {
-   struct ilo_screen *is = ilo_screen(screen);
    struct pipe_fence_handle *fence;
 
    fence = CALLOC_STRUCT(pipe_fence_handle);
@@ -686,8 +692,7 @@
 
    pipe_reference_init(&fence->reference, 1);
 
-   ilo_fence_init(&fence->fence, &is->dev);
-   ilo_fence_set_seq_bo(&fence->fence, bo);
+   fence->seqno_bo = intel_bo_ref(bo);
 
    return fence;
 }
@@ -697,7 +702,7 @@
 {
    struct ilo_screen *is = ilo_screen(screen);
 
-   ilo_dev_cleanup(&is->dev);
+   intel_winsys_destroy(is->dev.winsys);
 
    FREE(is);
 }
@@ -738,7 +743,6 @@
    is->base.flush_frontbuffer = NULL;
 
    is->base.fence_reference = ilo_screen_fence_reference;
-   is->base.fence_signalled = ilo_screen_fence_signalled;
    is->base.fence_finish = ilo_screen_fence_finish;
 
    is->base.get_driver_query_info = NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_shader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_shader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_shader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_shader.c	2015-09-16 14:36:09.000000000 +0000
@@ -27,7 +27,6 @@
 
 #include "genhw/genhw.h" /* for SBE setup */
 #include "core/ilo_builder.h"
-#include "core/ilo_state_3d.h"
 #include "core/intel_winsys.h"
 #include "shader/ilo_shader_internal.h"
 #include "tgsi/tgsi_parse.h"
@@ -557,39 +556,255 @@
 }
 
 static void
-copy_so_info(struct ilo_shader *sh,
-             const struct pipe_stream_output_info *so_info)
+init_shader_urb(const struct ilo_shader *kernel,
+                const struct ilo_shader_state *state,
+                struct ilo_state_shader_urb_info *urb)
+{
+   urb->cv_input_attr_count = kernel->in.count;
+   urb->read_base = 0;
+   urb->read_count = kernel->in.count;
+
+   urb->output_attr_count = kernel->out.count;
+   urb->user_cull_enables = 0x0;
+   urb->user_clip_enables = 0x0;
+}
+
+static void
+init_shader_kernel(const struct ilo_shader *kernel,
+                   const struct ilo_shader_state *state,
+                   struct ilo_state_shader_kernel_info *kern)
+{
+   kern->offset = 0;
+   kern->grf_start = kernel->in.start_grf;
+   kern->pcb_attr_count =
+      (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
+   kern->scratch_size = 0;
+}
+
+static void
+init_shader_resource(const struct ilo_shader *kernel,
+                     const struct ilo_shader_state *state,
+                     struct ilo_state_shader_resource_info *resource)
+{
+   resource->sampler_count = state->info.num_samplers;
+   resource->surface_count = 0;
+   resource->has_uav = false;
+}
+
+static void
+init_vs(struct ilo_shader *kernel,
+        const struct ilo_shader_state *state)
+{
+   struct ilo_state_vs_info info;
+
+   memset(&info, 0, sizeof(info));
+
+   init_shader_urb(kernel, state, &info.urb);
+   init_shader_kernel(kernel, state, &info.kernel);
+   init_shader_resource(kernel, state, &info.resource);
+   info.dispatch_enable = true;
+   info.stats_enable = true;
+
+   if (ilo_dev_gen(state->info.dev) == ILO_GEN(6) && kernel->stream_output) {
+      struct ilo_state_gs_info gs_info;
+
+      memset(&gs_info, 0, sizeof(gs_info));
+
+      gs_info.urb.cv_input_attr_count = kernel->out.count;
+      gs_info.urb.read_count = kernel->out.count;
+      gs_info.kernel.grf_start = kernel->gs_start_grf;
+      gs_info.sol.sol_enable = true;
+      gs_info.sol.stats_enable = true;
+      gs_info.sol.render_disable = kernel->variant.u.vs.rasterizer_discard;
+      gs_info.sol.svbi_post_inc = kernel->svbi_post_inc;
+      gs_info.sol.tristrip_reorder = GEN7_REORDER_LEADING;
+      gs_info.dispatch_enable = true;
+      gs_info.stats_enable = true;
+
+      ilo_state_vs_init(&kernel->cso.vs_sol.vs, state->info.dev, &info);
+      ilo_state_gs_init(&kernel->cso.vs_sol.sol, state->info.dev, &gs_info);
+   } else {
+      ilo_state_vs_init(&kernel->cso.vs, state->info.dev, &info);
+   }
+}
+
+static void
+init_gs(struct ilo_shader *kernel,
+        const struct ilo_shader_state *state)
 {
-   unsigned i, attr;
+   const struct pipe_stream_output_info *so_info = &state->info.stream_output;
+   struct ilo_state_gs_info info;
+
+   memset(&info, 0, sizeof(info));
+
+   init_shader_urb(kernel, state, &info.urb);
+   init_shader_kernel(kernel, state, &info.kernel);
+   init_shader_resource(kernel, state, &info.resource);
+   info.dispatch_enable = true;
+   info.stats_enable = true;
 
-   if (!so_info->num_outputs)
+   if (so_info->num_outputs > 0) {
+      info.sol.sol_enable = true;
+      info.sol.stats_enable = true;
+      info.sol.render_disable = kernel->variant.u.gs.rasterizer_discard;
+      info.sol.tristrip_reorder = GEN7_REORDER_LEADING;
+   }
+
+   ilo_state_gs_init(&kernel->cso.gs, state->info.dev, &info);
+}
+
+static void
+init_ps(struct ilo_shader *kernel,
+        const struct ilo_shader_state *state)
+{
+   struct ilo_state_ps_info info;
+
+   memset(&info, 0, sizeof(info));
+
+   init_shader_kernel(kernel, state, &info.kernel_8);
+   init_shader_resource(kernel, state, &info.resource);
+
+   info.io.has_rt_write = true;
+   info.io.posoffset = GEN6_POSOFFSET_NONE;
+   info.io.attr_count = kernel->in.count;
+   info.io.use_z = kernel->in.has_pos;
+   info.io.use_w = kernel->in.has_pos;
+   info.io.use_coverage_mask = false;
+   info.io.pscdepth = (kernel->out.has_pos) ?
+      GEN7_PSCDEPTH_ON : GEN7_PSCDEPTH_OFF;
+   info.io.write_pixel_mask = kernel->has_kill;
+   info.io.write_omask = false;
+
+   info.params.sample_mask = 0x1;
+   info.params.earlyz_control_psexec = false;
+   info.params.alpha_may_kill = false;
+   info.params.dual_source_blending = false;
+   info.params.has_writeable_rt = true;
+
+   info.valid_kernels = GEN6_PS_DISPATCH_8;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 284:
+    *
+    *     "(MSDISPMODE_PERSAMPLE) This is the high-quality multisample mode
+    *      where (over and above PERPIXEL mode) the PS is run for each covered
+    *      sample. This mode is also used for "normal" non-multisample
+    *      rendering (aka 1X), given Number of Multisamples is programmed to
+    *      NUMSAMPLES_1."
+    */
+   info.per_sample_dispatch = true;
+
+   info.rt_clear_enable = false;
+   info.rt_resolve_enable = false;
+   info.cv_per_sample_interp = false;
+   info.cv_has_earlyz_op = false;
+   info.sample_count_one = true;
+   info.cv_has_depth_buffer = true;
+
+   ilo_state_ps_init(&kernel->cso.ps, state->info.dev, &info);
+
+   /* remember current parameters */
+   kernel->ps_params = info.params;
+}
+
+static void
+init_sol(struct ilo_shader *kernel,
+         const struct ilo_dev *dev,
+         const struct pipe_stream_output_info *so_info,
+         bool rasterizer_discard)
+{
+   struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS];
+   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
+   struct ilo_state_sol_info info;
+   unsigned i;
+
+   if (!so_info->num_outputs) {
+      ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard);
       return;
+   }
 
-   sh->so_info = *so_info;
+   memset(&info, 0, sizeof(info));
+   info.data = kernel->sol_data;
+   info.data_size = sizeof(kernel->sol_data);
+   info.sol_enable = true;
+   info.stats_enable = true;
+   info.tristrip_reorder = GEN7_REORDER_TRAILING;
+   info.render_disable = rasterizer_discard;
+   info.render_stream = 0;
+
+   for (i = 0; i < 4; i++) {
+      info.buffer_strides[i] = so_info->stride[i] * 4;
 
+      info.streams[i].cv_vue_attr_count = kernel->out.count;
+      info.streams[i].decls = decls[i];
+   }
+
+   memset(decls, 0, sizeof(decls));
+   memset(buf_offsets, 0, sizeof(buf_offsets));
    for (i = 0; i < so_info->num_outputs; i++) {
+      const unsigned stream = so_info->output[i].stream;
+      const unsigned buffer = so_info->output[i].output_buffer;
+      struct ilo_state_sol_decl_info *decl;
+      unsigned attr;
+
       /* figure out which attribute is sourced */
-      for (attr = 0; attr < sh->out.count; attr++) {
-         const int reg_idx = sh->out.register_indices[attr];
+      for (attr = 0; attr < kernel->out.count; attr++) {
+         const int reg_idx = kernel->out.register_indices[attr];
          if (reg_idx == so_info->output[i].register_index)
             break;
       }
-
-      if (attr < sh->out.count) {
-         sh->so_info.output[i].register_index = attr;
-      }
-      else {
+      if (attr >= kernel->out.count) {
          assert(!"stream output an undefined register");
-         sh->so_info.output[i].register_index = 0;
+         attr = 0;
+      }
+
+      if (info.streams[stream].vue_read_count < attr + 1)
+         info.streams[stream].vue_read_count = attr + 1;
+
+      /* pad with holes first */
+      while (buf_offsets[buffer] < so_info->output[i].dst_offset) {
+         int num_dwords;
+
+         num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer];
+         if (num_dwords > 4)
+            num_dwords = 4;
+
+         assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+         decl = &decls[stream][info.streams[stream].decl_count];
+
+         decl->attr = 0;
+         decl->is_hole = true;
+         decl->component_base = 0;
+         decl->component_count = num_dwords;
+         decl->buffer = buffer;
+
+         info.streams[stream].decl_count++;
+         buf_offsets[buffer] += num_dwords;
       }
+      assert(buf_offsets[buffer] == so_info->output[i].dst_offset);
+
+      assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+      decl = &decls[stream][info.streams[stream].decl_count];
 
+      decl->attr = attr;
+      decl->is_hole = false;
       /* PSIZE is at W channel */
-      if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
+      if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
          assert(so_info->output[i].start_component == 0);
          assert(so_info->output[i].num_components == 1);
-         sh->so_info.output[i].start_component = 3;
+         decl->component_base = 3;
+         decl->component_count = 1;
+      } else {
+         decl->component_base = so_info->output[i].start_component;
+         decl->component_count = so_info->output[i].num_components;
       }
+      decl->buffer = buffer;
+
+      info.streams[stream].decl_count++;
+      buf_offsets[buffer] += so_info->output[i].num_components;
    }
+
+   ilo_state_sol_init(&kernel->sol, dev, &info);
 }
 
 /**
@@ -599,17 +814,20 @@
 ilo_shader_state_add_variant(struct ilo_shader_state *state,
                              const struct ilo_shader_variant *variant)
 {
+   bool rasterizer_discard = false;
    struct ilo_shader *sh;
 
    switch (state->info.type) {
    case PIPE_SHADER_VERTEX:
       sh = ilo_shader_compile_vs(state, variant);
+      rasterizer_discard = variant->u.vs.rasterizer_discard;
       break;
    case PIPE_SHADER_FRAGMENT:
       sh = ilo_shader_compile_fs(state, variant);
       break;
    case PIPE_SHADER_GEOMETRY:
       sh = ilo_shader_compile_gs(state, variant);
+      rasterizer_discard = variant->u.gs.rasterizer_discard;
       break;
    case PIPE_SHADER_COMPUTE:
       sh = ilo_shader_compile_cs(state, variant);
@@ -625,7 +843,8 @@
 
    sh->variant = *variant;
 
-   copy_so_info(sh, &state->info.stream_output);
+   init_sol(sh, state->info.dev, &state->info.stream_output,
+         rasterizer_discard);
 
    ilo_shader_state_add_shader(state, sh);
 
@@ -665,13 +884,13 @@
    if (construct_cso) {
       switch (state->info.type) {
       case PIPE_SHADER_VERTEX:
-         ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
+         init_vs(sh, state);
          break;
       case PIPE_SHADER_GEOMETRY:
-         ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
+         init_gs(sh, state);
          break;
       case PIPE_SHADER_FRAGMENT:
-         ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso);
+         init_ps(sh, state);
          break;
       default:
          break;
@@ -789,16 +1008,33 @@
                          const struct ilo_state_vector *vec,
                          uint32_t dirty)
 {
-   const struct ilo_shader * const cur = shader->shader;
    struct ilo_shader_variant variant;
+   bool changed = false;
 
-   if (!(shader->info.non_orthogonal_states & dirty))
-      return false;
+   if (shader->info.non_orthogonal_states & dirty) {
+      const struct ilo_shader * const old = shader->shader;
+
+      ilo_shader_variant_init(&variant, &shader->info, vec);
+      ilo_shader_state_use_variant(shader, &variant);
+      changed = (shader->shader != old);
+   }
+
+   if (shader->info.type == PIPE_SHADER_FRAGMENT) {
+      struct ilo_shader *kernel = shader->shader;
 
-   ilo_shader_variant_init(&variant, &shader->info, vec);
-   ilo_shader_state_use_variant(shader, &variant);
+      if (kernel->ps_params.sample_mask != vec->sample_mask ||
+          kernel->ps_params.alpha_may_kill != vec->blend->alpha_may_kill) {
+         kernel->ps_params.sample_mask = vec->sample_mask;
+         kernel->ps_params.alpha_may_kill = vec->blend->alpha_may_kill;
 
-   return (shader->shader != cur);
+         ilo_state_ps_set_params(&kernel->cso.ps, shader->info.dev,
+               &kernel->ps_params);
+
+         changed = true;
+      }
+   }
+
+   return changed;
 }
 
 static int
@@ -829,82 +1065,104 @@
  * \return true if a different routing is selected
  */
 bool
-ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
-                                 const struct ilo_shader_state *source,
-                                 const struct ilo_rasterizer_state *rasterizer)
+ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader,
+                             const struct ilo_shader_state *source,
+                             const struct ilo_rasterizer_state *rasterizer)
 {
-   const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
+   const bool is_point = true;
    const bool light_twoside = rasterizer->state.light_twoside;
+   const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
+   const int sprite_coord_mode = rasterizer->state.sprite_coord_mode;
    struct ilo_shader *kernel = shader->shader;
    struct ilo_kernel_routing *routing = &kernel->routing;
+   struct ilo_state_sbe_swizzle_info swizzles[ILO_STATE_SBE_MAX_SWIZZLE_COUNT];
+   struct ilo_state_sbe_info info;
    const int *src_semantics, *src_indices;
-   int src_len, max_src_slot;
+   int src_skip, src_len, src_slot;
    int dst_len, dst_slot;
 
-   /* we are constructing 3DSTATE_SBE here */
-   ILO_DEV_ASSERT(shader->info.dev, 6, 8);
-
    assert(kernel);
 
    if (source) {
       assert(source->shader);
+
       src_semantics = source->shader->out.semantic_names;
       src_indices = source->shader->out.semantic_indices;
       src_len = source->shader->out.count;
-   }
-   else {
+      src_skip = 0;
+
+      assert(src_len >= 2 &&
+             src_semantics[0] == TGSI_SEMANTIC_PSIZE &&
+             src_semantics[1] == TGSI_SEMANTIC_POSITION);
+
+      /*
+       * skip PSIZE and POSITION (how about the optional CLIPDISTs?), unless
+       * they are all the source shader has and FS needs to read some
+       * attributes.
+       */
+      if (src_len > 2 || !kernel->in.count) {
+         src_semantics += 2;
+         src_indices += 2;
+         src_len -= 2;
+         src_skip = 2;
+      }
+   } else {
       src_semantics = kernel->in.semantic_names;
       src_indices = kernel->in.semantic_indices;
       src_len = kernel->in.count;
+      src_skip = 0;
    }
 
    /* no change */
-   if (kernel->routing_initialized &&
-       routing->source_skip + routing->source_len <= src_len &&
-       kernel->routing_sprite_coord_enable == sprite_coord_enable &&
-       !memcmp(kernel->routing_src_semantics,
-          &src_semantics[routing->source_skip],
-          sizeof(kernel->routing_src_semantics[0]) * routing->source_len) &&
-       !memcmp(kernel->routing_src_indices,
-          &src_indices[routing->source_skip],
-          sizeof(kernel->routing_src_indices[0]) * routing->source_len))
+   if (routing->initialized &&
+       routing->is_point == is_point &&
+       routing->light_twoside == light_twoside &&
+       routing->sprite_coord_enable == sprite_coord_enable &&
+       routing->sprite_coord_mode == sprite_coord_mode &&
+       routing->src_len <= src_len &&
+       !memcmp(routing->src_semantics, src_semantics,
+          sizeof(src_semantics[0]) * routing->src_len) &&
+       !memcmp(routing->src_indices, src_indices,
+          sizeof(src_indices[0]) * routing->src_len))
       return false;
 
-   if (source) {
-      /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
-      assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
-      assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
-      routing->source_skip = 2;
-
-      routing->source_len = src_len - routing->source_skip;
-      src_semantics += routing->source_skip;
-      src_indices += routing->source_skip;
-   }
-   else {
-      routing->source_skip = 0;
-      routing->source_len = src_len;
-   }
-
-   routing->const_interp_enable = kernel->in.const_interp_enable;
-   routing->point_sprite_enable = 0;
-   routing->swizzle_enable = false;
-
-   assert(kernel->in.count <= Elements(routing->swizzles));
-   dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
-   max_src_slot = -1;
+   routing->is_point = is_point;
+   routing->light_twoside = light_twoside;
+   routing->sprite_coord_enable = sprite_coord_enable;
+   routing->sprite_coord_mode = sprite_coord_mode;
+
+   assert(kernel->in.count <= Elements(swizzles));
+   dst_len = MIN2(kernel->in.count, Elements(swizzles));
+
+   memset(&swizzles, 0, sizeof(swizzles));
+   memset(&info, 0, sizeof(info));
+
+   info.attr_count = dst_len;
+   info.cv_vue_attr_count = src_skip + src_len;
+   info.vue_read_base = src_skip;
+   info.vue_read_count = 0;
+   info.has_min_read_count = true;
+   info.swizzle_enable = false;
+   info.swizzle_16_31 = false;
+   info.swizzle_count = 0;
+   info.swizzles = swizzles;
+   info.const_interp_enables = kernel->in.const_interp_enable;
+   info.point_sprite_enables = 0x0;
+   info.point_sprite_origin_lower_left =
+      (sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
+   info.cv_is_point = is_point;
 
    for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
       const int semantic = kernel->in.semantic_names[dst_slot];
       const int index = kernel->in.semantic_indices[dst_slot];
-      int src_slot;
 
       if (semantic == TGSI_SEMANTIC_GENERIC &&
           (sprite_coord_enable & (1 << index)))
-         routing->point_sprite_enable |= 1 << dst_slot;
+         info.point_sprite_enables |= 1 << dst_slot;
 
       if (source) {
-         src_slot = route_attr(src_semantics, src_indices,
-               routing->source_len, semantic, index);
+         src_slot = route_attr(src_semantics, src_indices, src_len,
+               semantic, index);
 
          /*
           * The source shader stage does not output this attribute.  The value
@@ -918,58 +1176,47 @@
           */
          if (src_slot < 0)
             src_slot = 0;
-      }
-      else {
+      } else {
          src_slot = dst_slot;
       }
 
-      routing->swizzles[dst_slot] = src_slot;
-
       /* use the following slot for two-sided lighting */
       if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
-          src_slot + 1 < routing->source_len &&
+          src_slot + 1 < src_len &&
           src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
           src_indices[src_slot + 1] == index) {
-         routing->swizzles[dst_slot] |= GEN8_SBE_SWIZ_INPUTATTR_FACING;
+         swizzles[dst_slot].attr_select = GEN6_INPUTATTR_FACING;
+         swizzles[dst_slot].attr = src_slot;
+         info.swizzle_enable = true;
          src_slot++;
+      } else {
+         swizzles[dst_slot].attr_select = GEN6_INPUTATTR_NORMAL;
+         swizzles[dst_slot].attr = src_slot;
+         if (src_slot != dst_slot)
+            info.swizzle_enable = true;
       }
 
-      if (routing->swizzles[dst_slot] != dst_slot)
-         routing->swizzle_enable = true;
+      swizzles[dst_slot].force_zeros = false;
 
-      if (max_src_slot < src_slot)
-         max_src_slot = src_slot;
+      if (info.vue_read_count < src_slot + 1)
+         info.vue_read_count = src_slot + 1;
    }
 
-   memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
-         sizeof(routing->swizzles[0]) * dst_slot);
+   if (info.swizzle_enable)
+      info.swizzle_count = dst_len;
 
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
-    *
-    *     "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
-    *      0 indicating no Vertex URB data to be read.
-    *
-    *      This field should be set to the minimum length required to read the
-    *      maximum source attribute. The maximum source attribute is indicated
-    *      by the maximum value of the enabled Attribute # Source Attribute if
-    *      Attribute Swizzle Enable is set, Number of Output Attributes-1 if
-    *      enable is not set.
-    *
-    *        read_length = ceiling((max_source_attr+1)/2)
-    *
-    *      [errata] Corruption/Hang possible if length programmed larger than
-    *      recommended"
-    */
-   routing->source_len = max_src_slot + 1;
+   if (routing->initialized)
+      ilo_state_sbe_set_info(&routing->sbe, shader->info.dev, &info);
+   else
+      ilo_state_sbe_init(&routing->sbe, shader->info.dev, &info);
+
+   routing->src_len = info.vue_read_count;
+   memcpy(routing->src_semantics, src_semantics,
+         sizeof(src_semantics[0]) * routing->src_len);
+   memcpy(routing->src_indices, src_indices,
+         sizeof(src_indices[0]) * routing->src_len);
 
-   /* remember the states of the source */
-   kernel->routing_initialized = true;
-   kernel->routing_sprite_coord_enable = sprite_coord_enable;
-   memcpy(kernel->routing_src_semantics, src_semantics,
-         sizeof(kernel->routing_src_semantics[0]) * routing->source_len);
-   memcpy(kernel->routing_src_indices, src_indices,
-         sizeof(kernel->routing_src_indices[0]) * routing->source_len);
+   routing->initialized = true;
 
    return true;
 }
@@ -1147,7 +1394,7 @@
 /**
  * Return the CSO of the selected kernel.
  */
-const struct ilo_shader_cso *
+const union ilo_shader_cso *
 ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
 {
    const struct ilo_shader *kernel = shader->shader;
@@ -1163,22 +1410,28 @@
 const struct pipe_stream_output_info *
 ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
 {
+   return &shader->info.stream_output;
+}
+
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader)
+{
    const struct ilo_shader *kernel = shader->shader;
 
    assert(kernel);
 
-   return &kernel->so_info;
+   return &kernel->sol;
 }
 
 /**
  * Return the routing info of the selected kernel.
  */
-const struct ilo_kernel_routing *
-ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
+const struct ilo_state_sbe *
+ilo_shader_get_kernel_sbe(const struct ilo_shader_state *shader)
 {
    const struct ilo_shader *kernel = shader->shader;
 
    assert(kernel);
 
-   return &kernel->routing;
+   return &kernel->routing.sbe;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_shader.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,6 +28,8 @@
 #ifndef ILO_SHADER_H
 #define ILO_SHADER_H
 
+#include "core/ilo_state_shader.h"
+
 #include "ilo_common.h"
 
 enum ilo_kernel_param {
@@ -81,23 +83,28 @@
    ILO_KERNEL_PARAM_COUNT,
 };
 
-struct ilo_kernel_routing {
-   uint32_t const_interp_enable;
-   uint32_t point_sprite_enable;
-   unsigned source_skip, source_len;
-
-   bool swizzle_enable;
-   uint16_t swizzles[16];
-};
-
 struct intel_bo;
 struct ilo_builder;
 struct ilo_rasterizer_state;
 struct ilo_shader_cache;
 struct ilo_shader_state;
-struct ilo_shader_cso;
+struct ilo_state_sbe;
+struct ilo_state_sol;
 struct ilo_state_vector;
 
+union ilo_shader_cso { /* returned by ilo_shader_get_kernel_cso() */
+   struct ilo_state_vs vs;
+   struct ilo_state_hs hs;
+   struct ilo_state_ds ds;
+   struct ilo_state_gs gs;
+   struct ilo_state_ps ps;
+   /* NOTE(review): "sol" is GS-typed; presumably VS stream output -- confirm */
+   struct {
+      struct ilo_state_vs vs;
+      struct ilo_state_gs sol;
+   } vs_sol;
+};
+
 struct ilo_shader_cache *
 ilo_shader_cache_create(void);
 
@@ -151,9 +158,9 @@
                          uint32_t dirty);
 
 bool
-ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
-                                 const struct ilo_shader_state *source,
-                                 const struct ilo_rasterizer_state *rasterizer);
+ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader,
+                             const struct ilo_shader_state *source,
+                             const struct ilo_rasterizer_state *rasterizer);
 
 uint32_t
 ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader);
@@ -162,13 +169,16 @@
 ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
                             enum ilo_kernel_param param);
 
-const struct ilo_shader_cso *
+const union ilo_shader_cso *
 ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);
 
 const struct pipe_stream_output_info *
 ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
 
-const struct ilo_kernel_routing *
-ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader);
+
+const struct ilo_state_sbe *
+ilo_shader_get_kernel_sbe(const struct ilo_shader_state *shader);
 
 #endif /* ILO_SHADER_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -25,16 +25,288 @@
  *    Chia-I Wu <olv@lunarg.com>
  */
 
-#include "core/ilo_state_3d.h"
+#include "util/u_dual_blend.h"
 #include "util/u_dynarray.h"
+#include "util/u_framebuffer.h"
 #include "util/u_helpers.h"
+#include "util/u_resource.h"
 #include "util/u_upload_mgr.h"
 
 #include "ilo_context.h"
+#include "ilo_format.h"
 #include "ilo_resource.h"
 #include "ilo_shader.h"
 #include "ilo_state.h"
 
+/**
+ * Map a PIPE_PRIM_x mode to its GEN6_3DPRIM_x type; mode must have an entry.
+ */
+static enum gen_3dprim_type
+ilo_translate_draw_mode(unsigned mode)
+{
+   static const enum gen_3dprim_type prim_mapping[PIPE_PRIM_MAX] = {
+      [PIPE_PRIM_POINTS]                     = GEN6_3DPRIM_POINTLIST,
+      [PIPE_PRIM_LINES]                      = GEN6_3DPRIM_LINELIST,
+      [PIPE_PRIM_LINE_LOOP]                  = GEN6_3DPRIM_LINELOOP,
+      [PIPE_PRIM_LINE_STRIP]                 = GEN6_3DPRIM_LINESTRIP,
+      [PIPE_PRIM_TRIANGLES]                  = GEN6_3DPRIM_TRILIST,
+      [PIPE_PRIM_TRIANGLE_STRIP]             = GEN6_3DPRIM_TRISTRIP,
+      [PIPE_PRIM_TRIANGLE_FAN]               = GEN6_3DPRIM_TRIFAN,
+      [PIPE_PRIM_QUADS]                      = GEN6_3DPRIM_QUADLIST,
+      [PIPE_PRIM_QUAD_STRIP]                 = GEN6_3DPRIM_QUADSTRIP,
+      [PIPE_PRIM_POLYGON]                    = GEN6_3DPRIM_POLYGON,
+      [PIPE_PRIM_LINES_ADJACENCY]            = GEN6_3DPRIM_LINELIST_ADJ,
+      [PIPE_PRIM_LINE_STRIP_ADJACENCY]       = GEN6_3DPRIM_LINESTRIP_ADJ,
+      [PIPE_PRIM_TRIANGLES_ADJACENCY]        = GEN6_3DPRIM_TRILIST_ADJ,
+      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]   = GEN6_3DPRIM_TRISTRIP_ADJ,
+   };
+   /* check the range before indexing; entries not listed above are zero */
+   assert(mode < PIPE_PRIM_MAX && prim_mapping[mode]);
+
+   return prim_mapping[mode];
+}
+
+static enum gen_index_format
+ilo_translate_index_size(unsigned index_size)
+{
+   switch (index_size) {
+   case 1:                             return GEN6_INDEX_BYTE;
+   case 2:                             return GEN6_INDEX_WORD;
+   case 4:                             return GEN6_INDEX_DWORD;
+   default:
+      assert(!"unknown index size");
+      return GEN6_INDEX_BYTE;
+   }
+}
+
+static enum gen_mip_filter
+ilo_translate_mip_filter(unsigned filter)
+{
+   switch (filter) {
+   case PIPE_TEX_MIPFILTER_NEAREST:    return GEN6_MIPFILTER_NEAREST;
+   case PIPE_TEX_MIPFILTER_LINEAR:     return GEN6_MIPFILTER_LINEAR;
+   case PIPE_TEX_MIPFILTER_NONE:       return GEN6_MIPFILTER_NONE;
+   default:
+      assert(!"unknown mipfilter");
+      return GEN6_MIPFILTER_NONE;
+   }
+}
+
+static int
+ilo_translate_img_filter(unsigned filter)
+{
+   switch (filter) {
+   case PIPE_TEX_FILTER_NEAREST:       return GEN6_MAPFILTER_NEAREST;
+   case PIPE_TEX_FILTER_LINEAR:        return GEN6_MAPFILTER_LINEAR;
+   default:
+      assert(!"unknown sampler filter");
+      return GEN6_MAPFILTER_NEAREST;
+   }
+}
+
+static enum gen_texcoord_mode
+ilo_translate_address_wrap(unsigned wrap) /* PIPE_TEX_WRAP_x -> hw mode */
+{
+   switch (wrap) {
+   case PIPE_TEX_WRAP_CLAMP:           return GEN8_TEXCOORDMODE_HALF_BORDER;
+   case PIPE_TEX_WRAP_REPEAT:          return GEN6_TEXCOORDMODE_WRAP;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:   return GEN6_TEXCOORDMODE_CLAMP;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:   return GEN6_TEXCOORDMODE_MIRROR;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+   default: /* also reached by the MIRROR_CLAMP* cases above */
+      assert(!"unknown sampler wrap mode");
+      return GEN6_TEXCOORDMODE_WRAP;
+   }
+}
+
+static enum gen_aniso_ratio
+ilo_translate_max_anisotropy(unsigned max_anisotropy)
+{
+   switch (max_anisotropy) {
+   case 0: case 1: case 2:             return GEN6_ANISORATIO_2;
+   case 3: case 4:                     return GEN6_ANISORATIO_4;
+   case 5: case 6:                     return GEN6_ANISORATIO_6;
+   case 7: case 8:                     return GEN6_ANISORATIO_8;
+   case 9: case 10:                    return GEN6_ANISORATIO_10;
+   case 11: case 12:                   return GEN6_ANISORATIO_12;
+   case 13: case 14:                   return GEN6_ANISORATIO_14;
+   default:                            return GEN6_ANISORATIO_16;
+   }
+}
+
+static enum gen_prefilter_op
+ilo_translate_shadow_func(unsigned func)
+{
+   /*
+    * For PIPE_FUNC_x, the reference value is on the left-hand side of the
+    * comparison, and 1.0 is returned when the comparison is true.
+    *
+    * For GEN6_PREFILTEROP_x, the reference value is on the right-hand side of
+    * the comparison, and 0.0 is returned when the comparison is true.
+    */
+   switch (func) {
+   case PIPE_FUNC_NEVER:               return GEN6_PREFILTEROP_ALWAYS;
+   case PIPE_FUNC_LESS:                return GEN6_PREFILTEROP_LEQUAL;
+   case PIPE_FUNC_EQUAL:               return GEN6_PREFILTEROP_NOTEQUAL;
+   case PIPE_FUNC_LEQUAL:              return GEN6_PREFILTEROP_LESS;
+   case PIPE_FUNC_GREATER:             return GEN6_PREFILTEROP_GEQUAL;
+   case PIPE_FUNC_NOTEQUAL:            return GEN6_PREFILTEROP_EQUAL;
+   case PIPE_FUNC_GEQUAL:              return GEN6_PREFILTEROP_GREATER;
+   case PIPE_FUNC_ALWAYS:              return GEN6_PREFILTEROP_NEVER;
+   default:
+      assert(!"unknown shadow compare function");
+      return GEN6_PREFILTEROP_NEVER;
+   }
+}
+
+static enum gen_front_winding
+ilo_translate_front_ccw(unsigned front_ccw)
+{
+   return (front_ccw) ? GEN6_FRONTWINDING_CCW : GEN6_FRONTWINDING_CW;
+}
+
+static enum gen_cull_mode
+ilo_translate_cull_face(unsigned cull_face)
+{
+   switch (cull_face) {
+   case PIPE_FACE_NONE:                return GEN6_CULLMODE_NONE;
+   case PIPE_FACE_FRONT:               return GEN6_CULLMODE_FRONT;
+   case PIPE_FACE_BACK:                return GEN6_CULLMODE_BACK;
+   case PIPE_FACE_FRONT_AND_BACK:      return GEN6_CULLMODE_BOTH;
+   default:
+      assert(!"unknown face culling");
+      return GEN6_CULLMODE_NONE;
+   }
+}
+
+static enum gen_fill_mode
+ilo_translate_poly_mode(unsigned poly_mode)
+{
+   switch (poly_mode) {
+   case PIPE_POLYGON_MODE_FILL:        return GEN6_FILLMODE_SOLID;
+   case PIPE_POLYGON_MODE_LINE:        return GEN6_FILLMODE_WIREFRAME;
+   case PIPE_POLYGON_MODE_POINT:       return GEN6_FILLMODE_POINT;
+   default:
+      assert(!"unknown polygon mode");
+      return GEN6_FILLMODE_SOLID;
+   }
+}
+
+static enum gen_pixel_location
+ilo_translate_half_pixel_center(bool half_pixel_center)
+{
+   return (half_pixel_center) ? GEN6_PIXLOC_CENTER : GEN6_PIXLOC_UL_CORNER;
+}
+
+static enum gen_compare_function
+ilo_translate_compare_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:               return GEN6_COMPAREFUNCTION_NEVER;
+   case PIPE_FUNC_LESS:                return GEN6_COMPAREFUNCTION_LESS;
+   case PIPE_FUNC_EQUAL:               return GEN6_COMPAREFUNCTION_EQUAL;
+   case PIPE_FUNC_LEQUAL:              return GEN6_COMPAREFUNCTION_LEQUAL;
+   case PIPE_FUNC_GREATER:             return GEN6_COMPAREFUNCTION_GREATER;
+   case PIPE_FUNC_NOTEQUAL:            return GEN6_COMPAREFUNCTION_NOTEQUAL;
+   case PIPE_FUNC_GEQUAL:              return GEN6_COMPAREFUNCTION_GEQUAL;
+   case PIPE_FUNC_ALWAYS:              return GEN6_COMPAREFUNCTION_ALWAYS;
+   default:
+      assert(!"unknown compare function");
+      return GEN6_COMPAREFUNCTION_NEVER;
+   }
+}
+
+static enum gen_stencil_op
+ilo_translate_stencil_op(unsigned stencil_op)
+{
+   switch (stencil_op) {
+   case PIPE_STENCIL_OP_KEEP:          return GEN6_STENCILOP_KEEP;
+   case PIPE_STENCIL_OP_ZERO:          return GEN6_STENCILOP_ZERO;
+   case PIPE_STENCIL_OP_REPLACE:       return GEN6_STENCILOP_REPLACE;
+   case PIPE_STENCIL_OP_INCR:          return GEN6_STENCILOP_INCRSAT;
+   case PIPE_STENCIL_OP_DECR:          return GEN6_STENCILOP_DECRSAT;
+   case PIPE_STENCIL_OP_INCR_WRAP:     return GEN6_STENCILOP_INCR;
+   case PIPE_STENCIL_OP_DECR_WRAP:     return GEN6_STENCILOP_DECR;
+   case PIPE_STENCIL_OP_INVERT:        return GEN6_STENCILOP_INVERT;
+   default:
+      assert(!"unknown stencil op");
+      return GEN6_STENCILOP_KEEP;
+   }
+}
+
+static enum gen_logic_op
+ilo_translate_logicop(unsigned logicop)
+{
+   switch (logicop) {
+   case PIPE_LOGICOP_CLEAR:            return GEN6_LOGICOP_CLEAR;
+   case PIPE_LOGICOP_NOR:              return GEN6_LOGICOP_NOR;
+   case PIPE_LOGICOP_AND_INVERTED:     return GEN6_LOGICOP_AND_INVERTED;
+   case PIPE_LOGICOP_COPY_INVERTED:    return GEN6_LOGICOP_COPY_INVERTED;
+   case PIPE_LOGICOP_AND_REVERSE:      return GEN6_LOGICOP_AND_REVERSE;
+   case PIPE_LOGICOP_INVERT:           return GEN6_LOGICOP_INVERT;
+   case PIPE_LOGICOP_XOR:              return GEN6_LOGICOP_XOR;
+   case PIPE_LOGICOP_NAND:             return GEN6_LOGICOP_NAND;
+   case PIPE_LOGICOP_AND:              return GEN6_LOGICOP_AND;
+   case PIPE_LOGICOP_EQUIV:            return GEN6_LOGICOP_EQUIV;
+   case PIPE_LOGICOP_NOOP:             return GEN6_LOGICOP_NOOP;
+   case PIPE_LOGICOP_OR_INVERTED:      return GEN6_LOGICOP_OR_INVERTED;
+   case PIPE_LOGICOP_COPY:             return GEN6_LOGICOP_COPY;
+   case PIPE_LOGICOP_OR_REVERSE:       return GEN6_LOGICOP_OR_REVERSE;
+   case PIPE_LOGICOP_OR:               return GEN6_LOGICOP_OR;
+   case PIPE_LOGICOP_SET:              return GEN6_LOGICOP_SET;
+   default:
+      assert(!"unknown logicop function");
+      return GEN6_LOGICOP_CLEAR;
+   }
+}
+
+static int
+ilo_translate_blend_func(unsigned blend)
+{
+   switch (blend) {
+   case PIPE_BLEND_ADD:                return GEN6_BLENDFUNCTION_ADD;
+   case PIPE_BLEND_SUBTRACT:           return GEN6_BLENDFUNCTION_SUBTRACT;
+   case PIPE_BLEND_REVERSE_SUBTRACT:   return GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
+   case PIPE_BLEND_MIN:                return GEN6_BLENDFUNCTION_MIN;
+   case PIPE_BLEND_MAX:                return GEN6_BLENDFUNCTION_MAX;
+   default:
+      assert(!"unknown blend function");
+      return GEN6_BLENDFUNCTION_ADD;
+   }
+}
+
+static int
+ilo_translate_blend_factor(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ONE:                return GEN6_BLENDFACTOR_ONE;
+   case PIPE_BLENDFACTOR_SRC_COLOR:          return GEN6_BLENDFACTOR_SRC_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:          return GEN6_BLENDFACTOR_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_DST_ALPHA:          return GEN6_BLENDFACTOR_DST_ALPHA;
+   case PIPE_BLENDFACTOR_DST_COLOR:          return GEN6_BLENDFACTOR_DST_COLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
+   case PIPE_BLENDFACTOR_CONST_COLOR:        return GEN6_BLENDFACTOR_CONST_COLOR;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:        return GEN6_BLENDFACTOR_CONST_ALPHA;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:         return GEN6_BLENDFACTOR_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:         return GEN6_BLENDFACTOR_SRC1_ALPHA;
+   case PIPE_BLENDFACTOR_ZERO:               return GEN6_BLENDFACTOR_ZERO;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return GEN6_BLENDFACTOR_INV_SRC_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return GEN6_BLENDFACTOR_INV_SRC_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return GEN6_BLENDFACTOR_INV_DST_ALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:      return GEN6_BLENDFACTOR_INV_DST_COLOR;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return GEN6_BLENDFACTOR_INV_CONST_COLOR;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return GEN6_BLENDFACTOR_INV_CONST_ALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:     return GEN6_BLENDFACTOR_INV_SRC1_COLOR;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:     return GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
+   default:
+      assert(!"unknown blend factor");
+      return GEN6_BLENDFACTOR_ONE;
+   }
+}
+
 static void
 finalize_shader_states(struct ilo_state_vector *vec)
 {
@@ -78,7 +350,7 @@
       /* need to setup SBE for FS */
       if (type == PIPE_SHADER_FRAGMENT && vec->dirty &
             (state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) {
-         if (ilo_shader_select_kernel_routing(shader,
+         if (ilo_shader_select_kernel_sbe(shader,
                (vec->gs) ? vec->gs : vec->vs, vec->rasterizer))
             vec->dirty |= state;
       }
@@ -97,7 +369,6 @@
       ~ilo_shader_get_kernel_param(sh, ILO_KERNEL_SKIP_CBUF0_UPLOAD);
 
    while (upload_mask) {
-      const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       unsigned offset, i;
 
       i = u_bit_scan(&upload_mask);
@@ -105,14 +376,15 @@
       if (cbuf->cso[i].resource)
          continue;
 
-      u_upload_data(ilo->uploader, 0, cbuf->cso[i].user_buffer_size,
+      u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
             cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
 
-      ilo_gpe_init_view_surface_for_buffer(ilo->dev,
-            ilo_buffer(cbuf->cso[i].resource),
-            offset, cbuf->cso[i].user_buffer_size,
-            util_format_get_blocksize(elem_format), elem_format,
-            false, false, &cbuf->cso[i].surface);
+      cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource);
+      cbuf->cso[i].info.offset = offset;
+
+      memset(&cbuf->cso[i].surface, 0, sizeof(cbuf->cso[i].surface));
+      ilo_state_surface_init_for_buffer(&cbuf->cso[i].surface,
+            ilo->dev, &cbuf->cso[i].info);
 
       ilo->state_vector.dirty |= ILO_DIRTY_CBUF;
    }
@@ -133,114 +405,376 @@
 static void
 finalize_index_buffer(struct ilo_context *ilo)
 {
+   const struct ilo_dev *dev = ilo->dev;
    struct ilo_state_vector *vec = &ilo->state_vector;
    const bool need_upload = (vec->draw->indexed &&
-         (vec->ib.user_buffer || vec->ib.offset % vec->ib.index_size));
+         (vec->ib.state.user_buffer ||
+          vec->ib.state.offset % vec->ib.state.index_size));
    struct pipe_resource *current_hw_res = NULL;
+   struct ilo_state_index_buffer_info info;
+   int64_t vertex_start_bias = 0;
 
    if (!(vec->dirty & ILO_DIRTY_IB) && !need_upload)
       return;
 
+   /* make sure vec->ib.hw_resource changes when reallocated */
    pipe_resource_reference(&current_hw_res, vec->ib.hw_resource);
 
    if (need_upload) {
-      const unsigned offset = vec->ib.index_size * vec->draw->start;
-      const unsigned size = vec->ib.index_size * vec->draw->count;
+      const unsigned offset = vec->ib.state.index_size * vec->draw->start;
+      const unsigned size = vec->ib.state.index_size * vec->draw->count;
       unsigned hw_offset;
 
-      if (vec->ib.user_buffer) {
+      if (vec->ib.state.user_buffer) {
          u_upload_data(ilo->uploader, 0, size,
-               vec->ib.user_buffer + offset, &hw_offset, &vec->ib.hw_resource);
-      }
-      else {
-         u_upload_buffer(ilo->uploader, 0, vec->ib.offset + offset, size,
-               vec->ib.buffer, &hw_offset, &vec->ib.hw_resource);
+               vec->ib.state.user_buffer + offset,
+               &hw_offset, &vec->ib.hw_resource);
+      } else {
+         u_upload_buffer(ilo->uploader, 0,
+               vec->ib.state.offset + offset, size, vec->ib.state.buffer,
+               &hw_offset, &vec->ib.hw_resource);
       }
 
       /* the HW offset should be aligned */
-      assert(hw_offset % vec->ib.index_size == 0);
-      vec->ib.draw_start_offset = hw_offset / vec->ib.index_size;
+      assert(hw_offset % vec->ib.state.index_size == 0);
+      vertex_start_bias = hw_offset / vec->ib.state.index_size;
 
       /*
        * INDEX[vec->draw->start] in the original buffer is INDEX[0] in the HW
        * resource
        */
-      vec->ib.draw_start_offset -= vec->draw->start;
-   }
-   else {
-      pipe_resource_reference(&vec->ib.hw_resource, vec->ib.buffer);
+      vertex_start_bias -= vec->draw->start;
+   } else {
+      pipe_resource_reference(&vec->ib.hw_resource, vec->ib.state.buffer);
 
       /* note that index size may be zero when the draw is not indexed */
       if (vec->draw->indexed)
-         vec->ib.draw_start_offset = vec->ib.offset / vec->ib.index_size;
-      else
-         vec->ib.draw_start_offset = 0;
+         vertex_start_bias = vec->ib.state.offset / vec->ib.state.index_size;
    }
 
+   vec->draw_info.vertex_start += vertex_start_bias;
+
    /* treat the IB as clean if the HW states do not change */
    if (vec->ib.hw_resource == current_hw_res &&
-       vec->ib.hw_index_size == vec->ib.index_size)
+       vec->ib.hw_index_size == vec->ib.state.index_size)
       vec->dirty &= ~ILO_DIRTY_IB;
    else
-      vec->ib.hw_index_size = vec->ib.index_size;
+      vec->ib.hw_index_size = vec->ib.state.index_size;
 
    pipe_resource_reference(&current_hw_res, NULL);
+
+   memset(&info, 0, sizeof(info));
+   if (vec->ib.hw_resource) {
+      info.vma = ilo_resource_get_vma(vec->ib.hw_resource);
+      info.size = info.vma->vm_size;
+      info.format = ilo_translate_index_size(vec->ib.hw_index_size);
+   }
+
+   ilo_state_index_buffer_set_info(&vec->ib.ib, dev, &info);
 }
 
 static void
 finalize_vertex_elements(struct ilo_context *ilo)
 {
+   const struct ilo_dev *dev = ilo->dev;
    struct ilo_state_vector *vec = &ilo->state_vector;
+   struct ilo_ve_state *ve = vec->ve;
+   const bool last_element_edge_flag = (vec->vs &&
+         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG));
+   const bool prepend_vertexid = (vec->vs &&
+         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_VERTEXID));
+   const bool prepend_instanceid = (vec->vs &&
+         ilo_shader_get_kernel_param(vec->vs,
+            ILO_KERNEL_VS_INPUT_INSTANCEID));
+   const enum gen_index_format index_format = (vec->draw->indexed) ?
+      ilo_translate_index_size(vec->ib.state.index_size) : GEN6_INDEX_DWORD;
+
+   /* check for non-orthogonal states */
+   if (ve->vf_params.cv_topology != vec->draw_info.topology ||
+       ve->vf_params.prepend_vertexid != prepend_vertexid ||
+       ve->vf_params.prepend_instanceid != prepend_instanceid ||
+       ve->vf_params.last_element_edge_flag != last_element_edge_flag ||
+       ve->vf_params.cv_index_format != index_format ||
+       ve->vf_params.cut_index_enable != vec->draw->primitive_restart ||
+       ve->vf_params.cut_index != vec->draw->restart_index) {
+      ve->vf_params.cv_topology = vec->draw_info.topology;
+      ve->vf_params.prepend_vertexid = prepend_vertexid;
+      ve->vf_params.prepend_instanceid = prepend_instanceid;
+      ve->vf_params.last_element_edge_flag = last_element_edge_flag;
+      ve->vf_params.cv_index_format = index_format;
+      ve->vf_params.cut_index_enable = vec->draw->primitive_restart;
+      ve->vf_params.cut_index = vec->draw->restart_index;
+
+      ilo_state_vf_set_params(&ve->vf, dev, &ve->vf_params);
 
-   if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS)))
+      vec->dirty |= ILO_DIRTY_VE;
+   }
+}
+
+static void
+finalize_vertex_buffers(struct ilo_context *ilo)
+{
+   const struct ilo_dev *dev = ilo->dev;
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   struct ilo_state_vertex_buffer_info info;
+   unsigned i;
+
+   if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VB)))
       return;
 
-   vec->dirty |= ILO_DIRTY_VE;
+   memset(&info, 0, sizeof(info));
+
+   for (i = 0; i < vec->ve->vb_count; i++) {
+      const unsigned pipe_idx = vec->ve->vb_mapping[i];
+      const struct pipe_vertex_buffer *cso = &vec->vb.states[pipe_idx];
+
+      if (cso->buffer) {
+         info.vma = ilo_resource_get_vma(cso->buffer);
+         info.offset = cso->buffer_offset;
+         info.size = info.vma->vm_size - cso->buffer_offset;
+
+         info.stride = cso->stride;
+      } else {
+         memset(&info, 0, sizeof(info));
+      }
+
+      ilo_state_vertex_buffer_set_info(&vec->vb.vb[i], dev, &info);
+   }
+}
+
+static void
+finalize_urb(struct ilo_context *ilo)
+{
+   const uint16_t attr_size = sizeof(uint32_t) * 4;
+   const struct ilo_dev *dev = ilo->dev;
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   struct ilo_state_urb_info info;
+
+   if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS |
+                       ILO_DIRTY_GS | ILO_DIRTY_FS)))
+      return;
+
+   memset(&info, 0, sizeof(info));
+
+   info.ve_entry_size = attr_size * ilo_state_vf_get_attr_count(&vec->ve->vf);
+
+   if (vec->vs) {
+      info.vs_const_data = (bool)
+         (ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE) +
+          ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_PCB_UCP_SIZE));
+      info.vs_entry_size = attr_size *
+         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT);
+   }
+
+   if (vec->gs) {
+      info.gs_const_data = (bool)
+         ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_PCB_CBUF0_SIZE);
 
-   vec->ve->last_cso_edgeflag = false;
-   if (vec->ve->count && vec->vs &&
-         ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_INPUT_EDGEFLAG)) {
-      vec->ve->edgeflag_cso = vec->ve->cso[vec->ve->count - 1];
-      ilo_gpe_set_ve_edgeflag(ilo->dev, &vec->ve->edgeflag_cso);
-      vec->ve->last_cso_edgeflag = true;
-   }
-
-   vec->ve->prepend_nosrc_cso = false;
-   if (vec->vs &&
-       (ilo_shader_get_kernel_param(vec->vs,
-                                    ILO_KERNEL_VS_INPUT_INSTANCEID) ||
-        ilo_shader_get_kernel_param(vec->vs,
-                                    ILO_KERNEL_VS_INPUT_VERTEXID))) {
-      ilo_gpe_init_ve_nosrc(ilo->dev,
-            GEN6_VFCOMP_STORE_VID,
-            GEN6_VFCOMP_STORE_IID,
-            GEN6_VFCOMP_NOSTORE,
-            GEN6_VFCOMP_NOSTORE,
-            &vec->ve->nosrc_cso);
-      vec->ve->prepend_nosrc_cso = true;
-   } else if (!vec->vs) {
-      /* generate VUE header */
-      ilo_gpe_init_ve_nosrc(ilo->dev,
-            GEN6_VFCOMP_STORE_0, /* Reserved */
-            GEN6_VFCOMP_STORE_0, /* Render Target Array Index */
-            GEN6_VFCOMP_STORE_0, /* Viewport Index */
-            GEN6_VFCOMP_STORE_0, /* Point Width */
-            &vec->ve->nosrc_cso);
-      vec->ve->prepend_nosrc_cso = true;
-   } else if (!vec->ve->count) {
       /*
-       * From the Sandy Bridge PRM, volume 2 part 1, page 92:
+       * From the Ivy Bridge PRM, volume 2 part 1, page 189:
        *
-       *    "SW must ensure that at least one vertex element is defined prior
-       *     to issuing a 3DPRIMTIVE command, or operation is UNDEFINED."
+       *     "All outputs of a GS thread will be stored in the single GS
+       *      thread output URB entry."
+       *
+       * TODO
        */
-      ilo_gpe_init_ve_nosrc(ilo->dev,
-            GEN6_VFCOMP_STORE_0,
-            GEN6_VFCOMP_STORE_0,
-            GEN6_VFCOMP_STORE_0,
-            GEN6_VFCOMP_STORE_1_FP,
-            &vec->ve->nosrc_cso);
-      vec->ve->prepend_nosrc_cso = true;
+      info.gs_entry_size = attr_size *
+         ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT);
+   }
+
+   if (vec->fs) {
+      info.ps_const_data = (bool)
+         ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE);
+   }
+
+   ilo_state_urb_set_info(&vec->urb, dev, &info);
+}
+
+static void
+finalize_viewport(struct ilo_context *ilo)
+{
+   const struct ilo_dev *dev = ilo->dev;
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   /* SCISSOR-only changes still update vp and then mark VIEWPORT dirty */
+   if (vec->dirty & ILO_DIRTY_VIEWPORT) {
+      ilo_state_viewport_set_params(&vec->viewport.vp,
+            dev, &vec->viewport.params, false);
+   } else if (vec->dirty & ILO_DIRTY_SCISSOR) {
+      ilo_state_viewport_set_params(&vec->viewport.vp,
+            dev, &vec->viewport.params, true); /* presumably scissors-only */
+      vec->dirty |= ILO_DIRTY_VIEWPORT;
+   }
+}
+
+static bool
+can_enable_gb_test(const struct ilo_rasterizer_state *rasterizer,
+                   const struct ilo_viewport_state *viewport,
+                   const struct ilo_fb_state *fb)
+{
+   unsigned i;
+
+   /*
+    * There are several reasons that guard band test should be disabled
+    *
+    *  - GL wide points (to avoid partially visible object)
+    *  - GL wide or AA lines (to avoid partially visible object)
+    *  - missing 2D clipping
+    */
+   if (rasterizer->state.point_size_per_vertex ||
+       rasterizer->state.point_size > 1.0f ||
+       rasterizer->state.line_width > 1.0f ||
+       rasterizer->state.line_smooth)
+      return false;
+
+   for (i = 0; i < viewport->params.count; i++) {
+      const struct ilo_state_viewport_matrix_info *mat =
+         &viewport->matrices[i];
+      float min_x, max_x, min_y, max_y;
+
+      min_x = -1.0f * fabsf(mat->scale[0]) + mat->translate[0];
+      max_x =  1.0f * fabsf(mat->scale[0]) + mat->translate[0];
+      min_y = -1.0f * fabsf(mat->scale[1]) + mat->translate[1];
+      max_y =  1.0f * fabsf(mat->scale[1]) + mat->translate[1];
+      /* every viewport must cover [0, width] x [0, height] of the fb */
+      if (min_x > 0.0f || max_x < fb->state.width ||
+          min_y > 0.0f || max_y < fb->state.height)
+         return false;
+   }
+
+   return true;
+}
+
+static void
+finalize_rasterizer(struct ilo_context *ilo)
+{
+   const struct ilo_dev *dev = ilo->dev;
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   struct ilo_rasterizer_state *rasterizer = vec->rasterizer;
+   struct ilo_state_raster_info *info = &vec->rasterizer->info;
+   const bool gb_test_enable =
+      can_enable_gb_test(rasterizer, &vec->viewport, &vec->fb);
+   const bool multisample =
+      (rasterizer->state.multisample && vec->fb.num_samples > 1);
+   const uint8_t barycentric_interps = ilo_shader_get_kernel_param(vec->fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
+   /* NOTE(review): vec->fs is unchecked here but checked in finalize_urb() */
+   /* check for non-orthogonal states */
+   if (info->clip.viewport_count != vec->viewport.params.count ||
+       info->clip.gb_test_enable != gb_test_enable ||
+       info->setup.msaa_enable != multisample ||
+       info->setup.line_msaa_enable != multisample ||
+       info->tri.depth_offset_format != vec->fb.depth_offset_format ||
+       info->scan.sample_count != vec->fb.num_samples ||
+       info->scan.sample_mask != vec->sample_mask ||
+       info->scan.barycentric_interps != barycentric_interps ||
+       info->params.any_integer_rt != vec->fb.has_integer_rt ||
+       info->params.hiz_enable != vec->fb.has_hiz) {
+      info->clip.viewport_count = vec->viewport.params.count;
+      info->clip.gb_test_enable = gb_test_enable;
+      info->setup.msaa_enable = multisample;
+      info->setup.line_msaa_enable = multisample;
+      info->tri.depth_offset_format = vec->fb.depth_offset_format;
+      info->scan.sample_count = vec->fb.num_samples;
+      info->scan.sample_mask = vec->sample_mask;
+      info->scan.barycentric_interps = barycentric_interps;
+      info->params.any_integer_rt = vec->fb.has_integer_rt;
+      info->params.hiz_enable = vec->fb.has_hiz;
+
+      ilo_state_raster_set_info(&rasterizer->rs, dev, &rasterizer->info);
+
+      vec->dirty |= ILO_DIRTY_RASTERIZER;
+   }
+}
+
+static bool
+finalize_blend_rt(struct ilo_context *ilo)
+{
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   const struct ilo_fb_state *fb = &vec->fb;
+   struct ilo_blend_state *blend = vec->blend;
+   struct ilo_state_cc_blend_info *info = &vec->blend->info.blend;
+   bool changed = false;
+   unsigned i;
+   /* returns true when the blend RT info was modified by this call */
+   if (!(vec->dirty & (ILO_DIRTY_FB | ILO_DIRTY_BLEND)))
+      return false;
+
+   /* no color buffers bound: point at the single dummy RT for RT writes */
+   if (!fb->state.nr_cbufs) {
+      if (info->rt != &blend->dummy_rt) {
+         info->rt = &blend->dummy_rt;
+         info->rt_count = 1;
+         changed = true;
+      }
+
+      return changed;
+   }
+
+   if (info->rt != blend->effective_rt ||
+       info->rt_count != fb->state.nr_cbufs) {
+      info->rt = blend->effective_rt;
+      info->rt_count = fb->state.nr_cbufs;
+      changed = true;
+   }
+
+   for (i = 0; i < fb->state.nr_cbufs; i++) {
+      const struct ilo_fb_blend_caps *caps = &fb->blend_caps[i];
+      struct ilo_state_cc_blend_rt_info *rt = &blend->effective_rt[i];
+      /* ignore logicop when not UNORM */
+      const bool logicop_enable =
+         (blend->rt[i].logicop_enable && caps->is_unorm);
+
+      if (rt->cv_is_unorm != caps->is_unorm ||
+          rt->cv_is_integer != caps->is_integer ||
+          rt->logicop_enable != logicop_enable ||
+          rt->force_dst_alpha_one != caps->force_dst_alpha_one) {
+         rt->cv_is_unorm = caps->is_unorm;
+         rt->cv_is_integer = caps->is_integer;
+         rt->logicop_enable = logicop_enable;
+         rt->force_dst_alpha_one = caps->force_dst_alpha_one;
+
+         changed = true;
+      }
+   }
+
+   return changed;
+}
+
+static void
+finalize_blend(struct ilo_context *ilo)
+{
+   const struct ilo_dev *dev = ilo->dev;
+   struct ilo_state_vector *vec = &ilo->state_vector;
+   struct ilo_blend_state *blend = vec->blend;
+   struct ilo_state_cc_info *info = &blend->info;
+   const bool sample_count_one = (vec->fb.num_samples <= 1);
+   const bool float_source0_alpha =
+      (!vec->fb.state.nr_cbufs || !vec->fb.state.cbufs[0] ||
+       !util_format_is_pure_integer(vec->fb.state.cbufs[0]->format));
+
+   /* check for non-orthogonal states */
+   if (finalize_blend_rt(ilo) ||
+       info->alpha.cv_sample_count_one != sample_count_one ||
+       info->alpha.cv_float_source0_alpha != float_source0_alpha ||
+       info->alpha.test_enable != vec->dsa->alpha_test ||
+       info->alpha.test_func != vec->dsa->alpha_func ||
+       memcmp(&info->stencil, &vec->dsa->stencil, sizeof(info->stencil)) ||
+       memcmp(&info->depth, &vec->dsa->depth, sizeof(info->depth)) ||
+       memcmp(&info->params, &vec->cc_params, sizeof(info->params))) {
+      info->alpha.cv_sample_count_one = sample_count_one;
+      info->alpha.cv_float_source0_alpha = float_source0_alpha;
+      info->alpha.test_enable = vec->dsa->alpha_test;
+      info->alpha.test_func = vec->dsa->alpha_func;
+      info->stencil = vec->dsa->stencil;
+      info->depth = vec->dsa->depth;
+      info->params = vec->cc_params;
+
+      ilo_state_cc_set_info(&blend->cc, dev, info);
+
+      blend->alpha_may_kill = (info->alpha.alpha_to_coverage ||
+                               info->alpha.test_enable);
+
+      vec->dirty |= ILO_DIRTY_BLEND;
    }
 }
 
@@ -254,10 +788,24 @@
 {
    ilo->state_vector.draw = draw;
 
+   ilo->state_vector.draw_info.topology = ilo_translate_draw_mode(draw->mode);
+   ilo->state_vector.draw_info.indexed = draw->indexed;
+   ilo->state_vector.draw_info.vertex_count = draw->count;
+   ilo->state_vector.draw_info.vertex_start = draw->start;
+   ilo->state_vector.draw_info.instance_count = draw->instance_count;
+   ilo->state_vector.draw_info.instance_start = draw->start_instance;
+   ilo->state_vector.draw_info.vertex_base = draw->index_bias;
+
+   finalize_blend(ilo);
    finalize_shader_states(&ilo->state_vector);
    finalize_constant_buffers(ilo);
    finalize_index_buffer(ilo);
    finalize_vertex_elements(ilo);
+   finalize_vertex_buffers(ilo);
+
+   finalize_urb(ilo);
+   finalize_rasterizer(ilo);
+   finalize_viewport(ilo);
 
    u_upload_unmap(ilo->uploader);
 }
@@ -301,12 +849,79 @@
                        const struct pipe_blend_state *state)
 {
    const struct ilo_dev *dev = ilo_context(pipe)->dev;
+   struct ilo_state_cc_info *info;
    struct ilo_blend_state *blend;
+   int i;
 
-   blend = MALLOC_STRUCT(ilo_blend_state);
+   blend = CALLOC_STRUCT(ilo_blend_state);
    assert(blend);
 
-   ilo_gpe_init_blend(dev, state, blend);
+   info = &blend->info;
+
+   info->alpha.cv_float_source0_alpha = true;
+   info->alpha.cv_sample_count_one = true;
+   info->alpha.alpha_to_one = state->alpha_to_one;
+   info->alpha.alpha_to_coverage = state->alpha_to_coverage;
+   info->alpha.test_enable = false;
+   info->alpha.test_func = GEN6_COMPAREFUNCTION_ALWAYS;
+
+   info->stencil.cv_has_buffer = true;
+   info->depth.cv_has_buffer= true;
+
+   info->blend.rt = blend->effective_rt;
+   info->blend.rt_count = 1;
+   info->blend.dither_enable = state->dither;
+
+   for (i = 0; i < ARRAY_SIZE(blend->rt); i++) {
+      const struct pipe_rt_blend_state *rt = &state->rt[i];
+      struct ilo_state_cc_blend_rt_info *rt_info = &blend->rt[i];
+
+      rt_info->cv_has_buffer = true;
+      rt_info->cv_is_unorm = true;
+      rt_info->cv_is_integer = false;
+
+      /* logic op takes precedence over blending */
+      if (state->logicop_enable) {
+         rt_info->logicop_enable = true;
+         rt_info->logicop_func = ilo_translate_logicop(state->logicop_func);
+      } else if (rt->blend_enable) {
+         rt_info->blend_enable = true;
+
+         rt_info->rgb_src = ilo_translate_blend_factor(rt->rgb_src_factor);
+         rt_info->rgb_dst = ilo_translate_blend_factor(rt->rgb_dst_factor);
+         rt_info->rgb_func = ilo_translate_blend_func(rt->rgb_func);
+
+         rt_info->a_src = ilo_translate_blend_factor(rt->alpha_src_factor);
+         rt_info->a_dst = ilo_translate_blend_factor(rt->alpha_dst_factor);
+         rt_info->a_func = ilo_translate_blend_func(rt->alpha_func);
+      }
+
+      if (!(rt->colormask & PIPE_MASK_A))
+         rt_info->argb_write_disables |= (1 << 3);
+      if (!(rt->colormask & PIPE_MASK_R))
+         rt_info->argb_write_disables |= (1 << 2);
+      if (!(rt->colormask & PIPE_MASK_G))
+         rt_info->argb_write_disables |= (1 << 1);
+      if (!(rt->colormask & PIPE_MASK_B))
+         rt_info->argb_write_disables |= (1 << 0);
+
+      if (!state->independent_blend_enable) {
+         for (i = 1; i < ARRAY_SIZE(blend->rt); i++)
+            blend->rt[i] = *rt_info;
+         break; /* outer i was reused above; rt[0] is now in every slot */
+      }
+   }
+
+   memcpy(blend->effective_rt, blend->rt, sizeof(blend->rt));
+
+   blend->dummy_rt.argb_write_disables = 0xf;
+
+   if (!ilo_state_cc_init(&blend->cc, dev, &blend->info)) {
+      FREE(blend);
+      return NULL;
+   }
+
+   blend->dual_blend = util_blend_state_is_dual(state, 0);
 
    return blend;
 }
@@ -333,11 +948,105 @@
 {
    const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_sampler_cso *sampler;
+   struct ilo_state_sampler_info info;
+   struct ilo_state_sampler_border_info border;
 
-   sampler = MALLOC_STRUCT(ilo_sampler_cso);
+   sampler = CALLOC_STRUCT(ilo_sampler_cso);
    assert(sampler);
 
-   ilo_gpe_init_sampler_cso(dev, state, sampler);
+   memset(&info, 0, sizeof(info));
+
+   info.non_normalized = !state->normalized_coords;
+   if (state->normalized_coords) {
+      info.lod_bias = state->lod_bias;
+      info.min_lod = state->min_lod;
+      info.max_lod = state->max_lod;
+
+      info.mip_filter = ilo_translate_mip_filter(state->min_mip_filter);
+   } else {
+      /* work around a bug in util_blitter */
+      info.mip_filter = GEN6_MIPFILTER_NONE;
+   }
+
+   if (state->max_anisotropy) {
+      info.min_filter = GEN6_MAPFILTER_ANISOTROPIC;
+      info.mag_filter = GEN6_MAPFILTER_ANISOTROPIC;
+   } else {
+      info.min_filter = ilo_translate_img_filter(state->min_img_filter);
+      info.mag_filter = ilo_translate_img_filter(state->mag_img_filter);
+   }
+
+   info.max_anisotropy = ilo_translate_max_anisotropy(state->max_anisotropy);
+
+   /* use LOD 0 when no mipmapping (see sampler_set_gen6_SAMPLER_STATE()) */
+   if (info.mip_filter == GEN6_MIPFILTER_NONE && info.min_lod > 0.0f) {
+      info.min_lod = 0.0f;
+      info.mag_filter = info.min_filter;
+   }
+
+   if (state->seamless_cube_map) {
+      if (state->min_img_filter == PIPE_TEX_FILTER_NEAREST ||
+          state->mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
+         info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+         info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+         info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+      } else {
+         info.tcx_ctrl = GEN6_TEXCOORDMODE_CUBE;
+         info.tcy_ctrl = GEN6_TEXCOORDMODE_CUBE;
+         info.tcz_ctrl = GEN6_TEXCOORDMODE_CUBE;
+      }
+   } else {
+      info.tcx_ctrl = ilo_translate_address_wrap(state->wrap_s);
+      info.tcy_ctrl = ilo_translate_address_wrap(state->wrap_t);
+      info.tcz_ctrl = ilo_translate_address_wrap(state->wrap_r);
+
+      if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+         /*
+          * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
+          * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering,
+          * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
+          * additionally clamping the texture coordinates to [0.0, 1.0].
+          *
+          * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8.  The
+          * clamping has to be taken care of in the shaders.  There are two
+          * filters here, but let the minification one have a say.
+          */
+         const bool clamp_is_to_edge =
+            (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+
+         if (clamp_is_to_edge) {
+            if (info.tcx_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER)
+               info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+            if (info.tcy_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER)
+               info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+            if (info.tcz_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER)
+               info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP;
+         } else {
+            if (info.tcx_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) {
+               info.tcx_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+               sampler->saturate_s = true;
+            }
+            if (info.tcy_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) {
+               info.tcy_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+               sampler->saturate_t = true;
+            }
+            if (info.tcz_ctrl == GEN8_TEXCOORDMODE_HALF_BORDER) {
+               info.tcz_ctrl = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+               sampler->saturate_r = true;
+            }
+         }
+      }
+   }
+
+   if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+      info.shadow_func = ilo_translate_shadow_func(state->compare_func);
+
+   ilo_state_sampler_init(&sampler->sampler, dev, &info);
+
+   memset(&border, 0, sizeof(border));
+   memcpy(border.rgba.f, state->border_color.f, sizeof(border.rgba.f));
+
+   ilo_state_sampler_border_init(&sampler->border, dev, &border);
 
    return sampler;
 }
@@ -403,12 +1112,74 @@
 {
    const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_rasterizer_state *rast;
+   struct ilo_state_raster_info *info;
 
-   rast = MALLOC_STRUCT(ilo_rasterizer_state);
+   rast = CALLOC_STRUCT(ilo_rasterizer_state);
    assert(rast);
 
    rast->state = *state;
-   ilo_gpe_init_rasterizer(dev, state, rast);
+
+   info = &rast->info;
+
+   info->clip.clip_enable = true;
+   info->clip.stats_enable = true;
+   info->clip.viewport_count = 1;
+   info->clip.force_rtaindex_zero = true;
+   info->clip.user_clip_enables = state->clip_plane_enable;
+   info->clip.gb_test_enable = true;
+   info->clip.xy_test_enable = true;
+   info->clip.z_far_enable = state->depth_clip;
+   info->clip.z_near_enable = state->depth_clip;
+   info->clip.z_near_zero = state->clip_halfz;
+
+   info->setup.first_vertex_provoking = state->flatshade_first;
+   info->setup.viewport_transform = true;
+   info->setup.scissor_enable = state->scissor;
+   info->setup.msaa_enable = false;
+   info->setup.line_msaa_enable = false;
+   info->point.aa_enable = state->point_smooth;
+   info->point.programmable_width = state->point_size_per_vertex;
+   info->line.aa_enable = state->line_smooth;
+   info->line.stipple_enable = state->line_stipple_enable;
+   info->line.giq_enable = true;
+   info->line.giq_last_pixel = state->line_last_pixel;
+   info->tri.front_winding = ilo_translate_front_ccw(state->front_ccw);
+   info->tri.cull_mode = ilo_translate_cull_face(state->cull_face);
+   info->tri.fill_mode_front = ilo_translate_poly_mode(state->fill_front);
+   info->tri.fill_mode_back = ilo_translate_poly_mode(state->fill_back);
+   info->tri.depth_offset_format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+   info->tri.depth_offset_solid = state->offset_tri;
+   info->tri.depth_offset_wireframe = state->offset_line;
+   info->tri.depth_offset_point = state->offset_point;
+   info->tri.poly_stipple_enable = state->poly_stipple_enable;
+
+   info->scan.stats_enable = true;
+   info->scan.sample_count = 1;
+   info->scan.pixloc =
+      ilo_translate_half_pixel_center(state->half_pixel_center);
+   info->scan.sample_mask = ~0u;
+   info->scan.zw_interp = GEN6_ZW_INTERP_PIXEL;
+   info->scan.barycentric_interps = GEN6_INTERP_PERSPECTIVE_PIXEL;
+   info->scan.earlyz_control = GEN7_EDSC_NORMAL;
+   info->scan.earlyz_op = ILO_STATE_RASTER_EARLYZ_NORMAL;
+   info->scan.earlyz_stencil_clear = false;
+
+   info->params.any_integer_rt = false;
+   info->params.hiz_enable = true;
+   info->params.point_width =
+      (state->point_size == 0.0f) ? 1.0f : state->point_size;
+   info->params.line_width =
+      (state->line_width == 0.0f) ? 1.0f : state->line_width;
+
+   info->params.depth_offset_scale = state->offset_scale;
+   /*
+    * Scale the constant term.  The minimum representable value used by the HW
+    * is not large enough to be the minimum resolvable difference.
+    */
+   info->params.depth_offset_const = state->offset_units * 2.0f;
+   info->params.depth_offset_clamp = state->offset_clamp;
+
+   ilo_state_raster_init(&rast->rs, dev, info);
 
    return rast;
 }
@@ -416,10 +1187,20 @@
 static void
 ilo_bind_rasterizer_state(struct pipe_context *pipe, void *state)
 {
+   const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
    vec->rasterizer = state;
 
+   if (vec->rasterizer) {
+      struct ilo_state_line_stipple_info info;
+
+      info.pattern = vec->rasterizer->state.line_stipple_pattern;
+      info.repeat_count = vec->rasterizer->state.line_stipple_factor + 1;
+
+      ilo_state_line_stipple_set_info(&vec->line_stipple, dev, &info);
+   }
+
    vec->dirty |= ILO_DIRTY_RASTERIZER;
 }
 
@@ -433,13 +1214,48 @@
 ilo_create_depth_stencil_alpha_state(struct pipe_context *pipe,
                                      const struct pipe_depth_stencil_alpha_state *state)
 {
-   const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_dsa_state *dsa;
+   int i;
 
-   dsa = MALLOC_STRUCT(ilo_dsa_state);
+   dsa = CALLOC_STRUCT(ilo_dsa_state);
    assert(dsa);
 
-   ilo_gpe_init_dsa(dev, state, dsa);
+   dsa->depth.cv_has_buffer = true;
+   dsa->depth.test_enable = state->depth.enabled;
+   dsa->depth.write_enable = state->depth.writemask;
+   dsa->depth.test_func = ilo_translate_compare_func(state->depth.func);
+
+   dsa->stencil.cv_has_buffer = true;
+   for (i = 0; i < ARRAY_SIZE(state->stencil); i++) {
+      const struct pipe_stencil_state *stencil = &state->stencil[i];
+      struct ilo_state_cc_stencil_op_info *op;
+
+      if (!stencil->enabled)
+         break;
+
+      if (i == 0) {
+         dsa->stencil.test_enable = true;
+         dsa->stencil_front.test_mask = stencil->valuemask;
+         dsa->stencil_front.write_mask = stencil->writemask;
+
+         op = &dsa->stencil.front;
+      } else {
+         dsa->stencil.twosided_enable = true;
+         dsa->stencil_back.test_mask = stencil->valuemask;
+         dsa->stencil_back.write_mask = stencil->writemask;
+
+         op = &dsa->stencil.back;
+      }
+
+      op->test_func = ilo_translate_compare_func(stencil->func);
+      op->fail_op = ilo_translate_stencil_op(stencil->fail_op);
+      op->zfail_op = ilo_translate_stencil_op(stencil->zfail_op);
+      op->zpass_op = ilo_translate_stencil_op(stencil->zpass_op);
+   }
+
+   dsa->alpha_test = state->alpha.enabled;
+   dsa->alpha_ref = state->alpha.ref_value;
+   dsa->alpha_func = ilo_translate_compare_func(state->alpha.func);
 
    return dsa;
 }
@@ -450,6 +1266,17 @@
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
    vec->dsa = state;
+   if (vec->dsa) {
+      vec->cc_params.alpha_ref = vec->dsa->alpha_ref;
+      vec->cc_params.stencil_front.test_mask =
+         vec->dsa->stencil_front.test_mask;
+      vec->cc_params.stencil_front.write_mask =
+         vec->dsa->stencil_front.write_mask;
+      vec->cc_params.stencil_back.test_mask =
+         vec->dsa->stencil_back.test_mask;
+      vec->cc_params.stencil_back.write_mask =
+         vec->dsa->stencil_back.write_mask;
+   }
 
    vec->dirty |= ILO_DIRTY_DSA;
 }
@@ -575,12 +1402,60 @@
                                  const struct pipe_vertex_element *elements)
 {
    const struct ilo_dev *dev = ilo_context(pipe)->dev;
+   struct ilo_state_vf_element_info vf_elements[PIPE_MAX_ATTRIBS];
+   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
+   struct ilo_state_vf_info vf_info;
    struct ilo_ve_state *ve;
+   unsigned i;
 
-   ve = MALLOC_STRUCT(ilo_ve_state);
+   ve = CALLOC_STRUCT(ilo_ve_state);
    assert(ve);
 
-   ilo_gpe_init_ve(dev, num_elements, elements, ve);
+   for (i = 0; i < num_elements; i++) {
+      const struct pipe_vertex_element *elem = &elements[i];
+      struct ilo_state_vf_element_info *attr = &vf_elements[i];
+      unsigned hw_idx;
+
+      /*
+       * map the pipe vb to the hardware vb, which has a fixed instance
+       * divisor
+       */
+      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
+         if (ve->vb_mapping[hw_idx] == elem->vertex_buffer_index &&
+             instance_divisors[hw_idx] == elem->instance_divisor)
+            break;
+      }
+
+      /* create one if there is no matching hardware vb */
+      if (hw_idx >= ve->vb_count) {
+         hw_idx = ve->vb_count++;
+
+         ve->vb_mapping[hw_idx] = elem->vertex_buffer_index;
+         instance_divisors[hw_idx] = elem->instance_divisor;
+      }
+
+      attr->buffer = hw_idx;
+      attr->vertex_offset = elem->src_offset;
+      attr->format = ilo_format_translate_vertex(dev, elem->src_format);
+      attr->format_size = util_format_get_blocksize(elem->src_format);
+      attr->component_count = util_format_get_nr_components(elem->src_format);
+      attr->is_integer = util_format_is_pure_integer(elem->src_format);
+
+      attr->instancing_enable = (elem->instance_divisor != 0);
+      attr->instancing_step_rate = elem->instance_divisor;
+   }
+
+   memset(&vf_info, 0, sizeof(vf_info));
+   vf_info.data = ve->vf_data;
+   vf_info.data_size = sizeof(ve->vf_data);
+   vf_info.elements = vf_elements;
+   vf_info.element_count = num_elements;
+   /* vf_info.params and ve->vf_params are both zeroed */
+
+   if (!ilo_state_vf_init(&ve->vf, dev, &vf_info)) {
+      FREE(ve);
+      return NULL;
+   }
 
    return ve;
 }
@@ -609,7 +1484,7 @@
 {
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
-   vec->blend_color = *state;
+   memcpy(vec->cc_params.blend_rgba, state->color, sizeof(state->color));
 
    vec->dirty |= ILO_DIRTY_BLEND_COLOR;
 }
@@ -626,6 +1501,9 @@
 
    vec->stencil_ref = *state;
 
+   vec->cc_params.stencil_front.test_ref = state->ref_value[0];
+   vec->cc_params.stencil_back.test_ref = state->ref_value[1];
+
    vec->dirty |= ILO_DIRTY_STENCIL_REF;
 }
 
@@ -675,47 +1553,46 @@
 
          pipe_resource_reference(&cso->resource, buf[i].buffer);
 
+         cso->info.access = ILO_STATE_SURFACE_ACCESS_DP_DATA;
+         cso->info.format = GEN6_FORMAT_R32G32B32A32_FLOAT;
+         cso->info.format_size = 16;
+         cso->info.struct_size = 16;
+         cso->info.readonly = true;
+         cso->info.size = buf[i].buffer_size;
+
          if (buf[i].buffer) {
-            const enum pipe_format elem_format =
-               PIPE_FORMAT_R32G32B32A32_FLOAT;
+            cso->info.vma = ilo_resource_get_vma(buf[i].buffer);
+            cso->info.offset = buf[i].buffer_offset;
 
-            ilo_gpe_init_view_surface_for_buffer(dev,
-                  ilo_buffer(buf[i].buffer),
-                  buf[i].buffer_offset, buf[i].buffer_size,
-                  util_format_get_blocksize(elem_format), elem_format,
-                  false, false, &cso->surface);
+            memset(&cso->surface, 0, sizeof(cso->surface));
+            ilo_state_surface_init_for_buffer(&cso->surface, dev, &cso->info);
 
             cso->user_buffer = NULL;
-            cso->user_buffer_size = 0;
 
             cbuf->enabled_mask |= 1 << (index + i);
-         }
-         else if (buf[i].user_buffer) {
-            cso->surface.bo = NULL;
-
+         } else if (buf[i].user_buffer) {
+            cso->info.vma = NULL;
             /* buffer_offset does not apply for user buffer */
             cso->user_buffer = buf[i].user_buffer;
-            cso->user_buffer_size = buf[i].buffer_size;
 
             cbuf->enabled_mask |= 1 << (index + i);
-         }
-         else {
-            cso->surface.bo = NULL;
+         } else {
+            cso->info.vma = NULL;
+            cso->info.size = 0;
             cso->user_buffer = NULL;
-            cso->user_buffer_size = 0;
 
             cbuf->enabled_mask &= ~(1 << (index + i));
          }
       }
-   }
-   else {
+   } else {
       for (i = 0; i < count; i++) {
          struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];
 
          pipe_resource_reference(&cso->resource, NULL);
-         cso->surface.bo = NULL;
+
+         cso->info.vma = NULL;
+         cso->info.size = 0;
          cso->user_buffer = NULL;
-         cso->user_buffer_size = 0;
 
          cbuf->enabled_mask &= ~(1 << (index + i));
       }
@@ -725,13 +1602,117 @@
 }
 
 static void
+fb_set_blend_caps(const struct ilo_dev *dev,
+                  enum pipe_format format,
+                  struct ilo_fb_blend_caps *caps)
+{
+   const struct util_format_description *desc =
+      util_format_description(format);
+   const int ch = util_format_get_first_non_void_channel(format);
+
+   memset(caps, 0, sizeof(*caps));
+
+   if (format == PIPE_FORMAT_NONE || desc->is_mixed)
+      return;
+
+   caps->is_unorm = (ch >= 0 && desc->channel[ch].normalized &&
+         desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
+         desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
+   caps->is_integer = util_format_is_pure_integer(format);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+    *
+    *     "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
+    *      variants), otherwise Logic Ops must be DISABLED."
+    *
+    * According to the classic driver, this is lifted on Gen8+.
+    */
+   caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8) || caps->is_unorm);
+
+   /* no blending for pure integer formats */
+   caps->can_blend = !caps->is_integer;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+    *
+    *     "Alpha Test can only be enabled if Pixel Shader outputs a float
+    *      alpha value."
+    */
+   caps->can_alpha_test = !caps->is_integer;
+
+   caps->force_dst_alpha_one =
+      (ilo_format_translate_render(dev, format) !=
+       ilo_format_translate_color(dev, format));
+
+   /* sanity check */
+   if (caps->force_dst_alpha_one) {
+      enum pipe_format render_format;
+
+      switch (format) {
+      case PIPE_FORMAT_B8G8R8X8_UNORM:
+         render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+         break;
+      default:
+         render_format = PIPE_FORMAT_NONE;
+         break;
+      }
+
+      assert(ilo_format_translate_render(dev, format) ==
+             ilo_format_translate_color(dev, render_format));
+   }
+}
+
+static void
 ilo_set_framebuffer_state(struct pipe_context *pipe,
                           const struct pipe_framebuffer_state *state)
 {
    const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_fb_state *fb = &vec->fb;
+   const struct pipe_surface *first_surf = NULL;
+   int i;
 
-   ilo_gpe_set_fb(dev, state, &vec->fb);
+   util_copy_framebuffer_state(&fb->state, state);
+
+   fb->has_integer_rt = false;
+   for (i = 0; i < state->nr_cbufs; i++) {
+      if (state->cbufs[i]) {
+         fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);
+
+         fb->has_integer_rt |= fb->blend_caps[i].is_integer;
+
+         if (!first_surf)
+            first_surf = state->cbufs[i];
+      } else {
+         fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
+      }
+   }
+
+   if (!first_surf && state->zsbuf)
+      first_surf = state->zsbuf;
+
+   fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
+   if (!fb->num_samples)
+      fb->num_samples = 1;
+
+   if (state->zsbuf) {
+      const struct ilo_surface_cso *cso =
+         (const struct ilo_surface_cso *) state->zsbuf;
+      const struct ilo_texture *tex = ilo_texture(cso->base.texture);
+
+      fb->has_hiz = cso->u.zs.hiz_vma;
+      fb->depth_offset_format =
+         ilo_format_translate_depth(dev, tex->image_format);
+   } else {
+      fb->has_hiz = false;
+      fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT;
+   }
+
+   /*
+    * The PRMs list several restrictions when the framebuffer has more than
+    * one surface.  It seems they are actually lifted on GEN6+.
+    */
 
    vec->dirty |= ILO_DIRTY_FB;
 }
@@ -740,9 +1721,15 @@
 ilo_set_polygon_stipple(struct pipe_context *pipe,
                         const struct pipe_poly_stipple *state)
 {
+   const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   struct ilo_state_poly_stipple_info info;
+   int i;
 
-   vec->poly_stipple = *state;
+   for (i = 0; i < 32; i++)
+      info.pattern[i] = state->stipple[i];
+
+   ilo_state_poly_stipple_set_info(&vec->poly_stipple, dev, &info);
 
    vec->dirty |= ILO_DIRTY_POLY_STIPPLE;
 }
@@ -753,11 +1740,26 @@
                        unsigned num_scissors,
                        const struct pipe_scissor_state *scissors)
 {
-   const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
+   unsigned i;
 
-   ilo_gpe_set_scissor(dev, start_slot, num_scissors,
-         scissors, &vec->scissor);
+   for (i = 0; i < num_scissors; i++) {
+      struct ilo_state_viewport_scissor_info *info =
+         &vec->viewport.scissors[start_slot + i];
+
+      if (scissors[i].minx < scissors[i].maxx &&
+          scissors[i].miny < scissors[i].maxy) {
+         info->min_x = scissors[i].minx;
+         info->min_y = scissors[i].miny;
+         info->max_x = scissors[i].maxx - 1;
+         info->max_y = scissors[i].maxy - 1;
+      } else { /* empty scissor rect: stored as min (1) > max (0) */
+         info->min_x = 1;
+         info->min_y = 1;
+         info->max_x = 0;
+         info->max_y = 0;
+      }
+   }
 
    vec->dirty |= ILO_DIRTY_SCISSOR;
 }
@@ -768,28 +1770,31 @@
                         unsigned num_viewports,
                         const struct pipe_viewport_state *viewports)
 {
-   const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
    if (viewports) {
       unsigned i;
 
       for (i = 0; i < num_viewports; i++) {
-         ilo_gpe_set_viewport_cso(dev, &viewports[i],
-               &vec->viewport.cso[start_slot + i]);
+         struct ilo_state_viewport_matrix_info *info =
+            &vec->viewport.matrices[start_slot + i];
+
+         memcpy(info->scale, viewports[i].scale, sizeof(info->scale));
+         memcpy(info->translate, viewports[i].translate,
+               sizeof(info->translate));
       }
 
-      if (vec->viewport.count < start_slot + num_viewports)
-         vec->viewport.count = start_slot + num_viewports;
+      if (vec->viewport.params.count < start_slot + num_viewports)
+         vec->viewport.params.count = start_slot + num_viewports;
 
       /* need to save viewport 0 for util_blitter */
       if (!start_slot && num_viewports)
          vec->viewport.viewport0 = viewports[0];
    }
    else {
-      if (vec->viewport.count <= start_slot + num_viewports &&
-          vec->viewport.count > start_slot)
-         vec->viewport.count = start_slot;
+      if (vec->viewport.params.count <= start_slot + num_viewports &&
+          vec->viewport.params.count > start_slot)
+         vec->viewport.params.count = start_slot;
    }
 
    vec->dirty |= ILO_DIRTY_VIEWPORT;
@@ -844,10 +1849,11 @@
 }
 
 static void
-ilo_set_shader_resources(struct pipe_context *pipe,
-                         unsigned start, unsigned count,
-                         struct pipe_surface **surfaces)
+ilo_set_shader_images(struct pipe_context *pipe, unsigned shader,
+                      unsigned start, unsigned count,
+                      struct pipe_image_view **views)
 {
+#if 0
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
    struct ilo_resource_state *dst = &vec->resource;
    unsigned i;
@@ -876,6 +1882,7 @@
    }
 
    vec->dirty |= ILO_DIRTY_RESOURCE;
+#endif
 }
 
 static void
@@ -905,16 +1912,11 @@
    struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
 
    if (state) {
-      pipe_resource_reference(&vec->ib.buffer, state->buffer);
-      vec->ib.user_buffer = state->user_buffer;
-      vec->ib.offset = state->offset;
-      vec->ib.index_size = state->index_size;
-   }
-   else {
-      pipe_resource_reference(&vec->ib.buffer, NULL);
-      vec->ib.user_buffer = NULL;
-      vec->ib.offset = 0;
-      vec->ib.index_size = 0;
+      pipe_resource_reference(&vec->ib.state.buffer, state->buffer);
+      vec->ib.state = *state;
+   } else {
+      pipe_resource_reference(&vec->ib.state.buffer, NULL);
+      memset(&vec->ib.state, 0, sizeof(vec->ib.state));
    }
 
    vec->dirty |= ILO_DIRTY_IB;
@@ -926,19 +1928,27 @@
                                 unsigned buffer_offset,
                                 unsigned buffer_size)
 {
-   struct pipe_stream_output_target *target;
+   const struct ilo_dev *dev = ilo_context(pipe)->dev;
+   struct ilo_stream_output_target *target;
+   struct ilo_state_sol_buffer_info info;
 
-   target = MALLOC_STRUCT(pipe_stream_output_target);
+   target = CALLOC_STRUCT(ilo_stream_output_target);
    assert(target);
 
-   pipe_reference_init(&target->reference, 1);
-   target->buffer = NULL;
-   pipe_resource_reference(&target->buffer, res);
-   target->context = pipe;
-   target->buffer_offset = buffer_offset;
-   target->buffer_size = buffer_size;
+   pipe_reference_init(&target->base.reference, 1);
+   pipe_resource_reference(&target->base.buffer, res);
+   target->base.context = pipe;
+   target->base.buffer_offset = buffer_offset;
+   target->base.buffer_size = buffer_size;
+
+   memset(&info, 0, sizeof(info));
+   info.vma = ilo_resource_get_vma(res);
+   info.offset = buffer_offset;
+   info.size = buffer_size;
+
+   ilo_state_sol_buffer_init(&target->sb, dev, &info);
 
-   return target;
+   return &target->base;
 }
 
 static void
@@ -991,7 +2001,7 @@
    const struct ilo_dev *dev = ilo_context(pipe)->dev;
    struct ilo_view_cso *view;
 
-   view = MALLOC_STRUCT(ilo_view_cso);
+   view = CALLOC_STRUCT(ilo_view_cso);
    assert(view);
 
    view->base = *templ;
@@ -1001,16 +2011,23 @@
    view->base.context = pipe;
 
    if (res->target == PIPE_BUFFER) {
-      const unsigned elem_size = util_format_get_blocksize(templ->format);
-      const unsigned first_elem = templ->u.buf.first_element;
-      const unsigned num_elems = templ->u.buf.last_element - first_elem + 1;
-
-      ilo_gpe_init_view_surface_for_buffer(dev, ilo_buffer(res),
-            first_elem * elem_size, num_elems * elem_size,
-            elem_size, templ->format, false, false, &view->surface);
-   }
-   else {
+      struct ilo_state_surface_buffer_info info;
+
+      memset(&info, 0, sizeof(info));
+      info.vma = ilo_resource_get_vma(res);
+      info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
+      info.format = ilo_format_translate_color(dev, templ->format);
+      /* struct_size must be set before it scales the offset and size */
+      info.format_size = util_format_get_blocksize(templ->format);
+      info.struct_size = info.format_size;
+      info.offset = templ->u.buf.first_element * info.struct_size;
+      info.size = (templ->u.buf.last_element -
+            templ->u.buf.first_element + 1) * info.struct_size;
+      info.readonly = true;
+      ilo_state_surface_init_for_buffer(&view->surface, dev, &info);
+   } else {
       struct ilo_texture *tex = ilo_texture(res);
+      struct ilo_state_surface_image_info info;
 
       /* warn about degraded performance because of a missing binding flag */
       if (tex->image.tiling == GEN6_TILING_NONE &&
@@ -1019,13 +2036,32 @@
                   "not created for sampling\n");
       }
 
-      ilo_gpe_init_view_surface_for_image(dev, &tex->image,
-            tex->base.target, templ->format,
-            templ->u.tex.first_level,
-            templ->u.tex.last_level - templ->u.tex.first_level + 1,
-            templ->u.tex.first_layer,
-            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
-            false, &view->surface);
+      memset(&info, 0, sizeof(info));
+
+      info.img = &tex->image;
+      info.level_base = templ->u.tex.first_level;
+      info.level_count = templ->u.tex.last_level -
+         templ->u.tex.first_level + 1;
+      info.slice_base = templ->u.tex.first_layer;
+      info.slice_count = templ->u.tex.last_layer -
+         templ->u.tex.first_layer + 1;
+
+      info.vma = &tex->vma;
+      info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
+      info.type = tex->image.type;
+
+      if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+          tex->separate_s8) {
+         info.format = ilo_format_translate_texture(dev,
+               PIPE_FORMAT_Z32_FLOAT);
+      } else {
+         info.format = ilo_format_translate_texture(dev, templ->format);
+      }
+
+      info.is_array = util_resource_is_array_texture(&tex->base);
+      info.readonly = true;
+
+      ilo_state_surface_init_for_image(&view->surface, dev, &info);
    }
 
    return &view->base;
@@ -1048,7 +2084,7 @@
    struct ilo_texture *tex = ilo_texture(res);
    struct ilo_surface_cso *surf;
 
-   surf = MALLOC_STRUCT(ilo_surface_cso);
+   surf = CALLOC_STRUCT(ilo_surface_cso);
    assert(surf);
 
    surf->base = *templ;
@@ -1063,28 +2099,70 @@
    surf->is_rt = !util_format_is_depth_or_stencil(templ->format);
 
    if (surf->is_rt) {
+      struct ilo_state_surface_image_info info;
+
       /* relax this? */
       assert(tex->base.target != PIPE_BUFFER);
 
-      /*
-       * classic i965 sets render_cache_rw for constant buffers and sol
-       * surfaces but not render buffers.  Why?
-       */
-      ilo_gpe_init_view_surface_for_image(dev,
-            &tex->image, tex->base.target,
-            templ->format, templ->u.tex.level, 1,
-            templ->u.tex.first_layer,
-            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
-            true, &surf->u.rt);
+      memset(&info, 0, sizeof(info));
+
+      info.img = &tex->image;
+      info.level_base = templ->u.tex.level;
+      info.level_count = 1;
+      info.slice_base = templ->u.tex.first_layer;
+      info.slice_count = templ->u.tex.last_layer -
+         templ->u.tex.first_layer + 1;
+
+      info.vma = &tex->vma;
+      if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+         info.aux_vma = &tex->aux_vma;
+
+      info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
+
+      info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+         GEN6_SURFTYPE_2D : tex->image.type;
+
+      info.format = ilo_format_translate_render(dev, templ->format);
+      info.is_array = util_resource_is_array_texture(&tex->base);
+
+      ilo_state_surface_init_for_image(&surf->u.rt, dev, &info);
    } else {
+      struct ilo_state_zs_info info;
+
       assert(res->target != PIPE_BUFFER);
 
-      ilo_gpe_init_zs_surface(dev, &tex->image,
-            (tex->separate_s8) ? &tex->separate_s8->image : NULL,
-            tex->base.target, templ->format,
-            templ->u.tex.level, templ->u.tex.first_layer,
-            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
-            &surf->u.zs);
+      memset(&info, 0, sizeof(info));
+
+      if (templ->format == PIPE_FORMAT_S8_UINT) {
+         info.s_vma = &tex->vma;
+         info.s_img = &tex->image;
+      } else {
+         info.z_vma = &tex->vma;
+         info.z_img = &tex->image;
+
+         if (tex->separate_s8) {
+            info.s_vma = &tex->separate_s8->vma;
+            info.s_img = &tex->separate_s8->image;
+         }
+
+         if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+            info.hiz_vma = &tex->aux_vma;
+      }
+
+      info.level = templ->u.tex.level;
+      info.slice_base = templ->u.tex.first_layer;
+      info.slice_count = templ->u.tex.last_layer -
+         templ->u.tex.first_layer + 1;
+
+      info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+         GEN6_SURFTYPE_2D : tex->image.type;
+
+      info.format = ilo_format_translate_depth(dev, tex->image_format);
+      if (ilo_dev_gen(dev) == ILO_GEN(6) && !info.hiz_vma &&
+          tex->image_format == PIPE_FORMAT_Z24X8_UNORM)
+         info.format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+
+      ilo_state_zs_init(&surf->u.zs, dev, &info);
    }
 
    return &surf->base;
@@ -1269,7 +2347,7 @@
    ilo->base.set_scissor_states = ilo_set_scissor_states;
    ilo->base.set_viewport_states = ilo_set_viewport_states;
    ilo->base.set_sampler_views = ilo_set_sampler_views;
-   ilo->base.set_shader_resources = ilo_set_shader_resources;
+   ilo->base.set_shader_images = ilo_set_shader_images;
    ilo->base.set_vertex_buffers = ilo_set_vertex_buffers;
    ilo->base.set_index_buffer = ilo_set_index_buffer;
 
@@ -1294,10 +2372,30 @@
 ilo_state_vector_init(const struct ilo_dev *dev,
                       struct ilo_state_vector *vec)
 {
-   ilo_gpe_set_scissor_null(dev, &vec->scissor);
+   struct ilo_state_urb_info urb_info;
+
+   vec->sample_mask = ~0u;
+
+   ilo_state_viewport_init_data_only(&vec->viewport.vp, dev,
+         vec->viewport.vp_data, sizeof(vec->viewport.vp_data));
+   assert(vec->viewport.vp.array_size >= ILO_MAX_VIEWPORTS);
 
-   ilo_gpe_init_zs_surface(dev, NULL, NULL, PIPE_TEXTURE_2D,
-         PIPE_FORMAT_NONE, 0, 0, 1, &vec->fb.null_zs);
+   vec->viewport.params.matrices = vec->viewport.matrices;
+   vec->viewport.params.scissors = vec->viewport.scissors;
+
+   ilo_state_hs_init_disabled(&vec->disabled_hs, dev);
+   ilo_state_ds_init_disabled(&vec->disabled_ds, dev);
+   ilo_state_gs_init_disabled(&vec->disabled_gs, dev);
+
+   ilo_state_sol_buffer_init_disabled(&vec->so.dummy_sb, dev);
+
+   ilo_state_surface_init_for_null(&vec->fb.null_rt, dev);
+   ilo_state_zs_init_for_null(&vec->fb.null_zs, dev);
+
+   ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev);
+
+   memset(&urb_info, 0, sizeof(urb_info));
+   ilo_state_urb_init(&vec->urb, dev, &urb_info);
 
    util_dynarray_init(&vec->global_binding.bindings);
 
@@ -1314,7 +2412,7 @@
          pipe_resource_reference(&vec->vb.states[i].buffer, NULL);
    }
 
-   pipe_resource_reference(&vec->ib.buffer, NULL);
+   pipe_resource_reference(&vec->ib.state.buffer, NULL);
    pipe_resource_reference(&vec->ib.hw_resource, NULL);
 
    for (i = 0; i < vec->so.count; i++)
@@ -1361,7 +2459,6 @@
 ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
                                   struct pipe_resource *res)
 {
-   struct intel_bo *bo = ilo_resource_get_bo(res);
    uint32_t states = 0;
    unsigned sh, i;
 
@@ -1377,7 +2474,7 @@
          }
       }
 
-      if (vec->ib.buffer == res) {
+      if (vec->ib.state.buffer == res) {
          states |= ILO_DIRTY_IB;
 
          /*
@@ -1409,7 +2506,6 @@
                [PIPE_SHADER_GEOMETRY]  = ILO_DIRTY_VIEW_GS,
                [PIPE_SHADER_COMPUTE]   = ILO_DIRTY_VIEW_CS,
             };
-            cso->surface.bo = bo;
 
             states |= view_dirty_bits[sh];
             break;
@@ -1421,7 +2517,6 @@
             struct ilo_cbuf_cso *cbuf = &vec->cbuf[sh].cso[i];
 
             if (cbuf->resource == res) {
-               cbuf->surface.bo = bo;
                states |= ILO_DIRTY_CBUF;
                break;
             }
@@ -1434,7 +2529,6 @@
          (struct ilo_surface_cso *) vec->resource.states[i];
 
       if (cso->base.texture == res) {
-         cso->u.rt.bo = bo;
          states |= ILO_DIRTY_RESOURCE;
          break;
       }
@@ -1446,26 +2540,19 @@
          struct ilo_surface_cso *cso =
             (struct ilo_surface_cso *) vec->fb.state.cbufs[i];
          if (cso && cso->base.texture == res) {
-            cso->u.rt.bo = bo;
             states |= ILO_DIRTY_FB;
             break;
          }
       }
 
-      if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res) {
-         struct ilo_surface_cso *cso =
-            (struct ilo_surface_cso *) vec->fb.state.zsbuf;
-
-         cso->u.rt.bo = bo;
+      if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res)
          states |= ILO_DIRTY_FB;
-      }
    }
 
    for (i = 0; i < vec->cs_resource.count; i++) {
       struct ilo_surface_cso *cso =
          (struct ilo_surface_cso *) vec->cs_resource.states[i];
       if (cso->base.texture == res) {
-         cso->u.rt.bo = bo;
          states |= ILO_DIRTY_CS_RESOURCE;
          break;
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_state.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_state.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_state.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_state.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,13 +28,38 @@
 #ifndef ILO_STATE_H
 #define ILO_STATE_H
 
-#include "core/ilo_state_3d.h"
+#include "core/ilo_builder_3d.h" /* for gen6_3dprimitive_info */
+#include "core/ilo_state_cc.h"
+#include "core/ilo_state_compute.h"
+#include "core/ilo_state_raster.h"
+#include "core/ilo_state_sampler.h"
+#include "core/ilo_state_sbe.h"
+#include "core/ilo_state_shader.h"
+#include "core/ilo_state_sol.h"
+#include "core/ilo_state_surface.h"
+#include "core/ilo_state_urb.h"
+#include "core/ilo_state_vf.h"
+#include "core/ilo_state_viewport.h"
+#include "core/ilo_state_zs.h"
 #include "pipe/p_state.h"
 #include "util/u_dynarray.h"
 
 #include "ilo_common.h"
 
 /**
+ * \see brw_context.h
+ */
+#define ILO_MAX_DRAW_BUFFERS    8
+#define ILO_MAX_CONST_BUFFERS   (1 + 12)
+#define ILO_MAX_SAMPLER_VIEWS   16
+#define ILO_MAX_SAMPLERS        16
+#define ILO_MAX_SO_BINDINGS     64
+#define ILO_MAX_SO_BUFFERS      4
+#define ILO_MAX_VIEWPORTS       1
+
+#define ILO_MAX_SURFACES        256
+
+/**
  * States that we track.
  *
  * XXX Do we want to count each sampler or vertex buffer as a state?  If that
@@ -120,6 +145,172 @@
 };
 
 struct ilo_context;
+struct ilo_shader_state;
+
+struct ilo_ve_state {
+   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
+   unsigned vb_count;
+
+   /* these are not valid until the state is finalized */
+   uint32_t vf_data[PIPE_MAX_ATTRIBS][4];
+   struct ilo_state_vf_params_info vf_params;
+   struct ilo_state_vf vf;
+};
+
+struct ilo_vb_state {
+   struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
+   struct ilo_state_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+   uint32_t enabled_mask;
+};
+
+struct ilo_ib_state {
+   struct pipe_index_buffer state;
+
+   /* these are not valid until the state is finalized */
+   struct pipe_resource *hw_resource;
+   unsigned hw_index_size;
+   struct ilo_state_index_buffer ib;
+};
+
+struct ilo_cbuf_cso {
+   struct pipe_resource *resource;
+   struct ilo_state_surface_buffer_info info;
+   struct ilo_state_surface surface;
+
+   /*
+    * this CSO is not so constant because user buffer needs to be uploaded in
+    * finalize_constant_buffers()
+    */
+   const void *user_buffer;
+};
+
+struct ilo_sampler_cso {
+   struct ilo_state_sampler sampler;
+   struct ilo_state_sampler_border border;
+   bool saturate_s;
+   bool saturate_t;
+   bool saturate_r;
+};
+
+struct ilo_sampler_state {
+   const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
+};
+
+struct ilo_cbuf_state {
+   struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
+   uint32_t enabled_mask;
+};
+
+struct ilo_resource_state {
+   struct pipe_surface *states[PIPE_MAX_SHADER_IMAGES];
+   unsigned count;
+};
+
+struct ilo_view_cso {
+   struct pipe_sampler_view base;
+
+   struct ilo_state_surface surface;
+};
+
+struct ilo_view_state {
+   struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
+   unsigned count;
+};
+
+struct ilo_stream_output_target {
+   struct pipe_stream_output_target base;
+
+   struct ilo_state_sol_buffer sb;
+};
+
+struct ilo_so_state {
+   struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
+   unsigned count;
+   unsigned append_bitmask;
+
+   struct ilo_state_sol_buffer dummy_sb;
+
+   bool enabled;
+};
+
+struct ilo_rasterizer_state {
+   struct pipe_rasterizer_state state;
+
+   /* these are invalid until finalize_rasterizer() */
+   struct ilo_state_raster_info info;
+   struct ilo_state_raster rs;
+};
+
+struct ilo_viewport_state {
+   struct ilo_state_viewport_matrix_info matrices[ILO_MAX_VIEWPORTS];
+   struct ilo_state_viewport_scissor_info scissors[ILO_MAX_VIEWPORTS];
+   struct ilo_state_viewport_params_info params;
+
+   struct pipe_viewport_state viewport0;
+   struct pipe_scissor_state scissor0;
+
+   struct ilo_state_viewport vp;
+   uint32_t vp_data[20 * ILO_MAX_VIEWPORTS];
+};
+
+struct ilo_surface_cso {
+   struct pipe_surface base;
+
+   bool is_rt;
+   union {
+      struct ilo_state_surface rt;
+      struct ilo_state_zs zs;
+   } u;
+};
+
+struct ilo_fb_state {
+   struct pipe_framebuffer_state state;
+
+   struct ilo_state_surface null_rt;
+   struct ilo_state_zs null_zs;
+
+   struct ilo_fb_blend_caps {
+      bool is_unorm;
+      bool is_integer;
+      bool force_dst_alpha_one;
+
+      bool can_logicop;
+      bool can_blend;
+      bool can_alpha_test;
+   } blend_caps[PIPE_MAX_COLOR_BUFS];
+
+   unsigned num_samples;
+
+   bool has_integer_rt;
+   bool has_hiz;
+   enum gen_depth_format depth_offset_format;
+};
+
+struct ilo_dsa_state {
+   struct ilo_state_cc_depth_info depth;
+
+   struct ilo_state_cc_stencil_info stencil;
+   struct {
+      uint8_t test_mask;
+      uint8_t write_mask;
+   } stencil_front, stencil_back;
+
+   bool alpha_test;
+   float alpha_ref;
+   enum gen_compare_function alpha_func;
+};
+
+struct ilo_blend_state {
+   struct ilo_state_cc_blend_rt_info rt[PIPE_MAX_COLOR_BUFS];
+   struct ilo_state_cc_blend_rt_info dummy_rt;
+   bool dual_blend;
+
+   /* these are invalid until finalize_blend() */
+   struct ilo_state_cc_blend_rt_info effective_rt[PIPE_MAX_COLOR_BUFS];
+   struct ilo_state_cc_info info;
+   struct ilo_state_cc cc;
+   bool alpha_may_kill;
+};
 
 struct ilo_global_binding_cso {
    struct pipe_resource *resource;
@@ -147,6 +338,7 @@
 
 struct ilo_state_vector {
    const struct pipe_draw_info *draw;
+   struct gen6_3dprimitive_info draw_info;
 
    uint32_t dirty;
 
@@ -157,30 +349,41 @@
    struct ilo_shader_state *vs;
    struct ilo_shader_state *gs;
 
+   struct ilo_state_hs disabled_hs;
+   struct ilo_state_ds disabled_ds;
+   struct ilo_state_gs disabled_gs;
+
    struct ilo_so_state so;
 
    struct pipe_clip_state clip;
+
    struct ilo_viewport_state viewport;
-   struct ilo_scissor_state scissor;
 
-   const struct ilo_rasterizer_state *rasterizer;
-   struct pipe_poly_stipple poly_stipple;
+   struct ilo_rasterizer_state *rasterizer;
+
+   struct ilo_state_line_stipple line_stipple;
+   struct ilo_state_poly_stipple poly_stipple;
    unsigned sample_mask;
 
    struct ilo_shader_state *fs;
 
-   const struct ilo_dsa_state *dsa;
+   struct ilo_state_cc_params_info cc_params;
    struct pipe_stencil_ref stencil_ref;
-   const struct ilo_blend_state *blend;
-   struct pipe_blend_color blend_color;
+   const struct ilo_dsa_state *dsa;
+   struct ilo_blend_state *blend;
+
    struct ilo_fb_state fb;
 
+   struct ilo_state_urb urb;
+
    /* shader resources */
    struct ilo_sampler_state sampler[PIPE_SHADER_TYPES];
    struct ilo_view_state view[PIPE_SHADER_TYPES];
    struct ilo_cbuf_state cbuf[PIPE_SHADER_TYPES];
    struct ilo_resource_state resource;
 
+   struct ilo_state_sampler disabled_sampler;
+
    /* GPGPU */
    struct ilo_shader_state *cs;
    struct ilo_resource_state cs_resource;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_transfer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_transfer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/ilo_transfer.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/ilo_transfer.c	2015-09-16 14:36:09.000000000 +0000
@@ -100,7 +100,7 @@
             m = ILO_TRANSFER_MAP_SW_ZS;
             need_convert = true;
          }
-      } else if (tex->image.format != tex->base.format) {
+      } else if (tex->image_format != tex->base.format) {
          m = ILO_TRANSFER_MAP_SW_CONVERT;
          need_convert = true;
       }
@@ -268,23 +268,27 @@
 static void *
 xfer_map(struct ilo_transfer *xfer)
 {
+   const struct ilo_vma *vma;
    void *ptr;
 
    switch (xfer->method) {
    case ILO_TRANSFER_MAP_CPU:
-      ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
-            xfer->base.usage & PIPE_TRANSFER_WRITE);
+      vma = ilo_resource_get_vma(xfer->base.resource);
+      ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
       break;
    case ILO_TRANSFER_MAP_GTT:
-      ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
+      vma = ilo_resource_get_vma(xfer->base.resource);
+      ptr = intel_bo_map_gtt(vma->bo);
       break;
    case ILO_TRANSFER_MAP_GTT_ASYNC:
-      ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
+      vma = ilo_resource_get_vma(xfer->base.resource);
+      ptr = intel_bo_map_gtt_async(vma->bo);
       break;
    case ILO_TRANSFER_MAP_STAGING:
       {
          const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
-         struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
+
+         vma = ilo_resource_get_vma(xfer->staging.res);
 
          /*
           * We want a writable, optionally persistent and coherent, mapping
@@ -292,25 +296,29 @@
           * this turns out to be fairly simple.
           */
          if (is->dev.has_llc)
-            ptr = intel_bo_map(bo, true);
+            ptr = intel_bo_map(vma->bo, true);
          else
-            ptr = intel_bo_map_gtt(bo);
+            ptr = intel_bo_map_gtt(vma->bo);
 
          if (ptr && xfer->staging.res->target == PIPE_BUFFER)
             ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
-
       }
       break;
    case ILO_TRANSFER_MAP_SW_CONVERT:
    case ILO_TRANSFER_MAP_SW_ZS:
+      vma = NULL;
       ptr = xfer->staging.sys;
       break;
    default:
       assert(!"unknown mapping method");
+      vma = NULL;
       ptr = NULL;
       break;
    }
 
+   if (ptr && vma)
+      ptr = (void *) ((char *) ptr + vma->bo_offset);
+
    return ptr;
 }
 
@@ -324,10 +332,10 @@
    case ILO_TRANSFER_MAP_CPU:
    case ILO_TRANSFER_MAP_GTT:
    case ILO_TRANSFER_MAP_GTT_ASYNC:
-      intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
+      intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
       break;
    case ILO_TRANSFER_MAP_STAGING:
-      intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
+      intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
       break;
    default:
       break;
@@ -541,9 +549,12 @@
 
    if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
                       !linear_view))
-      ptr = intel_bo_map(tex->image.bo, !for_read_back);
+      ptr = intel_bo_map(tex->vma.bo, !for_read_back);
    else
-      ptr = intel_bo_map_gtt(tex->image.bo);
+      ptr = intel_bo_map_gtt(tex->vma.bo);
+
+   if (ptr)
+      ptr = (void *) ((char *) ptr + tex->vma.bo_offset);
 
    return ptr;
 }
@@ -551,7 +562,7 @@
 static void
 tex_staging_sys_unmap_bo(struct ilo_texture *tex)
 {
-   intel_bo_unmap(tex->image.bo);
+   intel_bo_unmap(tex->vma.bo);
 }
 
 static bool
@@ -590,7 +601,7 @@
       s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
 
       if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
-         assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+         assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
 
          dst_cpp = 4;
          dst_s8_pos = 3;
@@ -598,7 +609,7 @@
       }
       else {
          assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
-         assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+         assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
 
          dst_cpp = 8;
          dst_s8_pos = 4;
@@ -644,7 +655,7 @@
       tex_staging_sys_unmap_bo(s8_tex);
    }
    else {
-      assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+      assert(tex->image_format == PIPE_FORMAT_S8_UINT);
 
       for (slice = 0; slice < box->depth; slice++) {
          unsigned mem_x, mem_y;
@@ -717,7 +728,7 @@
       s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
 
       if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
-         assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+         assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
 
          src_cpp = 4;
          src_s8_pos = 3;
@@ -725,7 +736,7 @@
       }
       else {
          assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
-         assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+         assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
 
          src_cpp = 8;
          src_s8_pos = 4;
@@ -771,7 +782,7 @@
       tex_staging_sys_unmap_bo(s8_tex);
    }
    else {
-      assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+      assert(tex->image_format == PIPE_FORMAT_S8_UINT);
 
       for (slice = 0; slice < box->depth; slice++) {
          unsigned mem_x, mem_y;
@@ -829,8 +840,8 @@
    else
       dst_slice_stride = 0;
 
-   if (unlikely(tex->image.format == tex->base.format)) {
-      util_copy_box(dst, tex->image.format, tex->image.bo_stride,
+   if (unlikely(tex->image_format == tex->base.format)) {
+      util_copy_box(dst, tex->image_format, tex->image.bo_stride,
             dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
             xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
             0, 0, 0);
@@ -842,7 +853,7 @@
 
    switch (tex->base.format) {
    case PIPE_FORMAT_ETC1_RGB8:
-      assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
+      assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);
 
       for (slice = 0; slice < box->depth; slice++) {
          const void *src =
@@ -1055,7 +1066,7 @@
       return false;
 
    /* see if we can avoid blocking */
-   if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
+   if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
       bool resource_renamed;
 
       if (!xfer_unblock(xfer, &resource_renamed)) {
@@ -1078,11 +1089,11 @@
 buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
            unsigned usage, int offset, int size, const void *data)
 {
-   struct ilo_buffer *buf = ilo_buffer(res);
+   struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
    bool need_submit;
 
    /* see if we can avoid blocking */
-   if (is_bo_busy(ilo, buf->bo, &need_submit)) {
+   if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
       bool unblocked = false;
 
       if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
@@ -1103,9 +1114,12 @@
          templ.bind = PIPE_BIND_TRANSFER_WRITE;
          staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
          if (staging) {
+            const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
             struct pipe_box staging_box;
 
-            intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
+            /* offset by staging_vma->bo_offset for pwrite */
+            intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
+                  size, data);
 
             u_box_1d(0, size, &staging_box);
             ilo_blitter_blt_copy_resource(ilo->blitter,
@@ -1123,7 +1137,8 @@
          ilo_cp_submit(ilo->cp, "syncing for pwrites");
    }
 
-   intel_bo_pwrite(buf->bo, offset, size, data);
+   /* offset by buf->vma.bo_offset for pwrite */
+   intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -21,8 +21,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,4 @@
 C_SOURCES := \
-	core/ilo_buffer.h \
 	core/ilo_builder.c \
 	core/ilo_builder.h \
 	core/ilo_builder_3d.h \
@@ -15,14 +14,35 @@
 	core/ilo_debug.h \
 	core/ilo_dev.c \
 	core/ilo_dev.h \
-	core/ilo_format.c \
-	core/ilo_format.h \
-	core/ilo_fence.h \
 	core/ilo_image.c \
 	core/ilo_image.h \
-	core/ilo_state_3d.h \
-	core/ilo_state_3d_bottom.c \
-	core/ilo_state_3d_top.c \
+	core/ilo_state_cc.c \
+	core/ilo_state_cc.h \
+	core/ilo_state_compute.c \
+	core/ilo_state_compute.h \
+	core/ilo_state_raster.c \
+	core/ilo_state_raster.h \
+	core/ilo_state_sampler.c \
+	core/ilo_state_sampler.h \
+	core/ilo_state_sbe.c \
+	core/ilo_state_sbe.h \
+	core/ilo_state_shader.c \
+	core/ilo_state_shader_ps.c \
+	core/ilo_state_shader.h \
+	core/ilo_state_sol.c \
+	core/ilo_state_sol.h \
+	core/ilo_state_surface.c \
+	core/ilo_state_surface_format.c \
+	core/ilo_state_surface.h \
+	core/ilo_state_urb.c \
+	core/ilo_state_urb.h \
+	core/ilo_state_vf.c \
+	core/ilo_state_vf.h \
+	core/ilo_state_viewport.c \
+	core/ilo_state_viewport.h \
+	core/ilo_state_zs.c \
+	core/ilo_state_zs.h \
+	core/ilo_vma.h \
 	core/intel_winsys.h \
 	ilo_blit.c \
 	ilo_blit.h \
@@ -38,13 +58,13 @@
 	ilo_cp.h \
 	ilo_draw.c \
 	ilo_draw.h \
+	ilo_format.c \
+	ilo_format.h \
 	ilo_gpgpu.c \
 	ilo_gpgpu.h \
 	ilo_public.h \
 	ilo_query.c \
 	ilo_query.h \
-	ilo_resource.c \
-	ilo_resource.h \
 	ilo_render.c \
 	ilo_render.h \
 	ilo_render_gen.h \
@@ -54,6 +74,8 @@
 	ilo_render_gen8.c \
 	ilo_render_media.c \
 	ilo_render_surface.c \
+	ilo_resource.c \
+	ilo_resource.h \
 	ilo_screen.c \
 	ilo_screen.h \
 	ilo_shader.c \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/shader/ilo_shader_internal.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/shader/ilo_shader_internal.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/shader/ilo_shader_internal.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,6 +28,9 @@
 #ifndef ILO_SHADER_INTERNAL_H
 #define ILO_SHADER_INTERNAL_H
 
+#include "core/ilo_state_sbe.h"
+#include "core/ilo_state_sol.h"
+
 #include "ilo_common.h"
 #include "ilo_state.h"
 #include "ilo_shader.h"
@@ -72,13 +75,27 @@
    uint32_t saturate_tex_coords[3];
 };
 
+struct ilo_kernel_routing {
+   bool initialized;
+
+   bool is_point;
+   bool light_twoside;
+   uint32_t sprite_coord_enable;
+   int sprite_coord_mode;
+   int src_len;
+   int src_semantics[PIPE_MAX_SHADER_OUTPUTS];
+   int src_indices[PIPE_MAX_SHADER_OUTPUTS];
+
+   struct ilo_state_sbe sbe;
+};
+
 /**
  * A compiled shader.
  */
 struct ilo_shader {
    struct ilo_shader_variant variant;
 
-   struct ilo_shader_cso cso;
+   union ilo_shader_cso cso;
 
    struct {
       int semantic_names[PIPE_MAX_SHADER_INPUTS];
@@ -111,7 +128,9 @@
 
    bool stream_output;
    int svbi_post_inc;
-   struct pipe_stream_output_info so_info;
+
+   uint32_t sol_data[PIPE_MAX_SO_OUTPUTS][2];
+   struct ilo_state_sol sol;
 
    /* for VS stream output / rasterizer discard */
    int gs_offsets[3];
@@ -121,11 +140,8 @@
    void *kernel;
    int kernel_size;
 
-   bool routing_initialized;
-   int routing_src_semantics[PIPE_MAX_SHADER_OUTPUTS];
-   int routing_src_indices[PIPE_MAX_SHADER_OUTPUTS];
-   uint32_t routing_sprite_coord_enable;
    struct ilo_kernel_routing routing;
+   struct ilo_state_ps_params_info ps_params;
 
    /* what does the push constant buffer consist of? */
    struct {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/shader/toy_tgsi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/shader/toy_tgsi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/ilo/shader/toy_tgsi.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/ilo/shader/toy_tgsi.c	2015-09-16 14:36:09.000000000 +0000
@@ -2036,9 +2036,6 @@
       if (!dst_is_scratch[i])
          continue;
 
-      if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
-         tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
-
       tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
 
       /* emit indirect store */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_bld_blend.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_bld_blend.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_bld_blend.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_bld_blend.c	2015-09-16 14:36:09.000000000 +0000
@@ -78,7 +78,7 @@
 /**
  * Whether the blending factors are complementary of each other.
  */
-static INLINE boolean
+static inline boolean
 lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
 {
    return dst_factor == (src_factor ^ 0x10);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_bld_depth.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_bld_depth.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_bld_depth.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_bld_depth.c	2015-09-16 14:36:09.000000000 +0000
@@ -975,10 +975,6 @@
                                          s_bld.int_vec_type, "");
       }
 
-      /* convert scalar stencil refs into vectors */
-      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
-      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
-
       s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                           stencil_refs, stencil_vals,
                                           front_facing);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_context.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -169,7 +169,7 @@
                             unsigned bind_flags);
 
 
-static INLINE struct llvmpipe_context *
+static inline struct llvmpipe_context *
 llvmpipe_context( struct pipe_context *pipe )
 {
    return (struct llvmpipe_context *)pipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_debug.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_debug.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_debug.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_debug.h	2015-09-16 14:36:09.000000000 +0000
@@ -71,7 +71,7 @@
 
 void st_debug_init( void );
 
-static INLINE void
+static inline void
 LP_DBG( unsigned flag, const char *fmt, ... )
 {
     if (LP_DEBUG & flag)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_fence.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_fence.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_fence.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_fence.h	2015-09-16 14:36:09.000000000 +0000
@@ -72,7 +72,7 @@
 void
 lp_fence_destroy(struct lp_fence *fence);
 
-static INLINE void
+static inline void
 lp_fence_reference(struct lp_fence **ptr,
                    struct lp_fence *f)
 {
@@ -85,7 +85,7 @@
    *ptr = f;
 }
 
-static INLINE boolean
+static inline boolean
 lp_fence_issued(const struct lp_fence *fence)
 {
    return fence->issued;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_rast.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_rast.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_rast.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_rast.h	2015-09-16 14:36:09.000000000 +0000
@@ -184,7 +184,7 @@
 
 /* Cast wrappers.  Hopefully these compile to noops!
  */
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
 {
    union lp_rast_cmd_arg arg;
@@ -192,7 +192,7 @@
    return arg;
 }
 
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
                       unsigned plane_mask)
 {
@@ -208,7 +208,7 @@
  * All planes are enabled, so instead of the plane mask we pass the upper
  * left coordinates of the a block that fully encloses the triangle.
  */
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_triangle_contained( const struct lp_rast_triangle *triangle,
                                 unsigned x, unsigned y)
 {
@@ -218,7 +218,7 @@
    return arg;
 }
 
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_state( const struct lp_rast_state *state )
 {
    union lp_rast_cmd_arg arg;
@@ -226,7 +226,7 @@
    return arg;
 }
 
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_fence( struct lp_fence *fence )
 {
    union lp_rast_cmd_arg arg;
@@ -235,7 +235,7 @@
 }
 
 
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
 {
    union lp_rast_cmd_arg arg;
@@ -245,7 +245,7 @@
 }
 
 
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_query( struct llvmpipe_query *pq )
 {
    union lp_rast_cmd_arg arg;
@@ -253,7 +253,7 @@
    return arg;
 }
 
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
 lp_rast_arg_null( void )
 {
    union lp_rast_cmd_arg arg;
@@ -312,7 +312,7 @@
 #include <emmintrin.h>
 #include "util/u_sse.h"
 
-static INLINE __m128i
+static inline __m128i
 lp_plane_to_m128i(const struct lp_rast_plane *plane)
 {
    return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_rast_priv.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_rast_priv.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_rast_priv.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_rast_priv.h	2015-09-16 14:36:09.000000000 +0000
@@ -145,7 +145,7 @@
  * Get the pointer to a 4x4 color block (within a 64x64 tile).
  * \param x, y location of 4x4 block in window coords
  */
-static INLINE uint8_t *
+static inline uint8_t *
 lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned buf, unsigned x, unsigned y,
                                 unsigned layer)
@@ -186,7 +186,7 @@
  * Get the pointer to a 4x4 depth block (within a 64x64 tile).
  * \param x, y location of 4x4 block in window coords
  */
-static INLINE uint8_t *
+static inline uint8_t *
 lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned x, unsigned y, unsigned layer)
 {
@@ -222,7 +222,7 @@
  * triangle in/out tests.
  * \param x, y location of 4x4 block in window coords
  */
-static INLINE void
+static inline void
 lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
                          const struct lp_rast_shader_inputs *inputs,
                          unsigned x, unsigned y )
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_rast_tri.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_rast_tri.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_rast_tri.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_rast_tri.c	2015-09-16 14:36:09.000000000 +0000
@@ -63,7 +63,7 @@
 	 block_full_4(task, tri, x + ix, y + iy);
 }
 
-static INLINE unsigned
+static inline unsigned
 build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
 {
    unsigned mask = 0;
@@ -94,7 +94,7 @@
 }
 
 
-static INLINE void
+static inline void
 build_masks(int64_t c,
             int64_t cdiff,
             int64_t dcdx,
@@ -167,7 +167,7 @@
 #include "util/u_sse.h"
 
 
-static INLINE void
+static inline void
 build_masks_32(int c, 
                int cdiff,
                int dcdx,
@@ -213,7 +213,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 build_mask_linear_32(int c, int dcdx, int dcdy)
 {
    __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
@@ -239,7 +239,7 @@
    return _mm_movemask_epi8(result);
 }
 
-static INLINE unsigned
+static inline unsigned
 sign_bits4(const __m128i *cstep, int cdiff)
 {
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_scene.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_scene.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_scene.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_scene.h	2015-09-16 14:36:09.000000000 +0000
@@ -207,7 +207,7 @@
  * Allocate space for a command/data in the bin's data buffer.
  * Grow the block list if needed.
  */
-static INLINE void *
+static inline void *
 lp_scene_alloc( struct lp_scene *scene, unsigned size)
 {
    struct data_block_list *list = &scene->data;
@@ -240,7 +240,7 @@
 /**
  * As above, but with specific alignment.
  */
-static INLINE void *
+static inline void *
 lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
 			unsigned alignment )
 {
@@ -272,7 +272,7 @@
 
 /* Put back data if we decide not to use it, eg. culled triangles.
  */
-static INLINE void
+static inline void
 lp_scene_putback_data( struct lp_scene *scene, unsigned size)
 {
    struct data_block_list *list = &scene->data;
@@ -282,7 +282,7 @@
 
 
 /** Return pointer to a particular tile's bin. */
-static INLINE struct cmd_bin *
+static inline struct cmd_bin *
 lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y)
 {
    return &scene->tile[x][y];
@@ -296,7 +296,7 @@
 
 /* Add a command to bin[x][y].
  */
-static INLINE boolean
+static inline boolean
 lp_scene_bin_command( struct lp_scene *scene,
                       unsigned x, unsigned y,
                       unsigned cmd,
@@ -328,7 +328,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 lp_scene_bin_cmd_with_state( struct lp_scene *scene,
                              unsigned x, unsigned y,
                              const struct lp_rast_state *state,
@@ -354,7 +354,7 @@
 
 /* Add a command to all active bins.
  */
-static INLINE boolean
+static inline boolean
 lp_scene_bin_everywhere( struct lp_scene *scene,
 			 unsigned cmd,
 			 const union lp_rast_cmd_arg arg )
@@ -371,7 +371,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 lp_scene_get_num_bins( const struct lp_scene *scene )
 {
    return scene->tiles_x * scene->tiles_y;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -165,7 +165,7 @@
    case PIPE_CAP_DEPTH_CLIP_DISABLE:
       return 1;
    case PIPE_CAP_SHADER_STENCIL_EXPORT:
-      return 0;
+      return 1;
    case PIPE_CAP_TGSI_INSTANCEID:
    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
    case PIPE_CAP_START_INSTANCE:
@@ -288,10 +288,14 @@
    case PIPE_CAP_VERTEXID_NOBASE:
       return 0;
    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
       return 1;
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
       return 0;
    }
    /* should only get here on unhandled cases */
@@ -529,18 +533,6 @@
 
 
 /**
- * Has the fence been executed/finished?
- */
-static boolean
-llvmpipe_fence_signalled(struct pipe_screen *screen,
-                         struct pipe_fence_handle *fence)
-{
-   struct lp_fence *f = (struct lp_fence *) fence;
-   return lp_fence_signalled(f);
-}
-
-
-/**
  * Wait for the fence to finish.
  */
 static boolean
@@ -550,6 +542,9 @@
 {
    struct lp_fence *f = (struct lp_fence *) fence_handle;
 
+   if (!timeout)
+      return lp_fence_signalled(f);
+
    lp_fence_wait(f);
    return TRUE;
 }
@@ -601,7 +596,6 @@
    screen->base.context_create = llvmpipe_create_context;
    screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
    screen->base.fence_reference = llvmpipe_fence_reference;
-   screen->base.fence_signalled = llvmpipe_fence_signalled;
    screen->base.fence_finish = llvmpipe_fence_finish;
 
    screen->base.get_timestamp = llvmpipe_get_timestamp;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_screen.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -62,7 +62,7 @@
 
 
 
-static INLINE struct llvmpipe_screen *
+static inline struct llvmpipe_screen *
 llvmpipe_screen( struct pipe_screen *pipe )
 {
    return (struct llvmpipe_screen *)pipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup.c	2015-09-16 14:36:09.000000000 +0000
@@ -1069,10 +1069,13 @@
    if (setup->dirty & LP_SETUP_NEW_CONSTANTS) {
       for (i = 0; i < Elements(setup->constants); ++i) {
          struct pipe_resource *buffer = setup->constants[i].current.buffer;
-         const unsigned current_size = setup->constants[i].current.buffer_size;
+         const unsigned current_size = MIN2(setup->constants[i].current.buffer_size,
+                                            LP_MAX_TGSI_CONST_BUFFER_SIZE);
          const ubyte *current_data = NULL;
          int num_constants;
 
+         STATIC_ASSERT(DATA_BLOCK_SIZE >= LP_MAX_TGSI_CONST_BUFFER_SIZE);
+
          if (buffer) {
             /* resource buffer */
             current_data = (ubyte *) llvmpipe_resource_data(buffer);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup.h	2015-09-16 14:36:09.000000000 +0000
@@ -159,7 +159,7 @@
 lp_setup_end_query(struct lp_setup_context *setup,
                    struct llvmpipe_query *pq);
 
-static INLINE unsigned
+static inline unsigned
 lp_clamp_viewport_idx(int idx)
 {
    return (PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_line.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_line.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_line.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_line.c	2015-09-16 14:36:09.000000000 +0000
@@ -233,7 +233,7 @@
 
 
 
-static INLINE int subpixel_snap( float a )
+static inline int subpixel_snap( float a )
 {
    return util_iround(FIXED_ONE * a);
 }
@@ -262,14 +262,14 @@
 }
 
 
-static INLINE boolean sign(float x){
+static inline boolean sign(float x){
    return x >= 0;  
 }  
 
 
 /* Used on positive floats only:
  */
-static INLINE float fracf(float f)
+static inline float fracf(float f)
 {
    return f - floorf(f);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_point.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_point.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_point.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_point.c	2015-09-16 14:36:09.000000000 +0000
@@ -296,7 +296,7 @@
 }
 
 
-static INLINE int
+static inline int
 subpixel_snap(float a)
 {
    return util_iround(FIXED_ONE * a);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_tri.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_tri.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_tri.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_tri.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,13 +48,13 @@
 #include <emmintrin.h>
 #endif
 
-static INLINE int
+static inline int
 subpixel_snap(float a)
 {
    return util_iround(FIXED_ONE * a);
 }
 
-static INLINE float
+static inline float
 fixed_to_float(int a)
 {
    return a * (1.0f / FIXED_ONE);
@@ -579,7 +579,7 @@
  *
  * Undefined if no bit set exists, so code should check against 0 first.
  */
-static INLINE uint32_t 
+static inline uint32_t 
 floor_pot(uint32_t n)
 {
 #if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
@@ -841,7 +841,7 @@
 /**
  * Calculate fixed position data for a triangle
  */
-static INLINE void
+static inline void
 calc_fixed_position( struct lp_setup_context *setup,
                      struct fixed_position* position,
                      const float (*v0)[4],
@@ -873,7 +873,7 @@
  * Rotate a triangle, flipping its clockwise direction,
  * Swaps values for xy[0] and xy[1]
  */
-static INLINE void
+static inline void
 rotate_fixed_position_01( struct fixed_position* position )
 {
    int x, y;
@@ -898,7 +898,7 @@
  * Rotate a triangle, flipping its clockwise direction,
  * Swaps values for xy[1] and xy[2]
  */
-static INLINE void
+static inline void
 rotate_fixed_position_12( struct fixed_position* position )
 {
    int x, y;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c	2015-09-16 14:36:09.000000000 +0000
@@ -122,7 +122,7 @@
 
 typedef const float (*const_float4_ptr)[4];
 
-static INLINE const_float4_ptr get_vert( const void *vertex_buffer,
+static inline const_float4_ptr get_vert( const void *vertex_buffer,
                                          int index,
                                          int stride )
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_state_fs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_state_fs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_state_fs.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_state_fs.c	2015-09-16 14:36:09.000000000 +0000
@@ -260,7 +260,8 @@
 {
    const struct util_format_description *zs_format_desc = NULL;
    const struct tgsi_token *tokens = shader->base.tokens;
-   LLVMTypeRef vec_type;
+   struct lp_type int_type = lp_int_type(type);
+   LLVMTypeRef vec_type, int_vec_type;
    LLVMValueRef mask_ptr, mask_val;
    LLVMValueRef consts_ptr, num_consts_ptr;
    LLVMValueRef z;
@@ -295,7 +296,7 @@
       zs_format_desc = util_format_description(key->zsbuf_format);
       assert(zs_format_desc);
 
-      if (!shader->info.base.writes_z) {
+      if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
          if (key->alpha.enabled ||
              key->blend.alpha_to_coverage ||
              shader->info.base.uses_kill) {
@@ -329,11 +330,14 @@
       depth_mode = 0;
    }
 
+   vec_type = lp_build_vec_type(gallivm, type);
+   int_vec_type = lp_build_vec_type(gallivm, int_type);
 
    stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
    stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
-
-   vec_type = lp_build_vec_type(gallivm, type);
+   /* convert scalar stencil refs into vectors */
+   stencil_refs[0] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[0]);
+   stencil_refs[1] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[1]);
 
    consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
    num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr);
@@ -462,7 +466,9 @@
       int pos0 = find_output_by_semantic(&shader->info.base,
                                          TGSI_SEMANTIC_POSITION,
                                          0);
-
+      int s_out = find_output_by_semantic(&shader->info.base,
+                                          TGSI_SEMANTIC_STENCIL,
+                                          0);
       if (pos0 != -1 && outputs[pos0][2]) {
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
 
@@ -512,6 +518,15 @@
          }
       }
 
+      if (s_out != -1 && outputs[s_out][1]) {
+         /* there's only one value, and spec says to discard additional bits */
+         LLVMValueRef s_max_mask = lp_build_const_int_vec(gallivm, int_type, 255);
+         stencil_refs[0] = LLVMBuildLoad(builder, outputs[s_out][1], "output.s");
+         stencil_refs[0] = LLVMBuildBitCast(builder, stencil_refs[0], int_vec_type, "");
+         stencil_refs[0] = LLVMBuildAnd(builder, stencil_refs[0], s_max_mask, "");
+         stencil_refs[1] = stencil_refs[0];
+      }
+
       lp_build_depth_stencil_load_swizzled(gallivm, type,
                                            zs_format_desc, key->resource_1d,
                                            depth_ptr, depth_stride,
@@ -825,7 +840,7 @@
  *
  * A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5.
  */
-static INLINE boolean
+static inline boolean
 is_arithmetic_format(const struct util_format_description *format_desc)
 {
    boolean arith = false;
@@ -845,7 +860,7 @@
  * to floats for blending, and furthermore has "natural" packed AoS -> unpacked
  * SoA conversion.
  */
-static INLINE boolean
+static inline boolean
 format_expands_to_float_soa(const struct util_format_description *format_desc)
 {
    if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
@@ -861,7 +876,7 @@
  *
  * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
  */
-static INLINE void
+static inline void
 lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
                              struct lp_type* type)
 {
@@ -909,7 +924,7 @@
  *
  * e.g. RGBA16F = 4x float, R3G3B2 = 3x byte
  */
-static INLINE void
+static inline void
 lp_blend_type_from_format_desc(const struct util_format_description *format_desc,
                                struct lp_type* type)
 {
@@ -981,7 +996,7 @@
  *
  * but we try to avoid division and multiplication through shifts.
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 scale_bits(struct gallivm_state *gallivm,
            int src_bits,
            int dst_bits,
@@ -1093,7 +1108,7 @@
 /**
  * If RT is a smallfloat (needing denorms) format
  */
-static INLINE int
+static inline int
 have_smallfloat_format(struct lp_type dst_type,
                        enum pipe_format format)
 {
@@ -2865,7 +2880,7 @@
 /**
  * Return the blend factor equivalent to a destination alpha of one.
  */
-static INLINE unsigned
+static inline unsigned
 force_dst_alpha_one(unsigned factor, boolean clamped_zero)
 {
    switch(factor) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_surface.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -42,13 +42,6 @@
                  struct pipe_resource *src, unsigned src_level,
                  const struct pipe_box *src_box)
 {
-   struct llvmpipe_resource *src_tex = llvmpipe_resource(src);
-   struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst);
-   const enum pipe_format format = src_tex->base.format;
-   unsigned width = src_box->width;
-   unsigned height = src_box->height;
-   unsigned depth = src_box->depth;
-
    llvmpipe_flush_resource(pipe,
                            dst, dst_level,
                            FALSE, /* read_only */
@@ -63,58 +56,8 @@
                            FALSE, /* do_not_block */
                            "blit src");
 
-   /* Fallback for buffers. */
-   if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-      util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
-                                src, src_level, src_box);
-      return;
-   }
-
-   /*
-   printf("surface copy from %u lvl %u to %u lvl %u: %u,%u,%u to %u,%u,%u %u x %u x %u\n",
-          src_tex->id, src_level, dst_tex->id, dst_level,
-          src_box->x, src_box->y, src_box->z, dstx, dsty, dstz,
-          src_box->width, src_box->height, src_box->depth);
-   */
-
-   /* make sure display target resources (which cannot have levels/layers) are mapped */
-   if (src_tex->dt)
-      (void) llvmpipe_resource_map(src, src_level, 0, LP_TEX_USAGE_READ);
-   if (dst_tex->dt)
-      /*
-       * Could set this to WRITE_ALL if complete dst is covered but it gets
-       * ignored anyway.
-       */
-      (void) llvmpipe_resource_map(dst, dst_level, 0, LP_TEX_USAGE_READ_WRITE);
-
-
-   /* copy */
-   {
-      const ubyte *src_linear_ptr
-         = llvmpipe_get_texture_image_address(src_tex, src_box->z,
-                                              src_level);
-      ubyte *dst_linear_ptr
-         = llvmpipe_get_texture_image_address(dst_tex, dstz,
-                                              dst_level);
-
-      if (dst_linear_ptr && src_linear_ptr) {
-         util_copy_box(dst_linear_ptr, format,
-                       llvmpipe_resource_stride(&dst_tex->base, dst_level),
-                       dst_tex->img_stride[dst_level],
-                       dstx, dsty, 0,
-                       width, height, depth,
-                       src_linear_ptr,
-                       llvmpipe_resource_stride(&src_tex->base, src_level),
-                       src_tex->img_stride[src_level],
-                       src_box->x, src_box->y, 0);
-      }
-   }
-
-   if (src_tex->dt)
-      llvmpipe_resource_unmap(src, 0, 0);
-   if (dst_tex->dt)
-      llvmpipe_resource_unmap(dst, 0, 0);
-
+   util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+                             src, src_level, src_box);
 }
 
 
@@ -139,11 +82,6 @@
       return; /* done */
    }
 
-   if (info.mask & PIPE_MASK_S) {
-      debug_printf("llvmpipe: cannot blit stencil, skipping\n");
-      info.mask &= ~PIPE_MASK_S;
-   }
-
    if (!util_blitter_is_blit_supported(lp->blitter, &info)) {
       debug_printf("llvmpipe: blit unsupported %s -> %s\n",
                    util_format_short_name(info.src.resource->format),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_test.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_test.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_test.h	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_test.h	2015-09-16 14:36:09.000000000 +0000
@@ -77,7 +77,7 @@
 
 #elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
 
-static INLINE uint64_t
+static inline uint64_t
 rdtsc(void)
 {
    uint32_t hi, lo;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_texture.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_texture.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/llvmpipe/lp_texture.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/llvmpipe/lp_texture.h	2015-09-16 14:36:09.000000000 +0000
@@ -106,21 +106,21 @@
 
 
 /** cast wrappers */
-static INLINE struct llvmpipe_resource *
+static inline struct llvmpipe_resource *
 llvmpipe_resource(struct pipe_resource *pt)
 {
    return (struct llvmpipe_resource *) pt;
 }
 
 
-static INLINE const struct llvmpipe_resource *
+static inline const struct llvmpipe_resource *
 llvmpipe_resource_const(const struct pipe_resource *pt)
 {
    return (const struct llvmpipe_resource *) pt;
 }
 
 
-static INLINE struct llvmpipe_transfer *
+static inline struct llvmpipe_transfer *
 llvmpipe_transfer(struct pipe_transfer *pt)
 {
    return (struct llvmpipe_transfer *) pt;
@@ -131,7 +131,7 @@
 void llvmpipe_init_context_resource_funcs(struct pipe_context *pipe);
 
 
-static INLINE boolean
+static inline boolean
 llvmpipe_resource_is_texture(const struct pipe_resource *resource)
 {
    switch (resource->target) {
@@ -153,7 +153,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 llvmpipe_resource_is_1d(const struct pipe_resource *resource)
 {
    switch (resource->target) {
@@ -175,7 +175,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 llvmpipe_layer_stride(struct pipe_resource *resource,
                       unsigned level)
 {
@@ -185,7 +185,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 llvmpipe_resource_stride(struct pipe_resource *resource,
                          unsigned level)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/Android.mk	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -39,6 +39,10 @@
 LOCAL_SHARED_LIBRARIES := libdrm libdrm_nouveau
 LOCAL_MODULE := libmesa_pipe_nouveau
 
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+LOCAL_C_INCLUDES := external/libcxx/include
+else
 include external/stlport/libstlport.mk
+endif
 include $(GALLIUM_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -428,8 +428,7 @@
 {
    Symbol *sym = new_Symbol(prog, FILE_SYSTEM_VALUE, 0);
 
-   assert(svIndex < 4 ||
-          (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR));
+   assert(svIndex < 4 || svName == SV_CLIP_DISTANCE);
 
    switch (svName) {
    case SV_POSITION:
@@ -438,7 +437,9 @@
    case SV_POINT_SIZE:
    case SV_POINT_COORD:
    case SV_CLIP_DISTANCE:
-   case SV_TESS_FACTOR:
+   case SV_TESS_OUTER:
+   case SV_TESS_INNER:
+   case SV_TESS_COORD:
       sym->reg.type = TYPE_F32;
       break;
    default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -1153,8 +1153,8 @@
 
    switch (info->type) {
    PROG_TYPE_CASE(VERTEX, VERTEX);
-// PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
-// PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
+   PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
+   PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
    PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
    PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
    PROG_TYPE_CASE(COMPUTE, COMPUTE);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h	2015-09-16 14:36:09.000000000 +0000
@@ -69,18 +69,6 @@
 # define NV50_IR_DEBUG_REG_ALLOC 0
 #endif
 
-#define NV50_SEMANTIC_CLIPDISTANCE  (TGSI_SEMANTIC_COUNT + 0)
-#define NV50_SEMANTIC_TESSFACTOR    (TGSI_SEMANTIC_COUNT + 7)
-#define NV50_SEMANTIC_TESSCOORD     (TGSI_SEMANTIC_COUNT + 8)
-#define NV50_SEMANTIC_COUNT         (TGSI_SEMANTIC_COUNT + 10)
-
-#define NV50_TESS_PART_FRACT_ODD  0
-#define NV50_TESS_PART_FRACT_EVEN 1
-#define NV50_TESS_PART_POW2       2
-#define NV50_TESS_PART_INTEGER    3
-
-#define NV50_PRIM_PATCHES PIPE_PRIM_MAX
-
 struct nv50_ir_prog_symbol
 {
    uint32_t label;
@@ -151,10 +139,10 @@
       } gp;
       struct {
          unsigned numColourResults;
-         boolean writesDepth;
-         boolean earlyFragTests;
-         boolean separateFragData;
-         boolean usesDiscard;
+         bool writesDepth;
+         bool earlyFragTests;
+         bool separateFragData;
+         bool usesDiscard;
       } fp;
       struct {
          uint32_t inputOffset; /* base address for user args */
@@ -180,11 +168,11 @@
       int8_t viewportId;         /* output index of ViewportIndex */
       uint8_t fragDepth;         /* output index of FragDepth */
       uint8_t sampleMask;        /* output index of SampleMask */
-      boolean sampleInterp;      /* perform sample interp on all fp inputs */
+      bool sampleInterp;         /* perform sample interp on all fp inputs */
       uint8_t backFaceColor[2];  /* input/output indices of back face colour */
       uint8_t globalAccess;      /* 1 for read, 2 for wr, 3 for rw */
-      boolean fp64;              /* program uses fp64 math */
-      boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
+      bool fp64;                 /* program uses fp64 math */
+      bool nv50styleSurfaces;    /* generate gX[] access for raw buffers */
       uint8_t resInfoCBSlot;     /* cX[] used for tex handles, surface info */
       uint16_t texBindBase;      /* base address for tex handles (nve4) */
       uint16_t suInfoBase;       /* base address for surface info (nve4) */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -77,6 +77,7 @@
    void emitMOV(const Instruction *);
 
    void emitINTERP(const Instruction *);
+   void emitAFETCH(const Instruction *);
    void emitPFETCH(const Instruction *);
    void emitVFETCH(const Instruction *);
    void emitEXPORT(const Instruction *);
@@ -120,6 +121,8 @@
 
    void emitPIXLD(const Instruction *);
 
+   void emitBAR(const Instruction *);
+
    void emitFlow(const Instruction *);
 
    inline void defId(const ValueDef&, const int pos);
@@ -930,6 +933,7 @@
 
    code[0] |= typeSizeofLog2(dType) << 10;
    code[0] |= typeSizeofLog2(i->sType) << 12;
+   code[1] |= i->subOp << 12;
 
    if (isSignedIntType(dType))
       code[0] |= 0x4000;
@@ -967,8 +971,8 @@
       code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0);
       if (i->defExists(1))
          defId(i->def(1), 2);
-   else
-      code[0] |= 0x1c;
+      else
+         code[0] |= 0x1c;
    } else {
       switch (i->sType) {
       case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
@@ -990,8 +994,12 @@
       }
       FTZ_(3a);
 
-      if (i->dType == TYPE_F32)
-         code[1] |= 1 << 23;
+      if (i->dType == TYPE_F32) {
+         if (isFloatType(i->sType))
+            code[1] |= 1 << 23;
+         else
+            code[1] |= 1 << 15;
+      }
    }
    if (i->sType == TYPE_S32)
       code[1] |= 1 << 19;
@@ -1246,6 +1254,13 @@
 }
 
 void
+CodeEmitterGK110::emitBAR(const Instruction *i) // placeholder encoder for OP_BAR
+{
+   /* TODO: no barrier encoding is written here yet; a NOP is emitted in its place */
+   emitNOP(i);
+}
+
+void
 CodeEmitterGK110::emitFlow(const Instruction *i)
 {
    const FlowInstruction *f = i->asFlow();
@@ -1316,6 +1331,8 @@
    } else
    if (mask & 2) {
       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
+         pcRel += 8;
       // currently we don't want absolute branches
       assert(!f->absolute);
       code[0] |= (pcRel & 0x1ff) << 23;
@@ -1324,6 +1341,23 @@
 }
 
 void
+CodeEmitterGK110::emitAFETCH(const Instruction *i) // writes the two-word encoding for OP_AFETCH
+{
+   uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff; // keep the low 11 bits of the offset
+
+   code[0] = 0x00000002 | (offset << 23); // low 9 offset bits land in bits 23..31 of word 0
+   code[1] = 0x7d000000 | (offset >> 9); // remaining 2 offset bits go to bits 0..1 of word 1
+
+   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+      code[1] |= 0x8; // extra bit set when src 0 is in the shader-output file
+
+   emitPredicate(i);
+
+   defId(i->def(0), 2); // destination register, pos 2
+   srcId(i->src(0).getIndirect(0), 10); // indirect address of src 0; 10 is presumably its bit position
+}
+
+void
 CodeEmitterGK110::emitPFETCH(const Instruction *i)
 {
    uint32_t prim = i->src(0).get()->reg.data.u32;
@@ -1692,6 +1726,9 @@
    case OP_EXPORT:
       emitEXPORT(insn);
       break;
+   case OP_AFETCH:
+      emitAFETCH(insn);
+      break;
    case OP_PFETCH:
       emitPFETCH(insn);
       break;
@@ -1850,6 +1887,9 @@
       emitNOP(insn);
       insn->join = 1;
       break;
+   case OP_BAR:
+      emitBAR(insn);
+      break;
    case OP_PHI:
    case OP_UNION:
    case OP_CONSTRAINT:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -174,6 +174,7 @@
    void emitALD();
    void emitAST();
    void emitISBERD();
+   void emitAL2P();
    void emitIPA();
 
    void emitPIXLD();
@@ -509,10 +510,13 @@
    emitCond5(0x00, CC_TR);
 
    if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
+      int32_t pos = insn->target.bb->binPos;
+      if (writeIssueDelays && !(pos & 0x1f))
+         pos += 8;
       if (!insn->absolute)
-         emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
+         emitField(0x14, 24, pos - (codeSize + 8));
       else
-         emitField(0x14, 32, insn->target.bb->binPos);
+         emitField(0x14, 32, pos);
    } else {
       emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
       emitField(0x05, 1, 1);
@@ -814,6 +818,7 @@
    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
    emitCC   (0x2f);
    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
+   emitField(0x29, 2, insn->subOp);
    emitRND  (0x27, rnd, -1);
    emitField(0x0d, 1, isSignedType(insn->sType));
    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
@@ -846,6 +851,7 @@
    emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
    emitCC   (0x2f);
    emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
+   emitField(0x29, 2, insn->subOp);
    emitField(0x0d, 1, isSignedType(insn->sType));
    emitField(0x0c, 1, isSignedType(insn->dType));
    emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
@@ -1827,6 +1833,7 @@
    emitCond3(0x31, insn->setCond);
    emitField(0x30, 1, isSignedType(insn->sType));
    emitCC   (0x2f);
+   emitField(0x2c, 1, insn->dType == TYPE_F32);
    emitX    (0x2b);
    emitGPR  (0x08, insn->src(0));
    emitGPR  (0x00, insn->def(0));
@@ -2200,6 +2207,17 @@
 }
 
 void
+CodeEmitterGM107::emitAL2P() // encoder invoked for OP_AFETCH on this target
+{
+   emitInsn (0xefa00000);
+   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); // def 0's reg.size / 4, minus one
+   emitO    (0x20);
+   emitField(0x14, 11, insn->src(0).get()->reg.data.offset); // offset of src 0 (presumably an 11-bit field)
+   emitGPR  (0x08, insn->src(0).getIndirect(0)); // indirect address register of src 0
+   emitGPR  (0x00, insn->def(0)); // destination register
+}
+
+void
 CodeEmitterGM107::emitIPA()
 {
    int ipam = 0, ipas = 0;
@@ -2755,6 +2773,9 @@
    case OP_PFETCH:
       emitISBERD();
       break;
+   case OP_AFETCH:
+      emitAL2P();
+      break;
    case OP_LINTERP:
    case OP_PINTERP:
       emitIPA();
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -884,7 +884,7 @@
    defId(i->def(0), 2);
    srcAddr8(i->src(0), 16);
 
-   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
+   if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
       code[0] |= 1 << 8;
    } else {
       if (i->op == OP_PINTERP) {
@@ -896,10 +896,11 @@
    }
 
    if (i->encSize == 8) {
-      code[1] =
-         (code[0] & (3 << 24)) >> (24 - 16) |
-         (code[0] & (1 <<  8)) << (18 -  8);
-      code[0] &= ~0x03000100;
+      if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
+         code[1] = 4 << 16;
+      else
+         code[1] = (code[0] & (3 << 24)) >> (24 - 16);
+      code[0] &= ~0x03000000;
       code[0] |= 1;
       emitFlagsRd(i);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -85,6 +85,7 @@
    void emitCCTL(const Instruction *);
 
    void emitINTERP(const Instruction *);
+   void emitAFETCH(const Instruction *);
    void emitPFETCH(const Instruction *);
    void emitVFETCH(const Instruction *);
    void emitEXPORT(const Instruction *);
@@ -1019,6 +1020,10 @@
       code[0] |= util_logbase2(typeSizeof(dType)) << 20;
       code[0] |= util_logbase2(typeSizeof(i->sType)) << 23;
 
+      // for 8/16 source types, the byte/word is in subOp. word 1 is
+      // represented as 2.
+      code[1] |= i->subOp << 0x17;
+
       if (sat)
          code[0] |= 0x20;
       if (abs)
@@ -1078,8 +1083,14 @@
    if (!isFloatType(i->sType))
       lo = 0x3;
 
-   if (isFloatType(i->dType) || isSignedIntType(i->sType))
+   if (isSignedIntType(i->sType))
       lo |= 0x20;
+   if (isFloatType(i->dType)) {
+      if (isFloatType(i->sType))
+         lo |= 0x20;
+      else
+         lo |= 0x80;
+   }
 
    switch (i->op) {
    case OP_SET_AND: hi = 0x10000000; break;
@@ -1406,6 +1417,8 @@
    } else
    if (mask & 2) {
       int32_t pcRel = f->target.bb->binPos - (codeSize + 8);
+      if (writeIssueDelays && !(f->target.bb->binPos & 0x3f))
+         pcRel += 8;
       // currently we don't want absolute branches
       assert(!f->absolute);
       code[0] |= (pcRel & 0x3f) << 26;
@@ -1442,6 +1455,7 @@
       ImmediateValue *imm = i->getSrc(0)->asImm();
       assert(imm);
       code[0] |= imm->reg.data.u32 << 20;
+      code[1] |= 0x8000;
    }
 
    // thread count
@@ -1452,6 +1466,7 @@
       assert(imm);
       code[0] |= imm->reg.data.u32 << 26;
       code[1] |= imm->reg.data.u32 >> 6;
+      code[1] |= 0x4000;
    }
 
    if (i->srcExists(2) && (i->predSrc != 2)) {
@@ -1486,6 +1501,21 @@
 }
 
 void
+CodeEmitterNVC0::emitAFETCH(const Instruction *i) // writes the two-word AFETCH encoding
+{
+   code[0] = 0x00000006;
+   code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff); // low 11 offset bits in bits 0..10 of word 1
+
+   if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+      code[0] |= 0x200; // extra bit set when src 0 is in the shader-output file
+
+   emitPredicate(i);
+
+   defId(i->def(0), 14); // destination register
+   srcId(i->src(0).getIndirect(0), 20); // indirect address of src 0
+}
+
+void
 CodeEmitterNVC0::emitPFETCH(const Instruction *i)
 {
    uint32_t prim = i->src(0).get()->reg.data.u32;
@@ -2588,11 +2618,12 @@
          int imul; // integer MUL to MUL delay 3
       } res;
       struct ScoreData {
-         int r[64];
+         int r[256];
          int p[8];
          int c;
       } rd, wr;
       int base;
+      int regs;
 
       void rebase(const int base)
       {
@@ -2601,7 +2632,7 @@
             return;
          this->base = 0;
 
-         for (int i = 0; i < 64; ++i) {
+         for (int i = 0; i < regs; ++i) {
             rd.r[i] += delta;
             wr.r[i] += delta;
          }
@@ -2620,16 +2651,17 @@
          res.imul += delta;
          res.tex += delta;
       }
-      void wipe()
+      void wipe(int regs)
       {
          memset(&rd, 0, sizeof(rd));
          memset(&wr, 0, sizeof(wr));
          memset(&res, 0, sizeof(res));
+         this->regs = regs;
       }
       int getLatest(const ScoreData& d) const
       {
          int max = 0;
-         for (int i = 0; i < 64; ++i)
+         for (int i = 0; i < regs; ++i)
             if (d.r[i] > max)
                max = d.r[i];
          for (int i = 0; i < 8; ++i)
@@ -2664,7 +2696,7 @@
       }
       void setMax(const RegScores *that)
       {
-         for (int i = 0; i < 64; ++i) {
+         for (int i = 0; i < regs; ++i) {
             rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
             wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
          }
@@ -2685,7 +2717,7 @@
       }
       void print(int cycle)
       {
-         for (int i = 0; i < 64; ++i) {
+         for (int i = 0; i < regs; ++i) {
             if (rd.r[i] > cycle)
                INFO("rd $r%i @ %i\n", i, rd.r[i]);
             if (wr.r[i] > cycle)
@@ -2780,9 +2812,10 @@
 bool
 SchedDataCalculator::visit(Function *func)
 {
+   int regs = targ->getFileSize(FILE_GPR) + 1;
    scoreBoards.resize(func->cfg.getSize());
    for (size_t i = 0; i < scoreBoards.size(); ++i)
-      scoreBoards[i].wipe();
+      scoreBoards[i].wipe(regs);
    return true;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -372,6 +372,10 @@
    case TGSI_SEMANTIC_SAMPLEPOS:  return nv50_ir::SV_SAMPLE_POS;
    case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
    case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
+   case TGSI_SEMANTIC_TESSCOORD:  return nv50_ir::SV_TESS_COORD;
+   case TGSI_SEMANTIC_TESSOUTER:  return nv50_ir::SV_TESS_OUTER;
+   case TGSI_SEMANTIC_TESSINNER:  return nv50_ir::SV_TESS_INNER;
+   case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
    default:
       assert(0);
       return nv50_ir::SV_CLOCK;
@@ -826,7 +830,7 @@
    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
       tgsi_dump(tokens, 0);
 
-   mainTempsInLMem = FALSE;
+   mainTempsInLMem = false;
 }
 
 Source::~Source()
@@ -937,7 +941,7 @@
       info->prop.gp.instanceCount = prop->u[0].Data;
       break;
    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
-      info->prop.fp.separateFragData = TRUE;
+      info->prop.fp.separateFragData = true;
       break;
    case TGSI_PROPERTY_FS_COORD_ORIGIN:
    case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
@@ -946,6 +950,24 @@
    case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
       info->io.genUserClip = -1;
       break;
+   case TGSI_PROPERTY_TCS_VERTICES_OUT:
+      info->prop.tp.outputPatchSize = prop->u[0].Data;
+      break;
+   case TGSI_PROPERTY_TES_PRIM_MODE:
+      info->prop.tp.domain = prop->u[0].Data;
+      break;
+   case TGSI_PROPERTY_TES_SPACING:
+      info->prop.tp.partitioning = prop->u[0].Data;
+      break;
+   case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
+      info->prop.tp.winding = prop->u[0].Data;
+      break;
+   case TGSI_PROPERTY_TES_POINT_MODE:
+      if (prop->u[0].Data)
+         info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
+      else
+         info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
+      break;
    default:
       INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
       break;
@@ -1034,6 +1056,11 @@
                if (decl->Interp.Location || info->io.sampleInterp)
                   info->in[i].centroid = 1;
             }
+
+            if (sn == TGSI_SEMANTIC_PATCH)
+               info->in[i].patch = 1;
+            if (sn == TGSI_SEMANTIC_PATCH)
+               info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
          }
       }
       break;
@@ -1068,6 +1095,13 @@
          case TGSI_SEMANTIC_VIEWPORT_INDEX:
             info->io.viewportId = i;
             break;
+         case TGSI_SEMANTIC_PATCH:
+            info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
+            /* fallthrough */
+         case TGSI_SEMANTIC_TESSOUTER:
+         case TGSI_SEMANTIC_TESSINNER:
+            info->out[i].patch = 1;
+            break;
          default:
             break;
          }
@@ -1091,6 +1125,13 @@
          info->sv[i].sn = sn;
          info->sv[i].si = si;
          info->sv[i].input = inferSysValDirection(sn);
+
+         switch (sn) {
+         case TGSI_SEMANTIC_TESSOUTER:
+         case TGSI_SEMANTIC_TESSINNER:
+            info->sv[i].patch = 1;
+            break;
+         }
       }
       break;
    case TGSI_FILE_RESOURCE:
@@ -1155,7 +1196,7 @@
       } else
       if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
          if (insn.getDst(0).isIndirect(0))
-            mainTempsInLMem = TRUE;
+            mainTempsInLMem = true;
       }
    }
 
@@ -1163,12 +1204,22 @@
       Instruction::SrcRegister src = insn.getSrc(s);
       if (src.getFile() == TGSI_FILE_TEMPORARY) {
          if (src.isIndirect(0))
-            mainTempsInLMem = TRUE;
+            mainTempsInLMem = true;
       } else
       if (src.getFile() == TGSI_FILE_RESOURCE) {
          if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
             info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
                0x1 : 0x2;
+      } else
+      if (src.getFile() == TGSI_FILE_OUTPUT) {
+         if (src.isIndirect(0)) {
+            // We don't know which one is accessed, just mark everything for
+            // reading. This is an extremely unlikely occurrence.
+            for (unsigned i = 0; i < info->numOutputs; ++i)
+               info->out[i].oread = 1;
+         } else {
+            info->out[src.getIndex(0)].oread = 1;
+         }
       }
       if (src.getFile() != TGSI_FILE_INPUT)
          continue;
@@ -1245,6 +1296,7 @@
 
    Value *shiftAddress(Value *);
    Value *getVertexBase(int s);
+   Value *getOutputBase(int s);
    DataArray *getArrayForFile(unsigned file, int idx);
    Value *fetchSrc(int s, int c);
    Value *acquireDst(int d, int c);
@@ -1342,6 +1394,8 @@
    Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
    uint8_t vtxBaseValid;
 
+   Value *outBase; // base address of vertex out patch (for TCP)
+
    Stack condBBs;  // fork BB, then else clause BB
    Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
    Stack loopBBs;  // loop headers
@@ -1475,6 +1529,22 @@
 }
 
 Value *
+Converter::getOutputBase(int s) // returns outBase plus the dimension-1 index of TGSI source s, cached per s
+{
+   assert(s < 5); // vtxBase[] has 5 slots
+   if (!(vtxBaseValid & (1 << s))) { // bit s clear: nothing cached for this source yet
+      Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1)); // constant part of the dimension-1 index
+      if (tgsi.getSrc(s).isIndirect(1))
+         offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
+                         fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
+                         offset); // add the indirect part of the dimension-1 index
+      vtxBaseValid |= 1 << s; // mark slot s as cached
+      vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset); // stored in the same vtxBase[] cache that vtxBaseValid tracks
+   }
+   return vtxBase[s];
+}
+
+Value *
 Converter::fetchSrc(int s, int c)
 {
    Value *res;
@@ -1487,6 +1557,9 @@
 
    if (src.is2D()) {
       switch (src.getFile()) {
+      case TGSI_FILE_OUTPUT:
+         dimRel = getOutputBase(s);
+         break;
       case TGSI_FILE_INPUT:
          dimRel = getVertexBase(s);
          break;
@@ -1541,6 +1614,7 @@
    const int idx2d = src.is2D() ? src.getIndex(1) : 0;
    const int idx = src.getIndex(0);
    const int swz = src.getSwizzle(c);
+   Instruction *ld;
 
    switch (src.getFile()) {
    case TGSI_FILE_IMMEDIATE:
@@ -1568,13 +1642,19 @@
          if (ptr)
             return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
       }
-      return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
+      ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+      ld->perPatch = info->in[idx].patch;
+      return ld->getDef(0);
    case TGSI_FILE_OUTPUT:
-      assert(!"load from output file");
-      return NULL;
+      assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
+      ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+      ld->perPatch = info->out[idx].patch;
+      return ld->getDef(0);
    case TGSI_FILE_SYSTEM_VALUE:
       assert(!ptr);
-      return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+      ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+      ld->perPatch = info->sv[idx].patch;
+      return ld->getDef(0);
    default:
       return getArrayForFile(src.getFile(), idx2d)->load(
          sub.cur->values, idx, swz, shiftAddress(ptr));
@@ -1605,19 +1685,8 @@
 {
    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
 
-   switch (tgsi.getSaturate()) {
-   case TGSI_SAT_NONE:
-      break;
-   case TGSI_SAT_ZERO_ONE:
+   if (tgsi.getSaturate()) {
       mkOp1(OP_SAT, dstTy, val, val);
-      break;
-   case TGSI_SAT_MINUS_PLUS_ONE:
-      mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f));
-      mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f));
-      break;
-   default:
-      assert(!"invalid saturation mode");
-      break;
    }
 
    Value *ptr = NULL;
@@ -1655,7 +1724,8 @@
              viewport != NULL)
             mkOp1(OP_MOV, TYPE_U32, viewport, val);
          else
-            mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+            mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
+               info->out[idx].patch;
       }
    } else
    if (f == TGSI_FILE_TEMPORARY ||
@@ -2920,9 +2990,15 @@
    case TGSI_OPCODE_UBFE:
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
          src0 = fetchSrc(0, c);
-         src1 = fetchSrc(1, c);
-         src2 = fetchSrc(2, c);
-         mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+         if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
+             tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
+            src1 = loadImm(NULL, tgsi.getSrc(2).getValueU32(c, info) << 8 |
+                           tgsi.getSrc(1).getValueU32(c, info));
+         } else {
+            src1 = fetchSrc(1, c);
+            src2 = fetchSrc(2, c);
+            mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1);
+         }
          mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1);
       }
       break;
@@ -3295,10 +3371,21 @@
          clipVtx[c] = getScratch();
    }
 
-   if (prog->getType() == Program::TYPE_FRAGMENT) {
+   switch (prog->getType()) {
+   case Program::TYPE_TESSELLATION_CONTROL:
+      outBase = mkOp2v(
+         OP_SUB, TYPE_U32, getSSA(),
+         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+      break;
+   case Program::TYPE_FRAGMENT: {
       Symbol *sv = mkSysVal(SV_POSITION, 3);
       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
       mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+      break;
+   }
+   default:
+      break;
    }
 
    if (info->io.viewportId >= 0)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,8 +29,8 @@
 #include <deque>
 #include <list>
 #include <vector>
-#include <tr1/unordered_set>
 
+#include "codegen/unordered_set.h"
 #include "codegen/nv50_ir_util.h"
 #include "codegen/nv50_ir_graph.h"
 
@@ -106,6 +106,7 @@
    OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
    OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
    OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
+   OP_AFETCH, // fetch base address of shader input (a[%r1+0x10])
    OP_EXPORT,
    OP_LINTERP,
    OP_PINTERP,
@@ -372,7 +373,8 @@
    SV_SAMPLE_INDEX,
    SV_SAMPLE_POS,
    SV_SAMPLE_MASK,
-   SV_TESS_FACTOR,
+   SV_TESS_OUTER,
+   SV_TESS_INNER,
    SV_TESS_COORD,
    SV_TID,
    SV_CTAID,
@@ -583,10 +585,10 @@
 
    static inline Value *get(Iterator&);
 
-   std::tr1::unordered_set<ValueRef *> uses;
+   unordered_set<ValueRef *> uses;
    std::list<ValueDef *> defs;
-   typedef std::tr1::unordered_set<ValueRef *>::iterator UseIterator;
-   typedef std::tr1::unordered_set<ValueRef *>::const_iterator UseCIterator;
+   typedef unordered_set<ValueRef *>::iterator UseIterator;
+   typedef unordered_set<ValueRef *>::const_iterator UseCIterator;
    typedef std::list<ValueDef *>::iterator DefIterator;
    typedef std::list<ValueDef *>::const_iterator DefCIterator;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -176,7 +176,7 @@
                            i->getSrc(0), i->getSrc(1));
    i->setSrc(0, tmp);
    i->setSrc(1, NULL);
-   return TRUE;
+   return true;
 }
 
 //
@@ -193,100 +193,16 @@
       checkPredicate(i);
 
    switch (i->op) {
-   case OP_TEX:
-   case OP_TXB:
-   case OP_TXL:
-   case OP_TXF:
-   case OP_TXG:
-      return handleTEX(i->asTex());
-   case OP_TXD:
-      return handleTXD(i->asTex());
-   case OP_TXLQ:
-      return handleTXLQ(i->asTex());
-   case OP_TXQ:
-      return handleTXQ(i->asTex());
-   case OP_EX2:
-      bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0));
-      i->setSrc(0, i->getDef(0));
-      break;
-   case OP_POW:
-      return handlePOW(i);
-   case OP_DIV:
-      return handleDIV(i);
-   case OP_MOD:
-      return handleMOD(i);
-   case OP_SQRT:
-      return handleSQRT(i);
-   case OP_EXPORT:
-      return handleEXPORT(i);
    case OP_PFETCH:
       return handlePFETCH(i);
-   case OP_EMIT:
-   case OP_RESTART:
-      return handleOUT(i);
-   case OP_RDSV:
-      return handleRDSV(i);
-   case OP_WRSV:
-      return handleWRSV(i);
-   case OP_LOAD:
-      if (i->src(0).getFile() == FILE_SHADER_INPUT) {
-         if (prog->getType() == Program::TYPE_COMPUTE) {
-            i->getSrc(0)->reg.file = FILE_MEMORY_CONST;
-            i->getSrc(0)->reg.fileIndex = 0;
-         } else
-         if (prog->getType() == Program::TYPE_GEOMETRY &&
-             i->src(0).isIndirect(0)) {
-            // XXX: this assumes vec4 units
-            Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
-                                    i->getIndirect(0, 0), bld.mkImm(4));
-            i->setIndirect(0, 0, ptr);
-            i->op = OP_VFETCH;
-         } else {
-            i->op = OP_VFETCH;
-            assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
-         }
-      } else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
-         if (i->src(0).isIndirect(1)) {
-            Value *ptr;
-            if (i->src(0).isIndirect(0))
-               ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
-                                i->getIndirect(0, 1), bld.mkImm(0x1010),
-                                i->getIndirect(0, 0));
-            else
-               ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
-                                i->getIndirect(0, 1), bld.mkImm(16));
-            i->setIndirect(0, 1, NULL);
-            i->setIndirect(0, 0, ptr);
-            i->subOp = NV50_IR_SUBOP_LDC_IS;
-         }
-      }
-      break;
-   case OP_ATOM:
-   {
-      const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
-      handleATOM(i);
-      handleCasExch(i, cctl);
-   }
-      break;
-   case OP_SULDB:
-   case OP_SULDP:
-   case OP_SUSTB:
-   case OP_SUSTP:
-   case OP_SUREDB:
-   case OP_SUREDP:
-      handleSurfaceOpNVE4(i->asTex());
-      break;
    case OP_DFDX:
    case OP_DFDY:
-      handleDFDX(i);
-      break;
+      return handleDFDX(i);
    case OP_POPCNT:
-      handlePOPCNT(i);
-      break;
+      return handlePOPCNT(i);
    default:
-      break;
+      return NVC0LoweringPass::visit(i);
    }
-   return true;
 }
 
 } // namespace nv50_ir
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -202,7 +202,10 @@
    Program *prog = fn->getProgram();
 
    r63 = new_LValue(fn, FILE_GPR);
-   r63->reg.data.id = 63;
+   if (prog->maxGPR < 63)
+      r63->reg.data.id = 63;
+   else
+      r63->reg.data.id = 127;
 
    // this is actually per-program, but we can do it all on visiting main()
    std::list<Instruction *> *outWrites =
@@ -888,7 +891,7 @@
       }
    }
    bld.setPosition(joinBB, false);
-   bld.mkOp(OP_JOIN, TYPE_NONE, NULL);
+   bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
    return true;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -224,7 +224,7 @@
       const Instruction *texi,
       const Instruction *insn,
       std::list<TexUse> &uses,
-      std::tr1::unordered_set<const Instruction *>& visited)
+      unordered_set<const Instruction *>& visited)
 {
    for (int d = 0; insn->defExists(d); ++d) {
       Value *v = insn->getDef(d);
@@ -323,7 +323,7 @@
    if (!uses)
       return false;
    for (size_t i = 0; i < texes.size(); ++i) {
-      std::tr1::unordered_set<const Instruction *> visited;
+      unordered_set<const Instruction *> visited;
       findFirstUses(texes[i], texes[i], uses[i], visited);
    }
 
@@ -559,6 +559,12 @@
       } else
       if (i->isNop()) {
          bb->remove(i);
+      } else
+      if (i->op == OP_BAR && i->subOp == NV50_IR_SUBOP_BAR_SYNC &&
+          prog->getType() != Program::TYPE_COMPUTE) {
+         // It seems like barriers are never required for tessellation since
+         // the warp size is 32, and there are always at most 32 tcs threads.
+         bb->remove(i);
       } else {
          // TODO: Move this to before register allocation for operations that
          // need the $c register !
@@ -1521,6 +1527,10 @@
          i->op = OP_MOV;
          i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0));
       }
+      if (sv == SV_VERTEX_COUNT) {
+         bld.setPosition(i, true);
+         bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), bld.mkImm(0x808));
+      }
       return true;
    }
 
@@ -1590,7 +1600,7 @@
       ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
       break;
    default:
-      if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
+      if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
          vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
       ld = bld.mkFetch(i->getDef(0), i->dType,
                        FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
@@ -1741,6 +1751,7 @@
 bool
 NVC0LoweringPass::visit(Instruction *i)
 {
+   bool ret = true;
    bld.setPosition(i, false);
 
    if (i->cc != CC_ALWAYS)
@@ -1772,7 +1783,8 @@
    case OP_SQRT:
       return handleSQRT(i);
    case OP_EXPORT:
-      return handleEXPORT(i);
+      ret = handleEXPORT(i);
+      break;
    case OP_EMIT:
    case OP_RESTART:
       return handleOUT(i);
@@ -1811,6 +1823,9 @@
             i->setIndirect(0, 0, ptr);
             i->subOp = NV50_IR_SUBOP_LDC_IS;
          }
+      } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
+         assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
+         i->op = OP_VFETCH;
       }
       break;
    case OP_ATOM:
@@ -1832,7 +1847,20 @@
    default:
       break;
    }
-   return true;
+
+   /* Kepler+ has a special opcode to compute a new base address to be used
+    * for indirect loads.
+    */
+   if (targ->getChipset() >= NVISA_GK104_CHIPSET && !i->perPatch &&
+       (i->op == OP_VFETCH || i->op == OP_EXPORT) && i->src(0).isIndirect(0)) {
+      Instruction *afetch = bld.mkOp1(OP_AFETCH, TYPE_U32, bld.getSSA(),
+                                      cloneShallow(func, i->getSrc(0)));
+      afetch->setIndirect(0, 0, i->getIndirect(0, 0));
+      i->src(0).get()->reg.data.offset = 0;
+      i->setIndirect(0, 0, afetch->getDef(0));
+   }
+
+   return ret;
 }
 
 bool
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h	2015-09-16 14:36:09.000000000 +0000
@@ -20,8 +20,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <tr1/unordered_set>
-
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_build_util.h"
 
@@ -73,7 +71,7 @@
    inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
    void findFirstUses(const Instruction *tex, const Instruction *def,
                       std::list<TexUse>&,
-                      std::tr1::unordered_set<const Instruction *>&);
+                      unordered_set<const Instruction *>&);
    void findOverwritingDefs(const Instruction *tex, Instruction *insn,
                             const BasicBlock *term,
                             std::list<TexUse>&);
@@ -111,10 +109,11 @@
 
    void checkPredicate(Instruction *);
 
+   virtual bool visit(Instruction *);
+
 private:
    virtual bool visit(Function *);
    virtual bool visit(BasicBlock *);
-   virtual bool visit(Instruction *);
 
    void readTessCoord(LValue *dst, int c);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -281,7 +281,6 @@
 
    void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
 
-   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
    CmpInstruction *findOriginForTestWithZero(Value *);
 
    unsigned int foldCount;
@@ -340,25 +339,33 @@
       return NULL;
    Instruction *insn = value->getInsn();
 
-   while (insn && insn->op != OP_SET) {
-      Instruction *next = NULL;
-      switch (insn->op) {
-      case OP_NEG:
-      case OP_ABS:
-      case OP_CVT:
-         next = insn->getSrc(0)->getInsn();
-         if (insn->sType != next->dType)
+   if (insn->asCmp() && insn->op != OP_SLCT)
+      return insn->asCmp();
+
+   /* Sometimes mov's will sneak in as a result of other folding. This gets
+    * cleaned up later.
+    */
+   if (insn->op == OP_MOV)
+      return findOriginForTestWithZero(insn->getSrc(0));
+
+   /* Deal with AND 1.0 here since nv50 can't fold into boolean float */
+   if (insn->op == OP_AND) {
+      int s = 0;
+      ImmediateValue imm;
+      if (!insn->src(s).getImmediate(imm)) {
+         s = 1;
+         if (!insn->src(s).getImmediate(imm))
             return NULL;
-         break;
-      case OP_MOV:
-         next = insn->getSrc(0)->getInsn();
-         break;
-      default:
-         return NULL;
       }
-      insn = next;
+      if (imm.reg.data.f32 != 1.0f)
+         return NULL;
+      /* TODO: Come up with a way to handle the condition being inverted */
+      if (insn->src(!s).mod != Modifier(0))
+         return NULL;
+      return findOriginForTestWithZero(insn->getSrc(!s));
    }
-   return insn ? insn->asCmp() : NULL;
+
+   return NULL;
 }
 
 void
@@ -965,33 +972,108 @@
 
    case OP_SET: // TODO: SET_AND,OR,XOR
    {
+      /* This optimizes the case where the output of a set is being compared
+       * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
+       * can be a lot cleverer in our comparison.
+       */
       CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
       CondCode cc, ccZ;
-      if (i->src(t).mod != Modifier(0))
-         return;
-      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
+      if (imm0.reg.data.u32 != 0 || !si)
          return;
       cc = si->setCond;
       ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+      // We do everything assuming var (cmp) 0, reverse the condition if 0 is
+      // first.
       if (s == 0)
          ccZ = reverseCondCode(ccZ);
+      // If there is a negative modifier, we need to undo that, by flipping
+      // the comparison to zero.
+      if (i->src(t).mod.neg())
+         ccZ = reverseCondCode(ccZ);
+      // If this is a signed comparison, we expect the input to be a regular
+      // boolean, i.e. 0/-1. However the rest of the logic assumes that true
+      // is positive, so just flip the sign.
+      if (i->sType == TYPE_S32) {
+         assert(!isFloatType(si->dType));
+         ccZ = reverseCondCode(ccZ);
+      }
       switch (ccZ) {
-      case CC_LT: cc = CC_FL; break;
-      case CC_GE: cc = CC_TR; break;
-      case CC_EQ: cc = inverseCondCode(cc); break;
-      case CC_LE: cc = inverseCondCode(cc); break;
-      case CC_GT: break;
-      case CC_NE: break;
+      case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
+      case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
+      case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
+      case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
+      case CC_GT: break; // bool > 0 -- bool
+      case CC_NE: break; // bool != 0 -- bool
       default:
          return;
       }
+
+      // Update the condition of this SET to be identical to the origin set,
+      // but with the updated condition code. The original SET should get
+      // DCE'd, ideally.
+      i->op = si->op;
       i->asCmp()->setCond = cc;
       i->setSrc(0, si->src(0));
       i->setSrc(1, si->src(1));
+      if (si->srcExists(2))
+         i->setSrc(2, si->src(2));
       i->sType = si->sType;
    }
       break;
 
+   case OP_AND:
+   {
+      Instruction *src = i->getSrc(t)->getInsn();
+      ImmediateValue imm1;
+      if (imm0.reg.data.u32 == 0) {
+         i->op = OP_MOV;
+         i->setSrc(0, new_ImmediateValue(prog, 0u));
+         i->src(0).mod = Modifier(0);
+         i->setSrc(1, NULL);
+      } else if (imm0.reg.data.u32 == ~0U) {
+         i->op = i->src(t).mod.getOp();
+         if (t) {
+            i->setSrc(0, i->getSrc(t));
+            i->src(0).mod = i->src(t).mod;
+         }
+         i->setSrc(1, NULL);
+      } else if (src->asCmp()) {
+         CmpInstruction *cmp = src->asCmp();
+         if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
+            return;
+         if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
+            return;
+         if (imm0.reg.data.f32 != 1.0)
+            return;
+         if (cmp->dType != TYPE_U32)
+            return;
+
+         cmp->dType = TYPE_F32;
+         if (i->src(t).mod != Modifier(0)) {
+            assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
+            i->src(t).mod = Modifier(0);
+            cmp->setCond = inverseCondCode(cmp->setCond);
+         }
+         i->op = OP_MOV;
+         i->setSrc(s, NULL);
+         if (t) {
+            i->setSrc(0, i->getSrc(t));
+            i->setSrc(t, NULL);
+         }
+      } else if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32) &&
+                 src->op == OP_SHR &&
+                 src->src(1).getImmediate(imm1) &&
+                 i->src(t).mod == Modifier(0) &&
+                 util_is_power_of_two(imm0.reg.data.u32 + 1)) {
+         // low byte = offset, high byte = width
+         uint32_t ext = (util_last_bit(imm0.reg.data.u32) << 8) | imm1.reg.data.u32;
+         i->op = OP_EXTBF;
+         i->setSrc(0, src->getSrc(0));
+         i->setSrc(1, new_ImmediateValue(prog, ext));
+      }
+   }
+      break;
+
    case OP_SHL:
    {
       if (s != 1 || i->src(0).mod != Modifier(0))
@@ -1050,6 +1132,84 @@
       i->op = OP_MOV;
       break;
    }
+   case OP_CVT: {
+      Storage res;
+
+      // TODO: handle 64-bit values properly
+      if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
+         return;
+
+      // TODO: handle single byte/word extractions
+      if (i->subOp)
+         return;
+
+      bld.setPosition(i, true); /* make sure bld is init'ed */
+
+#define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \
+   case type: \
+      switch (i->sType) { \
+      case TYPE_F32: \
+         res.data.dst = util_iround(i->saturate ? \
+                                    CLAMP(imm0.reg.data.f32, fmin, fmax) : \
+                                    imm0.reg.data.f32); \
+         break; \
+      case TYPE_S32: \
+         res.data.dst = i->saturate ? \
+                        CLAMP(imm0.reg.data.s32, imin, imax) : \
+                        imm0.reg.data.s32; \
+         break; \
+      case TYPE_U32: \
+         res.data.dst = i->saturate ? \
+                        CLAMP(imm0.reg.data.u32, umin, umax) : \
+                        imm0.reg.data.u32; \
+         break; \
+      case TYPE_S16: \
+         res.data.dst = i->saturate ? \
+                        CLAMP(imm0.reg.data.s16, imin, imax) : \
+                        imm0.reg.data.s16; \
+         break; \
+      case TYPE_U16: \
+         res.data.dst = i->saturate ? \
+                        CLAMP(imm0.reg.data.u16, umin, umax) : \
+                        imm0.reg.data.u16; \
+         break; \
+      default: return; \
+      } \
+      i->setSrc(0, bld.mkImm(res.data.dst)); \
+      break
+
+      switch(i->dType) {
+      CASE(TYPE_U16, u16, 0, UINT16_MAX, 0, UINT16_MAX, 0, UINT16_MAX);
+      CASE(TYPE_S16, s16, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, 0, INT16_MAX);
+      CASE(TYPE_U32, u32, 0, UINT32_MAX, 0, INT32_MAX, 0, UINT32_MAX);
+      CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX);
+      case TYPE_F32:
+         switch (i->sType) {
+         case TYPE_F32:
+            res.data.f32 = i->saturate ?
+               CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
+               imm0.reg.data.f32;
+            break;
+         case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break;
+         case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break;
+         case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
+         case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
+         default:
+            return;
+         }
+         i->setSrc(0, bld.mkImm(res.data.f32));
+         break;
+      default:
+         return;
+      }
+#undef CASE
+
+      i->setType(i->dType); /* Remove i->sType, which we don't need anymore */
+      i->op = OP_MOV;
+      i->saturate = 0;
+      i->src(0).mod = Modifier(0); /* Clear the already applied modifier */
+      break;
+   }
    default:
       return;
    }
@@ -1156,7 +1316,8 @@
    void handleRCP(Instruction *);
    void handleSLCT(Instruction *);
    void handleLOGOP(Instruction *);
-   void handleCVT(Instruction *);
+   void handleCVT_NEG(Instruction *);
+   void handleCVT_EXTBF(Instruction *);
    void handleSUCLAMP(Instruction *);
 
    BuildUtil bld;
@@ -1407,12 +1568,12 @@
 // nv50:
 //  F2I(NEG(I2F(ABS(SET))))
 void
-AlgebraicOpt::handleCVT(Instruction *cvt)
+AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
 {
+   Instruction *insn = cvt->getSrc(0)->getInsn();
    if (cvt->sType != TYPE_F32 ||
        cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0))
       return;
-   Instruction *insn = cvt->getSrc(0)->getInsn();
    if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
       return;
    if (insn->src(0).mod != Modifier(0))
@@ -1442,6 +1603,119 @@
    delete_Instruction(prog, cvt);
 }
 
+// Some shaders extract packed bytes out of words and convert them to
+// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
+// nv50 for word sizes.
+//
+// CVT(EXTBF(x, byte/word))
+// CVT(AND(bytemask, x))
+// CVT(AND(bytemask, SHR(x, 8/16/24)))
+// CVT(SHR(x, 16/24))
+//
+// On a match the CVT is rewritten in place: it reads x directly, its source
+// type is narrowed to the matching 8/16-bit type and subOp is set to the byte
+// offset of the extracted field. Otherwise the CVT is left untouched.
+void
+AlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
+{
+   Instruction *insn = cvt->getSrc(0)->getInsn();
+   ImmediateValue imm;
+   Value *arg = NULL;
+   unsigned width, offset;
+   if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn)
+      return;
+   // Decides between the signed and unsigned narrow source type below.
+   bool isSigned = cvt->sType == TYPE_S32;
+   if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm)) {
+      width = (imm.reg.data.u32 >> 8) & 0xff;
+      offset = imm.reg.data.u32 & 0xff;
+      arg = insn->getSrc(0);
+
+      if (width != 8 && width != 16)
+         return;
+      if (width == 8 && offset & 0x7)
+         return;
+      if (width == 16 && offset & 0xf)
+         return;
+   } else if (insn->op == OP_AND) {
+      int s;
+      if (insn->src(0).getImmediate(imm))
+         s = 0;
+      else if (insn->src(1).getImmediate(imm))
+         s = 1;
+      else
+         return;
+
+      if (imm.reg.data.u32 == 0xff)
+         width = 8;
+      else if (imm.reg.data.u32 == 0xffff)
+         width = 16;
+      else
+         return;
+
+      arg = insn->getSrc(!s);
+      Instruction *shift = arg->getInsn();
+      offset = 0;
+      if (shift && shift->op == OP_SHR &&
+          shift->sType == cvt->sType &&
+          shift->src(1).getImmediate(imm) &&
+          ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
+           (width == 16 && (imm.reg.data.u32 & 0xf) == 0))) {
+         arg = shift->getSrc(0);
+         offset = imm.reg.data.u32;
+      }
+      // The mask cleared every bit above the field, so the AND result is
+      // never negative. Extract it as unsigned even for a signed CVT, or
+      // fields with their top bit set would be sign-extended.
+      isSigned = false;
+   } else if (insn->op == OP_SHR &&
+              insn->sType == cvt->sType &&
+              insn->src(1).getImmediate(imm)) {
+      arg = insn->getSrc(0);
+      if (imm.reg.data.u32 == 24) {
+         width = 8;
+         offset = 24;
+      } else if (imm.reg.data.u32 == 16) {
+         width = 16;
+         offset = 16;
+      } else {
+         return;
+      }
+   }
+
+   if (!arg)
+      return;
+
+   // Irrespective of what came earlier, we can undo a shift on the argument
+   // by adjusting the offset.
+   Instruction *shift = arg->getInsn();
+   if (shift && shift->op == OP_SHL &&
+       shift->src(1).getImmediate(imm) &&
+       ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
+        (width == 16 && (imm.reg.data.u32 & 0xf) == 0)) &&
+       imm.reg.data.u32 <= offset) {
+      arg = shift->getSrc(0);
+      offset -= imm.reg.data.u32;
+   }
+
+   // The unpackSnorm lowering still leaves a few shifts behind, but it's too
+   // annoying to detect them.
+
+   // subOp is a byte index into a 32-bit word; leave the CVT alone if the
+   // field would not lie entirely inside it.
+   if (offset + width > 32)
+      return;
+
+   if (width == 8) {
+      cvt->sType = isSigned ? TYPE_S8 : TYPE_U8;
+   } else {
+      assert(width == 16);
+      cvt->sType = isSigned ? TYPE_S16 : TYPE_U16;
+   }
+   cvt->setSrc(0, arg);
+   cvt->subOp = offset >> 3;
+}
+
 // SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6)
 void
 AlgebraicOpt::handleSUCLAMP(Instruction *insn)
@@ -1512,7 +1771,9 @@
          handleLOGOP(i);
          break;
       case OP_CVT:
-         handleCVT(i);
+         handleCVT_NEG(i);
+         if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
+             handleCVT_EXTBF(i);
          break;
       case OP_SUCLAMP:
          handleSUCLAMP(i);
@@ -2029,6 +2290,8 @@
       }
       if (ldst->getPredicate()) // TODO: handle predicated ld/st
          continue;
+      if (ldst->perPatch) // TODO: create separate per-patch lists
+         continue;
 
       if (isLoad) {
          DataFile file = ldst->src(0).getFile();
@@ -2339,6 +2602,10 @@
              !isFloatType(i->dType))
             break;
 
+         if (i->getDef(0)->reg.data.id >= 64 ||
+             i->getSrc(0)->reg.data.id >= 64)
+            break;
+
          def = i->getSrc(1)->getInsn();
          if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
             vtmp = i->getSrc(1);
@@ -2462,6 +2729,8 @@
       case FILE_MEMORY_CONST:
       case FILE_SHADER_INPUT:
          return true;
+      case FILE_SHADER_OUTPUT:
+         return bb->getProgram()->getType() == Program::TYPE_TESSELLATION_EVAL;
       default:
          return false;
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -135,6 +135,7 @@
    "membar",
    "vfetch",
    "pfetch",
+   "afetch",
    "export",
    "linterp",
    "pinterp",
@@ -258,7 +259,8 @@
    "SAMPLE_INDEX",
    "SAMPLE_POS",
    "SAMPLE_MASK",
-   "TESS_FACTOR",
+   "TESS_OUTER",
+   "TESS_INNER",
    "TESS_COORD",
    "TID",
    "CTAID",
@@ -409,7 +411,7 @@
    case TYPE_U64:
    case TYPE_S64:
    default:
-      PRINT("0x%016"PRIx64, reg.data.u64);
+      PRINT("0x%016" PRIx64, reg.data.u64);
       break;
    }
    return pos;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp	2014-11-01 15:59:41.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -25,7 +25,7 @@
 
 #include <stack>
 #include <limits>
-#include <tr1/unordered_set>
+#include <tr1/unordered_map>
 
 namespace nv50_ir {
 
@@ -223,6 +223,7 @@
    private:
       virtual bool visit(BasicBlock *);
       inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+      inline void splitEdges(BasicBlock *b);
    };
 
    class ArgumentMovesPass : public Pass {
@@ -346,28 +347,55 @@
    return (n == 2);
 }
 
-// For each operand of each PHI in b, generate a new value by inserting a MOV
-// at the end of the block it is coming from and replace the operand with its
-// result. This eliminates liveness conflicts and enables us to let values be
-// copied to the right register if such a conflict exists nonetheless.
+struct PhiMapHash {
+   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
+      return std::tr1::hash<Instruction*>()(val.first) * 31 +
+         std::tr1::hash<BasicBlock*>()(val.second);
+   }
+};
+
+typedef std::tr1::unordered_map<
+   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
+
+// Critical edges need to be split up so that work can be inserted along
+// specific edge transitions. Unfortunately manipulating incident edges into a
+// BB invalidates all the PHI nodes since their sources are implicitly ordered
+// by incident edge order.
 //
-// These MOVs are also crucial in making sure the live intervals of phi srces
-// are extended until the end of the loop, since they are not included in the
-// live-in sets.
-bool
-RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+// TODO: Make it so that that is not the case, and PHI nodes store pointers to
+// the original BBs.
+void
+RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
 {
-   Instruction *phi, *mov;
    BasicBlock *pb, *pn;
-
+   Instruction *phi;
+   Graph::EdgeIterator ei;
    std::stack<BasicBlock *> stack;
+   int j = 0;
 
-   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
       pb = BasicBlock::get(ei.getNode());
       assert(pb);
       if (needNewElseBlock(bb, pb))
          stack.push(pb);
    }
+
+   // No critical edges were found, no need to perform any work.
+   if (stack.empty())
+      return;
+
+   // We're about to, potentially, reorder the inbound edges. This means that
+   // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
+   // nodes after the graph has been modified.
+   PhiMap phis;
+
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
+         phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
+   }
+
    while (!stack.empty()) {
       pb = stack.top();
       pn = new BasicBlock(func);
@@ -380,12 +408,47 @@
       assert(pb->getExit()->op != OP_CALL);
       if (pb->getExit()->asFlow()->target.bb == bb)
          pb->getExit()->asFlow()->target.bb = pn;
+
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+         phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
+         phis.erase(it);
+      }
    }
 
+   // Now go through and fix up all of the phi node sources.
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+
+         phi->setSrc(j, it->second);
+      }
+   }
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and enables us to let values be
+// copied to the right register if such a conflict exists nonetheless.
+//
+// These MOVs are also crucial in making sure the live intervals of phi srces
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+   Instruction *phi, *mov;
+
+   splitEdges(bb);
+
    // insert MOVs (phi->src(j) should stem from j-th in-BB)
    int j = 0;
    for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
-      pb = BasicBlock::get(ei.getNode());
+      BasicBlock *pb = BasicBlock::get(ei.getNode());
       if (!pb->isTerminated())
          pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
 
@@ -1551,7 +1614,7 @@
       // Keep track of which instructions to delete later. Deleting them
       // inside the loop is unsafe since a single instruction may have
       // multiple destinations that all need to be spilled (like OP_SPLIT).
-      std::tr1::unordered_set<Instruction *> to_del;
+      unordered_set<Instruction *> to_del;
 
       for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end();
            ++d) {
@@ -1593,7 +1656,7 @@
          }
       }
 
-      for (std::tr1::unordered_set<Instruction *>::const_iterator it = to_del.begin();
+      for (unordered_set<Instruction *>::const_iterator it = to_del.begin();
            it != to_del.end(); ++it)
          delete_Instruction(func->getProgram(), *it);
    }
@@ -2066,6 +2129,8 @@
          condenseDefs(i);
          if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
             addHazard(i, i->src(0).getIndirect(0));
+         if (i->src(0).isIndirect(1) && typeSizeof(i->dType) >= 8)
+            addHazard(i, i->src(0).getIndirect(1));
       } else
       if (i->op == OP_UNION ||
           i->op == OP_MERGE ||
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,7 @@
    0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
    0, 0, 0,                // PRERET,CONT,BREAK
    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
-   1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
+   1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
    1, 1,                   // EMIT, RESTART
    1, 1, 1,                // TEX, TXB, TXL,
    1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
@@ -96,8 +96,8 @@
    OPCLASS_FLOW, OPCLASS_FLOW,
    // MEMBAR
    OPCLASS_CONTROL,
-   // VFETCH, PFETCH, EXPORT
-   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
+   // VFETCH, PFETCH, AFETCH, EXPORT
+   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
    // LINTERP, PINTERP
    OPCLASS_SFU, OPCLASS_SFU,
    // EMIT, RESTART
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -118,7 +118,7 @@
    static const uint32_t shortForm[(OP_LAST + 31) / 32] =
    {
       // MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF
-      0x00014e40, 0x00000040, 0x00000498, 0x00000000
+      0x00014e40, 0x00000040, 0x00000930, 0x00000000
    };
    static const operation noDestList[] =
    {
@@ -416,6 +416,8 @@
       return false;
    case OP_SAD:
       return ty == TYPE_S32;
+   case OP_SET:
+      return !isFloatType(ty);
    default:
       return true;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -286,7 +286,8 @@
    case SV_CLIP_DISTANCE:  return 0x2c0 + idx * 4;
    case SV_POINT_COORD:    return 0x2e0 + idx * 4;
    case SV_FACE:           return 0x3fc;
-   case SV_TESS_FACTOR:    return 0x000 + idx * 4;
+   case SV_TESS_OUTER:     return 0x000 + idx * 4;
+   case SV_TESS_INNER:     return 0x010 + idx * 4;
    case SV_TESS_COORD:     return 0x2f0 + idx * 4;
    case SV_NTID:           return kepler ? (0x00 + idx * 4) : ~0;
    case SV_NCTAID:         return kepler ? (0x0c + idx * 4) : ~0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/unordered_set.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/unordered_set.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/codegen/unordered_set.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/codegen/unordered_set.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,48 @@
+#ifndef __NV50_UNORDERED_SET_H__
+#define __NV50_UNORDERED_SET_H__
+
+#if (__cplusplus >= 201103L) || defined(ANDROID)
+#include <unordered_set>
+#else
+#include <tr1/unordered_set>
+#endif
+
+namespace nv50_ir {
+
+#if __cplusplus >= 201103L
+using std::unordered_set;
+#elif !defined(ANDROID)
+using std::tr1::unordered_set;
+#else // Android release before lollipop
+using std::isfinite;
+typedef std::tr1::unordered_set<void *> voidptr_unordered_set;
+
+template <typename V>
+class unordered_set : public voidptr_unordered_set {
+  public:
+    typedef voidptr_unordered_set _base;
+    typedef _base::iterator _biterator;
+    typedef _base::const_iterator const_biterator;
+
+    class iterator : public _biterator {
+      public:
+        iterator(const _biterator & i) : _biterator(i) {}
+        V operator*() const { return reinterpret_cast<V>(*_biterator(*this)); }
+    };
+    class const_iterator : public const_biterator {
+      public:
+        const_iterator(const iterator & i) : const_biterator(i) {}
+        const_iterator(const const_biterator & i) : const_biterator(i) {}
+        const V operator*() const { return reinterpret_cast<const V>(*const_biterator(*this)); }
+    };
+
+    iterator begin() { return _base::begin(); }
+    iterator end() { return _base::end(); }
+    const_iterator begin() const { return _base::begin(); }
+    const_iterator end() const { return _base::end(); }
+};
+#endif
+
+} // namespace nv50_ir
+
+#endif // __NV50_UNORDERED_SET_H__
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -20,8 +20,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
@@ -48,7 +46,7 @@
 
 nouveau_compiler_LDADD = \
 	libnouveau.la \
-	../../auxiliary/libgallium.la \
+	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(GALLIUM_COMMON_LIB_DEPS)
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/Makefile.sources	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -121,7 +121,8 @@
 	codegen/nv50_ir_target_nv50.cpp \
 	codegen/nv50_ir_target_nv50.h \
 	codegen/nv50_ir_util.cpp \
-	codegen/nv50_ir_util.h
+	codegen/nv50_ir_util.h \
+	codegen/unordered_set.h
 
 NVC0_CODEGEN_SOURCES := \
 	codegen/nv50_ir_emit_gk110.cpp \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_buffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_buffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_buffer.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_buffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -22,13 +22,13 @@
    uint32_t offset;
 };
 
-static INLINE struct nouveau_transfer *
+static inline struct nouveau_transfer *
 nouveau_transfer(struct pipe_transfer *transfer)
 {
    return (struct nouveau_transfer *)transfer;
 }
 
-static INLINE boolean
+static inline bool
 nouveau_buffer_malloc(struct nv04_resource *buf)
 {
    if (!buf->data)
@@ -36,16 +36,11 @@
    return !!buf->data;
 }
 
-static INLINE boolean
+static inline bool
 nouveau_buffer_allocate(struct nouveau_screen *screen,
                         struct nv04_resource *buf, unsigned domain)
 {
-   uint32_t size = buf->base.width0;
-
-   if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER |
-                         PIPE_BIND_COMPUTE_RESOURCE |
-                         PIPE_BIND_SHADER_RESOURCE))
-      size = align(size, 0x100);
+   uint32_t size = align(buf->base.width0, 0x100);
 
    if (domain == NOUVEAU_BO_VRAM) {
       buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
@@ -58,12 +53,12 @@
       buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
                                     &buf->bo, &buf->offset);
       if (!buf->bo)
-         return FALSE;
+         return false;
       NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
    } else {
       assert(domain == 0);
       if (!nouveau_buffer_malloc(buf))
-         return FALSE;
+         return false;
    }
    buf->domain = domain;
    if (buf->bo)
@@ -71,10 +66,10 @@
 
    util_range_set_empty(&buf->valid_buffer_range);
 
-   return TRUE;
+   return true;
 }
 
-static INLINE void
+static inline void
 release_allocation(struct nouveau_mm_allocation **mm,
                    struct nouveau_fence *fence)
 {
@@ -82,7 +77,7 @@
    (*mm) = NULL;
 }
 
-INLINE void
+inline void
 nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
 {
    nouveau_bo_ref(NULL, &buf->bo);
@@ -98,7 +93,7 @@
    buf->domain = 0;
 }
 
-static INLINE boolean
+static inline bool
 nouveau_buffer_reallocate(struct nouveau_screen *screen,
                           struct nv04_resource *buf, unsigned domain)
 {
@@ -139,13 +134,13 @@
  */
 static uint8_t *
 nouveau_transfer_staging(struct nouveau_context *nv,
-                         struct nouveau_transfer *tx, boolean permit_pb)
+                         struct nouveau_transfer *tx, bool permit_pb)
 {
    const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
    const unsigned size = align(tx->base.box.width, 4) + adj;
 
    if (!nv->push_data)
-      permit_pb = FALSE;
+      permit_pb = false;
 
    if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
       tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
@@ -167,7 +162,7 @@
  * buffer. Also updates buf->data if present.
  *
  * Maybe just migrate to GART right away if we actually need to do this. */
-static boolean
+static bool
 nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
 {
    struct nv04_resource *buf = nv04_resource(tx->base.resource);
@@ -180,12 +175,12 @@
                  buf->bo, buf->offset + base, buf->domain, size);
 
    if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
-      return FALSE;
+      return false;
 
    if (buf->data)
       memcpy(buf->data + base, tx->map, size);
 
-   return TRUE;
+   return true;
 }
 
 static void
@@ -195,7 +190,7 @@
    struct nv04_resource *buf = nv04_resource(tx->base.resource);
    uint8_t *data = tx->map + offset;
    const unsigned base = tx->base.box.x + offset;
-   const boolean can_cb = !((base | size) & 3);
+   const bool can_cb = !((base | size) & 3);
 
    if (buf->data)
       memcpy(data, buf->data + base, size);
@@ -211,8 +206,8 @@
       nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                     tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
    else
-   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
-      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+   if (nv->push_cb && can_cb)
+      nv->push_cb(nv, buf,
                   base, size / 4, (const uint32_t *)data);
    else
       nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
@@ -224,32 +219,32 @@
 /* Does a CPU wait for the buffer's backing data to become reliably accessible
  * for write/read by waiting on the buffer's relevant fences.
  */
-static INLINE boolean
+static inline bool
 nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
 {
    if (rw == PIPE_TRANSFER_READ) {
       if (!buf->fence_wr)
-         return TRUE;
+         return true;
       NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                            !nouveau_fence_signalled(buf->fence_wr));
       if (!nouveau_fence_wait(buf->fence_wr))
-         return FALSE;
+         return false;
    } else {
       if (!buf->fence)
-         return TRUE;
+         return true;
       NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                            !nouveau_fence_signalled(buf->fence));
       if (!nouveau_fence_wait(buf->fence))
-         return FALSE;
+         return false;
 
       nouveau_fence_ref(NULL, &buf->fence);
    }
    nouveau_fence_ref(NULL, &buf->fence_wr);
 
-   return TRUE;
+   return true;
 }
 
-static INLINE boolean
+static inline bool
 nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
 {
    if (rw == PIPE_TRANSFER_READ)
@@ -258,7 +253,7 @@
       return (buf->fence && !nouveau_fence_signalled(buf->fence));
 }
 
-static INLINE void
+static inline void
 nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
                              struct pipe_resource *resource,
                              const struct pipe_box *box,
@@ -280,7 +275,7 @@
    tx->map = NULL;
 }
 
-static INLINE void
+static inline void
 nouveau_buffer_transfer_del(struct nouveau_context *nv,
                             struct nouveau_transfer *tx)
 {
@@ -297,11 +292,11 @@
 }
 
 /* Creates a cache in system memory of the buffer data. */
-static boolean
+static bool
 nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
 {
    struct nouveau_transfer tx;
-   boolean ret;
+   bool ret;
    tx.base.resource = &buf->base;
    tx.base.box.x = 0;
    tx.base.box.width = buf->base.width0;
@@ -310,13 +305,13 @@
 
    if (!buf->data)
       if (!nouveau_buffer_malloc(buf))
-         return FALSE;
+         return false;
    if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
-      return TRUE;
+      return true;
    nv->stats.buf_cache_count++;
 
-   if (!nouveau_transfer_staging(nv, &tx, FALSE))
-      return FALSE;
+   if (!nouveau_transfer_staging(nv, &tx, false))
+      return false;
 
    ret = nouveau_transfer_read(nv, &tx);
    if (ret) {
@@ -335,15 +330,15 @@
  * resource. This can be useful if we would otherwise have to wait for a read
  * operation to complete on this data.
  */
-static INLINE boolean
+static inline bool
 nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
 {
    if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
-      return FALSE;
+      return false;
    if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
-      return FALSE;
+      return false;
    if (unlikely(usage & PIPE_TRANSFER_PERSISTENT))
-      return FALSE;
+      return false;
    return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
 }
 
@@ -413,7 +408,7 @@
           * back into VRAM on unmap. */
          if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
             buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
-         nouveau_transfer_staging(nv, tx, TRUE);
+         nouveau_transfer_staging(nv, tx, true);
       } else {
          if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
             /* The GPU is currently writing to this buffer. Copy its current
@@ -424,13 +419,13 @@
                align_free(buf->data);
                buf->data = NULL;
             }
-            nouveau_transfer_staging(nv, tx, FALSE);
+            nouveau_transfer_staging(nv, tx, false);
             nouveau_transfer_read(nv, tx);
          } else {
             /* The buffer is currently idle. Create a staging area for writes,
              * and make sure that the cached data is up-to-date. */
             if (usage & PIPE_TRANSFER_WRITE)
-               nouveau_transfer_staging(nv, tx, TRUE);
+               nouveau_transfer_staging(nv, tx, true);
             if (!buf->data)
                nouveau_buffer_cache(nv, buf);
          }
@@ -482,7 +477,7 @@
       if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
          /* The whole range is being discarded, so it doesn't matter what was
           * there before. No need to copy anything over. */
-         nouveau_transfer_staging(nv, tx, TRUE);
+         nouveau_transfer_staging(nv, tx, true);
          map = tx->map;
       } else
       if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
@@ -493,7 +488,7 @@
       } else {
          /* It is expected that the returned buffer be a representation of the
           * data in question, so we must copy it over from the buffer. */
-         nouveau_transfer_staging(nv, tx, TRUE);
+         nouveau_transfer_staging(nv, tx, true);
          if (tx->map)
             memcpy(tx->map, map, box->width);
          map = tx->map;
@@ -537,18 +532,20 @@
    struct nv04_resource *buf = nv04_resource(transfer->resource);
 
    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
-      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
-         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+         if (tx->map)
+            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+
+         util_range_add(&buf->valid_buffer_range,
+                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
+      }
 
       if (likely(buf->domain)) {
          const uint8_t bind = buf->base.bind;
          /* make sure we invalidate dedicated caches */
          if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
-            nv->vbo_dirty = TRUE;
+            nv->vbo_dirty = true;
       }
-
-      util_range_add(&buf->valid_buffer_range,
-                     tx->base.box.x, tx->base.box.x + tx->base.box.width);
    }
 
    if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
@@ -639,7 +636,7 @@
 {
    struct nouveau_screen *screen = nouveau_screen(pscreen);
    struct nv04_resource *buffer;
-   boolean ret;
+   bool ret;
 
    buffer = CALLOC_STRUCT(nv04_resource);
    if (!buffer)
@@ -658,13 +655,13 @@
       switch (buffer->base.usage) {
       case PIPE_USAGE_DEFAULT:
       case PIPE_USAGE_IMMUTABLE:
-         buffer->domain = NOUVEAU_BO_VRAM;
+         buffer->domain = NV_VRAM_DOMAIN(screen);
          break;
       case PIPE_USAGE_DYNAMIC:
          /* For most apps, we'd have to do staging transfers to avoid sync
           * with this usage, and GART -> GART copies would be suboptimal.
           */
-         buffer->domain = NOUVEAU_BO_VRAM;
+         buffer->domain = NV_VRAM_DOMAIN(screen);
          break;
       case PIPE_USAGE_STAGING:
       case PIPE_USAGE_STREAM:
@@ -676,14 +673,14 @@
       }
    } else {
       if (buffer->base.bind & screen->vidmem_bindings)
-         buffer->domain = NOUVEAU_BO_VRAM;
+         buffer->domain = NV_VRAM_DOMAIN(screen);
       else
       if (buffer->base.bind & screen->sysmem_bindings)
          buffer->domain = NOUVEAU_BO_GART;
    }
    ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
 
-   if (ret == FALSE)
+   if (ret == false)
       goto fail;
 
    if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
@@ -730,20 +727,20 @@
    return &buffer->base;
 }
 
-static INLINE boolean
+static inline bool
 nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
                           struct nouveau_bo *bo, unsigned offset, unsigned size)
 {
    if (!nouveau_buffer_malloc(buf))
-      return FALSE;
+      return false;
    if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
-      return FALSE;
+      return false;
    memcpy(buf->data, (uint8_t *)bo->map + offset, size);
-   return TRUE;
+   return true;
 }
 
 /* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
-boolean
+bool
 nouveau_buffer_migrate(struct nouveau_context *nv,
                        struct nv04_resource *buf, const unsigned new_domain)
 {
@@ -758,7 +755,7 @@
 
    if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
       if (!nouveau_buffer_allocate(screen, buf, new_domain))
-         return FALSE;
+         return false;
       ret = nouveau_bo_map(buf->bo, 0, nv->client);
       if (ret)
          return ret;
@@ -771,7 +768,7 @@
       if (new_domain == NOUVEAU_BO_VRAM) {
          /* keep a system memory copy of our data in case we hit a fallback */
          if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
-            return FALSE;
+            return false;
          if (nouveau_mesa_debug)
             debug_printf("migrating %u KiB to VRAM\n", size / 1024);
       }
@@ -792,28 +789,28 @@
    if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
       struct nouveau_transfer tx;
       if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
-         return FALSE;
+         return false;
       tx.base.resource = &buf->base;
       tx.base.box.x = 0;
       tx.base.box.width = buf->base.width0;
       tx.bo = NULL;
       tx.map = NULL;
-      if (!nouveau_transfer_staging(nv, &tx, FALSE))
-         return FALSE;
+      if (!nouveau_transfer_staging(nv, &tx, false))
+         return false;
       nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
       nouveau_buffer_transfer_del(nv, &tx);
    } else
-      return FALSE;
+      return false;
 
    assert(buf->domain == new_domain);
-   return TRUE;
+   return true;
 }
 
 /* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
  * We'd like to only allocate @size bytes here, but then we'd have to rebase
  * the vertex indices ...
  */
-boolean
+bool
 nouveau_user_buffer_upload(struct nouveau_context *nv,
                            struct nv04_resource *buf,
                            unsigned base, unsigned size)
@@ -825,20 +822,20 @@
 
    buf->base.width0 = base + size;
    if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
-      return FALSE;
+      return false;
 
    ret = nouveau_bo_map(buf->bo, 0, nv->client);
    if (ret)
-      return FALSE;
+      return false;
    memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);
 
-   return TRUE;
+   return true;
 }
 
 
 /* Scratch data allocation. */
 
-static INLINE int
+static inline int
 nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                          unsigned size)
 {
@@ -875,7 +872,7 @@
 /* Allocate an extra bo if we can't fit everything we need simultaneously.
  * (Could happen for very large user arrays.)
  */
-static INLINE boolean
+static inline bool
 nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
 {
    int ret;
@@ -909,7 +906,7 @@
 /* Continue to next scratch buffer, if available (no wrapping, large enough).
  * Allocate it if it has not yet been created.
  */
-static INLINE boolean
+static inline bool
 nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
 {
    struct nouveau_bo *bo;
@@ -917,14 +914,14 @@
    const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;
 
    if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
-      return FALSE;
+      return false;
    nv->scratch.id = i;
 
    bo = nv->scratch.bo[i];
    if (!bo) {
       ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
       if (ret)
-         return FALSE;
+         return false;
       nv->scratch.bo[i] = bo;
    }
    nv->scratch.current = bo;
@@ -937,10 +934,10 @@
    return !ret;
 }
 
-static boolean
+static bool
 nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
 {
-   boolean ret;
+   bool ret;
 
    ret = nouveau_scratch_next(nv, min_size);
    if (!ret)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_buffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_buffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_buffer.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_buffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -41,6 +41,8 @@
    uint8_t status;
    uint8_t domain;
 
+   uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
+
    struct nouveau_fence *fence;
    struct nouveau_fence *fence_wr;
 
@@ -58,7 +60,7 @@
                     struct nv04_resource *dst, unsigned dst_pos,
                     struct nv04_resource *src, unsigned src_pos, unsigned size);
 
-boolean
+bool
 nouveau_buffer_migrate(struct nouveau_context *,
                        struct nv04_resource *, unsigned domain);
 
@@ -66,20 +68,20 @@
 nouveau_resource_map_offset(struct nouveau_context *, struct nv04_resource *,
                             uint32_t offset, uint32_t flags);
 
-static INLINE void
+static inline void
 nouveau_resource_unmap(struct nv04_resource *res)
 {
    /* no-op */
 }
 
-static INLINE struct nv04_resource *
+static inline struct nv04_resource *
 nv04_resource(struct pipe_resource *resource)
 {
    return (struct nv04_resource *)resource;
 }
 
 /* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
-static INLINE boolean
+static inline bool
 nouveau_resource_mapped_by_gpu(struct pipe_resource *resource)
 {
    return nv04_resource(resource)->domain != 0;
@@ -93,7 +95,7 @@
 nouveau_user_buffer_create(struct pipe_screen *screen, void *ptr,
                            unsigned bytes, unsigned usage);
 
-boolean
+bool
 nouveau_user_buffer_upload(struct nouveau_context *, struct nv04_resource *,
                            unsigned base, unsigned size);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_compiler.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_compiler.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_compiler.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_compiler.c	2015-09-16 14:36:09.000000000 +0000
@@ -190,6 +190,10 @@
       type = PIPE_SHADER_GEOMETRY;
    else if (!strncmp(text, "COMP", 4))
       type = PIPE_SHADER_COMPUTE;
+   else if (!strncmp(text, "TESS_CTRL", 9))
+      type = PIPE_SHADER_TESS_CTRL;
+   else if (!strncmp(text, "TESS_EVAL", 9))
+      type = PIPE_SHADER_TESS_EVAL;
    else {
       _debug_printf("Unrecognized TGSI header\n");
       return 1;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_context.h	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -6,6 +6,8 @@
 
 #define NOUVEAU_MAX_SCRATCH_BUFS 4
 
+struct nv04_resource;
+
 struct nouveau_context {
    struct pipe_context pipe;
    struct nouveau_screen *screen;
@@ -13,7 +15,7 @@
    struct nouveau_client *client;
    struct nouveau_pushbuf *pushbuf;
 
-   boolean vbo_dirty;
+   bool vbo_dirty;
 
    void (*copy_data)(struct nouveau_context *,
                      struct nouveau_bo *dst, unsigned, unsigned,
@@ -23,8 +25,7 @@
                      unsigned, const void *);
    /* base, size refer to the whole constant buffer */
    void (*push_cb)(struct nouveau_context *,
-                   struct nouveau_bo *, unsigned domain,
-                   unsigned base, unsigned size,
+                   struct nv04_resource *,
                    unsigned offset, unsigned words, const uint32_t *);
 
    /* @return: @ref reduced by nr of references found in context */
@@ -53,7 +54,7 @@
    } stats;
 };
 
-static INLINE struct nouveau_context *
+static inline struct nouveau_context *
 nouveau_context(struct pipe_context *pipe)
 {
    return (struct nouveau_context *)pipe;
@@ -69,7 +70,7 @@
  * because we don't want to un-bo_ref each allocation every time. This is less
  * work, and we need the wrap index anyway for extreme situations.
  */
-static INLINE void
+static inline void
 nouveau_scratch_done(struct nouveau_context *nv)
 {
    nv->scratch.wrap = nv->scratch.id;
@@ -84,7 +85,7 @@
 nouveau_scratch_get(struct nouveau_context *, unsigned size, uint64_t *gpu_addr,
                     struct nouveau_bo **);
 
-static INLINE void
+static inline void
 nouveau_context_destroy(struct nouveau_context *ctx)
 {
    int i;
@@ -96,7 +97,7 @@
    FREE(ctx);
 }
 
-static INLINE  void
+static inline  void
 nouveau_context_update_frame_stats(struct nouveau_context *nv)
 {
    nv->stats.buf_cache_frame <<= 1;
@@ -104,7 +105,7 @@
       nv->stats.buf_cache_count = 0;
       nv->stats.buf_cache_frame |= 1;
       if ((nv->stats.buf_cache_frame & 0xf) == 0xf)
-         nv->screen->hint_buf_keep_sysmem_copy = TRUE;
+         nv->screen->hint_buf_keep_sysmem_copy = true;
    }
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_fence.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_fence.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_fence.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_fence.c	2015-09-16 14:36:09.000000000 +0000
@@ -28,13 +28,13 @@
 #include <sched.h>
 #endif
 
-boolean
+bool
 nouveau_fence_new(struct nouveau_screen *screen, struct nouveau_fence **fence,
-                  boolean emit)
+                  bool emit)
 {
    *fence = CALLOC_STRUCT(nouveau_fence);
    if (!*fence)
-      return FALSE;
+      return false;
 
    (*fence)->screen = screen;
    (*fence)->ref = 1;
@@ -43,7 +43,7 @@
    if (emit)
       nouveau_fence_emit(*fence);
 
-   return TRUE;
+   return true;
 }
 
 static void
@@ -58,7 +58,7 @@
    }
 }
 
-boolean
+bool
 nouveau_fence_work(struct nouveau_fence *fence,
                    void (*func)(void *), void *data)
 {
@@ -66,16 +66,16 @@
 
    if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
       func(data);
-      return TRUE;
+      return true;
    }
 
    work = CALLOC_STRUCT(nouveau_fence_work);
    if (!work)
-      return FALSE;
+      return false;
    work->func = func;
    work->data = data;
    LIST_ADD(&work->list, &fence->work);
-   return TRUE;
+   return true;
 }
 
 void
@@ -132,7 +132,7 @@
 }
 
 void
-nouveau_fence_update(struct nouveau_screen *screen, boolean flushed)
+nouveau_fence_update(struct nouveau_screen *screen, bool flushed)
 {
    struct nouveau_fence *fence;
    struct nouveau_fence *next = NULL;
@@ -167,21 +167,21 @@
 
 #define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
 
-boolean
+bool
 nouveau_fence_signalled(struct nouveau_fence *fence)
 {
    struct nouveau_screen *screen = fence->screen;
 
    if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
-      return TRUE;
+      return true;
 
    if (fence->state >= NOUVEAU_FENCE_STATE_EMITTED)
-      nouveau_fence_update(screen, FALSE);
+      nouveau_fence_update(screen, false);
 
    return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
 }
 
-boolean
+bool
 nouveau_fence_wait(struct nouveau_fence *fence)
 {
    struct nouveau_screen *screen = fence->screen;
@@ -195,16 +195,16 @@
 
    if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
       if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
-         return FALSE;
+         return false;
 
    if (fence == screen->fence.current)
       nouveau_fence_next(screen);
 
    do {
-      nouveau_fence_update(screen, FALSE);
+      nouveau_fence_update(screen, false);
 
       if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
-         return TRUE;
+         return true;
       if (!spins)
          NOUVEAU_DRV_STAT(screen, any_non_kernel_fence_sync_count, 1);
       spins++;
@@ -218,7 +218,7 @@
                 fence->sequence,
                 screen->fence.sequence_ack, screen->fence.sequence);
 
-   return FALSE;
+   return false;
 }
 
 void
@@ -229,5 +229,5 @@
 
    nouveau_fence_ref(NULL, &screen->fence.current);
 
-   nouveau_fence_new(screen, &screen->fence.current, FALSE);
+   nouveau_fence_new(screen, &screen->fence.current, false);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_fence.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_fence.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_fence.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_fence.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,15 +29,15 @@
 void nouveau_fence_emit(struct nouveau_fence *);
 void nouveau_fence_del(struct nouveau_fence *);
 
-boolean nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
-                          boolean emit);
-boolean nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
-void    nouveau_fence_update(struct nouveau_screen *, boolean flushed);
-void    nouveau_fence_next(struct nouveau_screen *);
-boolean nouveau_fence_wait(struct nouveau_fence *);
-boolean nouveau_fence_signalled(struct nouveau_fence *);
+bool nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
+                       bool emit);
+bool nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
+void nouveau_fence_update(struct nouveau_screen *, bool flushed);
+void nouveau_fence_next(struct nouveau_screen *);
+bool nouveau_fence_wait(struct nouveau_fence *);
+bool nouveau_fence_signalled(struct nouveau_fence *);
 
-static INLINE void
+static inline void
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
    if (fence)
@@ -51,7 +51,7 @@
    *ref = fence;
 }
 
-static INLINE struct nouveau_fence *
+static inline struct nouveau_fence *
 nouveau_fence(struct pipe_fence_handle *fence)
 {
    return (struct nouveau_fence *)fence;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_gldefs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_gldefs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_gldefs.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_gldefs.h	2015-09-16 14:36:09.000000000 +0000
@@ -1,7 +1,7 @@
 #ifndef __NOUVEAU_GLDEFS_H__
 #define __NOUVEAU_GLDEFS_H__
 
-static INLINE unsigned
+static inline unsigned
 nvgl_blend_func(unsigned factor)
 {
 	switch (factor) {
@@ -40,7 +40,7 @@
 	}
 }
 
-static INLINE unsigned
+static inline unsigned
 nvgl_blend_eqn(unsigned func)
 {
 	switch (func) {
@@ -59,7 +59,7 @@
 	}
 }
 
-static INLINE unsigned
+static inline unsigned
 nvgl_logicop_func(unsigned func)
 {
 	switch (func) {
@@ -100,7 +100,7 @@
 	}
 }
 
-static INLINE unsigned
+static inline unsigned
 nvgl_comparison_op(unsigned op)
 {
 	switch (op) {
@@ -125,7 +125,7 @@
 	}
 }
 
-static INLINE unsigned
+static inline unsigned
 nvgl_polygon_mode(unsigned mode)
 {
 	switch (mode) {
@@ -140,7 +140,7 @@
 	}
 }
 
-static INLINE unsigned
+static inline unsigned
 nvgl_stencil_op(unsigned op)
 {
 	switch (op) {
@@ -165,7 +165,7 @@
 	}
 }
 
-static INLINE unsigned
+static inline unsigned
 nvgl_primitive(unsigned prim) {
 	switch (prim) {
 	case PIPE_PRIM_POINTS:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_mm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_mm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_mm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_mm.c	2015-09-16 14:36:09.000000000 +0000
@@ -70,7 +70,7 @@
    return -1;
 }
 
-static INLINE void
+static inline void
 mm_slab_free(struct mm_slab *slab, int i)
 {
    assert(i < slab->count);
@@ -79,7 +79,7 @@
    assert(slab->free <= slab->count);
 }
 
-static INLINE int
+static inline int
 mm_get_order(uint32_t size)
 {
    int s = __builtin_clz(size) ^ 31;
@@ -104,7 +104,7 @@
 }
 
 /* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
-static INLINE uint32_t
+static inline uint32_t
 mm_default_slab_size(unsigned chunk_order)
 {
    static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
@@ -263,7 +263,7 @@
    return cache;
 }
 
-static INLINE void
+static inline void
 nouveau_mm_free_slabs(struct list_head *head)
 {
    struct mm_slab *slab, *next;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -68,17 +68,13 @@
 }
 
 static boolean
-nouveau_screen_fence_signalled(struct pipe_screen *screen,
-                               struct pipe_fence_handle *pfence)
-{
-        return nouveau_fence_signalled(nouveau_fence(pfence));
-}
-
-static boolean
 nouveau_screen_fence_finish(struct pipe_screen *screen,
 			    struct pipe_fence_handle *pfence,
                             uint64_t timeout)
 {
+	if (!timeout)
+		return nouveau_fence_signalled(nouveau_fence(pfence));
+
 	return nouveau_fence_wait(nouveau_fence(pfence));
 }
 
@@ -115,7 +111,7 @@
 }
 
 
-boolean
+bool
 nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
 			     struct nouveau_bo *bo,
 			     unsigned stride,
@@ -127,11 +123,11 @@
 		return nouveau_bo_name_get(bo, &whandle->handle) == 0;
 	} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
 		whandle->handle = bo->handle;
-		return TRUE;
+		return true;
 	} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
 		return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
 	} else {
-		return FALSE;
+		return false;
 	}
 }
 
@@ -164,6 +160,16 @@
 		size = sizeof(nvc0_data);
 	}
 
+	/*
+	 * Set default VRAM domain if not overridden
+	 */
+	if (!screen->vram_domain) {
+		if (dev->vram_size > 0)
+			screen->vram_domain = NOUVEAU_BO_VRAM;
+		else
+			screen->vram_domain = NOUVEAU_BO_GART;
+	}
+
 	ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
 				 data, size, &screen->channel);
 	if (ret)
@@ -193,7 +199,6 @@
 	pscreen->get_timestamp = nouveau_screen_get_timestamp;
 
 	pscreen->fence_reference = nouveau_screen_fence_ref;
-	pscreen->fence_signalled = nouveau_screen_fence_signalled;
 	pscreen->fence_finish = nouveau_screen_fence_finish;
 
 	util_format_s3tc_init();
@@ -204,7 +209,8 @@
 		PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
 		PIPE_BIND_CURSOR |
 		PIPE_BIND_SAMPLER_VIEW |
-		PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE |
+		PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
+                PIPE_BIND_COMPUTE_RESOURCE |
 		PIPE_BIND_GLOBAL;
 	screen->sysmem_bindings =
 		PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_screen.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -49,7 +49,9 @@
 
 	int64_t cpu_gpu_time_delta;
 
-	boolean hint_buf_keep_sysmem_copy;
+	bool hint_buf_keep_sysmem_copy;
+
+	unsigned vram_domain;
 
 	struct {
 		unsigned profiles_checked;
@@ -94,6 +96,8 @@
 #endif
 };
 
+#define NV_VRAM_DOMAIN(screen) ((screen)->vram_domain)
+
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
 # define NOUVEAU_DRV_STAT(s, n, v) do {         \
       (s)->stats.named.n += (v);               \
@@ -108,15 +112,15 @@
 # define NOUVEAU_DRV_STAT_IFD(x)
 #endif
 
-static INLINE struct nouveau_screen *
+static inline struct nouveau_screen *
 nouveau_screen(struct pipe_screen *pscreen)
 {
 	return (struct nouveau_screen *)pscreen;
 }
 
-boolean nouveau_drm_screen_unref(struct nouveau_screen *screen);
+bool nouveau_drm_screen_unref(struct nouveau_screen *screen);
 
-boolean
+bool
 nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
 			     struct nouveau_bo *bo,
 			     unsigned stride,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_statebuf.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_statebuf.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_statebuf.h	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_statebuf.h	2015-09-16 14:36:09.000000000 +0000
@@ -20,7 +20,7 @@
 #define sb_data(sb, v) *(sb).p++ = (v)
 #endif
 
-static INLINE uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
+static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
 {
 	return (size << 18) | (subc << 13) | mthd;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_video.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_video.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_video.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_video.c	2015-09-16 14:36:09.000000000 +0000
@@ -100,7 +100,7 @@
    dec->current = dec->future = dec->past = 8;
 }
 
-static INLINE void
+static inline void
 nouveau_vpe_mb_dct_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_macroblock *mb)
 {
    int cbb;
@@ -125,7 +125,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nouveau_vpe_mb_data_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_macroblock *mb)
 {
    int cbb;
@@ -143,7 +143,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nouveau_vpe_mb_dct_header(struct nouveau_decoder *dec,
                           const struct pipe_mpeg12_macroblock *mb,
                           bool luma)
@@ -187,7 +187,7 @@
                      x | (y << NV17_MPEG_CMD_MB_COORDS_Y__SHIFT));
 }
 
-static INLINE unsigned int
+static inline unsigned int
 nouveau_vpe_mb_mv_flags(bool luma, int mv_h, int mv_v, bool forward, bool first, bool vert)
 {
    unsigned mc_header = 0;
@@ -228,7 +228,7 @@
    return val / mult;
 }
 
-static INLINE void
+static inline void
 nouveau_vpe_mb_mv(struct nouveau_decoder *dec, unsigned mc_header,
                    bool luma, bool frame, bool forward, bool vert,
                    int x, int y, const short motions[2],
@@ -296,16 +296,16 @@
       case PIPE_MPEG12_MO_TYPE_DUAL_PRIME: {
          base = NV17_MPEG_CMD_CHROMA_MV_HEADER_COUNT_2;
          if (forward) {
-            nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, FALSE,
-                              x, y, mb->PMV[0][0], dec->past, TRUE);
-            nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, TRUE,
-                              x, y2, mb->PMV[0][0], dec->past, FALSE);
+            nouveau_vpe_mb_mv(dec, base, luma, frame, true, false,
+                              x, y, mb->PMV[0][0], dec->past, true);
+            nouveau_vpe_mb_mv(dec, base, luma, frame, true, true,
+                              x, y2, mb->PMV[0][0], dec->past, false);
          }
          if (backward && forward) {
-            nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, TRUE,
-                              x, y, mb->PMV[1][0], dec->future, TRUE);
-            nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, FALSE,
-                              x, y2, mb->PMV[1][1], dec->future, FALSE);
+            nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, true,
+                              x, y, mb->PMV[1][0], dec->future, true);
+            nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, false,
+                              x, y2, mb->PMV[1][1], dec->future, false);
          } else assert(!backward);
          break;
       }
@@ -320,13 +320,13 @@
          if (frame)
             base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_TYPE_FRAME;
          if (forward)
-            nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+            nouveau_vpe_mb_mv(dec, base, luma, frame, true,
                               dec->picture_structure != PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP,
-                              x, y, mb->PMV[0][0], dec->past, TRUE);
+                              x, y, mb->PMV[0][0], dec->past, true);
          if (backward && forward)
-            nouveau_vpe_mb_mv(dec, base, luma, frame, FALSE,
+            nouveau_vpe_mb_mv(dec, base, luma, frame, false,
                               dec->picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP,
-                              x, y, mb->PMV[0][1], dec->future, TRUE);
+                              x, y, mb->PMV[0][1], dec->future, true);
          else assert(!backward);
          break;
       }
@@ -341,11 +341,11 @@
        base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_TYPE_FRAME;
     /* frame 16x16 */
    if (forward)
-       nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, FALSE,
-                         x, y, mb->PMV[0][0], dec->past, TRUE);
+       nouveau_vpe_mb_mv(dec, base, luma, frame, true, false,
+                         x, y, mb->PMV[0][0], dec->past, true);
    if (backward)
-       nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, FALSE,
-                         x, y, mb->PMV[0][1], dec->future, TRUE);
+       nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, false,
+                         x, y, mb->PMV[0][1], dec->future, true);
     return;
 
 mv2:
@@ -353,20 +353,20 @@
    if (!frame)
       base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_MV_SPLIT_HALF_MB;
    if (forward) {
-      nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+      nouveau_vpe_mb_mv(dec, base, luma, frame, true,
                         mb->motion_vertical_field_select & PIPE_MPEG12_FS_FIRST_FORWARD,
-                        x, y, mb->PMV[0][0], dec->past, TRUE);
-      nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+                        x, y, mb->PMV[0][0], dec->past, true);
+      nouveau_vpe_mb_mv(dec, base, luma, frame, true,
                         mb->motion_vertical_field_select & PIPE_MPEG12_FS_SECOND_FORWARD,
-                        x, y2, mb->PMV[1][0], dec->past, FALSE);
+                        x, y2, mb->PMV[1][0], dec->past, false);
    }
    if (backward) {
       nouveau_vpe_mb_mv(dec, base, luma, frame, !forward,
                         mb->motion_vertical_field_select & PIPE_MPEG12_FS_FIRST_BACKWARD,
-                        x, y, mb->PMV[0][1], dec->future, TRUE);
+                        x, y, mb->PMV[0][1], dec->future, true);
       nouveau_vpe_mb_mv(dec, base, luma, frame, !forward,
                         mb->motion_vertical_field_select & PIPE_MPEG12_FS_SECOND_BACKWARD,
-                        x, y2, mb->PMV[1][1], dec->future, FALSE);
+                        x, y2, mb->PMV[1][1], dec->future, false);
    }
 }
 
@@ -438,14 +438,14 @@
    mb = (const struct pipe_mpeg12_macroblock *)pipe_mb;
    for (i = 0; i < num_macroblocks; ++i, mb++) {
       if (mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA) {
-         nouveau_vpe_mb_dct_header(dec, mb, TRUE);
-         nouveau_vpe_mb_dct_header(dec, mb, FALSE);
+         nouveau_vpe_mb_dct_header(dec, mb, true);
+         nouveau_vpe_mb_dct_header(dec, mb, false);
       } else {
-         nouveau_vpe_mb_mv_header(dec, mb, TRUE);
-         nouveau_vpe_mb_dct_header(dec, mb, TRUE);
+         nouveau_vpe_mb_mv_header(dec, mb, true);
+         nouveau_vpe_mb_dct_header(dec, mb, true);
 
-         nouveau_vpe_mb_mv_header(dec, mb, FALSE);
-         nouveau_vpe_mb_dct_header(dec, mb, FALSE);
+         nouveau_vpe_mb_mv_header(dec, mb, false);
+         nouveau_vpe_mb_dct_header(dec, mb, false);
       }
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
          nouveau_vpe_mb_dct_blocks(dec, mb);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_video.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_video.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_video.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_video.h	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
 #define NV31_VIDEO_BIND_CMD     NV31_MPEG_IMAGE_Y_OFFSET__LEN
 #define NV31_VIDEO_BIND_COUNT  (NV31_MPEG_IMAGE_Y_OFFSET__LEN + 1)
 
-static INLINE void
+static inline void
 nouveau_vpe_write(struct nouveau_decoder *dec, unsigned data) {
    dec->cmds[dec->ofs++] = data;
 }
@@ -54,33 +54,33 @@
 #define NV31_MPEG(mthd) SUBC_MPEG(NV31_MPEG_##mthd)
 #define NV84_MPEG(mthd) SUBC_MPEG(NV84_MPEG_##mthd)
 
-static INLINE uint32_t
+static inline uint32_t
 NV04_FIFO_PKHDR(int subc, int mthd, unsigned size)
 {
    return 0x00000000 | (size << 18) | (subc << 13) | mthd;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 NV04_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
 {
    return 0x40000000 | (size << 18) | (subc << 13) | mthd;
 }
 
-static INLINE void
+static inline void
 BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
 {
    PUSH_SPACE(push, size + 1);
    PUSH_DATA (push, NV04_FIFO_PKHDR(subc, mthd, size));
 }
 
-static INLINE void
+static inline void
 BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
 {
    PUSH_SPACE(push, size + 1);
    PUSH_DATA (push, NV04_FIFO_PKHDR_NI(subc, mthd, size));
 }
 
-static INLINE void
+static inline void
 PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd,
            struct nouveau_bo *bo, uint32_t offset,
 	   struct nouveau_bufctx *ctx, int bin, uint32_t rw)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_vp3_video.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_vp3_video.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_vp3_video.h	2014-09-25 15:15:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_vp3_video.h	2015-09-16 14:36:09.000000000 +0000
@@ -135,22 +135,22 @@
 	uint32_t parse_endpos[0x10]; // 1c0
 };
 
-static INLINE uint32_t nouveau_vp3_video_align(uint32_t h)
+static inline uint32_t nouveau_vp3_video_align(uint32_t h)
 {
    return ((h+0x3f)&~0x3f);
 };
 
-static INLINE uint32_t mb(uint32_t coord)
+static inline uint32_t mb(uint32_t coord)
 {
    return (coord + 0xf)>>4;
 }
 
-static INLINE uint32_t mb_half(uint32_t coord)
+static inline uint32_t mb_half(uint32_t coord)
 {
    return (coord + 0x1f)>>5;
 }
 
-static INLINE uint64_t
+static inline uint64_t
 nouveau_vp3_video_addr(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
 {
    uint64_t ret;
@@ -161,7 +161,7 @@
    return dec->ref_bo->offset + ret;
 }
 
-static INLINE void
+static inline void
 nouveau_vp3_ycbcr_offsets(struct nouveau_vp3_decoder *dec, uint32_t *y2,
                           uint32_t *cbcr, uint32_t *cbcr2)
 {
@@ -182,7 +182,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nouveau_vp3_inter_sizes(struct nouveau_vp3_decoder *dec, uint32_t slice_count,
                         uint32_t *slice_size, uint32_t *bucket_size,
                         uint32_t *ring_size)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nouveau_winsys.h	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nouveau_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -15,34 +15,34 @@
 #define NOUVEAU_MIN_BUFFER_MAP_ALIGN      64
 #define NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK (NOUVEAU_MIN_BUFFER_MAP_ALIGN - 1)
 
-static INLINE uint32_t
+static inline uint32_t
 PUSH_AVAIL(struct nouveau_pushbuf *push)
 {
    return push->end - push->cur;
 }
 
-static INLINE boolean
+static inline bool
 PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
 {
    if (PUSH_AVAIL(push) < size)
       return nouveau_pushbuf_space(push, size, 0, 0) == 0;
-   return TRUE;
+   return true;
 }
 
-static INLINE void
+static inline void
 PUSH_DATA(struct nouveau_pushbuf *push, uint32_t data)
 {
    *push->cur++ = data;
 }
 
-static INLINE void
+static inline void
 PUSH_DATAp(struct nouveau_pushbuf *push, const void *data, uint32_t size)
 {
    memcpy(push->cur, data, size * 4);
    push->cur += size;
 }
 
-static INLINE void
+static inline void
 PUSH_DATAf(struct nouveau_pushbuf *push, float f)
 {
    union { float f; uint32_t i; } u;
@@ -50,7 +50,7 @@
    PUSH_DATA(push, u.i);
 }
 
-static INLINE void
+static inline void
 PUSH_KICK(struct nouveau_pushbuf *push)
 {
    nouveau_pushbuf_kick(push, push->channel);
@@ -60,7 +60,7 @@
 #define NOUVEAU_RESOURCE_FLAG_LINEAR   (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 #define NOUVEAU_RESOURCE_FLAG_DRV_PRIV (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
 
-static INLINE uint32_t
+static inline uint32_t
 nouveau_screen_transfer_flags(unsigned pipe)
 {
 	uint32_t flags = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h	2015-09-16 14:36:09.000000000 +0000
@@ -1459,6 +1459,8 @@
 
 #define NV40_3D_VTX_CACHE_INVALIDATE				0x00001714
 
+#define NV40_3D_VB_ELEMENT_BASE					0x0000173c
+
 #define NV30_3D_VTXFMT(i0)				       (0x00001740 + 0x4*(i0))
 #define NV30_3D_VTXFMT__ESIZE					0x00000004
 #define NV30_3D_VTXFMT__LEN					0x00000010
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_clear.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_clear.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_clear.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_clear.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,7 +32,7 @@
 #include "nv30/nv30_context.h"
 #include "nv30/nv30_format.h"
 
-static INLINE uint32_t
+static inline uint32_t
 pack_rgba(enum pipe_format format, const float *rgba)
 {
    union util_color uc;
@@ -40,7 +40,7 @@
    return uc.ui[0];
 }
 
-static INLINE uint32_t
+static inline uint32_t
 pack_zeta(enum pipe_format format, double depth, unsigned stencil)
 {
    uint32_t zuint = (uint32_t)(depth * 4294967295.0);
@@ -58,7 +58,7 @@
    struct pipe_framebuffer_state *fb = &nv30->framebuffer;
    uint32_t colr = 0, zeta = 0, mode = 0;
 
-   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, TRUE))
+   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, true))
       return;
 
    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_context.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
    screen = &nv30->screen->base;
 
    nouveau_fence_next(screen);
-   nouveau_fence_update(screen, TRUE);
+   nouveau_fence_update(screen, true);
 
    if (push->bufctx) {
       struct nouveau_bufref *bref;
@@ -165,6 +165,12 @@
    if (nv30->draw)
       draw_destroy(nv30->draw);
 
+   if (nv30->blit_vp)
+      nouveau_heap_free(&nv30->blit_vp);
+
+   if (nv30->blit_fp)
+      pipe_resource_reference(&nv30->blit_fp, NULL);
+
    if (nv30->screen->base.pushbuf->user_priv == &nv30->bufctx)
       nv30->screen->base.pushbuf->user_priv = NULL;
 
@@ -233,7 +239,7 @@
 
    nv30->config.aniso = NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_OFF;
 
-   if (debug_get_bool_option("NV30_SWTNL", FALSE))
+   if (debug_get_bool_option("NV30_SWTNL", false))
       nv30->draw_flags |= NV30_NEW_SWTNL;
 
    nv30->sample_mask = 0xffff;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_context.h	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -51,7 +51,8 @@
       unsigned rt_enable;
       unsigned scissor_off;
       unsigned num_vtxelts;
-      boolean  prim_restart;
+      int index_bias;
+      bool prim_restart;
       struct nv30_fragprog *fragprog;
    } state;
 
@@ -114,17 +115,17 @@
    uint32_t vbo_user;
    unsigned vbo_min_index;
    unsigned vbo_max_index;
-   boolean  vbo_push_hint;
+   bool vbo_push_hint;
 
    struct nouveau_heap  *blit_vp;
    struct pipe_resource *blit_fp;
 
    struct pipe_query *render_cond_query;
    unsigned render_cond_mode;
-   boolean render_cond_cond;
+   bool render_cond_cond;
 };
 
-static INLINE struct nv30_context *
+static inline struct nv30_context *
 nv30_context(struct pipe_context *pipe)
 {
    return (struct nv30_context *)pipe;
@@ -203,8 +204,8 @@
 void
 nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
 
-boolean
-nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl);
+bool
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, bool hwtnl);
 
 void
 nv30_state_release(struct nv30_context *nv30);
@@ -213,7 +214,7 @@
 #define NV30_PRIM_GL_CASE(n) \
    case PIPE_PRIM_##n: return NV30_3D_VERTEX_BEGIN_END_##n
 
-static INLINE unsigned
+static inline unsigned
 nv30_prim_gl(unsigned prim)
 {
    switch (prim) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_draw.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -52,7 +52,7 @@
    uint32_t prim;
 };
 
-static INLINE struct nv30_render *
+static inline struct nv30_render *
 nv30_render(struct vbuf_render *render)
 {
    return (struct nv30_render *)render;
@@ -79,12 +79,12 @@
                                      PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM,
                                      render->max_vertex_buffer_bytes);
       if (!r->buffer)
-         return FALSE;
+         return false;
 
       r->offset = 0;
    }
 
-   return TRUE;
+   return true;
 }
 
 static void *
@@ -134,7 +134,7 @@
                        NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
    }
 
-   if (!nv30_state_validate(nv30, ~0, FALSE))
+   if (!nv30_state_validate(nv30, ~0, false))
       return;
 
    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -179,7 +179,7 @@
                        NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
    }
 
-   if (!nv30_state_validate(nv30, ~0, FALSE))
+   if (!nv30_state_validate(nv30, ~0, false))
       return;
 
    BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -221,7 +221,7 @@
    [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 },
 };
 
-static boolean
+static bool
 vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
 {
    struct nv30_screen *screen = r->nv30->screen;
@@ -245,7 +245,7 @@
    }
 
    if (emit == EMIT_OMIT)
-      return FALSE;
+      return false;
 
    draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib);
    format = draw_translate_vinfo_format(emit);
@@ -272,10 +272,10 @@
       assert(sem == TGSI_SEMANTIC_TEXCOORD);
       *idx = 0x00001000 << (result - 8);
    }
-   return TRUE;
+   return true;
 }
 
-static boolean
+static bool
 nv30_render_validate(struct nv30_context *nv30)
 {
    struct nv30_render *r = nv30_render(nv30->draw->render);
@@ -300,7 +300,7 @@
          }
 
          if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog))
-            return FALSE;
+            return false;
       }
    }
 
@@ -370,7 +370,7 @@
    }
 
    vinfo->size /= 4;
-   return TRUE;
+   return true;
 }
 
 void
@@ -519,6 +519,6 @@
    draw_set_rasterize_stage(draw, stage);
    draw_wide_line_threshold(draw, 10000000.f);
    draw_wide_point_threshold(draw, 10000000.f);
-   draw_wide_point_sprites(draw, TRUE);
+   draw_wide_point_sprites(draw, true);
    nv30->draw = draw;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_format.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_format.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_format.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_format.h	2015-09-16 14:36:09.000000000 +0000
@@ -27,28 +27,28 @@
 };
 
 extern const struct nv30_format_info nv30_format_info_table[];
-static INLINE const struct nv30_format_info *
+static inline const struct nv30_format_info *
 nv30_format_info(struct pipe_screen *pscreen, enum pipe_format format)
 {
    return &nv30_format_info_table[format];
 }
 
 extern const struct nv30_format nv30_format_table[];
-static INLINE const struct nv30_format *
+static inline const struct nv30_format *
 nv30_format(struct pipe_screen *pscreen, enum pipe_format format)
 {
    return &nv30_format_table[format];
 }
 
 extern const struct nv30_vtxfmt nv30_vtxfmt_table[];
-static INLINE const struct nv30_vtxfmt *
+static inline const struct nv30_vtxfmt *
 nv30_vtxfmt(struct pipe_screen *pscreen, enum pipe_format format)
 {
    return &nv30_vtxfmt_table[format];
 }
 
 extern const struct nv30_texfmt nv30_texfmt_table[];
-static INLINE const struct nv30_texfmt *
+static inline const struct nv30_texfmt *
 nv30_texfmt(struct pipe_screen *pscreen, enum pipe_format format)
 {
    return &nv30_texfmt_table[format];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c	2015-09-16 14:36:09.000000000 +0000
@@ -37,22 +37,26 @@
    struct nouveau_context *nv = &nv30->base;
    struct nv30_fragprog *fp = nv30->fragprog.program;
    struct pipe_context *pipe = &nv30->base.pipe;
-   struct pipe_transfer *transfer;
-   uint32_t *map;
-   int i; (void)i;
 
-   if (unlikely(!fp->buffer)) {
+   if (unlikely(!fp->buffer))
       fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4);
-   }
 
-   map = pipe_buffer_map(pipe, fp->buffer, PIPE_TRANSFER_WRITE, &transfer);
 #ifndef PIPE_ARCH_BIG_ENDIAN
-   memcpy(map, fp->insn, fp->insn_len * 4);
+   pipe_buffer_write(pipe, fp->buffer, 0, fp->insn_len * 4, fp->insn);
 #else
-   for (i = 0; i < fp->insn_len; i++)
-      *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+   {
+      struct pipe_transfer *transfer;
+      uint32_t *map;
+      int i;
+
+      map = pipe_buffer_map(pipe, fp->buffer,
+                            PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+                            &transfer);
+      for (i = 0; i < fp->insn_len; i++)
+         *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+      pipe_buffer_unmap(pipe, transfer);
+   }
 #endif
-   pipe_buffer_unmap(pipe, transfer);
 
    if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM)
       nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM);
@@ -64,7 +68,7 @@
    struct nouveau_pushbuf *push = nv30->base.pushbuf;
    struct nouveau_object *eng3d = nv30->screen->eng3d;
    struct nv30_fragprog *fp = nv30->fragprog.program;
-   boolean upload = FALSE;
+   bool upload = false;
    int i;
 
    if (!fp->translated) {
@@ -72,7 +76,7 @@
       if (!fp->translated)
          return;
 
-      upload = TRUE;
+      upload = true;
    }
 
    /* update constants, also needs to be done on every fp switch as we
@@ -89,7 +93,7 @@
          if (!memcmp(&fp->insn[off], &cbuf[idx], 4 * 4))
             continue;
          memcpy(&fp->insn[off], &cbuf[idx], 4 * 4);
-         upload = TRUE;
+         upload = true;
       }
    }
 
@@ -161,8 +165,15 @@
 nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
 {
    struct nv30_context *nv30 = nv30_context(pipe);
+   struct nv30_fragprog *fp = hwcso;
+
+   /* reset the bufctx so that we don't keep a dangling reference to the fp
+    * code
+    */
+   if (fp != nv30->state.fragprog)
+      PUSH_RESET(nv30->base.pushbuf, BUFCTX_FRAGPROG);
 
-   nv30->fragprog.program = hwcso;
+   nv30->fragprog.program = fp;
    nv30->dirty |= NV30_NEW_FRAGPROG;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_miptree.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_miptree.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_miptree.c	2015-09-16 14:36:09.000000000 +0000
@@ -28,12 +28,13 @@
 #include "util/u_surface.h"
 
 #include "nv_m2mf.xml.h"
+#include "nv_object.xml.h"
 #include "nv30/nv30_screen.h"
 #include "nv30/nv30_context.h"
 #include "nv30/nv30_resource.h"
 #include "nv30/nv30_transfer.h"
 
-static INLINE unsigned
+static inline unsigned
 layer_offset(struct pipe_resource *pt, unsigned level, unsigned layer)
 {
    struct nv30_miptree *mt = nv30_miptree(pt);
@@ -54,7 +55,7 @@
    unsigned stride;
 
    if (!mt || !mt->base.bo)
-      return FALSE;
+      return false;
 
    stride = mt->level[0].pitch;
 
@@ -78,13 +79,13 @@
    unsigned nblocksy;
 };
 
-static INLINE struct nv30_transfer *
+static inline struct nv30_transfer *
 nv30_transfer(struct pipe_transfer *ptx)
 {
    return (struct nv30_transfer *)ptx;
 }
 
-static INLINE void
+static inline void
 define_rect(struct pipe_resource *pt, unsigned level, unsigned z,
             unsigned x, unsigned y, unsigned w, unsigned h,
             struct nv30_rect *rect)
@@ -144,21 +145,54 @@
    nv30_transfer_rect(nv30, NEAREST, &src, &dst);
 }
 
-void
-nv30_resource_resolve(struct pipe_context *pipe,
-                      const struct pipe_resolve_info *info)
+static void
+nv30_resource_resolve(struct nv30_context *nv30,
+                      const struct pipe_blit_info *info)
 {
-#if 0
-   struct nv30_context *nv30 = nv30_context(pipe);
+   struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
    struct nv30_rect src, dst;
+   unsigned x, x0, x1, y, y1, w, h;
 
-   define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
-               info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
-   define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
-               info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
+   define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
+      info->src.box.y, info->src.box.width, info->src.box.height, &src);
+   define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
+      info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
+
+   x0 = src.x0;
+   x1 = src.x1;
+   y1 = src.y1;
+
+   /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   for (y = src.y0; y < y1; y += h) {
+      h = y1 - y;
+      if (h > 1024)
+         h = 1024;
+
+      src.y0 = 0;
+      src.y1 = h;
+      src.h = h;
+
+      dst.y1 = dst.y0 + (h >> src_mt->ms_y);
+      dst.h = h >> src_mt->ms_y;
+
+      for (x = x0; x < x1; x += w) {
+         w = x1 - x;
+         if (w > 1024)
+            w = 1024;
+
+         src.offset = y * src.pitch + x * src.cpp;
+         src.x0 = 0;
+         src.x1 = w;
+         src.w = w;
+
+         dst.offset = (y >> src_mt->ms_y) * dst.pitch +
+                      (x >> src_mt->ms_x) * dst.cpp;
+         dst.x1 = dst.x0 + (w >> src_mt->ms_x);
+         dst.w = w >> src_mt->ms_x;
 
-   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
-#endif
+         nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+      }
+   }
 }
 
 void
@@ -172,7 +206,7 @@
        info.dst.resource->nr_samples <= 1 &&
        !util_format_is_depth_or_stencil(info.src.resource->format) &&
        !util_format_is_pure_integer(info.src.resource->format)) {
-      debug_printf("nv30: color resolve unimplemented\n");
+      nv30_resource_resolve(nv30, blit_info);
       return;
    }
 
@@ -242,8 +276,8 @@
    tx->base.level = level;
    tx->base.usage = usage;
    tx->base.box = *box;
-   tx->base.stride = util_format_get_nblocksx(pt->format, box->width) *
-                     util_format_get_blocksize(pt->format);
+   tx->base.stride = align(util_format_get_nblocksx(pt->format, box->width) *
+                           util_format_get_blocksize(pt->format), 64);
    tx->base.layer_stride = util_format_get_nblocksy(pt->format, box->height) *
                            tx->base.stride;
 
@@ -362,6 +396,7 @@
    blocksz = util_format_get_blocksize(pt->format);
 
    if ((pt->target == PIPE_TEXTURE_RECT) ||
+       (pt->bind & PIPE_BIND_SCANOUT) ||
        !util_is_power_of_two(pt->width0) ||
        !util_is_power_of_two(pt->height0) ||
        !util_is_power_of_two(pt->depth0) ||
@@ -369,10 +404,18 @@
        util_format_is_float(pt->format) || mt->ms_mode) {
       mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
       mt->uniform_pitch = align(mt->uniform_pitch, 64);
+      if (pt->bind & PIPE_BIND_SCANOUT) {
+         struct nv30_screen *screen = nv30_screen(pscreen);
+         int pitch_align = MAX2(
+               screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
+               /* round_down_pow2(mt->uniform_pitch / 4) */
+               1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
+         mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
+      }
    }
 
    if (!mt->uniform_pitch)
-      mt->swizzled = TRUE;
+      mt->swizzled = true;
 
    size = 0;
    for (l = 0; l <= pt->last_level; l++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_push.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_push.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_push.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_push.c	2015-09-16 14:36:09.000000000 +0000
@@ -47,12 +47,12 @@
 
    struct translate *translate;
 
-   boolean primitive_restart;
+   bool primitive_restart;
    uint32_t prim;
    uint32_t restart_index;
 };
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
 {
    unsigned i;
@@ -62,7 +62,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
 {
    unsigned i;
@@ -72,7 +72,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
 {
    unsigned i;
@@ -199,7 +199,7 @@
 {
    struct push_context ctx;
    unsigned i, index_size;
-   boolean apply_bias = info->indexed && info->index_bias;
+   bool apply_bias = info->indexed && info->index_bias;
 
    ctx.push = nv30->base.pushbuf;
    ctx.translate = nv30->vertex->translate;
@@ -241,7 +241,7 @@
    } else {
       ctx.idxbuf = NULL;
       index_size = 0;
-      ctx.primitive_restart = FALSE;
+      ctx.primitive_restart = false;
       ctx.restart_index = 0;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -98,7 +98,7 @@
    uint64_t result;
 };
 
-static INLINE struct nv30_query *
+static inline struct nv30_query *
 nv30_query(struct pipe_query *pipe)
 {
    return (struct nv30_query *)pipe;
@@ -208,7 +208,7 @@
    if (ntfy1) {
       while (ntfy1[3] & 0xff000000) {
          if (!wait)
-            return FALSE;
+            return false;
       }
 
       switch (q->type) {
@@ -228,7 +228,7 @@
    }
 
    *res64 = q->result;
-   return TRUE;
+   return true;
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_resource.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_resource.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_resource.c	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_resource.c	2015-09-16 14:36:09.000000000 +0000
@@ -42,12 +42,12 @@
          if (!nv30->vtxbuf[i].buffer)
             continue;
          if (nv30->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-            nv30->base.vbo_dirty = TRUE;
+            nv30->base.vbo_dirty = true;
       }
 
       if (nv30->idxbuf.buffer &&
           nv30->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-         nv30->base.vbo_dirty = TRUE;
+         nv30->base.vbo_dirty = true;
    }
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_resource.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_resource.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_resource.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_resource.h	2015-09-16 14:36:09.000000000 +0000
@@ -15,7 +15,7 @@
    uint16_t depth;
 };
 
-static INLINE struct nv30_surface *
+static inline struct nv30_surface *
 nv30_surface(struct pipe_surface *ps)
 {
    return (struct nv30_surface *)ps;
@@ -32,13 +32,13 @@
    struct nv30_miptree_level level[13];
    uint32_t uniform_pitch;
    uint32_t layer_size;
-   boolean swizzled;
+   bool swizzled;
    unsigned ms_mode;
    unsigned ms_x:1;
    unsigned ms_y:1;
 };
 
-static INLINE struct nv30_miptree *
+static inline struct nv30_miptree *
 nv30_miptree(struct pipe_resource *pt)
 {
    return (struct nv30_miptree *)pt;
@@ -66,9 +66,6 @@
                           const struct pipe_box *src_box);
 
 void
-nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
-
-void
 nv30_blit(struct pipe_context *pipe,
           const struct pipe_blit_info *blit_info);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -69,6 +69,8 @@
       return PIPE_ENDIAN_LITTLE;
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
       return 16;
+   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+      return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
    case PIPE_CAP_MAX_VIEWPORTS:
       return 1;
    case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
@@ -96,6 +98,9 @@
    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
       return 1;
+   /* nv35 capabilities */
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
+      return eng3d->oclass == NV35_3D_CLASS || eng3d->oclass >= NV40_3D_CLASS;
    /* nv4x capabilities */
    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
    case PIPE_CAP_NPOT_TEXTURES:
@@ -135,7 +140,6 @@
    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
    case PIPE_CAP_START_INSTANCE:
    case PIPE_CAP_TEXTURE_MULTISAMPLE:
-   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
@@ -162,6 +166,9 @@
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -252,6 +259,7 @@
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
          return 0;
       default:
          debug_printf("unknown vertex shader param %d\n", param);
@@ -292,6 +300,7 @@
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
          return 0;
       default:
          debug_printf("unknown fragment shader param %d\n", param);
@@ -310,13 +319,14 @@
                                 unsigned sample_count,
                                 unsigned bindings)
 {
-   if (sample_count > 4)
-      return FALSE;
+   if (sample_count > nv30_screen(pscreen)->max_sample_count)
+      return false;
+
    if (!(0x00000017 & (1 << sample_count)))
-      return FALSE;
+      return false;
 
    if (!util_format_is_supported(format, bindings)) {
-      return FALSE;
+      return false;
    }
 
    /* transfers & shared are always supported */
@@ -441,6 +451,23 @@
       return NULL;
    }
 
+   /*
+    * Some modern apps try to use msaa without keeping in mind the
+    * restrictions on videomem of older cards, resulting in dmesg saying:
+    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+    *
+    * Because we are running out of video memory, after which the program
+    * using the msaa visual freezes, and eventually the entire system freezes.
+    *
+    * To work around this we do not allow msaa visuals by default and allow
+    * the user to override this via NV30_MAX_MSAA.
+    */
+   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+   if (screen->max_sample_count > 4)
+      screen->max_sample_count = 4;
+
    pscreen = &screen->base.base;
    pscreen->destroy = nv30_screen_destroy;
    pscreen->get_param = nv30_screen_get_param;
@@ -524,7 +551,7 @@
 
    ret = nouveau_bo_wrap(screen->base.device, fifo->notify, &screen->notify);
    if (ret == 0)
-      nouveau_bo_map(screen->notify, 0, screen->base.client);
+      ret = nouveau_bo_map(screen->notify, 0, screen->base.client);
    if (ret)
       FAIL_SCREEN_INIT("error mapping notifier memory: %d\n", ret);
 
@@ -654,6 +681,6 @@
 
    nouveau_pushbuf_kick(push, push->channel);
 
-   nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+   nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
    return pscreen;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -38,9 +38,11 @@
    /*XXX: nvfx state */
    struct nouveau_heap *vp_exec_heap;
    struct nouveau_heap *vp_data_heap;
+
+   unsigned max_sample_count;
 };
 
-static INLINE struct nv30_screen *
+static inline struct nv30_screen *
 nv30_screen(struct pipe_screen *pscreen)
 {
    return (struct nv30_screen *)pscreen;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_state.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -211,6 +211,7 @@
 nv30_zsa_state_create(struct pipe_context *pipe,
                       const struct pipe_depth_stencil_alpha_state *cso)
 {
+   struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d;
    struct nv30_zsa_stateobj *so;
 
    so = CALLOC_STRUCT(nv30_zsa_stateobj);
@@ -223,6 +224,13 @@
    SB_DATA  (so, cso->depth.writemask);
    SB_DATA  (so, cso->depth.enabled);
 
+   if (eng3d->oclass == NV35_3D_CLASS || eng3d->oclass >= NV40_3D_CLASS) {
+      SB_MTHD35(so, DEPTH_BOUNDS_TEST_ENABLE, 3);
+      SB_DATA  (so, cso->depth.bounds_test);
+      SB_DATA  (so, fui(cso->depth.bounds_min));
+      SB_DATA  (so, fui(cso->depth.bounds_max));
+   }
+
    if (cso->stencil[0].enabled) {
       SB_MTHD30(so, STENCIL_ENABLE(0), 3);
       SB_DATA  (so, 1);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_state.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_state.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_state.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_state.h	2015-09-16 14:36:09.000000000 +0000
@@ -13,6 +13,8 @@
 #define SB_DATA(so, u)        (so)->data[(so)->size++] = (u)
 #define SB_MTHD30(so, mthd, size)                                          \
    SB_DATA((so), ((size) << 18) | (7 << 13) | NV30_3D_##mthd)
+#define SB_MTHD35(so, mthd, size)                                          \
+   SB_DATA((so), ((size) << 18) | (7 << 13) | NV35_3D_##mthd)
 #define SB_MTHD40(so, mthd, size)                                          \
    SB_DATA((so), ((size) << 18) | (7 << 13) | NV40_3D_##mthd)
 
@@ -30,7 +32,7 @@
 
 struct nv30_zsa_stateobj {
    struct pipe_depth_stencil_alpha_state pipe;
-   unsigned data[32];
+   unsigned data[36];
    unsigned size;
 };
 
@@ -80,7 +82,7 @@
    struct tgsi_shader_info info;
 
    struct draw_vertex_shader *draw;
-   boolean translated;
+   bool translated;
    unsigned enabled_ucps;
    uint16_t texcoord[10];
 
@@ -109,7 +111,7 @@
    struct tgsi_shader_info info;
 
    struct draw_fragment_shader *draw;
-   boolean translated;
+   bool translated;
 
    uint32_t *insn;
    unsigned insn_len;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c	2015-09-16 14:36:09.000000000 +0000
@@ -453,8 +453,8 @@
    nv30->base.pushbuf->user_priv = &nv30->bufctx;
 }
 
-boolean
-nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
+bool
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, bool hwtnl)
 {
    struct nouveau_screen *screen = &nv30->screen->base;
    struct nouveau_pushbuf *push = nv30->base.pushbuf;
@@ -494,7 +494,7 @@
    nouveau_pushbuf_bufctx(push, bctx);
    if (nouveau_pushbuf_validate(push)) {
       nouveau_pushbuf_bufctx(push, NULL);
-      return FALSE;
+      return false;
    }
 
    /*XXX*/
@@ -528,7 +528,7 @@
       }
    }
 
-   return TRUE;
+   return true;
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_texture.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_texture.c	2015-09-16 14:36:09.000000000 +0000
@@ -37,7 +37,7 @@
 #define NV40_WRAP(n) \
    case PIPE_TEX_WRAP_##n: ret = NV40_3D_TEX_WRAP_S_##n; break
 
-static INLINE unsigned
+static inline unsigned
 wrap_mode(unsigned pipe)
 {
    unsigned ret = NV30_3D_TEX_WRAP_S_REPEAT;
@@ -58,7 +58,7 @@
    return ret >> NV30_3D_TEX_WRAP_S__SHIFT;
 }
 
-static INLINE unsigned
+static inline unsigned
 filter_mode(const struct pipe_sampler_state *cso)
 {
    unsigned filter;
@@ -104,7 +104,7 @@
    return filter;
 }
 
-static INLINE unsigned
+static inline unsigned
 compare_mode(const struct pipe_sampler_state *cso)
 {
    if (cso->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE)
@@ -201,7 +201,7 @@
    }
 }
 
-static INLINE uint32_t
+static inline uint32_t
 swizzle(const struct nv30_texfmt *fmt, unsigned cmp, unsigned swz)
 {
    uint32_t data = fmt->swz[swz].src << 8;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_transfer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_transfer.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_transfer.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,33 +41,33 @@
  * of different ways.
  */
 
-static INLINE boolean
+static inline bool
 nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst)
 {
    if (src->x1 - src->x0 != dst->x1 - dst->x0)
-      return TRUE;
+      return true;
    if (src->y1 - src->y0 != dst->y1 - dst->y0)
-      return TRUE;
-   return FALSE;
+      return true;
+   return false;
 }
 
-static INLINE boolean
+static inline bool
 nv30_transfer_blit(XFER_ARGS)
 {
    if (nv30->screen->eng3d->oclass < NV40_3D_CLASS)
-      return FALSE;
+      return false;
    if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1)
-      return FALSE;
+      return false;
    if (dst->w < 2 || dst->h < 2)
-      return FALSE;
+      return false;
    if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch))
-      return FALSE;
+      return false;
    if (src->cpp > 4)
-      return FALSE;
-   return TRUE;
+      return false;
+   return true;
 }
 
-static INLINE struct nouveau_heap *
+static inline struct nouveau_heap *
 nv30_transfer_rect_vertprog(struct nv30_context *nv30)
 {
    struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
@@ -108,7 +108,7 @@
 }
 
 
-static INLINE struct nv04_resource *
+static inline struct nv04_resource *
 nv30_transfer_rect_fragprog(struct nv30_context *nv30)
 {
    struct nv04_resource *fp = nv04_resource(nv30->blit_fp);
@@ -368,29 +368,29 @@
    PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
 }
 
-static boolean
+static bool
 nv30_transfer_sifm(XFER_ARGS)
 {
-   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
-      return FALSE;
+   if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
+      return false;
 
    if (src->d > 1 || dst->d > 1)
-      return FALSE;
+      return false;
 
    if (dst->offset & 63)
-      return FALSE;
+      return false;
 
    if (!dst->pitch) {
-      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
-         return FALSE;
+      if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
+         return false;
    } else {
       if (dst->domain != NOUVEAU_BO_VRAM)
-         return FALSE;
+         return false;
       if (dst->pitch & 63)
-         return FALSE;
+         return false;
    }
 
-   return TRUE;
+   return true;
 }
 
 static void
@@ -481,14 +481,14 @@
  * that name is still accurate on nv4x) error.
  */
 
-static boolean
+static bool
 nv30_transfer_m2mf(XFER_ARGS)
 {
    if (!src->pitch || !dst->pitch)
-      return FALSE;
+      return false;
    if (nv30_transfer_scaled(src, dst))
-      return FALSE;
-   return TRUE;
+      return false;
+   return true;
 }
 
 static void
@@ -540,12 +540,12 @@
    }
 }
 
-static boolean
+static bool
 nv30_transfer_cpu(XFER_ARGS)
 {
    if (nv30_transfer_scaled(src, dst))
-      return FALSE;
-   return TRUE;
+      return false;
+   return true;
 }
 
 static char *
@@ -554,7 +554,7 @@
    return base + (y * rect->pitch) + (x * rect->cpp);
 }
 
-static INLINE unsigned
+static inline unsigned
 swizzle2d(unsigned v, unsigned s)
 {
    v = (v | (v << 8)) & 0x00ff00ff;
@@ -614,7 +614,7 @@
 
 typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int);
 
-static INLINE get_ptr_t
+static inline get_ptr_t
 get_ptr(struct nv30_rect *rect)
 {
    if (rect->pitch)
@@ -653,7 +653,7 @@
 {
    static const struct {
       char *name;
-      boolean (*possible)(XFER_ARGS);
+      bool (*possible)(XFER_ARGS);
       void (*execute)(XFER_ARGS);
    } *method, methods[] = {
       { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf },
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_vbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_vbo.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_vbo.c	2015-09-16 14:36:09.000000000 +0000
@@ -79,7 +79,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nv30_vbuf_range(struct nv30_context *nv30, int vbi,
                 uint32_t *base, uint32_t *size)
 {
@@ -119,7 +119,7 @@
             } else {
                nouveau_buffer_migrate(&nv30->base, buf, NOUVEAU_BO_GART);
             }
-            nv30->base.vbo_dirty = TRUE;
+            nv30->base.vbo_dirty = true;
          }
       }
    }
@@ -160,10 +160,10 @@
                        NOUVEAU_BO_LOW | NOUVEAU_BO_RD,
                        0, NV30_3D_VTXBUF_DMA1);
    }
-   nv30->base.vbo_dirty = TRUE;
+   nv30->base.vbo_dirty = true;
 }
 
-static INLINE void
+static inline void
 nv30_release_user_vbufs(struct nv30_context *nv30)
 {
    uint32_t vbo_user = nv30->vbo_user;
@@ -202,6 +202,9 @@
       return;
 
    redefine = MAX2(vertex->num_elements, nv30->state.num_vtxelts);
+   if (redefine == 0)
+      return;
+
    BEGIN_NV04(push, NV30_3D(VTXFMT(0)), redefine);
 
    for (i = 0; i < vertex->num_elements; i++) {
@@ -221,7 +224,7 @@
    for (i = 0; i < vertex->num_elements; i++) {
       struct nv04_resource *res;
       unsigned offset;
-      boolean user;
+      bool user;
 
       ve = &vertex->pipe[i];
       vb = &nv30->vtxbuf[ve->vertex_buffer_index];
@@ -254,14 +257,12 @@
     struct translate_key transkey;
     unsigned i;
 
-    assert(num_elements);
-
     so = MALLOC(sizeof(*so) + sizeof(*so->element) * num_elements);
     if (!so)
         return NULL;
     memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
     so->num_elements = num_elements;
-    so->need_conversion = FALSE;
+    so->need_conversion = false;
 
     transkey.nr_elements = 0;
     transkey.output_stride = 0;
@@ -284,7 +285,7 @@
                 return NULL;
             }
             so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw;
-            so->need_conversion = TRUE;
+            so->need_conversion = true;
         }
 
         if (1) {
@@ -452,7 +453,7 @@
 }
 
 static void
-nv30_draw_elements(struct nv30_context *nv30, boolean shorten,
+nv30_draw_elements(struct nv30_context *nv30, bool shorten,
                    unsigned mode, unsigned start, unsigned count,
                    unsigned instance_count, int32_t index_bias)
 {
@@ -461,13 +462,11 @@
    struct nouveau_object *eng3d = nv30->screen->eng3d;
    unsigned prim = nv30_prim_gl(mode);
 
-#if 0 /*XXX*/
-   if (index_bias != nv30->state.index_bias) {
-      BEGIN_NV04(push, NV30_3D(VB_ELEMENT_BASE), 1);
+   if (eng3d->oclass >= NV40_3D_CLASS && index_bias != nv30->state.index_bias) {
+      BEGIN_NV04(push, NV40_3D(VB_ELEMENT_BASE), 1);
       PUSH_DATA (push, index_bias);
       nv30->state.index_bias = index_bias;
    }
-#endif
 
    if (eng3d->oclass == NV40_3D_CLASS && index_size > 1 &&
        nv30->idxbuf.buffer) {
@@ -564,7 +563,7 @@
    if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
       nv30_update_user_vbufs(nv30);
 
-   nv30_state_validate(nv30, ~0, TRUE);
+   nv30_state_validate(nv30, ~0, true);
    if (nv30->draw_flags) {
       nv30_render_vbo(pipe, info);
       return;
@@ -578,17 +577,17 @@
       if (!nv30->vtxbuf[i].buffer)
          continue;
       if (nv30->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-         nv30->base.vbo_dirty = TRUE;
+         nv30->base.vbo_dirty = true;
    }
 
    if (!nv30->base.vbo_dirty && nv30->idxbuf.buffer &&
        nv30->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nv30->base.vbo_dirty = TRUE;
+      nv30->base.vbo_dirty = true;
 
    if (nv30->base.vbo_dirty) {
       BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1);
       PUSH_DATA (push, 0);
-      nv30->base.vbo_dirty = FALSE;
+      nv30->base.vbo_dirty = false;
    }
 
    if (!info->indexed) {
@@ -596,7 +595,7 @@
                        info->mode, info->start, info->count,
                        info->instance_count);
    } else {
-      boolean shorten = info->max_index <= 65535;
+      bool shorten = info->max_index <= 65535;
 
       if (info->primitive_restart != nv30->state.prim_restart) {
          if (info->primitive_restart) {
@@ -605,7 +604,7 @@
             PUSH_DATA (push, info->restart_index);
 
             if (info->restart_index > 65535)
-               shorten = FALSE;
+               shorten = false;
          } else {
             BEGIN_NV04(push, NV40_3D(PRIM_RESTART_ENABLE), 1);
             PUSH_DATA (push, 0);
@@ -617,7 +616,7 @@
          PUSH_DATA (push, info->restart_index);
 
          if (info->restart_index > 65535)
-            shorten = FALSE;
+            shorten = false;
       }
 
       nv30_draw_elements(nv30, shorten,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,7 +48,7 @@
    vp->consts = NULL;
    vp->nr_consts = 0;
 
-   vp->translated = FALSE;
+   vp->translated = false;
 }
 
 void
@@ -58,8 +58,8 @@
    struct nouveau_object *eng3d = nv30->screen->eng3d;
    struct nv30_vertprog *vp = nv30->vertprog.program;
    struct nv30_fragprog *fp = nv30->fragprog.program;
-   boolean upload_code = FALSE;
-   boolean upload_data = FALSE;
+   bool upload_code = false;
+   bool upload_data = false;
    unsigned i;
 
    if (nv30->dirty & NV30_NEW_FRAGPROG) {
@@ -125,7 +125,7 @@
          }
       }
 
-      upload_code = TRUE;
+      upload_code = true;
    }
 
    if (vp->nr_consts && !vp->data) {
@@ -166,8 +166,8 @@
          }
       }
 
-      upload_code = TRUE;
-      upload_data = TRUE;
+      upload_code = true;
+      upload_data = true;
    }
 
    if (vp->nr_consts) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nv30_winsys.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nv30_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -19,34 +19,34 @@
 #define NV40_3D_PRIM_RESTART_ENABLE 0x1dac
 #define NV40_3D_PRIM_RESTART_INDEX  0x1db0
 
-static INLINE void
+static inline void
 PUSH_RELOC(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t offset,
       uint32_t flags, uint32_t vor, uint32_t tor)
 {
    nouveau_pushbuf_reloc(push, bo, offset, flags, vor, tor);
 }
 
-static INLINE struct nouveau_bufctx *
+static inline struct nouveau_bufctx *
 bufctx(struct nouveau_pushbuf *push)
 {
    struct nouveau_bufctx **pctx = push->user_priv;
    return *pctx;
 }
 
-static INLINE void
+static inline void
 PUSH_RESET(struct nouveau_pushbuf *push, int bin)
 {
    nouveau_bufctx_reset(bufctx(push), bin);
 }
 
-static INLINE void
+static inline void
 PUSH_REFN(struct nouveau_pushbuf *push, int bin,
      struct nouveau_bo *bo, uint32_t access)
 {
    nouveau_bufctx_refn(bufctx(push), bin, bo, access);
 }
 
-static INLINE void
+static inline void
 PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
       struct nouveau_bo *bo, uint32_t offset, uint32_t access)
 {
@@ -55,7 +55,7 @@
    PUSH_DATA(push, bo->offset + offset);
 }
 
-static INLINE void
+static inline void
 PUSH_MTHDo(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
       struct nouveau_bo *bo, uint32_t access, uint32_t vor, uint32_t tor)
 {
@@ -67,7 +67,7 @@
       PUSH_DATA(push, tor);
 }
 
-static INLINE void
+static inline void
 PUSH_MTHDs(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
       struct nouveau_bo *bo, uint32_t data, uint32_t access,
       uint32_t vor, uint32_t tor)
@@ -80,7 +80,7 @@
       PUSH_DATA(push, data | tor);
 }
 
-static INLINE struct nouveau_bufref *
+static inline struct nouveau_bufref *
 PUSH_MTHD(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
      struct nouveau_bo *bo, uint32_t data, uint32_t access,
      uint32_t vor, uint32_t tor)
@@ -99,7 +99,7 @@
    return bref;
 }
 
-static INLINE void
+static inline void
 PUSH_RESRC(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
            struct nv04_resource *r, uint32_t data, uint32_t access,
            uint32_t vor, uint32_t tor)
@@ -108,14 +108,14 @@
              r->domain | access, vor, tor)->priv = r;
 }
 
-static INLINE void
+static inline void
 BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
 {
    PUSH_SPACE(push, size + 1);
    PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd);
 }
 
-static INLINE void
+static inline void
 BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
 {
    PUSH_SPACE(push, size + 1);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
    struct util_dynarray label_relocs;
 };
 
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
 temp(struct nvfx_fpc *fpc)
 {
    int idx = __builtin_ctzll(~fpc->r_temps);
@@ -60,7 +60,7 @@
    return nvfx_reg(NVFXSR_TEMP, idx);
 }
 
-static INLINE void
+static inline void
 release_temps(struct nvfx_fpc *fpc)
 {
    fpc->r_temps &= ~fpc->r_temps_discard;
@@ -373,7 +373,7 @@
    hw[3] = 0;
 }
 
-static INLINE struct nvfx_src
+static inline struct nvfx_src
 tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
    struct nvfx_src src;
@@ -415,7 +415,7 @@
    return src;
 }
 
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
 tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
    switch (fdst->Register.File) {
    case TGSI_FILE_OUTPUT:
@@ -430,7 +430,7 @@
    }
 }
 
-static INLINE int
+static inline int
 tgsi_mask(uint tgsi)
 {
    int mask = 0;
@@ -442,7 +442,7 @@
    return mask;
 }
 
-static boolean
+static bool
 nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
             const struct tgsi_full_instruction *finst)
 {
@@ -455,7 +455,7 @@
    int i;
 
    if (finst->Instruction.Opcode == TGSI_OPCODE_END)
-      return TRUE;
+      return true;
 
    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
       const struct tgsi_full_src_register *fsrc;
@@ -525,13 +525,13 @@
          break;
       default:
          NOUVEAU_ERR("bad src file\n");
-         return FALSE;
+         return false;
       }
    }
 
    dst  = tgsi_dst(fpc, &finst->Dst[0]);
    mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
-   sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+   sat  = finst->Instruction.Saturate;
 
    switch (finst->Instruction.Opcode) {
    case TGSI_OPCODE_ABS:
@@ -868,12 +868,12 @@
 
         default:
       NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
-      return FALSE;
+      return false;
    }
 
 out:
    release_temps(fpc);
-   return TRUE;
+   return true;
 nv3x_cflow:
    {
       static int warned = 0;
@@ -887,7 +887,7 @@
    goto out;
 }
 
-static boolean
+static bool
 nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
                                const struct tgsi_full_declaration *fdec)
 {
@@ -917,17 +917,17 @@
    case TGSI_SEMANTIC_GENERIC:
    case TGSI_SEMANTIC_PCOORD:
       /* will be assigned to remaining TC slots later */
-      return TRUE;
+      return true;
    default:
       assert(0);
-      return FALSE;
+      return false;
    }
 
    fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
-   return TRUE;
+   return true;
 }
 
-static boolean
+static bool
 nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc,
                              const struct tgsi_full_declaration *fdec)
 {
@@ -954,16 +954,16 @@
             }
             hw = NVFX_FP_OP_INPUT_SRC_TC(hw);
             fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
-            return TRUE;
+            return true;
          }
       }
-      return FALSE;
+      return false;
    default:
-      return TRUE;
+      return true;
    }
 }
 
-static boolean
+static bool
 nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc,
             const struct tgsi_full_declaration *fdec)
 {
@@ -984,20 +984,20 @@
       }
       if(hw > ((fpc->is_nv4x) ? 4 : 2)) {
          NOUVEAU_ERR("bad rcol index\n");
-         return FALSE;
+         return false;
       }
       break;
    default:
       NOUVEAU_ERR("bad output semantic\n");
-      return FALSE;
+      return false;
    }
 
    fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
    fpc->r_temps |= (1ULL << hw);
-   return TRUE;
+   return true;
 }
 
-static boolean
+static bool
 nvfx_fragprog_prepare(struct nvfx_fpc *fpc)
 {
    struct tgsi_parse_context p;
@@ -1081,17 +1081,17 @@
       fpc->r_temps_discard = 0ULL;
    }
 
-   return TRUE;
+   return true;
 
 out_err:
    FREE(fpc->r_temp);
    fpc->r_temp = NULL;
 
    tgsi_parse_free(&p);
-   return FALSE;
+   return false;
 }
 
-DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false)
 
 void
 _nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
@@ -1100,7 +1100,7 @@
    struct nvfx_fpc *fpc = NULL;
    struct util_dynarray insns;
 
-   fp->translated = FALSE;
+   fp->translated = false;
    fp->point_sprite_control = 0;
    fp->vp_or = 0;
 
@@ -1182,7 +1182,7 @@
       debug_printf("\n");
    }
 
-   fp->translated = TRUE;
+   fp->translated = true;
 
 out:
    tgsi_parse_free(&parse);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nvfx_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nvfx_shader.h	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nvfx_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -448,8 +448,8 @@
 	struct nvfx_src src[3];
 };
 
-static INLINE struct nvfx_insn
-nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
+static inline struct nvfx_insn
+nvfx_insn(bool sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
 {
 	struct nvfx_insn insn = {
 		.op = op,
@@ -468,7 +468,7 @@
 	return insn;
 }
 
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
 nvfx_reg(int type, int index)
 {
 	struct nvfx_reg temp = {
@@ -478,7 +478,7 @@
 	return temp;
 }
 
-static INLINE struct nvfx_src
+static inline struct nvfx_src
 nvfx_src(struct nvfx_reg reg)
 {
 	struct nvfx_src temp = {
@@ -491,7 +491,7 @@
 	return temp;
 }
 
-static INLINE struct nvfx_src
+static inline struct nvfx_src
 nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w)
 {
 	struct nvfx_src dst = src;
@@ -503,14 +503,14 @@
 	return dst;
 }
 
-static INLINE struct nvfx_src
+static inline struct nvfx_src
 nvfx_src_neg(struct nvfx_src src)
 {
 	src.negate = !src.negate;
 	return src;
 }
 
-static INLINE struct nvfx_src
+static inline struct nvfx_src
 nvfx_src_abs(struct nvfx_src src)
 {
 	src.abs = 1;
@@ -529,7 +529,7 @@
 void
 _nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp);
 
-boolean
+bool
 _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp);
 
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c	2015-09-16 14:36:09.000000000 +0000
@@ -416,7 +416,7 @@
    return src;
 }
 
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
 tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
    struct nvfx_reg dst;
 
@@ -455,7 +455,7 @@
    return mask;
 }
 
-static boolean
+static bool
 nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
             unsigned idx, const struct tgsi_full_instruction *finst)
 {
@@ -466,7 +466,7 @@
    struct nvfx_insn insn;
    struct nvfx_relocation reloc;
    struct nvfx_loop_entry loop;
-   boolean sat = FALSE;
+   bool sat = false;
    int mask;
    int ai = -1, ci = -1, ii = -1;
    int i;
@@ -524,25 +524,25 @@
          break;
       default:
          NOUVEAU_ERR("bad src file\n");
-         return FALSE;
+         return false;
       }
    }
 
    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
       if(src[i].reg.type < 0)
-         return FALSE;
+         return false;
    }
 
    if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS &&
       finst->Instruction.Opcode != TGSI_OPCODE_ARL)
-      return FALSE;
+      return false;
 
    final_dst = dst  = tgsi_dst(vpc, &finst->Dst[0]);
    mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
-   if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) {
+   if(finst->Instruction.Saturate) {
       assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL);
       if (vpc->is_nv4x)
-         sat = TRUE;
+         sat = true;
       else
       if(dst.type != NVFXSR_TEMP)
          dst = temp(vpc);
@@ -793,10 +793,10 @@
       break;
    default:
       NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
-      return FALSE;
+      return false;
    }
 
-   if(finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE && !vpc->is_nv4x) {
+   if(finst->Instruction.Saturate && !vpc->is_nv4x) {
       if (!vpc->r_0_1.type)
          vpc->r_0_1 = constant(vpc, -1, 0, 1, 0, 0);
       nvfx_vp_emit(vpc, arith(0, VEC, MAX, dst, mask, nvfx_src(dst), swz(nvfx_src(vpc->r_0_1), X, X, X, X), none));
@@ -804,10 +804,10 @@
    }
 
    release_temps(vpc);
-   return TRUE;
+   return true;
 }
 
-static boolean
+static bool
 nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
                                 const struct tgsi_full_declaration *fdec)
 {
@@ -825,7 +825,7 @@
       vpc->r_result[idx] = temp(vpc);
       vpc->r_temps_discard = 0;
       vpc->cvtx_idx = idx;
-      return TRUE;
+      return true;
    case TGSI_SEMANTIC_COLOR:
       if (fdec->Semantic.Index == 0) {
          hw = NVFX_VP(INST_DEST_COL0);
@@ -834,7 +834,7 @@
          hw = NVFX_VP(INST_DEST_COL1);
       } else {
          NOUVEAU_ERR("bad colour semantic index\n");
-         return FALSE;
+         return false;
       }
       break;
    case TGSI_SEMANTIC_BCOLOR:
@@ -845,7 +845,7 @@
          hw = NVFX_VP(INST_DEST_BFC1);
       } else {
          NOUVEAU_ERR("bad bcolour semantic index\n");
-         return FALSE;
+         return false;
       }
       break;
    case TGSI_SEMANTIC_FOG:
@@ -868,22 +868,22 @@
 
       if (i == num_texcoords) {
          vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
-         return TRUE;
+         return true;
       }
       break;
    case TGSI_SEMANTIC_EDGEFLAG:
       vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
-      return TRUE;
+      return true;
    default:
       NOUVEAU_ERR("bad output semantic\n");
-      return FALSE;
+      return false;
    }
 
    vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
-   return TRUE;
+   return true;
 }
 
-static boolean
+static bool
 nvfx_vertprog_prepare(struct nvfx_vpc *vpc)
 {
    struct tgsi_parse_context p;
@@ -924,7 +924,7 @@
             break;
          case TGSI_FILE_OUTPUT:
             if (!nvfx_vertprog_parse_decl_output(vpc, fdec))
-               return FALSE;
+               return false;
             break;
          default:
             break;
@@ -961,12 +961,12 @@
    }
 
    vpc->r_temps_discard = 0;
-   return TRUE;
+   return true;
 }
 
-DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", false)
 
-boolean
+bool
 _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
 {
    struct tgsi_parse_context parse;
@@ -975,13 +975,13 @@
    struct util_dynarray insns;
    int i, ucps;
 
-   vp->translated = FALSE;
+   vp->translated = false;
    vp->nr_insns = 0;
    vp->nr_consts = 0;
 
    vpc = CALLOC_STRUCT(nvfx_vpc);
    if (!vpc)
-      return FALSE;
+      return false;
    vpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0;
    vpc->vp   = vp;
    vpc->pipe = vp->pipe;
@@ -990,7 +990,7 @@
 
    if (!nvfx_vertprog_prepare(vpc)) {
       FREE(vpc);
-      return FALSE;
+      return false;
    }
 
    /* Redirect post-transform vertex position to a temp if user clip
@@ -1108,7 +1108,7 @@
       debug_printf("\n");
    }
 
-   vp->translated = TRUE;
+   vp->translated = true;
 
 out:
    tgsi_parse_free(&parse);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_blit.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_blit.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_blit.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_blit.h	2015-09-16 14:36:09.000000000 +0000
@@ -37,7 +37,7 @@
 #define NV50_BLIT_TEXTURE_2D_ARRAY  5
 #define NV50_BLIT_MAX_TEXTURE_TYPES 6
 
-static INLINE unsigned
+static inline unsigned
 nv50_blit_texture_type(enum pipe_texture_target target)
 {
    switch (target) {
@@ -52,7 +52,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target)
 {
    switch (target) {
@@ -67,7 +67,7 @@
    }
 }
 
-static INLINE enum pipe_texture_target
+static inline enum pipe_texture_target
 nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target)
 {
    switch (target) {
@@ -81,7 +81,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 nv50_blit_get_filter(const struct pipe_blit_info *info)
 {
    if (info->dst.resource->nr_samples < info->src.resource->nr_samples)
@@ -102,7 +102,7 @@
 /* Since shaders cannot export stencil, we cannot copy stencil values when
  * rendering to ZETA, so we attach the ZS surface to a colour render target.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 nv50_blit_zeta_to_colour_format(enum pipe_format format)
 {
    switch (format) {
@@ -127,7 +127,7 @@
 }
 
 
-static INLINE uint16_t
+static inline uint16_t
 nv50_blit_derive_color_mask(const struct pipe_blit_info *info)
 {
    const unsigned mask = info->mask;
@@ -162,7 +162,7 @@
    return color_mask;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
 {
    uint32_t mask = 0;
@@ -191,8 +191,8 @@
 # define nv50_format_table nvc0_format_table
 #endif
 
-/* return TRUE for formats that can be converted among each other by NVC0_2D */
-static INLINE boolean
+/* return true for formats that can be converted among each other by NVC0_2D */
+static inline bool
 nv50_2d_dst_format_faithful(enum pipe_format format)
 {
    const uint64_t mask =
@@ -201,7 +201,7 @@
    uint8_t id = nv50_format_table[format].rt;
    return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
 }
-static INLINE boolean
+static inline bool
 nv50_2d_src_format_faithful(enum pipe_format format)
 {
    const uint64_t mask =
@@ -211,7 +211,7 @@
    return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
 }
 
-static INLINE boolean
+static inline bool
 nv50_2d_format_supported(enum pipe_format format)
 {
    uint8_t id = nv50_format_table[format].rt;
@@ -219,7 +219,7 @@
       (NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
 }
 
-static INLINE boolean
+static inline bool
 nv50_2d_dst_format_ops_supported(enum pipe_format format)
 {
    uint8_t id = nv50_format_table[format].rt;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_context.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -64,12 +64,12 @@
          if (!nv50->vtxbuf[i].buffer)
             continue;
          if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-            nv50->base.vbo_dirty = TRUE;
+            nv50->base.vbo_dirty = true;
       }
 
       if (nv50->idxbuf.buffer &&
           nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-         nv50->base.vbo_dirty = TRUE;
+         nv50->base.vbo_dirty = true;
 
       for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
          uint32_t valid = nv50->constbuf_valid[s];
@@ -87,7 +87,7 @@
                continue;
 
             if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-               nv50->cb_dirty = TRUE;
+               nv50->cb_dirty = true;
          }
       }
    }
@@ -100,9 +100,9 @@
 
    if (screen) {
       nouveau_fence_next(&screen->base);
-      nouveau_fence_update(&screen->base, TRUE);
+      nouveau_fence_update(&screen->base, true);
       if (screen->cur_ctx)
-         screen->cur_ctx->state.flushed = TRUE;
+         screen->cur_ctx->state.flushed = true;
    }
 }
 
@@ -199,9 +199,13 @@
          }
       }
 
-      if (nv50->idxbuf.buffer == res)
+      if (nv50->idxbuf.buffer == res) {
+         /* Just rebind to the bufctx as there is no separate dirty bit */
+         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+         BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
          if (!--ref)
             return ref;
+      }
 
       for (s = 0; s < 3; ++s) {
       assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
@@ -310,7 +314,7 @@
    nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;
 
    if (screen->base.device->chipset < 0x84 ||
-       debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+       debug_get_bool_option("NOUVEAU_PMPEG", false)) {
       /* PMPEG */
       nouveau_context_init_vdec(&nv50->base);
    } else if (screen->base.device->chipset < 0x98 ||
@@ -351,7 +355,7 @@
 }
 
 void
-nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean on_flush)
+nv50_bufctx_fence(struct nouveau_bufctx *bufctx, bool on_flush)
 {
    struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
    struct nouveau_list *it;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_context.h	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -91,7 +91,7 @@
 
 struct nv50_blitctx;
 
-boolean nv50_blitctx_create(struct nv50_context *);
+bool nv50_blitctx_create(struct nv50_context *);
 
 struct nv50_context {
    struct nouveau_context base;
@@ -102,7 +102,7 @@
    struct nouveau_bufctx *bufctx;
 
    uint32_t dirty;
-   boolean cb_dirty;
+   bool cb_dirty;
 
    struct nv50_graph_state state;
 
@@ -152,26 +152,26 @@
    unsigned sample_mask;
    unsigned min_samples;
 
-   boolean vbo_push_hint;
+   bool vbo_push_hint;
 
    uint32_t rt_array_mode;
 
    struct pipe_query *cond_query;
-   boolean cond_cond; /* inverted rendering condition */
+   bool cond_cond; /* inverted rendering condition */
    uint cond_mode;
    uint32_t cond_condmode; /* the calculated condition */
 
    struct nv50_blitctx *blit;
 };
 
-static INLINE struct nv50_context *
+static inline struct nv50_context *
 nv50_context(struct pipe_context *pipe)
 {
    return (struct nv50_context *)pipe;
 }
 
 /* return index used in nv50_context arrays for a specific shader type */
-static INLINE unsigned
+static inline unsigned
 nv50_context_shader_stage(unsigned pipe)
 {
    switch (pipe) {
@@ -188,7 +188,7 @@
 /* nv50_context.c */
 struct pipe_context *nv50_create(struct pipe_screen *, void *);
 
-void nv50_bufctx_fence(struct nouveau_bufctx *, boolean on_flush);
+void nv50_bufctx_fence(struct nouveau_bufctx *, bool on_flush);
 
 void nv50_default_kick_notify(struct nouveau_pushbuf *);
 
@@ -197,12 +197,12 @@
 
 /* nv50_query.c */
 void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
                                struct pipe_query *, unsigned result_offset);
 void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
 void nva0_so_target_save_offset(struct pipe_context *,
                                 struct pipe_stream_output_target *,
-                                unsigned index, boolean seralize);
+                                unsigned index, bool seralize);
 
 #define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
 
@@ -221,8 +221,8 @@
 
 /* nv50_state_validate.c */
 /* @words: check for space before emitting relocs */
-extern boolean nv50_state_validate(struct nv50_context *, uint32_t state_mask,
-                                   unsigned space_words);
+extern bool nv50_state_validate(struct nv50_context *, uint32_t state_mask,
+                                unsigned space_words);
 
 /* nv50_surface.c */
 extern void nv50_clear(struct pipe_context *, unsigned buffers,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_formats.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_formats.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_formats.c	2015-04-16 07:17:52.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_formats.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
  */
 #define U_V   PIPE_BIND_VERTEX_BUFFER
 #define U_T   PIPE_BIND_SAMPLER_VIEW
-#define U_I   PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE
+#define U_I   PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE
 #define U_TR  PIPE_BIND_RENDER_TARGET | U_T
 #define U_IR  U_TR | U_I
 #define U_TB  PIPE_BIND_BLENDABLE | U_TR
@@ -203,8 +203,10 @@
    F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
    C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
    F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
+#if NOUVEAU_DRIVER != 0xc0
    C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
    F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+#endif
    F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
 
    C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_miptree.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_miptree.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_miptree.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,7 +30,7 @@
 
 uint32_t
 nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
-                                 boolean is_3d)
+                                 bool is_3d)
 {
    uint32_t tile_mode = 0x000;
 
@@ -59,13 +59,13 @@
 }
 
 static uint32_t
-nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
+nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
 {
    return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz, is_3d);
 }
 
 static uint32_t
-nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+nv50_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
 {
    const unsigned ms = util_logbase2(mt->base.base.nr_samples);
    uint32_t tile_flags;
@@ -184,7 +184,7 @@
    unsigned stride;
 
    if (!mt || !mt->base.bo)
-      return FALSE;
+      return false;
 
    stride = mt->level[0].pitch;
 
@@ -204,7 +204,7 @@
    u_default_transfer_inline_write  /* transfer_inline_write */
 };
 
-static INLINE boolean
+static inline bool
 nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
 {
    switch (mt->base.base.nr_samples) {
@@ -228,12 +228,12 @@
       break;
    default:
       NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
-      return FALSE;
+      return false;
    }
-   return TRUE;
+   return true;
 }
 
-boolean
+bool
 nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
 {
    struct pipe_resource *pt = &mt->base.base;
@@ -241,12 +241,12 @@
    unsigned h = pt->height0;
 
    if (util_format_is_depth_or_stencil(pt->format))
-      return FALSE;
+      return false;
 
    if ((pt->last_level > 0) || (pt->depth0 > 1) || (pt->array_size > 1))
-      return FALSE;
+      return false;
    if (mt->ms_x | mt->ms_y)
-      return FALSE;
+      return false;
 
    mt->level[0].pitch = align(pt->width0 * blocksize, pitch_align);
 
@@ -256,7 +256,7 @@
 
    mt->total_size = mt->level[0].pitch * h;
 
-   return TRUE;
+   return true;
 }
 
 static void
@@ -335,7 +335,7 @@
    struct nouveau_device *dev = nouveau_screen(pscreen)->device;
    struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
    struct pipe_resource *pt = &mt->base.base;
-   boolean compressed = dev->drm_version >= 0x01000101;
+   bool compressed = dev->drm_version >= 0x01000101;
    int ret;
    union nouveau_bo_config bo_config;
    uint32_t bo_flags;
@@ -377,7 +377,7 @@
    if (!bo_config.nv50.memtype && (pt->bind & PIPE_BIND_SHARED))
       mt->base.domain = NOUVEAU_BO_GART;
    else
-      mt->base.domain = NOUVEAU_BO_VRAM;
+      mt->base.domain = NV_VRAM_DOMAIN(nouveau_screen(pscreen));
 
    bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP;
    if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET))
@@ -438,7 +438,7 @@
 
 
 /* Offset of zslice @z from start of level @l. */
-INLINE unsigned
+inline unsigned
 nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
 {
    const struct pipe_resource *pt = &mt->base.base;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_program.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_program.c	2015-09-16 14:36:09.000000000 +0000
@@ -25,7 +25,7 @@
 
 #include "codegen/nv50_ir_driver.h"
 
-static INLINE unsigned
+static inline unsigned
 bitcount4(const uint32_t val)
 {
    static const uint8_t cnt[16]
@@ -66,6 +66,7 @@
       case TGSI_SEMANTIC_VERTEXID:
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
          prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+         prog->vp.vertexid = 1;
          continue;
       default:
          break;
@@ -104,7 +105,7 @@
          prog->vp.bfc[info->out[i].si] = i;
          break;
       case TGSI_SEMANTIC_LAYER:
-         prog->gp.has_layer = TRUE;
+         prog->gp.has_layer = true;
          prog->gp.layerid = n;
          break;
       case TGSI_SEMANTIC_VIEWPORT_INDEX:
@@ -316,7 +317,7 @@
    return so;
 }
 
-boolean
+bool
 nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
 {
    struct nv50_ir_prog_info *info;
@@ -325,7 +326,7 @@
 
    info = CALLOC_STRUCT(nv50_ir_prog_info);
    if (!info)
-      return FALSE;
+      return false;
 
    info->type = prog->type;
    info->target = chipset;
@@ -410,7 +411,7 @@
    return !ret;
 }
 
-boolean
+bool
 nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
 {
    struct nouveau_heap *heap;
@@ -423,7 +424,7 @@
    case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
    default:
       assert(!"invalid program type");
-      return FALSE;
+      return false;
    }
 
    ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
@@ -440,7 +441,7 @@
       ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
       if (ret) {
          NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
-         return FALSE;
+         return false;
       }
    }
    prog->code_base = prog->mem->start;
@@ -448,10 +449,10 @@
    ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
    if (ret < 0) {
       nouveau_heap_free(&prog->mem);
-      return FALSE;
+      return false;
    }
    if (ret > 0)
-      nv50->state.new_tls_space = TRUE;
+      nv50->state.new_tls_space = true;
 
    if (prog->fixups)
       nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
@@ -463,7 +464,7 @@
    BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
    PUSH_DATA (nv50->base.pushbuf, 0);
 
-   return TRUE;
+   return true;
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_program.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_program.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_program.h	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_program.h	2015-09-16 14:36:09.000000000 +0000
@@ -53,7 +53,7 @@
    struct pipe_shader_state pipe;
 
    ubyte type;
-   boolean translated;
+   bool translated;
 
    uint32_t *code;
    unsigned code_size;
@@ -76,6 +76,7 @@
       ubyte psiz;        /* output slot of point size */
       ubyte bfc[2];      /* indices into varying for FFC (FP) or BFC (VP) */
       ubyte edgeflag;
+      ubyte vertexid;
       ubyte clpd[2];     /* output slot of clip distance[i]'s 1st component */
       ubyte clpd_nr;
    } vp;
@@ -104,8 +105,8 @@
    struct nv50_stream_output_state *so;
 };
 
-boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
-boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
+bool nv50_program_translate(struct nv50_program *, uint16_t chipset);
+bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
 void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
 
 #endif /* __NV50_PROG_H__ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_push.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_push.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_push.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_push.c	2015-09-16 14:36:09.000000000 +0000
@@ -23,13 +23,13 @@
 
    struct translate *translate;
 
-   boolean primitive_restart;
+   bool primitive_restart;
    uint32_t prim;
    uint32_t restart_index;
    uint32_t instance_id;
 };
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
 {
    unsigned i;
@@ -39,7 +39,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
 {
    unsigned i;
@@ -49,7 +49,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
 {
    unsigned i;
@@ -179,7 +179,7 @@
 #define NV50_PRIM_GL_CASE(n) \
    case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
 
-static INLINE unsigned
+static inline unsigned
 nv50_prim_gl(unsigned prim)
 {
    switch (prim) {
@@ -212,7 +212,7 @@
    unsigned i, index_size;
    unsigned inst_count = info->instance_count;
    unsigned vert_count = info->count;
-   boolean apply_bias = info->indexed && info->index_bias;
+   bool apply_bias = info->indexed && info->index_bias;
 
    ctx.push = nv50->base.pushbuf;
    ctx.translate = nv50->vertex->translate;
@@ -258,12 +258,12 @@
             NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
             return;
          }
-         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+         pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
          vert_count /= targ->stride;
       }
       ctx.idxbuf = NULL;
       index_size = 0;
-      ctx.primitive_restart = FALSE;
+      ctx.primitive_restart = false;
       ctx.restart_index = 0;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -27,6 +27,11 @@
 #include "nv50/nv50_context.h"
 #include "nv_object.xml.h"
 
+#define NV50_QUERY_STATE_READY   0
+#define NV50_QUERY_STATE_ACTIVE  1
+#define NV50_QUERY_STATE_ENDED   2
+#define NV50_QUERY_STATE_FLUSHED 3
+
 /* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
  * (since we use only a single GPU channel per screen) will not work properly.
  *
@@ -42,21 +47,22 @@
    struct nouveau_bo *bo;
    uint32_t base;
    uint32_t offset; /* base + i * 32 */
-   boolean ready;
-   boolean flushed;
-   boolean is64bit;
+   uint8_t state;
+   bool is64bit;
+   int nesting; /* only used for occlusion queries */
    struct nouveau_mm_allocation *mm;
+   struct nouveau_fence *fence;
 };
 
 #define NV50_QUERY_ALLOC_SPACE 256
 
-static INLINE struct nv50_query *
+static inline struct nv50_query *
 nv50_query(struct pipe_query *pipe)
 {
    return (struct nv50_query *)pipe;
 }
 
-static boolean
+static bool
 nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
 {
    struct nv50_screen *screen = nv50->screen;
@@ -65,7 +71,7 @@
    if (q->bo) {
       nouveau_bo_ref(NULL, &q->bo);
       if (q->mm) {
-         if (q->ready)
+         if (q->state == NV50_QUERY_STATE_READY)
             nouveau_mm_free(q->mm);
          else
             nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work,
@@ -75,23 +81,24 @@
    if (size) {
       q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
       if (!q->bo)
-         return FALSE;
+         return false;
       q->offset = q->base;
 
       ret = nouveau_bo_map(q->bo, 0, screen->base.client);
       if (ret) {
          nv50_query_allocate(nv50, q, 0);
-         return FALSE;
+         return false;
       }
       q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
    }
-   return TRUE;
+   return true;
 }
 
 static void
 nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
 {
    nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
+   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
    FREE(nv50_query(pq));
 }
 
@@ -112,7 +119,8 @@
 
    q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
                  type == PIPE_QUERY_PRIMITIVES_EMITTED ||
-                 type == PIPE_QUERY_SO_STATISTICS);
+                 type == PIPE_QUERY_SO_STATISTICS ||
+                 type == PIPE_QUERY_PIPELINE_STATISTICS);
    q->type = type;
 
    if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
@@ -146,8 +154,8 @@
    struct nv50_query *q = nv50_query(pq);
 
    /* For occlusion queries we have to change the storage, because a previous
-    * query might set the initial render conition to FALSE even *after* we re-
-    * initialized it to TRUE.
+    * query might set the initial render conition to false even *after* we re-
+    * initialized it to true.
     */
    if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
       q->offset += 32;
@@ -159,7 +167,7 @@
        *  query ?
        */
       q->data[0] = q->sequence; /* initialize sequence */
-      q->data[1] = 1; /* initial render condition = TRUE */
+      q->data[1] = 1; /* initial render condition = true */
       q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
       q->data[5] = 0;
    }
@@ -168,11 +176,16 @@
 
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      PUSH_SPACE(push, 4);
-      BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
-      PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
-      BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
-      PUSH_DATA (push, 1);
+      q->nesting = nv50->screen->num_occlusion_queries_active++;
+      if (q->nesting) {
+         nv50_query_get(push, q, 0x10, 0x0100f002);
+      } else {
+         PUSH_SPACE(push, 4);
+         BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+         PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+         PUSH_DATA (push, 1);
+      }
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
       nv50_query_get(push, q, 0x10, 0x06805002);
@@ -200,7 +213,7 @@
    default:
       break;
    }
-   q->ready = FALSE;
+   q->state = NV50_QUERY_STATE_ACTIVE;
    return true;
 }
 
@@ -211,12 +224,16 @@
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nv50_query *q = nv50_query(pq);
 
+   q->state = NV50_QUERY_STATE_ENDED;
+
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
       nv50_query_get(push, q, 0, 0x0100f002);
-      PUSH_SPACE(push, 2);
-      BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
-      PUSH_DATA (push, 0);
+      if (--nv50->screen->num_occlusion_queries_active == 0) {
+         PUSH_SPACE(push, 2);
+         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+         PUSH_DATA (push, 0);
+      }
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
       nv50_query_get(push, q, 0, 0x06805002);
@@ -249,23 +266,32 @@
       nv50_query_get(push, q, 0, 0x1000f010);
       break;
    case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      q->sequence++;
       nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
-      /* This query is not issued on GPU because disjoint is forced to FALSE */
-      q->ready = TRUE;
+      /* This query is not issued on GPU because disjoint is forced to false */
+      q->state = NV50_QUERY_STATE_READY;
       break;
    default:
       assert(0);
       break;
    }
-   q->ready = q->flushed = FALSE;
+
+   if (q->is64bit)
+      nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence);
 }
 
-static INLINE boolean
-nv50_query_ready(struct nv50_query *q)
+static inline void
+nv50_query_update(struct nv50_query *q)
 {
-   return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+   if (q->is64bit) {
+      if (nouveau_fence_signalled(q->fence))
+         q->state = NV50_QUERY_STATE_READY;
+   } else {
+      if (q->data[0] == q->sequence)
+         q->state = NV50_QUERY_STATE_READY;
+   }
 }
 
 static boolean
@@ -276,32 +302,33 @@
    struct nv50_query *q = nv50_query(pq);
    uint64_t *res64 = (uint64_t *)result;
    uint32_t *res32 = (uint32_t *)result;
-   boolean *res8 = (boolean *)result;
+   uint8_t *res8 = (uint8_t *)result;
    uint64_t *data64 = (uint64_t *)q->data;
    int i;
 
-   if (!q->ready) /* update ? */
-      q->ready = nv50_query_ready(q);
-   if (!q->ready) {
+   if (q->state != NV50_QUERY_STATE_READY)
+      nv50_query_update(q);
+
+   if (q->state != NV50_QUERY_STATE_READY) {
       if (!wait) {
          /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
-         if (!q->flushed) {
-            q->flushed = TRUE;
+         if (q->state != NV50_QUERY_STATE_FLUSHED) {
+            q->state = NV50_QUERY_STATE_FLUSHED;
             PUSH_KICK(nv50->base.pushbuf);
          }
-         return FALSE;
+         return false;
       }
       if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
-         return FALSE;
+         return false;
    }
-   q->ready = TRUE;
+   q->state = NV50_QUERY_STATE_READY;
 
    switch (q->type) {
    case PIPE_QUERY_GPU_FINISHED:
-      res8[0] = TRUE;
+      res8[0] = true;
       break;
    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
-      res64[0] = q->data[1];
+      res64[0] = q->data[1] - q->data[5];
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
    case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
@@ -320,7 +347,7 @@
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
       res64[0] = 1000000000;
-      res8[8] = FALSE;
+      res8[8] = false;
       break;
    case PIPE_QUERY_TIME_ELAPSED:
       res64[0] = data64[1] - data64[3];
@@ -329,10 +356,10 @@
       res32[0] = q->data[1];
       break;
    default:
-      return FALSE;
+      return false;
    }
 
-   return TRUE;
+   return true;
 }
 
 void
@@ -359,7 +386,7 @@
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nv50_query *q;
    uint32_t cond;
-   boolean wait =
+   bool wait =
       mode != PIPE_RENDER_COND_NO_WAIT &&
       mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
 
@@ -373,13 +400,12 @@
       case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
          cond = condition ? NV50_3D_COND_MODE_EQUAL :
                             NV50_3D_COND_MODE_NOT_EQUAL;
-         wait = TRUE;
+         wait = true;
          break;
       case PIPE_QUERY_OCCLUSION_COUNTER:
       case PIPE_QUERY_OCCLUSION_PREDICATE:
          if (likely(!condition)) {
-            /* XXX: Placeholder, handle nesting here if available */
-            if (unlikely(false))
+            if (unlikely(q->nesting))
                cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
                              NV50_3D_COND_MODE_ALWAYS;
             else
@@ -426,24 +452,24 @@
 }
 
 void
-nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                           struct pipe_query *pq, unsigned result_offset)
 {
    struct nv50_query *q = nv50_query(pq);
 
-   /* XXX: does this exist ? */
-#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+   nv50_query_update(q);
+   if (q->state != NV50_QUERY_STATE_READY)
+      nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
+   q->state = NV50_QUERY_STATE_READY;
 
-   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
-   nouveau_pushbuf_space(push, 0, 0, 1);
-   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
-                        NV50_IB_ENTRY_1_NO_PREFETCH);
+   BEGIN_NV04(push, SUBC_3D(method), 1);
+   PUSH_DATA (push, q->data[result_offset / 4]);
 }
 
 void
 nva0_so_target_save_offset(struct pipe_context *pipe,
                            struct pipe_stream_output_target *ptarg,
-                           unsigned index, boolean serialize)
+                           unsigned index, bool serialize)
 {
    struct nv50_so_target *targ = nv50_so_target(ptarg);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_resource.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_resource.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_resource.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_resource.h	2015-09-16 14:36:09.000000000 +0000
@@ -35,7 +35,7 @@
 
 uint32_t
 nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
-                                 boolean is_3d);
+                                 bool is_3d);
 
 struct nv50_miptree_level {
    uint32_t offset;
@@ -50,13 +50,13 @@
    struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS];
    uint32_t total_size;
    uint32_t layer_stride;
-   boolean layout_3d; /* TRUE if layer count varies with mip level */
+   bool layout_3d; /* true if layer count varies with mip level */
    uint8_t ms_x;      /* log2 of number of samples in x/y dimension */
    uint8_t ms_y;
    uint8_t ms_mode;
 };
 
-static INLINE struct nv50_miptree *
+static inline struct nv50_miptree *
 nv50_miptree(struct pipe_resource *pt)
 {
    return (struct nv50_miptree *)pt;
@@ -70,7 +70,7 @@
 
 /* Internal functions:
  */
-boolean
+bool
 nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align);
 
 struct pipe_resource *
@@ -98,13 +98,13 @@
    uint16_t depth;
 };
 
-static INLINE struct nv50_surface *
+static inline struct nv50_surface *
 nv50_surface(struct pipe_surface *ps)
 {
    return (struct nv50_surface *)ps;
 }
 
-static INLINE enum pipe_format
+static inline enum pipe_format
 nv50_zs_to_s_format(enum pipe_format format)
 {
    switch (format) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -51,19 +51,19 @@
                                 unsigned bindings)
 {
    if (sample_count > 8)
-      return FALSE;
+      return false;
    if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
-      return FALSE;
+      return false;
    if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
-      return FALSE;
+      return false;
 
    if (!util_format_is_supported(format, bindings))
-      return FALSE;
+      return false;
 
    switch (format) {
    case PIPE_FORMAT_Z16_UNORM:
       if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
-         return FALSE;
+         return false;
       break;
    default:
       break;
@@ -176,6 +176,9 @@
    case PIPE_CAP_CLIP_HALFZ:
    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -210,6 +213,7 @@
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -286,11 +290,12 @@
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
       /* The chip could handle more sampler views than samplers */
    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
-      return MIN2(32, PIPE_MAX_SAMPLERS);
+      return MIN2(16, PIPE_MAX_SAMPLERS);
    case PIPE_SHADER_CAP_DOUBLES:
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       return 0;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
@@ -453,7 +458,7 @@
    BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
    PUSH_DATA (push, 0xf);
 
-   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
       BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
       PUSH_DATA (push, 0x18);
    }
@@ -733,7 +738,7 @@
    nv50_screen_init_resource_functions(pscreen);
 
    if (screen->base.device->chipset < 0x84 ||
-       debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+       debug_get_bool_option("NOUVEAU_PMPEG", false)) {
       /* PMPEG */
       nouveau_screen_init_vdec(&screen->base);
    } else if (screen->base.device->chipset < 0x98 ||
@@ -889,7 +894,7 @@
 
    nv50_screen_init_hwctx(screen);
 
-   nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+   nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
 
    return pscreen;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_screen.h	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,14 +32,14 @@
    uint32_t semantic_color;
    uint32_t semantic_psize;
    int32_t index_bias;
-   boolean uniform_buffer_bound[3];
-   boolean prim_restart;
-   boolean point_sprite;
-   boolean rt_serialize;
-   boolean flushed;
-   boolean rasterizer_discard;
+   bool uniform_buffer_bound[3];
+   bool prim_restart;
+   bool point_sprite;
+   bool rt_serialize;
+   bool flushed;
+   bool rasterizer_discard;
    uint8_t tls_required;
-   boolean new_tls_space;
+   bool new_tls_space;
    uint8_t num_vtxbufs;
    uint8_t num_vtxelts;
    uint8_t num_textures[3];
@@ -54,6 +54,8 @@
    struct nv50_context *cur_ctx;
    struct nv50_graph_state save_state;
 
+   int num_occlusion_queries_active;
+
    struct nouveau_bo *code;
    struct nouveau_bo *uniforms;
    struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
@@ -95,19 +97,19 @@
    struct nouveau_object *m2mf;
 };
 
-static INLINE struct nv50_screen *
+static inline struct nv50_screen *
 nv50_screen(struct pipe_screen *screen)
 {
    return (struct nv50_screen *)screen;
 }
 
-boolean nv50_blitter_create(struct nv50_screen *);
+bool nv50_blitter_create(struct nv50_screen *);
 void nv50_blitter_destroy(struct nv50_screen *);
 
 int nv50_screen_tic_alloc(struct nv50_screen *, void *);
 int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
 
-static INLINE void
+static inline void
 nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
    struct nv50_screen *screen = nv50_screen(res->base.screen);
@@ -119,7 +121,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
 {
    if (likely(res->bo)) {
@@ -142,21 +144,21 @@
 
 extern const struct nv50_format nv50_format_table[];
 
-static INLINE void
+static inline void
 nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
 {
    if (tic->id >= 0)
       screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
 }
 
-static INLINE void
+static inline void
 nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
 {
    if (tsc->id >= 0)
       screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
 }
 
-static INLINE void
+static inline void
 nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
 {
    if (tic->id >= 0) {
@@ -165,7 +167,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
 {
    if (tsc->id >= 0) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -60,7 +60,7 @@
                continue;
             }
             if (!nv50->state.uniform_buffer_bound[s]) {
-               nv50->state.uniform_buffer_bound[s] = TRUE;
+               nv50->state.uniform_buffer_bound[s] = true;
                BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
                PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
             }
@@ -99,33 +99,35 @@
                PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
 
                BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+
+               nv50->cb_dirty = 1; /* Force cache flush for UBO. */
             } else {
                BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
                PUSH_DATA (push, (i << 8) | p | 0);
             }
             if (i == 0)
-               nv50->state.uniform_buffer_bound[s] = FALSE;
+               nv50->state.uniform_buffer_bound[s] = false;
          }
       }
    }
 }
 
-static boolean
+static bool
 nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
 {
    if (!prog->translated) {
       prog->translated = nv50_program_translate(
          prog, nv50->screen->base.device->chipset);
       if (!prog->translated)
-         return FALSE;
+         return false;
    } else
    if (prog->mem)
-      return TRUE;
+      return true;
 
    return nv50_program_upload_code(nv50, prog);
 }
 
-static INLINE void
+static inline void
 nv50_program_update_context_state(struct nv50_context *nv50,
                                   struct nv50_program *prog, int stage)
 {
@@ -136,7 +138,7 @@
          nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
       if (!nv50->state.tls_required || nv50->state.new_tls_space)
          BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
-      nv50->state.new_tls_space = FALSE;
+      nv50->state.new_tls_space = false;
       nv50->state.tls_required |= 1 << stage;
    } else {
       if (nv50->state.tls_required == (1 << stage))
@@ -243,11 +245,11 @@
          for (i = 0; i < 8; ++i)
             PUSH_DATA(push, 0);
 
-         nv50->state.point_sprite = FALSE;
+         nv50->state.point_sprite = false;
       }
       return;
    } else {
-      nv50->state.point_sprite = TRUE;
+      nv50->state.point_sprite = true;
    }
 
    memset(pntc, 0, sizeof(pntc));
@@ -639,20 +641,21 @@
       PUSH_DATA (push, so->num_attribs[i]);
       if (n == 4) {
          PUSH_DATA(push, targ->pipe.buffer_size);
-
-         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
          if (!targ->clean) {
             assert(targ->pq);
-            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+            nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
+                                      targ->pq, 0x4);
          } else {
+            BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
             PUSH_DATA(push, 0);
-            targ->clean = FALSE;
+            targ->clean = false;
          }
       } else {
          const unsigned limit = targ->pipe.buffer_size /
             (so->stride[i] * nv50->state.prim_size);
          prims = MIN2(prims, limit);
       }
+      targ->stride = so->stride[i];
       BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
    }
    if (prims != ~0) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_state.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -62,7 +62,7 @@
  *     in advance to maintain elegant separate shader objects.)
  */
 
-static INLINE uint32_t
+static inline uint32_t
 nv50_colormask(unsigned mask)
 {
    uint32_t ret = 0;
@@ -82,7 +82,7 @@
 #define NV50_BLEND_FACTOR_CASE(a, b) \
    case PIPE_BLENDFACTOR_##a: return NV50_BLEND_FACTOR_##b
 
-static INLINE uint32_t
+static inline uint32_t
 nv50_blend_fac(unsigned factor)
 {
    switch (factor) {
@@ -116,8 +116,7 @@
 {
    struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
    int i;
-   boolean emit_common_func = cso->rt[0].blend_enable;
-   uint32_t ms;
+   bool emit_common_func = cso->rt[0].blend_enable;
 
    if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
       SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
@@ -137,11 +136,11 @@
       for (i = 0; i < 8; ++i) {
          SB_DATA(so, cso->rt[i].blend_enable);
          if (cso->rt[i].blend_enable)
-            emit_common_func = TRUE;
+            emit_common_func = true;
       }
 
       if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
-         emit_common_func = FALSE;
+         emit_common_func = false;
 
          for (i = 0; i < 8; ++i) {
             if (!cso->rt[i].blend_enable)
@@ -189,15 +188,6 @@
       SB_DATA    (so, nv50_colormask(cso->rt[0].colormask));
    }
 
-   ms = 0;
-   if (cso->alpha_to_coverage)
-      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
-   if (cso->alpha_to_one)
-      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
-
-   SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
-   SB_DATA    (so, ms);
-
    assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
    return so;
 }
@@ -373,6 +363,16 @@
       SB_DATA    (so, 0);
    }
 
+   SB_BEGIN_3D(so, DEPTH_BOUNDS_EN, 1);
+   if (cso->depth.bounds_test) {
+      SB_DATA    (so, 1);
+      SB_BEGIN_3D(so, DEPTH_BOUNDS(0), 2);
+      SB_DATA    (so, fui(cso->depth.bounds_min));
+      SB_DATA    (so, fui(cso->depth.bounds_max));
+   } else {
+      SB_DATA    (so, 0);
+   }
+
    if (cso->stencil[0].enabled) {
       SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
       SB_DATA    (so, 1);
@@ -439,7 +439,7 @@
 #define NV50_TSC_WRAP_CASE(n) \
     case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
 
-static INLINE unsigned
+static inline unsigned
 nv50_tsc_wrap_mode(unsigned wrap)
 {
    switch (wrap) {
@@ -572,7 +572,7 @@
    FREE(hwcso);
 }
 
-static INLINE void
+static inline void
 nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
                                unsigned nr, void **hwcso)
 {
@@ -650,7 +650,7 @@
    FREE(nv50_tic_entry(view));
 }
 
-static INLINE void
+static inline void
 nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
                              unsigned nr,
                              struct pipe_sampler_view **views)
@@ -808,7 +808,7 @@
 
    pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
 
-   nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+   nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
    if (nv50->constbuf[s][i].user) {
       nv50->constbuf[s][i].u.data = cb->user_buffer;
       nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
@@ -1041,7 +1041,7 @@
    } else {
       targ->pq = NULL;
    }
-   targ->clean = TRUE;
+   targ->clean = true;
 
    targ->pipe.buffer_size = size;
    targ->pipe.buffer_offset = offset;
@@ -1075,32 +1075,32 @@
 {
    struct nv50_context *nv50 = nv50_context(pipe);
    unsigned i;
-   boolean serialize = TRUE;
-   const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
+   bool serialize = true;
+   const bool can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
 
    assert(num_targets <= 4);
 
    for (i = 0; i < num_targets; ++i) {
-      const boolean changed = nv50->so_target[i] != targets[i];
-      const boolean append = (offsets[i] == (unsigned)-1);
+      const bool changed = nv50->so_target[i] != targets[i];
+      const bool append = (offsets[i] == (unsigned)-1);
       if (!changed && append)
          continue;
       nv50->so_targets_dirty |= 1 << i;
 
       if (can_resume && changed && nv50->so_target[i]) {
          nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
-         serialize = FALSE;
+         serialize = false;
       }
 
       if (targets[i] && !append)
-         nv50_so_target(targets[i])->clean = TRUE;
+         nv50_so_target(targets[i])->clean = true;
 
       pipe_so_target_reference(&nv50->so_target[i], targets[i]);
    }
    for (; i < nv50->num_so_targets; ++i) {
       if (can_resume && nv50->so_target[i]) {
          nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
-         serialize = FALSE;
+         serialize = false;
       }
       pipe_so_target_reference(&nv50->so_target[i], NULL);
       nv50->so_targets_dirty |= 1 << i;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h	2015-09-16 14:36:09.000000000 +0000
@@ -19,7 +19,7 @@
 struct nv50_blend_stateobj {
    struct pipe_blend_state pipe;
    int size;
-   uint32_t state[84]; // TODO: allocate less if !independent_blend_enable
+   uint32_t state[82]; // TODO: allocate less if !independent_blend_enable
 };
 
 struct nv50_rasterizer_stateobj {
@@ -31,7 +31,7 @@
 struct nv50_zsa_stateobj {
    struct pipe_depth_stencil_alpha_state pipe;
    int size;
-   uint32_t state[29];
+   uint32_t state[34];
 };
 
 struct nv50_constbuf {
@@ -41,7 +41,7 @@
    } u;
    uint32_t size; /* max 65536 */
    uint32_t offset;
-   boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+   bool user; /* should only be true if u.data is valid and non-NULL */
 };
 
 struct nv50_vertex_element {
@@ -56,7 +56,7 @@
    unsigned num_elements;
    uint32_t instance_elts;
    uint32_t instance_bufs;
-   boolean need_conversion;
+   bool need_conversion;
    unsigned vertex_size;
    unsigned packet_vertex_limit;
    struct nv50_vertex_element element[0];
@@ -66,10 +66,10 @@
    struct pipe_stream_output_target pipe;
    struct pipe_query *pq;
    unsigned stride;
-   boolean clean;
+   bool clean;
 };
 
-static INLINE struct nv50_so_target *
+static inline struct nv50_so_target *
 nv50_so_target(struct pipe_stream_output_target *ptarg)
 {
    return (struct nv50_so_target *)ptarg;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h	2015-09-16 14:36:09.000000000 +0000
@@ -9,7 +9,7 @@
    uint32_t tsc[8];
 };
 
-static INLINE struct nv50_tsc_entry *
+static inline struct nv50_tsc_entry *
 nv50_tsc_entry(void *hwcso)
 {
    return (struct nv50_tsc_entry *)hwcso;
@@ -21,7 +21,7 @@
    uint32_t tic[8];
 };
 
-static INLINE struct nv50_tic_entry *
+static inline struct nv50_tic_entry *
 nv50_tic_entry(struct pipe_sampler_view *view)
 {
    return (struct nv50_tic_entry *)view;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c	2015-09-16 14:36:09.000000000 +0000
@@ -1,8 +1,10 @@
 
+#include "util/u_format.h"
+
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_defs.xml.h"
 
-static INLINE void
+static inline void
 nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
 {
    BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 4);
@@ -82,7 +84,7 @@
       ms_mode = mt->ms_mode;
 
       if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
-         nv50->state.rt_serialize = TRUE;
+         nv50->state.rt_serialize = true;
       mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
       mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
 
@@ -111,7 +113,7 @@
       ms_mode = mt->ms_mode;
 
       if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
-         nv50->state.rt_serialize = TRUE;
+         nv50->state.rt_serialize = true;
       mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
       mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
 
@@ -275,7 +277,7 @@
    nv50->viewports_dirty = 0;
 }
 
-static INLINE void
+static inline void
 nv50_check_program_ucps(struct nv50_context *nv50,
                         struct nv50_program *vp, uint8_t mask)
 {
@@ -314,6 +316,25 @@
 }
 
 static void
+nv50_validate_derived_3(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+   uint32_t ms = 0;
+
+   if ((!fb->nr_cbufs || !fb->cbufs[0] ||
+        !util_format_is_pure_integer(fb->cbufs[0]->format)) && nv50->blend) {
+      if (nv50->blend->pipe.alpha_to_coverage)
+         ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+      if (nv50->blend->pipe.alpha_to_one)
+         ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+   }
+
+   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
+   PUSH_DATA (push, ms);
+}
+
+static void
 nv50_validate_clip(struct nv50_context *nv50)
 {
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
@@ -474,6 +495,7 @@
     { nv50_validate_derived_rs,    NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
                                    NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
     { nv50_validate_derived_2,     NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
+    { nv50_validate_derived_3,     NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
     { nv50_validate_clip,          NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
                                    NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
     { nv50_constbufs_validate,     NV50_NEW_CONSTBUF },
@@ -481,12 +503,13 @@
     { nv50_validate_samplers,      NV50_NEW_SAMPLERS },
     { nv50_stream_output_validate, NV50_NEW_STRMOUT |
                                    NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
-    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
+                                   NV50_NEW_VERTPROG },
     { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
-boolean
+bool
 nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
 {
    uint32_t state_mask;
@@ -508,19 +531,19 @@
       nv50->dirty &= ~state_mask;
 
       if (nv50->state.rt_serialize) {
-         nv50->state.rt_serialize = FALSE;
+         nv50->state.rt_serialize = false;
          BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
          PUSH_DATA (nv50->base.pushbuf, 0);
       }
 
-      nv50_bufctx_fence(nv50->bufctx_3d, FALSE);
+      nv50_bufctx_fence(nv50->bufctx_3d, false);
    }
    nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
    ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
 
    if (unlikely(nv50->state.flushed)) {
-      nv50->state.flushed = FALSE;
-      nv50_bufctx_fence(nv50->bufctx_3d, TRUE);
+      nv50->state.flushed = false;
+      nv50_bufctx_fence(nv50->bufctx_3d, true);
    }
    return !ret;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_surface.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -49,8 +49,8 @@
 #define NOUVEAU_DRIVER 0x50
 #include "nv50/nv50_blit.h"
 
-static INLINE uint8_t
-nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+static inline uint8_t
+nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
 {
    uint8_t id = nv50_format_table[format].rt;
 
@@ -68,6 +68,10 @@
       return NV50_SURFACE_FORMAT_R16_UNORM;
    case 4:
       return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+   case 8:
+      return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+   case 16:
+      return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
    default:
       return 0;
    }
@@ -76,7 +80,7 @@
 static int
 nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
                     struct nv50_miptree *mt, unsigned level, unsigned layer,
-                    enum pipe_format pformat, boolean dst_src_pformat_equal)
+                    enum pipe_format pformat, bool dst_src_pformat_equal)
 {
    struct nouveau_bo *bo = mt->base.bo;
    uint32_t width, height, depth;
@@ -153,7 +157,7 @@
    const enum pipe_format dfmt = dst->base.base.format;
    const enum pipe_format sfmt = src->base.base.format;
    int ret;
-   boolean eqfmt = dfmt == sfmt;
+   bool eqfmt = dfmt == sfmt;
 
    if (!PUSH_SPACE(push, 2 * 16 + 32))
       return PIPE_ERROR;
@@ -196,7 +200,7 @@
 {
    struct nv50_context *nv50 = nv50_context(pipe);
    int ret;
-   boolean m2mf;
+   bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -658,7 +662,7 @@
    };
 
    blit->vp.type = PIPE_SHADER_VERTEX;
-   blit->vp.translated = TRUE;
+   blit->vp.translated = true;
    blit->vp.code = (uint32_t *)code; /* const_cast */
    blit->vp.code_size = sizeof(code);
    blit->vp.max_gpr = 4;
@@ -687,24 +691,24 @@
 
    const unsigned target = nv50_blit_get_tgsi_texture_target(ptarg);
 
-   boolean tex_rgbaz = FALSE;
-   boolean tex_s = FALSE;
-   boolean cvt_un8 = FALSE;
+   bool tex_rgbaz = false;
+   bool tex_s = false;
+   bool cvt_un8 = false;
 
    if (mode != NV50_BLIT_MODE_PASS &&
        mode != NV50_BLIT_MODE_Z24X8 &&
        mode != NV50_BLIT_MODE_X8Z24)
-      tex_s = TRUE;
+      tex_s = true;
 
    if (mode != NV50_BLIT_MODE_X24S8 &&
        mode != NV50_BLIT_MODE_S8X24 &&
        mode != NV50_BLIT_MODE_XS)
-      tex_rgbaz = TRUE;
+      tex_rgbaz = true;
 
    if (mode != NV50_BLIT_MODE_PASS &&
        mode != NV50_BLIT_MODE_ZS &&
        mode != NV50_BLIT_MODE_XS)
-      cvt_un8 = TRUE;
+      cvt_un8 = true;
 
    ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!ureg)
@@ -1003,6 +1007,8 @@
    /* zsa state */
    BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
    PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1);
+   PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
    PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
@@ -1271,7 +1277,7 @@
    int i;
    uint32_t mode;
    uint32_t mask = nv50_blit_eng2d_get_mask(info);
-   boolean b;
+   bool b;
 
    mode = nv50_blit_get_filter(info) ?
       NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -1416,7 +1422,7 @@
          PUSH_DATA (push, srcy >> 32);
       }
    }
-   nv50_bufctx_fence(nv50->bufctx, FALSE);
+   nv50_bufctx_fence(nv50->bufctx, false);
 
    nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
 
@@ -1438,71 +1444,82 @@
 nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
 {
    struct nv50_context *nv50 = nv50_context(pipe);
-   boolean eng3d = FALSE;
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   bool eng3d = FALSE;
 
    if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
       if (!(info->mask & PIPE_MASK_ZS))
          return;
       if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
           info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
-         eng3d = TRUE;
+         eng3d = true;
       if (info->filter != PIPE_TEX_FILTER_NEAREST)
-         eng3d = TRUE;
+         eng3d = true;
    } else {
       if (!(info->mask & PIPE_MASK_RGBA))
          return;
       if (info->mask != PIPE_MASK_RGBA)
-         eng3d = TRUE;
+         eng3d = true;
    }
 
    if (nv50_miptree(info->src.resource)->layout_3d) {
-      eng3d = TRUE;
+      eng3d = true;
    } else
    if (info->src.box.depth != info->dst.box.depth) {
-      eng3d = TRUE;
+      eng3d = true;
       debug_printf("blit: cannot filter array or cube textures in z direction");
    }
 
    if (!eng3d && info->dst.format != info->src.format) {
       if (!nv50_2d_dst_format_faithful(info->dst.format) ||
           !nv50_2d_src_format_faithful(info->src.format)) {
-         eng3d = TRUE;
+         eng3d = true;
       } else
       if (!nv50_2d_src_format_faithful(info->src.format)) {
          if (!util_format_is_luminance(info->src.format)) {
             if (util_format_is_intensity(info->src.format))
-               eng3d = TRUE;
+               eng3d = true;
             else
             if (!nv50_2d_dst_format_ops_supported(info->dst.format))
-               eng3d = TRUE;
+               eng3d = true;
             else
                eng3d = !nv50_2d_format_supported(info->src.format);
          }
       } else
       if (util_format_is_luminance_alpha(info->src.format))
-         eng3d = TRUE;
+         eng3d = true;
    }
 
    if (info->src.resource->nr_samples == 8 &&
        info->dst.resource->nr_samples <= 1)
-      eng3d = TRUE;
+      eng3d = true;
 
    /* FIXME: can't make this work with eng2d anymore */
    if ((info->src.resource->nr_samples | 1) !=
        (info->dst.resource->nr_samples | 1))
-      eng3d = TRUE;
+      eng3d = true;
 
    /* FIXME: find correct src coordinate adjustments */
    if ((info->src.box.width !=  info->dst.box.width &&
         info->src.box.width != -info->dst.box.width) ||
        (info->src.box.height !=  info->dst.box.height &&
         info->src.box.height != -info->dst.box.height))
-      eng3d = TRUE;
+      eng3d = true;
+
+   if (nv50->screen->num_occlusion_queries_active) {
+      BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+      PUSH_DATA (push, 0);
+   }
 
    if (!eng3d)
       nv50_blit_eng2d(nv50, info);
    else
       nv50_blit_3d(nv50, info);
+
+   if (nv50->screen->num_occlusion_queries_active) {
+      BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+      PUSH_DATA (push, 1);
+   }
 }
 
 static void
@@ -1511,13 +1528,13 @@
 {
 }
 
-boolean
+bool
 nv50_blitter_create(struct nv50_screen *screen)
 {
    screen->blitter = CALLOC_STRUCT(nv50_blitter);
    if (!screen->blitter) {
       NOUVEAU_ERR("failed to allocate blitter struct\n");
-      return FALSE;
+      return false;
    }
 
    pipe_mutex_init(screen->blitter->mutex);
@@ -1525,7 +1542,7 @@
    nv50_blitter_make_vp(screen->blitter);
    nv50_blitter_make_sampler(screen->blitter);
 
-   return TRUE;
+   return true;
 }
 
 void
@@ -1548,20 +1565,20 @@
    FREE(blitter);
 }
 
-boolean
+bool
 nv50_blitctx_create(struct nv50_context *nv50)
 {
    nv50->blit = CALLOC_STRUCT(nv50_blitctx);
    if (!nv50->blit) {
       NOUVEAU_ERR("failed to allocate blit context\n");
-      return FALSE;
+      return false;
    }
 
    nv50->blit->nv50 = nv50;
 
    nv50->blit->rast.pipe.half_pixel_center = 1;
 
-   return TRUE;
+   return true;
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_tex.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_tex.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_tex.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_tex.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,8 +31,8 @@
    (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK |   \
     NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
 
-static INLINE uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+static inline uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
 {
    switch (swz) {
    case PIPE_SWIZZLE_RED:
@@ -79,7 +79,7 @@
    uint32_t depth;
    struct nv50_tic_entry *view;
    struct nv50_miptree *mt = nv50_miptree(texture);
-   boolean tex_int;
+   bool tex_int;
 
    view = MALLOC_STRUCT(nv50_tic_entry);
    if (!view)
@@ -193,7 +193,7 @@
       break;
    default:
       NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
-      return FALSE;
+      return false;
    }
 
    tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
@@ -221,13 +221,13 @@
    return &view->pipe;
 }
 
-static boolean
+static bool
 nv50_validate_tic(struct nv50_context *nv50, int s)
 {
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nouveau_bo *txc = nv50->screen->txc;
    unsigned i;
-   boolean need_flush = FALSE;
+   bool need_flush = false;
 
    assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
    for (i = 0; i < nv50->num_textures[s]; ++i) {
@@ -270,7 +270,7 @@
          BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);
          PUSH_DATAp(push, &tic->tic[0], 8);
 
-         need_flush = TRUE;
+         need_flush = true;
       } else
       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
          BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
@@ -316,7 +316,7 @@
 
 void nv50_validate_textures(struct nv50_context *nv50)
 {
-   boolean need_flush;
+   bool need_flush;
 
    need_flush  = nv50_validate_tic(nv50, 0);
    need_flush |= nv50_validate_tic(nv50, 1);
@@ -328,12 +328,12 @@
    }
 }
 
-static boolean
+static bool
 nv50_validate_tsc(struct nv50_context *nv50, int s)
 {
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    unsigned i;
-   boolean need_flush = FALSE;
+   bool need_flush = false;
 
    assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
    for (i = 0; i < nv50->num_samplers[s]; ++i) {
@@ -350,7 +350,7 @@
          nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
                              65536 + tsc->id * 32,
                              NOUVEAU_BO_VRAM, 32, tsc->tsc);
-         need_flush = TRUE;
+         need_flush = true;
       }
       nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
 
@@ -368,7 +368,7 @@
 
 void nv50_validate_samplers(struct nv50_context *nv50)
 {
-   boolean need_flush;
+   bool need_flush;
 
    need_flush  = nv50_validate_tsc(nv50, 0);
    need_flush |= nv50_validate_tsc(nv50, 1);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_vbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_vbo.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_vbo.c	2015-09-16 14:36:09.000000000 +0000
@@ -58,7 +58,7 @@
     so->num_elements = num_elements;
     so->instance_elts = 0;
     so->instance_bufs = 0;
-    so->need_conversion = FALSE;
+    so->need_conversion = false;
 
     memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
 
@@ -89,7 +89,7 @@
                 return NULL;
             }
             so->element[i].state = nv50_format_table[fmt].vtx;
-            so->need_conversion = TRUE;
+            so->need_conversion = true;
         }
         so->element[i].state |= i;
 
@@ -188,7 +188,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nv50_user_vbuf_range(struct nv50_context *nv50, unsigned vbi,
                      uint32_t *base, uint32_t *size)
 {
@@ -229,7 +229,7 @@
          BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
                       NOUVEAU_BO_RD, bo);
    }
-   nv50->base.vbo_dirty = TRUE;
+   nv50->base.vbo_dirty = true;
 }
 
 static void
@@ -275,10 +275,10 @@
       PUSH_DATAh(push, address[b] + ve->src_offset);
       PUSH_DATA (push, address[b] + ve->src_offset);
    }
-   nv50->base.vbo_dirty = TRUE;
+   nv50->base.vbo_dirty = true;
 }
 
-static INLINE void
+static inline void
 nv50_release_user_vbufs(struct nv50_context *nv50)
 {
    if (nv50->vbo_user) {
@@ -293,7 +293,8 @@
    uint64_t addrs[PIPE_MAX_ATTRIBS];
    uint32_t limits[PIPE_MAX_ATTRIBS];
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   struct nv50_vertex_stateobj *vertex = nv50->vertex;
+   struct nv50_vertex_stateobj dummy = {};
+   struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
    struct pipe_vertex_buffer *vb;
    struct nv50_vertex_element *ve;
    uint32_t mask;
@@ -301,6 +302,14 @@
    unsigned i;
    const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
 
+   /* A vertexid is not generated for inline data uploads. Have to use a
+    * VBO. This check must come after the vertprog has been validated,
+    * otherwise vertexid may be unset.
+    */
+   assert(nv50->vertprog->translated);
+   if (nv50->vertprog->vp.vertexid)
+      nv50->vbo_push_hint = 0;
+
    if (unlikely(vertex->need_conversion))
       nv50->vbo_fifo = ~0;
    else
@@ -316,8 +325,7 @@
          struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
          if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
             buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
-            nv50->base.vbo_dirty = TRUE;
-            break;
+            nv50->base.vbo_dirty = true;
          }
       }
    }
@@ -423,7 +431,7 @@
 #define NV50_PRIM_GL_CASE(n) \
    case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
 
-static INLINE unsigned
+static inline unsigned
 nv50_prim_gl(unsigned prim)
 {
    switch (prim) {
@@ -590,7 +598,7 @@
 }
 
 static void
-nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
+nv50_draw_elements(struct nv50_context *nv50, bool shorten,
                    unsigned mode, unsigned start, unsigned count,
                    unsigned instance_count, int32_t index_bias)
 {
@@ -736,9 +744,8 @@
       BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
       PUSH_DATA (push, 0);
       BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
-      PUSH_DATA (push, 0);
-      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
-      nv50_query_pushbuf_submit(push, so->pq, 0x4);
+      PUSH_DATA (push, so->stride);
+      nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
       BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
       PUSH_DATA (push, 0);
 
@@ -751,9 +758,9 @@
 {
    struct nv50_screen *screen = chan->user_priv;
 
-   nouveau_fence_update(&screen->base, TRUE);
+   nouveau_fence_update(&screen->base, true);
 
-   nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, TRUE);
+   nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, true);
 }
 
 void
@@ -806,7 +813,7 @@
             continue;
 
          if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-            nv50->cb_dirty = TRUE;
+            nv50->cb_dirty = true;
       }
    }
 
@@ -814,7 +821,7 @@
    if (nv50->cb_dirty) {
       BEGIN_NV04(push, NV50_3D(CODE_CB_FLUSH), 1);
       PUSH_DATA (push, 0);
-      nv50->cb_dirty = FALSE;
+      nv50->cb_dirty = false;
    }
 
    if (nv50->vbo_fifo) {
@@ -835,21 +842,17 @@
       if (!nv50->vtxbuf[i].buffer)
          continue;
       if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-         nv50->base.vbo_dirty = TRUE;
+         nv50->base.vbo_dirty = true;
    }
 
-   if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
-       nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nv50->base.vbo_dirty = TRUE;
-
    if (nv50->base.vbo_dirty) {
       BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
       PUSH_DATA (push, 0);
-      nv50->base.vbo_dirty = FALSE;
+      nv50->base.vbo_dirty = false;
    }
 
    if (info->indexed) {
-      boolean shorten = info->max_index <= 65535;
+      bool shorten = info->max_index <= 65535;
 
       if (info->primitive_restart != nv50->state.prim_restart) {
          if (info->primitive_restart) {
@@ -858,7 +861,7 @@
             PUSH_DATA (push, info->restart_index);
 
             if (info->restart_index > 65535)
-               shorten = FALSE;
+               shorten = false;
          } else {
             BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1);
             PUSH_DATA (push, 0);
@@ -870,7 +873,7 @@
          PUSH_DATA (push, info->restart_index);
 
          if (info->restart_index > 65535)
-            shorten = FALSE;
+            shorten = false;
       }
 
       nv50_draw_elements(nv50, shorten,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv50_winsys.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv50_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -16,14 +16,14 @@
 #endif
 
 
-static INLINE void
+static inline void
 nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
                             unsigned flags, struct nouveau_bo *bo)
 {
    nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
 }
 
-static INLINE void
+static inline void
 nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin,
                          struct nv04_resource *res, unsigned flags)
 {
@@ -39,7 +39,7 @@
 #define BCTX_REFN(bctx, bin, res, acc) \
    nv50_add_bufctx_resident(bctx, NV50_BIND_##bin, res, NOUVEAU_BO_##acc)
 
-static INLINE void
+static inline void
 PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
 {
    struct nouveau_pushbuf_refn ref = { bo, flags };
@@ -61,39 +61,39 @@
 #define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
 
 
-static INLINE uint32_t
+static inline uint32_t
 NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
 {
    return 0x00000000 | (size << 18) | (subc << 13) | mthd;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 NV50_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
 {
    return 0x40000000 | (size << 18) | (subc << 13) | mthd;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 NV50_FIFO_PKHDR_L(int subc, int mthd)
 {
    return 0x00030000 | (subc << 13) | mthd;
 }
 
 
-static INLINE uint32_t
+static inline uint32_t
 nouveau_bo_memtype(const struct nouveau_bo *bo)
 {
    return bo->config.nv50.memtype;
 }
 
 
-static INLINE void
+static inline void
 PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
 {
    *push->cur++ = (uint32_t)(data >> 32);
 }
 
-static INLINE void
+static inline void
 BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
 {
 #ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
@@ -102,7 +102,7 @@
    PUSH_DATA (push, NV50_FIFO_PKHDR(subc, mthd, size));
 }
 
-static INLINE void
+static inline void
 BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
 {
 #ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
@@ -112,7 +112,7 @@
 }
 
 /* long, non-incremental, nv50-only */
-static INLINE void
+static inline void
 BEGIN_NL50(struct nouveau_pushbuf *push, int subc, int mthd, uint32_t size)
 {
 #ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv84_video.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv84_video.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv84_video.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv84_video.h	2015-09-16 14:36:09.000000000 +0000
@@ -102,12 +102,12 @@
    uint8_t mpeg12_non_intra_matrix[64];
 };
 
-static INLINE uint32_t mb(uint32_t coord)
+static inline uint32_t mb(uint32_t coord)
 {
    return (coord + 0xf)>>4;
 }
 
-static INLINE uint32_t mb_half(uint32_t coord)
+static inline uint32_t mb_half(uint32_t coord)
 {
    return (coord + 0x1f)>>5;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c	2015-09-16 14:36:09.000000000 +0000
@@ -221,7 +221,7 @@
    PUSH_KICK (push);
 }
 
-static INLINE int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
+static inline int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
    int16_t ret = val * quant / 16;
    if (mpeg1 && ret) {
       if (ret > 0)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,7 @@
       return ret;
    }
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
+   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,
                         &screen->parm);
    if (ret)
       return ret;
@@ -121,51 +121,51 @@
    return 0;
 }
 
-boolean
+bool
 nvc0_compute_validate_program(struct nvc0_context *nvc0)
 {
    struct nvc0_program *prog = nvc0->compprog;
 
    if (prog->mem)
-      return TRUE;
+      return true;
 
    if (!prog->translated) {
       prog->translated = nvc0_program_translate(
          prog, nvc0->screen->base.device->chipset);
       if (!prog->translated)
-         return FALSE;
+         return false;
    }
    if (unlikely(!prog->code_size))
-      return FALSE;
+      return false;
 
    if (likely(prog->code_size)) {
       if (nvc0_program_upload_code(nvc0, prog)) {
          struct nouveau_pushbuf *push = nvc0->base.pushbuf;
          BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
          PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
-         return TRUE;
+         return true;
       }
    }
-   return FALSE;
+   return false;
 }
 
-static boolean
+static bool
 nvc0_compute_state_validate(struct nvc0_context *nvc0)
 {
    if (!nvc0_compute_validate_program(nvc0))
-      return FALSE;
+      return false;
 
    /* TODO: textures, samplers, surfaces, global memory buffers */
 
-   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
 
    nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
    if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
-      return FALSE;
+      return false;
    if (unlikely(nvc0->state.flushed))
-      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
 
-   return TRUE;
+   return true;
 
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h	2015-09-16 14:36:09.000000000 +0000
@@ -4,7 +4,7 @@
 #include "nv50/nv50_defs.xml.h"
 #include "nvc0/nvc0_compute.xml.h"
 
-boolean
+bool
 nvc0_compute_validate_program(struct nvc0_context *nvc0);
 
 #endif /* NVC0_COMPUTE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_context.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -63,12 +63,12 @@
          if (!nvc0->vtxbuf[i].buffer)
             continue;
          if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-            nvc0->base.vbo_dirty = TRUE;
+            nvc0->base.vbo_dirty = true;
       }
 
       if (nvc0->idxbuf.buffer &&
           nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-         nvc0->base.vbo_dirty = TRUE;
+         nvc0->base.vbo_dirty = true;
 
       for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
          uint32_t valid = nvc0->constbuf_valid[s];
@@ -86,7 +86,7 @@
                continue;
 
             if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
-               nvc0->cb_dirty = TRUE;
+               nvc0->cb_dirty = true;
          }
       }
    }
@@ -132,6 +132,9 @@
       pipe_resource_reference(res, NULL);
    }
    util_dynarray_fini(&nvc0->global_residents);
+
+   if (nvc0->tcp_empty)
+      nvc0->base.pipe.delete_tcs_state(&nvc0->base.pipe, nvc0->tcp_empty);
 }
 
 static void
@@ -164,9 +167,9 @@
 
    if (screen) {
       nouveau_fence_next(&screen->base);
-      nouveau_fence_update(&screen->base, TRUE);
+      nouveau_fence_update(&screen->base, true);
       if (screen->cur_ctx)
-         screen->cur_ctx->state.flushed = TRUE;
+         screen->cur_ctx->state.flushed = true;
       NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
    }
 }
@@ -306,13 +309,6 @@
    pipe->memory_barrier = nvc0_memory_barrier;
    pipe->get_sample_position = nvc0_context_get_sample_position;
 
-   if (!screen->cur_ctx) {
-      nvc0->state = screen->save_state;
-      screen->cur_ctx = nvc0;
-      nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
-   }
-   screen->base.pushbuf->kick_notify = nvc0_default_kick_notify;
-
    nvc0_init_query_functions(nvc0);
    nvc0_init_surface_functions(nvc0);
    nvc0_init_state_functions(nvc0);
@@ -326,10 +322,25 @@
 
    /* shader builtin library is per-screen, but we need a context for m2mf */
    nvc0_program_library_upload(nvc0);
+   nvc0_program_init_tcp_empty(nvc0);
+   if (!nvc0->tcp_empty)
+      goto out_err;
+   /* set the empty tctl prog on next draw in case one is never set */
+   nvc0->dirty |= NVC0_NEW_TCTLPROG;
+
+   /* now that there are no more opportunities for errors, set the current
+    * context if there isn't already one.
+    */
+   if (!screen->cur_ctx) {
+      nvc0->state = screen->save_state;
+      screen->cur_ctx = nvc0;
+      nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
+   }
+   screen->base.pushbuf->kick_notify = nvc0_default_kick_notify;
 
    /* add permanently resident buffers to bufctxts */
 
-   flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+   flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD;
 
    BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
    BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
@@ -340,7 +351,7 @@
       BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
    }
 
-   flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+   flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR;
 
    if (screen->poly_cache)
       BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
@@ -378,7 +389,7 @@
 
 void
 nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
-                  boolean on_flush)
+                  bool on_flush)
 {
    struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
    struct nouveau_list *it;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_context.h	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -54,6 +54,7 @@
 #define NVC0_NEW_IDXBUF       (1 << 22)
 #define NVC0_NEW_SURFACES     (1 << 23)
 #define NVC0_NEW_MIN_SAMPLES  (1 << 24)
+#define NVC0_NEW_TESSFACTOR   (1 << 25)
 
 #define NVC0_NEW_CP_PROGRAM   (1 << 0)
 #define NVC0_NEW_CP_SURFACES  (1 << 1)
@@ -93,7 +94,7 @@
 
 struct nvc0_blitctx;
 
-boolean nvc0_blitctx_create(struct nvc0_context *);
+bool nvc0_blitctx_create(struct nvc0_context *);
 void nvc0_blitctx_destroy(struct nvc0_context *);
 
 struct nvc0_context {
@@ -127,10 +128,12 @@
    struct nvc0_program *fragprog;
    struct nvc0_program *compprog;
 
+   struct nvc0_program *tcp_empty;
+
    struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
    uint16_t constbuf_dirty[6];
    uint16_t constbuf_valid[6];
-   boolean cb_dirty;
+   bool cb_dirty;
 
    struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
    unsigned num_vtxbufs;
@@ -164,14 +167,17 @@
    unsigned sample_mask;
    unsigned min_samples;
 
-   boolean vbo_push_hint;
+   float default_tess_outer[4];
+   float default_tess_inner[2];
+
+   bool vbo_push_hint;
 
    uint8_t tfbbuf_dirty;
    struct pipe_stream_output_target *tfbbuf[4];
    unsigned num_tfbbufs;
 
    struct pipe_query *cond_query;
-   boolean cond_cond; /* inverted rendering condition */
+   bool cond_cond; /* inverted rendering condition */
    uint cond_mode;
    uint32_t cond_condmode; /* the calculated condition */
 
@@ -184,19 +190,19 @@
    struct util_dynarray global_residents;
 };
 
-static INLINE struct nvc0_context *
+static inline struct nvc0_context *
 nvc0_context(struct pipe_context *pipe)
 {
    return (struct nvc0_context *)pipe;
 }
 
-static INLINE unsigned
+static inline unsigned
 nvc0_shader_stage(unsigned pipe)
 {
    switch (pipe) {
    case PIPE_SHADER_VERTEX: return 0;
-/* case PIPE_SHADER_TESSELLATION_CONTROL: return 1; */
-/* case PIPE_SHADER_TESSELLATION_EVALUATION: return 2; */
+   case PIPE_SHADER_TESS_CTRL: return 1;
+   case PIPE_SHADER_TESS_EVAL: return 2;
    case PIPE_SHADER_GEOMETRY: return 3;
    case PIPE_SHADER_FRAGMENT: return 4;
    case PIPE_SHADER_COMPUTE: return 5;
@@ -210,19 +216,20 @@
 /* nvc0_context.c */
 struct pipe_context *nvc0_create(struct pipe_screen *, void *);
 void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
-                       boolean on_flush);
+                       bool on_flush);
 void nvc0_default_kick_notify(struct nouveau_pushbuf *);
 
 /* nvc0_draw.c */
 extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
 
 /* nvc0_program.c */
-boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
-boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
+bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+bool nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_library_upload(struct nvc0_context *);
 uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
                                     uint32_t label);
+void nvc0_program_init_tcp_empty(struct nvc0_context *);
 
 /* nvc0_query.c */
 void nvc0_init_query_functions(struct nvc0_context *);
@@ -231,7 +238,7 @@
 void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
 void nvc0_so_target_save_offset(struct pipe_context *,
                                 struct pipe_stream_output_target *, unsigned i,
-                                boolean *serialize);
+                                bool *serialize);
 
 #define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
 
@@ -250,8 +257,8 @@
 /* nvc0_state_validate.c */
 void nvc0_validate_global_residents(struct nvc0_context *,
                                     struct nouveau_bufctx *, int bin);
-extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
-                                   unsigned space_words);
+extern bool nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
+                                unsigned space_words);
 
 /* nvc0_surface.c */
 extern void nvc0_clear(struct pipe_context *, unsigned buffers,
@@ -260,7 +267,7 @@
 extern void nvc0_init_surface_functions(struct nvc0_context *);
 
 /* nvc0_tex.c */
-boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
+bool nve4_validate_tsc(struct nvc0_context *nvc0, int s);
 void nvc0_validate_textures(struct nvc0_context *);
 void nvc0_validate_samplers(struct nvc0_context *);
 void nve4_set_tex_handles(struct nvc0_context *);
@@ -292,10 +299,10 @@
                       struct nouveau_bo *dst, unsigned offset, unsigned domain,
                       unsigned size, const void *data);
 void
-nvc0_cb_push(struct nouveau_context *,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
-             unsigned offset, unsigned words, const uint32_t *data);
+nvc0_cb_bo_push(struct nouveau_context *,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data);
 
 /* nvc0_vbo.c */
 void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c	2015-09-16 14:36:09.000000000 +0000
@@ -29,13 +29,13 @@
 #include "nvc0/nvc0_resource.h"
 
 static uint32_t
-nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
+nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
 {
    return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
 }
 
 static uint32_t
-nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
 {
    const unsigned ms = util_logbase2(mt->base.base.nr_samples);
 
@@ -133,7 +133,7 @@
    return tile_flags;
 }
 
-static INLINE boolean
+static inline bool
 nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
 {
    switch (mt->base.base.nr_samples) {
@@ -157,9 +157,9 @@
       break;
    default:
       NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
-      return FALSE;
+      return false;
    }
-   return TRUE;
+   return true;
 }
 
 static void
@@ -250,7 +250,7 @@
    struct nouveau_device *dev = nouveau_screen(pscreen)->device;
    struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
    struct pipe_resource *pt = &mt->base.base;
-   boolean compressed = dev->drm_version >= 0x01000101;
+   bool compressed = dev->drm_version >= 0x01000101;
    int ret;
    union nouveau_bo_config bo_config;
    uint32_t bo_flags;
@@ -302,7 +302,7 @@
    if (!bo_config.nvc0.memtype && (pt->usage == PIPE_USAGE_STAGING || pt->bind & PIPE_BIND_SHARED))
       mt->base.domain = NOUVEAU_BO_GART;
    else
-      mt->base.domain = NOUVEAU_BO_VRAM;
+      mt->base.domain = NV_VRAM_DOMAIN(nouveau_screen(pscreen));
 
    bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP;
 
@@ -325,7 +325,7 @@
 }
 
 /* Offset of zslice @z from start of level @l. */
-INLINE unsigned
+inline unsigned
 nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
 {
    const struct pipe_resource *pt = &mt->base.base;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_program.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_program.c	2015-09-16 14:36:09.000000000 +0000
@@ -22,6 +22,8 @@
 
 #include "pipe/p_defines.h"
 
+#include "tgsi/tgsi_ureg.h"
+
 #include "nvc0/nvc0_context.h"
 
 #include "codegen/nv50_ir_driver.h"
@@ -31,24 +33,25 @@
  * 124 scalar varying values.
  */
 static uint32_t
-nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
+nvc0_shader_input_address(unsigned sn, unsigned si)
 {
    switch (sn) {
-   case NV50_SEMANTIC_TESSFACTOR:   return 0x000 + si * 0x4;
+   case TGSI_SEMANTIC_TESSOUTER:    return 0x000 + si * 0x4;
+   case TGSI_SEMANTIC_TESSINNER:    return 0x010 + si * 0x4;
+   case TGSI_SEMANTIC_PATCH:        return 0x020 + si * 0x10;
    case TGSI_SEMANTIC_PRIMID:       return 0x060;
    case TGSI_SEMANTIC_LAYER:        return 0x064;
    case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
    case TGSI_SEMANTIC_PSIZE:        return 0x06c;
    case TGSI_SEMANTIC_POSITION:     return 0x070;
-   case TGSI_SEMANTIC_GENERIC:      return ubase + si * 0x10;
+   case TGSI_SEMANTIC_GENERIC:      return 0x080 + si * 0x10;
    case TGSI_SEMANTIC_FOG:          return 0x2e8;
    case TGSI_SEMANTIC_COLOR:        return 0x280 + si * 0x10;
    case TGSI_SEMANTIC_BCOLOR:       return 0x2a0 + si * 0x10;
-   case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
    case TGSI_SEMANTIC_CLIPDIST:     return 0x2c0 + si * 0x10;
    case TGSI_SEMANTIC_CLIPVERTEX:   return 0x270;
    case TGSI_SEMANTIC_PCOORD:       return 0x2e0;
-   case NV50_SEMANTIC_TESSCOORD:    return 0x2f0;
+   case TGSI_SEMANTIC_TESSCOORD:    return 0x2f0;
    case TGSI_SEMANTIC_INSTANCEID:   return 0x2f8;
    case TGSI_SEMANTIC_VERTEXID:     return 0x2fc;
    case TGSI_SEMANTIC_TEXCOORD:     return 0x300 + si * 0x10;
@@ -60,20 +63,21 @@
 }
 
 static uint32_t
-nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
+nvc0_shader_output_address(unsigned sn, unsigned si)
 {
    switch (sn) {
-   case NV50_SEMANTIC_TESSFACTOR:    return 0x000 + si * 0x4;
+   case TGSI_SEMANTIC_TESSOUTER:     return 0x000 + si * 0x4;
+   case TGSI_SEMANTIC_TESSINNER:     return 0x010 + si * 0x4;
+   case TGSI_SEMANTIC_PATCH:         return 0x020 + si * 0x10;
    case TGSI_SEMANTIC_PRIMID:        return 0x060;
    case TGSI_SEMANTIC_LAYER:         return 0x064;
    case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
    case TGSI_SEMANTIC_PSIZE:         return 0x06c;
    case TGSI_SEMANTIC_POSITION:      return 0x070;
-   case TGSI_SEMANTIC_GENERIC:       return ubase + si * 0x10;
+   case TGSI_SEMANTIC_GENERIC:       return 0x080 + si * 0x10;
    case TGSI_SEMANTIC_FOG:           return 0x2e8;
    case TGSI_SEMANTIC_COLOR:         return 0x280 + si * 0x10;
    case TGSI_SEMANTIC_BCOLOR:        return 0x2a0 + si * 0x10;
-   case NV50_SEMANTIC_CLIPDISTANCE:  return 0x2c0 + si * 0x4;
    case TGSI_SEMANTIC_CLIPDIST:      return 0x2c0 + si * 0x10;
    case TGSI_SEMANTIC_CLIPVERTEX:    return 0x270;
    case TGSI_SEMANTIC_TEXCOORD:      return 0x300 + si * 0x10;
@@ -95,7 +99,7 @@
       case TGSI_SEMANTIC_VERTEXID:
          info->in[i].mask = 0x1;
          info->in[i].slot[0] =
-            nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4;
+            nvc0_shader_input_address(info->in[i].sn, 0) / 4;
          continue;
       default:
          break;
@@ -111,18 +115,11 @@
 static int
 nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
 {
-   unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
    unsigned offset;
    unsigned i, c;
 
    for (i = 0; i < info->numInputs; ++i) {
-      offset = nvc0_shader_input_address(info->in[i].sn,
-                                         info->in[i].si, ubase);
-      if (info->in[i].patch && offset >= 0x20)
-         offset = 0x20 + info->in[i].si * 0x10;
-
-      if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD)
-         info->in[i].mask &= 3;
+      offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si);
 
       for (c = 0; c < 4; ++c)
          info->in[i].slot[c] = (offset + c * 0x4) / 4;
@@ -157,15 +154,11 @@
 static int
 nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
 {
-   unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
    unsigned offset;
    unsigned i, c;
 
    for (i = 0; i < info->numOutputs; ++i) {
-      offset = nvc0_shader_output_address(info->out[i].sn,
-                                          info->out[i].si, ubase);
-      if (info->out[i].patch && offset >= 0x20)
-         offset = 0x20 + info->out[i].si * 0x10;
+      offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si);
 
       for (c = 0; c < 4; ++c)
          info->out[i].slot[c] = (offset + c * 0x4) / 4;
@@ -193,7 +186,7 @@
    return ret;
 }
 
-static INLINE void
+static inline void
 nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
 {
    uint8_t min = (vp->hdr[4] >> 12) & 0xff;
@@ -216,12 +209,8 @@
          continue;
       for (c = 0; c < 4; ++c) {
          a = info->in[i].slot[c];
-         if (info->in[i].mask & (1 << c)) {
-            if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD)
-               vp->hdr[5 + a / 32] |= 1 << (a % 32);
-            else
-               nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]);
-         }
+         if (info->in[i].mask & (1 << c))
+            vp->hdr[5 + a / 32] |= 1 << (a % 32);
       }
    }
 
@@ -250,6 +239,14 @@
       case TGSI_SEMANTIC_VERTEXID:
          vp->hdr[10] |= 1 << 31;
          break;
+      case TGSI_SEMANTIC_TESSCOORD:
+         /* We don't have the mask, nor the slots populated. While this could
+          * be achieved, the vast majority of the time if either of the coords
+          * are read, then both will be read.
+          */
+         nvc0_vtgp_hdr_update_oread(vp, 0x2f0 / 4);
+         nvc0_vtgp_hdr_update_oread(vp, 0x2f4 / 4);
+         break;
       default:
          break;
       }
@@ -277,7 +274,6 @@
    return nvc0_vtgp_gen_header(vp, info);
 }
 
-#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
 static void
 nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
 {
@@ -305,14 +301,13 @@
       tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
 
    switch (info->prop.tp.partitioning) {
-   case PIPE_TESS_PART_INTEGER:
-   case PIPE_TESS_PART_POW2:
+   case PIPE_TESS_SPACING_EQUAL:
       tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
       break;
-   case PIPE_TESS_PART_FRACT_ODD:
+   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
       tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
       break;
-   case PIPE_TESS_PART_FRACT_EVEN:
+   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
       tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
       break;
    default:
@@ -320,9 +315,7 @@
       break;
    }
 }
-#endif
 
-#ifdef PIPE_SHADER_HULL
 static int
 nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
 {
@@ -346,9 +339,7 @@
 
    return 0;
 }
-#endif
 
-#ifdef PIPE_SHADER_DOMAIN
 static int
 nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
 {
@@ -365,7 +356,6 @@
 
    return 0;
 }
-#endif
 
 static int
 nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
@@ -459,7 +449,7 @@
 
    for (i = 0; i < info->numOutputs; ++i) {
       if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
-         fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+         fp->hdr[18] |= 0xf << info->out[i].slot[0];
    }
 
    fp->fp.early_z = info->prop.fp.earlyFragTests;
@@ -523,7 +513,7 @@
 }
 #endif
 
-boolean
+bool
 nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
 {
    struct nv50_ir_prog_info *info;
@@ -531,7 +521,7 @@
 
    info = CALLOC_STRUCT(nv50_ir_prog_info);
    if (!info)
-      return FALSE;
+      return false;
 
    info->type = prog->type;
    info->target = chipset;
@@ -598,16 +588,12 @@
    case PIPE_SHADER_VERTEX:
       ret = nvc0_vp_gen_header(prog, info);
       break;
-#ifdef PIPE_SHADER_HULL
-   case PIPE_SHADER_HULL:
+   case PIPE_SHADER_TESS_CTRL:
       ret = nvc0_tcp_gen_header(prog, info);
       break;
-#endif
-#ifdef PIPE_SHADER_DOMAIN
-   case PIPE_SHADER_DOMAIN:
+   case PIPE_SHADER_TESS_EVAL:
       ret = nvc0_tep_gen_header(prog, info);
       break;
-#endif
    case PIPE_SHADER_GEOMETRY:
       ret = nvc0_gp_gen_header(prog, info);
       break;
@@ -630,7 +616,7 @@
       assert(info->bin.tlsSpace < (1 << 24));
       prog->hdr[0] |= 1 << 26;
       prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
-      prog->need_tls = TRUE;
+      prog->need_tls = true;
    }
    /* TODO: factor 2 only needed where joinat/precont is used,
     *       and we only have to count non-uniform branches
@@ -638,7 +624,7 @@
    /*
    if ((info->maxCFDepth * 2) > 16) {
       prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
-      prog->need_tls = TRUE;
+      prog->need_tls = true;
    }
    */
    if (info->io.globalAccess)
@@ -655,11 +641,11 @@
    return !ret;
 }
 
-boolean
+bool
 nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
 {
    struct nvc0_screen *screen = nvc0->screen;
-   const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
+   const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
    int ret;
    uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
    uint32_t lib_pos = screen->lib_code->start;
@@ -694,7 +680,7 @@
       ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
       if (ret) {
          NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
-         return FALSE;
+         return false;
       }
       IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0);
    }
@@ -729,24 +715,24 @@
       nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
 
 #ifdef DEBUG
-   if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+   if (debug_get_bool_option("NV50_PROG_DEBUG", false))
       nvc0_program_dump(prog);
 #endif
 
    if (!is_cp)
       nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
-                           NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+                           NV_VRAM_DOMAIN(&screen->base), NVC0_SHADER_HEADER_SIZE, prog->hdr);
    nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
-                        NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+                        NV_VRAM_DOMAIN(&screen->base), prog->code_size, prog->code);
    if (prog->immd_size)
       nvc0->base.push_data(&nvc0->base,
-                           screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+                           screen->text, prog->immd_base, NV_VRAM_DOMAIN(&screen->base),
                            prog->immd_size, prog->immd_data);
 
    BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
    PUSH_DATA (nvc0->base.pushbuf, 0x1011);
 
-   return TRUE;
+   return true;
 }
 
 /* Upload code for builtin functions like integer division emulation. */
@@ -771,7 +757,7 @@
       return;
 
    nvc0->base.push_data(&nvc0->base,
-                        screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+                        screen->text, screen->lib_code->start, NV_VRAM_DOMAIN(&screen->base),
                         size, code);
    /* no need for a memory barrier, will be emitted with first program */
 }
@@ -815,3 +801,18 @@
          return prog->code_base + base + syms[i].offset;
    return prog->code_base; /* no symbols or symbol not found */
 }
+
+void
+nvc0_program_init_tcp_empty(struct nvc0_context *nvc0)
+{
+   struct ureg_program *ureg; /* builder for the placeholder TESS_CTRL program */
+
+   ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL);
+   if (!ureg)
+      return; /* builder creation failed: nvc0->tcp_empty is not assigned here */
+
+   ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, 1); /* sets the TCS_VERTICES_OUT property to 1 */
+   ureg_END(ureg); /* END is the only instruction emitted into this program */
+
+   nvc0->tcp_empty = ureg_create_shader_and_destroy(ureg, &nvc0->base.pipe); /* result stored on the context; NOTE(review): helper presumably frees ureg, per its name - confirm */
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_program.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_program.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_program.h	2015-09-16 14:36:09.000000000 +0000
@@ -21,8 +21,8 @@
    struct pipe_shader_state pipe;
 
    ubyte type;
-   boolean translated;
-   boolean need_tls;
+   bool translated;
+   bool need_tls;
    uint8_t num_gprs;
 
    uint32_t *code;
@@ -41,7 +41,7 @@
       uint8_t clip_enable; /* mask of defined clip planes */
       uint8_t num_ucps; /* also set to max if ClipDistance is used */
       uint8_t edgeflag; /* attribute index of edgeflag input */
-      boolean need_vertex_id;
+      bool need_vertex_id;
    } vp;
    struct {
       uint8_t early_z;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
    uint32_t base;
    uint32_t offset; /* base + i * rotate */
    uint8_t state;
-   boolean is64bit;
+   bool is64bit;
    uint8_t rotate;
    int nesting; /* only used for occlusion queries */
    union {
@@ -62,13 +62,13 @@
 static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
                                        struct nvc0_query *, void *, boolean);
 
-static INLINE struct nvc0_query *
+static inline struct nvc0_query *
 nvc0_query(struct pipe_query *pipe)
 {
    return (struct nvc0_query *)pipe;
 }
 
-static boolean
+static bool
 nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
 {
    struct nvc0_screen *screen = nvc0->screen;
@@ -87,17 +87,17 @@
    if (size) {
       q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
       if (!q->bo)
-         return FALSE;
+         return false;
       q->offset = q->base;
 
       ret = nouveau_bo_map(q->bo, 0, screen->base.client);
       if (ret) {
          nvc0_query_allocate(nvc0, q, 0);
-         return FALSE;
+         return false;
       }
       q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
    }
-   return TRUE;
+   return true;
 }
 
 static void
@@ -126,17 +126,17 @@
       space = NVC0_QUERY_ALLOC_SPACE;
       break;
    case PIPE_QUERY_PIPELINE_STATISTICS:
-      q->is64bit = TRUE;
+      q->is64bit = true;
       space = 512;
       break;
    case PIPE_QUERY_SO_STATISTICS:
    case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-      q->is64bit = TRUE;
+      q->is64bit = true;
       space = 64;
       break;
    case PIPE_QUERY_PRIMITIVES_GENERATED:
    case PIPE_QUERY_PRIMITIVES_EMITTED:
-      q->is64bit = TRUE;
+      q->is64bit = true;
       q->index = index;
       space = 32;
       break;
@@ -257,11 +257,11 @@
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_query *q = nvc0_query(pq);
-   boolean ret = true;
+   bool ret = true;
 
    /* For occlusion queries we have to change the storage, because a previous
-    * query might set the initial render conition to FALSE even *after* we re-
-    * initialized it to TRUE.
+    * query might set the initial render condition to false even *after* we re-
+    * initialized it to true.
     */
    if (q->rotate) {
       nvc0_query_rotate(nvc0, q);
@@ -270,7 +270,7 @@
        *  query ?
        */
       q->data[0] = q->sequence; /* initialize sequence */
-      q->data[1] = 1; /* initial render condition = TRUE */
+      q->data[1] = 1; /* initial render condition = true */
       q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
       q->data[5] = 0;
    }
@@ -401,7 +401,7 @@
       nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
-      /* This query is not issued on GPU because disjoint is forced to FALSE */
+      /* This query is not issued on GPU because disjoint is forced to false */
       q->state = NVC0_QUERY_STATE_READY;
       break;
    default:
@@ -422,7 +422,7 @@
       nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
 }
 
-static INLINE void
+static inline void
 nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
 {
    if (q->is64bit) {
@@ -442,7 +442,7 @@
    struct nvc0_query *q = nvc0_query(pq);
    uint64_t *res64 = (uint64_t*)result;
    uint32_t *res32 = (uint32_t*)result;
-   boolean *res8 = (boolean*)result;
+   uint8_t *res8 = (uint8_t*)result;
    uint64_t *data64 = (uint64_t *)q->data;
    unsigned i;
 
@@ -450,7 +450,7 @@
    if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
        q->type <= NVC0_QUERY_DRV_STAT_LAST) {
       res64[0] = q->u.value;
-      return TRUE;
+      return true;
    } else
 #endif
    if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
@@ -468,17 +468,17 @@
             /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
             PUSH_KICK(nvc0->base.pushbuf);
          }
-         return FALSE;
+         return false;
       }
       if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
-         return FALSE;
+         return false;
       NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
    }
    q->state = NVC0_QUERY_STATE_READY;
 
    switch (q->type) {
    case PIPE_QUERY_GPU_FINISHED:
-      res8[0] = TRUE;
+      res8[0] = true;
       break;
    case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
       res64[0] = q->data[1] - q->data[5];
@@ -502,7 +502,7 @@
       break;
    case PIPE_QUERY_TIMESTAMP_DISJOINT:
       res64[0] = 1000000000;
-      res8[8] = FALSE;
+      res8[8] = false;
       break;
    case PIPE_QUERY_TIME_ELAPSED:
       res64[0] = data64[1] - data64[3];
@@ -516,10 +516,10 @@
       break;
    default:
       assert(0); /* can't happen, we don't create queries with invalid type */
-      return FALSE;
+      return false;
    }
 
-   return TRUE;
+   return true;
 }
 
 void
@@ -549,7 +549,7 @@
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_query *q;
    uint32_t cond;
-   boolean wait =
+   bool wait =
       mode != PIPE_RENDER_COND_NO_WAIT &&
       mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
 
@@ -563,7 +563,7 @@
       case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
          cond = condition ? NVC0_3D_COND_MODE_EQUAL :
                           NVC0_3D_COND_MODE_NOT_EQUAL;
-         wait = TRUE;
+         wait = true;
          break;
       case PIPE_QUERY_OCCLUSION_COUNTER:
       case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -626,12 +626,12 @@
 void
 nvc0_so_target_save_offset(struct pipe_context *pipe,
                            struct pipe_stream_output_target *ptarg,
-                           unsigned index, boolean *serialize)
+                           unsigned index, bool *serialize)
 {
    struct nvc0_so_target *targ = nvc0_so_target(ptarg);
 
    if (*serialize) {
-      *serialize = FALSE;
+      *serialize = false;
       PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
       IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
 
@@ -1080,7 +1080,7 @@
 {
    struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
    const struct nvc0_mp_pm_query_cfg *cfg;
    unsigned i, c;
    unsigned num_ab[2] = { 0, 0 };
@@ -1101,7 +1101,7 @@
    PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);
 
    if (!screen->pm.mp_counters_enabled) {
-      screen->pm.mp_counters_enabled = TRUE;
+      screen->pm.mp_counters_enabled = true;
       BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
       PUSH_DATA (push, 0x1fcb);
    }
@@ -1168,7 +1168,7 @@
    struct nvc0_screen *screen = nvc0->screen;
    struct pipe_context *pipe = &nvc0->base.pipe;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
    uint32_t mask;
    uint32_t input[3];
    const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
@@ -1181,7 +1181,7 @@
    if (unlikely(!screen->pm.prog)) {
       struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
       prog->type = PIPE_SHADER_COMPUTE;
-      prog->translated = TRUE;
+      prog->translated = true;
       prog->num_gprs = 14;
       prog->parm_size = 12;
       if (is_nve4) {
@@ -1249,9 +1249,9 @@
    }
 }
 
-static INLINE boolean
+static inline bool
 nvc0_mp_pm_query_read_data(uint32_t count[32][4],
-                           struct nvc0_context *nvc0, boolean wait,
+                           struct nvc0_context *nvc0, bool wait,
                            struct nvc0_query *q,
                            const struct nvc0_mp_pm_query_cfg *cfg,
                            unsigned mp_count)
@@ -1264,19 +1264,19 @@
       for (c = 0; c < cfg->num_counters; ++c) {
          if (q->data[b + 8] != q->sequence) {
             if (!wait)
-               return FALSE;
+               return false;
             if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
-               return FALSE;
+               return false;
          }
          count[p][c] = q->data[b + q->ctr[c]];
       }
    }
-   return TRUE;
+   return true;
 }
 
-static INLINE boolean
+static inline bool
 nve4_mp_pm_query_read_data(uint32_t count[32][4],
-                           struct nvc0_context *nvc0, boolean wait,
+                           struct nvc0_context *nvc0, bool wait,
                            struct nvc0_query *q,
                            const struct nvc0_mp_pm_query_cfg *cfg,
                            unsigned mp_count)
@@ -1291,9 +1291,9 @@
          for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
             if (q->data[b + 20 + d] != q->sequence) {
                if (!wait)
-                  return FALSE;
+                  return false;
                if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
-                  return FALSE;
+                  return false;
             }
             if (q->ctr[c] & ~0x3)
                count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
@@ -1302,7 +1302,7 @@
          }
       }
    }
-   return TRUE;
+   return true;
 }
 
 /* Metric calculations:
@@ -1325,7 +1325,7 @@
    unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
    unsigned p, c;
    const struct nvc0_mp_pm_query_cfg *cfg;
-   boolean ret;
+   bool ret;
 
    cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
 
@@ -1334,7 +1334,7 @@
    else
       ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
    if (!ret)
-      return FALSE;
+      return false;
 
    if (cfg->op == NVC0_COUNTER_OPn_SUM) {
       for (c = 0; c < cfg->num_counters; ++c)
@@ -1394,7 +1394,7 @@
    }
 
    *(uint64_t *)result = value;
-   return TRUE;
+   return true;
 }
 
 int
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,16 +44,16 @@
                                 unsigned bindings)
 {
    if (sample_count > 8)
-      return FALSE;
+      return false;
    if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
-      return FALSE;
+      return false;
 
    if (!util_format_is_supported(format, bindings))
-      return FALSE;
+      return false;
 
    if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER))
       if (util_format_get_blocksizebits(format) == 3 * 32)
-         return FALSE;
+         return false;
 
    /* transfers & shared are always supported */
    bindings &= ~(PIPE_BIND_TRANSFER_READ |
@@ -120,6 +120,8 @@
       return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
    case PIPE_CAP_ENDIANNESS:
       return PIPE_ENDIAN_LITTLE;
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+      return 30;
 
    /* supported caps */
    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
@@ -163,7 +165,6 @@
    case PIPE_CAP_USER_CONSTANT_BUFFERS:
    case PIPE_CAP_USER_INDEX_BUFFERS:
    case PIPE_CAP_USER_VERTEX_BUFFERS:
-   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
    case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_SAMPLE_SHADING:
    case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
@@ -174,11 +175,16 @@
    case PIPE_CAP_CLIP_HALFZ:
    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
    case PIPE_CAP_COMPUTE:
       return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+      return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
 
    /* unsupported caps */
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -226,13 +232,14 @@
 
    switch (shader) {
    case PIPE_SHADER_VERTEX:
-      /*
-   case PIPE_SHADER_TESSELLATION_CONTROL:
-   case PIPE_SHADER_TESSELLATION_EVALUATION:
-      */
    case PIPE_SHADER_GEOMETRY:
    case PIPE_SHADER_FRAGMENT:
       break;
+   case PIPE_SHADER_TESS_CTRL:
+   case PIPE_SHADER_TESS_EVAL:
+      if (class_3d >= GM107_3D_CLASS)
+         return 0;
+      break;
    case PIPE_SHADER_COMPUTE:
       if (class_3d != NVE4_3D_CLASS)
          return 0;
@@ -297,6 +304,7 @@
       return 1;
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       return 0;
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
       return 16; /* would be 32 in linked (OpenGL-style) mode */
@@ -340,6 +348,7 @@
                               enum pipe_compute_cap param, void *data)
 {
    uint64_t *data64 = (uint64_t *)data;
+   uint32_t *data32 = (uint32_t *)data;
    const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
 
    switch (param) {
@@ -371,6 +380,9 @@
    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
       data64[0] = 4096;
       return 8;
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+      data32[0] = 32;
+      return 4;
    default:
       return 0;
    }
@@ -549,7 +561,7 @@
       /* Using COMPUTE has weird effects on 3D state, we need to
        * investigate this further before enabling it by default.
        */
-      if (debug_get_bool_option("NVC0_COMPUTE", FALSE))
+      if (debug_get_bool_option("NVC0_COMPUTE", false))
          return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
       return 0;
    case 0xe0:
@@ -563,7 +575,7 @@
    }
 }
 
-boolean
+bool
 nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
                             uint32_t lpos, uint32_t lneg, uint32_t cstack)
 {
@@ -573,7 +585,7 @@
 
    if (size >= (1 << 20)) {
       NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
-      return FALSE;
+      return false;
    }
 
    size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
@@ -582,15 +594,15 @@
 
    size = align(size, 1 << 17);
 
-   ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
+   ret = nouveau_bo_new(screen->base.device, NV_VRAM_DOMAIN(&screen->base), 1 << 17, size,
                         NULL, &bo);
    if (ret) {
       NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
-      return FALSE;
+      return false;
    }
    nouveau_bo_ref(NULL, &screen->tls);
    screen->tls = bo;
-   return TRUE;
+   return true;
 }
 
 #define FAIL_SCREEN_INIT(str, err)                    \
@@ -609,6 +621,7 @@
    struct nouveau_pushbuf *push;
    uint64_t value;
    uint32_t obj_class;
+   uint32_t flags;
    int ret;
    unsigned i;
 
@@ -645,6 +658,11 @@
    screen->base.sysmem_bindings |=
       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
 
+   if (screen->base.vram_domain & NOUVEAU_BO_GART) {
+      screen->base.sysmem_bindings |= screen->base.vidmem_bindings;
+      screen->base.vidmem_bindings = 0;
+   }
+
    pscreen->destroy = nvc0_screen_destroy;
    pscreen->context_create = nvc0_create;
    pscreen->is_format_supported = nvc0_screen_is_format_supported;
@@ -659,8 +677,11 @@
    screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
    screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL,
-                        &screen->fence.bo);
+   flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
+   if (dev->drm_version >= 0x01000202)
+      flags |= NOUVEAU_BO_COHERENT;
+
+   ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo);
    if (ret)
       goto fail;
    nouveau_bo_map(screen->fence.bo, 0, NULL);
@@ -775,7 +796,7 @@
    BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
    PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
 
-   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
       /* kill shaders after about 1 second (at 100 MHz) */
       BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1);
       PUSH_DATA (push, 0x17);
@@ -823,7 +844,7 @@
 
    nvc0_magic_3d_init(push, screen->eng3d->oclass);
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
                         &screen->text);
    if (ret)
       goto fail;
@@ -833,12 +854,12 @@
     */
    nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL,
+   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 6 << 16, NULL,
                         &screen->uniform_bo);
    if (ret)
       goto fail;
 
-   PUSH_REFN (push, screen->uniform_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+   PUSH_REFN (push, screen->uniform_bo, NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_WR);
 
    for (i = 0; i < 5; ++i) {
       /* TIC and TSC entries for each unit (nve4+ only) */
@@ -909,7 +930,7 @@
    PUSH_DATA (push, 0);
 
    if (screen->eng3d->oclass < GM107_3D_CLASS) {
-      ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+      ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
                            &screen->poly_cache);
       if (ret)
          goto fail;
@@ -920,7 +941,7 @@
       PUSH_DATA (push, 3);
    }
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL,
+   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 17, NULL,
                         &screen->txc);
    if (ret)
       goto fail;
@@ -1006,6 +1027,7 @@
    PUSH_DATA (push, 0x20);
    BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1);
    PUSH_DATA (push, 0x00);
+   screen->save_state.patch_vertices = 3;
 
    BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1);
    PUSH_DATA (push, 0);
@@ -1025,7 +1047,7 @@
    if (!nvc0_blitter_create(screen))
       goto fail;
 
-   nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+   nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
 
    return pscreen;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,16 +28,17 @@
 struct nvc0_blitter;
 
 struct nvc0_graph_state {
-   boolean flushed;
-   boolean rasterizer_discard;
-   boolean early_z_forced;
-   boolean prim_restart;
+   bool flushed;
+   bool rasterizer_discard;
+   bool early_z_forced;
+   bool prim_restart;
    uint32_t instance_elts; /* bitmask of per-instance elements */
    uint32_t instance_base;
    uint32_t constant_vbos;
    uint32_t constant_elts;
    int32_t index_bias;
    uint16_t scissor;
+   uint8_t patch_vertices;
    uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
    uint8_t num_vtxbufs;
    uint8_t num_vtxelts;
@@ -95,7 +96,7 @@
       struct nvc0_program *prog; /* compute state object to read MP counters */
       struct pipe_query *mp_counter[8]; /* counter to query allocation */
       uint8_t num_mp_pm_active[2];
-      boolean mp_counters_enabled;
+      bool mp_counters_enabled;
    } pm;
 
    struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
@@ -105,7 +106,7 @@
    struct nouveau_object *nvsw;
 };
 
-static INLINE struct nvc0_screen *
+static inline struct nvc0_screen *
 nvc0_screen(struct pipe_screen *screen)
 {
    return (struct nvc0_screen *)screen;
@@ -276,7 +277,7 @@
 int nvc0_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
                                             struct pipe_driver_query_group_info *);
 
-boolean nvc0_blitter_create(struct nvc0_screen *);
+bool nvc0_blitter_create(struct nvc0_screen *);
 void nvc0_blitter_destroy(struct nvc0_screen *);
 
 void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
@@ -287,10 +288,10 @@
 int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
 int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
 
-boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
-                                    uint32_t lneg, uint32_t cstack);
+bool nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+                                 uint32_t lneg, uint32_t cstack);
 
-static INLINE void
+static inline void
 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
    struct nvc0_screen *screen = nvc0_screen(res->base.screen);
@@ -302,7 +303,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
 {
    if (likely(res->bo)) {
@@ -325,21 +326,21 @@
 
 extern const struct nvc0_format nvc0_format_table[];
 
-static INLINE void
+static inline void
 nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
 {
    if (tic->id >= 0)
       screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
 }
 
-static INLINE void
+static inline void
 nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
 {
    if (tsc->id >= 0)
       screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
 }
 
-static INLINE void
+static inline void
 nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
 {
    if (tic->id >= 0) {
@@ -348,7 +349,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
 {
    if (tsc->id >= 0) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -27,14 +27,14 @@
 
 #include "nvc0/nvc0_context.h"
 
-static INLINE void
+static inline void
 nvc0_program_update_context_state(struct nvc0_context *nvc0,
                                   struct nvc0_program *prog, int stage)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
 
    if (prog && prog->need_tls) {
-      const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+      const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
       if (!nvc0->state.tls_required)
          BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
       nvc0->state.tls_required |= 1 << stage;
@@ -63,22 +63,22 @@
    }
 }
 
-static INLINE boolean
+static inline bool
 nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
 {
    if (prog->mem)
-      return TRUE;
+      return true;
 
    if (!prog->translated) {
       prog->translated = nvc0_program_translate(
          prog, nvc0->screen->base.device->chipset);
       if (!prog->translated)
-         return FALSE;
+         return false;
    }
 
    if (likely(prog->code_size))
       return nvc0_program_upload_code(nvc0, prog);
-   return TRUE; /* stream output info only */
+   return true; /* stream output info only */
 }
 
 void
@@ -147,12 +147,14 @@
       PUSH_DATA (push, tp->code_base);
       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
       PUSH_DATA (push, tp->num_gprs);
-
-      if (tp->tp.input_patch_size <= 32)
-         IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
    } else {
-      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+      tp = nvc0->tcp_empty;
+      /* not a whole lot we can do to handle this failure */
+      if (!nvc0_program_validate(nvc0, tp))
+         assert(!"unable to validate empty tcp");
+      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
       PUSH_DATA (push, 0x20);
+      PUSH_DATA (push, tp->code_base);
    }
    nvc0_program_update_context_state(nvc0, tp, 1);
 }
@@ -192,7 +194,7 @@
 
    /* we allow GPs with no code for specifying stream output state only */
    if (gp && gp->code_size) {
-      const boolean gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
+      const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
 
       BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
       PUSH_DATA (push, 0x41);
@@ -280,7 +282,7 @@
          nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
       } else {
          PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
-         targ->clean = FALSE;
+         targ->clean = false;
       }
    }
    for (; b < 4; ++b)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -35,7 +35,7 @@
 
 #include "nouveau_gldefs.h"
 
-static INLINE uint32_t
+static inline uint32_t
 nvc0_colormask(unsigned mask)
 {
     uint32_t ret = 0;
@@ -55,7 +55,7 @@
 #define NVC0_BLEND_FACTOR_CASE(a, b) \
    case PIPE_BLENDFACTOR_##a: return NV50_BLEND_FACTOR_##b
 
-static INLINE uint32_t
+static inline uint32_t
 nvc0_blend_fac(unsigned factor)
 {
    switch (factor) {
@@ -90,10 +90,9 @@
    struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
    int i;
    int r; /* reference */
-   uint32_t ms;
    uint8_t blend_en = 0;
-   boolean indep_masks = FALSE;
-   boolean indep_funcs = FALSE;
+   bool indep_masks = false;
+   bool indep_funcs = false;
 
    so->pipe = *cso;
 
@@ -111,7 +110,7 @@
              cso->rt[i].alpha_func != cso->rt[r].alpha_func ||
              cso->rt[i].alpha_src_factor != cso->rt[r].alpha_src_factor ||
              cso->rt[i].alpha_dst_factor != cso->rt[r].alpha_dst_factor) {
-            indep_funcs = TRUE;
+            indep_funcs = true;
             break;
          }
       }
@@ -120,7 +119,7 @@
 
       for (i = 1; i < 8; ++i) {
          if (cso->rt[i].colormask != cso->rt[0].colormask) {
-            indep_masks = TRUE;
+            indep_masks = true;
             break;
          }
       }
@@ -176,15 +175,6 @@
       }
    }
 
-   ms = 0;
-   if (cso->alpha_to_coverage)
-      ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
-   if (cso->alpha_to_one)
-      ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
-
-   SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
-   SB_DATA    (so, ms);
-
    assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
    return so;
 }
@@ -234,7 +224,7 @@
     SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample);
 
     SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
-    if (cso->line_smooth)
+    if (cso->line_smooth || cso->multisample)
        SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1);
     else
        SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1);
@@ -351,6 +341,13 @@
       SB_DATA    (so, nvgl_comparison_op(cso->depth.func));
    }
 
+   SB_IMMED_3D(so, DEPTH_BOUNDS_EN, cso->depth.bounds_test);
+   if (cso->depth.bounds_test) {
+      SB_BEGIN_3D(so, DEPTH_BOUNDS(0), 2);
+      SB_DATA    (so, fui(cso->depth.bounds_min));
+      SB_DATA    (so, fui(cso->depth.bounds_max));
+   }
+
    if (cso->stencil[0].enabled) {
       SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
       SB_DATA    (so, 1);
@@ -428,7 +425,7 @@
    FREE(hwcso);
 }
 
-static INLINE void
+static inline void
 nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
                                unsigned nr, void **hwcso)
 {
@@ -508,6 +505,14 @@
       assert(start == 0);
       nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
       break;
+   case PIPE_SHADER_TESS_CTRL:
+      assert(start == 0);
+      nvc0_stage_sampler_states_bind(nvc0_context(pipe), 1, nr, s);
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      assert(start == 0);
+      nvc0_stage_sampler_states_bind(nvc0_context(pipe), 2, nr, s);
+      break;
    case PIPE_SHADER_GEOMETRY:
       assert(start == 0);
       nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
@@ -537,7 +542,7 @@
    FREE(nv50_tic_entry(view));
 }
 
-static INLINE void
+static inline void
 nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
                              unsigned nr,
                              struct pipe_sampler_view **views)
@@ -633,6 +638,12 @@
    case PIPE_SHADER_VERTEX:
       nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
       break;
+   case PIPE_SHADER_TESS_CTRL:
+      nvc0_stage_set_sampler_views(nvc0_context(pipe), 1, nr, views);
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      nvc0_stage_set_sampler_views(nvc0_context(pipe), 2, nr, views);
+      break;
    case PIPE_SHADER_GEOMETRY:
       nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
       break;
@@ -734,6 +745,38 @@
 }
 
 static void *
+nvc0_tcp_state_create(struct pipe_context *pipe,
+                     const struct pipe_shader_state *cso)
+{
+   return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_TESS_CTRL);
+}
+
+static void
+nvc0_tcp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->tctlprog = hwcso;
+    nvc0->dirty |= NVC0_NEW_TCTLPROG;
+}
+
+static void *
+nvc0_tep_state_create(struct pipe_context *pipe,
+                     const struct pipe_shader_state *cso)
+{
+   return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_TESS_EVAL);
+}
+
+static void
+nvc0_tep_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->tevlprog = hwcso;
+    nvc0->dirty |= NVC0_NEW_TEVLPROG;
+}
+
+static void *
 nvc0_cp_state_create(struct pipe_context *pipe,
                      const struct pipe_compute_state *cso)
 {
@@ -788,9 +831,11 @@
    }
    nvc0->constbuf_dirty[s] |= 1 << i;
 
+   if (nvc0->constbuf[s][i].u.buf)
+      nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
    pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
 
-   nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+   nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
    if (nvc0->constbuf[s][i].user) {
       nvc0->constbuf[s][i].u.data = cb->user_buffer;
       nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
@@ -934,6 +979,18 @@
 }
 
 static void
+nvc0_set_tess_state(struct pipe_context *pipe,
+                    const float default_tess_outer[4],
+                    const float default_tess_inner[2])
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+   memcpy(nvc0->default_tess_outer, default_tess_outer, 4 * sizeof(float));
+   memcpy(nvc0->default_tess_inner, default_tess_inner, 2 * sizeof(float));
+   nvc0->dirty |= NVC0_NEW_TESSFACTOR;
+}
+
+static void
 nvc0_set_vertex_buffers(struct pipe_context *pipe,
                         unsigned start_slot, unsigned count,
                         const struct pipe_vertex_buffer *vb)
@@ -1018,7 +1075,7 @@
       FREE(targ);
       return NULL;
    }
-   targ->clean = TRUE;
+   targ->clean = true;
 
    targ->pipe.buffer_size = size;
    targ->pipe.buffer_offset = offset;
@@ -1051,13 +1108,13 @@
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    unsigned i;
-   boolean serialize = TRUE;
+   bool serialize = true;
 
    assert(num_targets <= 4);
 
    for (i = 0; i < num_targets; ++i) {
-      const boolean changed = nvc0->tfbbuf[i] != targets[i];
-      const boolean append = (offsets[i] == ((unsigned)-1));
+      const bool changed = nvc0->tfbbuf[i] != targets[i];
+      const bool append = (offsets[i] == ((unsigned)-1));
       if (!changed && append)
          continue;
       nvc0->tfbbuf_dirty |= 1 << i;
@@ -1066,7 +1123,7 @@
          nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
 
       if (targets[i] && !append)
-         nvc0_so_target(targets[i])->clean = TRUE;
+         nvc0_so_target(targets[i])->clean = true;
 
       pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]);
    }
@@ -1125,16 +1182,18 @@
 }
 
 static void
-nvc0_set_shader_resources(struct pipe_context *pipe,
-                          unsigned start, unsigned nr,
-                          struct pipe_surface **resources)
+nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
+                       unsigned start_slot, unsigned count,
+                       struct pipe_image_view **views)
 {
-   nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
+#if 0
+   nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
 
    nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
+#endif
 }
 
-static INLINE void
+static inline void
 nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
 {
    struct nv04_resource *buf = nv04_resource(res);
@@ -1218,12 +1277,18 @@
    pipe->create_vs_state = nvc0_vp_state_create;
    pipe->create_fs_state = nvc0_fp_state_create;
    pipe->create_gs_state = nvc0_gp_state_create;
+   pipe->create_tcs_state = nvc0_tcp_state_create;
+   pipe->create_tes_state = nvc0_tep_state_create;
    pipe->bind_vs_state = nvc0_vp_state_bind;
    pipe->bind_fs_state = nvc0_fp_state_bind;
    pipe->bind_gs_state = nvc0_gp_state_bind;
+   pipe->bind_tcs_state = nvc0_tcp_state_bind;
+   pipe->bind_tes_state = nvc0_tep_state_bind;
    pipe->delete_vs_state = nvc0_sp_state_delete;
    pipe->delete_fs_state = nvc0_sp_state_delete;
    pipe->delete_gs_state = nvc0_sp_state_delete;
+   pipe->delete_tcs_state = nvc0_sp_state_delete;
+   pipe->delete_tes_state = nvc0_sp_state_delete;
 
    pipe->create_compute_state = nvc0_cp_state_create;
    pipe->bind_compute_state = nvc0_cp_state_bind;
@@ -1239,6 +1304,7 @@
    pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
    pipe->set_scissor_states = nvc0_set_scissor_states;
    pipe->set_viewport_states = nvc0_set_viewport_states;
+   pipe->set_tess_state = nvc0_set_tess_state;
 
    pipe->create_vertex_elements_state = nvc0_vertex_state_create;
    pipe->delete_vertex_elements_state = nvc0_vertex_state_delete;
@@ -1253,8 +1319,14 @@
 
    pipe->set_global_binding = nvc0_set_global_bindings;
    pipe->set_compute_resources = nvc0_set_compute_resources;
-   pipe->set_shader_resources = nvc0_set_shader_resources;
+   pipe->set_shader_images = nvc0_set_shader_images;
 
    nvc0->sample_mask = ~0;
    nvc0->min_samples = 1;
+   nvc0->default_tess_outer[0] =
+   nvc0->default_tess_outer[1] =
+   nvc0->default_tess_outer[2] =
+   nvc0->default_tess_outer[3] = 1.0;
+   nvc0->default_tess_inner[0] =
+   nvc0->default_tess_inner[1] = 1.0;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h	2015-09-16 14:36:09.000000000 +0000
@@ -17,7 +17,7 @@
 struct nvc0_blend_stateobj {
    struct pipe_blend_state pipe;
    int size;
-   uint32_t state[72];
+   uint32_t state[70];
 };
 
 struct nvc0_rasterizer_stateobj {
@@ -29,7 +29,7 @@
 struct nvc0_zsa_stateobj {
    struct pipe_depth_stencil_alpha_state pipe;
    int size;
-   uint32_t state[26];
+   uint32_t state[30];
 };
 
 struct nvc0_constbuf {
@@ -39,7 +39,7 @@
    } u;
    uint32_t size;
    uint32_t offset;
-   boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+   bool user; /* should only be true if u.data is valid and non-NULL */
 };
 
 struct nvc0_vertex_element {
@@ -55,8 +55,8 @@
    unsigned num_elements;
    uint32_t instance_elts;
    uint32_t instance_bufs;
-   boolean shared_slots;
-   boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
+   bool shared_slots;
+   bool need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
    unsigned size; /* size of vertex in bytes (when packed) */
    struct nvc0_vertex_element element[0];
 };
@@ -65,10 +65,10 @@
    struct pipe_stream_output_target pipe;
    struct pipe_query *pq;
    unsigned stride;
-   boolean clean;
+   bool clean;
 };
 
-static INLINE struct nvc0_so_target *
+static inline struct nvc0_so_target *
 nvc0_so_target(struct pipe_stream_output_target *ptarg)
 {
    return (struct nvc0_so_target *)ptarg;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c	2015-09-16 14:36:09.000000000 +0000
@@ -1,4 +1,5 @@
 
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 #include "nvc0/nvc0_context.h"
@@ -55,7 +56,7 @@
 }
 #endif
 
-static INLINE void
+static inline void
 nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
 {
    BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 6);
@@ -74,7 +75,7 @@
     struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
     unsigned i, ms;
     unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
-    boolean serialize = FALSE;
+    bool serialize = false;
 
     nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
 
@@ -136,7 +137,7 @@
         }
 
         if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
-           serialize = TRUE;
+           serialize = true;
         res->status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
         res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
 
@@ -168,7 +169,7 @@
         ms_mode = mt->ms_mode;
 
         if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
-           serialize = TRUE;
+           serialize = true;
         mt->base.status |=  NOUVEAU_BUFFER_STATUS_GPU_WRITING;
         mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
 
@@ -309,7 +310,7 @@
    nvc0->viewports_dirty = 0;
 }
 
-static INLINE void
+static inline void
 nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -324,7 +325,7 @@
    PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
 }
 
-static INLINE void
+static inline void
 nvc0_check_program_ucps(struct nvc0_context *nvc0,
                         struct nvc0_program *vp, uint8_t mask)
 {
@@ -439,7 +440,7 @@
                BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
                PUSH_DATA (push, (0 << 4) | 1);
             }
-            nvc0_cb_push(&nvc0->base, bo, NOUVEAU_BO_VRAM,
+            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                          base, nvc0->state.uniform_buffer_bound[s],
                          0, (size + 3) / 4,
                          nvc0->constbuf[s][0].u.data);
@@ -455,6 +456,9 @@
                PUSH_DATA (push, (i << 4) | 1);
 
                BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+
+               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
+               res->cb_bindings[s] |= 1 << i;
             } else {
                BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
                PUSH_DATA (push, (i << 4) | 0);
@@ -518,12 +522,12 @@
 nvc0_validate_derived_1(struct nvc0_context *nvc0)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   boolean rasterizer_discard;
+   bool rasterizer_discard;
 
    if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
-      rasterizer_discard = TRUE;
+      rasterizer_discard = true;
    } else {
-      boolean zs = nvc0->zsa &&
+      bool zs = nvc0->zsa &&
          (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
       rasterizer_discard = !zs &&
          (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
@@ -553,6 +557,35 @@
 }
 
 static void
+nvc0_validate_derived_3(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+   uint32_t ms = 0;
+
+   if ((!fb->nr_cbufs || !fb->cbufs[0] ||
+        !util_format_is_pure_integer(fb->cbufs[0]->format)) && nvc0->blend) {
+      if (nvc0->blend->pipe.alpha_to_coverage)
+         ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+      if (nvc0->blend->pipe.alpha_to_one)
+         ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+   }
+
+   BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1);
+   PUSH_DATA (push, ms);
+}
+
+static void
+nvc0_validate_tess_state(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+   BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
+   PUSH_DATAp(push, nvc0->default_tess_outer, 4);
+   PUSH_DATAp(push, nvc0->default_tess_inner, 2);
+}
+
+static void
 nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
 {
    struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
@@ -610,11 +643,13 @@
     { nvc0_vertprog_validate,      NVC0_NEW_VERTPROG },
     { nvc0_tctlprog_validate,      NVC0_NEW_TCTLPROG },
     { nvc0_tevlprog_validate,      NVC0_NEW_TEVLPROG },
+    { nvc0_validate_tess_state,    NVC0_NEW_TESSFACTOR },
     { nvc0_gmtyprog_validate,      NVC0_NEW_GMTYPROG },
     { nvc0_fragprog_validate,      NVC0_NEW_FRAGPROG },
     { nvc0_validate_derived_1,     NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
                                    NVC0_NEW_RASTERIZER },
     { nvc0_validate_derived_2,     NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
+    { nvc0_validate_derived_3,     NVC0_NEW_BLEND | NVC0_NEW_FRAMEBUFFER },
     { nvc0_validate_clip,          NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
                                    NVC0_NEW_VERTPROG |
                                    NVC0_NEW_TEVLPROG |
@@ -631,7 +666,7 @@
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
 
-boolean
+bool
 nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words)
 {
    uint32_t state_mask;
@@ -652,15 +687,15 @@
       }
       nvc0->dirty &= ~state_mask;
 
-      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, FALSE);
+      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, false);
    }
 
    nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_3d);
    ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);
 
    if (unlikely(nvc0->state.flushed)) {
-      nvc0->state.flushed = FALSE;
-      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, TRUE);
+      nvc0->state.flushed = false;
+      nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
    }
    return !ret;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -47,8 +47,8 @@
 #define NOUVEAU_DRIVER 0xc0
 #include "nv50/nv50_blit.h"
 
-static INLINE uint8_t
-nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+static inline uint8_t
+nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
 {
    uint8_t id = nvc0_format_table[format].rt;
 
@@ -81,9 +81,9 @@
 }
 
 static int
-nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
+nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
                     struct nv50_miptree *mt, unsigned level, unsigned layer,
-                    enum pipe_format pformat, boolean dst_src_pformat_equal)
+                    enum pipe_format pformat, bool dst_src_pformat_equal)
 {
    struct nouveau_bo *bo = mt->base.bo;
    uint32_t width, height, depth;
@@ -161,16 +161,16 @@
    const enum pipe_format dfmt = dst->base.base.format;
    const enum pipe_format sfmt = src->base.base.format;
    int ret;
-   boolean eqfmt = dfmt == sfmt;
+   bool eqfmt = dfmt == sfmt;
 
    if (!PUSH_SPACE(push, 2 * 16 + 32))
       return PIPE_ERROR;
 
-   ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt);
+   ret = nvc0_2d_texture_set(push, true, dst, dst_level, dz, dfmt, eqfmt);
    if (ret)
       return ret;
 
-   ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt);
+   ret = nvc0_2d_texture_set(push, false, src, src_level, sz, sfmt, eqfmt);
    if (ret)
       return ret;
 
@@ -189,7 +189,7 @@
    PUSH_DATA (push, 0);
    PUSH_DATA (push, sx << src->ms_x);
    PUSH_DATA (push, 0);
-   PUSH_DATA (push, sy << src->ms_x);
+   PUSH_DATA (push, sy << src->ms_y);
 
    return 0;
 }
@@ -203,7 +203,7 @@
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    int ret;
-   boolean m2mf;
+   bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -704,7 +704,7 @@
    };
 
    blit->vp.type = PIPE_SHADER_VERTEX;
-   blit->vp.translated = TRUE;
+   blit->vp.translated = true;
    if (blit->screen->base.class_3d >= GM107_3D_CLASS) {
       blit->vp.code = (uint32_t *)code_gm107; /* const_cast */
       blit->vp.code_size = sizeof(code_gm107);
@@ -887,6 +887,7 @@
 
    /* zsa state */
    IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+   IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
    IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
    IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);
 
@@ -1217,7 +1218,7 @@
    int i;
    uint32_t mode;
    uint32_t mask = nv50_blit_eng2d_get_mask(info);
-   boolean b;
+   bool b;
 
    mode = nv50_blit_get_filter(info) ?
       NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -1383,39 +1384,39 @@
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   boolean eng3d = FALSE;
+   bool eng3d = false;
 
    if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
       if (!(info->mask & PIPE_MASK_ZS))
          return;
       if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
           info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
-         eng3d = TRUE;
+         eng3d = true;
       if (info->filter != PIPE_TEX_FILTER_NEAREST)
-         eng3d = TRUE;
+         eng3d = true;
    } else {
       if (!(info->mask & PIPE_MASK_RGBA))
          return;
       if (info->mask != PIPE_MASK_RGBA)
-         eng3d = TRUE;
+         eng3d = true;
    }
 
    if (nv50_miptree(info->src.resource)->layout_3d) {
-      eng3d = TRUE;
+      eng3d = true;
    } else
    if (info->src.box.depth != info->dst.box.depth) {
-      eng3d = TRUE;
+      eng3d = true;
       debug_printf("blit: cannot filter array or cube textures in z direction");
    }
 
    if (!eng3d && info->dst.format != info->src.format) {
       if (!nv50_2d_dst_format_faithful(info->dst.format)) {
-         eng3d = TRUE;
+         eng3d = true;
       } else
       if (!nv50_2d_src_format_faithful(info->src.format)) {
          if (!util_format_is_luminance(info->src.format)) {
             if (!nv50_2d_dst_format_ops_supported(info->dst.format))
-               eng3d = TRUE;
+               eng3d = true;
             else
             if (util_format_is_intensity(info->src.format))
                eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
@@ -1427,24 +1428,24 @@
          }
       } else
       if (util_format_is_luminance_alpha(info->src.format))
-         eng3d = TRUE;
+         eng3d = true;
    }
 
    if (info->src.resource->nr_samples == 8 &&
        info->dst.resource->nr_samples <= 1)
-      eng3d = TRUE;
+      eng3d = true;
 #if 0
    /* FIXME: can't make this work with eng2d anymore, at least not on nv50 */
    if (info->src.resource->nr_samples > 1 ||
        info->dst.resource->nr_samples > 1)
-      eng3d = TRUE;
+      eng3d = true;
 #endif
    /* FIXME: find correct src coordinates adjustments */
    if ((info->src.box.width !=  info->dst.box.width &&
         info->src.box.width != -info->dst.box.width) ||
        (info->src.box.height !=  info->dst.box.height &&
         info->src.box.height != -info->dst.box.height))
-      eng3d = TRUE;
+      eng3d = true;
 
    if (nvc0->screen->num_occlusion_queries_active)
       IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
@@ -1466,13 +1467,13 @@
 {
 }
 
-boolean
+bool
 nvc0_blitter_create(struct nvc0_screen *screen)
 {
    screen->blitter = CALLOC_STRUCT(nvc0_blitter);
    if (!screen->blitter) {
       NOUVEAU_ERR("failed to allocate blitter struct\n");
-      return FALSE;
+      return false;
    }
    screen->blitter->screen = screen;
 
@@ -1481,7 +1482,7 @@
    nvc0_blitter_make_vp(screen->blitter);
    nvc0_blitter_make_sampler(screen->blitter);
 
-   return TRUE;
+   return true;
 }
 
 void
@@ -1504,20 +1505,20 @@
    FREE(blitter);
 }
 
-boolean
+bool
 nvc0_blitctx_create(struct nvc0_context *nvc0)
 {
    nvc0->blit = CALLOC_STRUCT(nvc0_blitctx);
    if (!nvc0->blit) {
       NOUVEAU_ERR("failed to allocate blit context\n");
-      return FALSE;
+      return false;
    }
 
    nvc0->blit->nvc0 = nvc0;
 
    nvc0->blit->rast.pipe.half_pixel_center = 1;
 
-   return TRUE;
+   return true;
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c	2015-09-16 14:36:09.000000000 +0000
@@ -34,8 +34,8 @@
    (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK |   \
     NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
 
-static INLINE uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+static inline uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
 {
    switch (swz) {
    case PIPE_SWIZZLE_RED:
@@ -82,7 +82,7 @@
    uint32_t depth;
    struct nv50_tic_entry *view;
    struct nv50_miptree *mt;
-   boolean tex_int;
+   bool tex_int;
 
    view = MALLOC_STRUCT(nv50_tic_entry);
    if (!view)
@@ -195,7 +195,7 @@
    default:
       NOUVEAU_ERR("unexpected/invalid texture target: %d\n",
                   mt->base.base.target);
-      return FALSE;
+      return false;
    }
 
    tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
@@ -226,7 +226,7 @@
    return &view->pipe;
 }
 
-static boolean
+static bool
 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
 {
    uint32_t commands[32];
@@ -234,12 +234,12 @@
    struct nouveau_bo *txc = nvc0->screen->txc;
    unsigned i;
    unsigned n = 0;
-   boolean need_flush = FALSE;
+   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_textures[s]; ++i) {
       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
       struct nv04_resource *res;
-      const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
 
       if (!tic) {
          if (dirty)
@@ -263,7 +263,7 @@
          BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
          PUSH_DATAp(push, &tic->tic[0], 8);
 
-         need_flush = TRUE;
+         need_flush = true;
       } else
       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
          BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
@@ -295,18 +295,18 @@
    return need_flush;
 }
 
-static boolean
+static bool
 nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
 {
    struct nouveau_bo *txc = nvc0->screen->txc;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    unsigned i;
-   boolean need_flush = FALSE;
+   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_textures[s]; ++i) {
       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
       struct nv04_resource *res;
-      const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
 
       if (!tic) {
          nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -328,7 +328,7 @@
          PUSH_DATA (push, 0x1001);
          PUSH_DATAp(push, &tic->tic[0], 8);
 
-         need_flush = TRUE;
+         need_flush = true;
       } else
       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
          BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
@@ -356,16 +356,14 @@
 
 void nvc0_validate_textures(struct nvc0_context *nvc0)
 {
-   boolean need_flush;
+   bool need_flush = false;
+   int i;
 
-   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
-      need_flush  = nve4_validate_tic(nvc0, 0);
-      need_flush |= nve4_validate_tic(nvc0, 3);
-      need_flush |= nve4_validate_tic(nvc0, 4);
-   } else {
-      need_flush  = nvc0_validate_tic(nvc0, 0);
-      need_flush |= nvc0_validate_tic(nvc0, 3);
-      need_flush |= nvc0_validate_tic(nvc0, 4);
+   for (i = 0; i < 5; i++) {
+      if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+         need_flush |= nve4_validate_tic(nvc0, i);
+      else
+         need_flush |= nvc0_validate_tic(nvc0, i);
    }
 
    if (need_flush) {
@@ -374,14 +372,14 @@
    }
 }
 
-static boolean
+static bool
 nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
 {
    uint32_t commands[16];
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    unsigned i;
    unsigned n = 0;
-   boolean need_flush = FALSE;
+   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -396,9 +394,9 @@
          tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
 
          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
-                               65536 + tsc->id * 32, NOUVEAU_BO_VRAM,
+                               65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
                                32, tsc->tsc);
-         need_flush = TRUE;
+         need_flush = true;
       }
       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
 
@@ -418,13 +416,13 @@
    return need_flush;
 }
 
-boolean
+bool
 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
 {
    struct nouveau_bo *txc = nvc0->screen->txc;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    unsigned i;
-   boolean need_flush = FALSE;
+   bool need_flush = false;
 
    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -447,7 +445,7 @@
          PUSH_DATA (push, 0x1001);
          PUSH_DATAp(push, &tsc->tsc[0], 8);
 
-         need_flush = TRUE;
+         need_flush = true;
       }
       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
 
@@ -466,16 +464,14 @@
 
 void nvc0_validate_samplers(struct nvc0_context *nvc0)
 {
-   boolean need_flush;
+   bool need_flush = false;
+   int i;
 
-   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
-      need_flush  = nve4_validate_tsc(nvc0, 0);
-      need_flush |= nve4_validate_tsc(nvc0, 3);
-      need_flush |= nve4_validate_tsc(nvc0, 4);
-   } else {
-      need_flush  = nvc0_validate_tsc(nvc0, 0);
-      need_flush |= nvc0_validate_tsc(nvc0, 3);
-      need_flush |= nvc0_validate_tsc(nvc0, 4);
+   for (i = 0; i < 5; i++) {
+      if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+         need_flush |= nve4_validate_tsc(nvc0, i);
+      else
+         need_flush |= nvc0_validate_tsc(nvc0, i);
    }
 
    if (need_flush) {
@@ -645,13 +641,13 @@
    }
 }
 
-static INLINE void
+static inline void
 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
 {
    /* TODO */
 }
 
-static INLINE void
+static inline void
 nve4_update_surface_bindings(struct nvc0_context *nvc0)
 {
    /* TODO */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c	2015-09-16 14:36:09.000000000 +0000
@@ -329,17 +329,17 @@
 }
 
 
-static INLINE boolean
+static inline bool
 nvc0_mt_transfer_can_map_directly(struct nv50_miptree *mt)
 {
    if (mt->base.domain == NOUVEAU_BO_VRAM)
-      return FALSE;
+      return false;
    if (mt->base.base.usage != PIPE_USAGE_STAGING)
-      return FALSE;
+      return false;
    return !nouveau_bo_memtype(mt->base.bo);
 }
 
-static INLINE boolean
+static inline bool
 nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
 {
    if (!mt->base.mm) {
@@ -506,12 +506,49 @@
 }
 
 /* This happens rather often with DTD9/st. */
-void
+static void
 nvc0_cb_push(struct nouveau_context *nv,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
+             struct nv04_resource *res,
              unsigned offset, unsigned words, const uint32_t *data)
 {
+   struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+   struct nvc0_constbuf *cb = NULL;
+   int s;
+
+   /* Go through all the constbuf binding points of this buffer and try to
+    * find one which contains the region to be updated.
+    */
+   for (s = 0; s < 6 && !cb; s++) {
+      uint16_t bindings = res->cb_bindings[s];
+      while (bindings) {
+         int i = ffs(bindings) - 1;
+         uint32_t cb_offset = nvc0->constbuf[s][i].offset;
+
+         bindings &= ~(1 << i);
+         if (cb_offset <= offset &&
+             cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) {
+            cb = &nvc0->constbuf[s][i];
+            break;
+         }
+      }
+   }
+
+   if (cb) {
+      nvc0_cb_bo_push(nv, res->bo, res->domain,
+                      res->offset + cb->offset, cb->size,
+                      offset - cb->offset, words, data);
+   } else {
+      nv->push_data(nv, res->bo, res->offset + offset, res->domain,
+                    words * 4, data);
+   }
+}
+
+void
+nvc0_cb_bo_push(struct nouveau_context *nv,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data)
+{
    struct nouveau_pushbuf *push = nv->pushbuf;
 
    NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1);
@@ -520,6 +557,9 @@
    assert(!(offset & 3));
    size = align(size, 0x100);
 
+   assert(offset < size);
+   assert(offset + words * 4 <= size);
+
    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
    PUSH_DATA (push, size);
    PUSH_DATAh(push, bo->offset + base);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c	2015-09-16 14:36:09.000000000 +0000
@@ -61,8 +61,8 @@
     so->num_elements = num_elements;
     so->instance_elts = 0;
     so->instance_bufs = 0;
-    so->shared_slots = FALSE;
-    so->need_conversion = FALSE;
+    so->shared_slots = false;
+    so->need_conversion = false;
 
     memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
 
@@ -93,7 +93,7 @@
                 return NULL;
             }
             so->element[i].state = nvc0_format_table[fmt].vtx;
-            so->need_conversion = TRUE;
+            so->need_conversion = true;
         }
         size = util_format_get_blocksize(fmt);
 
@@ -141,7 +141,7 @@
 
     if (so->instance_elts || src_offset_max >= (1 << 14))
        return so;
-    so->shared_slots = TRUE;
+    so->shared_slots = true;
 
     for (i = 0; i < num_elements; ++i) {
        const unsigned b = elements[i].vertex_buffer_index;
@@ -196,7 +196,7 @@
    push->cur += 5;
 }
 
-static INLINE void
+static inline void
 nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
                      uint32_t *base, uint32_t *size)
 {
@@ -214,7 +214,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nvc0_release_user_vbufs(struct nvc0_context *nvc0)
 {
    if (nvc0->vbo_user) {
@@ -265,7 +265,7 @@
       PUSH_DATAh(push, address[b] + ve->src_offset);
       PUSH_DATA (push, address[b] + ve->src_offset);
    }
-   nvc0->base.vbo_dirty = TRUE;
+   nvc0->base.vbo_dirty = true;
 }
 
 static void
@@ -419,7 +419,7 @@
    uint32_t const_vbos;
    unsigned i;
    uint8_t vbo_mode;
-   boolean update_vertex;
+   bool update_vertex;
 
    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
 
@@ -529,7 +529,7 @@
 #define NVC0_PRIM_GL_CASE(n) \
    case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
 
-static INLINE unsigned
+static inline unsigned
 nvc0_prim_gl(unsigned prim)
 {
    switch (prim) {
@@ -547,8 +547,7 @@
    NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
    NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
    NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
-   /*
-   NVC0_PRIM_GL_CASE(PATCHES); */
+   NVC0_PRIM_GL_CASE(PATCHES);
    default:
       return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
    }
@@ -559,7 +558,7 @@
 {
    struct nvc0_screen *screen = push->user_priv;
 
-   nouveau_fence_update(&screen->base, TRUE);
+   nouveau_fence_update(&screen->base, true);
 
    NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
 }
@@ -695,7 +694,7 @@
 }
 
 static void
-nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+nvc0_draw_elements(struct nvc0_context *nvc0, bool shorten,
                    unsigned mode, unsigned start, unsigned count,
                    unsigned instance_count, int32_t index_bias)
 {
@@ -835,8 +834,8 @@
                         buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
 }
 
-static INLINE void
-nvc0_update_prim_restart(struct nvc0_context *nvc0, boolean en, uint32_t index)
+static inline void
+nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
 
@@ -889,6 +888,12 @@
       }
    }
 
+   if (info->mode == PIPE_PRIM_PATCHES &&
+       nvc0->state.patch_vertices != info->vertices_per_patch) {
+      nvc0->state.patch_vertices = info->vertices_per_patch;
+      IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
+   }
+
    /* 8 as minimum to avoid immediate double validation of new buffers */
    nvc0_state_validate(nvc0, ~0, 8);
 
@@ -910,13 +915,13 @@
             continue;
 
          if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-            nvc0->cb_dirty = TRUE;
+            nvc0->cb_dirty = true;
       }
    }
 
    if (nvc0->cb_dirty) {
       IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
-      nvc0->cb_dirty = FALSE;
+      nvc0->cb_dirty = false;
    }
 
    if (nvc0->state.vbo_mode) {
@@ -940,19 +945,19 @@
       if (!nvc0->vtxbuf[i].buffer)
          continue;
       if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-         nvc0->base.vbo_dirty = TRUE;
+         nvc0->base.vbo_dirty = true;
    }
 
    if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer &&
        nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nvc0->base.vbo_dirty = TRUE;
+      nvc0->base.vbo_dirty = true;
 
    nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
 
    if (nvc0->base.vbo_dirty) {
       if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
          IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
-      nvc0->base.vbo_dirty = FALSE;
+      nvc0->base.vbo_dirty = false;
    }
 
    if (unlikely(info->indirect)) {
@@ -962,10 +967,10 @@
       nvc0_draw_stream_output(nvc0, info);
    } else
    if (info->indexed) {
-      boolean shorten = info->max_index <= 65535;
+      bool shorten = info->max_index <= 65535;
 
       if (info->primitive_restart && info->restart_index > 65535)
-         shorten = FALSE;
+         shorten = false;
 
       nvc0_draw_elements(nvc0, shorten,
                          info->mode, info->start, info->count,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c	2015-09-16 14:36:09.000000000 +0000
@@ -21,12 +21,12 @@
    uint32_t restart_index;
    uint32_t instance_id;
 
-   boolean prim_restart;
-   boolean need_vertex_id;
+   bool prim_restart;
+   bool need_vertex_id;
 
    struct {
-      boolean enabled;
-      boolean value;
+      bool enabled;
+      bool value;
       unsigned stride;
       const uint8_t *data;
    } edgeflag;
@@ -47,7 +47,7 @@
    ctx->need_vertex_id =
       nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32);
 
-   ctx->edgeflag.value = TRUE;
+   ctx->edgeflag.value = true;
    ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
 
    /* silence warnings */
@@ -55,7 +55,7 @@
    ctx->edgeflag.stride = 0;
 }
 
-static INLINE void
+static inline void
 nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
 {
    struct translate *translate = nvc0->vertex->translate;
@@ -78,7 +78,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
 {
    if (nvc0->idxbuf.buffer) {
@@ -90,7 +90,7 @@
    }
 }
 
-static INLINE void
+static inline void
 nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
                        int32_t index_bias)
 {
@@ -112,7 +112,7 @@
       ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
 {
    unsigned i;
@@ -120,7 +120,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
 {
    unsigned i;
@@ -128,7 +128,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
 {
    unsigned i;
@@ -136,21 +136,21 @@
    return i;
 }
 
-static INLINE boolean
+static inline bool
 ef_value(const struct push_context *ctx, uint32_t index)
 {
    float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
-   return *pf ? TRUE : FALSE;
+   return *pf ? true : false;
 }
 
-static INLINE boolean
+static inline bool
 ef_toggle(struct push_context *ctx)
 {
    ctx->edgeflag.value = !ctx->edgeflag.value;
    return ctx->edgeflag.value;
 }
 
-static INLINE unsigned
+static inline unsigned
 ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
 {
    unsigned i;
@@ -158,7 +158,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
 {
    unsigned i;
@@ -166,7 +166,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
 {
    unsigned i;
@@ -174,7 +174,7 @@
    return i;
 }
 
-static INLINE unsigned
+static inline unsigned
 ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
 {
    unsigned i;
@@ -182,7 +182,7 @@
    return i;
 }
 
-static INLINE void *
+static inline void *
 nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -409,7 +409,7 @@
 #define NVC0_PRIM_GL_CASE(n) \
    case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
 
-static INLINE unsigned
+static inline unsigned
 nvc0_prim_gl(unsigned prim)
 {
    switch (prim) {
@@ -427,8 +427,7 @@
    NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
    NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
    NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
-   /*
-   NVC0_PRIM_GL_CASE(PATCHES); */
+   NVC0_PRIM_GL_CASE(PATCHES);
    default:
       return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
    }
@@ -483,7 +482,7 @@
          struct pipe_context *pipe = &nvc0->base.pipe;
          struct nvc0_so_target *targ;
          targ = nvc0_so_target(info->count_from_stream_output);
-         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+         pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
          vert_count /= targ->stride;
       }
       ctx.idxbuf = NULL; /* shut up warnings */
@@ -560,7 +559,7 @@
    NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
 }
 
-static INLINE void
+static inline void
 copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
 {
    unsigned i;
@@ -568,7 +567,7 @@
       dst[i] = elts[i] + bias;
 }
 
-static INLINE void
+static inline void
 copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
 {
    unsigned i;
@@ -576,7 +575,7 @@
       dst[i] = elts[i] + bias;
 }
 
-static INLINE void
+static inline void
 copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
 {
    unsigned i;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -15,14 +15,14 @@
 #endif
 
 
-static INLINE void
+static inline void
 nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
                             unsigned flags, struct nouveau_bo *bo)
 {
    nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
 }
 
-static INLINE void
+static inline void
 nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin,
                   struct nv04_resource *res, unsigned flags)
 {
@@ -38,7 +38,7 @@
 #define BCTX_REFN(bctx, bin, res, acc) \
    nvc0_add_resident(bctx, NVC0_BIND_##bin, res, NOUVEAU_BO_##acc)
 
-static INLINE void
+static inline void
 PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
 {
    struct nouveau_pushbuf_refn ref = { bo, flags };
@@ -69,46 +69,46 @@
 
 #define NVC0_3D_SERIALIZE NV50_GRAPH_SERIALIZE
 
-static INLINE uint32_t
+static inline uint32_t
 NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
 {
    return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 NVC0_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
 {
    return 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 NVC0_FIFO_PKHDR_IL(int subc, int mthd, uint16_t data)
 {
    assert(data < 0x2000);
    return 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 NVC0_FIFO_PKHDR_1I(int subc, int mthd, unsigned size)
 {
    return 0xa0000000 | (size << 16) | (subc << 13) | (mthd >> 2);
 }
 
 
-static INLINE uint8_t
+static inline uint8_t
 nouveau_bo_memtype(const struct nouveau_bo *bo)
 {
    return bo->config.nvc0.memtype;
 }
 
 
-static INLINE void
+static inline void
 PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
 {
    *push->cur++ = (uint32_t)(data >> 32);
 }
 
-static INLINE void
+static inline void
 BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
 {
 #ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -117,7 +117,7 @@
    PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(subc, mthd, size));
 }
 
-static INLINE void
+static inline void
 BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
 {
 #ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -126,7 +126,7 @@
    PUSH_DATA (push, NVC0_FIFO_PKHDR_NI(subc, mthd, size));
 }
 
-static INLINE void
+static inline void
 BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
 {
 #ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -135,7 +135,7 @@
    PUSH_DATA (push, NVC0_FIFO_PKHDR_1I(subc, mthd, size));
 }
 
-static INLINE void
+static inline void
 IMMED_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, uint16_t data)
 {
 #ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nve4_compute.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nve4_compute.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nve4_compute.c	2015-09-16 14:36:09.000000000 +0000
@@ -63,7 +63,7 @@
       return ret;
    }
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
+   ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,
                         &screen->parm);
    if (ret)
       return ret;
@@ -250,7 +250,7 @@
 static void
 nve4_compute_validate_samplers(struct nvc0_context *nvc0)
 {
-   boolean need_flush = nve4_validate_tsc(nvc0, 5);
+   bool need_flush = nve4_validate_tsc(nvc0, 5);
    if (need_flush) {
       BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
       PUSH_DATA (nvc0->base.pushbuf, 0);
@@ -299,11 +299,11 @@
 }
 
 
-static boolean
+static bool
 nve4_compute_state_validate(struct nvc0_context *nvc0)
 {
    if (!nvc0_compute_validate_program(nvc0))
-      return FALSE;
+      return false;
    if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
       nve4_compute_validate_textures(nvc0);
    if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
@@ -316,15 +316,15 @@
       nvc0_validate_global_residents(nvc0,
                                      nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
 
-   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+   nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
 
    nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
    if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
-      return FALSE;
+      return false;
    if (unlikely(nvc0->state.flushed))
-      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+      nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
 
-   return TRUE;
+   return true;
 }
 
 
@@ -364,7 +364,7 @@
    PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
 }
 
-static INLINE uint8_t
+static inline uint8_t
 nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
 {
    if (shared_size > (32 << 10))
@@ -413,7 +413,7 @@
    nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
 }
 
-static INLINE struct nve4_cp_launch_desc *
+static inline struct nve4_cp_launch_desc *
 nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
                                struct nouveau_bo **pbo, uint64_t *pgpuaddr)
 {
@@ -505,7 +505,7 @@
    for (i = 0; i < nvc0->num_textures[s]; ++i) {
       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
       struct nv04_resource *res;
-      const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
 
       if (!tic) {
          nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -575,18 +575,18 @@
 {
    const uint32_t *data = (const uint32_t *)desc;
    unsigned i;
-   boolean zero = FALSE;
+   bool zero = false;
 
    debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
 
    for (i = 0; i < sizeof(*desc); i += 4) {
       if (data[i / 4]) {
          debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
-         zero = FALSE;
+         zero = false;
       } else
       if (!zero) {
          debug_printf("...\n");
-         zero = TRUE;
+         zero = true;
       }
    }
 
@@ -606,7 +606,7 @@
    for (i = 0; i < 8; ++i) {
       uint64_t address;
       uint32_t size = desc->cb[i].size;
-      boolean valid = !!(desc->cb_mask & (1 << i));
+      bool valid = !!(desc->cb_mask & (1 << i));
 
       address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nve4_compute.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/nouveau/nvc0/nve4_compute.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/nouveau/nvc0/nve4_compute.h	2015-09-16 14:36:09.000000000 +0000
@@ -68,7 +68,7 @@
    u32 unk48[16];
 };
 
-static INLINE void
+static inline void
 nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
 {
    memset(desc, 0, sizeof(*desc));
@@ -78,7 +78,7 @@
    desc->unk47_20 = 0x300;
 }
 
-static INLINE void
+static inline void
 nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
                            unsigned index,
                            struct nouveau_bo *bo,
@@ -96,7 +96,7 @@
    desc->cb_mask |= 1 << index;
 }
 
-static INLINE void
+static inline void
 nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
                                unsigned index,
                                const struct nvc0_constbuf *cb)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c	2015-09-16 14:36:09.000000000 +0000
@@ -693,7 +693,8 @@
 	};
 
 	/* Allocate the main ra data structure */
-	s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW);
+	s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW,
+                                   true);
 
 	/* Create the register classes */
 	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_blit.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -382,7 +382,7 @@
             r300_get_num_cs_end_dwords(r300);
 
         /* Reserve CS space. */
-        if (dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
+        if (dwords > (r300->cs->max_dw - r300->cs->cdw)) {
             r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
         }
 
@@ -667,7 +667,8 @@
     r300_blitter_begin(r300, R300_COPY);
     util_blitter_blit_generic(r300->blitter, dst_view, &dstbox,
                               src_view, src_box, src_width0, src_height0,
-                              PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+                              PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+                              FALSE);
     r300_blitter_end(r300);
 
     pipe_surface_reference(&dst_view, NULL);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_context.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -94,6 +94,8 @@
 
     if (r300->cs)
         r300->rws->cs_destroy(r300->cs);
+    if (r300->ctx)
+        r300->rws->ctx_destroy(r300->ctx);
 
     rc_destroy_regalloc_state(&r300->fs_regalloc_state);
 
@@ -382,7 +384,11 @@
                      sizeof(struct pipe_transfer), 64,
                      UTIL_SLAB_SINGLETHREADED);
 
-    r300->cs = rws->cs_create(rws, RING_GFX, r300_flush_callback, r300, NULL);
+    r300->ctx = rws->ctx_create(rws);
+    if (!r300->ctx)
+        goto fail;
+
+    r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, NULL);
     if (r300->cs == NULL)
         goto fail;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -449,6 +449,8 @@
 
     /* The interface to the windowing system, etc. */
     struct radeon_winsys *rws;
+    /* The submission context. */
+    struct radeon_winsys_ctx *ctx;
     /* The command stream. */
     struct radeon_winsys_cs *cs;
     /* Screen. */
@@ -647,32 +649,32 @@
     for (atom = r300->first_dirty; atom != r300->last_dirty; atom++)
 
 /* Convenience cast wrappers. */
-static INLINE struct r300_query* r300_query(struct pipe_query* q)
+static inline struct r300_query* r300_query(struct pipe_query* q)
 {
     return (struct r300_query*)q;
 }
 
-static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
+static inline struct r300_surface* r300_surface(struct pipe_surface* surf)
 {
     return (struct r300_surface*)surf;
 }
 
-static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex)
+static inline struct r300_resource* r300_resource(struct pipe_resource* tex)
 {
     return (struct r300_resource*)tex;
 }
 
-static INLINE struct r300_context* r300_context(struct pipe_context* context)
+static inline struct r300_context* r300_context(struct pipe_context* context)
 {
     return (struct r300_context*)context;
 }
 
-static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
+static inline struct r300_fragment_shader *r300_fs(struct r300_context *r300)
 {
     return (struct r300_fragment_shader*)r300->fs.state;
 }
 
-static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
+static inline void r300_mark_atom_dirty(struct r300_context *r300,
                                         struct r300_atom *atom)
 {
     atom->dirty = TRUE;
@@ -688,7 +690,7 @@
     }
 }
 
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
 r300_get_nonnull_cb(struct pipe_framebuffer_state *fb, unsigned i)
 {
     if (fb->cbufs[i])
@@ -777,12 +779,12 @@
 void r500_dump_rs_block(struct r300_rs_block *rs);
 
 
-static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
+static inline boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
 {
     return SCREEN_DBG_ON(ctx->screen, flags);
 }
 
-static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags,
+static inline void CTX_DBG(struct r300_context * ctx, unsigned flags,
                        const char * fmt, ...)
 {
     if (CTX_DBG_ON(ctx, flags)) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_cs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_cs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_cs.h	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_cs.h	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
 #ifdef DEBUG
 
 #define BEGIN_CS(size) do { \
-    assert(size <= (RADEON_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
+    assert(size <= (cs_copy->max_dw - cs_copy->cdw)); \
     cs_count = size; \
 } while (0)
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_fs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_fs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_fs.h	2012-05-02 13:56:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_fs.h	2015-09-16 14:36:09.000000000 +0000
@@ -77,14 +77,14 @@
 /* Return TRUE if the shader was switched and should be re-emitted. */
 boolean r300_pick_fragment_shader(struct r300_context* r300);
 
-static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
+static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
 {
     if (!fs)
         return FALSE;
     return (fs->shader->code.writes_depth) ? TRUE : FALSE;
 }
 
-static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
+static inline boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
 {
     if (!fs)
         return FALSE;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -146,10 +146,11 @@
 
     if (q->type == PIPE_QUERY_GPU_FINISHED) {
         if (wait) {
-            r300->rws->buffer_wait(q->buf, RADEON_USAGE_READWRITE);
+            r300->rws->buffer_wait(q->buf, PIPE_TIMEOUT_INFINITE,
+                                   RADEON_USAGE_READWRITE);
             vresult->b = TRUE;
         } else {
-            vresult->b = !r300->rws->buffer_is_busy(q->buf, RADEON_USAGE_READWRITE);
+            vresult->b = r300->rws->buffer_wait(q->buf, 0, RADEON_USAGE_READWRITE);
         }
         return vresult->b;
     }
@@ -168,8 +169,6 @@
         map++;
     }
 
-    r300->rws->buffer_unmap(q->cs_buf);
-
     if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
         vresult->b = temp != 0;
     } else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_render.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_render.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_render.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_render.c	2015-09-16 14:36:09.000000000 +0000
@@ -215,7 +215,7 @@
     cs_dwords += r300_get_num_cs_end_dwords(r300);
 
     /* Reserve requested CS space. */
-    if (cs_dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
+    if (cs_dwords > (r300->cs->max_dw - r300->cs->cdw)) {
         r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
         flushed = TRUE;
     }
@@ -871,7 +871,7 @@
     uint8_t *vbo_ptr;
 };
 
-static INLINE struct r300_render*
+static inline struct r300_render*
 r300_render(struct vbuf_render* render)
 {
     return (struct r300_render*)render;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen_buffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen_buffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen_buffer.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen_buffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -96,7 +96,7 @@
 
         /* Check if mapping this buffer would cause waiting for the GPU. */
         if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, RADEON_USAGE_READWRITE) ||
-            r300->rws->buffer_is_busy(rbuf->buf, RADEON_USAGE_READWRITE)) {
+            !r300->rws->buffer_wait(rbuf->buf, 0, RADEON_USAGE_READWRITE)) {
             unsigned i;
             struct pb_buffer *new_buf;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen_buffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen_buffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen_buffer.h	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen_buffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
 
 /* Inline functions. */
 
-static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
+static inline struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
 {
     return (struct r300_buffer *)buffer;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -191,6 +191,10 @@
         case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
         case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
         case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+        case PIPE_CAP_DEPTH_BOUNDS_TEST:
             return 0;
 
         /* SWTCL-only features. */
@@ -274,6 +278,7 @@
         case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
             return (is_r500 ? 256 : 32) * sizeof(float[4]);
         case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
             return 1;
         case PIPE_SHADER_CAP_MAX_TEMPS:
             return is_r500 ? 128 : is_r400 ? 64 : 32;
@@ -333,6 +338,7 @@
         case PIPE_SHADER_CAP_MAX_PREDS:
             return 0; /* unused */
         case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
             return 1;
         case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
         case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
@@ -425,7 +431,7 @@
  * Whether the format matches:
  *   PIPE_FORMAT_?10?10?10?2_UNORM
  */
-static INLINE boolean
+static inline boolean
 util_format_is_rgba1010102_variant(const struct util_format_description *desc)
 {
    static const unsigned size[4] = {10, 10, 10, 2};
@@ -658,14 +664,6 @@
     rws->fence_reference(ptr, fence);
 }
 
-static boolean r300_fence_signalled(struct pipe_screen *screen,
-                                    struct pipe_fence_handle *fence)
-{
-    struct radeon_winsys *rws = r300_screen(screen)->rws;
-
-    return rws->fence_wait(rws, fence, 0);
-}
-
 static boolean r300_fence_finish(struct pipe_screen *screen,
                                  struct pipe_fence_handle *fence,
                                  uint64_t timeout)
@@ -710,7 +708,6 @@
     r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
     r300screen->screen.context_create = r300_create_context;
     r300screen->screen.fence_reference = r300_fence_reference;
-    r300screen->screen.fence_signalled = r300_fence_signalled;
     r300screen->screen.fence_finish = r300_fence_finish;
 
     r300_init_screen_resource_functions(r300screen);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -51,11 +51,11 @@
 
 
 /* Convenience cast wrappers. */
-static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
+static inline struct r300_screen* r300_screen(struct pipe_screen* screen) {
     return (struct r300_screen*)screen;
 }
 
-static INLINE struct radeon_winsys *
+static inline struct radeon_winsys *
 radeon_winsys(struct pipe_screen *screen) {
     return r300_screen(screen)->rws;
 }
@@ -102,12 +102,12 @@
 #define DBG_P_STAT      (1 << 25)
 /*@}*/
 
-static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
+static inline boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
 {
     return (screen->debug & flags) ? TRUE : FALSE;
 }
 
-static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
+static inline void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
                               const char * fmt, ...)
 {
     if (SCREEN_DBG_ON(screen, flags)) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_shader_semantics.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_shader_semantics.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_shader_semantics.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_shader_semantics.h	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
     int num_generic;
 };
 
-static INLINE void r300_shader_semantics_reset(
+static inline void r300_shader_semantics_reset(
     struct r300_shader_semantics* info)
 {
     int i;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_state.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -844,7 +844,7 @@
         tex->tex.macrotile[level]) {
         r300->rws->buffer_set_tiling(tex->buf, r300->cs,
                 tex->tex.microtile, tex->tex.macrotile[level],
-                0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0,
                 tex->tex.stride_in_bytes[0], false);
 
         tex->surface_level = level;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_state_inlines.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_state_inlines.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_state_inlines.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_state_inlines.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,13 +32,13 @@
 
 /* Some maths. These should probably find their way to u_math, if needed. */
 
-static INLINE int pack_float_16_6x(float f) {
+static inline int pack_float_16_6x(float f) {
     return ((int)(f * 6.0) & 0xffff);
 }
 
 /* Blend state. */
 
-static INLINE uint32_t r300_translate_blend_function(int blend_func,
+static inline uint32_t r300_translate_blend_function(int blend_func,
                                                      boolean clamp)
 {
     switch (blend_func) {
@@ -60,7 +60,7 @@
     return 0;
 }
 
-static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
+static inline uint32_t r300_translate_blend_factor(int blend_fact)
 {
     switch (blend_fact) {
         case PIPE_BLENDFACTOR_ONE:
@@ -113,7 +113,7 @@
 
 /* DSA state. */
 
-static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
+static inline uint32_t r300_translate_depth_stencil_function(int zs_func)
 {
     switch (zs_func) {
         case PIPE_FUNC_NEVER:
@@ -141,7 +141,7 @@
     return 0;
 }
 
-static INLINE uint32_t r300_translate_stencil_op(int s_op)
+static inline uint32_t r300_translate_stencil_op(int s_op)
 {
     switch (s_op) {
         case PIPE_STENCIL_OP_KEEP:
@@ -168,7 +168,7 @@
     return 0;
 }
 
-static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
+static inline uint32_t r300_translate_alpha_function(int alpha_func)
 {
     switch (alpha_func) {
         case PIPE_FUNC_NEVER:
@@ -195,7 +195,7 @@
     return 0;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 r300_translate_polygon_mode_front(unsigned mode) {
     switch (mode)
     {
@@ -213,7 +213,7 @@
     }
 }
 
-static INLINE uint32_t
+static inline uint32_t
 r300_translate_polygon_mode_back(unsigned mode) {
     switch (mode)
     {
@@ -233,7 +233,7 @@
 
 /* Texture sampler state. */
 
-static INLINE uint32_t r300_translate_wrap(int wrap)
+static inline uint32_t r300_translate_wrap(int wrap)
 {
     switch (wrap) {
         case PIPE_TEX_WRAP_REPEAT:
@@ -259,7 +259,7 @@
     }
 }
 
-static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
+static inline uint32_t r300_translate_tex_filters(int min, int mag, int mip,
                                                   boolean is_anisotropic)
 {
     uint32_t retval = 0;
@@ -308,7 +308,7 @@
     return retval;
 }
 
-static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
+static inline uint32_t r300_anisotropy(unsigned max_aniso)
 {
     if (max_aniso >= 16) {
         return R300_TX_MAX_ANISO_16_TO_1;
@@ -323,7 +323,7 @@
     }
 }
 
-static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
+static inline uint32_t r500_anisotropy(unsigned max_aniso)
 {
     if (!max_aniso) {
         return 0;
@@ -336,7 +336,7 @@
 }
 
 /* Translate pipe_formats into PSC vertex types. */
-static INLINE uint16_t
+static inline uint16_t
 r300_translate_vertex_data_type(enum pipe_format format) {
     uint32_t result = 0;
     const struct util_format_description *desc;
@@ -410,7 +410,7 @@
     return result;
 }
 
-static INLINE uint16_t
+static inline uint16_t
 r300_translate_vertex_data_swizzle(enum pipe_format format) {
     const struct util_format_description *desc;
     unsigned i, swizzle = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_texture.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_texture.c	2015-09-16 14:36:09.000000000 +0000
@@ -1063,7 +1063,7 @@
 
     rws->buffer_set_tiling(tex->buf, NULL,
             tex->tex.microtile, tex->tex.macrotile[0],
-            0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0,
             tex->tex.stride_in_bytes[0], false);
 
     return tex;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_tgsi_to_rc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_tgsi_to_rc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_tgsi_to_rc.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_tgsi_to_rc.c	2015-09-16 14:36:09.000000000 +0000
@@ -133,13 +133,7 @@
 
 static unsigned translate_saturate(unsigned saturate)
 {
-    switch(saturate) {
-        default:
-            fprintf(stderr, "Unknown saturate mode: %i\n", saturate);
-            /* fall-through */
-        case TGSI_SAT_NONE: return RC_SATURATE_NONE;
-        case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE;
-    }
+    return saturate ? RC_SATURATE_ZERO_ONE : RC_SATURATE_NONE;
 }
 
 static unsigned translate_register_file(unsigned file)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_transfer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_transfer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r300/r300_transfer.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r300/r300_transfer.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,7 @@
 };
 
 /* Convenience cast wrapper. */
-static INLINE struct r300_transfer*
+static inline struct r300_transfer*
 r300_transfer(struct pipe_transfer* transfer)
 {
     return (struct r300_transfer*)transfer;
@@ -120,7 +120,7 @@
         referenced_hw = TRUE;
     } else {
         referenced_hw =
-            r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE);
+            !r300->rws->buffer_wait(tex->buf, 0, RADEON_USAGE_READWRITE);
     }
 
     trans = CALLOC_STRUCT(r300_transfer);
@@ -251,16 +251,12 @@
     struct r300_resource *tex = r300_resource(transfer->resource);
 
     if (trans->linear_texture) {
-        rws->buffer_unmap(trans->linear_texture->cs_buf);
-
         if (transfer->usage & PIPE_TRANSFER_WRITE) {
             r300_copy_into_tiled_texture(ctx, trans);
         }
 
         pipe_resource_reference(
             (struct pipe_resource**)&trans->linear_texture, NULL);
-    } else {
-        rws->buffer_unmap(tex->cs_buf);
     }
     FREE(transfer);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/Android.mk	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -33,6 +33,10 @@
 LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_r600
 
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+LOCAL_C_INCLUDES := external/libcxx/include
+else
 include external/stlport/libstlport.mk
+endif
 include $(GALLIUM_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/eg_asm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/eg_asm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/eg_asm.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/eg_asm.c	2015-09-16 14:36:09.000000000 +0000
@@ -160,6 +160,9 @@
 	alu.op = ALU_OP1_MOVA_INT;
 	alu.src[0].sel = bc->index_reg[id];
 	alu.src[0].chan = 0;
+	if (bc->chip_class == CAYMAN)
+		alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
+
 	alu.last = 1;
 	r = r600_bytecode_add_alu(bc, &alu);
 	if (r)
@@ -167,12 +170,14 @@
 
 	bc->ar_loaded = 0; /* clobbered */
 
-	memset(&alu, 0, sizeof(alu));
-	alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
-	alu.last = 1;
-	r = r600_bytecode_add_alu(bc, &alu);
-	if (r)
-		return r;
+	if (bc->chip_class == EVERGREEN) {
+		memset(&alu, 0, sizeof(alu));
+		alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
+		alu.last = 1;
+		r = r600_bytecode_add_alu(bc, &alu);
+		if (r)
+			return r;
+	}
 
 	/* Must split ALU group as index only applies to following group */
 	if (inside_alu_clause) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/eg_sq.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/eg_sq.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/eg_sq.h	2012-08-30 05:23:50.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/eg_sq.h	2015-09-16 14:36:09.000000000 +0000
@@ -521,4 +521,11 @@
 
 #define V_SQ_REL_ABSOLUTE 0
 #define V_SQ_REL_RELATIVE 1
+
+/* CAYMAN has special encoding for MOVA_INT destination */
+#define CM_V_SQ_MOVA_DST_AR_X 0
+#define CM_V_SQ_MOVA_DST_CF_PC 1
+#define CM_V_SQ_MOVA_DST_CF_IDX0 2
+#define CM_V_SQ_MOVA_DST_CF_IDX1 3
+
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/evergreen_compute.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/evergreen_compute.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/evergreen_compute.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/evergreen_compute.c	2015-09-16 14:36:09.000000000 +0000
@@ -163,7 +163,7 @@
 	rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
 	state->enabled_mask |= 1 << vb_index;
 	state->dirty_mask |= 1 << vb_index;
-	state->atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &state->atom);
 }
 
 static void evergreen_cs_set_constant_buffer(
@@ -226,7 +226,7 @@
 	}
 #else
 	memset(&shader->binary, 0, sizeof(shader->binary));
-	radeon_elf_read(code, header->num_bytes, &shader->binary, true);
+	radeon_elf_read(code, header->num_bytes, &shader->binary);
 	r600_create_shader(&shader->bc, &shader->binary, &use_kill);
 
 	shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
@@ -487,6 +487,12 @@
 	/* Emit constant buffer state */
 	r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);
 
+	/* Emit sampler state */
+	r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);
+
+	/* Emit sampler view (texture resource) state */
+	r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);
+
 	/* Emit compute shader state */
 	r600_emit_atom(ctx, &ctx->cs_shader_state.atom);
 
@@ -655,25 +661,6 @@
 	}
 }
 
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
-		unsigned start_slot, unsigned count,
-		struct pipe_sampler_view **views)
-{
-	struct r600_pipe_sampler_view **resource =
-		(struct r600_pipe_sampler_view **)views;
-
-	for (unsigned i = 0; i < count; i++)	{
-		if (resource[i]) {
-			assert(i+1 < 12);
-			/* XXX: Implement */
-			assert(!"Compute samplers not implemented.");
-			///FETCH0 = VTX0 (param buffer),
-			//FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX
-		}
-	}
-}
-
-
 static void evergreen_set_global_binding(
 	struct pipe_context *ctx_, unsigned first, unsigned n,
 	struct pipe_resource **resources,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/evergreend.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/evergreend.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/evergreend.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/evergreend.h	2015-09-16 14:36:09.000000000 +0000
@@ -1253,6 +1253,11 @@
 #define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN         0x00A430
 #define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE          0x00A434
 #define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA         0x00A438
+#define R_00A464_TD_CS_SAMPLER0_BORDER_INDEX         0x00A464
+#define R_00A468_TD_CS_SAMPLER0_BORDER_RED           0x00A468
+#define R_00A46C_TD_CS_SAMPLER0_BORDER_GREEN         0x00A46C
+#define R_00A470_TD_CS_SAMPLER0_BORDER_BLUE          0x00A470
+#define R_00A474_TD_CS_SAMPLER0_BORDER_ALPHA         0x00A474
 
 #define R_03C000_SQ_TEX_SAMPLER_WORD0_0              0x03C000
 #define   S_03C000_CLAMP_X(x)                          (((x) & 0x7) << 0)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/evergreen_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/evergreen_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/evergreen_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/evergreen_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,7 +32,7 @@
 #include "evergreen_compute.h"
 #include "util/u_math.h"
 
-static INLINE unsigned evergreen_array_mode(unsigned mode)
+static inline unsigned evergreen_array_mode(unsigned mode)
 {
 	switch (mode) {
 	case RADEON_SURF_MODE_LINEAR_ALIGNED:	return V_028C70_ARRAY_LINEAR_ALIGNED;
@@ -896,7 +896,7 @@
 
 	for (i = start_slot; i < start_slot + num_scissors; i++) {
 		rctx->scissor[i].scissor = state[i - start_slot];
-		rctx->scissor[i].atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
 	}
 }
 
@@ -1028,7 +1028,10 @@
 	macro_aspect = rtex->surface.mtilea;
 	bankw = rtex->surface.bankw;
 	bankh = rtex->surface.bankh;
-	fmask_bankh = rtex->fmask.bank_height;
+	if (rtex->fmask.size)
+		fmask_bankh = rtex->fmask.bank_height;
+	else
+		fmask_bankh = rtex->surface.bankh;
 	tile_split = eg_tile_split(tile_split);
 	macro_aspect = eg_macro_tile_aspect(macro_aspect);
 	bankw = eg_bank_wh(bankw);
@@ -1149,10 +1152,11 @@
 	surf->cb_color_attrib = color_attrib;
 	if (rtex->fmask.size) {
 		surf->cb_color_fmask = (base_offset + rtex->fmask.offset) >> 8;
+		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
 	} else {
 		surf->cb_color_fmask = surf->cb_color_base;
+		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(slice);
 	}
-	surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
 
 	surf->color_initialized = true;
 }
@@ -1342,11 +1346,11 @@
 
 		if (rctx->alphatest_state.bypass != alphatest_bypass) {
 			rctx->alphatest_state.bypass = alphatest_bypass;
-			rctx->alphatest_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
 		}
 		if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) {
 			rctx->alphatest_state.cb0_export_16bpc = export_16bpc;
-			rctx->alphatest_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
 		}
 	}
 
@@ -1362,28 +1366,28 @@
 
 		if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
 			rctx->poly_offset_state.zs_format = state->zsbuf->format;
-			rctx->poly_offset_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
 		}
 
 		if (rctx->db_state.rsurf != surf) {
 			rctx->db_state.rsurf = surf;
-			rctx->db_state.atom.dirty = true;
-			rctx->db_misc_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+			r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 		}
 	} else if (rctx->db_state.rsurf) {
 		rctx->db_state.rsurf = NULL;
-		rctx->db_state.atom.dirty = true;
-		rctx->db_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 
 	if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
 		rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
-		rctx->cb_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 	}
 
 	if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
 		rctx->alphatest_state.bypass = false;
-		rctx->alphatest_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
 	}
 
 	log_samples = util_logbase2(rctx->framebuffer.nr_samples);
@@ -1392,7 +1396,7 @@
 	     rctx->b.family == CHIP_RV770) &&
 	    rctx->db_misc_state.log_samples != log_samples) {
 		rctx->db_misc_state.log_samples = log_samples;
-		rctx->db_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 
 
@@ -1420,7 +1424,7 @@
 		rctx->framebuffer.atom.num_dw += 4;
 	}
 
-	rctx->framebuffer.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
 
 	r600_set_sample_locations_constant_buffer(rctx);
 }
@@ -1434,7 +1438,7 @@
 
 	rctx->ps_iter_samples = min_samples;
 	if (rctx->framebuffer.nr_samples > 1) {
-		rctx->framebuffer.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
 	}
 }
 
@@ -1849,7 +1853,7 @@
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (resource_offset + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_030008_STRIDE(vb->stride) |
@@ -1919,7 +1923,7 @@
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
@@ -1980,7 +1984,7 @@
 
 static void evergreen_emit_sampler_views(struct r600_context *rctx,
 					 struct r600_samplerview_state *state,
-					 unsigned resource_id_base)
+					 unsigned resource_id_base, unsigned pkt_flags)
 {
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
@@ -1993,7 +1997,7 @@
 		rview = state->views[resource_index];
 		assert(rview);
 
-		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (resource_id_base + resource_index) * 8);
 		radeon_emit_array(cs, rview->tex_resource_words, 8);
 
@@ -2002,11 +2006,11 @@
 					      rview->tex_resource->b.b.nr_samples > 1 ?
 						      RADEON_PRIO_SHADER_TEXTURE_MSAA :
 						      RADEON_PRIO_SHADER_TEXTURE_RO);
-		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
 		radeon_emit(cs, reloc);
 
 		if (!rview->skip_mip_address_reloc) {
-			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+			radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
 			radeon_emit(cs, reloc);
 		}
 	}
@@ -2015,23 +2019,33 @@
 
 static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 176 + R600_MAX_CONST_BUFFERS);
+	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
+	                             176 + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS);
+	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
+	                             336 + R600_MAX_CONST_BUFFERS, 0);
 }
 
 static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS);
+	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
+	                             R600_MAX_CONST_BUFFERS, 0);
+}
+
+static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
+{
+	evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
+	                             816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
 static void evergreen_emit_sampler_states(struct r600_context *rctx,
 				struct r600_textures_info *texinfo,
 				unsigned resource_id_base,
-				unsigned border_index_reg)
+				unsigned border_index_reg,
+				unsigned pkt_flags)
 {
 	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
 	uint32_t dirty_mask = texinfo->states.dirty_mask;
@@ -2043,7 +2057,7 @@
 		rstate = texinfo->states.states[i];
 		assert(rstate);
 
-		radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
+		radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
 		radeon_emit(cs, (resource_id_base + i) * 3);
 		radeon_emit_array(cs, rstate->tex_sampler_words, 3);
 
@@ -2058,17 +2072,27 @@
 
 static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX);
+	evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18,
+	                              R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
 }
 
 static void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, R_00A428_TD_GS_SAMPLER0_BORDER_INDEX);
+	evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36,
+	                              R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0);
 }
 
 static void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
 {
-	evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX);
+	evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0,
+	                              R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0);
+}
+
+static void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
+{
+	evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90,
+	                              R_00A464_TD_CS_SAMPLER0_BORDER_INDEX,
+	                              RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
 static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
@@ -3176,7 +3200,7 @@
 
 	if (db_shader_control != rctx->db_misc_state.db_shader_control) {
 		rctx->db_misc_state.db_shader_control = db_shader_control;
-		rctx->db_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 }
 
@@ -3431,12 +3455,14 @@
 	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0);
 	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0);
 	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0);
+	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0);
 	/* resources */
 	r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
 	r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
 	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0);
 	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0);
 	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0);
+	r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0);
 
 	r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10);
 
@@ -3466,8 +3492,8 @@
 	}
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
-	rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
-	rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+	r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+	r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
 	r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
 	r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
 	r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_asm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_asm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_asm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_asm.c	2015-09-16 14:36:09.000000000 +0000
@@ -2029,6 +2029,8 @@
 					fprintf(stderr, "CND:%X ", cf->cond);
 				if (cf->pop_count)
 					fprintf(stderr, "POP:%X ", cf->pop_count);
+				if (cf->end_of_program)
+					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			}
 		}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -145,7 +145,7 @@
 	rctx->db_misc_state.copy_depth = util_format_has_depth(desc);
 	rctx->db_misc_state.copy_stencil = util_format_has_stencil(desc);
 	rctx->db_misc_state.copy_sample = first_sample;
-	rctx->db_misc_state.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 
 	for (level = first_level; level <= last_level; level++) {
 		if (!staging && !(texture->dirty_level_mask & (1 << level)))
@@ -162,7 +162,7 @@
 
 				if (sample != rctx->db_misc_state.copy_sample) {
 					rctx->db_misc_state.copy_sample = sample;
-					rctx->db_misc_state.atom.dirty = true;
+					r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 				}
 
 				surf_tmpl.format = texture->resource.b.b.format;
@@ -197,7 +197,7 @@
 
 	/* reenable compression in DB_RENDER_CONTROL */
 	rctx->db_misc_state.flush_depthstencil_through_cb = false;
-	rctx->db_misc_state.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 }
 
 static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
@@ -210,7 +210,7 @@
 
 	/* Enable decompression in DB_RENDER_CONTROL */
 	rctx->db_misc_state.flush_depthstencil_in_place = true;
-	rctx->db_misc_state.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 
 	surf_tmpl.format = texture->resource.b.b.format;
 
@@ -248,7 +248,7 @@
 
 	/* Disable decompression in DB_RENDER_CONTROL */
 	rctx->db_misc_state.flush_depthstencil_in_place = false;
-	rctx->db_misc_state.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 }
 
 void r600_decompress_depth_textures(struct r600_context *rctx,
@@ -393,14 +393,11 @@
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct pipe_framebuffer_state *fb = &rctx->framebuffer.state;
 
-	/* Single-sample fast color clear is broken on r600g:
-	 *   https://bugs.freedesktop.org/show_bug.cgi?id=73528
-	 *   https://bugs.freedesktop.org/show_bug.cgi?id=82186
-	 */
-	if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN &&
-	    rctx->framebuffer.nr_samples > 1) {
+	if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
 		evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
 					      &buffers, color);
+		if (!buffers)
+			return; /* all buffers have been fast cleared */
 	}
 
 	if (buffers & PIPE_CLEAR_COLOR) {
@@ -440,10 +437,10 @@
                    fb->zsbuf->u.tex.last_layer == util_max_layer(&rtex->resource.b.b, level)) {
 			if (rtex->depth_clear_value != depth) {
 				rtex->depth_clear_value = depth;
-				rctx->db_state.atom.dirty = true;
+				r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
 			}
 			rctx->db_misc_state.htile_clear = true;
-			rctx->db_misc_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 		}
 	}
 
@@ -456,7 +453,7 @@
 	/* disable fast clear */
 	if (rctx->db_misc_state.htile_clear) {
 		rctx->db_misc_state.htile_clear = false;
-		rctx->db_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 }
 
@@ -735,7 +732,8 @@
 	r600_blitter_begin(ctx, R600_COPY_TEXTURE);
 	util_blitter_blit_generic(rctx->blitter, dst_view, &dstbox,
 				  src_view, src_box, src_width0, src_height0,
-				  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+				  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+				  FALSE);
 	r600_blitter_end(ctx);
 
 	pipe_surface_reference(&dst_view, NULL);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_formats.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_formats.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_formats.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_formats.h	2015-09-16 14:36:09.000000000 +0000
@@ -64,7 +64,7 @@
 #define     ENDIAN_8IN32                    2
 #define     ENDIAN_8IN64                    3
 
-static INLINE unsigned r600_endian_swap(unsigned size)
+static inline unsigned r600_endian_swap(unsigned size)
 {
 	if (R600_BIG_ENDIAN) {
 		switch (size) {
@@ -82,7 +82,7 @@
 	}
 }
 
-static INLINE bool r600_is_vertex_format_supported(enum pipe_format format)
+static inline bool r600_is_vertex_format_supported(enum pipe_format format)
 {
 	const struct util_format_description *desc = util_format_description(format);
 	unsigned i;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_hw_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_hw_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_hw_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_hw_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -51,13 +51,13 @@
 		unsigned i;
 
 		/* The number of dwords all the dirty states would take. */
-		for (i = 0; i < R600_NUM_ATOMS; i++) {
-			if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
-				num_dw += ctx->atoms[i]->num_dw;
-				if (ctx->screen->b.trace_bo) {
-					num_dw += R600_TRACE_CS_DWORDS;
-				}
+		i = r600_next_dirty_atom(ctx, 0);
+		while (i < R600_NUM_ATOMS) {
+			num_dw += ctx->atoms[i]->num_dw;
+			if (ctx->screen->b.trace_bo) {
+				num_dw += R600_TRACE_CS_DWORDS;
 			}
+			i = r600_next_dirty_atom(ctx, i + 1);
 		}
 
 		/* The upper-bound of how much space a draw command would take. */
@@ -68,7 +68,8 @@
 	}
 
 	/* Count in queries_suspend. */
-	num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend;
+	num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
+		  ctx->b.num_cs_dw_timer_queries_suspend;
 
 	/* Count in streamout_end at the end of CS. */
 	if (ctx->b.streamout.begin_emitted) {
@@ -92,7 +93,7 @@
 	num_dw += 10;
 
 	/* Flush if there's not enough space. */
-	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+	if (num_dw > ctx->b.rings.gfx.cs->max_dw) {
 		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 	}
 }
@@ -295,43 +296,45 @@
 	r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd);
 
 	/* Re-emit states. */
-	ctx->alphatest_state.atom.dirty = true;
-	ctx->blend_color.atom.dirty = true;
-	ctx->cb_misc_state.atom.dirty = true;
-	ctx->clip_misc_state.atom.dirty = true;
-	ctx->clip_state.atom.dirty = true;
-	ctx->db_misc_state.atom.dirty = true;
-	ctx->db_state.atom.dirty = true;
-	ctx->framebuffer.atom.dirty = true;
-	ctx->pixel_shader.atom.dirty = true;
-	ctx->poly_offset_state.atom.dirty = true;
-	ctx->vgt_state.atom.dirty = true;
-	ctx->sample_mask.atom.dirty = true;
+	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
+	r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+	r600_mark_atom_dirty(ctx, &ctx->pixel_shader.atom);
+	r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
+	r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
 	for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
-		ctx->scissor[i].atom.dirty = true;
-		ctx->viewport[i].atom.dirty = true;
+		r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom);
+		r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
 	}
-	ctx->config_state.atom.dirty = true;
-	ctx->stencil_ref.atom.dirty = true;
-	ctx->vertex_fetch_shader.atom.dirty = true;
-	ctx->export_shader.atom.dirty = true;
-	ctx->shader_stages.atom.dirty = true;
+	if (ctx->b.chip_class < EVERGREEN) {
+		r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
+	}
+	r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
+	r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
+	r600_mark_atom_dirty(ctx, &ctx->export_shader.atom);
+	r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
 	if (ctx->gs_shader) {
-		ctx->geometry_shader.atom.dirty = true;
-		ctx->gs_rings.atom.dirty = true;
+		r600_mark_atom_dirty(ctx, &ctx->geometry_shader.atom);
+		r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
 	}
-	ctx->vertex_shader.atom.dirty = true;
-	ctx->b.streamout.enable_atom.dirty = true;
+	r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom);
+	r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 
 	if (ctx->blend_state.cso)
-		ctx->blend_state.atom.dirty = true;
+		r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
 	if (ctx->dsa_state.cso)
-		ctx->dsa_state.atom.dirty = true;
+		r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
 	if (ctx->rasterizer_state.cso)
-		ctx->rasterizer_state.atom.dirty = true;
+		r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);
 
 	if (ctx->b.chip_class <= R700) {
-		ctx->seamless_cube_map.atom.dirty = true;
+		r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
 	}
 
 	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_isa.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_isa.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_isa.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_isa.h	2015-09-16 14:36:09.000000000 +0000
@@ -262,7 +262,7 @@
 		{"PRED_SETNE_PUSH_INT",       2, { 0x4D, 0x4D },{  AF_VS, AF_VS, AF_VS, AF_VS},  AF_PRED_PUSH | AF_CC_NE | AF_INT_CMP },
 		{"PRED_SETLT_PUSH_INT",       2, { 0x4E, 0x4E },{  AF_VS, AF_VS, AF_VS, AF_VS},  AF_PRED_PUSH | AF_CC_LT | AF_INT_CMP },
 		{"PRED_SETLE_PUSH_INT",       2, { 0x4F, 0x4F },{  AF_VS, AF_VS, AF_VS, AF_VS},  AF_PRED_PUSH | AF_CC_LE | AF_INT_CMP },
-		{"FLT_TO_INT",                1, { 0x6B, 0x50 },{   AF_S,  AF_S, AF_VS, AF_VS},  AF_INT_DST | AF_CVT },
+		{"FLT_TO_INT",                1, { 0x6B, 0x50 },{   AF_S,  AF_S,  AF_V,  AF_V},  AF_INT_DST | AF_CVT },
 		{"BFREV_INT",                 1, {   -1, 0x51 },{      0,     0, AF_VS, AF_VS},  AF_INT_DST },
 		{"ADDC_UINT",                 2, {   -1, 0x52 },{      0,     0, AF_VS, AF_VS},  AF_UINT_DST },
 		{"SUBB_UINT",                 2, {   -1, 0x53 },{      0,     0, AF_VS, AF_VS},  AF_UINT_DST },
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_llvm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_llvm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_llvm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_llvm.c	2015-09-16 14:36:09.000000000 +0000
@@ -84,7 +84,7 @@
 #else
 	LLVMValueRef reg = lp_build_const_int32(
 			ctx->soa.bld_base.base.gallivm, chan);
-	ctx->system_values[index] = build_intrinsic(
+	ctx->system_values[index] = lp_build_intrinsic(
 			ctx->soa.bld_base.base.gallivm->builder,
 			"llvm.R600.load.input",
 			ctx->soa.bld_base.base.elem_type, &reg, 1,
@@ -111,9 +111,9 @@
 			Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
 				lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
 			LLVMValueRef HalfVec[2] = {
-				build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
+				lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
 					VecType, Args, ArgCount, LLVMReadNoneAttribute),
-				build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
+				lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
 					VecType, Args, ArgCount, LLVMReadNoneAttribute)
 			};
 			LLVMValueRef MaskInputs[4] = {
@@ -127,7 +127,7 @@
 				Mask, "");
 		} else {
 			VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
-			return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
+			return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
 				VecType, Args, ArgCount, LLVMReadNoneAttribute);
 		}
 }
@@ -153,7 +153,7 @@
 		arg_count = 1;
 	}
 
-	return build_intrinsic(bb->gallivm->builder, intrinsic,
+	return lp_build_intrinsic(bb->gallivm->builder, intrinsic,
 		bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
 }
 #endif
@@ -332,7 +332,7 @@
 			args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
 			args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
 			lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
-				LLVMVoidTypeInContext(base->gallivm->context), args, 4);
+				LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0);
 		}
 	}
 
@@ -356,7 +356,7 @@
 				args[0] = output;
 				args[1] = lp_build_const_int32(base->gallivm, next_pos++);
 				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
-				build_intrinsic(
+				lp_build_intrinsic(
 					base->gallivm->builder,
 					"llvm.R600.store.swizzle",
 					LLVMVoidTypeInContext(base->gallivm->context),
@@ -373,7 +373,7 @@
 						LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);
 						args[0] = output;
 						args[1] = base_vector;
-						adjusted_elements[chan] = build_intrinsic(base->gallivm->builder,
+						adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder,
 							"llvm.AMDGPU.dp4", bld_base->base.elem_type,
 							args, 2, LLVMReadNoneAttribute);
 					}
@@ -381,7 +381,7 @@
 						adjusted_elements, 4);
 					args[1] = lp_build_const_int32(base->gallivm, next_pos++);
 					args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
-					build_intrinsic(
+					lp_build_intrinsic(
 						base->gallivm->builder,
 						"llvm.R600.store.swizzle",
 						LLVMVoidTypeInContext(base->gallivm->context),
@@ -394,14 +394,14 @@
 				args[0] = output;
 				args[1] = lp_build_const_int32(base->gallivm, next_pos++);
 				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
-				build_intrinsic(
+				lp_build_intrinsic(
 					base->gallivm->builder,
 					"llvm.R600.store.swizzle",
 					LLVMVoidTypeInContext(base->gallivm->context),
 					args, 3, 0);
 				args[1] = lp_build_const_int32(base->gallivm, next_param++);
 				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
-				build_intrinsic(
+				lp_build_intrinsic(
 					base->gallivm->builder,
 					"llvm.R600.store.swizzle",
 					LLVMVoidTypeInContext(base->gallivm->context),
@@ -418,7 +418,7 @@
 				args[0] = lp_build_gather_values(base->gallivm, elements, 4);
 				args[1] = lp_build_const_int32(base->gallivm, next_param++);
 				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
-				build_intrinsic(
+				lp_build_intrinsic(
 					base->gallivm->builder,
 					"llvm.R600.store.swizzle",
 					LLVMVoidTypeInContext(base->gallivm->context),
@@ -430,7 +430,7 @@
 				args[0] = output;
 				args[1] = lp_build_const_int32(base->gallivm, next_param++);
 				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
-				build_intrinsic(
+				lp_build_intrinsic(
 					base->gallivm->builder,
 					"llvm.R600.store.swizzle",
 					LLVMVoidTypeInContext(base->gallivm->context),
@@ -449,7 +449,7 @@
 						for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
 							args[1] = lp_build_const_int32(base->gallivm, j);
 							args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
-							build_intrinsic(
+							lp_build_intrinsic(
 								base->gallivm->builder,
 								"llvm.R600.store.swizzle",
 								LLVMVoidTypeInContext(base->gallivm->context),
@@ -458,7 +458,7 @@
 					} else {
 						args[1] = lp_build_const_int32(base->gallivm, color_count++);
 						args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
-						build_intrinsic(
+						lp_build_intrinsic(
 							base->gallivm->builder,
 							"llvm.R600.store.swizzle",
 							LLVMVoidTypeInContext(base->gallivm->context),
@@ -543,7 +543,7 @@
 		case TGSI_OPCODE_TXF: {
 			args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
 			args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
-			emit_data->output[0] = build_intrinsic(gallivm->builder,
+			emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
 							"llvm.R600.load.texbuf",
 							emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
 			if (ctx->chip_class >= EVERGREEN)
@@ -658,7 +658,7 @@
 				lp_build_const_int32(gallivm, 1),
 				lp_build_const_int32(gallivm, 1)
 			};
-			LLVMValueRef ptr = build_intrinsic(gallivm->builder,
+			LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder,
 				"llvm.R600.ldptr",
 				emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
 			LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
@@ -679,7 +679,7 @@
 		}
 	}
 
-	emit_data->output[0] = build_intrinsic(gallivm->builder,
+	emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
 					action->intr_name,
 					emit_data->dst_type, args, c, LLVMReadNoneAttribute);
 
@@ -754,7 +754,131 @@
 	.intr_name = "llvm.AMDGPU.dp4"
 };
 
+/* TXD operands: gather each of sources 0..2 into its own 4-wide vector. */
+static void txd_fetch_args(struct lp_build_tgsi_context * bld_base,
+			   struct lp_build_emit_data * emit_data)
+{
+	const struct tgsi_full_instruction * insn = emit_data->inst;
+	LLVMValueRef vec[4];
+	unsigned operand, c;
+
+	for (operand = 0; operand < 3; operand++) {
+		for (c = 0; c < 4; c++)
+			vec[c] = lp_build_emit_fetch(bld_base, insn, operand, c);
+		emit_data->args[operand] =
+			lp_build_gather_values(bld_base->base.gallivm, vec, 4);
+	}
+
+	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+	emit_data->arg_count = 3;
+}
+
+
+/* TXP operands: divide x/y/z of source 0 by its w, fourth coord = base.one. */
+static void txp_fetch_args(struct lp_build_tgsi_context * bld_base,
+			   struct lp_build_emit_data * emit_data)
+{
+	const struct tgsi_full_instruction * insn = emit_data->inst;
+	const unsigned opcode = insn->Instruction.Opcode;
+	LLVMValueRef texcoord[5];
+	LLVMValueRef w;
+	unsigned c;
+
+	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+	w = lp_build_emit_fetch(bld_base, insn, 0, TGSI_CHAN_W);
+
+	for (c = 0; c < 3; c++) {
+		LLVMValueRef unprojected = lp_build_emit_fetch(bld_base, insn, 0, c);
+		texcoord[c] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
+							 unprojected, w);
+	}
+	texcoord[3] = bld_base->base.one;
+
+	switch (insn->Texture.Texture) {
+	case TGSI_TEXTURE_CUBE:
+	case TGSI_TEXTURE_CUBE_ARRAY:
+	case TGSI_TEXTURE_SHADOWCUBE:
+	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+		if (opcode != TGSI_OPCODE_TXQ && opcode != TGSI_OPCODE_TXQ_LZ)
+			radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, texcoord, NULL);
+		break;
+	}
+
+	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, texcoord, 4);
+	emit_data->arg_count = 1;
+}
+
+/* Generic texture operands: gather source 0 into a 4-wide coordinate vector. */
+static void tex_fetch_args(struct lp_build_tgsi_context * bld_base,
+			   struct lp_build_emit_data * emit_data)
+{
+	const struct tgsi_full_instruction * insn = emit_data->inst;
+	const unsigned opcode = insn->Instruction.Opcode;
+	LLVMValueRef texcoord[5];
+	unsigned c;
+
+	for (c = 0; c < 4; c++)
+		texcoord[c] = lp_build_emit_fetch(bld_base, insn, 0, c);
+
+	switch (opcode) {
+	case TGSI_OPCODE_TEX2:
+	case TGSI_OPCODE_TXB2:
+	case TGSI_OPCODE_TXL2:
+		/* Extra operand (source 1, channel X) goes in the fifth slot
+		 * for radeon_llvm_emit_prepare_cube_coords to pack; it is not
+		 * gathered below, hence arg_count stays 1. */
+		texcoord[4] = lp_build_emit_fetch(bld_base, insn, 1, 0);
+		break;
+	}
+
+	switch (insn->Texture.Texture) {
+	case TGSI_TEXTURE_CUBE:
+	case TGSI_TEXTURE_CUBE_ARRAY:
+	case TGSI_TEXTURE_SHADOWCUBE:
+	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+		if (opcode != TGSI_OPCODE_TXQ && opcode != TGSI_OPCODE_TXQ_LZ)
+			radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, texcoord, NULL);
+		break;
+	}
 
+	emit_data->arg_count = 1;
+	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, texcoord, 4);
+	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+/* TXF operands: coordinates from tex_fetch_args plus three integer offsets. */
+static void txf_fetch_args(struct lp_build_tgsi_context * bld_base,
+			   struct lp_build_emit_data * emit_data)
+{
+	const struct tgsi_full_instruction * insn = emit_data->inst;
+	struct lp_build_tgsi_soa_context *soa = lp_soa_context(bld_base);
+	const struct tgsi_texture_offset * texoff = insn->TexOffsets;
+	LLVMTypeRef int_type = bld_base->int_bld.elem_type;
+	unsigned i;
+
+	/* args[0]: coordinate vector */
+	tex_fetch_args(bld_base, emit_data);
+
+	/* args[1..3]: x/y/z offsets, zero when the instruction has none */
+	if (!insn->Texture.NumOffsets) {
+		for (i = 1; i <= 3; i++)
+			emit_data->args[i] = bld_base->int_bld.zero;
+	} else {
+		const unsigned swizzle[3] = {
+			texoff->SwizzleX, texoff->SwizzleY, texoff->SwizzleZ
+		};
+
+		assert(insn->Texture.NumOffsets == 1);
+
+		/* Offsets are read from the immediates table and bitcast
+		 * to the integer element type. */
+		for (i = 0; i < 3; i++)
+			emit_data->args[i + 1] = LLVMConstBitCast(
+				soa->immediates[texoff->Index][swizzle[i]], int_type);
+	}
+
+	emit_data->arg_count = 4;
+}
 
 LLVMModuleRef r600_tgsi_llvm(
 	struct radeon_llvm_context * ctx,
@@ -783,7 +907,6 @@
 	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
 	bld_base->emit_prologue = llvm_emit_prologue;
 	bld_base->emit_epilogue = llvm_emit_epilogue;
-	ctx->userdata = ctx;
 	ctx->load_input = llvm_load_input;
 	ctx->load_system_value = llvm_load_system_value;
 
@@ -791,18 +914,42 @@
 	bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
 	bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
 	bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
+	bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
+	bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
+	bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
 	bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
 	bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
 	bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
 	bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
 	bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
+	bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
 	bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
 	bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
-	bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
-	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
 	bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
+	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
+	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
 	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
 
 	lp_build_tgsi_llvm(bld_base, tokens);
@@ -881,7 +1028,7 @@
 	const char * gpu_family = r600_get_llvm_processor_name(family);
 
 	memset(&binary, 0, sizeof(struct radeon_shader_binary));
-	r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL);
+	r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);
 
 	r = r600_create_shader(bc, &binary, use_kill);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_pipe.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_pipe.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_pipe.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_pipe.c	2015-09-16 14:36:09.000000000 +0000
@@ -120,6 +120,7 @@
 	rctx->b.b.screen = screen;
 	rctx->b.b.priv = priv;
 	rctx->b.b.destroy = r600_destroy_context;
+	rctx->b.set_atom_dirty = (void *)r600_set_atom_dirty;
 
 	if (!r600_common_context_init(&rctx->b, &rscreen->b))
 		goto fail;
@@ -176,7 +177,7 @@
 		goto fail;
 	}
 
-	rctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX,
+	rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
 					     r600_context_gfx_flush, rctx,
 					     rscreen->b.trace_bo ?
 						     rscreen->b.trace_bo->cs_buf : NULL);
@@ -268,8 +269,14 @@
 	case PIPE_CAP_SAMPLE_SHADING:
 	case PIPE_CAP_CLIP_HALFZ:
 	case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
 		return 1;
 
+	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+		return rscreen->b.info.drm_major == 2 && rscreen->b.info.drm_minor >= 43;
+
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
 		return !R600_BIG_ENDIAN && rscreen->b.info.has_userptr;
 
@@ -329,10 +336,10 @@
 	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_VERTEXID_NOBASE:
-	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+	case PIPE_CAP_DEPTH_BOUNDS_TEST:
 		return 0;
 
 	/* Stream output. */
@@ -476,6 +483,7 @@
 	case PIPE_SHADER_CAP_SUBROUTINES:
 		return 0;
 	case PIPE_SHADER_CAP_INTEGERS:
+	case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
 		return 1;
 	case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
 	case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_pipe.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_pipe.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_pipe.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_pipe.h	2015-09-16 14:36:09.000000000 +0000
@@ -36,7 +36,7 @@
 #include "util/list.h"
 #include "util/u_transfer.h"
 
-#define R600_NUM_ATOMS 73
+#define R600_NUM_ATOMS 75
 
 #define R600_MAX_VIEWPORTS 16
 
@@ -85,9 +85,12 @@
 #define R600_BIG_ENDIAN 0
 #endif
 
+#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
+#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
+
 struct r600_context;
 struct r600_bytecode;
-struct r600_shader_key;
+union  r600_shader_key;
 
 /* This is an atom containing GPU commands that never change.
  * This is supposed to be copied directly into the CS. */
@@ -426,6 +429,8 @@
 
 	/* State binding slots are here. */
 	struct r600_atom		*atoms[R600_NUM_ATOMS];
+	/* Dirty atom bitmask for fast tests */
+	unsigned long			dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
 	/* States for CS initialization. */
 	struct r600_command_buffer	start_cs_cmd; /* invariant state mostly */
 	/** Compute specific registers initializations.  The start_cs_cmd atom
@@ -490,37 +495,92 @@
 	struct r600_isa		*isa;
 };
 
-static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
+static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
 					    struct r600_command_buffer *cb)
 {
-	assert(cs->cdw + cb->num_dw <= RADEON_MAX_CMDBUF_DWORDS);
+	assert(cs->cdw + cb->num_dw <= cs->max_dw);
 	memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw);
 	cs->cdw += cb->num_dw;
 }
 
+static inline void r600_set_atom_dirty(struct r600_context *rctx,
+				       struct r600_atom *atom,
+				       bool dirty)
+{
+	unsigned long mask;
+	unsigned int w;
+
+	atom->dirty = dirty;
+
+	assert(atom->id != 0);
+	w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
+	mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+	if (dirty)
+		rctx->dirty_atoms[w] |= mask;
+	else
+		rctx->dirty_atoms[w] &= ~mask;
+}
+
+static inline void r600_mark_atom_dirty(struct r600_context *rctx,
+					struct r600_atom *atom)
+{
+	r600_set_atom_dirty(rctx, atom, true);
+}
+
+static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
+						unsigned int id)
+{
+#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
+	unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
+	unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
+	unsigned long bits, mask = (1ul << bit) - 1;
+
+	for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
+		bits = rctx->dirty_atoms[w] & ~mask;
+		if (bits == 0)
+			continue;
+		return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
+	}
+
+	return R600_NUM_ATOMS;
+#else
+	for (; id < R600_NUM_ATOMS; id++) {
+		bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
+			(1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
+		assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
+		if (dirty)
+			break;
+	}
+
+	return id;
+#endif
+}
+
 void r600_trace_emit(struct r600_context *rctx);
 
-static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
+static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
 {
 	atom->emit(&rctx->b, atom);
-	atom->dirty = false;
+	r600_set_atom_dirty(rctx, atom, false);
 	if (rctx->screen->b.trace_bo) {
 		r600_trace_emit(rctx);
 	}
 }
 
-static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
+static inline void r600_set_cso_state(struct r600_context *rctx,
+				      struct r600_cso_state *state, void *cso)
 {
 	state->cso = cso;
-	state->atom.dirty = cso != NULL;
+	r600_set_atom_dirty(rctx, &state->atom, cso != NULL);
 }
 
-static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void *cso,
+static inline void r600_set_cso_state_with_cb(struct r600_context *rctx,
+					      struct r600_cso_state *state, void *cso,
 					      struct r600_command_buffer *cb)
 {
 	state->cb = cb;
 	state->atom.num_dw = cb ? cb->num_dw : 0;
-	r600_set_cso_state(state, cso);
+	r600_set_cso_state(rctx, state, cso);
 }
 
 /* compute_memory_pool.c */
@@ -529,11 +589,6 @@
 struct compute_memory_pool* compute_memory_pool_new(
 	struct r600_screen *rscreen);
 
-/* evergreen_compute.c */
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
-                                   unsigned start_slot, unsigned count,
-                                   struct pipe_sampler_view **views);
-
 /* evergreen_state.c */
 struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
@@ -588,7 +643,7 @@
 /* r600_shader.c */
 int r600_pipe_shader_create(struct pipe_context *ctx,
 			    struct r600_pipe_shader *shader,
-			    struct r600_shader_key key);
+			    union r600_shader_key key);
 
 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 
@@ -656,6 +711,7 @@
 void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
 void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom);
 void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a);
+void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id);
 void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
 		    void (*emit)(struct r600_context *ctx, struct r600_atom *state),
 		    unsigned num_dw);
@@ -719,19 +775,19 @@
 /*Evergreen Compute packet3*/
 #define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
 
-static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned value)
+static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value)
 {
 	cb->buf[cb->num_dw++] = value;
 }
 
-static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
+static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
 {
 	assert(cb->num_dw+num <= cb->max_num_dw);
 	memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0]));
 	cb->num_dw += num;
 }
 
-static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
 {
 	assert(reg < R600_CONTEXT_REG_OFFSET);
 	assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -743,7 +799,7 @@
  * Needs cb->pkt_flags set to  RADEON_CP_PACKET3_COMPUTE_MODE for compute
  * shaders.
  */
-static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
 {
 	assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET);
 	assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -755,7 +811,7 @@
  * Needs cb->pkt_flags set to  RADEON_CP_PACKET3_COMPUTE_MODE for compute
  * shaders.
  */
-static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
 {
 	assert(reg >= R600_CTL_CONST_OFFSET);
 	assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -763,7 +819,7 @@
 	cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
 }
 
-static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
 {
 	assert(reg >= R600_LOOP_CONST_OFFSET);
 	assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -775,7 +831,7 @@
  * Needs cb->pkt_flags set to  RADEON_CP_PACKET3_COMPUTE_MODE for compute
  * shaders.
  */
-static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
 {
 	assert(reg >= EG_LOOP_CONST_OFFSET);
 	assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -783,31 +839,31 @@
 	cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2;
 }
 
-static INLINE void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
 {
 	r600_store_config_reg_seq(cb, reg, 1);
 	r600_store_value(cb, value);
 }
 
-static INLINE void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
 {
 	r600_store_context_reg_seq(cb, reg, 1);
 	r600_store_value(cb, value);
 }
 
-static INLINE void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
 {
 	r600_store_ctl_const_seq(cb, reg, 1);
 	r600_store_value(cb, value);
 }
 
-static INLINE void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
 {
 	r600_store_loop_const_seq(cb, reg, 1);
 	r600_store_value(cb, value);
 }
 
-static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
 {
 	eg_store_loop_const_seq(cb, reg, 1);
 	r600_store_value(cb, value);
@@ -816,28 +872,28 @@
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
 void r600_release_command_buffer(struct r600_command_buffer *cb);
 
-static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	r600_write_context_reg_seq(cs, reg, num);
 	/* Set the compute bit on the packet header */
 	cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
 }
 
-static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= R600_CTL_CONST_OFFSET);
-	assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+	assert(cs->cdw+2+num <= cs->max_dw);
 	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CTL_CONST, num, 0);
 	cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
 }
 
-static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
 	r600_write_compute_context_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
 {
 	if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
 		r600_write_compute_context_reg(cs, reg, value);
@@ -846,7 +902,7 @@
 	}
 }
 
-static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
 	r600_write_ctl_const_seq(cs, reg, 1);
 	radeon_emit(cs, value);
@@ -855,21 +911,21 @@
 /*
  * common helpers
  */
-static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
 {
 	return value * (1 << frac_bits);
 }
 #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
 
 /* 12.4 fixed-point */
-static INLINE unsigned r600_pack_float_12p4(float x)
+static inline unsigned r600_pack_float_12p4(float x)
 {
 	return x <= 0    ? 0 :
 	       x >= 4096 ? 0xffff : x * 16;
 }
 
 /* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
-static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+static inline bool r600_can_read_depth(struct r600_texture *rtex)
 {
 	return rtex->resource.b.b.nr_samples <= 1 &&
 	       (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
@@ -880,28 +936,5 @@
 #define     V_028A6C_OUTPRIM_TYPE_LINESTRIP            1
 #define     V_028A6C_OUTPRIM_TYPE_TRISTRIP             2
 
-static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
-{
-	static const int prim_conv[] = {
-		V_028A6C_OUTPRIM_TYPE_POINTLIST,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP
-	};
-	assert(mode < Elements(prim_conv));
-
-	return prim_conv[mode];
-}
-
+unsigned r600_conv_prim_to_gs_out(unsigned mode);
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_shader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_shader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_shader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_shader.c	2015-09-16 14:36:09.000000000 +0000
@@ -62,7 +62,7 @@
 
 static int r600_shader_from_tgsi(struct r600_context *rctx,
 				 struct r600_pipe_shader *pipeshader,
-				 struct r600_shader_key key);
+				 union r600_shader_key key);
 
 
 static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
@@ -133,7 +133,7 @@
 
 int r600_pipe_shader_create(struct pipe_context *ctx,
 			    struct r600_pipe_shader *shader,
-			    struct r600_shader_key key)
+			    union r600_shader_key key)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct r600_pipe_shader_selector *sel = shader->selector;
@@ -141,7 +141,7 @@
 	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
 	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
 	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
-	unsigned export_shader = key.vs_as_es;
+	unsigned export_shader;
 
 	shader->shader.bc.isa = rctx->isa;
 
@@ -220,6 +220,7 @@
 		}
 		break;
 	case TGSI_PROCESSOR_VERTEX:
+		export_shader = key.vs.as_es;
 		if (rctx->b.chip_class >= EVERGREEN) {
 			if (export_shader)
 				evergreen_update_es_state(ctx, shader);
@@ -310,6 +311,7 @@
 	int					gs_next_vertex;
 	struct r600_shader	*gs_for_vs;
 	int					gs_export_gpr_treg;
+	unsigned				enabled_stream_buffers_mask;
 };
 
 struct r600_shader_tgsi_instruction {
@@ -617,98 +619,100 @@
 
 	switch (d->Declaration.File) {
 	case TGSI_FILE_INPUT:
-		i = ctx->shader->ninput;
-                assert(i < Elements(ctx->shader->input));
-		ctx->shader->ninput += count;
-		ctx->shader->input[i].name = d->Semantic.Name;
-		ctx->shader->input[i].sid = d->Semantic.Index;
-		ctx->shader->input[i].interpolate = d->Interp.Interpolate;
-		ctx->shader->input[i].interpolate_location = d->Interp.Location;
-		ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
-		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-			ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
-			switch (ctx->shader->input[i].name) {
-			case TGSI_SEMANTIC_FACE:
-				if (ctx->face_gpr != -1)
-					ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
-				else
-					ctx->face_gpr = ctx->shader->input[i].gpr;
-				break;
-			case TGSI_SEMANTIC_COLOR:
-				ctx->colors_used++;
-				break;
-			case TGSI_SEMANTIC_POSITION:
-				ctx->fragcoord_input = i;
-				break;
-			case TGSI_SEMANTIC_PRIMID:
-				/* set this for now */
-				ctx->shader->gs_prim_id_input = true;
-				ctx->shader->ps_prim_id_input = i;
-				break;
-			}
-			if (ctx->bc->chip_class >= EVERGREEN) {
-				if ((r = evergreen_interp_input(ctx, i)))
-					return r;
+		for (j = 0; j < count; j++) {
+			i = ctx->shader->ninput + j;
+			assert(i < Elements(ctx->shader->input));
+			ctx->shader->input[i].name = d->Semantic.Name;
+			ctx->shader->input[i].sid = d->Semantic.Index + j;
+			ctx->shader->input[i].interpolate = d->Interp.Interpolate;
+			ctx->shader->input[i].interpolate_location = d->Interp.Location;
+			ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
+			if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+				ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
+				switch (ctx->shader->input[i].name) {
+				case TGSI_SEMANTIC_FACE:
+					if (ctx->face_gpr != -1)
+						ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
+					else
+						ctx->face_gpr = ctx->shader->input[i].gpr;
+					break;
+				case TGSI_SEMANTIC_COLOR:
+					ctx->colors_used++;
+					break;
+				case TGSI_SEMANTIC_POSITION:
+					ctx->fragcoord_input = i;
+					break;
+				case TGSI_SEMANTIC_PRIMID:
+					/* set this for now */
+					ctx->shader->gs_prim_id_input = true;
+					ctx->shader->ps_prim_id_input = i;
+					break;
+				}
+				if (ctx->bc->chip_class >= EVERGREEN) {
+					if ((r = evergreen_interp_input(ctx, i)))
+						return r;
+				}
+			} else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+				/* FIXME probably skip inputs if they aren't passed in the ring */
+				ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
+				ctx->next_ring_offset += 16;
+				if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
+					ctx->shader->gs_prim_id_input = true;
 			}
-		} else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
-			/* FIXME probably skip inputs if they aren't passed in the ring */
-			ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
-			ctx->next_ring_offset += 16;
-			if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
-				ctx->shader->gs_prim_id_input = true;
-		}
-		for (j = 1; j < count; ++j) {
-			ctx->shader->input[i + j] = ctx->shader->input[i];
-			ctx->shader->input[i + j].gpr += j;
 		}
+		ctx->shader->ninput += count;
 		break;
 	case TGSI_FILE_OUTPUT:
-		i = ctx->shader->noutput++;
-                assert(i < Elements(ctx->shader->output));
-		ctx->shader->output[i].name = d->Semantic.Name;
-		ctx->shader->output[i].sid = d->Semantic.Index;
-		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
-		ctx->shader->output[i].interpolate = d->Interp.Interpolate;
-		ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
-		if (ctx->type == TGSI_PROCESSOR_VERTEX ||
-				ctx->type == TGSI_PROCESSOR_GEOMETRY) {
-			ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
-			switch (d->Semantic.Name) {
-			case TGSI_SEMANTIC_CLIPDIST:
-				ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
-				break;
-			case TGSI_SEMANTIC_PSIZE:
-				ctx->shader->vs_out_misc_write = 1;
-				ctx->shader->vs_out_point_size = 1;
-				break;
-			case TGSI_SEMANTIC_EDGEFLAG:
-				ctx->shader->vs_out_misc_write = 1;
-				ctx->shader->vs_out_edgeflag = 1;
-				ctx->edgeflag_output = i;
-				break;
-			case TGSI_SEMANTIC_VIEWPORT_INDEX:
-				ctx->shader->vs_out_misc_write = 1;
-				ctx->shader->vs_out_viewport = 1;
-				break;
-			case TGSI_SEMANTIC_LAYER:
-				ctx->shader->vs_out_misc_write = 1;
-				ctx->shader->vs_out_layer = 1;
-				break;
-			case TGSI_SEMANTIC_CLIPVERTEX:
-				ctx->clip_vertex_write = TRUE;
-				ctx->cv_output = i;
-				break;
-			}
-			if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
-				ctx->gs_out_ring_offset += 16;
-			}
-		} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-			switch (d->Semantic.Name) {
-			case TGSI_SEMANTIC_COLOR:
-				ctx->shader->nr_ps_max_color_exports++;
-				break;
+		for (j = 0; j < count; j++) {
+			i = ctx->shader->noutput + j;
+			assert(i < Elements(ctx->shader->output));
+			ctx->shader->output[i].name = d->Semantic.Name;
+			ctx->shader->output[i].sid = d->Semantic.Index + j;
+			ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
+			ctx->shader->output[i].interpolate = d->Interp.Interpolate;
+			ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
+			if (ctx->type == TGSI_PROCESSOR_VERTEX ||
+			    ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+				ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
+				switch (d->Semantic.Name) {
+				case TGSI_SEMANTIC_CLIPDIST:
+					ctx->shader->clip_dist_write |= d->Declaration.UsageMask <<
+									((d->Semantic.Index + j) << 2);
+					break;
+				case TGSI_SEMANTIC_PSIZE:
+					ctx->shader->vs_out_misc_write = 1;
+					ctx->shader->vs_out_point_size = 1;
+					break;
+				case TGSI_SEMANTIC_EDGEFLAG:
+					ctx->shader->vs_out_misc_write = 1;
+					ctx->shader->vs_out_edgeflag = 1;
+					ctx->edgeflag_output = i;
+					break;
+				case TGSI_SEMANTIC_VIEWPORT_INDEX:
+					ctx->shader->vs_out_misc_write = 1;
+					ctx->shader->vs_out_viewport = 1;
+					break;
+				case TGSI_SEMANTIC_LAYER:
+					ctx->shader->vs_out_misc_write = 1;
+					ctx->shader->vs_out_layer = 1;
+					break;
+				case TGSI_SEMANTIC_CLIPVERTEX:
+					ctx->clip_vertex_write = TRUE;
+					ctx->cv_output = i;
+					break;
+				}
+				if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+					ctx->gs_out_ring_offset += 16;
+				}
+			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+				switch (d->Semantic.Name) {
+				case TGSI_SEMANTIC_COLOR:
+					ctx->shader->nr_ps_max_color_exports++;
+					break;
+				}
 			}
 		}
+		ctx->shader->noutput += count;
 		break;
 	case TGSI_FILE_TEMPORARY:
 		if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
@@ -723,6 +727,7 @@
 
 	case TGSI_FILE_CONSTANT:
 	case TGSI_FILE_SAMPLER:
+	case TGSI_FILE_SAMPLER_VIEW:
 	case TGSI_FILE_ADDRESS:
 		break;
 
@@ -1337,7 +1342,7 @@
 	int i, j, r;
 
 	/* Sanity checking. */
-	if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
+	if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) {
 		R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
 		r = -EINVAL;
 		goto out_err;
@@ -1399,6 +1404,9 @@
 		 * with MEM_STREAM instructions */
 		output.array_size = 0xFFF;
 		output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
+
+		ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+
 		if (ctx->bc->chip_class >= EVERGREEN) {
 			switch (so->output[i].output_buffer) {
 			case 0:
@@ -1715,6 +1723,8 @@
 	gs->gs_copy_shader = cshader;
 
 	ctx.bc->nstack = 1;
+
+	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
 	cshader->shader.ring_item_size = ocnt * 16;
 
 	return r600_bytecode_build(ctx.bc);
@@ -1793,7 +1803,7 @@
 
 static int r600_shader_from_tgsi(struct r600_context *rctx,
 				 struct r600_pipe_shader *pipeshader,
-				 struct r600_shader_key key)
+				 union r600_shader_key key)
 {
 	struct r600_screen *rscreen = rctx->screen;
 	struct r600_shader *shader = &pipeshader->shader;
@@ -1807,7 +1817,7 @@
 	unsigned opcode;
 	int i, j, k, r = 0;
 	int next_param_base = 0, next_clip_base;
-	int max_color_exports = MAX2(key.nr_cbufs, 1);
+	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
 	/* Declarations used by llvm code */
 	bool use_llvm = false;
 	bool indirect_gprs;
@@ -1821,8 +1831,6 @@
 	ctx.shader = shader;
 	ctx.native_integers = true;
 
-	shader->vs_as_gs_a = key.vs_as_gs_a;
-	shader->vs_as_es = key.vs_as_es;
 
 	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
 			   rscreen->has_compressed_msaa_texturing);
@@ -1835,9 +1843,14 @@
 	shader->processor_type = ctx.type;
 	ctx.bc->type = shader->processor_type;
 
-	ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+		shader->vs_as_gs_a = key.vs.as_gs_a;
+		shader->vs_as_es = key.vs.as_es;
+	}
+
+	ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
 
-	if (key.vs_as_es) {
+	if (shader->vs_as_es) {
 		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
 	} else {
 		ctx.gs_for_vs = NULL;
@@ -1857,7 +1870,8 @@
 	shader->nr_ps_color_exports = 0;
 	shader->nr_ps_max_color_exports = 0;
 
-	shader->two_side = key.color_two_side;
+	if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+		shader->two_side = key.ps.color_two_side;
 
 	/* register allocations */
 	/* Values [0,127] correspond to GPR[0..127].
@@ -1928,15 +1942,14 @@
 	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
 	ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
 			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+	ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
+	ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
+
 	if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-		ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
-		ctx.temp_reg = ctx.bc->ar_reg + 2;
-		ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
-		ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+		ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
+		ctx.temp_reg = ctx.bc->ar_reg + 4;
 	} else {
-		ctx.temp_reg = ctx.bc->ar_reg + 1;
-		ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
-		ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+		ctx.temp_reg = ctx.bc->ar_reg + 3;
 	}
 
 	shader->max_arrays = 0;
@@ -1962,7 +1975,7 @@
 	shader->fs_write_all = FALSE;
 
 	if (shader->vs_as_gs_a)
-		vs_add_primid_output(&ctx, key.vs_prim_id_out);
+		vs_add_primid_output(&ctx, key.vs.prim_id_out);
 
 	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
 		tgsi_parse_token(&ctx.parse);
@@ -2083,8 +2096,7 @@
 		radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
 		radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
 		radeon_llvm_ctx.stream_outputs = &so;
-		radeon_llvm_ctx.clip_vertex = ctx.cv_output;
-		radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
+		radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one;
 		radeon_llvm_ctx.has_compressed_msaa_texturing =
 			ctx.bc->has_compressed_msaa_texturing;
 		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
@@ -2259,10 +2271,11 @@
 	    so.num_outputs && !use_llvm)
 		emit_streamout(&ctx, &so);
 
+	pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
 	convert_edgeflag_to_int(&ctx);
 
 	if (ring_outputs) {
-		if (key.vs_as_es)
+		if (shader->vs_as_es)
 			emit_gs_ring_writes(&ctx, FALSE);
 	} else {
 		/* Export output */
@@ -2378,7 +2391,7 @@
 						j--;
 						continue;
 					}
-					output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+					output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
 					output[j].array_base = shader->output[i].sid;
 					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 					shader->nr_ps_color_exports++;
@@ -2391,7 +2404,7 @@
 							output[j].swizzle_x = 0;
 							output[j].swizzle_y = 1;
 							output[j].swizzle_z = 2;
-							output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+							output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
 							output[j].burst_count = 1;
 							output[j].array_base = k;
 							output[j].op = CF_OP_EXPORT;
@@ -6143,10 +6156,10 @@
 		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
 		if (r)
 			return r;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_shader.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -95,13 +95,17 @@
 	struct r600_shader_array * arrays;
 };
 
-struct r600_shader_key {
-	unsigned color_two_side:1;
-	unsigned alpha_to_one:1;
-	unsigned nr_cbufs:4;
-	unsigned vs_as_es:1;
-	unsigned vs_as_gs_a:1;
-	unsigned vs_prim_id_out:8;
+union r600_shader_key {
+	struct {
+		unsigned	nr_cbufs:4;
+		unsigned	color_two_side:1;
+		unsigned	alpha_to_one:1;
+	} ps;
+	struct {
+		unsigned	prim_id_out:8;
+		unsigned	as_es:1; /* export shader */
+		unsigned	as_gs_a:1;
+	} vs;
 };
 
 struct r600_shader_array {
@@ -122,9 +126,10 @@
 	unsigned		flatshade;
 	unsigned		pa_cl_vs_out_cntl;
 	unsigned		nr_ps_color_outputs;
-	struct r600_shader_key	key;
+	union r600_shader_key	key;
 	unsigned		db_shader_control;
 	unsigned		ps_depth_export;
+	unsigned		enabled_stream_buffers_mask;
 };
 
 /* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -802,7 +802,7 @@
 		return;
 
 	for (i = start_slot ; i < start_slot + num_scissors; i++) {
-		rctx->scissor[i].atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
 	}
 }
 
@@ -1007,7 +1007,7 @@
 
 		/* CMASK. */
 		if (!rctx->dummy_cmask ||
-		    rctx->dummy_cmask->buf->size < cmask.size ||
+		    rctx->dummy_cmask->b.b.width0 < cmask.size ||
 		    rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
 			struct pipe_transfer *transfer;
 			void *ptr;
@@ -1025,7 +1025,7 @@
 
 		/* FMASK. */
 		if (!rctx->dummy_fmask ||
-		    rctx->dummy_fmask->buf->size < fmask.size ||
+		    rctx->dummy_fmask->b.b.width0 < fmask.size ||
 		    rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
 			pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
 			rctx->dummy_fmask = r600_buffer_create_helper(rscreen, fmask.size, fmask.alignment);
@@ -1193,7 +1193,7 @@
 
 		if (rctx->alphatest_state.bypass != alphatest_bypass) {
 			rctx->alphatest_state.bypass = alphatest_bypass;
-			rctx->alphatest_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
 		}
 	}
 
@@ -1209,28 +1209,28 @@
 
 		if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
 			rctx->poly_offset_state.zs_format = state->zsbuf->format;
-			rctx->poly_offset_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
 		}
 
 		if (rctx->db_state.rsurf != surf) {
 			rctx->db_state.rsurf = surf;
-			rctx->db_state.atom.dirty = true;
-			rctx->db_misc_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+			r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 		}
 	} else if (rctx->db_state.rsurf) {
 		rctx->db_state.rsurf = NULL;
-		rctx->db_state.atom.dirty = true;
-		rctx->db_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 
 	if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
 		rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
-		rctx->cb_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 	}
 
 	if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
 		rctx->alphatest_state.bypass = false;
-		rctx->alphatest_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
 	}
 
 	/* Calculate the CS size. */
@@ -1250,7 +1250,7 @@
 		rctx->framebuffer.atom.num_dw += 2;
 	}
 
-	rctx->framebuffer.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
 
 	r600_set_sample_locations_constant_buffer(rctx);
 }
@@ -1541,9 +1541,9 @@
 
 	rctx->ps_iter_samples = min_samples;
 	if (rctx->framebuffer.nr_samples > 1) {
-		rctx->rasterizer_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->rasterizer_state.atom);
 		if (rctx->b.chip_class == R600)
-			rctx->db_misc_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 }
 
@@ -1694,7 +1694,7 @@
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (320 + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_038008_STRIDE(vb->stride));
@@ -1743,7 +1743,7 @@
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_038008_STRIDE(gs_ring_buffer ? 4 : 16));
@@ -2051,7 +2051,7 @@
 			/* always privilege vs stage so that at worst we have the
 			 * pixel stage producing wrong output (not the vertex
 			 * stage) */
-			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
 			new_num_vs_gprs = num_vs_gprs;
 			new_num_gs_gprs = num_gs_gprs;
 			new_num_es_gprs = num_es_gprs;
@@ -2089,7 +2089,7 @@
 	if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) {
 		rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
 		rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2;
-		rctx->config_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
 		rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
 	}
 	return true;
@@ -2796,11 +2796,11 @@
 
 	if (db_shader_control != rctx->db_misc_state.db_shader_control) {
 		rctx->db_misc_state.db_shader_control = db_shader_control;
-		rctx->db_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 }
 
-static INLINE unsigned r600_array_mode(unsigned mode)
+static inline unsigned r600_array_mode(unsigned mode)
 {
 	switch (mode) {
 	case RADEON_SURF_MODE_LINEAR_ALIGNED:	return V_0280A0_ARRAY_LINEAR_ALIGNED;
@@ -3074,8 +3074,8 @@
 	r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
-	rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
-	rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+	r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+	r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
 	r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
 	r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
 	r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_state_common.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_state_common.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/r600_state_common.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/r600_state_common.c	2015-09-16 14:36:09.000000000 +0000
@@ -47,18 +47,26 @@
 	FREE(cb->buf);
 }
 
+void r600_add_atom(struct r600_context *rctx,
+		   struct r600_atom *atom,
+		   unsigned id)
+{	/* Install 'atom' into slot 'id' of rctx->atoms[] and mark it clean. */
+	assert(id < R600_NUM_ATOMS);	/* slot index must be inside the atoms[] table */
+	assert(rctx->atoms[id] == NULL);	/* a slot may only be claimed once */
+	rctx->atoms[id] = atom;
+	atom->id = id;	/* the atom records its own slot index */
+	atom->dirty = false;	/* newly registered atoms start out not dirty */
+}
+
 void r600_init_atom(struct r600_context *rctx,
 		    struct r600_atom *atom,
 		    unsigned id,
 		    void (*emit)(struct r600_context *ctx, struct r600_atom *state),
 		    unsigned num_dw)
 {
-	assert(id < R600_NUM_ATOMS);
-	assert(rctx->atoms[id] == NULL);
-	rctx->atoms[id] = atom;
 	atom->emit = (void*)emit;
 	atom->num_dw = num_dw;
-	atom->dirty = false;
+	r600_add_atom(rctx, atom, id);	/* slot asserts, atoms[] registration, atom->id and dirty reset are done by r600_add_atom() */
 }
 
 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -115,6 +123,31 @@
 	return prim_conv[prim];
 }
 
+unsigned r600_conv_prim_to_gs_out(unsigned mode)
+{	/* Map a primitive mode to its V_028A6C_OUTPRIM_TYPE_* value via a lookup table. */
+	static const int prim_conv[] = {	/* indexed by PIPE_PRIM_* and R600_PRIM_RECTANGLE_LIST */
+		[PIPE_PRIM_POINTS]			= V_028A6C_OUTPRIM_TYPE_POINTLIST,
+		[PIPE_PRIM_LINES]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_LOOP]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_STRIP]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_TRIANGLES]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_STRIP]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_FAN]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_QUADS]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_QUAD_STRIP]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_POLYGON]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_LINES_ADJACENCY]		= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_TRIANGLES_ADJACENCY]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_PATCHES]			= V_028A6C_OUTPRIM_TYPE_POINTLIST,
+		[R600_PRIM_RECTANGLE_LIST]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP
+	};
+	assert(mode < Elements(prim_conv));	/* mode must index inside the table; only checked when asserts are enabled */
+
+	return prim_conv[mode];
+}
+
 /* common state between evergreen and r600 */
 
 static void r600_bind_blend_state_internal(struct r600_context *rctx,
@@ -127,11 +160,11 @@
 	rctx->dual_src_blend = blend->dual_src_blend;
 
 	if (!blend_disable) {
-		r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer);
+		r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer);
 		color_control = blend->cb_color_control;
 	} else {
 		/* Blending is disabled. */
-		r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer_no_blend);
+		r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer_no_blend);
 		color_control = blend->cb_color_control_no_blend;
 	}
 
@@ -150,7 +183,7 @@
 		update_cb = true;
 	}
 	if (update_cb) {
-		rctx->cb_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 	}
 }
 
@@ -160,7 +193,7 @@
 	struct r600_blend_state *blend = (struct r600_blend_state *)state;
 
 	if (blend == NULL) {
-		r600_set_cso_state_with_cb(&rctx->blend_state, NULL, NULL);
+		r600_set_cso_state_with_cb(rctx, &rctx->blend_state, NULL, NULL);
 		return;
 	}
 
@@ -173,7 +206,7 @@
 	struct r600_context *rctx = (struct r600_context *)ctx;
 
 	rctx->blend_color.state = *state;
-	rctx->blend_color.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->blend_color.atom);
 }
 
 void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
@@ -210,7 +243,7 @@
 	struct pipe_constant_buffer cb;
 
 	rctx->clip_state.state = *state;
-	rctx->clip_state.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
 
 	cb.buffer = NULL;
 	cb.user_buffer = state->ucp;
@@ -226,7 +259,7 @@
 	struct r600_context *rctx = (struct r600_context *)ctx;
 
 	rctx->stencil_ref.state = *state;
-	rctx->stencil_ref.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom);
 }
 
 void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
@@ -274,11 +307,11 @@
 	struct r600_stencil_ref ref;
 
 	if (state == NULL) {
-		r600_set_cso_state_with_cb(&rctx->dsa_state, NULL, NULL);
+		r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, NULL, NULL);
 		return;
 	}
 
-	r600_set_cso_state_with_cb(&rctx->dsa_state, dsa, &dsa->buffer);
+	r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, dsa, &dsa->buffer);
 
 	ref.ref_value[0] = rctx->stencil_ref.pipe_state.ref_value[0];
 	ref.ref_value[1] = rctx->stencil_ref.pipe_state.ref_value[1];
@@ -293,7 +326,7 @@
 			 * we are having lockup on evergreen so do not enable
 			 * hyperz when not writing zbuffer
 			 */
-			rctx->db_misc_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 		}
 	}
 
@@ -304,7 +337,7 @@
 	    rctx->alphatest_state.sx_alpha_ref != dsa->alpha_ref) {
 		rctx->alphatest_state.sx_alpha_test_control = dsa->sx_alpha_test_control;
 		rctx->alphatest_state.sx_alpha_ref = dsa->alpha_ref;
-		rctx->alphatest_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
 	}
 }
 
@@ -318,14 +351,14 @@
 
 	rctx->rasterizer = rs;
 
-	r600_set_cso_state_with_cb(&rctx->rasterizer_state, rs, &rs->buffer);
+	r600_set_cso_state_with_cb(rctx, &rctx->rasterizer_state, rs, &rs->buffer);
 
 	if (rs->offset_enable &&
 	    (rs->offset_units != rctx->poly_offset_state.offset_units ||
 	     rs->offset_scale != rctx->poly_offset_state.offset_scale)) {
 		rctx->poly_offset_state.offset_units = rs->offset_units;
 		rctx->poly_offset_state.offset_scale = rs->offset_scale;
-		rctx->poly_offset_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
 	}
 
 	/* Update clip_misc_state. */
@@ -333,14 +366,14 @@
 	    rctx->clip_misc_state.clip_plane_enable != rs->clip_plane_enable) {
 		rctx->clip_misc_state.pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
 		rctx->clip_misc_state.clip_plane_enable = rs->clip_plane_enable;
-		rctx->clip_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
 	}
 
 	/* Workaround for a missing scissor enable on r600. */
 	if (rctx->b.chip_class == R600 &&
 	    rs->scissor_enable != rctx->scissor[0].enable) {
 		rctx->scissor[0].enable = rs->scissor_enable;
-		rctx->scissor[0].atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom);
 	}
 
 	/* Re-emit PA_SC_LINE_STIPPLE. */
@@ -378,7 +411,7 @@
 		state->atom.num_dw =
 			util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
 			util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5;
-		state->atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &state->atom);
 	}
 }
 
@@ -399,6 +432,11 @@
 
 	assert(start == 0); /* XXX fix below */
 
+	if (!states) {
+		disable_mask = ~0u;
+		count = 0;
+	}
+
 	for (i = 0; i < count; i++) {
 		struct r600_pipe_sampler_state *rstate = rstates[i];
 
@@ -438,7 +476,7 @@
 		/* change in TA_CNTL_AUX need a pipeline flush */
 		rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
 		rctx->seamless_cube_map.enabled = seamless_cube_map;
-		rctx->seamless_cube_map.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
 	}
 }
 
@@ -478,7 +516,7 @@
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 
-	r600_set_cso_state(&rctx->vertex_fetch_shader, state);
+	r600_set_cso_state(rctx, &rctx->vertex_fetch_shader, state);
 }
 
 static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
@@ -508,7 +546,7 @@
 		rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
 		rctx->vertex_buffer_state.atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 12 : 11) *
 					       util_bitcount(rctx->vertex_buffer_state.dirty_mask);
-		rctx->vertex_buffer_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
 	}
 }
 
@@ -565,7 +603,7 @@
 		rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
 		state->atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 14 : 13) *
 				     util_bitcount(state->dirty_mask);
-		state->atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &state->atom);
 	}
 }
 
@@ -588,9 +626,9 @@
 
 	assert(start == 0); /* XXX fix below */
 
-	if (shader == PIPE_SHADER_COMPUTE) {
-		evergreen_set_cs_sampler_view(pipe, start, count, views);
-		return;
+	if (!views) {
+		disable_mask = ~0u;
+		count = 0;
 	}
 
 	remaining_mask = dst->views.enabled_mask & disable_mask;
@@ -668,7 +706,7 @@
 
 	for (i = start_slot; i < start_slot + num_viewports; i++) {
 		rctx->viewport[i].state = state[i - start_slot];
-		rctx->viewport[i].atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom);
 	}
 }
 
@@ -689,29 +727,39 @@
 }
 
 /* Compute the key for the hw shader variant */
-static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
+static inline union r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
 		struct r600_pipe_shader_selector * sel)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_shader_key key;
+	union r600_shader_key key;
 	memset(&key, 0, sizeof(key));
 
-	if (sel->type == PIPE_SHADER_FRAGMENT) {
-		key.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side;
-		key.alpha_to_one = rctx->alpha_to_one &&
-				   rctx->rasterizer && rctx->rasterizer->multisample_enable &&
-				   !rctx->framebuffer.cb0_is_integer;
-		key.nr_cbufs = rctx->framebuffer.state.nr_cbufs;
-		/* Dual-source blending only makes sense with nr_cbufs == 1. */
-		if (key.nr_cbufs == 1 && rctx->dual_src_blend)
-			key.nr_cbufs = 2;
-	} else if (sel->type == PIPE_SHADER_VERTEX) {
-		key.vs_as_es = (rctx->gs_shader != NULL);
+	switch (sel->type) {
+	case PIPE_SHADER_VERTEX: {
+		key.vs.as_es = (rctx->gs_shader != NULL);
 		if (rctx->ps_shader->current->shader.gs_prim_id_input && !rctx->gs_shader) {
-			key.vs_as_gs_a = true;
-			key.vs_prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid;
+			key.vs.as_gs_a = true;
+			key.vs.prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid;
 		}
+		break;
+	}
+	case PIPE_SHADER_GEOMETRY:
+		break;
+	case PIPE_SHADER_FRAGMENT: {
+		key.ps.color_two_side = rctx->rasterizer && rctx->rasterizer->two_side;
+		key.ps.alpha_to_one = rctx->alpha_to_one &&
+				      rctx->rasterizer && rctx->rasterizer->multisample_enable &&
+				      !rctx->framebuffer.cb0_is_integer;
+		key.ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs;
+		/* Dual-source blending only makes sense with nr_cbufs == 1. */
+		if (key.ps.nr_cbufs == 1 && rctx->dual_src_blend)
+			key.ps.nr_cbufs = 2;
+		break;
 	}
+	default:
+		assert(0);
+	}
+
 	return key;
 }
 
@@ -721,7 +769,7 @@
         struct r600_pipe_shader_selector* sel,
         bool *dirty)
 {
-	struct r600_shader_key key;
+	union r600_shader_key key;
 	struct r600_pipe_shader * shader = NULL;
 	int r;
 
@@ -908,7 +956,7 @@
 		rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
 		state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
 								   : util_bitcount(state->dirty_mask)*19;
-		state->atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &state->atom);
 	}
 }
 
@@ -977,7 +1025,7 @@
 		return;
 
 	rctx->sample_mask.sample_mask = sample_mask;
-	rctx->sample_mask.atom.dirty = true;
+	r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
 }
 
 /*
@@ -1102,27 +1150,28 @@
 			       struct r600_shader_state *state,
 			       struct r600_pipe_shader *shader)
 {
+	struct r600_context *rctx = (struct r600_context *)ctx;
+
 	state->shader = shader;
 	if (shader) {
 		state->atom.num_dw = shader->command_buffer.num_dw;
-		state->atom.dirty = true;
 		r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
 	} else {
 		state->atom.num_dw = 0;
-		state->atom.dirty = false;
 	}
+	r600_mark_atom_dirty(rctx, &state->atom);
 }
 
 static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
 {
 	if (rctx->shader_stages.geom_enable != enable) {
 		rctx->shader_stages.geom_enable = enable;
-		rctx->shader_stages.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
 	}
 
 	if (rctx->gs_rings.enable != enable) {
 		rctx->gs_rings.enable = enable;
-		rctx->gs_rings.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->gs_rings.atom);
 
 		if (enable && !rctx->gs_rings.esgs_ring.buffer) {
 			unsigned size = 0x1C000;
@@ -1187,7 +1236,7 @@
 
 		if (!rctx->shader_stages.geom_enable) {
 			rctx->shader_stages.geom_enable = true;
-			rctx->shader_stages.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
 		}
 
 		/* gs_shader provides GS and VS (copy shader) */
@@ -1201,8 +1250,9 @@
 				rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->gs_shader->current->gs_copy_shader->pa_cl_vs_out_cntl;
 				rctx->clip_misc_state.clip_dist_write = rctx->gs_shader->current->gs_copy_shader->shader.clip_dist_write;
 				rctx->clip_misc_state.clip_disable = rctx->gs_shader->current->shader.vs_position_window_space;
-				rctx->clip_misc_state.atom.dirty = true;
+				r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
 			}
+			rctx->b.streamout.enabled_stream_buffers_mask = rctx->gs_shader->current->gs_copy_shader->enabled_stream_buffers_mask;
 		}
 
 		r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1218,7 +1268,7 @@
 			update_shader_atom(ctx, &rctx->geometry_shader, NULL);
 			update_shader_atom(ctx, &rctx->export_shader, NULL);
 			rctx->shader_stages.geom_enable = false;
-			rctx->shader_stages.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
 		}
 
 		r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1235,8 +1285,9 @@
 				rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
 				rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
 				rctx->clip_misc_state.clip_disable = rctx->vs_shader->current->shader.vs_position_window_space;
-				rctx->clip_misc_state.atom.dirty = true;
+				r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
 			}
+			rctx->b.streamout.enabled_stream_buffers_mask = rctx->vs_shader->current->enabled_stream_buffers_mask;
 		}
 	}
 
@@ -1247,7 +1298,7 @@
 
 		if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
 			rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
-			rctx->cb_misc_state.atom.dirty = true;
+			r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 		}
 
 		if (rctx->b.chip_class <= R700) {
@@ -1255,7 +1306,7 @@
 
 			if (rctx->cb_misc_state.multiwrite != multiwrite) {
 				rctx->cb_misc_state.multiwrite = multiwrite;
-				rctx->cb_misc_state.atom.dirty = true;
+				r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 			}
 		}
 
@@ -1269,7 +1320,7 @@
 				r600_update_ps_state(ctx, rctx->ps_shader->current);
 		}
 
-		rctx->shader_stages.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
 		update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
 	}
 
@@ -1404,7 +1455,6 @@
 					data += info.indirect_offset / sizeof(unsigned);
 					start = data[2] * ib.index_size;
 					count = data[0];
-					rctx->b.ws->buffer_unmap(indirect_resource->cs_buf);
 				}
 				else {
 					start = 0;
@@ -1449,24 +1499,23 @@
 		rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart;
 		rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index;
 		rctx->vgt_state.vgt_indx_offset = info.index_bias;
-		rctx->vgt_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->vgt_state.atom);
 	}
 
 	/* Workaround for hardware deadlock on certain R600 ASICs: write into a CB register. */
 	if (rctx->b.chip_class == R600) {
 		rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
-		rctx->cb_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
 	}
 
 	/* Emit states. */
 	r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
 	r600_flush_emit(rctx);
 
-	for (i = 0; i < R600_NUM_ATOMS; i++) {
-		if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
-			continue;
-		}
+	i = r600_next_dirty_atom(rctx, 0);
+	while (i < R600_NUM_ATOMS) {
 		r600_emit_atom(rctx, rctx->atoms[i]);
+		i = r600_next_dirty_atom(rctx, i + 1);
 	}
 
 	if (rctx->b.chip_class == CAYMAN) {
@@ -2485,7 +2534,7 @@
 
 	if (rctx->db_misc_state.occlusion_query_enabled != enable) {
 		rctx->db_misc_state.occlusion_query_enabled = enable;
-		rctx->db_misc_state.atom.dirty = true;
+		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,7 @@
 	int r = 0;
 	uint32_t dw0 = dw[i];
 	uint32_t dw1 = dw[i+1];
+	assert(i+1 <= ndw);
 
 	if ((dw1 >> 29) & 1) { // CF_ALU
 		return decode_cf_alu(i, bc);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -199,6 +199,9 @@
 		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
 		cf_node *if_pop = sh.create_cf(CF_OP_POP);
 
+		if (!last_cf || last_cf->get_parent_region() == r) {
+			last_cf = if_pop;
+		}
 		if_pop->bc.pop_count = 1;
 		if_pop->jump_after(if_pop);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_bc_parser.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_bc_parser.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_bc_parser.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -95,7 +95,7 @@
 		if ((r = decode_cf(i, eop)))
 			return r;
 
-	} while (!eop || (i >> 1) <= max_cf);
+	} while (!eop || (i >> 1) < max_cf);
 
 	return 0;
 }
@@ -769,6 +769,7 @@
 }
 
 int bc_parser::prepare_loop(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());
 
 	cf_node *end = cf_map[c->bc.addr - 1];
 	assert(end->bc.op == CF_OP_LOOP_END);
@@ -788,8 +789,12 @@
 }
 
 int bc_parser::prepare_if(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());
 	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];
 
+	if (!end)
+		return 0; // not quite sure how this happens, malformed input?
+
 	BCP_DUMP(
 		sblog << "parsing JUMP @" << c->bc.id;
 		sblog << "\n";
@@ -815,7 +820,7 @@
 	if (c_else->parent != c->parent)
 		c_else = NULL;
 
-	if (end->parent != c->parent)
+	if (end && end->parent != c->parent)
 		end = NULL;
 
 	region_node *reg = sh->create_region();
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_sched.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_sched.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/r600/sb/sb_sched.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/r600/sb/sb_sched.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -236,7 +236,7 @@
 
 	for (i = 0; i < nsrc; ++i) {
 		value *v = n->src[i];
-		if (v->is_readonly())
+		if (v->is_readonly() || v->is_undef())
 			continue;
 		if (i == 1 && opt)
 			continue;
@@ -489,7 +489,7 @@
 
 	n->bc.bank_swizzle = 0;
 
-	if (!trans & fbs)
+	if (!trans && fbs)
 		n->bc.bank_swizzle = VEC_210;
 
 	if (gpr.try_reserve(n)) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/Android.mk	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,10 @@
 
 LOCAL_SRC_FILES := $(C_SOURCES)
 
+ifeq ($(MESA_ENABLE_LLVM),true)
+LOCAL_SRC_FILES += $(LLVM_C_FILES)
+endif
+
 LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_radeon
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -12,6 +12,7 @@
 	radeon_uvd.c \
 	radeon_uvd.h \
 	radeon_vce_40_2_2.c \
+	radeon_vce_50.c \
 	radeon_vce.c \
 	radeon_vce.h \
 	radeon_video.c \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_buffer_common.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_buffer_common.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_buffer_common.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_buffer_common.c	2015-09-16 14:36:09.000000000 +0000
@@ -84,7 +84,7 @@
 		}
 	}
 
-	if (busy || ctx->ws->buffer_is_busy(resource->buf, rusage)) {
+	if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
 			return NULL;
 		} else {
@@ -121,7 +121,8 @@
 		/* Older kernels didn't always flush the HDP cache before
 		 * CS execution
 		 */
-		if (rscreen->info.drm_minor < 40) {
+		if (rscreen->info.drm_major == 2 &&
+		    rscreen->info.drm_minor < 40) {
 			res->domains = RADEON_DOMAIN_GTT;
 			flags |= RADEON_FLAG_GTT_WC;
 			break;
@@ -147,7 +148,8 @@
 		 * Write-combined CPU mappings are fine, the kernel ensures all CPU
 		 * writes finish before the GPU executes a command stream.
 		 */
-		if (rscreen->info.drm_minor < 40)
+		if (rscreen->info.drm_major == 2 &&
+		    rscreen->info.drm_minor < 40)
 			res->domains = RADEON_DOMAIN_GTT;
 		else if (res->domains & RADEON_DOMAIN_VRAM)
 			flags |= RADEON_FLAG_CPU_ACCESS;
@@ -161,6 +163,9 @@
 		flags |= RADEON_FLAG_NO_CPU_ACCESS;
 	}
 
+	if (rscreen->debug_flags & DBG_NO_WC)
+		flags &= ~RADEON_FLAG_GTT_WC;
+
 	/* Allocate a new resource. */
 	new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
 					     use_reusable_pool,
@@ -274,7 +279,7 @@
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
 		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
-		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+		    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
 			rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
 		}
 		/* At this point, the buffer is always idle. */
@@ -288,7 +293,7 @@
 
 		/* Check if mapping this buffer would cause waiting for the GPU. */
 		if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
-		    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+		    !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
 			/* Do a wait-free write-only transfer using a temporary buffer. */
 			unsigned offset;
 			struct r600_resource *staging = NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_cs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_cs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_cs.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_cs.h	2015-09-16 14:36:09.000000000 +0000
@@ -33,7 +33,7 @@
 #include "r600_pipe_common.h"
 #include "r600d_common.h"
 
-static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
+static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
 					     struct r600_ring *ring,
 					     struct r600_resource *rbo,
 					     enum radeon_bo_usage usage,
@@ -59,7 +59,7 @@
 				      rbo->domains, priority) * 4;
 }
 
-static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
+static inline void r600_emit_reloc(struct r600_common_context *rctx,
 				   struct r600_ring *ring, struct r600_resource *rbo,
 				   enum radeon_bo_usage usage,
 				   enum radeon_bo_priority priority)
@@ -74,57 +74,57 @@
 	}
 }
 
-static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg < R600_CONTEXT_REG_OFFSET);
-	assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+	assert(cs->cdw+2+num <= cs->max_dw);
 	radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
 	radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
 }
 
-static INLINE void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
 	r600_write_config_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static INLINE void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= R600_CONTEXT_REG_OFFSET);
-	assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+	assert(cs->cdw+2+num <= cs->max_dw);
 	radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
 	radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
 }
 
-static INLINE void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
 	r600_write_context_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static INLINE void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-	assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+	assert(cs->cdw+2+num <= cs->max_dw);
 	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
 	radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
 }
 
-static INLINE void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
 	si_write_sh_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
 }
 
-static INLINE void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
 	assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-	assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+	assert(cs->cdw+2+num <= cs->max_dw);
 	radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
 	radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
 }
 
-static INLINE void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
 	cik_write_uconfig_reg_seq(cs, reg, 1);
 	radeon_emit(cs, value);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600d_common.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600d_common.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600d_common.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600d_common.h	2015-09-16 14:36:09.000000000 +0000
@@ -66,6 +66,9 @@
 #define PKT3_SET_SH_REG                        0x76 /* SI and later */
 #define PKT3_SET_UCONFIG_REG                   0x79 /* CIK and later */
 
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1      0x1 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2      0x2 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3      0x3 /* EG and later */
 #define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
 #define EVENT_TYPE_ZPASS_DONE                  0x15
@@ -177,7 +180,7 @@
 #define   S_028804_INTERPOLATE_SRC_Z(x)			(((x) & 0x1) << 19)
 #define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) & 0x1) << 20)
 #define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) & 0x1) << 21)
-#define   S_028804_OVERRASTERIZATION_AMOUNT(x)		(((x) & 0x7) << 24)
+#define   S_028804_OVERRASTERIZATION_AMOUNT(x)		(((x) & 0x07) << 24)
 #define   S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)	(((x) & 0x1) << 27)
 #define CM_R_028BDC_PA_SC_LINE_CNTL                  0x28bdc
 #define   S_028BDC_EXPAND_LINE_WIDTH(x)                (((x) & 0x1) << 9)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_pipe_common.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_pipe_common.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_pipe_common.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_pipe_common.c	2015-09-16 14:36:09.000000000 +0000
@@ -107,11 +107,10 @@
 
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
 {
-	/* The number of dwords we already used in the DMA so far. */
-	num_dw += ctx->rings.dma.cs->cdw;
 	/* Flush if there's not enough space. */
-	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+	if ((num_dw + ctx->rings.dma.cs->cdw) > ctx->rings.dma.cs->max_dw) {
 		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		assert((num_dw + ctx->rings.dma.cs->cdw) <= ctx->rings.dma.cs->max_dw);
 	}
 }
 
@@ -133,10 +132,11 @@
 	}
 
 	/* suspend queries */
-	ctx->nontimer_queries_suspended = false;
+	ctx->queries_suspended_for_flush = false;
 	if (ctx->num_cs_dw_nontimer_queries_suspend) {
 		r600_suspend_nontimer_queries(ctx);
-		ctx->nontimer_queries_suspended = true;
+		r600_suspend_timer_queries(ctx);
+		ctx->queries_suspended_for_flush = true;
 	}
 
 	ctx->streamout.suspended = false;
@@ -154,8 +154,9 @@
 	}
 
 	/* resume queries */
-	if (ctx->nontimer_queries_suspended) {
+	if (ctx->queries_suspended_for_flush) {
 		r600_resume_nontimer_queries(ctx);
+		r600_resume_timer_queries(ctx);
 	}
 
 	/* Re-enable render condition. */
@@ -197,6 +198,19 @@
 	rctx->rings.dma.flushing = false;
 }
 
+static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	unsigned latest = rctx->ws->query_value(rctx->ws,
+						RADEON_GPU_RESET_COUNTER);
+
+	if (rctx->gpu_reset_counter == latest)
+		return PIPE_NO_RESET;
+
+	rctx->gpu_reset_counter = latest;
+	return PIPE_UNKNOWN_CONTEXT_RESET;
+}
+
 bool r600_common_context_init(struct r600_common_context *rctx,
 			      struct r600_common_screen *rscreen)
 {
@@ -223,6 +237,13 @@
         rctx->b.memory_barrier = r600_memory_barrier;
 	rctx->b.flush = r600_flush_from_st;
 
+	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
+		rctx->b.get_device_reset_status = r600_get_reset_status;
+		rctx->gpu_reset_counter =
+			rctx->ws->query_value(rctx->ws,
+					      RADEON_GPU_RESET_COUNTER);
+	}
+
 	LIST_INITHEAD(&rctx->texture_buffers);
 
 	r600_init_context_texture_functions(rctx);
@@ -241,8 +262,12 @@
 	if (!rctx->uploader)
 		return false;
 
+	rctx->ctx = rctx->ws->ctx_create(rctx->ws);
+	if (!rctx->ctx)
+		return false;
+
 	if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
-		rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA,
+		rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
 							 r600_flush_dma_ring,
 							 rctx, NULL);
 		rctx->rings.dma.flush = r600_flush_dma_ring;
@@ -253,12 +278,12 @@
 
 void r600_common_context_cleanup(struct r600_common_context *rctx)
 {
-	if (rctx->rings.gfx.cs) {
+	if (rctx->rings.gfx.cs)
 		rctx->ws->cs_destroy(rctx->rings.gfx.cs);
-	}
-	if (rctx->rings.dma.cs) {
+	if (rctx->rings.dma.cs)
 		rctx->ws->cs_destroy(rctx->rings.dma.cs);
-	}
+	if (rctx->ctx)
+		rctx->ws->ctx_destroy(rctx->ctx);
 
 	if (rctx->uploader) {
 		u_upload_destroy(rctx->uploader);
@@ -314,6 +339,11 @@
 	{ "gs", DBG_GS, "Print geometry shaders" },
 	{ "ps", DBG_PS, "Print pixel shaders" },
 	{ "cs", DBG_CS, "Print compute shaders" },
+	{ "tcs", DBG_TCS, "Print tessellation control shaders" },
+	{ "tes", DBG_TES, "Print tessellation evaluation shaders" },
+	{ "noir", DBG_NO_IR, "Don't print the LLVM IR"},
+	{ "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
+	{ "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
 
 	/* features */
 	{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
@@ -325,6 +355,7 @@
 	{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
 	{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
 	{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
+	{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
@@ -339,11 +370,9 @@
 	return "AMD";
 }
 
-static const char* r600_get_name(struct pipe_screen* pscreen)
+static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
 {
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
-
-	switch (rscreen->family) {
+	switch (rscreen->info.family) {
 	case CHIP_R600: return "AMD R600";
 	case CHIP_RV610: return "AMD RV610";
 	case CHIP_RV630: return "AMD RV630";
@@ -379,10 +408,21 @@
 	case CHIP_KABINI: return "AMD KABINI";
 	case CHIP_HAWAII: return "AMD HAWAII";
 	case CHIP_MULLINS: return "AMD MULLINS";
+	case CHIP_TONGA: return "AMD TONGA";
+	case CHIP_ICELAND: return "AMD ICELAND";
+	case CHIP_CARRIZO: return "AMD CARRIZO";
+	case CHIP_FIJI: return "AMD FIJI";
 	default: return "AMD unknown";
 	}
 }
 
+static const char* r600_get_name(struct pipe_screen* pscreen)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
+
+	return rscreen->renderer_string;
+}
+
 static float r600_get_paramf(struct pipe_screen* pscreen,
 			     enum pipe_capf param)
 {
@@ -496,6 +536,10 @@
 #else
 		return "kabini";
 #endif
+	case CHIP_TONGA: return "tonga";
+	case CHIP_ICELAND: return "iceland";
+	case CHIP_CARRIZO: return "carrizo";
+	case CHIP_FIJI: return "fiji";
 	default: return "";
 	}
 }
@@ -637,6 +681,12 @@
 		return sizeof(uint32_t);
 	case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
 		break; /* unused */
+	case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+		if (ret) {
+			uint32_t *subgroup_size = ret;
+			*subgroup_size = r600_wavefront_size(rscreen->family);
+		}
+		return sizeof(uint32_t);
 	}
 
         fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
@@ -657,25 +707,33 @@
 {
 	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 	struct pipe_driver_query_info list[] = {
+		{"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
+		{"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
 		{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
 		{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
 		{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-		{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}},
+		{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
 		{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-		{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+		{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
+		 PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
 		{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
 		{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+		{"GPU-load", R600_QUERY_GPU_LOAD, {100}},
 		{"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
-		{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}},
-		{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}},
-		{"GPU-load", R600_QUERY_GPU_LOAD, {100}}
+		{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
+		{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
 	};
 	unsigned num_queries;
 
 	if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
 		num_queries = Elements(list);
+	else if (rscreen->info.drm_major == 3)
+		num_queries = Elements(list) - 3;
 	else
-		num_queries = 8;
+		num_queries = Elements(list) - 4;
 
 	if (!info)
 		return num_queries;
@@ -696,14 +754,6 @@
 	rws->fence_reference(ptr, fence);
 }
 
-static boolean r600_fence_signalled(struct pipe_screen *screen,
-				    struct pipe_fence_handle *fence)
-{
-	struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
-
-	return rws->fence_wait(rws, fence, 0);
-}
-
 static boolean r600_fence_finish(struct pipe_screen *screen,
 				 struct pipe_fence_handle *fence,
 				 uint64_t timeout)
@@ -838,8 +888,22 @@
 bool r600_common_screen_init(struct r600_common_screen *rscreen,
 			     struct radeon_winsys *ws)
 {
+	char llvm_string[32] = {};
+
 	ws->query_info(ws, &rscreen->info);
 
+#if HAVE_LLVM
+	snprintf(llvm_string, sizeof(llvm_string),
+		 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+		 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+#endif
+
+	snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
+		 "%s (DRM %i.%i.%i%s)",
+		 r600_get_chip_name(rscreen), rscreen->info.drm_major,
+		 rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
+		 llvm_string);
+
 	rscreen->b.get_name = r600_get_name;
 	rscreen->b.get_vendor = r600_get_vendor;
 	rscreen->b.get_device_vendor = r600_get_device_vendor;
@@ -849,7 +913,6 @@
 	rscreen->b.get_timestamp = r600_get_timestamp;
 	rscreen->b.fence_finish = r600_fence_finish;
 	rscreen->b.fence_reference = r600_fence_reference;
-	rscreen->b.fence_signalled = r600_fence_signalled;
 	rscreen->b.resource_destroy = u_resource_destroy_vtbl;
 	rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
 
@@ -875,7 +938,9 @@
 	pipe_mutex_init(rscreen->aux_context_lock);
 	pipe_mutex_init(rscreen->gpu_load_mutex);
 
-	if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
+	if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) ||
+	     rscreen->info.drm_major == 3) &&
+	    (rscreen->debug_flags & DBG_TRACE_CS)) {
 		rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
 										PIPE_BIND_CUSTOM,
 										PIPE_USAGE_STAGING,
@@ -923,10 +988,8 @@
 	pipe_mutex_destroy(rscreen->aux_context_lock);
 	rscreen->aux_context->destroy(rscreen->aux_context);
 
-	if (rscreen->trace_bo) {
-		rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
+	if (rscreen->trace_bo)
 		pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
-	}
 
 	rscreen->ws->destroy(rscreen->ws);
 	FREE(rscreen);
@@ -942,6 +1005,10 @@
 	switch (tgsi_get_processor_type(tokens)) {
 	case TGSI_PROCESSOR_VERTEX:
 		return (rscreen->debug_flags & DBG_VS) != 0;
+	case TGSI_PROCESSOR_TESS_CTRL:
+		return (rscreen->debug_flags & DBG_TCS) != 0;
+	case TGSI_PROCESSOR_TESS_EVAL:
+		return (rscreen->debug_flags & DBG_TES) != 0;
 	case TGSI_PROCESSOR_GEOMETRY:
 		return (rscreen->debug_flags & DBG_GS) != 0;
 	case TGSI_PROCESSOR_FRAGMENT:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_pipe_common.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_pipe_common.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_pipe_common.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_pipe_common.h	2015-09-16 14:36:09.000000000 +0000
@@ -59,6 +59,8 @@
 #define R600_QUERY_CURRENT_GPU_SCLK	(PIPE_QUERY_DRIVER_SPECIFIC + 9)
 #define R600_QUERY_CURRENT_GPU_MCLK	(PIPE_QUERY_DRIVER_SPECIFIC + 10)
 #define R600_QUERY_GPU_LOAD		(PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define R600_QUERY_NUM_COMPILATIONS	(PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define R600_QUERY_NUM_SHADERS_CREATED	(PIPE_QUERY_DRIVER_SPECIFIC + 13)
 
 #define R600_CONTEXT_STREAMOUT_FLUSH		(1u << 0)
 #define R600_CONTEXT_PRIVATE_FLAG		(1u << 1)
@@ -79,17 +81,23 @@
 #define DBG_GS			(1 << 7)
 #define DBG_PS			(1 << 8)
 #define DBG_CS			(1 << 9)
+#define DBG_TCS			(1 << 10)
+#define DBG_TES			(1 << 11)
+#define DBG_NO_IR		(1 << 12)
+#define DBG_NO_TGSI		(1 << 13)
+#define DBG_NO_ASM		(1 << 14)
+/* Bits 21-31 are reserved for the r600g driver. */
 /* features */
-#define DBG_NO_ASYNC_DMA	(1 << 10)
-#define DBG_NO_HYPERZ		(1 << 11)
-#define DBG_NO_DISCARD_RANGE	(1 << 12)
-#define DBG_NO_2D_TILING	(1 << 13)
-#define DBG_NO_TILING		(1 << 14)
-#define DBG_SWITCH_ON_EOP	(1 << 15)
-#define DBG_FORCE_DMA		(1 << 16)
-#define DBG_PRECOMPILE		(1 << 17)
-#define DBG_INFO		(1 << 18)
-/* The maximum allowed bit is 20. */
+#define DBG_NO_ASYNC_DMA	(1llu << 32)
+#define DBG_NO_HYPERZ		(1llu << 33)
+#define DBG_NO_DISCARD_RANGE	(1llu << 34)
+#define DBG_NO_2D_TILING	(1llu << 35)
+#define DBG_NO_TILING		(1llu << 36)
+#define DBG_SWITCH_ON_EOP	(1llu << 37)
+#define DBG_FORCE_DMA		(1llu << 38)
+#define DBG_PRECOMPILE		(1llu << 39)
+#define DBG_INFO		(1llu << 40)
+#define DBG_NO_WC		(1llu << 41)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 
@@ -127,9 +135,8 @@
 	struct radeon_shader_reloc *relocs;
 	unsigned reloc_count;
 
-	/** Set to 1 if the disassembly for this binary has been dumped to
-	 *  stderr. */
-	int disassembled;
+	/** Disassembled shader in a string. */
+	char *disasm_string;
 };
 
 struct r600_resource {
@@ -214,7 +221,6 @@
 	float				depth_clear_value;
 
 	bool				non_disp_tiling; /* R600-Cayman only */
-	unsigned			mipmap_shift;
 };
 
 struct r600_surface {
@@ -236,6 +242,7 @@
 	unsigned cb_color_pitch;	/* EG and later */
 	unsigned cb_color_slice;	/* EG and later */
 	unsigned cb_color_attrib;	/* EG and later */
+	unsigned cb_dcc_control;	/* VI and later */
 	unsigned cb_color_fmask;	/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
 	unsigned cb_color_fmask_slice;	/* EG and later */
 	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
@@ -272,7 +279,7 @@
 	enum chip_class			chip_class;
 	struct radeon_info		info;
 	struct r600_tiling_info		tiling_info;
-	unsigned			debug_flags;
+	uint64_t			debug_flags;
 	bool				has_cp_dma;
 	bool				has_streamout;
 
@@ -285,12 +292,23 @@
 	uint32_t			*trace_ptr;
 	unsigned			cs_count;
 
+	/* This must be in the screen, because UE4 uses one context for
+	 * compilation and another one for rendering.
+	 */
+	unsigned			num_compilations;
+	/* Along with ST_DEBUG=precompile, this should show if applications
+	 * are loading shaders on demand. This is a monotonic counter.
+	 */
+	unsigned			num_shaders_created;
+
 	/* GPU load thread. */
 	pipe_mutex			gpu_load_mutex;
 	pipe_thread			gpu_load_thread;
 	unsigned			gpu_load_counter_busy;
 	unsigned			gpu_load_counter_idle;
-	unsigned			gpu_load_stop_thread; /* bool */
+	volatile unsigned		gpu_load_stop_thread; /* bool */
+
+	char				renderer_string[64];
 };
 
 /* This encapsulates a state or an operation which can emitted into the GPU
@@ -298,6 +316,7 @@
 struct r600_atom {
 	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
 	unsigned		num_dw;
+	unsigned short		id;	/* used by r600 only */
 	bool			dirty;
 };
 
@@ -327,6 +346,10 @@
 	/* External state which comes from the vertex shader,
 	 * it must be set explicitly when binding a shader. */
 	unsigned			*stride_in_dw;
+	unsigned			enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+
+	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
+	unsigned			hw_enabled_mask;
 
 	/* The state of VGT_STRMOUT_(CONFIG|EN). */
 	struct r600_atom		enable_atom;
@@ -352,10 +375,12 @@
 
 	struct r600_common_screen	*screen;
 	struct radeon_winsys		*ws;
+	struct radeon_winsys_ctx	*ctx;
 	enum radeon_family		family;
 	enum chip_class			chip_class;
 	struct r600_rings		rings;
 	unsigned			initial_gfx_cs_size;
+	unsigned			gpu_reset_counter;
 
 	struct u_upload_mgr		*uploader;
 	struct u_suballocator		*allocator_so_filled_size;
@@ -376,11 +401,14 @@
 	int				num_occlusion_queries;
 	/* Keep track of non-timer queries, because they should be suspended
 	 * during context flushing.
-	 * The timer queries (TIME_ELAPSED) shouldn't be suspended. */
+	 * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits,
+	 * but they should be suspended between IBs. */
 	struct list_head		active_nontimer_queries;
+	struct list_head		active_timer_queries;
 	unsigned			num_cs_dw_nontimer_queries_suspend;
+	unsigned			num_cs_dw_timer_queries_suspend;
 	/* If queries have been suspended. */
-	bool				nontimer_queries_suspended;
+	bool				queries_suspended_for_flush;
 	/* Additional hardware info. */
 	unsigned			backend_mask;
 	unsigned			max_db; /* for OQ */
@@ -441,6 +469,9 @@
 	/* This ensures there is enough space in the command stream. */
 	void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
 				  bool include_draw_vbo);
+
+	void (*set_atom_dirty)(struct r600_common_context *ctx,
+			       struct r600_atom *atom, bool dirty);
 };
 
 /* r600_buffer.c */
@@ -495,6 +526,8 @@
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
 void r600_resume_nontimer_queries(struct r600_common_context *ctx);
+void r600_suspend_timer_queries(struct r600_common_context *ctx);
+void r600_resume_timer_queries(struct r600_common_context *ctx);
 void r600_query_init_backend_mask(struct r600_common_context *ctx);
 
 /* r600_streamout.c */
@@ -549,12 +582,12 @@
 
 /* Inline helpers. */
 
-static INLINE struct r600_resource *r600_resource(struct pipe_resource *r)
+static inline struct r600_resource *r600_resource(struct pipe_resource *r)
 {
 	return (struct r600_resource*)r;
 }
 
-static INLINE void
+static inline void
 r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
 {
 	pipe_resource_reference((struct pipe_resource **)ptr,
@@ -570,6 +603,26 @@
 	 /* else */        return 4;
 }
 
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+	switch (family) {
+	case CHIP_RV610:
+	case CHIP_RS780:
+	case CHIP_RV620:
+	case CHIP_RS880:
+		return 16;
+	case CHIP_RV630:
+	case CHIP_RV635:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_PALM:
+	case CHIP_CEDAR:
+		return 32;
+	default:
+		return 64;
+	}
+}
+
 #define COMPUTE_DBG(rscreen, fmt, args...) \
 	do { \
 		if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -54,6 +54,8 @@
 	uint64_t end_result;
 	/* Fence for GPU_FINISHED. */
 	struct pipe_fence_handle *fence;
+	/* For transform feedback: which stream the query is for */
+	unsigned stream;
 };
 
 
@@ -90,6 +92,8 @@
 	case R600_QUERY_CURRENT_GPU_SCLK:
 	case R600_QUERY_CURRENT_GPU_MCLK:
 	case R600_QUERY_GPU_LOAD:
+	case R600_QUERY_NUM_COMPILATIONS:
+	case R600_QUERY_NUM_SHADERS_CREATED:
 		return NULL;
 	}
 
@@ -118,7 +122,6 @@
 			}
 			results += 4 * ctx->max_db;
 		}
-		ctx->ws->buffer_unmap(buf->cs_buf);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 	case PIPE_QUERY_TIMESTAMP:
@@ -130,7 +133,6 @@
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
 		memset(results, 0, buf_size);
-		ctx->ws->buffer_unmap(buf->cs_buf);
 		break;
 	default:
 		assert(0);
@@ -157,6 +159,17 @@
 	}
 }
 
+static unsigned event_type_for_stream(struct r600_query *query)
+{
+	switch (query->stream) {
+	default:
+	case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
+	case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
+	case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
+	case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
+	}
+}
+
 static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
 {
 	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
@@ -184,22 +197,22 @@
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
+		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -207,7 +220,7 @@
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
@@ -215,9 +228,10 @@
 	r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_MIN);
 
-	if (!r600_is_timer_query(query->type)) {
+	if (r600_is_timer_query(query->type))
+		ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
+	else
 		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
-	}
 }
 
 static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
@@ -240,7 +254,7 @@
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -248,9 +262,9 @@
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 		va += query->buffer.results_end + query->result_size/2;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
+		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		va += query->buffer.results_end + query->result_size/2;
@@ -259,7 +273,7 @@
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -268,7 +282,7 @@
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
@@ -279,9 +293,10 @@
 	query->buffer.results_end += query->result_size;
 
 	if (r600_query_needs_begin(query->type)) {
-		if (!r600_is_timer_query(query->type)) {
+		if (r600_is_timer_query(query->type))
+			ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
+		else
 			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
-		}
 	}
 
 	r600_update_occlusion_query_state(ctx, query->type, -1);
@@ -292,6 +307,13 @@
 					int operation, bool flag_wait)
 {
 	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+	uint32_t op = PRED_OP(operation);
+
+	/* if true then invert, see GL_ARB_conditional_render_inverted */
+	if (ctx->current_render_cond_cond)
+		op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible/overflow */
+	else
+		op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible/overflow */
 
 	if (operation == PREDICATION_OP_CLEAR) {
 		ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
@@ -302,24 +324,21 @@
 	} else {
 		struct r600_query_buffer *qbuf;
 		unsigned count;
-		uint32_t op;
-
 		/* Find how many results there are. */
 		count = 0;
 		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
 			count += qbuf->results_end / query->result_size;
 		}
-
+	
 		ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
-
-		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
-				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
-
+	
+		op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
+	
 		/* emit predicate packets for all data blocks */
 		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
 			unsigned results_base = 0;
 			uint64_t va = qbuf->buf->gpu_address;
-
+	
 			while (results_base < qbuf->results_end) {
 				radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
 				radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
@@ -327,7 +346,7 @@
 				r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
 						RADEON_PRIO_MIN);
 				results_base += query->result_size;
-
+	
 				/* set CONTINUE bit for all packets except the first */
 				op |= PREDICATION_CONTINUE;
 			}
@@ -369,6 +388,7 @@
 		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
 		query->result_size = 32;
 		query->num_cs_dw = 6;
+		query->stream = index;
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		/* 11 values on EG, 8 on R600. */
@@ -390,6 +410,8 @@
 	case R600_QUERY_CURRENT_GPU_SCLK:
 	case R600_QUERY_CURRENT_GPU_MCLK:
 	case R600_QUERY_GPU_LOAD:
+	case R600_QUERY_NUM_COMPILATIONS:
+	case R600_QUERY_NUM_SHADERS_CREATED:
 		skip_allocation = true;
 		break;
 	default:
@@ -454,7 +476,7 @@
 		rquery->begin_result = 0;
 		return true;
 	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
+		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
 		return true;
 	case R600_QUERY_NUM_CS_FLUSHES:
 		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
@@ -465,6 +487,12 @@
 	case R600_QUERY_GPU_LOAD:
 		rquery->begin_result = r600_gpu_load_begin(rctx->screen);
 		return true;
+	case R600_QUERY_NUM_COMPILATIONS:
+		rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+		return true;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		return true;
 	}
 
 	/* Discard the old query buffers. */
@@ -477,7 +505,7 @@
 
 	/* Obtain a new buffer if the current one can't be mapped without a stall. */
 	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
-	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
+	    !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
 		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
 		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
 	}
@@ -487,9 +515,10 @@
 
 	r600_emit_query_begin(rctx, rquery);
 
-	if (!r600_is_timer_query(rquery->type)) {
+	if (r600_is_timer_query(rquery->type))
+		LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
+	else
 		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
-	}
    return true;
 }
 
@@ -515,7 +544,7 @@
 		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
 		return;
 	case R600_QUERY_BUFFER_WAIT_TIME:
-		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
+		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
 		return;
 	case R600_QUERY_NUM_CS_FLUSHES:
 		rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
@@ -541,13 +570,18 @@
 	case R600_QUERY_GPU_LOAD:
 		rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
 		return;
+	case R600_QUERY_NUM_COMPILATIONS:
+		rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
+		return;
+	case R600_QUERY_NUM_SHADERS_CREATED:
+		rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+		return;
 	}
 
 	r600_emit_query_end(rctx, rquery);
 
-	if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
+	if (r600_query_needs_begin(rquery->type))
 		LIST_DELINIT(&rquery->list);
-	}
 }
 
 static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
@@ -601,6 +635,8 @@
 	case R600_QUERY_GPU_TEMPERATURE:
 	case R600_QUERY_CURRENT_GPU_SCLK:
 	case R600_QUERY_CURRENT_GPU_MCLK:
+	case R600_QUERY_NUM_COMPILATIONS:
+	case R600_QUERY_NUM_SHADERS_CREATED:
 		result->u64 = query->end_result - query->begin_result;
 		return TRUE;
 	case R600_QUERY_GPU_LOAD:
@@ -751,7 +787,6 @@
 		assert(0);
 	}
 
-	ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
 	return TRUE;
 }
 
@@ -823,22 +858,37 @@
 	}
 }
 
-void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
+static void r600_suspend_queries(struct r600_common_context *ctx,
+				 struct list_head *query_list,
+				 unsigned *num_cs_dw_queries_suspend)
 {
 	struct r600_query *query;
 
-	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+	LIST_FOR_EACH_ENTRY(query, query_list, list) {
 		r600_emit_query_end(ctx, query);
 	}
-	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+	assert(*num_cs_dw_queries_suspend == 0);
+}
+
+void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
+{
+	r600_suspend_queries(ctx, &ctx->active_nontimer_queries,
+			     &ctx->num_cs_dw_nontimer_queries_suspend);
+}
+
+void r600_suspend_timer_queries(struct r600_common_context *ctx)
+{
+	r600_suspend_queries(ctx, &ctx->active_timer_queries,
+			     &ctx->num_cs_dw_timer_queries_suspend);
 }
 
-static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx)
+static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
+						    struct list_head *query_list)
 {
 	struct r600_query *query;
 	unsigned num_dw = 0;
 
-	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+	LIST_FOR_EACH_ENTRY(query, query_list, list) {
 		/* begin + end */
 		num_dw += query->num_cs_dw * 2;
 
@@ -857,21 +907,35 @@
 	return num_dw;
 }
 
-void r600_resume_nontimer_queries(struct r600_common_context *ctx)
+static void r600_resume_queries(struct r600_common_context *ctx,
+				struct list_head *query_list,
+				unsigned *num_cs_dw_queries_suspend)
 {
 	struct r600_query *query;
+	unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
 
-	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+	assert(*num_cs_dw_queries_suspend == 0);
 
 	/* Check CS space here. Resuming must not be interrupted by flushes. */
-	ctx->need_gfx_cs_space(&ctx->b,
-			       r600_queries_num_cs_dw_for_resuming(ctx), TRUE);
+	ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
 
-	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+	LIST_FOR_EACH_ENTRY(query, query_list, list) {
 		r600_emit_query_begin(ctx, query);
 	}
 }
 
+void r600_resume_nontimer_queries(struct r600_common_context *ctx)
+{
+	r600_resume_queries(ctx, &ctx->active_nontimer_queries,
+			    &ctx->num_cs_dw_nontimer_queries_suspend);
+}
+
+void r600_resume_timer_queries(struct r600_common_context *ctx)
+{
+	r600_resume_queries(ctx, &ctx->active_timer_queries,
+			    &ctx->num_cs_dw_timer_queries_suspend);
+}
+
 /* Get backends mask */
 void r600_query_init_backend_mask(struct r600_common_context *ctx)
 {
@@ -919,7 +983,6 @@
 	results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
 	if (results) {
 		memset(results, 0, ctx->max_db * 4 * 4);
-		ctx->ws->buffer_unmap(buffer->cs_buf);
 
 		/* emit EVENT_WRITE for ZPASS_DONE */
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -937,7 +1000,6 @@
 				if (results[i*4 + 1])
 					mask |= (1<<i);
 			}
-			ctx->ws->buffer_unmap(buffer->cs_buf);
 		}
 	}
 
@@ -966,4 +1028,5 @@
 	    rctx->b.render_condition = r600_render_condition;
 
 	LIST_INITHEAD(&rctx->active_nontimer_queries);
+	LIST_INITHEAD(&rctx->active_timer_queries);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_streamout.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_streamout.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_streamout.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_streamout.c	2015-09-16 14:36:09.000000000 +0000
@@ -88,8 +88,7 @@
 		12 + /* flush_vgt_streamout */
 		num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
 
-	begin->num_dw = 12 + /* flush_vgt_streamout */
-			3; /* VGT_STRMOUT_BUFFER_CONFIG */
+	begin->num_dw = 12; /* flush_vgt_streamout */
 
 	if (rctx->chip_class >= SI) {
 		begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
@@ -105,7 +104,7 @@
 		(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
 		(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
 
-	begin->dirty = true;
+	rctx->set_atom_dirty(rctx, begin, true);
 
 	r600_set_streamout_enable(rctx, true);
 }
@@ -146,7 +145,7 @@
 	if (num_targets) {
 		r600_streamout_buffers_dirty(rctx);
 	} else {
-		rctx->streamout.begin_atom.dirty = false;
+		rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
 		r600_set_streamout_enable(rctx, false);
 	}
 }
@@ -192,11 +191,6 @@
 
 	r600_flush_vgt_streamout(rctx);
 
-	r600_write_context_reg(cs, rctx->chip_class >= EVERGREEN ?
-				       R_028B98_VGT_STRMOUT_BUFFER_CONFIG :
-				       R_028B20_VGT_STRMOUT_BUFFER_EN,
-			       rctx->streamout.enabled_mask);
-
 	for (i = 0; i < rctx->streamout.num_targets; i++) {
 		if (!t[i])
 			continue;
@@ -326,20 +320,42 @@
 static void r600_emit_streamout_enable(struct r600_common_context *rctx,
 				       struct r600_atom *atom)
 {
-	r600_write_context_reg(rctx->rings.gfx.cs,
-			       rctx->chip_class >= EVERGREEN ?
-				       R_028B94_VGT_STRMOUT_CONFIG :
-				       R_028AB0_VGT_STRMOUT_EN,
-			       S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx)));
+	unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
+	unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
+	unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
+	unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
+				      rctx->streamout.enabled_stream_buffers_mask;
+
+	if (rctx->chip_class >= EVERGREEN) {
+		strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;
+
+		strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
+		strmout_config_val |=
+			S_028B94_RAST_STREAM(0) |
+			S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
+			S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
+			S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
+	}
+	r600_write_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+	r600_write_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
 }
 
 static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
 {
 	bool old_strmout_en = r600_get_strmout_en(rctx);
+	unsigned old_hw_enabled_mask = rctx->streamout.hw_enabled_mask;
 
 	rctx->streamout.streamout_enabled = enable;
-	if (old_strmout_en != r600_get_strmout_en(rctx))
-		rctx->streamout.enable_atom.dirty = true;
+
+	rctx->streamout.hw_enabled_mask = rctx->streamout.enabled_mask |
+					  (rctx->streamout.enabled_mask << 4) |
+					  (rctx->streamout.enabled_mask << 8) |
+					  (rctx->streamout.enabled_mask << 12);
+
+	if ((old_strmout_en != r600_get_strmout_en(rctx)) ||
+            (old_hw_enabled_mask != rctx->streamout.hw_enabled_mask)) {
+		rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+	}
 }
 
 void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
@@ -354,8 +370,9 @@
 		rctx->streamout.prims_gen_query_enabled =
 			rctx->streamout.num_prims_gen_queries != 0;
 
-		if (old_strmout_en != r600_get_strmout_en(rctx))
-			rctx->streamout.enable_atom.dirty = true;
+		if (old_strmout_en != r600_get_strmout_en(rctx)) {
+			rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+		}
 	}
 }
 
@@ -365,5 +382,5 @@
 	rctx->b.stream_output_target_destroy = r600_so_target_destroy;
 	rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
 	rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
-	rctx->streamout.enable_atom.num_dw = 3;
+	rctx->streamout.enable_atom.num_dw = 6;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/r600_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/r600_texture.c	2015-09-16 14:36:09.000000000 +0000
@@ -243,10 +243,11 @@
 				       RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
 				       surface->level[0].mode >= RADEON_SURF_MODE_2D ?
 				       RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
+				       surface->pipe_config,
 				       surface->bankw, surface->bankh,
 				       surface->tile_split,
 				       surface->stencil_tile_split,
-				       surface->mtilea,
+				       surface->mtilea, surface->num_banks,
 				       surface->level[0].pitch_bytes,
 				       (surface->flags & RADEON_SURF_SCANOUT) != 0);
 
@@ -489,7 +490,7 @@
 	unsigned num_pipes = rscreen->tiling_info.num_channels;
 
 	if (rscreen->chip_class <= EVERGREEN &&
-	    rscreen->info.drm_minor < 26)
+	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
 		return 0;
 
 	/* HW bug on R6xx. */
@@ -501,7 +502,7 @@
 	/* HTILE is broken with 1D tiling on old kernels and CIK. */
 	if (rscreen->chip_class >= CIK &&
 	    rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
-	    rscreen->info.drm_minor < 38)
+	    rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
 		return 0;
 
 	switch (num_pipes) {
@@ -706,6 +707,7 @@
 				   const struct pipe_resource *templ)
 {
 	const struct util_format_description *desc = util_format_description(templ->format);
+	bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
 
 	/* MSAA resources must be 2D tiled. */
 	if (templ->nr_samples > 1)
@@ -715,10 +717,16 @@
 	if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
 		return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
+	/* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
+	if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
+	    (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
+	    (templ->target == PIPE_TEXTURE_2D ||
+	     templ->target == PIPE_TEXTURE_3D))
+		force_tiling = true;
+
 	/* Handle common candidates for the linear mode.
 	 * Compressed textures must always be tiled. */
-	if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) &&
-	    !util_format_is_compressed(templ->format)) {
+	if (!force_tiling && !util_format_is_compressed(templ->format)) {
 		/* Not everything can be linear, so we cannot enforce it
 		 * for all textures. */
 		if ((rscreen->debug_flags & DBG_NO_TILING) &&
@@ -934,7 +942,7 @@
 		use_staging_texture = TRUE;
 	} else if (!(usage & PIPE_TRANSFER_READ) &&
 	    (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
-	     rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
+	     !rctx->ws->buffer_wait(rtex->resource.buf, 0, RADEON_USAGE_READWRITE))) {
 		/* Use a staging texture for uploads if the underlying BO is busy. */
 		use_staging_texture = TRUE;
 	}
@@ -1059,18 +1067,9 @@
 					struct pipe_transfer* transfer)
 {
 	struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
-	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
-	struct radeon_winsys_cs_handle *buf;
 	struct pipe_resource *texture = transfer->resource;
 	struct r600_texture *rtex = (struct r600_texture*)texture;
 
-	if (rtransfer->staging) {
-		buf = rtransfer->staging->cs_buf;
-	} else {
-		buf = r600_resource(transfer->resource)->cs_buf;
-	}
-	rctx->ws->buffer_unmap(buf);
-
 	if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
 		if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
 			ctx->resource_copy_region(ctx, texture, transfer->level,
@@ -1262,7 +1261,9 @@
 
 		/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
 		if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
-		    rctx->chip_class >= CIK && rctx->screen->info.drm_minor < 38) {
+		    rctx->chip_class >= CIK &&
+		    rctx->screen->info.drm_major == 2 &&
+		    rctx->screen->info.drm_minor < 38) {
 			continue;
 		}
 
@@ -1278,7 +1279,7 @@
 				   tex->cmask.offset, tex->cmask.size, 0, true);
 
 		tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
-		fb_state->dirty = true;
+		rctx->set_atom_dirty(rctx, fb_state, true);
 		*buffers &= ~clear_bit;
 	}
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_elf_util.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_elf_util.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_elf_util.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_elf_util.c	2015-09-16 14:36:09.000000000 +0000
@@ -103,8 +103,7 @@
 }
 
 void radeon_elf_read(const char *elf_data, unsigned elf_size,
-					struct radeon_shader_binary *binary,
-					unsigned debug)
+		     struct radeon_shader_binary *binary)
 {
 	char *elf_buffer;
 	Elf *elf;
@@ -124,7 +123,6 @@
 	elf = elf_memory(elf_buffer, elf_size);
 
 	elf_getshdrstrndx(elf, &section_str_index);
-	binary->disassembled = 0;
 
 	while ((section = elf_nextscn(elf, section))) {
 		const char *name;
@@ -145,12 +143,11 @@
 			binary->config_size = section_data->d_size;
 			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
 			memcpy(binary->config, section_data->d_buf, binary->config_size);
-		} else if (debug && !strcmp(name, ".AMDGPU.disasm")) {
-			binary->disassembled = 1;
+		} else if (!strcmp(name, ".AMDGPU.disasm")) {
+			/* Always read disassembly if it's available. */
 			section_data = elf_getdata(section, section_data);
-			fprintf(stderr, "\nShader Disassembly:\n\n");
-			fprintf(stderr, "%.*s\n", (int)section_data->d_size,
-						  (char *)section_data->d_buf);
+			binary->disasm_string = strndup(section_data->d_buf,
+							section_data->d_size);
 		} else if (!strncmp(name, ".rodata", 7)) {
 			section_data = elf_getdata(section, section_data);
 			binary->rodata_size = section_data->d_size;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_elf_util.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_elf_util.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_elf_util.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_elf_util.h	2015-09-16 14:36:09.000000000 +0000
@@ -37,7 +37,7 @@
  * radeon_shader_binary object.
  */
 void radeon_elf_read(const char *elf_data, unsigned elf_size,
-		struct radeon_shader_binary *binary, unsigned debug);
+		     struct radeon_shader_binary *binary);
 
 /**
  * @returns A pointer to the start of the configuration information for
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_llvm_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_llvm_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_llvm_emit.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_llvm_emit.c	2015-09-16 14:36:09.000000000 +0000
@@ -62,6 +62,8 @@
 
 	switch (type) {
 	case TGSI_PROCESSOR_VERTEX:
+	case TGSI_PROCESSOR_TESS_CTRL:
+	case TGSI_PROCESSOR_TESS_EVAL:
 		llvm_type = RADEON_LLVM_SHADER_VS;
 		break;
 	case TGSI_PROCESSOR_GEOMETRY:
@@ -86,10 +88,18 @@
 {
 	static unsigned initialized = 0;
 	if (!initialized) {
+#if HAVE_LLVM < 0x0307
 		LLVMInitializeR600TargetInfo();
 		LLVMInitializeR600Target();
 		LLVMInitializeR600TargetMC();
 		LLVMInitializeR600AsmPrinter();
+#else
+		LLVMInitializeAMDGPUTargetInfo();
+		LLVMInitializeAMDGPUTarget();
+		LLVMInitializeAMDGPUTargetMC();
+		LLVMInitializeAMDGPUAsmPrinter();
+
+#endif
 		initialized = 1;
 	}
 }
@@ -134,7 +144,8 @@
  * @returns 0 for success, 1 for failure
  */
 unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
-			  const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
+			     const char *gpu_family, bool dump_ir, bool dump_asm,
+			     LLVMTargetMachineRef tm)
 {
 
 	char cpu[CPU_STRING_LEN];
@@ -157,17 +168,15 @@
 		}
 		strncpy(cpu, gpu_family, CPU_STRING_LEN);
 		memset(fs, 0, sizeof(fs));
-		if (dump) {
+		if (dump_asm)
 			strncpy(fs, "+DumpCode", FS_STRING_LEN);
-		}
 		tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
 				  LLVMCodeGenLevelDefault, LLVMRelocDefault,
 						  LLVMCodeModelDefault);
 		dispose_tm = true;
 	}
-	if (dump) {
+	if (dump_ir)
 		LLVMDumpModule(M);
-	}
 	/* Setup Diagnostic Handler*/
 	llvm_ctx = LLVMGetModuleContext(M);
 
@@ -196,7 +205,7 @@
 	buffer_size = LLVMGetBufferSize(out_buffer);
 	buffer_data = LLVMGetBufferStart(out_buffer);
 
-	radeon_elf_read(buffer_data, buffer_size, binary, dump);
+	radeon_elf_read(buffer_data, buffer_size, binary);
 
 	/* Clean up */
 	LLVMDisposeMemoryBuffer(out_buffer);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_llvm_emit.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_llvm_emit.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_llvm_emit.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_llvm_emit.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,6 +29,7 @@
 
 #include <llvm-c/Core.h>
 #include <llvm-c/TargetMachine.h>
+#include <stdbool.h>
 
 struct radeon_shader_binary;
 
@@ -36,11 +37,8 @@
 
 LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
 
-unsigned  radeon_llvm_compile(
-	LLVMModuleRef M,
-	struct radeon_shader_binary *binary,
-	const char * gpu_family,
-	unsigned dump,
-	LLVMTargetMachineRef tm);
+unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
+			     const char *gpu_family, bool dump_ir, bool dump_asm,
+			     LLVMTargetMachineRef tm);
 
 #endif /* RADEON_LLVM_EMIT_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_llvm.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_llvm.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_llvm.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_llvm.h	2015-09-16 14:36:09.000000000 +0000
@@ -33,7 +33,6 @@
 
 #define RADEON_LLVM_MAX_INPUTS 32 * 4
 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
-#define RADEON_LLVM_MAX_ARRAYS 16
 
 #define RADEON_LLVM_INITIAL_CF_DEPTH 4
 
@@ -59,7 +58,6 @@
 	unsigned type;
 	unsigned face_gpr;
 	unsigned two_side;
-	unsigned clip_vertex;
 	unsigned inputs_count;
 	struct r600_shader_io * r600_inputs;
 	struct r600_shader_io * r600_outputs;
@@ -73,21 +71,6 @@
 
 	/*=== Front end configuration ===*/
 
-	/* Special Intrinsics */
-
-	/** Write to an output register: float store_output(float, i32) */
-	const char * store_output_intr;
-
-	/** Swizzle a vector value: <4 x float> swizzle(<4 x float>, i32)
-	 * The swizzle is an unsigned integer that encodes a TGSI_SWIZZLE_* value
-	 * in 2-bits.
-	 * Swizzle{0-1} = X Channel
-	 * Swizzle{2-3} = Y Channel
-	 * Swizzle{4-5} = Z Channel
-	 * Swizzle{6-7} = W Channel
-	 */
-	const char * swizzle_intr;
-
 	/* Instructions that are not described by any of the TGSI opcodes. */
 
 	/** This function is responsible for initilizing the inputs array and will be
@@ -101,9 +84,6 @@
 			unsigned index,
 			const struct tgsi_full_declaration *decl);
 
-	/** User data to use with the callbacks */
-	void * userdata;
-
 	/** This array contains the input values for the shader.  Typically these
 	  * values will be in the form of a target intrinsic that will inform the
 	  * backend how to load the actual inputs to the shader. 
@@ -130,8 +110,7 @@
 	unsigned loop_depth;
 	unsigned loop_depth_max;
 
-	struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
-	unsigned num_arrays;
+	struct tgsi_declaration_range *arrays;
 
 	LLVMValueRef main_fn;
 
@@ -148,6 +127,8 @@
 	case TGSI_TYPE_UNSIGNED:
 	case TGSI_TYPE_SIGNED:
 		return LLVMInt32TypeInContext(ctx);
+	case TGSI_TYPE_DOUBLE:
+		return LLVMDoubleTypeInContext(ctx);
 	case TGSI_TYPE_UNTYPED:
 	case TGSI_TYPE_FLOAT:
 		return LLVMFloatTypeInContext(ctx);
@@ -173,8 +154,9 @@
 
 
 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
-                                          struct lp_build_emit_data * emit_data,
-                                          LLVMValueRef *coords_arg);
+					  struct lp_build_emit_data * emit_data,
+					  LLVMValueRef *coords_arg,
+					  LLVMValueRef *derivs_arg);
 
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
 
@@ -193,20 +175,29 @@
 
 void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx);
 
-LLVMValueRef
-build_intrinsic(LLVMBuilderRef builder,
-		const char *name,
-		LLVMTypeRef ret_type,
-		LLVMValueRef *args,
-		unsigned num_args,
-		LLVMAttribute attr);
-
 void
 build_tgsi_intrinsic_nomem(
 		const struct lp_build_tgsi_action * action,
 		struct lp_build_tgsi_context * bld_base,
 		struct lp_build_emit_data * emit_data);
 
-
+LLVMValueRef
+radeon_llvm_emit_fetch_double(struct lp_build_tgsi_context *bld_base,
+			      LLVMValueRef ptr,
+			      LLVMValueRef ptr2);
+
+LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
+                                  LLVMValueRef value);
+
+LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
+				    const struct tgsi_full_src_register *reg,
+				    enum tgsi_opcode_type type,
+				    unsigned swizzle);
+
+void radeon_llvm_emit_store(
+	struct lp_build_tgsi_context * bld_base,
+	const struct tgsi_full_instruction * inst,
+	const struct tgsi_opcode_info * info,
+	LLVMValueRef dst[4]);
 
 #endif /* RADEON_LLVM_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c	2015-09-16 14:36:09.000000000 +0000
@@ -85,8 +85,9 @@
 		unsigned File, const struct tgsi_ind_register *reg)
 {
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+
 	if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
-            reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
+	    reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
 		struct tgsi_declaration_range range;
 		range.First = 0;
 		range.Last = bld_base->info->file_max[File];
@@ -108,12 +109,27 @@
 	return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
 }
 
-static LLVMValueRef
-emit_fetch(
+LLVMValueRef
+radeon_llvm_emit_fetch_double(
 	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type,
-	unsigned swizzle);
+	LLVMValueRef ptr,
+	LLVMValueRef ptr2)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMValueRef result;
+
+	result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+
+	result = LLVMBuildInsertElement(builder,
+					result,
+					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
+					bld_base->int_bld.zero, "");
+	result = LLVMBuildInsertElement(builder,
+					result,
+					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
+					bld_base->int_bld.one, "");
+	return bitcast(bld_base, TGSI_TYPE_DOUBLE, result);
+}
 
 static LLVMValueRef
 emit_array_fetch(
@@ -135,7 +151,7 @@
 
 	for (i = 0; i < size; ++i) {
 		tmp_reg.Register.Index = i + range.First;
-		LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
+		LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 		result = LLVMBuildInsertElement(builder, result, temp,
 			lp_build_const_int32(gallivm, i), "");
 	}
@@ -149,23 +165,21 @@
 	return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
 }
 
-static LLVMValueRef
-emit_fetch(
-	struct lp_build_tgsi_context *bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type,
-	unsigned swizzle)
+LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
+				    const struct tgsi_full_src_register *reg,
+				    enum tgsi_opcode_type type,
+				    unsigned swizzle)
 {
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-	LLVMValueRef result = NULL, ptr;
+	LLVMValueRef result = NULL, ptr, ptr2;
 
 	if (swizzle == ~0) {
 		LLVMValueRef values[TGSI_NUM_CHANNELS];
 		unsigned chan;
 		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-			values[chan] = emit_fetch(bld_base, reg, type, chan);
+			values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
 		}
 		return lp_build_gather_values(bld_base->base.gallivm, values,
 					      TGSI_NUM_CHANNELS);
@@ -183,11 +197,27 @@
 	switch(reg->Register.File) {
 	case TGSI_FILE_IMMEDIATE: {
 		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
-		return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+		if (type == TGSI_TYPE_DOUBLE) {
+			result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+			result = LLVMConstInsertElement(result,
+							bld->immediates[reg->Register.Index][swizzle],
+							bld_base->int_bld.zero);
+			result = LLVMConstInsertElement(result,
+							bld->immediates[reg->Register.Index][swizzle + 1],
+							bld_base->int_bld.one);
+			return LLVMConstBitCast(result, ctype);
+		} else {
+			return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+		}
 	}
 
 	case TGSI_FILE_INPUT:
 		result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+		if (type == TGSI_TYPE_DOUBLE) {
+			ptr = result;
+			ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+			return radeon_llvm_emit_fetch_double(bld_base, ptr, ptr2);
+		}
 		break;
 
 	case TGSI_FILE_TEMPORARY:
@@ -198,11 +228,23 @@
 			break;
 		}
 		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
+		if (type == TGSI_TYPE_DOUBLE) {
+			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
+			return radeon_llvm_emit_fetch_double(bld_base,
+						 LLVMBuildLoad(builder, ptr, ""),
+						 LLVMBuildLoad(builder, ptr2, ""));
+		}
 		result = LLVMBuildLoad(builder, ptr, "");
 		break;
 
 	case TGSI_FILE_OUTPUT:
 		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
+		if (type == TGSI_TYPE_DOUBLE) {
+			ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
+			return radeon_llvm_emit_fetch_double(bld_base,
+						 LLVMBuildLoad(builder, ptr, ""),
+						 LLVMBuildLoad(builder, ptr2, ""));
+		}
 		result = LLVMBuildLoad(builder, ptr, "");
 		break;
 
@@ -252,8 +294,14 @@
 	}
 
 	case TGSI_FILE_TEMPORARY:
-		if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
+		if (decl->Declaration.Array) {
+			if (!ctx->arrays) {
+				int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
+				ctx->arrays = MALLOC(sizeof(ctx->arrays[0]) * size);
+			}
+
 			ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
+		}
 		if (uses_temp_indirect_addressing(bld_base)) {
 			lp_emit_declaration_soa(bld_base, decl);
 			break;
@@ -314,8 +362,22 @@
 	}
 }
 
-static void
-emit_store(
+LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
+                                  LLVMValueRef value)
+{
+	struct lp_build_emit_data clamp_emit_data;
+
+	memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
+	clamp_emit_data.arg_count = 3;
+	clamp_emit_data.args[0] = value;
+	clamp_emit_data.args[2] = bld_base->base.one;
+	clamp_emit_data.args[1] = bld_base->base.zero;
+
+	return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
+				  &clamp_emit_data);
+}
+
+void radeon_llvm_emit_store(
 	struct lp_build_tgsi_context * bld_base,
 	const struct tgsi_full_instruction * inst,
 	const struct tgsi_opcode_info * info,
@@ -324,12 +386,12 @@
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
-	struct lp_build_context base = bld->bld_base.base;
 	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 	LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
-	LLVMValueRef temp_ptr;
+	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 	unsigned chan, chan_index;
 	boolean is_vec_store = FALSE;
+	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 
 	if (dst[0]) {
 		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
@@ -350,36 +412,19 @@
 	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
 		LLVMValueRef value = dst[chan_index];
 
-		if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
-			struct lp_build_emit_data clamp_emit_data;
-
-			memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
-			clamp_emit_data.arg_count = 3;
-			clamp_emit_data.args[0] = value;
-			clamp_emit_data.args[2] = base.one;
-
-			switch(inst->Instruction.Saturate) {
-			case TGSI_SAT_ZERO_ONE:
-				clamp_emit_data.args[1] = base.zero;
-				break;
-			case TGSI_SAT_MINUS_PLUS_ONE:
-				clamp_emit_data.args[1] = LLVMConstReal(
-						base.elem_type, -1.0f);
-				break;
-			default:
-				assert(0);
-			}
-			value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
-						&clamp_emit_data);
-		}
+		if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+			continue;
+		if (inst->Instruction.Saturate)
+			value = radeon_llvm_saturate(bld_base, value);
 
 		if (reg->Register.File == TGSI_FILE_ADDRESS) {
 			temp_ptr = bld->addr[reg->Register.Index][chan_index];
 			LLVMBuildStore(builder, value, temp_ptr);
 			continue;
 		}
-	
-		value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+
+		if (dtype != TGSI_TYPE_DOUBLE)
+			value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 
 		if (reg->Register.Indirect) {
 			struct tgsi_declaration_range range = get_array_range(bld_base,
@@ -417,6 +462,8 @@
 			switch(reg->Register.File) {
 			case TGSI_FILE_OUTPUT:
 				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
+				if (dtype == TGSI_TYPE_DOUBLE)
+					temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
 				break;
 
 			case TGSI_FILE_TEMPORARY:
@@ -427,12 +474,28 @@
 					break;
 				}
 				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
+				if (dtype == TGSI_TYPE_DOUBLE)
+					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
+
 				break;
 
 			default:
 				return;
 			}
-			LLVMBuildStore(builder, value, temp_ptr);
+			if (dtype != TGSI_TYPE_DOUBLE)
+				LLVMBuildStore(builder, value, temp_ptr);
+			else {
+				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
+								    LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
+				LLVMValueRef val2;
+				value = LLVMBuildExtractElement(builder, ptr,
+								bld_base->uint_bld.zero, "");
+				val2 = LLVMBuildExtractElement(builder, ptr,
+								bld_base->uint_bld.one, "");
+
+				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
+				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
+			}
 		}
 	}
 }
@@ -685,34 +748,26 @@
 	}
 }
 
-void radeon_llvm_emit_prepare_cube_coords(
-		struct lp_build_tgsi_context * bld_base,
-		struct lp_build_emit_data * emit_data,
-		LLVMValueRef *coords_arg)
+static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
+					  LLVMValueRef *in, LLVMValueRef *out)
 {
-
-	unsigned target = emit_data->inst->Texture.Texture;
-	unsigned opcode = emit_data->inst->Instruction.Opcode;
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMTypeRef type = bld_base->base.elem_type;
 	LLVMValueRef coords[4];
 	LLVMValueRef mad_args[3];
-	LLVMValueRef idx;
-	struct LLVMOpaqueValue *cube_vec;
-	LLVMValueRef v;
+	LLVMValueRef v, cube_vec;
 	unsigned i;
 
-	cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
-	v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
+	cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
+	v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
                             &cube_vec, 1, LLVMReadNoneAttribute);
 
-	for (i = 0; i < 4; ++i) {
-		idx = lp_build_const_int32(gallivm, i);
-		coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
-	}
+	for (i = 0; i < 4; ++i)
+		coords[i] = LLVMBuildExtractElement(builder, v,
+						    lp_build_const_int32(gallivm, i), "");
 
-	coords[2] = build_intrinsic(builder, "fabs",
+	coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
 			type, &coords[2], 1, LLVMReadNoneAttribute);
 	coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
 
@@ -728,10 +783,60 @@
 			mad_args[0], mad_args[1], mad_args[2]);
 
 	/* apply xyz = yxw swizzle to cooords */
-	coords[2] = coords[3];
-	coords[3] = coords[1];
-	coords[1] = coords[0];
-	coords[0] = coords[3];
+	out[0] = coords[1];
+	out[1] = coords[0];
+	out[2] = coords[3];
+}
+
+void radeon_llvm_emit_prepare_cube_coords(
+		struct lp_build_tgsi_context * bld_base,
+		struct lp_build_emit_data * emit_data,
+		LLVMValueRef *coords_arg,
+		LLVMValueRef *derivs_arg)
+{
+
+	unsigned target = emit_data->inst->Texture.Texture;
+	unsigned opcode = emit_data->inst->Instruction.Opcode;
+	struct gallivm_state * gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef coords[4];
+	unsigned i;
+
+	radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
+
+	if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
+		LLVMValueRef derivs[4];
+		int axis;
+
+		/* Convert cube derivatives to 2D derivatives. */
+		for (axis = 0; axis < 2; axis++) {
+			LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
+
+			/* Shift the cube coordinates by the derivatives to get
+			 * the cube coordinates of the "neighboring pixel".
+			 */
+			for (i = 0; i < 3; i++)
+				shifted_cube_coords[i] =
+					LLVMBuildFAdd(builder, coords_arg[i],
+						      derivs_arg[axis*3+i], "");
+			shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
+
+			/* Project the shifted cube coordinates onto the face. */
+			radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
+						      shifted_coords);
+
+			/* Subtract both sets of 2D coordinates to get 2D derivatives.
+			 * This won't work if the shifted coordinates ended up
+			 * in a different face.
+			 */
+			for (i = 0; i < 2; i++)
+				derivs[axis * 2 + i] =
+					LLVMBuildFSub(builder, shifted_coords[i],
+						      coords[i], "");
+		}
+
+		memcpy(derivs_arg, derivs, sizeof(derivs));
+	}
 
 	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
 	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
@@ -755,140 +860,6 @@
 	memcpy(coords_arg, coords, sizeof(coords));
 }
 
-static void txd_fetch_args(
-	struct lp_build_tgsi_context * bld_base,
-	struct lp_build_emit_data * emit_data)
-{
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-
-	LLVMValueRef coords[4];
-	unsigned chan, src;
-	for (src = 0; src < 3; src++) {
-		for (chan = 0; chan < 4; chan++)
-			coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
-
-		emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
-				coords, 4);
-	}
-	emit_data->arg_count = 3;
-	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-
-static void txp_fetch_args(
-	struct lp_build_tgsi_context * bld_base,
-	struct lp_build_emit_data * emit_data)
-{
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-	LLVMValueRef src_w;
-	unsigned chan;
-	LLVMValueRef coords[5];
-
-	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-	src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
-
-	for (chan = 0; chan < 3; chan++ ) {
-		LLVMValueRef arg = lp_build_emit_fetch(bld_base,
-						emit_data->inst, 0, chan);
-		coords[chan] = lp_build_emit_llvm_binary(bld_base,
-					TGSI_OPCODE_DIV, arg, src_w);
-	}
-	coords[3] = bld_base->base.one;
-
-	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
-	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
-	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
-	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
-	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
-	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-	}
-
-	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
-						coords, 4);
-	emit_data->arg_count = 1;
-}
-
-static void tex_fetch_args(
-	struct lp_build_tgsi_context * bld_base,
-	struct lp_build_emit_data * emit_data)
-{
-	/* XXX: lp_build_swizzle_aos() was failing with wrong arg types,
-	 * when we used CHAN_ALL.  We should be able to get this to work,
-	 * but for now we will swizzle it ourselves
-	emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
-						 0, CHAN_ALL);
-
-	*/
-
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-
-	LLVMValueRef coords[5];
-	unsigned chan;
-	for (chan = 0; chan < 4; chan++) {
-		coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
-	}
-
-	if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
-		inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
-		inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
-		/* These instructions have additional operand that should be packed
-		 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
-		 * That operand should be passed as a float value in the args array
-		 * right after the coord vector. After packing it's not used anymore,
-		 * that's why arg_count is not increased */
-		coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
-	}
-
-	if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
-	     inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
-	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
-	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
-	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
-	    inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-	}
-
-	emit_data->arg_count = 1;
-	emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
-						coords, 4);
-	emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-static void txf_fetch_args(
-	struct lp_build_tgsi_context * bld_base,
-	struct lp_build_emit_data * emit_data)
-{
-	const struct tgsi_full_instruction * inst = emit_data->inst;
-	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
-	const struct tgsi_texture_offset * off = inst->TexOffsets;
-	LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
-
-	/* fetch tex coords */
-	tex_fetch_args(bld_base, emit_data);
-
-	/* fetch tex offsets */
-	if (inst->Texture.NumOffsets) {
-		assert(inst->Texture.NumOffsets == 1);
-
-		emit_data->args[1] = LLVMConstBitCast(
-			bld->immediates[off->Index][off->SwizzleX],
-			offset_type);
-		emit_data->args[2] = LLVMConstBitCast(
-			bld->immediates[off->Index][off->SwizzleY],
-			offset_type);
-		emit_data->args[3] = LLVMConstBitCast(
-			bld->immediates[off->Index][off->SwizzleZ],
-			offset_type);
-	} else {
-		emit_data->args[1] = bld_base->int_bld.zero;
-		emit_data->args[2] = bld_base->int_bld.zero;
-		emit_data->args[3] = bld_base->int_bld.zero;
-	}
-
-	emit_data->arg_count = 4;
-}
-
 static void emit_icmp(
 		const struct lp_build_tgsi_action * action,
 		struct lp_build_tgsi_context * bld_base,
@@ -995,6 +966,35 @@
 	emit_data->output[emit_data->chan] = v;
 }
 
+/* Emit a double-precision compare (DSEQ, DSGE, DSLT, DSNE).  The result of
+ * the fcmp is converted to a 32-bit integer type with
+ * LLVMBuildSExtOrBitCast and stored in the current output channel. */
+static void emit_dcmp(const struct lp_build_tgsi_action *action,
+		      struct lp_build_tgsi_context *bld_base,
+		      struct lp_build_emit_data *emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+	LLVMRealPredicate pred;
+	LLVMValueRef cmp;
+
+	/* Ordered predicates everywhere except NE, which is unordered
+	 * (the usual convention for float comparisons). */
+	switch (emit_data->inst->Instruction.Opcode) {
+	case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
+	case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
+	case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
+	case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
+	default: assert(!"unknown instruction"); pred = 0; break;
+	}
+
+	cmp = LLVMBuildFCmp(gallivm->builder, pred,
+			    emit_data->args[0], emit_data->args[1], "");
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSExtOrBitCast(gallivm->builder, cmp, i32, "");
+}
+
 static void emit_not(
 		const struct lp_build_tgsi_action * action,
 		struct lp_build_tgsi_context * bld_base,
@@ -1160,6 +1160,40 @@
 			emit_data->args[0], "");
 }
 
+/* DNEG: negate the first fetched argument with an LLVM fneg. */
+static void emit_dneg(const struct lp_build_tgsi_action *action,
+		      struct lp_build_tgsi_context *bld_base,
+		      struct lp_build_emit_data *emit_data)
+{
+	LLVMValueRef negated = LLVMBuildFNeg(bld_base->base.gallivm->builder,
+					     emit_data->args[0], "");
+	emit_data->output[emit_data->chan] = negated;
+}
+
+/* FRC/DFRAC: x - floor(x) via the llvm.floor intrinsic of matching width. */
+static void emit_frac(const struct lp_build_tgsi_action *action,
+		      struct lp_build_tgsi_context *bld_base,
+		      struct lp_build_emit_data *emit_data)
+{
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	const char *intr;	/* only ever points at string literals */
+
+	if (emit_data->info->opcode == TGSI_OPCODE_FRC)
+		intr = "llvm.floor.f32";
+	else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
+		intr = "llvm.floor.f64";
+	else {
+		assert(0);	/* unsupported opcode: nothing is emitted */
+		return;
+	}
+
+	LLVMValueRef floor_val =	/* not "floor": avoids shadowing libm */
+		lp_build_intrinsic(builder, intr, emit_data->dst_type,
+				   &emit_data->args[0], 1, LLVMReadNoneAttribute);
+	emit_data->output[emit_data->chan] =
+		LLVMBuildFSub(builder, emit_data->args[0], floor_val, "");
+}
+
 static void emit_f2i(
 		const struct lp_build_tgsi_action * action,
 		struct lp_build_tgsi_context * bld_base,
@@ -1214,58 +1248,16 @@
 	ctx->soa.num_immediates++;
 }
 
-LLVMValueRef
-build_intrinsic(LLVMBuilderRef builder,
-                   const char *name,
-                   LLVMTypeRef ret_type,
-                   LLVMValueRef *args,
-                   unsigned num_args,
-                   LLVMAttribute attr)
-{
-   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-   LLVMValueRef function;
-
-   function = LLVMGetNamedFunction(module, name);
-   if(!function) {
-      LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
-      unsigned i;
-
-      assert(num_args <= LP_MAX_FUNC_ARGS);
-
-      for(i = 0; i < num_args; ++i) {
-         assert(args[i]);
-         arg_types[i] = LLVMTypeOf(args[i]);
-      }
-
-      function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
-
-      if (attr)
-          LLVMAddFunctionAttr(function, attr);
-   }
-
-   return LLVMBuildCall(builder, function, args, num_args, "");
-}
-
-static void build_tgsi_intrinsic(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data,
- LLVMAttribute attr)
-{
-   struct lp_build_context * base = &bld_base->base;
-   emit_data->output[emit_data->chan] = build_intrinsic(
-               base->gallivm->builder, action->intr_name,
-               emit_data->dst_type, emit_data->args,
-               emit_data->arg_count, attr);
-}
-
 void
-build_tgsi_intrinsic_nomem(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
+build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
+			   struct lp_build_tgsi_context *bld_base,
+			   struct lp_build_emit_data *emit_data)
 {
-	build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
+	struct lp_build_context * base = &bld_base->base;
+	emit_data->output[emit_data->chan] =
+		lp_build_intrinsic(base->gallivm->builder, action->intr_name,
+				   emit_data->dst_type, emit_data->args,
+				   emit_data->arg_count, LLVMReadNoneAttribute);
 }
 
 static void emit_bfi(const struct lp_build_tgsi_action * action,
@@ -1321,7 +1313,7 @@
 	};
 
 	emit_data->output[emit_data->chan] =
-		build_intrinsic(gallivm->builder, "llvm.cttz.i32",
+		lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
 				emit_data->dst_type, args, Elements(args),
 				LLVMReadNoneAttribute);
 }
@@ -1340,7 +1332,7 @@
 	};
 
 	LLVMValueRef msb =
-		build_intrinsic(builder, "llvm.ctlz.i32",
+		lp_build_intrinsic(builder, "llvm.ctlz.i32",
 				emit_data->dst_type, args, Elements(args),
 				LLVMReadNoneAttribute);
 
@@ -1367,7 +1359,7 @@
 	LLVMValueRef arg = emit_data->args[0];
 
 	LLVMValueRef msb =
-		build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
+		lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
 				emit_data->dst_type, &arg, 1,
 				LLVMReadNoneAttribute);
 
@@ -1406,12 +1398,8 @@
 						ctx->gallivm.context);
 	ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
 
-	ctx->store_output_intr = "llvm.AMDGPU.store.output.";
-	ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
 	struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
 
-	/* XXX: We need to revisit this.I think the correct way to do this is
-	 * to use length = 4 here and use the elem_bld for everything. */
 	type.floating = TRUE;
 	type.fixed = FALSE;
 	type.sign = TRUE;
@@ -1422,30 +1410,32 @@
 	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
 	lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
 	lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
+	{
+		struct lp_type dbl_type;
+		dbl_type = type;
+		dbl_type.width *= 2;
+		lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
+	}
 
 	bld_base->soa = 1;
-	bld_base->emit_store = emit_store;
+	bld_base->emit_store = radeon_llvm_emit_store;
 	bld_base->emit_swizzle = emit_swizzle;
 	bld_base->emit_declaration = emit_declaration;
 	bld_base->emit_immediate = emit_immediate;
 
-	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
-	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
-	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
-	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
+	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
 	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
 
 	/* Allocate outputs */
 	ctx->soa.outputs = ctx->outputs;
 
-	ctx->num_arrays = 0;
-
-	/* XXX: Is there a better way to initialize all this ? */
-
 	lp_set_default_actions(bld_base);
 
 	bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
+	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
 	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
 	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
 	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
@@ -1454,7 +1444,7 @@
 	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
 	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
 	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
+	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
 	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
 	bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
@@ -1462,21 +1452,30 @@
 	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
 	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
-	bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
-	bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
-	bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
+	bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
+	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
+	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
+	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
+	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
+	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
+	bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
+	bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
+	bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
+	bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
 	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
 	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
 	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
 	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
 	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
+	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
 	bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
-	bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
-	bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
+	bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
 	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
 	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
 	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
@@ -1521,6 +1520,9 @@
 	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
 	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
+	bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name =
+		HAVE_LLVM >= 0x0305 ? "llvm.AMDGPU.rsq.clamped.f32" : "llvm.AMDGPU.rsq";
+	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
 	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
 	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
@@ -1533,26 +1535,6 @@
 	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
 	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
-	bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
-	bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
-	bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
-	bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
-	bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
-	bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
-	bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
-	bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
-	bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
-	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
-	bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
 	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
 	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
@@ -1572,13 +1554,6 @@
 	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
 	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
 	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
-
-	bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
-#if HAVE_LLVM >= 0x0305
-	bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq.clamped.f32";
-#else
-	bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
-#endif
 }
 
 void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
@@ -1628,8 +1603,11 @@
 {
 	LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
 	LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
+	FREE(ctx->arrays);
+	ctx->arrays = NULL;
 	FREE(ctx->temps);
 	ctx->temps = NULL;
+	ctx->temps_count = 0;
 	FREE(ctx->loop);
 	ctx->loop = NULL;
 	ctx->loop_depth_max = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_uvd.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_uvd.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_uvd.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_uvd.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,6 +57,7 @@
 
 #define FB_BUFFER_OFFSET 0x1000
 #define FB_BUFFER_SIZE 2048
+#define IT_SCALING_TABLE_SIZE 992
 
 /* UVD decoder representation */
 struct ruvd_decoder {
@@ -65,6 +66,7 @@
 	ruvd_set_dtb			set_dtb;
 
 	unsigned			stream_handle;
+	unsigned			stream_type;
 	unsigned			frame_number;
 
 	struct pipe_screen		*screen;
@@ -73,15 +75,18 @@
 
 	unsigned			cur_buffer;
 
-	struct rvid_buffer		msg_fb_buffers[NUM_BUFFERS];
+	struct rvid_buffer		msg_fb_it_buffers[NUM_BUFFERS];
 	struct ruvd_msg			*msg;
 	uint32_t			*fb;
+	uint8_t				*it;
 
 	struct rvid_buffer		bs_buffers[NUM_BUFFERS];
 	void*				bs_ptr;
 	unsigned			bs_size;
 
 	struct rvid_buffer		dpb;
+	bool				use_legacy;
+	struct rvid_buffer		ctx;
 };
 
 /* flush IB to the hardware */
@@ -107,19 +112,34 @@
 
 	reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
 					  RADEON_PRIO_MIN);
-	set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
-	set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+	if (!dec->use_legacy) {
+		uint64_t addr;
+		addr = dec->ws->buffer_get_virtual_address(cs_buf);
+		addr = addr + off;
+		set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
+		set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
+	} else {
+		set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
+		set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+	}
 	set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
 }
 
-/* map the next available message/feedback buffer */
-static void map_msg_fb_buf(struct ruvd_decoder *dec)
+/* does the codec need an IT buffer? */
+static bool have_it(struct ruvd_decoder *dec)
+{
+	const unsigned type = dec->stream_type;
+	return type == RUVD_CODEC_H264_PERF || type == RUVD_CODEC_H265;
+}
+
+/* map the next available message/feedback/itscaling buffer */
+static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
 {
 	struct rvid_buffer* buf;
 	uint8_t *ptr;
 
 	/* grab the current message/feedback buffer */
-	buf = &dec->msg_fb_buffers[dec->cur_buffer];
+	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
 
 	/* and map it for CPU access */
 	ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
@@ -127,6 +147,8 @@
 	/* calc buffer offsets */
 	dec->msg = (struct ruvd_msg *)ptr;
 	dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
+	if (have_it(dec))
+		dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
 }
 
 /* unmap and send a message command to the VCPU */
@@ -139,12 +161,13 @@
 		return;
 
 	/* grab the current message buffer */
-	buf = &dec->msg_fb_buffers[dec->cur_buffer];
+	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
 
 	/* unmap the buffer */
 	dec->ws->buffer_unmap(buf->res->cs_buf);
 	dec->msg = NULL;
 	dec->fb = NULL;
+	dec->it = NULL;
 
 	/* and send it to the hardware */
 	send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
@@ -159,11 +182,12 @@
 }
 
 /* convert the profile into something UVD understands */
-static uint32_t profile2stream_type(enum pipe_video_profile profile)
+static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
 {
-	switch (u_reduce_video_profile(profile)) {
+	switch (u_reduce_video_profile(dec->base.profile)) {
 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
-		return RUVD_CODEC_H264;
+		return (family >= CHIP_TONGA) ?
+			RUVD_CODEC_H264_PERF : RUVD_CODEC_H264;
 
 	case PIPE_VIDEO_FORMAT_VC1:
 		return RUVD_CODEC_VC1;
@@ -174,23 +198,43 @@
 	case PIPE_VIDEO_FORMAT_MPEG4:
 		return RUVD_CODEC_MPEG4;
 
+	case PIPE_VIDEO_FORMAT_HEVC:
+		return RUVD_CODEC_H265;
+
 	default:
 		assert(0);
 		return 0;
 	}
 }
 
+/* calculate a context buffer size from the frame dimensions and the
+ * reference count (presumably for dec->ctx -- TODO confirm at the caller) */
+static unsigned calc_ctx_size(struct ruvd_decoder *dec)
+{
+	unsigned num_refs = dec->base.max_references + 1;
+	unsigned min_refs = 17;
+	unsigned w, h;
+
+	if (dec->base.width * dec->base.height >= 4096*2000)
+		min_refs = 8;
+	num_refs = MAX2(num_refs, min_refs);
+
+	w = align(align(dec->base.width, VL_MACROBLOCK_WIDTH), 16);
+	h = align(align(dec->base.height, VL_MACROBLOCK_HEIGHT), 16);
+	return ((w + 255) / 16) * ((h + 255) / 16) * 16 * num_refs + 52 * 1024;
+}
+
 /* calculate size of reference picture buffer */
-static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
+static unsigned calc_dpb_size(struct ruvd_decoder *dec)
 {
 	unsigned width_in_mb, height_in_mb, image_size, dpb_size;
 
 	// always align them to MB size for dpb calculation
-	unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
-	unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
+	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
 
 	// always one more for currently decoded picture
-	unsigned max_references = templ->max_references + 1;
+	unsigned max_references = dec->base.max_references + 1;
 
 	// aligned size of a single frame
 	image_size = width * height;
@@ -201,19 +245,67 @@
 	width_in_mb = width / VL_MACROBLOCK_WIDTH;
 	height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
 
-	switch (u_reduce_video_profile(templ->profile)) {
-	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
-		// the firmware seems to allways assume a minimum of ref frames
-		max_references = MAX2(NUM_H264_REFS, max_references);
-
-		// reference picture buffer
-		dpb_size = image_size * max_references;
-
-		// macroblock context buffer
-		dpb_size += width_in_mb * height_in_mb * max_references * 192;
+	switch (u_reduce_video_profile(dec->base.profile)) {
+	case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+		if (!dec->use_legacy) {
+			unsigned fs_in_mb = width_in_mb * height_in_mb;
+			unsigned alignment = 64, num_dpb_buffer;
+
+			if (dec->stream_type == RUVD_CODEC_H264_PERF)
+				alignment = 256;
+			switch(dec->base.level) {
+			case 30:
+				num_dpb_buffer = 8100 / fs_in_mb;
+				break;
+			case 31:
+				num_dpb_buffer = 18000 / fs_in_mb;
+				break;
+			case 32:
+				num_dpb_buffer = 20480 / fs_in_mb;
+				break;
+			case 41:
+				num_dpb_buffer = 32768 / fs_in_mb;
+				break;
+			case 42:
+				num_dpb_buffer = 34816 / fs_in_mb;
+				break;
+			case 50:
+				num_dpb_buffer = 110400 / fs_in_mb;
+				break;
+			case 51:
+				num_dpb_buffer = 184320 / fs_in_mb;
+				break;
+			default:
+				num_dpb_buffer = 184320 / fs_in_mb;
+				break;
+			}
+			num_dpb_buffer++;
+			max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
+			dpb_size = image_size * max_references;
+			dpb_size += max_references * align(width_in_mb * height_in_mb  * 192, alignment);
+			dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
+		} else {
+			// the firmware seems to always assume a minimum of ref frames
+			max_references = MAX2(NUM_H264_REFS, max_references);
+			// reference picture buffer
+			dpb_size = image_size * max_references;
+			// macroblock context buffer
+			dpb_size += width_in_mb * height_in_mb * max_references * 192;
+			// IT surface buffer
+			dpb_size += width_in_mb * height_in_mb * 32;
+		}
+		break;
+	}
 
-		// IT surface buffer
-		dpb_size += width_in_mb * height_in_mb * 32;
+	case PIPE_VIDEO_FORMAT_HEVC:
+		if (dec->base.width * dec->base.height >= 4096*2000)
+			max_references = MAX2(max_references, 8);
+		else
+			max_references = MAX2(max_references, 17);
+
+		width = align (width, 16);
+		height = align (height, 16);
+		dpb_size = align((width * height * 3) / 2, 256) * max_references;
 		break;
 
 	case PIPE_VIDEO_FORMAT_VC1:
@@ -250,6 +342,8 @@
 
 		// IT surface buffer
 		dpb_size += align(width_in_mb * height_in_mb * 32, 64);
+
+		dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
 		break;
 
 	default:
@@ -263,6 +357,12 @@
 	return dpb_size;
 }
 
+/* destroy callback for the data associated with a video buffer */
+static void ruvd_destroy_associated_data(void *data)
+{
+	/* intentionally empty: only an intptr is stored, nothing to free */
+}
+
 /* get h264 specific message bits */
 static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
 {
@@ -286,10 +386,8 @@
 		assert(0);
 		break;
 	}
-	if (((dec->base.width * dec->base.height) >> 8) <= 1620)
-		result.level = 30;
-	else
-		result.level = 41;
+
+	result.level = dec->base.level;
 
 	result.sps_info_flags = 0;
 	result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
@@ -338,6 +436,11 @@
 	memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
 	memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
 
+	if (dec->stream_type == RUVD_CODEC_H264_PERF) {
+		memcpy(dec->it, result.scaling_list_4x4, 6*16);
+		memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);
+	}
+
 	result.num_ref_frames = pic->num_ref_frames;
 
 	result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
@@ -354,6 +457,151 @@
 	return result;
 }
 
+/* get h265 specific message bits; also tags target with the current POC and fills the IT scaling table (dec->it) */
+static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
+				     struct pipe_h265_picture_desc *pic)
+{
+	struct ruvd_h265 result;
+	unsigned i;
+
+	memset(&result, 0, sizeof(result));
+
+	result.sps_info_flags = 0;
+	result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
+	result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
+	result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
+	result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
+	result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
+	result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
+	result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
+	result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
+	result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
+	if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
+		result.sps_info_flags |= 1 << 9;
+
+	result.chroma_format = pic->pps->sps->chroma_format_idc;
+	result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+	result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+	result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+	result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
+	result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3;
+	result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+	result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2;
+	result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size;
+	result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
+	result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
+	result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
+	result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
+	result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
+	result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
+	result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
+
+	result.pps_info_flags = 0;
+	result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
+	result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
+	result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
+	result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
+	result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
+	result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
+	result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
+	result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
+	result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
+	result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
+	result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
+	result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
+	result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
+	result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
+	result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
+	result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
+	result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
+	result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
+	result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
+	result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
+	//result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
+
+	result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
+	result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
+	result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
+	result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
+	result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
+	result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
+	result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
+	result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
+	result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
+	result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
+	result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
+	result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
+	result.init_qp_minus26 = pic->pps->init_qp_minus26;
+
+	for (i = 0; i < 19; ++i)
+		result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
+
+	for (i = 0; i < 21; ++i)
+		result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
+
+	result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
+	result.curr_idx = pic->CurrPicOrderCntVal; /* NOTE(review): curr_idx is uint8_t, only the low 8 bits of the POC are kept - confirm */
+	result.curr_poc = pic->CurrPicOrderCntVal;
+
+	vl_video_buffer_set_associated_data(target, &dec->base,
+					    (void *)(uintptr_t)pic->CurrPicOrderCntVal,
+					    &ruvd_destroy_associated_data); /* the stored "pointer" is the POC value itself */
+
+	for (i = 0; i < 16; ++i) {
+		struct pipe_video_buffer *ref = pic->ref[i];
+		uintptr_t ref_pic = 0;
+
+		result.poc_list[i] = pic->PicOrderCntVal[i];
+
+		if (ref)
+			ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
+		else
+			ref_pic = 0x7F; /* value written for a slot without a reference buffer */
+		result.ref_pic_list[i] = ref_pic; /* narrowed to the uint8_t list entry */
+	}
+
+	for (i = 0; i < 8; ++i) { /* pre-fill with 0xFF; entries not overwritten below keep it */
+		result.ref_pic_set_st_curr_before[i] = 0xFF;
+		result.ref_pic_set_st_curr_after[i] = 0xFF;
+		result.ref_pic_set_lt_curr[i] = 0xFF;
+	}
+
+	for (i = 0; i < 8 && i < pic->NumPocStCurrBefore; ++i) /* clamp: destination holds 8 entries */
+		result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
+
+	for (i = 0; i < 8 && i < pic->NumPocStCurrAfter; ++i) /* clamp: destination holds 8 entries */
+		result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
+
+	for (i = 0; i < 8 && i < pic->NumPocLtCurr; ++i) /* clamp: destination holds 8 entries */
+		result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
+
+	for (i = 0; i < 6; ++i)
+		result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
+
+	for (i = 0; i < 2; ++i)
+		result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
+
+	memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); /* bytes 0..95 */
+	memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); /* 96 = 6 * 16 */
+	memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); /* 480 = 96 + 6 * 64 */
+	memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); /* 864 = 480 + 6 * 64 */
+
+	/* TODO
+	result.highestTid;
+	result.isNonRef;
+
+	IDRPicFlag;
+	RAPPicFlag;
+	NumPocTotalCurr;
+	NumShortTermPictureSliceHeaderBits;
+	NumLongTermPictureSliceHeaderBits;
+
+	IsLongTerm[16];
+	*/
+
+	return result;
+}
+
 /* get vc1 specific message bits */
 static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
 {
@@ -556,7 +804,7 @@
 
 	assert(decoder);
 
-	map_msg_fb_buf(dec);
+	map_msg_fb_it_buf(dec);
 	memset(dec->msg, 0, sizeof(*dec->msg));
 	dec->msg->size = sizeof(*dec->msg);
 	dec->msg->msg_type = RUVD_MSG_DESTROY;
@@ -568,21 +816,17 @@
 	dec->ws->cs_destroy(dec->cs);
 
 	for (i = 0; i < NUM_BUFFERS; ++i) {
-		rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
+		rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
 		rvid_destroy_buffer(&dec->bs_buffers[i]);
 	}
 
 	rvid_destroy_buffer(&dec->dpb);
+	if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+		rvid_destroy_buffer(&dec->ctx);
 
 	FREE(dec);
 }
 
-/* free associated data in the video buffer callback */
-static void ruvd_destroy_associated_data(void *data)
-{
-	/* NOOP, since we only use an intptr */
-}
-
 /**
  * start decoding of a new frame
  */
@@ -670,7 +914,7 @@
 {
 	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
 	struct radeon_winsys_cs_handle *dt;
-	struct rvid_buffer *msg_fb_buf, *bs_buf;
+	struct rvid_buffer *msg_fb_it_buf, *bs_buf;
 	unsigned bs_size;
 
 	assert(decoder);
@@ -678,26 +922,27 @@
 	if (!dec->bs_ptr)
 		return;
 
-	msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
+	msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
 	bs_buf = &dec->bs_buffers[dec->cur_buffer];
 
 	bs_size = align(dec->bs_size, 128);
 	memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
 	dec->ws->buffer_unmap(bs_buf->res->cs_buf);
 
-	map_msg_fb_buf(dec);
+	map_msg_fb_it_buf(dec);
 	dec->msg->size = sizeof(*dec->msg);
 	dec->msg->msg_type = RUVD_MSG_DECODE;
 	dec->msg->stream_handle = dec->stream_handle;
 	dec->msg->status_report_feedback_number = dec->frame_number;
 
-	dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
+	dec->msg->body.decode.stream_type = dec->stream_type;
 	dec->msg->body.decode.decode_flags = 0x1;
 	dec->msg->body.decode.width_in_samples = dec->base.width;
 	dec->msg->body.decode.height_in_samples = dec->base.height;
 
 	dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
 	dec->msg->body.decode.bsd_size = bs_size;
+	dec->msg->body.decode.db_pitch = dec->base.width;
 
 	dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
 
@@ -706,6 +951,10 @@
 		dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
 		break;
 
+	case PIPE_VIDEO_FORMAT_HEVC:
+		dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
+		break;
+
 	case PIPE_VIDEO_FORMAT_VC1:
 		dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
 		break;
@@ -733,12 +982,19 @@
 
 	send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
 		 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+	if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
+		send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->cs_buf, 0,
+			RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+	}
 	send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
 		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
 	send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
 		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
-	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->res->cs_buf,
+	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->cs_buf,
 		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
+	if (have_it(dec))
+		send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->cs_buf,
+			 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
 	set_reg(dec, RUVD_ENGINE_CNTL, 1);
 
 	flush(dec);
@@ -760,7 +1016,8 @@
 					     ruvd_set_dtb set_dtb)
 {
 	struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
-	unsigned dpb_size = calc_dpb_size(templ);
+	struct r600_common_context *rctx = (struct r600_common_context*)context;
+	unsigned dpb_size;
 	unsigned width = templ->width, height = templ->height;
 	unsigned bs_buf_size;
 	struct radeon_info info;
@@ -791,6 +1048,9 @@
 	if (!dec)
 		return NULL;
 
+	if (info.drm_major < 3)
+		dec->use_legacy = TRUE;
+
 	dec->base = *templ;
 	dec->base.context = context;
 	dec->base.width = width;
@@ -803,11 +1063,12 @@
 	dec->base.end_frame = ruvd_end_frame;
 	dec->base.flush = ruvd_flush;
 
+	dec->stream_type = profile2stream_type(dec, info.family);
 	dec->set_dtb = set_dtb;
 	dec->stream_handle = rvid_alloc_stream_handle();
 	dec->screen = context->screen;
 	dec->ws = ws;
-	dec->cs = ws->cs_create(ws, RING_UVD, NULL, NULL, NULL);
+	dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, NULL);
 	if (!dec->cs) {
 		RVID_ERR("Can't get command submission context.\n");
 		goto error;
@@ -815,10 +1076,12 @@
 
 	bs_buf_size = width * height * 512 / (16 * 16);
 	for (i = 0; i < NUM_BUFFERS; ++i) {
-		unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
+		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
 		STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
-		if (!rvid_create_buffer(dec->screen, &dec->msg_fb_buffers[i],
-					msg_fb_size, PIPE_USAGE_STAGING)) {
+		if (have_it(dec))
+			msg_fb_it_size += IT_SCALING_TABLE_SIZE;
+		if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
+					msg_fb_it_size, PIPE_USAGE_STAGING)) {
 			RVID_ERR("Can't allocated message buffers.\n");
 			goto error;
 		}
@@ -829,10 +1092,12 @@
 			goto error;
 		}
 
-		rvid_clear_buffer(context, &dec->msg_fb_buffers[i]);
+		rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
 		rvid_clear_buffer(context, &dec->bs_buffers[i]);
 	}
 
+	dpb_size = calc_dpb_size(dec);
+
 	if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
 		RVID_ERR("Can't allocated dpb.\n");
 		goto error;
@@ -840,14 +1105,23 @@
 
 	rvid_clear_buffer(context, &dec->dpb);
 
-	map_msg_fb_buf(dec);
+	if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
+		unsigned ctx_size = calc_ctx_size(dec);
+		if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+			RVID_ERR("Can't allocated context buffer.\n");
+			goto error;
+		}
+		rvid_clear_buffer(context, &dec->ctx);
+	}
+
+	map_msg_fb_it_buf(dec);
 	dec->msg->size = sizeof(*dec->msg);
 	dec->msg->msg_type = RUVD_MSG_CREATE;
 	dec->msg->stream_handle = dec->stream_handle;
-	dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
+	dec->msg->body.create.stream_type = dec->stream_type;
 	dec->msg->body.create.width_in_samples = dec->base.width;
 	dec->msg->body.create.height_in_samples = dec->base.height;
-	dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
+	dec->msg->body.create.dpb_size = dpb_size;
 	send_msg_buf(dec);
 	flush(dec);
 	next_buffer(dec);
@@ -858,11 +1132,13 @@
 	if (dec->cs) dec->ws->cs_destroy(dec->cs);
 
 	for (i = 0; i < NUM_BUFFERS; ++i) {
-		rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
+		rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
 		rvid_destroy_buffer(&dec->bs_buffers[i]);
 	}
 
 	rvid_destroy_buffer(&dec->dpb);
+	if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+		rvid_destroy_buffer(&dec->ctx);
 
 	FREE(dec);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_uvd.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_uvd.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_uvd.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_uvd.h	2015-09-16 14:36:09.000000000 +0000
@@ -62,6 +62,8 @@
 #define RUVD_CMD_DECODING_TARGET_BUFFER	0x00000002
 #define RUVD_CMD_FEEDBACK_BUFFER	0x00000003
 #define RUVD_CMD_BITSTREAM_BUFFER	0x00000100
+#define RUVD_CMD_ITSCALING_TABLE_BUFFER	0x00000204
+#define RUVD_CMD_CONTEXT_BUFFER		0x00000206
 
 /* UVD message types */
 #define RUVD_MSG_CREATE		0
@@ -73,6 +75,8 @@
 #define RUVD_CODEC_VC1		0x00000001
 #define RUVD_CODEC_MPEG2	0x00000003
 #define RUVD_CODEC_MPEG4	0x00000004
+#define RUVD_CODEC_H264_PERF	0x00000007
+#define RUVD_CODEC_H265		0x00000010
 
 /* UVD decode target buffer tiling mode */
 #define RUVD_TILE_LINEAR	0x00000000
@@ -171,6 +175,66 @@
 	} mvc;
 };
 
+struct ruvd_h265 {	/* HEVC member of the decode message codec union; filled by get_h265_msg() in radeon_uvd.c */
+	uint32_t	sps_info_flags;	/* SPS flags, one bit each: bit 0 scaling_list_enabled_flag .. bit 8 separate_colour_plane_flag; bit 9 set on CHIP_CARRIZO */
+	uint32_t	pps_info_flags;	/* PPS flags, one bit each: bit 0 dependent_slice_segments_enabled_flag .. bit 19 slice_segment_header_extension_present_flag */
+
+	uint8_t		chroma_format;	/* copied from the SPS chroma_format_idc */
+	uint8_t		bit_depth_luma_minus8;
+	uint8_t		bit_depth_chroma_minus8;
+	uint8_t		log2_max_pic_order_cnt_lsb_minus4;
+
+	uint8_t		sps_max_dec_pic_buffering_minus1;
+	uint8_t		log2_min_luma_coding_block_size_minus3;
+	uint8_t		log2_diff_max_min_luma_coding_block_size;
+	uint8_t		log2_min_transform_block_size_minus2;
+
+	uint8_t		log2_diff_max_min_transform_block_size;
+	uint8_t		max_transform_hierarchy_depth_inter;
+	uint8_t		max_transform_hierarchy_depth_intra;
+	uint8_t		pcm_sample_bit_depth_luma_minus1;
+
+	uint8_t		pcm_sample_bit_depth_chroma_minus1;
+	uint8_t		log2_min_pcm_luma_coding_block_size_minus3;
+	uint8_t		log2_diff_max_min_pcm_luma_coding_block_size;
+	uint8_t		num_extra_slice_header_bits;
+
+	uint8_t		num_short_term_ref_pic_sets;
+	uint8_t		num_long_term_ref_pic_sps;
+	uint8_t		num_ref_idx_l0_default_active_minus1;
+	uint8_t		num_ref_idx_l1_default_active_minus1;
+
+	int8_t		pps_cb_qp_offset;
+	int8_t		pps_cr_qp_offset;
+	int8_t		pps_beta_offset_div2;
+	int8_t		pps_tc_offset_div2;
+
+	uint8_t		diff_cu_qp_delta_depth;
+	uint8_t		num_tile_columns_minus1;
+	uint8_t		num_tile_rows_minus1;
+	uint8_t		log2_parallel_merge_level_minus2;
+
+	uint16_t	column_width_minus1[19];	/* all 19 entries are copied from the PPS */
+	uint16_t	row_height_minus1[21];	/* all 21 entries are copied from the PPS */
+
+	int8_t		init_qp_minus26;
+	uint8_t		num_delta_pocs_ref_rps_idx;
+	uint8_t		curr_idx;	/* get_h265_msg() assigns CurrPicOrderCntVal here; being 8 bits wide it keeps only the low byte */
+	uint8_t		reserved1;
+	int32_t		curr_poc;	/* CurrPicOrderCntVal of the picture being decoded */
+	uint8_t		ref_pic_list[16];	/* per slot: associated data of the reference buffer, 0x7F when the slot has no buffer */
+	int32_t		poc_list[16];	/* PicOrderCntVal of each of the 16 slots */
+	uint8_t		ref_pic_set_st_curr_before[8];	/* entries that are not filled hold 0xFF */
+	uint8_t		ref_pic_set_st_curr_after[8];	/* entries that are not filled hold 0xFF */
+	uint8_t		ref_pic_set_lt_curr[8];	/* entries that are not filled hold 0xFF */
+
+	uint8_t		ucScalingListDCCoefSizeID2[6];	/* from the SPS ScalingListDCCoeff16x16 */
+	uint8_t		ucScalingListDCCoefSizeID3[2];	/* from the SPS ScalingListDCCoeff32x32 */
+
+	uint8_t		highestTid;	/* not filled by get_h265_msg() yet (listed in its TODO), stays 0 */
+	uint8_t		isNonRef;	/* not filled by get_h265_msg() yet (listed in its TODO), stays 0 */
+};
+
 struct ruvd_vc1 {
 	uint32_t	profile;
 	uint32_t	level;
@@ -327,6 +391,7 @@
 
 			union {
 				struct ruvd_h264	h264;
+				struct ruvd_h265	h265;
 				struct ruvd_vc1		vc1;
 				struct ruvd_mpeg2	mpeg2;
 				struct ruvd_mpeg4	mpeg4;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce_40_2_2.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce_40_2_2.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce_40_2_2.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,32 +46,6 @@
 
 static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
 
-static struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
-{
-	return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
-}
-
-static struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
-{
-	return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
-}
-
-static struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
-{
-	return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
-}
-
-static void frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
-			 unsigned *luma_offset, unsigned *chroma_offset)
-{
-	unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
-	unsigned vpitch = align(enc->luma->npix_y, 16);
-	unsigned fsize = pitch * (vpitch + vpitch / 2);
-
-	*luma_offset = slot->index * fsize;
-	*chroma_offset = *luma_offset + pitch * vpitch;
-}
-
 static void session(struct rvce_encoder *enc)
 {
 	RVCE_BEGIN(0x00000001); // session cmd
@@ -79,30 +53,38 @@
 	RVCE_END();
 }
 
-static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
+static void task_info(struct rvce_encoder *enc, uint32_t op,
+		      uint32_t dep, uint32_t fb_idx, uint32_t ring_idx)
 {
 	RVCE_BEGIN(0x00000002); // task info
+	if (op == 0x3) {
+		if (enc->task_info_idx) {
+			uint32_t offs = enc->cs->cdw - enc->task_info_idx + 3;
+			// Update offsetOfNextTaskInfo
+			enc->cs->buf[enc->task_info_idx] = offs;
+		}
+		enc->task_info_idx = enc->cs->cdw;
+	}
 	RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
-	RVCE_CS(taskOperation); // taskOperation
-	RVCE_CS(0x00000000); // referencePictureDependency
+	RVCE_CS(op); // taskOperation
+	RVCE_CS(dep); // referencePictureDependency
 	RVCE_CS(0x00000000); // collocateFlagDependency
-	RVCE_CS(0x00000000); // feedbackIndex
-	RVCE_CS(0x00000000); // videoBitstreamRingIndex
+	RVCE_CS(fb_idx); // feedbackIndex
+	RVCE_CS(ring_idx); // videoBitstreamRingIndex
 	RVCE_END();
 }
 
 static void feedback(struct rvce_encoder *enc)
 {
 	RVCE_BEGIN(0x05000005); // feedback buffer
-	RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains); // feedbackRingAddressHi
-	RVCE_CS(0x00000000); // feedbackRingAddressLo
+	RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo
 	RVCE_CS(0x00000001); // feedbackRingSize
 	RVCE_END();
 }
 
 static void create(struct rvce_encoder *enc)
 {
-	task_info(enc, 0x00000000);
+	enc->task_info(enc, 0x00000000, 0, 0, 0);
 
 	RVCE_BEGIN(0x01000001); // create cmd
 	RVCE_CS(0x00000000); // encUseCircularBuffer
@@ -298,21 +280,31 @@
 	RVCE_END();
 }
 
+static void config(struct rvce_encoder *enc) // emit a task info plus every configuration packet, through the enc callbacks
+{
+	enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); // taskOperation 2, no dependency, feedbackIndex 0xffffffff, ring index 0
+	enc->rate_control(enc);
+	enc->config_extension(enc);
+	enc->motion_estimation(enc);
+	enc->rdo(enc);
+	if (enc->use_vui) // the VUI packet is only emitted when requested
+		enc->vui(enc);
+	enc->pic_control(enc);
+}
+
 static void encode(struct rvce_encoder *enc)
 {
+	signed luma_offset, chroma_offset;
 	int i;
-	unsigned luma_offset, chroma_offset;
 
-	task_info(enc, 0x00000003);
+	enc->task_info(enc, 0x00000003, 0, 0, 0);
 
 	RVCE_BEGIN(0x05000001); // context buffer
-	RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
-	RVCE_CS(0x00000000); // encodeContextAddressLo
+	RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo
 	RVCE_END();
 
 	RVCE_BEGIN(0x05000004); // video bitstream buffer
-	RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
-	RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
+	RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo
 	RVCE_CS(enc->bs_size); // videoBitstreamRingSize
 	RVCE_END();
 
@@ -324,10 +316,10 @@
 	RVCE_CS(0x00000000); // insertAUD
 	RVCE_CS(0x00000000); // endOfSequence
 	RVCE_CS(0x00000000); // endOfStream
-	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
-	RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
-	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
-	RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
+	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+		  enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+		  enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
 	RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
 	RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
 	RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
@@ -369,7 +361,7 @@
 	if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
 	   enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
 		struct rvce_cpb_slot *l0 = l0_slot(enc);
-		frame_offset(enc, l0, &luma_offset, &chroma_offset);
+		rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
 		RVCE_CS(l0->picture_type); // encPicType
 		RVCE_CS(l0->frame_num); // frameNumber
 		RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
@@ -395,7 +387,7 @@
 	RVCE_CS(0x00000000); // pictureStructure
 	if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
 		struct rvce_cpb_slot *l1 = l1_slot(enc);
-		frame_offset(enc, l1, &luma_offset, &chroma_offset);
+		rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
 		RVCE_CS(l1->picture_type); // encPicType
 		RVCE_CS(l1->frame_num); // frameNumber
 		RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
@@ -409,7 +401,7 @@
 		RVCE_CS(0xffffffff); // chromaOffset
 	}
 
-	frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
+	rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
 	RVCE_CS(luma_offset); // encReconstructedLumaOffset
 	RVCE_CS(chroma_offset); // encReconstructedChromaOffset
 	RVCE_CS(0x00000000); // encColocBufferOffset
@@ -430,7 +422,7 @@
 
 static void destroy(struct rvce_encoder *enc)
 {
-	task_info(enc, 0x00000001);
+	enc->task_info(enc, 0x00000001, 0, 0, 0);
 
 	RVCE_BEGIN(0x02000001); // destroy
 	RVCE_END();
@@ -439,6 +431,7 @@
 void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
 {
 	enc->session = session;
+	enc->task_info = task_info;
 	enc->create = create;
 	enc->feedback = feedback;
 	enc->rate_control = rate_control;
@@ -447,6 +440,7 @@
 	enc->motion_estimation = motion_estimation;
 	enc->rdo = rdo;
 	enc->vui = vui;
+	enc->config = config;
 	enc->encode = encode;
 	enc->destroy = destroy;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce_50.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce_50.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce_50.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce_50.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,243 @@
+/**************************************************************************
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ *      Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_video.h"
+#include "util/u_memory.h"
+
+#include "vl/vl_video_buffer.h"
+
+#include "r600_pipe_common.h"
+#include "radeon_video.h"
+#include "radeon_vce.h"
+
+static void rate_control(struct rvce_encoder *enc) // installed by radeon_vce_50_init() in place of the 40.2.2 version
+{
+	RVCE_BEGIN(0x04000005); // rate control command, values taken from enc->pic
+	RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod
+	RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate
+	RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate
+	RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum
+	RVCE_CS(0x00000000); // encGOPSize
+	RVCE_CS(enc->pic.quant_i_frames); // encQP_I
+	RVCE_CS(enc->pic.quant_p_frames); // encQP_P
+	RVCE_CS(enc->pic.quant_b_frames); // encQP_B
+	RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize
+	RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen
+	RVCE_CS(0x00000000); // encVBVBufferLevel
+	RVCE_CS(0x00000000); // encMaxAUSize
+	RVCE_CS(0x00000000); // encQPInitialMode
+	RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture
+	RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger
+	RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional
+	RVCE_CS(0x00000000); // encMinQP
+	RVCE_CS(0x00000033); // encMaxQP (0x33 = 51)
+	RVCE_CS(0x00000000); // encSkipFrameEnable
+	RVCE_CS(0x00000000); // encFillerDataEnable
+	RVCE_CS(0x00000000); // encEnforceHRD
+	RVCE_CS(0x00000000); // encBPicsDeltaQP
+	RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP
+	RVCE_CS(0x00000000); // encRateControlReInitDisable
+	RVCE_CS(0x00000000); // encLCVBRInitQPFlag
+	RVCE_CS(0x00000000); // encLCVBRSATDBasedNonlinearBitBudgetFlag
+	RVCE_END();
+}
+
+static void encode(struct rvce_encoder *enc) // installed by radeon_vce_50_init() in place of the 40.2.2 version
+{
+	signed luma_offset, chroma_offset, bs_offset;
+	unsigned dep, bs_idx = enc->bs_idx++; // dep: referencePictureDependency for task_info; bs_idx: bitstream ring index of this task
+	int i;
+
+	if (enc->dual_inst) { // the dependency can only be non-zero when dual_inst is set
+		if (bs_idx == 0)
+			dep = 1;
+		else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+			dep = 0;
+		else
+			dep = 2;
+	} else
+		dep = 0;
+
+	enc->task_info(enc, 0x00000003, dep, 0, bs_idx); // taskOperation 3, feedbackIndex 0
+
+	RVCE_BEGIN(0x05000001); // context buffer
+	RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
+	RVCE_END();
+
+	bs_offset = -(signed)(bs_idx * enc->bs_size); // negated byte offset of ring slot bs_idx; NOTE(review): confirm why the offset is negative
+
+	RVCE_BEGIN(0x05000004); // video bitstream buffer
+	RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo
+	RVCE_CS(enc->bs_size); // videoBitstreamRingSize
+	RVCE_END();
+
+	if (enc->dual_pipe) { // the auxiliary buffer packet is only emitted in dual pipe mode
+		unsigned aux_offset = enc->cpb.res->buf->size -
+			RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2; // region at the very end of the cpb buffer
+		RVCE_BEGIN(0x05000002); // auxiliary buffer
+		for (i = 0; i < 8; ++i) { // eight offsets, one row size apart
+			RVCE_CS(aux_offset);
+			aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
+		}
+		for (i = 0; i < 8; ++i) // followed by eight sizes
+			RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE);
+		RVCE_END();
+	}
+
+	RVCE_BEGIN(0x03000001); // encode
+	RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders (0x11 only when frame_num is 0)
+	RVCE_CS(0x00000000); // pictureStructure
+	RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
+	RVCE_CS(0x00000000); // forceRefreshMap
+	RVCE_CS(0x00000000); // insertAUD
+	RVCE_CS(0x00000000); // endOfSequence
+	RVCE_CS(0x00000000); // endOfStream
+	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+		enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+	RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+		enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
+	RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
+	RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
+	RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
+	if (enc->dual_pipe)
+		RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+	else
+		RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+	RVCE_CS(0x00000000); // encInputPicTileConfig
+	RVCE_CS(enc->pic.picture_type); // encPicType
+	RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
+	RVCE_CS(0x00000000); // encIdrPicId
+	RVCE_CS(0x00000000); // encMGSKeyPic
+	RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
+	RVCE_CS(0x00000000); // encTemporalLayerIndex
+	RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
+	RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
+	RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
+
+	i = enc->pic.frame_num - enc->pic.ref_idx_l0; // difference between the current frame number and ref_idx_l0
+	if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
+		RVCE_CS(0x00000001); // encRefListModificationOp
+		RVCE_CS(i - 1);      // encRefListModificationNum
+	} else {
+		RVCE_CS(0x00000000); // encRefListModificationOp
+		RVCE_CS(0x00000000); // encRefListModificationNum
+	}
+
+	for (i = 0; i < 3; ++i) { // three further modification entries, all zero
+		RVCE_CS(0x00000000); // encRefListModificationOp
+		RVCE_CS(0x00000000); // encRefListModificationNum
+	}
+	for (i = 0; i < 4; ++i) { // four picture marking entries, all zero
+		RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
+		RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
+		RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
+		RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
+		RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
+	}
+
+	// encReferencePictureL0[0]: taken from l0_slot() for P and B pictures, otherwise zeros with 0xffffffff offsets
+	RVCE_CS(0x00000000); // pictureStructure
+	if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
+	   enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+		struct rvce_cpb_slot *l0 = l0_slot(enc);
+		rvce_frame_offset(enc, l0, &luma_offset, &chroma_offset);
+		RVCE_CS(l0->picture_type); // encPicType
+		RVCE_CS(l0->frame_num); // frameNumber
+		RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
+		RVCE_CS(luma_offset); // lumaOffset
+		RVCE_CS(chroma_offset); // chromaOffset
+	} else {
+		RVCE_CS(0x00000000); // encPicType
+		RVCE_CS(0x00000000); // frameNumber
+		RVCE_CS(0x00000000); // pictureOrderCount
+		RVCE_CS(0xffffffff); // lumaOffset
+		RVCE_CS(0xffffffff); // chromaOffset
+	}
+
+	// encReferencePictureL0[1]: always zeros with 0xffffffff offsets
+	RVCE_CS(0x00000000); // pictureStructure
+	RVCE_CS(0x00000000); // encPicType
+	RVCE_CS(0x00000000); // frameNumber
+	RVCE_CS(0x00000000); // pictureOrderCount
+	RVCE_CS(0xffffffff); // lumaOffset
+	RVCE_CS(0xffffffff); // chromaOffset
+
+	// encReferencePictureL1[0]: taken from l1_slot() for B pictures, otherwise zeros with 0xffffffff offsets
+	RVCE_CS(0x00000000); // pictureStructure
+	if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
+		struct rvce_cpb_slot *l1 = l1_slot(enc);
+		rvce_frame_offset(enc, l1, &luma_offset, &chroma_offset);
+		RVCE_CS(l1->picture_type); // encPicType
+		RVCE_CS(l1->frame_num); // frameNumber
+		RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
+		RVCE_CS(luma_offset); // lumaOffset
+		RVCE_CS(chroma_offset); // chromaOffset
+	} else {
+		RVCE_CS(0x00000000); // encPicType
+		RVCE_CS(0x00000000); // frameNumber
+		RVCE_CS(0x00000000); // pictureOrderCount
+		RVCE_CS(0xffffffff); // lumaOffset
+		RVCE_CS(0xffffffff); // chromaOffset
+	}
+
+	rvce_frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset); // offsets of the slot from current_slot()
+	RVCE_CS(luma_offset); // encReconstructedLumaOffset
+	RVCE_CS(chroma_offset); // encReconstructedChromaOffset
+	RVCE_CS(0x00000000); // encColocBufferOffset
+	RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
+	RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
+	RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
+	RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
+	RVCE_CS(0x00000000); // pictureCount
+	RVCE_CS(enc->pic.frame_num); // frameNumber
+	RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
+	RVCE_CS(0x00000000); // numIPicRemainInRCGOP
+	RVCE_CS(0x00000000); // numPPicRemainInRCGOP
+	RVCE_CS(0x00000000); // numBPicRemainInRCGOP
+	RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
+	RVCE_CS(0x00000000); // enableIntraRefresh
+	RVCE_END();
+}
+
+void radeon_vce_50_init(struct rvce_encoder *enc)
+{
+	radeon_vce_40_2_2_init(enc); /* start from the callbacks installed by the 40.2.2 init */
+
+	/* only the two below are different: replace them with this file's rate_control() and encode() */
+	enc->rate_control = rate_control;
+	enc->encode = encode;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,12 +44,20 @@
 #include "radeon_video.h"
 #include "radeon_vce.h"
 
+#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))
+#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
+#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
+#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
+#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
+
 /**
  * flush commands to the hardware
  */
 static void flush(struct rvce_encoder *enc)
 {
 	enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
+	enc->task_info_idx = 0;
+	enc->bs_idx = 0;
 }
 
 #if 0
@@ -183,6 +191,44 @@
 }
 
 /**
+ * Get the slot for the currently encoded frame
+ */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
+{
+	return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list); /* entry reached via cpb_slots.prev; presumably the list tail */
+}
+
+/**
+ * Get the slot for L0
+ */
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
+{
+	return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list); /* entry reached via cpb_slots.next; presumably the first slot */
+}
+
+/**
+ * Get the slot for L1
+ */
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
+{
+	return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list); /* one entry further along the list than l0_slot() */
+}
+
+/**
+ * Calculate the offsets into the CPB
+ */
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+		       signed *luma_offset, signed *chroma_offset)
+{
+	unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128); /* luma pitch in bytes, aligned to 128 */
+	unsigned vpitch = align(enc->luma->npix_y, 16); /* luma height, aligned to 16 */
+	unsigned fsize = pitch * (vpitch + vpitch / 2); /* one frame: luma plane plus half as many rows again */
+
+	*luma_offset = slot->index * fsize; /* frames are laid out back to back, indexed by slot->index */
+	*chroma_offset = *luma_offset + pitch * vpitch; /* chroma starts right after the luma plane */
+}
+
+/**
  * destroy this video encoder
  */
 static void rvce_destroy(struct pipe_video_codec *encoder)
@@ -236,24 +282,19 @@
 		enc->fb = &fb;
 		enc->session(enc);
 		enc->create(enc);
-		enc->rate_control(enc);
-		need_rate_control = false;
-		enc->config_extension(enc);
-		enc->motion_estimation(enc);
-		enc->rdo(enc);
-		if (enc->use_vui)
-			enc->vui(enc);
-		enc->pic_control(enc);
+		enc->config(enc);
 		enc->feedback(enc);
 		flush(enc);
 		//dump_feedback(enc, &fb);
 		rvid_destroy_buffer(&fb);
+		need_rate_control = false;
 	}
 
-	enc->session(enc);
-
-	if (need_rate_control)
-		enc->rate_control(enc);
+	if (need_rate_control) {
+		enc->session(enc);
+		enc->config(enc);
+		flush(enc);
+	}
 }
 
 static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
@@ -270,6 +311,8 @@
 		RVID_ERR("Can't create feedback buffer.\n");
 		return;
 	}
+	if (!enc->cs->cdw)
+		enc->session(enc);
 	enc->encode(enc);
 	enc->feedback(enc);
 }
@@ -282,7 +325,8 @@
 	struct rvce_cpb_slot *slot = LIST_ENTRY(
 		struct rvce_cpb_slot, enc->cpb_slots.prev, list);
 
-	flush(enc);
+	if (!enc->dual_inst || enc->bs_idx > 1)
+		flush(enc);
 
 	/* update the CPB backtrack with the just encoded frame */
 	slot->picture_type = enc->pic.picture_type;
@@ -321,6 +365,9 @@
  */
 static void rvce_flush(struct pipe_video_codec *encoder)
 {
+	struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+
+	flush(enc);
 }
 
 static void rvce_cs_flush(void *ctx, unsigned flags,
@@ -335,6 +382,7 @@
 					     rvce_get_buffer get_buffer)
 {
 	struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
+	struct r600_common_context *rctx = (struct r600_common_context*)context;
 	struct rvce_encoder *enc;
 	struct pipe_video_buffer *tmp_buf, templat = {};
 	struct radeon_surf *tmp_surf;
@@ -353,8 +401,17 @@
 	if (!enc)
 		return NULL;
 
+	if (rscreen->info.drm_major == 3)
+		enc->use_vm = true;
 	if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
 		enc->use_vui = true;
+	if (rscreen->info.family >= CHIP_TONGA)
+		enc->dual_pipe = true;
+	/* TODO enable B frame with dual instance */
+	if ((rscreen->info.family >= CHIP_TONGA) &&
+		(templ->max_references == 1) &&
+		(rscreen->info.vce_harvest_config == 0))
+		enc->dual_inst = true;
 
 	enc->base = *templ;
 	enc->base.context = context;
@@ -369,7 +426,7 @@
 
 	enc->screen = context->screen;
 	enc->ws = ws;
-	enc->cs = ws->cs_create(ws, RING_VCE, rvce_cs_flush, enc, NULL);
+	enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, NULL);
 	if (!enc->cs) {
 		RVID_ERR("Can't get command submission context.\n");
 		goto error;
@@ -394,6 +451,9 @@
 	cpb_size = cpb_size * align(tmp_surf->npix_y, 16);
 	cpb_size = cpb_size * 3 / 2;
 	cpb_size = cpb_size * enc->cpb_num;
+	if (enc->dual_pipe)
+		cpb_size +=  RVCE_MAX_AUX_BUFFER_NUM *
+			RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
 	tmp_buf->destroy(tmp_buf);
 	if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
 		RVID_ERR("Can't create CPB buffer.\n");
@@ -406,7 +466,21 @@
 
 	reset_cpb(enc);
 
-	radeon_vce_40_2_2_init(enc);
+	switch (rscreen->info.vce_fw_version) {
+	case FW_40_2_2:
+		radeon_vce_40_2_2_init(enc);
+		break;
+
+	case FW_50_0_1:
+	case FW_50_1_2:
+	case FW_50_10_2:
+	case FW_50_17_3:
+		radeon_vce_50_init(enc);
+		break;
+
+	default:
+		goto error;
+	}
 
 	return &enc->base;
 
@@ -426,5 +500,31 @@
  */
 bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
 {
-	return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8));
+	return rscreen->info.vce_fw_version == FW_40_2_2 ||
+		rscreen->info.vce_fw_version == FW_50_0_1 ||
+		rscreen->info.vce_fw_version == FW_50_1_2 ||
+		rscreen->info.vce_fw_version == FW_50_10_2 ||
+		rscreen->info.vce_fw_version == FW_50_17_3;
+}
+
+/**
+ * Add the buffer as relocation to the current command submission
+ */
+void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
+                     enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+                     signed offset)
+{
+	int reloc_idx;
+
+	reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN); /* the reloc is added in both modes */
+	if (enc->use_vm) { /* VM mode: emit the buffer's virtual address plus offset */
+		uint64_t addr;
+		addr = enc->ws->buffer_get_virtual_address(buf);
+		addr = addr + offset;
+		RVCE_CS(addr >> 32); /* high dword first */
+		RVCE_CS(addr); /* then the low dword */
+	} else { /* no VM: emit the relocation index times 4, followed by the offset */
+		RVCE_CS(reloc_idx * 4);
+		RVCE_CS(offset);
+	}
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_vce.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_vce.h	2015-09-16 14:36:09.000000000 +0000
@@ -36,15 +36,16 @@
 
 #include "util/list.h"
 
-#define RVCE_RELOC(buf, usage, domain) (enc->ws->cs_add_reloc(enc->cs, (buf), (usage), domain, RADEON_PRIO_MIN))
-
 #define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value))
 #define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd)
-#define RVCE_READ(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READ, domain) * 4)
-#define RVCE_WRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_WRITE, domain) * 4)
-#define RVCE_READWRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READWRITE, domain) * 4)
+#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
 #define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
 
+#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
+#define RVCE_MAX_AUX_BUFFER_NUM 4
+
 struct r600_common_screen;
 
 /* driver dependent callback */
@@ -76,8 +77,12 @@
 	void (*motion_estimation)(struct rvce_encoder *enc);
 	void (*rdo)(struct rvce_encoder *enc);
 	void (*vui)(struct rvce_encoder *enc);
+	void (*config)(struct rvce_encoder *enc);
 	void (*encode)(struct rvce_encoder *enc);
 	void (*destroy)(struct rvce_encoder *enc);
+	void (*task_info)(struct rvce_encoder *enc, uint32_t op,
+			  uint32_t dep, uint32_t fb_idx,
+			  uint32_t ring_idx);
 
 	unsigned			stream_handle;
 
@@ -101,9 +106,23 @@
 	struct rvid_buffer		*fb;
 	struct rvid_buffer		cpb;
 	struct pipe_h264_enc_picture_desc pic;
-	bool use_vui;
+
+	unsigned			task_info_idx;
+	unsigned			bs_idx;
+
+	bool				use_vm;
+	bool				use_vui;
+	bool				dual_pipe;
+	bool				dual_inst;
 };
 
+/* CPB handling functions */
+struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
+struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
+void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
+		       signed *luma_offset, signed *chroma_offset);
+
 struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
 					     const struct pipe_video_codec *templat,
 					     struct radeon_winsys* ws,
@@ -111,7 +130,14 @@
 
 bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
 
+void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
+		     enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+		     signed offset);
+
 /* init vce fw 40.2.2 specific callbacks */
 void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
 
+/* init vce fw 50 specific callbacks */
+void radeon_vce_50_init(struct rvce_encoder *enc);
+
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_video.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_video.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_video.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_video.c	2015-09-16 14:36:09.000000000 +0000
@@ -214,9 +214,9 @@
 	        case PIPE_VIDEO_CAP_NPOT_TEXTURES:
         	        return 1;
 	        case PIPE_VIDEO_CAP_MAX_WIDTH:
-        	        return 2048;
+			return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
 	        case PIPE_VIDEO_CAP_MAX_HEIGHT:
-        	        return 1152;
+			return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
 	        case PIPE_VIDEO_CAP_PREFERED_FORMAT:
         	        return PIPE_FORMAT_NV12;
 	        case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
@@ -225,6 +225,8 @@
         	        return false;
 	        case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
         	        return true;
+	        case PIPE_VIDEO_CAP_STACKED_FRAMES:
+			return (rscreen->family < CHIP_TONGA) ? 1 : 2;
 	        default:
         	        return 0;
 		}
@@ -262,20 +264,28 @@
 			/* FIXME: VC-1 simple/main profile is broken */
 			return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
 			       entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+		case PIPE_VIDEO_FORMAT_HEVC:
+			/* Carrizo only supports HEVC Main */
+			return rscreen->family >= CHIP_CARRIZO &&
+				   profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
 		default:
 			return false;
 		}
 	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
 		return 1;
 	case PIPE_VIDEO_CAP_MAX_WIDTH:
-		return 2048;
+		return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
 	case PIPE_VIDEO_CAP_MAX_HEIGHT:
-		return 1152;
+		return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
 	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
 		return PIPE_FORMAT_NV12;
 	case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+		if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+			return false; //The hardware doesn't support interlaced HEVC.
 		return true;
 	case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+		if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+			return false; //The hardware doesn't support interlaced HEVC.
 		return true;
 	case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
 		return true;
@@ -300,6 +310,8 @@
 		case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
 		case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
 			return 41;
+		case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+			return 186;
 		default:
 			return 0;
 		}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeon/radeon_winsys.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeon/radeon_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -42,12 +42,9 @@
 
 #include "pipebuffer/pb_buffer.h"
 
-#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
-
 #define RADEON_FLUSH_ASYNC		(1 << 0)
 #define RADEON_FLUSH_KEEP_TILING_FLAGS	(1 << 1) /* needs DRM 2.12.0 */
-#define RADEON_FLUSH_COMPUTE		(1 << 2)
-#define RADEON_FLUSH_END_OF_FRAME       (1 << 3)
+#define RADEON_FLUSH_END_OF_FRAME       (1 << 2)
 
 /* Tiling flags. */
 enum radeon_bo_layout {
@@ -136,6 +133,10 @@
     CHIP_KABINI,
     CHIP_HAWAII,
     CHIP_MULLINS,
+    CHIP_TONGA,
+    CHIP_ICELAND,
+    CHIP_CARRIZO,
+    CHIP_FIJI,
     CHIP_LAST,
 };
 
@@ -150,10 +151,12 @@
     CAYMAN,
     SI,
     CIK,
+    VI,
 };
 
 enum ring_type {
     RING_GFX = 0,
+    RING_COMPUTE,
     RING_DMA,
     RING_UVD,
     RING_VCE,
@@ -169,9 +172,10 @@
     RADEON_NUM_BYTES_MOVED,
     RADEON_VRAM_USAGE,
     RADEON_GTT_USAGE,
-    RADEON_GPU_TEMPERATURE,
+    RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
     RADEON_CURRENT_SCLK,
-    RADEON_CURRENT_MCLK
+    RADEON_CURRENT_MCLK,
+    RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
 };
 
 enum radeon_bo_priority {
@@ -192,9 +196,11 @@
 
 struct winsys_handle;
 struct radeon_winsys_cs_handle;
+struct radeon_winsys_ctx;
 
 struct radeon_winsys_cs {
     unsigned                    cdw;  /* Number of used dwords. */
+    unsigned                    max_dw; /* Maximum number of dwords. */
     uint32_t                    *buf; /* The command buffer. */
     enum ring_type              ring_type;
 };
@@ -238,6 +244,7 @@
 
     boolean                     cik_macrotile_mode_array_valid;
     uint32_t                    cik_macrotile_mode_array[16];
+    uint32_t                    vce_harvest_config;
 };
 
 enum radeon_feature_id {
@@ -317,6 +324,8 @@
     struct radeon_surf_level    stencil_level[RADEON_SURF_MAX_LEVEL];
     uint32_t                    tiling_index[RADEON_SURF_MAX_LEVEL];
     uint32_t                    stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
+    uint32_t                    pipe_config;
+    uint32_t                    num_banks;
 };
 
 struct radeon_winsys {
@@ -398,24 +407,15 @@
     void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
 
     /**
-     * Return TRUE if a buffer object is being used by the GPU.
-     *
-     * \param buf       A winsys buffer object.
-     * \param usage     Only check whether the buffer is busy for the given usage.
-     */
-    boolean (*buffer_is_busy)(struct pb_buffer *buf,
-                              enum radeon_bo_usage usage);
-
-    /**
-     * Wait for a buffer object until it is not used by a GPU. This is
-     * equivalent to a fence placed after the last command using the buffer,
-     * and synchronizing to the fence.
+     * Wait for the buffer and return true if the buffer is not used
+     * by the device.
      *
-     * \param buf       A winsys buffer object to wait for.
-     * \param usage     Only wait until the buffer is idle for the given usage,
-     *                  but may still be busy for some other usage.
+     * A timeout of 0 only returns the current status.
+     * A timeout of PIPE_TIMEOUT_INFINITE always waits until the buffer
+     * is idle.
      */
-    void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
+    bool (*buffer_wait)(struct pb_buffer *buf, uint64_t timeout,
+                        enum radeon_bo_usage usage);
 
     /**
      * Return tiling flags describing a memory layout of a buffer object.
@@ -450,10 +450,11 @@
                               struct radeon_winsys_cs *rcs,
                               enum radeon_bo_layout microtile,
                               enum radeon_bo_layout macrotile,
+                              unsigned pipe_config,
                               unsigned bankw, unsigned bankh,
                               unsigned tile_split,
                               unsigned stencil_tile_split,
-                              unsigned mtilea,
+                              unsigned mtilea, unsigned num_banks,
                               unsigned stride,
                               bool scanout);
 
@@ -515,15 +516,31 @@
      *************************************************************************/
 
     /**
+     * Create a command submission context.
+     * Various command streams can be submitted to the same context.
+     */
+    struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
+
+    /**
+     * Destroy a context.
+     */
+    void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
+
+    /**
+     * Query a GPU reset status.
+     */
+    enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx);
+
+    /**
      * Create a command stream.
      *
-     * \param ws        The winsys this function is called from.
+     * \param ctx       The submission context
      * \param ring_type The ring type (GFX, DMA, UVD)
      * \param flush     Flush callback function associated with the command stream.
      * \param user      User pointer that will be passed to the flush callback.
      * \param trace_buf Trace buffer when tracing is enabled
      */
-    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
+    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx,
                                           enum ring_type ring_type,
                                           void (*flush)(void *ctx, unsigned flags,
 							struct pipe_fence_handle **fence),
@@ -668,12 +685,12 @@
 };
 
 
-static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
 {
     cs->buf[cs->cdw++] = value;
 }
 
-static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
+static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
 				     const uint32_t *values, unsigned count)
 {
     memcpy(cs->buf+cs->cdw, values, count * 4);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/Automake.inc mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/Automake.inc
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/Automake.inc	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/Automake.inc	2015-09-16 14:36:09.000000000 +0000
@@ -5,10 +5,12 @@
 TARGET_LIB_DEPS += \
 	$(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
 	$(RADEON_LIBS) \
-	$(LIBDRM_LIBS)
+	$(LIBDRM_LIBS) \
+	$(AMDGPU_LIBS)
 
 TARGET_RADEON_WINSYS = \
-	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
+	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
+	$(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la
 
 TARGET_RADEON_COMMON = \
 	$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/cik_sdma.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/cik_sdma.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/cik_sdma.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/cik_sdma.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,364 @@
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ * Copyright 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Jerome Glisse
+ */
+
+#include "sid.h"
+#include "si_pipe.h"
+#include "radeon/r600_cs.h"
+
+#include "util/u_format.h"
+
+static uint32_t cik_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode)
+{
+	if (sscreen->b.info.si_tile_mode_array_valid) {
+		uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode]; /* tile_mode is the array index */
+
+		return G_009910_MICRO_TILE_MODE_NEW(gb_tile_mode);
+	}
+
+	/* The kernel cannot return the tile mode array, so guess thin micro tiling. */
+	return V_009910_ADDR_SURF_THIN_MICRO_TILING;
+}
+
+static void cik_sdma_do_copy_buffer(struct si_context *ctx,
+				    struct pipe_resource *dst,
+				    struct pipe_resource *src,
+				    uint64_t dst_offset,
+				    uint64_t src_offset,
+				    uint64_t size)
+{
+	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+	unsigned i, ncopy, csize;
+	struct r600_resource *rdst = (struct r600_resource*)dst;
+	struct r600_resource *rsrc = (struct r600_resource*)src;
+	/* Emit linear SDMA copy packets on the DMA ring moving size units from src to dst. */
+	dst_offset += r600_resource(dst)->gpu_address; /* offsets become GPU addresses */
+	src_offset += r600_resource(src)->gpu_address;
+
+	ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE; /* packet count, rounded up */
+	r600_need_dma_space(&ctx->b, ncopy * 7); /* each packet written below is 7 dwords */
+
+	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+			      RADEON_PRIO_MIN);
+	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+			      RADEON_PRIO_MIN);
+
+	for (i = 0; i < ncopy; i++) {
+		csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE; /* clamp this chunk to the per-packet maximum */
+		cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+						     CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
+						     0);
+		cs->buf[cs->cdw++] = csize;
+		cs->buf[cs->cdw++] = 0; /* src/dst endian swap */
+		cs->buf[cs->cdw++] = src_offset; /* source address, low dword */
+		cs->buf[cs->cdw++] = src_offset >> 32; /* source address, high dword */
+		cs->buf[cs->cdw++] = dst_offset; /* destination address, low dword */
+		cs->buf[cs->cdw++] = dst_offset >> 32; /* destination address, high dword */
+		dst_offset += csize; /* advance both addresses past the chunk just queued */
+		src_offset += csize;
+		size -= csize;
+	}
+}
+
+static void cik_sdma_copy_buffer(struct si_context *ctx,
+				 struct pipe_resource *dst,
+				 struct pipe_resource *src,
+				 uint64_t dst_offset,
+				 uint64_t src_offset,
+				 uint64_t size)
+{
+	struct r600_resource *rdst = (struct r600_resource*)dst;
+
+	/* Mark the destination buffer range as valid (initialized), so
+	 * that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(&rdst->valid_buffer_range, dst_offset,
+		       dst_offset + size);
+
+	cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size); /* then queue the actual copy packets */
+}
+
+static void cik_sdma_copy_tile(struct si_context *ctx,
+			       struct pipe_resource *dst,
+			       unsigned dst_level,
+			       struct pipe_resource *src,
+			       unsigned src_level,
+			       unsigned y,
+			       unsigned copy_height,
+			       unsigned y_align,
+			       unsigned pitch,
+			       unsigned bpe)
+{
+	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+	struct si_screen *sscreen = ctx->screen;
+	struct r600_texture *rsrc = (struct r600_texture*)src;
+	struct r600_texture *rdst = (struct r600_texture*)dst;
+	struct r600_texture *rlinear, *rtiled;
+	unsigned linear_lvl, tiled_lvl;
+	unsigned array_mode, lbpe, pitch_tile_max, slice_tile_max, size;
+	unsigned ncopy, height, cheight, detile, i, src_mode, dst_mode;
+	unsigned sub_op, bank_h, bank_w, mt_aspect, nbanks, tile_split, mt;
+	uint64_t base, addr;
+	unsigned pipe_config, tile_mode_index;
+	/* Queue tiled SDMA copy packets for copy_height rows starting at y, between a tiled level and a linear one. */
+	dst_mode = rdst->surface.level[dst_level].mode;
+	src_mode = rsrc->surface.level[src_level].mode;
+	/* downcast linear aligned to linear to simplify test */
+	src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
+	dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
+	assert(dst_mode != src_mode); /* together with the next assert: exactly one side is linear */
+	assert(src_mode == RADEON_SURF_MODE_LINEAR || dst_mode == RADEON_SURF_MODE_LINEAR);
+
+	sub_op = CIK_SDMA_COPY_SUB_OPCODE_TILED;
+	lbpe = util_logbase2(bpe);
+	pitch_tile_max = ((pitch / bpe) / 8) - 1; /* pitch in elements, in units of 8, minus one */
+
+	detile = dst_mode == RADEON_SURF_MODE_LINEAR; /* 1 when copying tiled -> linear */
+	rlinear = detile ? rdst : rsrc;
+	rtiled = detile ? rsrc : rdst;
+	linear_lvl = detile ? dst_level : src_level;
+	tiled_lvl = detile ? src_level : dst_level;
+
+	assert(!util_format_is_depth_and_stencil(rtiled->resource.b.b.format));
+
+	array_mode = si_array_mode(rtiled->surface.level[tiled_lvl].mode);
+	slice_tile_max = (rtiled->surface.level[tiled_lvl].nblk_x *
+			  rtiled->surface.level[tiled_lvl].nblk_y) / (8*8) - 1; /* blocks per slice in 8x8 tiles, minus one */
+	height = rlinear->surface.level[linear_lvl].nblk_y;
+	base = rtiled->surface.level[tiled_lvl].offset; /* tiled side: level offset, GPU address added below */
+	addr = rlinear->surface.level[linear_lvl].offset; /* linear side: level offset, GPU address added below */
+	bank_h = cik_bank_wh(rtiled->surface.bankh);
+	bank_w = cik_bank_wh(rtiled->surface.bankw);
+	mt_aspect = cik_macro_tile_aspect(rtiled->surface.mtilea);
+	tile_split = cik_tile_split(rtiled->surface.tile_split);
+	tile_mode_index = si_tile_mode_index(rtiled, tiled_lvl, false);
+	nbanks = si_num_banks(sscreen, rtiled);
+	base += rtiled->resource.gpu_address;
+	addr += rlinear->resource.gpu_address;
+
+	pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
+	mt = cik_micro_tile_mode(sscreen, tile_mode_index);
+
+	size = (copy_height * pitch) / 4; /* total amount to copy, in units of 4 pitch-units (dwords if pitch is bytes) */
+	cheight = copy_height;
+	if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) { /* too big for one packet: limit rows per packet */
+		cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
+		cheight &= ~(y_align - 1); /* round down to a multiple of y_align (mask assumes a power of two) */
+	}
+	ncopy = (copy_height + cheight - 1) / cheight; /* NOTE(review): divides by cheight; confirm it cannot be 0 after the rounding above */
+	r600_need_dma_space(&ctx->b, ncopy * 12); /* each packet written below is 12 dwords */
+
+	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+			      RADEON_USAGE_READ, RADEON_PRIO_MIN);
+	r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+			      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+
+	copy_height = size * 4 / pitch; /* recompute the remaining row count from size */
+	for (i = 0; i < ncopy; i++) {
+		cheight = copy_height;
+		if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) { /* same per-packet clamp as above */
+			cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
+			cheight &= ~(y_align - 1);
+		}
+		size = (cheight * pitch) / 4; /* size of this packet's chunk */
+
+		cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
+						     sub_op, detile << 15);
+		cs->buf[cs->cdw++] = base; /* tiled address, low dword */
+		cs->buf[cs->cdw++] = base >> 32; /* tiled address, high dword */
+		cs->buf[cs->cdw++] = ((height - 1) << 16) | pitch_tile_max;
+		cs->buf[cs->cdw++] = slice_tile_max;
+		cs->buf[cs->cdw++] = (pipe_config << 26) | (mt_aspect << 24) |
+			(nbanks << 21) | (bank_h << 18) | (bank_w << 15) |
+			(tile_split << 11) | (mt << 8) | (array_mode << 3) |
+			lbpe;
+		cs->buf[cs->cdw++] = y << 16; /* | x */
+		cs->buf[cs->cdw++] = 0; /* z */;
+		cs->buf[cs->cdw++] = addr & 0xfffffffc; /* linear address, low dword with the two lowest bits cleared */
+		cs->buf[cs->cdw++] = addr >> 32; /* linear address, high dword */
+		cs->buf[cs->cdw++] = (pitch / bpe) - 1; /* linear pitch in elements, minus one */
+		cs->buf[cs->cdw++] = size;
+
+		copy_height -= cheight; /* rows still left to copy */
+		y += cheight; /* next packet starts below the rows just queued */
+	}
+}
+
+void cik_sdma_copy(struct pipe_context *ctx,
+		   struct pipe_resource *dst,
+		   unsigned dst_level,
+		   unsigned dstx, unsigned dsty, unsigned dstz,
+		   struct pipe_resource *src,
+		   unsigned src_level,
+		   const struct pipe_box *src_box)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct r600_texture *rsrc = (struct r600_texture*)src;
+	struct r600_texture *rdst = (struct r600_texture*)dst;
+	unsigned dst_pitch, src_pitch, bpe, dst_mode, src_mode;
+	unsigned src_w, dst_w;
+	unsigned src_x, src_y;
+	unsigned copy_height, y_align;
+	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
+	/* Copy a region on the DMA ring when possible; otherwise fall back to si_resource_copy_region. */
+	if (sctx->b.rings.dma.cs == NULL) { /* no DMA command stream available */
+		goto fallback;
+	}
+
+	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { /* buffer-to-buffer: linear copy */
+		cik_sdma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
+		return;
+	}
+
+	/* Before re-enabling this, please make sure you can hit all newly
+	 * enabled paths in your testing, preferably with both piglit (in
+	 * particular the streaming-texture-leak test) and real world apps
+	 * (e.g. the UE4 Elemental demo).
+	 */
+	goto fallback; /* unconditional: the texture paths below are currently unreachable */
+
+	if (src->format != dst->format ||
+	    rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
+	    rdst->dirty_level_mask & (1 << dst_level)) {
+		goto fallback;
+	}
+
+	if (rsrc->dirty_level_mask & (1 << src_level)) {
+		if (rsrc->htile_buffer)
+			goto fallback;
+
+		ctx->flush_resource(ctx, src);
+	}
+
+	src_x = util_format_get_nblocksx(src->format, src_box->x); /* convert coordinates to format blocks */
+	dst_x = util_format_get_nblocksx(src->format, dst_x);
+	src_y = util_format_get_nblocksy(src->format, src_box->y);
+	dst_y = util_format_get_nblocksy(src->format, dst_y);
+
+	dst_pitch = rdst->surface.level[dst_level].pitch_bytes;
+	src_pitch = rsrc->surface.level[src_level].pitch_bytes;
+	src_w = rsrc->surface.level[src_level].npix_x;
+	dst_w = rdst->surface.level[dst_level].npix_x;
+
+	if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w ||
+	    src_box->width != src_w ||
+	    rsrc->surface.level[src_level].nblk_y !=
+	    rdst->surface.level[dst_level].nblk_y) { /* only full-width copies between identically sized levels */
+		/* FIXME CIK can do partial blit */
+		goto fallback;
+	}
+
+	bpe = rdst->surface.bpe;
+	copy_height = src_box->height / rsrc->surface.blk_h; /* height in blocks */
+	dst_mode = rdst->surface.level[dst_level].mode;
+	src_mode = rsrc->surface.level[src_level].mode;
+	/* downcast linear aligned to linear to simplify test */
+	src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
+	dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
+
+	/* Dimensions must be aligned to (macro)tiles */
+	switch (src_mode == RADEON_SURF_MODE_LINEAR ? dst_mode : src_mode) { /* mode of the non-linear side, if any */
+	case RADEON_SURF_MODE_1D:
+		if ((src_x % 8) || (src_y % 8) || (dst_x % 8) || (dst_y % 8) ||
+		    (copy_height % 8))
+			goto fallback;
+		y_align = 8;
+		break;
+	case RADEON_SURF_MODE_2D: {
+		unsigned mtilew, mtileh, num_banks;
+
+			switch (si_num_banks(sctx->screen, rsrc)) { /* NOTE(review): reads rsrc even if dst is the tiled side - confirm */
+			case V_02803C_ADDR_SURF_2_BANK:
+			default:
+				num_banks = 2;
+				break;
+			case V_02803C_ADDR_SURF_4_BANK:
+				num_banks = 4;
+				break;
+			case V_02803C_ADDR_SURF_8_BANK:
+				num_banks = 8;
+				break;
+			case V_02803C_ADDR_SURF_16_BANK:
+				num_banks = 16;
+				break;
+			}
+
+			mtilew = (8 * rsrc->surface.bankw *
+				  sctx->screen->b.tiling_info.num_channels) *
+				rsrc->surface.mtilea;
+			assert(!(mtilew & (mtilew - 1))); /* the masks below need a power of two */
+			mtileh = (8 * rsrc->surface.bankh * num_banks) /
+				rsrc->surface.mtilea;
+			assert(!(mtileh & (mtileh - 1))); /* the masks below need a power of two */
+
+			if ((src_x & (mtilew - 1)) || (src_y & (mtileh - 1)) ||
+			    (dst_x & (mtilew - 1)) || (dst_y & (mtileh - 1)) ||
+			    (copy_height & (mtileh - 1)))
+				goto fallback;
+
+			y_align = mtileh;
+			break;
+	}
+	default:
+		y_align = 1;
+	}
+
+	if (src_mode == dst_mode) { /* same layout on both sides: plain linear copy of the raw bytes */
+		uint64_t dst_offset, src_offset;
+		unsigned src_h, dst_h;
+
+		src_h = rsrc->surface.level[src_level].npix_y;
+		dst_h = rdst->surface.level[dst_level].npix_y;
+
+		if (src_box->depth > 1 &&
+		    (src_y || dst_y || src_h != dst_h || src_box->height != src_h))
+			goto fallback;
+
+		/* A simple DMA blit will do. NOTE: the code here assumes
+		 *   dst_pitch == src_pitch
+		 */
+		src_offset= rsrc->surface.level[src_level].offset;
+		src_offset += rsrc->surface.level[src_level].slice_size * src_box->z;
+		src_offset += src_y * src_pitch + src_x * bpe;
+		dst_offset = rdst->surface.level[dst_level].offset;
+		dst_offset += rdst->surface.level[dst_level].slice_size * dst_z;
+		dst_offset += dst_y * dst_pitch + dst_x * bpe;
+		cik_sdma_do_copy_buffer(sctx, dst, src, dst_offset, src_offset,
+					src_box->depth *
+					rsrc->surface.level[src_level].slice_size);
+	} else { /* layouts differ: tiled <-> linear copy of a single slice */
+		if (dst_y != src_y || src_box->depth > 1 || src_box->z || dst_z)
+			goto fallback;
+
+		cik_sdma_copy_tile(sctx, dst, dst_level, src, src_level,
+				   src_y, copy_height, y_align, dst_pitch, bpe);
+	}
+	return;
+
+fallback:
+	si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
+				src, src_level, src_box);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/Makefile.sources	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -1,7 +1,9 @@
 C_SOURCES := \
+	cik_sdma.c \
 	si_blit.c \
 	si_commands.c \
 	si_compute.c \
+	si_cp_dma.c \
 	si_descriptors.c \
 	sid.h \
 	si_dma.c \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_blit.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,17 +57,19 @@
 	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
 	util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
 	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
+	util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader);
+	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
 	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
 	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
 	if (sctx->queued.named.sample_mask) {
 		util_blitter_save_sample_mask(sctx->blitter,
 					      sctx->queued.named.sample_mask->sample_mask);
 	}
-	if (sctx->queued.named.viewport) {
-		util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
+	if (sctx->queued.named.viewport[0]) {
+		util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
 	}
-	if (sctx->queued.named.scissor) {
-		util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor);
+	if (sctx->queued.named.scissor[0]) {
+		util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
 	}
 	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
 	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
@@ -146,7 +148,7 @@
 				struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
 
 				sctx->dbcb_copy_sample = sample;
-				sctx->db_render_state.dirty = true;
+				si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 				surf_tmpl.format = texture->resource.b.b.format;
 				surf_tmpl.u.tex.level = level;
@@ -180,7 +182,7 @@
 
 	sctx->dbcb_depth_copy_enabled = false;
 	sctx->dbcb_stencil_copy_enabled = false;
-	sctx->db_render_state.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 }
 
 static void si_blit_decompress_depth_in_place(struct si_context *sctx,
@@ -192,7 +194,7 @@
 	unsigned layer, max_layer, checked_last_layer, level;
 
 	sctx->db_inplace_flush_enabled = true;
-	sctx->db_render_state.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 	surf_tmpl.format = texture->resource.b.b.format;
 
@@ -230,7 +232,7 @@
 	}
 
 	sctx->db_inplace_flush_enabled = false;
-	sctx->db_render_state.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 }
 
 void si_flush_depth_textures(struct si_context *sctx,
@@ -340,6 +342,8 @@
 	if (buffers & PIPE_CLEAR_COLOR) {
 		evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
 					      &buffers, color);
+		if (!buffers)
+			return; /* all buffers have been fast cleared */
 	}
 
 	if (buffers & PIPE_CLEAR_COLOR) {
@@ -374,9 +378,9 @@
 		}
 
 		zstex->depth_clear_value = depth;
-		sctx->framebuffer.atom.dirty = true; /* updates DB_DEPTH_CLEAR */
+		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
 		sctx->db_depth_clear = true;
-		sctx->db_render_state.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
 
 	si_blitter_begin(ctx, SI_CLEAR);
@@ -389,7 +393,7 @@
 		sctx->db_depth_clear = false;
 		sctx->db_depth_disable_expclear = false;
 		zstex->depth_cleared = true;
-		sctx->db_render_state.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
 }
 
@@ -455,89 +459,6 @@
 	unsigned npix0_y;
 };
 
-static void si_compressed_to_blittable(struct pipe_resource *tex,
-				       unsigned level,
-				       struct texture_orig_info *orig)
-{
-	struct r600_texture *rtex = (struct r600_texture*)tex;
-	unsigned pixsize = util_format_get_blocksize(rtex->resource.b.b.format);
-	int new_format;
-	int new_height, new_width;
-
-	orig->format = tex->format;
-	orig->width0 = tex->width0;
-	orig->height0 = tex->height0;
-	orig->npix0_x = rtex->surface.level[0].npix_x;
-	orig->npix0_y = rtex->surface.level[0].npix_y;
-	orig->npix_x = rtex->surface.level[level].npix_x;
-	orig->npix_y = rtex->surface.level[level].npix_y;
-
-	if (pixsize == 8)
-		new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
-	else
-		new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
-
-	new_width = util_format_get_nblocksx(tex->format, orig->width0);
-	new_height = util_format_get_nblocksy(tex->format, orig->height0);
-
-	tex->width0 = new_width;
-	tex->height0 = new_height;
-	tex->format = new_format;
-	rtex->surface.level[0].npix_x = util_format_get_nblocksx(orig->format, orig->npix0_x);
-	rtex->surface.level[0].npix_y = util_format_get_nblocksy(orig->format, orig->npix0_y);
-	rtex->surface.level[level].npix_x = util_format_get_nblocksx(orig->format, orig->npix_x);
-	rtex->surface.level[level].npix_y = util_format_get_nblocksy(orig->format, orig->npix_y);
-
-	/* By dividing the dimensions by 4, we effectively decrement
-	 * last_level by 2, therefore the last 2 mipmap levels disappear and
-	 * aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2,
-	 * 1x1) have equal slice sizes, which is an important assumption
-	 * for this to work.
-	 *
-	 * In order to make the last 2 mipmap levels blittable, we have to
-	 * add the slice size of the last mipmap level to the texture
-	 * address, so that even though the hw thinks it reads last_level-2,
-	 * it will actually read last_level-1, and if we add the slice size*2,
-	 * it will read last_level. That's how this workaround works.
-	 */
-	if (level > rtex->resource.b.b.last_level-2)
-		rtex->mipmap_shift = level - (rtex->resource.b.b.last_level-2);
-}
-
-static void si_change_format(struct pipe_resource *tex,
-			     unsigned level,
-			     struct texture_orig_info *orig,
-			     enum pipe_format format)
-{
-	struct r600_texture *rtex = (struct r600_texture*)tex;
-
-	orig->format = tex->format;
-	orig->width0 = tex->width0;
-	orig->height0 = tex->height0;
-	orig->npix0_x = rtex->surface.level[0].npix_x;
-	orig->npix0_y = rtex->surface.level[0].npix_y;
-	orig->npix_x = rtex->surface.level[level].npix_x;
-	orig->npix_y = rtex->surface.level[level].npix_y;
-
-	tex->format = format;
-}
-
-static void si_reset_blittable_to_orig(struct pipe_resource *tex,
-				       unsigned level,
-				       struct texture_orig_info *orig)
-{
-	struct r600_texture *rtex = (struct r600_texture*)tex;
-
-	tex->format = orig->format;
-	tex->width0 = orig->width0;
-	tex->height0 = orig->height0;
-	rtex->surface.level[0].npix_x = orig->npix0_x;
-	rtex->surface.level[0].npix_y = orig->npix0_y;
-	rtex->surface.level[level].npix_x = orig->npix_x;
-	rtex->surface.level[level].npix_y = orig->npix_y;
-	rtex->mipmap_shift = 0;
-}
-
 void si_resource_copy_region(struct pipe_context *ctx,
 			     struct pipe_resource *dst,
 			     unsigned dst_level,
@@ -547,114 +468,116 @@
 			     const struct pipe_box *src_box)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct r600_texture *rdst = (struct r600_texture*)dst;
 	struct pipe_surface *dst_view, dst_templ;
 	struct pipe_sampler_view src_templ, *src_view;
-	struct texture_orig_info orig_info[2];
+	unsigned dst_width, dst_height, src_width0, src_height0;
+	unsigned src_force_level = 0;
 	struct pipe_box sbox, dstbox;
-	boolean restore_orig[2];
 
-	/* Fallback for buffers. */
+	/* Handle buffers first. */
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
 		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
 		return;
 	}
 
-	memset(orig_info, 0, sizeof(orig_info));
+	assert(u_max_sample(dst) == u_max_sample(src));
 
 	/* The driver doesn't decompress resources automatically while
 	 * u_blitter is rendering. */
 	si_decompress_subresource(ctx, src, src_level,
 				  src_box->z, src_box->z + src_box->depth - 1);
 
-	restore_orig[0] = restore_orig[1] = FALSE;
+	dst_width = u_minify(dst->width0, dst_level);
+	dst_height = u_minify(dst->height0, dst_level);
+	src_width0 = src->width0;
+	src_height0 = src->height0;
+
+	util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
+	util_blitter_default_src_texture(&src_templ, src, src_level);
 
 	if (util_format_is_compressed(src->format) &&
 	    util_format_is_compressed(dst->format)) {
-		si_compressed_to_blittable(src, src_level, &orig_info[0]);
-		restore_orig[0] = TRUE;
-		sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
-		sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y);
+		unsigned blocksize = util_format_get_blocksize(src->format);
+
+		if (blocksize == 8)
+			src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
+		else
+			src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
+		dst_templ.format = src_templ.format;
+
+		dst_width = util_format_get_nblocksx(dst->format, dst_width);
+		dst_height = util_format_get_nblocksy(dst->format, dst_height);
+		src_width0 = util_format_get_nblocksx(src->format, src_width0);
+		src_height0 = util_format_get_nblocksy(src->format, src_height0);
+
+		dstx = util_format_get_nblocksx(dst->format, dstx);
+		dsty = util_format_get_nblocksy(dst->format, dsty);
+
+		sbox.x = util_format_get_nblocksx(src->format, src_box->x);
+		sbox.y = util_format_get_nblocksy(src->format, src_box->y);
 		sbox.z = src_box->z;
-		sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
-		sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height);
+		sbox.width = util_format_get_nblocksx(src->format, src_box->width);
+		sbox.height = util_format_get_nblocksy(src->format, src_box->height);
 		sbox.depth = src_box->depth;
 		src_box = &sbox;
 
-		si_compressed_to_blittable(dst, dst_level, &orig_info[1]);
-		restore_orig[1] = TRUE;
-		/* translate the dst box as well */
-		dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
-		dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
-	} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) {
+		src_force_level = src_level;
+	} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) ||
+		   /* also *8_SNORM has precision issues, use UNORM instead */
+		   util_format_is_snorm(src->format)) {
 		if (util_format_is_subsampled_422(src->format)) {
-			/* XXX untested */
-			si_change_format(src, src_level, &orig_info[0],
-					 PIPE_FORMAT_R8G8B8A8_UINT);
-			si_change_format(dst, dst_level, &orig_info[1],
-					 PIPE_FORMAT_R8G8B8A8_UINT);
+			src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
+			dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
+
+			dst_width = util_format_get_nblocksx(dst->format, dst_width);
+			src_width0 = util_format_get_nblocksx(src->format, src_width0);
+
+			dstx = util_format_get_nblocksx(dst->format, dstx);
 
 			sbox = *src_box;
-			sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
-			sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
+			sbox.x = util_format_get_nblocksx(src->format, src_box->x);
+			sbox.width = util_format_get_nblocksx(src->format, src_box->width);
 			src_box = &sbox;
-			dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
-
-			restore_orig[0] = TRUE;
-			restore_orig[1] = TRUE;
 		} else {
 			unsigned blocksize = util_format_get_blocksize(src->format);
 
 			switch (blocksize) {
 			case 1:
-				si_change_format(src, src_level, &orig_info[0],
-						PIPE_FORMAT_R8_UNORM);
-				si_change_format(dst, dst_level, &orig_info[1],
-						PIPE_FORMAT_R8_UNORM);
+				dst_templ.format = PIPE_FORMAT_R8_UNORM;
+				src_templ.format = PIPE_FORMAT_R8_UNORM;
 				break;
 			case 2:
-				si_change_format(src, src_level, &orig_info[0],
-						PIPE_FORMAT_R8G8_UNORM);
-				si_change_format(dst, dst_level, &orig_info[1],
-						PIPE_FORMAT_R8G8_UNORM);
+				dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
+				src_templ.format = PIPE_FORMAT_R8G8_UNORM;
 				break;
 			case 4:
-				si_change_format(src, src_level, &orig_info[0],
-						PIPE_FORMAT_R8G8B8A8_UNORM);
-				si_change_format(dst, dst_level, &orig_info[1],
-						PIPE_FORMAT_R8G8B8A8_UNORM);
+				dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+				src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
 				break;
 			case 8:
-				si_change_format(src, src_level, &orig_info[0],
-						PIPE_FORMAT_R16G16B16A16_UINT);
-				si_change_format(dst, dst_level, &orig_info[1],
-						PIPE_FORMAT_R16G16B16A16_UINT);
+				dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
+				src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
 				break;
 			case 16:
-				si_change_format(src, src_level, &orig_info[0],
-						PIPE_FORMAT_R32G32B32A32_UINT);
-				si_change_format(dst, dst_level, &orig_info[1],
-						PIPE_FORMAT_R32G32B32A32_UINT);
+				dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
+				src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
 				break;
 			default:
 				fprintf(stderr, "Unhandled format %s with blocksize %u\n",
 					util_format_short_name(src->format), blocksize);
 				assert(0);
 			}
-			restore_orig[0] = TRUE;
-			restore_orig[1] = TRUE;
 		}
 	}
 
 	/* Initialize the surface. */
-	util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
 	dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
-					      rdst->surface.level[dst_level].npix_x,
-					      rdst->surface.level[dst_level].npix_y);
+					      dst_width, dst_height);
 
 	/* Initialize the sampler view. */
-	util_blitter_default_src_texture(&src_templ, src, src_level);
-	src_view = ctx->create_sampler_view(ctx, src, &src_templ);
+	src_view = si_create_sampler_view_custom(ctx, src, &src_templ,
+						 src_width0, src_height0,
+						 src_force_level);
 
 	u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height),
 		 abs(src_box->depth), &dstbox);
@@ -662,18 +585,13 @@
 	/* Copy. */
 	si_blitter_begin(ctx, SI_COPY);
 	util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox,
-				  src_view, src_box, src->width0, src->height0,
-				  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
+				  src_view, src_box, src_width0, src_height0,
+				  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
+				  FALSE);
 	si_blitter_end(ctx);
 
 	pipe_surface_reference(&dst_view, NULL);
 	pipe_sampler_view_reference(&src_view, NULL);
-
-	if (restore_orig[0])
-		si_reset_blittable_to_orig(src, src_level, &orig_info[0]);
-
-	if (restore_orig[1])
-		si_reset_blittable_to_orig(dst, dst_level, &orig_info[1]);
 }
 
 /* For MSAA integer resolving to work, we change the format to NORM using this function. */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_compute.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_compute.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_compute.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_compute.c	2015-09-16 14:36:09.000000000 +0000
@@ -137,14 +137,14 @@
 	}
 #else
 
-	radeon_elf_read(code, header->num_bytes, &program->shader.binary, true);
+	radeon_elf_read(code, header->num_bytes, &program->shader.binary);
 
 	/* init_scratch_buffer patches the shader code with the scratch address,
 	 * so we need to call it before si_shader_binary_read() which uploads
 	 * the shader code to the GPU.
 	 */
 	init_scratch_buffer(sctx, program);
-	si_shader_binary_read(sctx->screen, &program->shader, &program->shader.binary);
+	si_shader_binary_read(sctx->screen, &program->shader);
 
 #endif
 	program->input_buffer =	si_resource_create_custom(sctx->b.b.screen,
@@ -309,8 +309,6 @@
 			kernel_args[i]);
 	}
 
-	sctx->b.ws->buffer_unmap(input_buffer->cs_buf);
-
 	kernel_args_va = input_buffer->gpu_address;
 	kernel_args_va += kernel_args_offset;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_cp_dma.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_cp_dma.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_cp_dma.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_cp_dma.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+
+
+/* Set this if you want the 3D engine to wait until CP DMA is done.
+ * It should be set on the last CP DMA packet. */
+#define R600_CP_DMA_SYNC	(1 << 0) /* R600+ */
+
+/* Set this if the source data was used as a destination in a previous CP DMA
+ * packet. It's for preventing a read-after-write (RAW) hazard between two
+ * CP DMA packets. */
+#define SI_CP_DMA_RAW_WAIT	(1 << 1) /* SI+ */
+#define CIK_CP_DMA_USE_L2	(1 << 2)
+
+/* Emit one CP DMA packet on the gfx ring that copies "size" bytes from the GPU
+ * address src_va to dst_va. "size" must be non-zero and fit in bits [20:0];
+ * "flags" is a mask of R600_CP_DMA_SYNC, SI_CP_DMA_RAW_WAIT, CIK_CP_DMA_USE_L2. */
+static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
+				       uint64_t dst_va, uint64_t src_va,
+				       unsigned size, unsigned flags)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+	uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
+			   PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
+
+	assert(size);
+	assert((size & ((1<<21)-1)) == size);
+
+	if (sctx->b.chip_class >= CIK) {
+		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+		radeon_emit(cs, sync_flag | sel);	/* CP_SYNC [31] | SRC_SEL/DST_SEL when USE_L2 is set */
+		radeon_emit(cs, src_va);		/* SRC_ADDR_LO [31:0] */
+		radeon_emit(cs, src_va >> 32);		/* SRC_ADDR_HI [31:0] */
+		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
+		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI [31:0] */
+		radeon_emit(cs, size | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+	} else {
+		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); /* note: "sel" is not emitted on this path */
+		radeon_emit(cs, src_va);			/* SRC_ADDR_LO [31:0] */
+		radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
+		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
+		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
+		radeon_emit(cs, size | raw_wait);		/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+	}
+}
+
+/* Emit one CP DMA packet filling "size" bytes at dst_va with clear_value. The size must be non-zero and fit in bits [20:0]. */
+static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
+					uint64_t dst_va, unsigned size,
+					uint32_t clear_value, unsigned flags)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+	uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
+
+	assert(size);
+	assert((size & ((1<<21)-1)) == size);
+
+	if (sctx->b.chip_class >= CIK) {
+		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+		radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+		radeon_emit(cs, clear_value);		/* DATA [31:0] */
+		radeon_emit(cs, 0);			/* presumably the unused SRC_ADDR_HI slot when SRC_SEL=2 supplies DATA - TODO confirm */
+		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
+		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI (emitted unmasked here, unlike the pre-CIK path) */
+		radeon_emit(cs, size | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+	} else {
+		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0)); /* note: "dst_sel" is not emitted on this path */
+		radeon_emit(cs, clear_value);		/* DATA [31:0] */
+		radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
+		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
+		radeon_emit(cs, size | raw_wait);		/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+	}
+}
+
+/* The max number of bytes to copy per packet. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+
+/* Fill "size" bytes of dst, starting at byte "offset", with the repeating
+ * 32-bit pattern "value". Dword-aligned ranges are cleared with CP DMA
+ * packets; any other range is written byte by byte through a CPU mapping.
+ * "is_framebuffer" selects which caches are flushed around the DMA. */
+static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+			    unsigned offset, unsigned size, unsigned value,
+			    bool is_framebuffer)
+{
+	struct si_context *sctx = (struct si_context*)ctx;
+	unsigned flush_flags, tc_l2_flag;
+
+	if (!size)
+		return;
+
+	/* Mark the buffer range of destination as valid (initialized),
+	 * so that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+		       offset + size);
+
+	/* Fallback for unaligned clears: CPU-write every byte at "offset", in phase with "value". */
+	if (offset % 4 != 0 || size % 4 != 0) {
+		uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+						      sctx->b.rings.gfx.cs,
+						      PIPE_TRANSFER_WRITE);
+		if (!map)
+			return; /* mapping failed; nothing can be written */
+		for (unsigned i = 0; i < size; i++)
+			map[offset + i] = (value >> (((offset + i) % 4) * 8)) & 0xff;
+		return;
+	}
+
+	uint64_t va = r600_resource(dst)->gpu_address + offset;
+
+	/* Flush the caches where the resource is bound. */
+	if (is_framebuffer) {
+		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+		tc_l2_flag = 0;
+	} else {
+		flush_flags = SI_CONTEXT_INV_TC_L1 |
+			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
+			      SI_CONTEXT_INV_KCACHE;
+		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+	}
+
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
+
+	while (size) {
+		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+		unsigned dma_flags = tc_l2_flag;
+
+		si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+
+		/* This must be done after si_need_cs_space. */
+		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+				      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+
+		/* Flush the caches for the first packet only. Also wait for the previous CP DMA operations. */
+		if (sctx->b.flags) {
+			si_emit_cache_flush(&sctx->b, NULL);
+			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
+		}
+
+		/* Do the synchronization after the last copy, so that all data is written to memory. */
+		if (size == byte_count)
+			dma_flags |= R600_CP_DMA_SYNC;
+
+		/* Emit the clear packet. */
+		si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
+
+		size -= byte_count;
+		va += byte_count;
+	}
+
+	/* Flush the caches again in case the 3D engine has been prefetching the resource. */
+	sctx->b.flags |= flush_flags;
+
+	if (tc_l2_flag)
+		r600_resource(dst)->TC_L2_dirty = true;
+}
+
+/* Copy "size" bytes from src (at src_offset) to dst (at dst_offset) using CP DMA
+ * packets on the gfx ring, split into chunks of at most CP_DMA_MAX_BYTE_COUNT. */
+void si_copy_buffer(struct si_context *sctx,
+		    struct pipe_resource *dst, struct pipe_resource *src,
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
+		    bool is_framebuffer)
+{
+	unsigned flush_flags, tc_l2_flag;
+
+	if (!size)
+		return;
+
+	/* Mark the destination range as valid (initialized), so that transfer_map
+	 * knows it should wait for the GPU when mapping that range. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
+		       dst_offset + size);
+
+	dst_offset += r600_resource(dst)->gpu_address;
+	src_offset += r600_resource(src)->gpu_address;
+
+	/* Flush the caches where the resource is bound. */
+	if (is_framebuffer) {
+		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+		tc_l2_flag = 0;
+	} else {
+		flush_flags = SI_CONTEXT_INV_TC_L1 |
+			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
+			      SI_CONTEXT_INV_KCACHE;
+		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+	}
+
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+			 flush_flags;
+
+	while (size) {
+		unsigned sync_flags = tc_l2_flag;
+		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+
+		si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+
+		/* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
+		if (sctx->b.flags) {
+			si_emit_cache_flush(&sctx->b, NULL);
+			sync_flags |= SI_CP_DMA_RAW_WAIT;
+		}
+
+		/* Do the synchronization after the last copy, so that all data is written to memory. */
+		if (size == byte_count) {
+			sync_flags |= R600_CP_DMA_SYNC;
+		}
+
+		/* This must be done after si_need_cs_space. */
+		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
+				      RADEON_USAGE_READ, RADEON_PRIO_MIN);
+		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+				      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+
+		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
+
+		size -= byte_count;
+		src_offset += byte_count;
+		dst_offset += byte_count;
+	}
+
+	/* Flush the caches again in case the 3D engine has been prefetching the resource. */
+	sctx->b.flags |= flush_flags;
+
+	if (tc_l2_flag)
+		r600_resource(dst)->TC_L2_dirty = true;
+}
+
+void si_init_cp_dma_functions(struct si_context *sctx)
+{
+	sctx->b.clear_buffer = si_clear_buffer; /* install the CP DMA based clear as this context's clear_buffer callback */
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_descriptors.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_descriptors.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_descriptors.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_descriptors.c	2015-09-16 14:36:09.000000000 +0000
@@ -72,81 +72,6 @@
 	 * descriptor */
 };
 
-/* Set this if you want the 3D engine to wait until CP DMA is done.
- * It should be set on the last CP DMA packet. */
-#define R600_CP_DMA_SYNC	(1 << 0) /* R600+ */
-
-/* Set this if the source data was used as a destination in a previous CP DMA
- * packet. It's for preventing a read-after-write (RAW) hazard between two
- * CP DMA packets. */
-#define SI_CP_DMA_RAW_WAIT	(1 << 1) /* SI+ */
-#define CIK_CP_DMA_USE_L2	(1 << 2)
-
-/* Emit a CP DMA packet to do a copy from one buffer to another.
- * The size must fit in bits [20:0].
- */
-static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
-				       uint64_t dst_va, uint64_t src_va,
-				       unsigned size, unsigned flags)
-{
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
-	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
-	uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
-			   PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
-
-	assert(size);
-	assert((size & ((1<<21)-1)) == size);
-
-	if (sctx->b.chip_class >= CIK) {
-		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
-		radeon_emit(cs, sync_flag | sel);	/* CP_SYNC [31] */
-		radeon_emit(cs, src_va);		/* SRC_ADDR_LO [31:0] */
-		radeon_emit(cs, src_va >> 32);		/* SRC_ADDR_HI [31:0] */
-		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
-		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI [31:0] */
-		radeon_emit(cs, size | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
-	} else {
-		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
-		radeon_emit(cs, src_va);			/* SRC_ADDR_LO [31:0] */
-		radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
-		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
-		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | raw_wait);		/* COMMAND [29:22] | BYTE_COUNT [20:0] */
-	}
-}
-
-/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
-static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
-					uint64_t dst_va, unsigned size,
-					uint32_t clear_value, unsigned flags)
-{
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
-	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
-	uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
-
-	assert(size);
-	assert((size & ((1<<21)-1)) == size);
-
-	if (sctx->b.chip_class >= CIK) {
-		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
-		radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
-		radeon_emit(cs, clear_value);		/* DATA [31:0] */
-		radeon_emit(cs, 0);
-		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
-		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
-	} else {
-		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
-		radeon_emit(cs, clear_value);		/* DATA [31:0] */
-		radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
-		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
-		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | raw_wait);		/* COMMAND [29:22] | BYTE_COUNT [20:0] */
-	}
-}
-
 static void si_init_descriptors(struct si_descriptors *desc,
 				unsigned shader_userdata_index,
 				unsigned element_dw_size,
@@ -197,7 +122,7 @@
 
 	desc->list_dirty = false;
 	desc->pointer_dirty = true;
-	sctx->shader_userdata.atom.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 	return true;
 }
 
@@ -504,7 +429,8 @@
 		desc[0] = va & 0xFFFFFFFF;
 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 			  S_008F04_STRIDE(vb->stride);
-		if (vb->stride)
+
+		if (sctx->b.chip_class <= CIK && vb->stride)
 			/* Round up by rounding down and adding 1 */
 			desc[2] = (vb->buffer->width0 - offset -
 				   sctx->vertex_elements->format_size[i]) /
@@ -527,7 +453,7 @@
 	 * uploaded to a fresh new buffer, so I don't think flushing the const
 	 * cache is needed. */
 	desc->pointer_dirty = true;
-	sctx->shader_userdata.atom.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 	sctx->vertex_buffers_dirty = false;
 	return true;
 }
@@ -613,7 +539,7 @@
 			struct pipe_resource *buffer,
 			unsigned stride, unsigned num_records,
 			bool add_tid, bool swizzle,
-			unsigned element_size, unsigned index_stride)
+			unsigned element_size, unsigned index_stride, uint64_t offset)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
@@ -630,7 +556,7 @@
 	if (buffer) {
 		uint64_t va;
 
-		va = r600_resource(buffer)->gpu_address;
+		va = r600_resource(buffer)->gpu_address + offset;
 
 		switch (element_size) {
 		default:
@@ -668,6 +594,9 @@
 			break;
 		}
 
+		if (sctx->b.chip_class >= VI && stride)
+			num_records *= stride;
+
 		/* Set the descriptor. */
 		uint32_t *desc = buffers->desc.list + slot*4;
 		desc[0] = va;
@@ -757,7 +686,12 @@
 			struct pipe_resource *buffer = targets[i]->buffer;
 			uint64_t va = r600_resource(buffer)->gpu_address;
 
-			/* Set the descriptor. */
+			/* Set the descriptor.
+			 *
+			 * On VI, the format must be non-INVALID, otherwise
+			 * the buffer will be considered not bound and store
+			 * instructions will be no-ops.
+			 */
 			uint32_t *desc = buffers->desc.list + bufidx*4;
 			desc[0] = va;
 			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
@@ -765,7 +699,8 @@
 			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
 				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
 			/* Set the resource. */
 			pipe_resource_reference(&buffers->buffers[bufidx],
@@ -931,163 +866,6 @@
 	}
 }
 
-/* CP DMA */
-
-/* The max number of bytes to copy per packet. */
-#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
-
-static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
-			    unsigned offset, unsigned size, unsigned value,
-			    bool is_framebuffer)
-{
-	struct si_context *sctx = (struct si_context*)ctx;
-	unsigned flush_flags, tc_l2_flag;
-
-	if (!size)
-		return;
-
-	/* Mark the buffer range of destination as valid (initialized),
-	 * so that transfer_map knows it should wait for the GPU when mapping
-	 * that range. */
-	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
-		       offset + size);
-
-	/* Fallback for unaligned clears. */
-	if (offset % 4 != 0 || size % 4 != 0) {
-		uint32_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
-						       sctx->b.rings.gfx.cs,
-						       PIPE_TRANSFER_WRITE);
-		size /= 4;
-		for (unsigned i = 0; i < size; i++)
-			*map++ = value;
-		return;
-	}
-
-	uint64_t va = r600_resource(dst)->gpu_address + offset;
-
-	/* Flush the caches where the resource is bound. */
-	if (is_framebuffer) {
-		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-		tc_l2_flag = 0;
-	} else {
-		flush_flags = SI_CONTEXT_INV_TC_L1 |
-			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
-			      SI_CONTEXT_INV_KCACHE;
-		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
-	}
-
-	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			 flush_flags;
-
-	while (size) {
-		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
-		unsigned dma_flags = tc_l2_flag;
-
-		si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
-				 FALSE);
-
-		/* This must be done after need_cs_space. */
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
-				      (struct r600_resource*)dst, RADEON_USAGE_WRITE,
-				      RADEON_PRIO_MIN);
-
-		/* Flush the caches for the first copy only.
-		 * Also wait for the previous CP DMA operations. */
-		if (sctx->b.flags) {
-			si_emit_cache_flush(&sctx->b, NULL);
-			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
-		}
-
-		/* Do the synchronization after the last copy, so that all data is written to memory. */
-		if (size == byte_count)
-			dma_flags |= R600_CP_DMA_SYNC;
-
-		/* Emit the clear packet. */
-		si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
-
-		size -= byte_count;
-		va += byte_count;
-	}
-
-	/* Flush the caches again in case the 3D engine has been prefetching
-	 * the resource. */
-	sctx->b.flags |= flush_flags;
-
-	if (tc_l2_flag)
-		r600_resource(dst)->TC_L2_dirty = true;
-}
-
-void si_copy_buffer(struct si_context *sctx,
-		    struct pipe_resource *dst, struct pipe_resource *src,
-		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
-		    bool is_framebuffer)
-{
-	unsigned flush_flags, tc_l2_flag;
-
-	if (!size)
-		return;
-
-	/* Mark the buffer range of destination as valid (initialized),
-	 * so that transfer_map knows it should wait for the GPU when mapping
-	 * that range. */
-	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
-		       dst_offset + size);
-
-	dst_offset += r600_resource(dst)->gpu_address;
-	src_offset += r600_resource(src)->gpu_address;
-
-	/* Flush the caches where the resource is bound. */
-	if (is_framebuffer) {
-		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-		tc_l2_flag = 0;
-	} else {
-		flush_flags = SI_CONTEXT_INV_TC_L1 |
-			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
-			      SI_CONTEXT_INV_KCACHE;
-		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
-	}
-
-	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			 flush_flags;
-
-	while (size) {
-		unsigned sync_flags = tc_l2_flag;
-		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
-
-		si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
-
-		/* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
-		if (sctx->b.flags) {
-			si_emit_cache_flush(&sctx->b, NULL);
-			sync_flags |= SI_CP_DMA_RAW_WAIT;
-		}
-
-		/* Do the synchronization after the last copy, so that all data is written to memory. */
-		if (size == byte_count) {
-			sync_flags |= R600_CP_DMA_SYNC;
-		}
-
-		/* This must be done after r600_need_cs_space. */
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
-				      RADEON_USAGE_READ, RADEON_PRIO_MIN);
-		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
-				      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
-
-		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
-
-		size -= byte_count;
-		src_offset += byte_count;
-		dst_offset += byte_count;
-	}
-
-	/* Flush the caches again in case the 3D engine has been prefetching
-	 * the resource. */
-	sctx->b.flags |= flush_flags;
-
-	if (tc_l2_flag)
-		r600_resource(dst)->TC_L2_dirty = true;
-}
-
 /* SHADER USER DATA */
 
 static void si_mark_shader_pointers_dirty(struct si_context *sctx,
@@ -1101,7 +879,7 @@
 	if (shader == PIPE_SHADER_VERTEX)
 		sctx->vertex_buffers.pointer_dirty = true;
 
-	sctx->shader_userdata.atom.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
 }
 
 static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
@@ -1137,13 +915,28 @@
  */
 void si_shader_change_notify(struct si_context *sctx)
 {
-	/* VS can be bound as VS or ES. */
-	if (sctx->gs_shader)
+	/* VS can be bound as VS, ES, or LS. */
+	if (sctx->tes_shader)
+		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+				      R_00B530_SPI_SHADER_USER_DATA_LS_0);
+	else if (sctx->gs_shader)
 		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
 				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
 	else
 		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
 				      R_00B130_SPI_SHADER_USER_DATA_VS_0);
+
+	/* TES can be bound as ES, VS, or not bound. */
+	if (sctx->tes_shader) {
+		if (sctx->gs_shader)
+			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+					      R_00B330_SPI_SHADER_USER_DATA_ES_0);
+		else
+			si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+					      R_00B130_SPI_SHADER_USER_DATA_VS_0);
+	} else {
+		si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
+	}
 }
 
 static void si_emit_shader_pointer(struct si_context *sctx,
@@ -1173,20 +966,33 @@
 	unsigned i;
 	uint32_t *sh_base = sctx->shader_userdata.sh_base;
 
-	/* The VS copy shader needs these for clipping, streamout, and rings. */
 	if (sctx->gs_shader) {
-		unsigned base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+		/* The VS copy shader needs these for clipping, streamout, and rings. */
+		unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
 		unsigned i = PIPE_SHADER_VERTEX;
 
-		si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, true);
-		si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, true);
+		si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, vs_base, true);
+		si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, vs_base, true);
+
+		/* The TESSEVAL shader needs this for the ESGS ring buffer. */
+		si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
+				       R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+	} else if (sctx->tes_shader) {
+		/* The TESSEVAL shader needs this for streamout. */
+		si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
+				       R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
 	}
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
 		unsigned base = sh_base[i];
 
+		if (!base)
+			continue;
+
+		if (i != PIPE_SHADER_TESS_EVAL)
+			si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
+
 		si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
-		si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
 		si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
 		si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false);
 	}
@@ -1220,7 +1026,6 @@
 	sctx->b.b.set_constant_buffer = si_set_constant_buffer;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
-	sctx->b.clear_buffer = si_clear_buffer;
 	sctx->b.invalidate_buffer = si_invalidate_buffer;
 
 	/* Shader user data. */
@@ -1232,6 +1037,7 @@
 
 	/* Set default and immutable mappings. */
 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+	si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
 	si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
 	si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/sid.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/sid.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/sid.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/sid.h	2015-09-16 14:36:09.000000000 +0000
@@ -206,6 +206,398 @@
  * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
  */
 
+
+#define R_000E4C_SRBM_STATUS2                                           0x000E4C
+#define   S_000E4C_SDMA_RQ_PENDING(x)                                 (((x) & 0x1) << 0)
+#define   G_000E4C_SDMA_RQ_PENDING(x)                                 (((x) >> 0) & 0x1)
+#define   C_000E4C_SDMA_RQ_PENDING                                    0xFFFFFFFE
+#define   S_000E4C_TST_RQ_PENDING(x)                                  (((x) & 0x1) << 1)
+#define   G_000E4C_TST_RQ_PENDING(x)                                  (((x) >> 1) & 0x1)
+#define   C_000E4C_TST_RQ_PENDING                                     0xFFFFFFFD
+#define   S_000E4C_SDMA1_RQ_PENDING(x)                                (((x) & 0x1) << 2)
+#define   G_000E4C_SDMA1_RQ_PENDING(x)                                (((x) >> 2) & 0x1)
+#define   C_000E4C_SDMA1_RQ_PENDING                                   0xFFFFFFFB
+#define   S_000E4C_VCE0_RQ_PENDING(x)                                 (((x) & 0x1) << 3)
+#define   G_000E4C_VCE0_RQ_PENDING(x)                                 (((x) >> 3) & 0x1)
+#define   C_000E4C_VCE0_RQ_PENDING                                    0xFFFFFFF7
+#define   S_000E4C_VP8_BUSY(x)                                        (((x) & 0x1) << 4)
+#define   G_000E4C_VP8_BUSY(x)                                        (((x) >> 4) & 0x1)
+#define   C_000E4C_VP8_BUSY                                           0xFFFFFFEF
+#define   S_000E4C_SDMA_BUSY(x)                                       (((x) & 0x1) << 5)
+#define   G_000E4C_SDMA_BUSY(x)                                       (((x) >> 5) & 0x1)
+#define   C_000E4C_SDMA_BUSY                                          0xFFFFFFDF
+#define   S_000E4C_SDMA1_BUSY(x)                                      (((x) & 0x1) << 6)
+#define   G_000E4C_SDMA1_BUSY(x)                                      (((x) >> 6) & 0x1)
+#define   C_000E4C_SDMA1_BUSY                                         0xFFFFFFBF
+#define   S_000E4C_VCE0_BUSY(x)                                       (((x) & 0x1) << 7)
+#define   G_000E4C_VCE0_BUSY(x)                                       (((x) >> 7) & 0x1)
+#define   C_000E4C_VCE0_BUSY                                          0xFFFFFF7F
+#define   S_000E4C_XDMA_BUSY(x)                                       (((x) & 0x1) << 8)
+#define   G_000E4C_XDMA_BUSY(x)                                       (((x) >> 8) & 0x1)
+#define   C_000E4C_XDMA_BUSY                                          0xFFFFFEFF
+#define   S_000E4C_CHUB_BUSY(x)                                       (((x) & 0x1) << 9)
+#define   G_000E4C_CHUB_BUSY(x)                                       (((x) >> 9) & 0x1)
+#define   C_000E4C_CHUB_BUSY                                          0xFFFFFDFF
+#define   S_000E4C_SDMA2_BUSY(x)                                      (((x) & 0x1) << 10)
+#define   G_000E4C_SDMA2_BUSY(x)                                      (((x) >> 10) & 0x1)
+#define   C_000E4C_SDMA2_BUSY                                         0xFFFFFBFF
+#define   S_000E4C_SDMA3_BUSY(x)                                      (((x) & 0x1) << 11)
+#define   G_000E4C_SDMA3_BUSY(x)                                      (((x) >> 11) & 0x1)
+#define   C_000E4C_SDMA3_BUSY                                         0xFFFFF7FF
+#define   S_000E4C_SAMSCP_BUSY(x)                                     (((x) & 0x1) << 12)
+#define   G_000E4C_SAMSCP_BUSY(x)                                     (((x) >> 12) & 0x1)
+#define   C_000E4C_SAMSCP_BUSY                                        0xFFFFEFFF
+#define   S_000E4C_ISP_BUSY(x)                                        (((x) & 0x1) << 13)
+#define   G_000E4C_ISP_BUSY(x)                                        (((x) >> 13) & 0x1)
+#define   C_000E4C_ISP_BUSY                                           0xFFFFDFFF
+#define   S_000E4C_VCE1_BUSY(x)                                       (((x) & 0x1) << 14)
+#define   G_000E4C_VCE1_BUSY(x)                                       (((x) >> 14) & 0x1)
+#define   C_000E4C_VCE1_BUSY                                          0xFFFFBFFF
+#define   S_000E4C_ODE_BUSY(x)                                        (((x) & 0x1) << 15)
+#define   G_000E4C_ODE_BUSY(x)                                        (((x) >> 15) & 0x1)
+#define   C_000E4C_ODE_BUSY                                           0xFFFF7FFF
+#define   S_000E4C_SDMA2_RQ_PENDING(x)                                (((x) & 0x1) << 16)
+#define   G_000E4C_SDMA2_RQ_PENDING(x)                                (((x) >> 16) & 0x1)
+#define   C_000E4C_SDMA2_RQ_PENDING                                   0xFFFEFFFF
+#define   S_000E4C_SDMA3_RQ_PENDING(x)                                (((x) & 0x1) << 17)
+#define   G_000E4C_SDMA3_RQ_PENDING(x)                                (((x) >> 17) & 0x1)
+#define   C_000E4C_SDMA3_RQ_PENDING                                   0xFFFDFFFF
+#define   S_000E4C_SAMSCP_RQ_PENDING(x)                               (((x) & 0x1) << 18)
+#define   G_000E4C_SAMSCP_RQ_PENDING(x)                               (((x) >> 18) & 0x1)
+#define   C_000E4C_SAMSCP_RQ_PENDING                                  0xFFFBFFFF
+#define   S_000E4C_ISP_RQ_PENDING(x)                                  (((x) & 0x1) << 19)
+#define   G_000E4C_ISP_RQ_PENDING(x)                                  (((x) >> 19) & 0x1)
+#define   C_000E4C_ISP_RQ_PENDING                                     0xFFF7FFFF
+#define   S_000E4C_VCE1_RQ_PENDING(x)                                 (((x) & 0x1) << 20)
+#define   G_000E4C_VCE1_RQ_PENDING(x)                                 (((x) >> 20) & 0x1)
+#define   C_000E4C_VCE1_RQ_PENDING                                    0xFFEFFFFF
+#define R_000E50_SRBM_STATUS                                            0x000E50
+#define   S_000E50_UVD_RQ_PENDING(x)                                  (((x) & 0x1) << 1)
+#define   G_000E50_UVD_RQ_PENDING(x)                                  (((x) >> 1) & 0x1)
+#define   C_000E50_UVD_RQ_PENDING                                     0xFFFFFFFD
+#define   S_000E50_SAMMSP_RQ_PENDING(x)                               (((x) & 0x1) << 2)
+#define   G_000E50_SAMMSP_RQ_PENDING(x)                               (((x) >> 2) & 0x1)
+#define   C_000E50_SAMMSP_RQ_PENDING                                  0xFFFFFFFB
+#define   S_000E50_ACP_RQ_PENDING(x)                                  (((x) & 0x1) << 3)
+#define   G_000E50_ACP_RQ_PENDING(x)                                  (((x) >> 3) & 0x1)
+#define   C_000E50_ACP_RQ_PENDING                                     0xFFFFFFF7
+#define   S_000E50_SMU_RQ_PENDING(x)                                  (((x) & 0x1) << 4)
+#define   G_000E50_SMU_RQ_PENDING(x)                                  (((x) >> 4) & 0x1)
+#define   C_000E50_SMU_RQ_PENDING                                     0xFFFFFFEF
+#define   S_000E50_GRBM_RQ_PENDING(x)                                 (((x) & 0x1) << 5)
+#define   G_000E50_GRBM_RQ_PENDING(x)                                 (((x) >> 5) & 0x1)
+#define   C_000E50_GRBM_RQ_PENDING                                    0xFFFFFFDF
+#define   S_000E50_HI_RQ_PENDING(x)                                   (((x) & 0x1) << 6)
+#define   G_000E50_HI_RQ_PENDING(x)                                   (((x) >> 6) & 0x1)
+#define   C_000E50_HI_RQ_PENDING                                      0xFFFFFFBF
+#define   S_000E50_VMC_BUSY(x)                                        (((x) & 0x1) << 8)
+#define   G_000E50_VMC_BUSY(x)                                        (((x) >> 8) & 0x1)
+#define   C_000E50_VMC_BUSY                                           0xFFFFFEFF
+#define   S_000E50_MCB_BUSY(x)                                        (((x) & 0x1) << 9)
+#define   G_000E50_MCB_BUSY(x)                                        (((x) >> 9) & 0x1)
+#define   C_000E50_MCB_BUSY                                           0xFFFFFDFF
+#define   S_000E50_MCB_NON_DISPLAY_BUSY(x)                            (((x) & 0x1) << 10)
+#define   G_000E50_MCB_NON_DISPLAY_BUSY(x)                            (((x) >> 10) & 0x1)
+#define   C_000E50_MCB_NON_DISPLAY_BUSY                               0xFFFFFBFF
+#define   S_000E50_MCC_BUSY(x)                                        (((x) & 0x1) << 11)
+#define   G_000E50_MCC_BUSY(x)                                        (((x) >> 11) & 0x1)
+#define   C_000E50_MCC_BUSY                                           0xFFFFF7FF
+#define   S_000E50_MCD_BUSY(x)                                        (((x) & 0x1) << 12)
+#define   G_000E50_MCD_BUSY(x)                                        (((x) >> 12) & 0x1)
+#define   C_000E50_MCD_BUSY                                           0xFFFFEFFF
+#define   S_000E50_VMC1_BUSY(x)                                       (((x) & 0x1) << 13)
+#define   G_000E50_VMC1_BUSY(x)                                       (((x) >> 13) & 0x1)
+#define   C_000E50_VMC1_BUSY                                          0xFFFFDFFF
+#define   S_000E50_SEM_BUSY(x)                                        (((x) & 0x1) << 14)
+#define   G_000E50_SEM_BUSY(x)                                        (((x) >> 14) & 0x1)
+#define   C_000E50_SEM_BUSY                                           0xFFFFBFFF
+#define   S_000E50_ACP_BUSY(x)                                        (((x) & 0x1) << 16)
+#define   G_000E50_ACP_BUSY(x)                                        (((x) >> 16) & 0x1)
+#define   C_000E50_ACP_BUSY                                           0xFFFEFFFF
+#define   S_000E50_IH_BUSY(x)                                         (((x) & 0x1) << 17)
+#define   G_000E50_IH_BUSY(x)                                         (((x) >> 17) & 0x1)
+#define   C_000E50_IH_BUSY                                            0xFFFDFFFF
+#define   S_000E50_UVD_BUSY(x)                                        (((x) & 0x1) << 19)
+#define   G_000E50_UVD_BUSY(x)                                        (((x) >> 19) & 0x1)
+#define   C_000E50_UVD_BUSY                                           0xFFF7FFFF
+#define   S_000E50_SAMMSP_BUSY(x)                                     (((x) & 0x1) << 20)
+#define   G_000E50_SAMMSP_BUSY(x)                                     (((x) >> 20) & 0x1)
+#define   C_000E50_SAMMSP_BUSY                                        0xFFEFFFFF
+#define   S_000E50_GCATCL2_BUSY(x)                                    (((x) & 0x1) << 21)
+#define   G_000E50_GCATCL2_BUSY(x)                                    (((x) >> 21) & 0x1)
+#define   C_000E50_GCATCL2_BUSY                                       0xFFDFFFFF
+#define   S_000E50_OSATCL2_BUSY(x)                                    (((x) & 0x1) << 22)
+#define   G_000E50_OSATCL2_BUSY(x)                                    (((x) >> 22) & 0x1)
+#define   C_000E50_OSATCL2_BUSY                                       0xFFBFFFFF
+#define   S_000E50_BIF_BUSY(x)                                        (((x) & 0x1) << 29)
+#define   G_000E50_BIF_BUSY(x)                                        (((x) >> 29) & 0x1)
+#define   C_000E50_BIF_BUSY                                           0xDFFFFFFF
+#define R_000E54_SRBM_STATUS3                                           0x000E54
+#define   S_000E54_MCC0_BUSY(x)                                       (((x) & 0x1) << 0)
+#define   G_000E54_MCC0_BUSY(x)                                       (((x) >> 0) & 0x1)
+#define   C_000E54_MCC0_BUSY                                          0xFFFFFFFE
+#define   S_000E54_MCC1_BUSY(x)                                       (((x) & 0x1) << 1)
+#define   G_000E54_MCC1_BUSY(x)                                       (((x) >> 1) & 0x1)
+#define   C_000E54_MCC1_BUSY                                          0xFFFFFFFD
+#define   S_000E54_MCC2_BUSY(x)                                       (((x) & 0x1) << 2)
+#define   G_000E54_MCC2_BUSY(x)                                       (((x) >> 2) & 0x1)
+#define   C_000E54_MCC2_BUSY                                          0xFFFFFFFB
+#define   S_000E54_MCC3_BUSY(x)                                       (((x) & 0x1) << 3)
+#define   G_000E54_MCC3_BUSY(x)                                       (((x) >> 3) & 0x1)
+#define   C_000E54_MCC3_BUSY                                          0xFFFFFFF7
+#define   S_000E54_MCC4_BUSY(x)                                       (((x) & 0x1) << 4)
+#define   G_000E54_MCC4_BUSY(x)                                       (((x) >> 4) & 0x1)
+#define   C_000E54_MCC4_BUSY                                          0xFFFFFFEF
+#define   S_000E54_MCC5_BUSY(x)                                       (((x) & 0x1) << 5)
+#define   G_000E54_MCC5_BUSY(x)                                       (((x) >> 5) & 0x1)
+#define   C_000E54_MCC5_BUSY                                          0xFFFFFFDF
+#define   S_000E54_MCC6_BUSY(x)                                       (((x) & 0x1) << 6)
+#define   G_000E54_MCC6_BUSY(x)                                       (((x) >> 6) & 0x1)
+#define   C_000E54_MCC6_BUSY                                          0xFFFFFFBF
+#define   S_000E54_MCC7_BUSY(x)                                       (((x) & 0x1) << 7)
+#define   G_000E54_MCC7_BUSY(x)                                       (((x) >> 7) & 0x1)
+#define   C_000E54_MCC7_BUSY                                          0xFFFFFF7F
+#define   S_000E54_MCD0_BUSY(x)                                       (((x) & 0x1) << 8)
+#define   G_000E54_MCD0_BUSY(x)                                       (((x) >> 8) & 0x1)
+#define   C_000E54_MCD0_BUSY                                          0xFFFFFEFF
+#define   S_000E54_MCD1_BUSY(x)                                       (((x) & 0x1) << 9)
+#define   G_000E54_MCD1_BUSY(x)                                       (((x) >> 9) & 0x1)
+#define   C_000E54_MCD1_BUSY                                          0xFFFFFDFF
+#define   S_000E54_MCD2_BUSY(x)                                       (((x) & 0x1) << 10)
+#define   G_000E54_MCD2_BUSY(x)                                       (((x) >> 10) & 0x1)
+#define   C_000E54_MCD2_BUSY                                          0xFFFFFBFF
+#define   S_000E54_MCD3_BUSY(x)                                       (((x) & 0x1) << 11)
+#define   G_000E54_MCD3_BUSY(x)                                       (((x) >> 11) & 0x1)
+#define   C_000E54_MCD3_BUSY                                          0xFFFFF7FF
+#define   S_000E54_MCD4_BUSY(x)                                       (((x) & 0x1) << 12)
+#define   G_000E54_MCD4_BUSY(x)                                       (((x) >> 12) & 0x1)
+#define   C_000E54_MCD4_BUSY                                          0xFFFFEFFF
+#define   S_000E54_MCD5_BUSY(x)                                       (((x) & 0x1) << 13)
+#define   G_000E54_MCD5_BUSY(x)                                       (((x) >> 13) & 0x1)
+#define   C_000E54_MCD5_BUSY                                          0xFFFFDFFF
+#define   S_000E54_MCD6_BUSY(x)                                       (((x) & 0x1) << 14)
+#define   G_000E54_MCD6_BUSY(x)                                       (((x) >> 14) & 0x1)
+#define   C_000E54_MCD6_BUSY                                          0xFFFFBFFF
+#define   S_000E54_MCD7_BUSY(x)                                       (((x) & 0x1) << 15)
+#define   G_000E54_MCD7_BUSY(x)                                       (((x) >> 15) & 0x1)
+#define   C_000E54_MCD7_BUSY                                          0xFFFF7FFF
+#define R_00D034_SDMA0_STATUS_REG                                       0x00D034
+#define   S_00D034_IDLE(x)                                            (((x) & 0x1) << 0)
+#define   G_00D034_IDLE(x)                                            (((x) >> 0) & 0x1)
+#define   C_00D034_IDLE                                               0xFFFFFFFE
+#define   S_00D034_REG_IDLE(x)                                        (((x) & 0x1) << 1)
+#define   G_00D034_REG_IDLE(x)                                        (((x) >> 1) & 0x1)
+#define   C_00D034_REG_IDLE                                           0xFFFFFFFD
+#define   S_00D034_RB_EMPTY(x)                                        (((x) & 0x1) << 2)
+#define   G_00D034_RB_EMPTY(x)                                        (((x) >> 2) & 0x1)
+#define   C_00D034_RB_EMPTY                                           0xFFFFFFFB
+#define   S_00D034_RB_FULL(x)                                         (((x) & 0x1) << 3)
+#define   G_00D034_RB_FULL(x)                                         (((x) >> 3) & 0x1)
+#define   C_00D034_RB_FULL                                            0xFFFFFFF7
+#define   S_00D034_RB_CMD_IDLE(x)                                     (((x) & 0x1) << 4)
+#define   G_00D034_RB_CMD_IDLE(x)                                     (((x) >> 4) & 0x1)
+#define   C_00D034_RB_CMD_IDLE                                        0xFFFFFFEF
+#define   S_00D034_RB_CMD_FULL(x)                                     (((x) & 0x1) << 5)
+#define   G_00D034_RB_CMD_FULL(x)                                     (((x) >> 5) & 0x1)
+#define   C_00D034_RB_CMD_FULL                                        0xFFFFFFDF
+#define   S_00D034_IB_CMD_IDLE(x)                                     (((x) & 0x1) << 6)
+#define   G_00D034_IB_CMD_IDLE(x)                                     (((x) >> 6) & 0x1)
+#define   C_00D034_IB_CMD_IDLE                                        0xFFFFFFBF
+#define   S_00D034_IB_CMD_FULL(x)                                     (((x) & 0x1) << 7)
+#define   G_00D034_IB_CMD_FULL(x)                                     (((x) >> 7) & 0x1)
+#define   C_00D034_IB_CMD_FULL                                        0xFFFFFF7F
+#define   S_00D034_BLOCK_IDLE(x)                                      (((x) & 0x1) << 8)
+#define   G_00D034_BLOCK_IDLE(x)                                      (((x) >> 8) & 0x1)
+#define   C_00D034_BLOCK_IDLE                                         0xFFFFFEFF
+#define   S_00D034_INSIDE_IB(x)                                       (((x) & 0x1) << 9)
+#define   G_00D034_INSIDE_IB(x)                                       (((x) >> 9) & 0x1)
+#define   C_00D034_INSIDE_IB                                          0xFFFFFDFF
+#define   S_00D034_EX_IDLE(x)                                         (((x) & 0x1) << 10)
+#define   G_00D034_EX_IDLE(x)                                         (((x) >> 10) & 0x1)
+#define   C_00D034_EX_IDLE                                            0xFFFFFBFF
+#define   S_00D034_EX_IDLE_POLL_TIMER_EXPIRE(x)                       (((x) & 0x1) << 11)
+#define   G_00D034_EX_IDLE_POLL_TIMER_EXPIRE(x)                       (((x) >> 11) & 0x1)
+#define   C_00D034_EX_IDLE_POLL_TIMER_EXPIRE                          0xFFFFF7FF
+#define   S_00D034_PACKET_READY(x)                                    (((x) & 0x1) << 12)
+#define   G_00D034_PACKET_READY(x)                                    (((x) >> 12) & 0x1)
+#define   C_00D034_PACKET_READY                                       0xFFFFEFFF
+#define   S_00D034_MC_WR_IDLE(x)                                      (((x) & 0x1) << 13)
+#define   G_00D034_MC_WR_IDLE(x)                                      (((x) >> 13) & 0x1)
+#define   C_00D034_MC_WR_IDLE                                         0xFFFFDFFF
+#define   S_00D034_SRBM_IDLE(x)                                       (((x) & 0x1) << 14)
+#define   G_00D034_SRBM_IDLE(x)                                       (((x) >> 14) & 0x1)
+#define   C_00D034_SRBM_IDLE                                          0xFFFFBFFF
+#define   S_00D034_CONTEXT_EMPTY(x)                                   (((x) & 0x1) << 15)
+#define   G_00D034_CONTEXT_EMPTY(x)                                   (((x) >> 15) & 0x1)
+#define   C_00D034_CONTEXT_EMPTY                                      0xFFFF7FFF
+#define   S_00D034_DELTA_RPTR_FULL(x)                                 (((x) & 0x1) << 16)
+#define   G_00D034_DELTA_RPTR_FULL(x)                                 (((x) >> 16) & 0x1)
+#define   C_00D034_DELTA_RPTR_FULL                                    0xFFFEFFFF
+#define   S_00D034_RB_MC_RREQ_IDLE(x)                                 (((x) & 0x1) << 17)
+#define   G_00D034_RB_MC_RREQ_IDLE(x)                                 (((x) >> 17) & 0x1)
+#define   C_00D034_RB_MC_RREQ_IDLE                                    0xFFFDFFFF
+#define   S_00D034_IB_MC_RREQ_IDLE(x)                                 (((x) & 0x1) << 18)
+#define   G_00D034_IB_MC_RREQ_IDLE(x)                                 (((x) >> 18) & 0x1)
+#define   C_00D034_IB_MC_RREQ_IDLE                                    0xFFFBFFFF
+#define   S_00D034_MC_RD_IDLE(x)                                      (((x) & 0x1) << 19)
+#define   G_00D034_MC_RD_IDLE(x)                                      (((x) >> 19) & 0x1)
+#define   C_00D034_MC_RD_IDLE                                         0xFFF7FFFF
+#define   S_00D034_DELTA_RPTR_EMPTY(x)                                (((x) & 0x1) << 20)
+#define   G_00D034_DELTA_RPTR_EMPTY(x)                                (((x) >> 20) & 0x1)
+#define   C_00D034_DELTA_RPTR_EMPTY                                   0xFFEFFFFF
+#define   S_00D034_MC_RD_RET_STALL(x)                                 (((x) & 0x1) << 21)
+#define   G_00D034_MC_RD_RET_STALL(x)                                 (((x) >> 21) & 0x1)
+#define   C_00D034_MC_RD_RET_STALL                                    0xFFDFFFFF
+#define   S_00D034_MC_RD_NO_POLL_IDLE(x)                              (((x) & 0x1) << 22)
+#define   G_00D034_MC_RD_NO_POLL_IDLE(x)                              (((x) >> 22) & 0x1)
+#define   C_00D034_MC_RD_NO_POLL_IDLE                                 0xFFBFFFFF
+#define   S_00D034_PREV_CMD_IDLE(x)                                   (((x) & 0x1) << 25)
+#define   G_00D034_PREV_CMD_IDLE(x)                                   (((x) >> 25) & 0x1)
+#define   C_00D034_PREV_CMD_IDLE                                      0xFDFFFFFF
+#define   S_00D034_SEM_IDLE(x)                                        (((x) & 0x1) << 26)
+#define   G_00D034_SEM_IDLE(x)                                        (((x) >> 26) & 0x1)
+#define   C_00D034_SEM_IDLE                                           0xFBFFFFFF
+#define   S_00D034_SEM_REQ_STALL(x)                                   (((x) & 0x1) << 27)
+#define   G_00D034_SEM_REQ_STALL(x)                                   (((x) >> 27) & 0x1)
+#define   C_00D034_SEM_REQ_STALL                                      0xF7FFFFFF
+#define   S_00D034_SEM_RESP_STATE(x)                                  (((x) & 0x03) << 28)
+#define   G_00D034_SEM_RESP_STATE(x)                                  (((x) >> 28) & 0x03)
+#define   C_00D034_SEM_RESP_STATE                                     0xCFFFFFFF
+#define   S_00D034_INT_IDLE(x)                                        (((x) & 0x1) << 30)
+#define   G_00D034_INT_IDLE(x)                                        (((x) >> 30) & 0x1)
+#define   C_00D034_INT_IDLE                                           0xBFFFFFFF
+#define   S_00D034_INT_REQ_STALL(x)                                   (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast avoids signed-shift UB */
+#define   G_00D034_INT_REQ_STALL(x)                                   (((x) >> 31) & 0x1) /* extract bit 31 */
+#define   C_00D034_INT_REQ_STALL                                      0x7FFFFFFF /* AND-mask that clears bit 31 */
+#define R_00D834_SDMA1_STATUS_REG                                       0x00D834 /* offset only; no S_/G_/C_ field accessors are defined for it here (layout presumably matches the 00D034 fields above - confirm) */
+#define R_008008_GRBM_STATUS2                                           0x008008 /* register offset; per field below: S_ packs a value into place, G_ extracts it, C_ is the AND-mask that clears it */
+#define   S_008008_ME0PIPE1_CMDFIFO_AVAIL(x)                          (((x) & 0x0F) << 0)
+#define   G_008008_ME0PIPE1_CMDFIFO_AVAIL(x)                          (((x) >> 0) & 0x0F)
+#define   C_008008_ME0PIPE1_CMDFIFO_AVAIL                             0xFFFFFFF0
+#define   S_008008_ME0PIPE1_CF_RQ_PENDING(x)                          (((x) & 0x1) << 4)
+#define   G_008008_ME0PIPE1_CF_RQ_PENDING(x)                          (((x) >> 4) & 0x1)
+#define   C_008008_ME0PIPE1_CF_RQ_PENDING                             0xFFFFFFEF
+#define   S_008008_ME0PIPE1_PF_RQ_PENDING(x)                          (((x) & 0x1) << 5)
+#define   G_008008_ME0PIPE1_PF_RQ_PENDING(x)                          (((x) >> 5) & 0x1)
+#define   C_008008_ME0PIPE1_PF_RQ_PENDING                             0xFFFFFFDF
+#define   S_008008_ME1PIPE0_RQ_PENDING(x)                             (((x) & 0x1) << 6)
+#define   G_008008_ME1PIPE0_RQ_PENDING(x)                             (((x) >> 6) & 0x1)
+#define   C_008008_ME1PIPE0_RQ_PENDING                                0xFFFFFFBF
+#define   S_008008_ME1PIPE1_RQ_PENDING(x)                             (((x) & 0x1) << 7)
+#define   G_008008_ME1PIPE1_RQ_PENDING(x)                             (((x) >> 7) & 0x1)
+#define   C_008008_ME1PIPE1_RQ_PENDING                                0xFFFFFF7F
+#define   S_008008_ME1PIPE2_RQ_PENDING(x)                             (((x) & 0x1) << 8)
+#define   G_008008_ME1PIPE2_RQ_PENDING(x)                             (((x) >> 8) & 0x1)
+#define   C_008008_ME1PIPE2_RQ_PENDING                                0xFFFFFEFF
+#define   S_008008_ME1PIPE3_RQ_PENDING(x)                             (((x) & 0x1) << 9)
+#define   G_008008_ME1PIPE3_RQ_PENDING(x)                             (((x) >> 9) & 0x1)
+#define   C_008008_ME1PIPE3_RQ_PENDING                                0xFFFFFDFF
+#define   S_008008_ME2PIPE0_RQ_PENDING(x)                             (((x) & 0x1) << 10)
+#define   G_008008_ME2PIPE0_RQ_PENDING(x)                             (((x) >> 10) & 0x1)
+#define   C_008008_ME2PIPE0_RQ_PENDING                                0xFFFFFBFF
+#define   S_008008_ME2PIPE1_RQ_PENDING(x)                             (((x) & 0x1) << 11)
+#define   G_008008_ME2PIPE1_RQ_PENDING(x)                             (((x) >> 11) & 0x1)
+#define   C_008008_ME2PIPE1_RQ_PENDING                                0xFFFFF7FF
+#define   S_008008_ME2PIPE2_RQ_PENDING(x)                             (((x) & 0x1) << 12)
+#define   G_008008_ME2PIPE2_RQ_PENDING(x)                             (((x) >> 12) & 0x1)
+#define   C_008008_ME2PIPE2_RQ_PENDING                                0xFFFFEFFF
+#define   S_008008_ME2PIPE3_RQ_PENDING(x)                             (((x) & 0x1) << 13)
+#define   G_008008_ME2PIPE3_RQ_PENDING(x)                             (((x) >> 13) & 0x1)
+#define   C_008008_ME2PIPE3_RQ_PENDING                                0xFFFFDFFF
+#define   S_008008_RLC_RQ_PENDING(x)                                  (((x) & 0x1) << 14)
+#define   G_008008_RLC_RQ_PENDING(x)                                  (((x) >> 14) & 0x1)
+#define   C_008008_RLC_RQ_PENDING                                     0xFFFFBFFF
+#define   S_008008_RLC_BUSY(x)                                        (((x) & 0x1) << 24) /* bits 15-23 have no accessors in this table */
+#define   G_008008_RLC_BUSY(x)                                        (((x) >> 24) & 0x1)
+#define   C_008008_RLC_BUSY                                           0xFEFFFFFF
+#define   S_008008_TC_BUSY(x)                                         (((x) & 0x1) << 25)
+#define   G_008008_TC_BUSY(x)                                         (((x) >> 25) & 0x1)
+#define   C_008008_TC_BUSY                                            0xFDFFFFFF
+#define   S_008008_TCC_CC_RESIDENT(x)                                 (((x) & 0x1) << 26)
+#define   G_008008_TCC_CC_RESIDENT(x)                                 (((x) >> 26) & 0x1)
+#define   C_008008_TCC_CC_RESIDENT                                    0xFBFFFFFF
+#define   S_008008_CPF_BUSY(x)                                        (((x) & 0x1) << 28) /* bit 27 has no accessors in this table */
+#define   G_008008_CPF_BUSY(x)                                        (((x) >> 28) & 0x1)
+#define   C_008008_CPF_BUSY                                           0xEFFFFFFF
+#define   S_008008_CPC_BUSY(x)                                        (((x) & 0x1) << 29)
+#define   G_008008_CPC_BUSY(x)                                        (((x) >> 29) & 0x1)
+#define   C_008008_CPC_BUSY                                           0xDFFFFFFF
+#define   S_008008_CPG_BUSY(x)                                        (((x) & 0x1) << 30) /* highest field defined for this register; bit 31 has no accessors */
+#define   G_008008_CPG_BUSY(x)                                        (((x) >> 30) & 0x1)
+#define   C_008008_CPG_BUSY                                           0xBFFFFFFF
+#define R_008010_GRBM_STATUS                                            0x008010 /* register offset; per field below: S_ packs a value into place, G_ extracts it, C_ is the AND-mask that clears it */
+#define   S_008010_ME0PIPE0_CMDFIFO_AVAIL(x)                          (((x) & 0x0F) << 0)
+#define   G_008010_ME0PIPE0_CMDFIFO_AVAIL(x)                          (((x) >> 0) & 0x0F)
+#define   C_008010_ME0PIPE0_CMDFIFO_AVAIL                             0xFFFFFFF0
+#define   S_008010_SRBM_RQ_PENDING(x)                                 (((x) & 0x1) << 5)
+#define   G_008010_SRBM_RQ_PENDING(x)                                 (((x) >> 5) & 0x1)
+#define   C_008010_SRBM_RQ_PENDING                                    0xFFFFFFDF
+#define   S_008010_ME0PIPE0_CF_RQ_PENDING(x)                          (((x) & 0x1) << 7)
+#define   G_008010_ME0PIPE0_CF_RQ_PENDING(x)                          (((x) >> 7) & 0x1)
+#define   C_008010_ME0PIPE0_CF_RQ_PENDING                             0xFFFFFF7F
+#define   S_008010_ME0PIPE0_PF_RQ_PENDING(x)                          (((x) & 0x1) << 8)
+#define   G_008010_ME0PIPE0_PF_RQ_PENDING(x)                          (((x) >> 8) & 0x1)
+#define   C_008010_ME0PIPE0_PF_RQ_PENDING                             0xFFFFFEFF
+#define   S_008010_GDS_DMA_RQ_PENDING(x)                              (((x) & 0x1) << 9)
+#define   G_008010_GDS_DMA_RQ_PENDING(x)                              (((x) >> 9) & 0x1)
+#define   C_008010_GDS_DMA_RQ_PENDING                                 0xFFFFFDFF
+#define   S_008010_DB_CLEAN(x)                                        (((x) & 0x1) << 12)
+#define   G_008010_DB_CLEAN(x)                                        (((x) >> 12) & 0x1)
+#define   C_008010_DB_CLEAN                                           0xFFFFEFFF
+#define   S_008010_CB_CLEAN(x)                                        (((x) & 0x1) << 13)
+#define   G_008010_CB_CLEAN(x)                                        (((x) >> 13) & 0x1)
+#define   C_008010_CB_CLEAN                                           0xFFFFDFFF
+#define   S_008010_TA_BUSY(x)                                         (((x) & 0x1) << 14)
+#define   G_008010_TA_BUSY(x)                                         (((x) >> 14) & 0x1)
+#define   C_008010_TA_BUSY                                            0xFFFFBFFF
+#define   S_008010_GDS_BUSY(x)                                        (((x) & 0x1) << 15)
+#define   G_008010_GDS_BUSY(x)                                        (((x) >> 15) & 0x1)
+#define   C_008010_GDS_BUSY                                           0xFFFF7FFF
+#define   S_008010_WD_BUSY_NO_DMA(x)                                  (((x) & 0x1) << 16)
+#define   G_008010_WD_BUSY_NO_DMA(x)                                  (((x) >> 16) & 0x1)
+#define   C_008010_WD_BUSY_NO_DMA                                     0xFFFEFFFF
+#define   S_008010_VGT_BUSY(x)                                        (((x) & 0x1) << 17)
+#define   G_008010_VGT_BUSY(x)                                        (((x) >> 17) & 0x1)
+#define   C_008010_VGT_BUSY                                           0xFFFDFFFF
+#define   S_008010_IA_BUSY_NO_DMA(x)                                  (((x) & 0x1) << 18)
+#define   G_008010_IA_BUSY_NO_DMA(x)                                  (((x) >> 18) & 0x1)
+#define   C_008010_IA_BUSY_NO_DMA                                     0xFFFBFFFF
+#define   S_008010_IA_BUSY(x)                                         (((x) & 0x1) << 19)
+#define   G_008010_IA_BUSY(x)                                         (((x) >> 19) & 0x1)
+#define   C_008010_IA_BUSY                                            0xFFF7FFFF
+#define   S_008010_SX_BUSY(x)                                         (((x) & 0x1) << 20)
+#define   G_008010_SX_BUSY(x)                                         (((x) >> 20) & 0x1)
+#define   C_008010_SX_BUSY                                            0xFFEFFFFF
+#define   S_008010_WD_BUSY(x)                                         (((x) & 0x1) << 21)
+#define   G_008010_WD_BUSY(x)                                         (((x) >> 21) & 0x1)
+#define   C_008010_WD_BUSY                                            0xFFDFFFFF
+#define   S_008010_SPI_BUSY(x)                                        (((x) & 0x1) << 22)
+#define   G_008010_SPI_BUSY(x)                                        (((x) >> 22) & 0x1)
+#define   C_008010_SPI_BUSY                                           0xFFBFFFFF
+#define   S_008010_BCI_BUSY(x)                                        (((x) & 0x1) << 23)
+#define   G_008010_BCI_BUSY(x)                                        (((x) >> 23) & 0x1)
+#define   C_008010_BCI_BUSY                                           0xFF7FFFFF
+#define   S_008010_SC_BUSY(x)                                         (((x) & 0x1) << 24)
+#define   G_008010_SC_BUSY(x)                                         (((x) >> 24) & 0x1)
+#define   C_008010_SC_BUSY                                            0xFEFFFFFF
+#define   S_008010_PA_BUSY(x)                                         (((x) & 0x1) << 25)
+#define   G_008010_PA_BUSY(x)                                         (((x) >> 25) & 0x1)
+#define   C_008010_PA_BUSY                                            0xFDFFFFFF
+#define   S_008010_DB_BUSY(x)                                         (((x) & 0x1) << 26)
+#define   G_008010_DB_BUSY(x)                                         (((x) >> 26) & 0x1)
+#define   C_008010_DB_BUSY                                            0xFBFFFFFF
+#define   S_008010_CP_COHERENCY_BUSY(x)                               (((x) & 0x1) << 28)
+#define   G_008010_CP_COHERENCY_BUSY(x)                               (((x) >> 28) & 0x1)
+#define   C_008010_CP_COHERENCY_BUSY                                  0xEFFFFFFF
+#define   S_008010_CP_BUSY(x)                                         (((x) & 0x1) << 29)
+#define   G_008010_CP_BUSY(x)                                         (((x) >> 29) & 0x1)
+#define   C_008010_CP_BUSY                                            0xDFFFFFFF
+#define   S_008010_CB_BUSY(x)                                         (((x) & 0x1) << 30)
+#define   G_008010_CB_BUSY(x)                                         (((x) >> 30) & 0x1)
+#define   C_008010_CB_BUSY                                            0xBFFFFFFF
+#define   S_008010_GUI_ACTIVE(x)                                      (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed int into bit 31 is undefined behaviour */
+#define   G_008010_GUI_ACTIVE(x)                                      (((x) >> 31) & 0x1)
+#define   C_008010_GUI_ACTIVE                                         0x7FFFFFFF
 #define GRBM_GFX_INDEX                                                  0x802C
 #define         INSTANCE_INDEX(x)                                     ((x) << 0)
 #define         SH_INDEX(x)                                           ((x) << 8)
@@ -276,12 +668,155 @@
 #define   C_0085F0_SH_ICACHE_ACTION_ENA                               0xDFFFFFFF
 #define R_0085F4_CP_COHER_SIZE                                          0x0085F4
 #define R_0085F8_CP_COHER_BASE                                          0x0085F8
-
-/* CIK */
+#define R_008014_GRBM_STATUS_SE0                                        0x008014 /* register offset; per field below: S_ packs a value into place, G_ extracts it, C_ is the AND-mask that clears it */
+#define   S_008014_DB_CLEAN(x)                                        (((x) & 0x1) << 1)
+#define   G_008014_DB_CLEAN(x)                                        (((x) >> 1) & 0x1)
+#define   C_008014_DB_CLEAN                                           0xFFFFFFFD
+#define   S_008014_CB_CLEAN(x)                                        (((x) & 0x1) << 2)
+#define   G_008014_CB_CLEAN(x)                                        (((x) >> 2) & 0x1)
+#define   C_008014_CB_CLEAN                                           0xFFFFFFFB
+#define   S_008014_BCI_BUSY(x)                                        (((x) & 0x1) << 22)
+#define   G_008014_BCI_BUSY(x)                                        (((x) >> 22) & 0x1)
+#define   C_008014_BCI_BUSY                                           0xFFBFFFFF
+#define   S_008014_VGT_BUSY(x)                                        (((x) & 0x1) << 23)
+#define   G_008014_VGT_BUSY(x)                                        (((x) >> 23) & 0x1)
+#define   C_008014_VGT_BUSY                                           0xFF7FFFFF
+#define   S_008014_PA_BUSY(x)                                         (((x) & 0x1) << 24)
+#define   G_008014_PA_BUSY(x)                                         (((x) >> 24) & 0x1)
+#define   C_008014_PA_BUSY                                            0xFEFFFFFF
+#define   S_008014_TA_BUSY(x)                                         (((x) & 0x1) << 25)
+#define   G_008014_TA_BUSY(x)                                         (((x) >> 25) & 0x1)
+#define   C_008014_TA_BUSY                                            0xFDFFFFFF
+#define   S_008014_SX_BUSY(x)                                         (((x) & 0x1) << 26)
+#define   G_008014_SX_BUSY(x)                                         (((x) >> 26) & 0x1)
+#define   C_008014_SX_BUSY                                            0xFBFFFFFF
+#define   S_008014_SPI_BUSY(x)                                        (((x) & 0x1) << 27)
+#define   G_008014_SPI_BUSY(x)                                        (((x) >> 27) & 0x1)
+#define   C_008014_SPI_BUSY                                           0xF7FFFFFF
+#define   S_008014_SC_BUSY(x)                                         (((x) & 0x1) << 29)
+#define   G_008014_SC_BUSY(x)                                         (((x) >> 29) & 0x1)
+#define   C_008014_SC_BUSY                                            0xDFFFFFFF
+#define   S_008014_DB_BUSY(x)                                         (((x) & 0x1) << 30)
+#define   G_008014_DB_BUSY(x)                                         (((x) >> 30) & 0x1)
+#define   C_008014_DB_BUSY                                            0xBFFFFFFF
+#define   S_008014_CB_BUSY(x)                                         (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed int into bit 31 is undefined behaviour */
+#define   G_008014_CB_BUSY(x)                                         (((x) >> 31) & 0x1)
+#define   C_008014_CB_BUSY                                            0x7FFFFFFF
+#define R_008018_GRBM_STATUS_SE1                                        0x008018 /* register offset; per field below: S_ packs a value into place, G_ extracts it, C_ is the AND-mask that clears it */
+#define   S_008018_DB_CLEAN(x)                                        (((x) & 0x1) << 1)
+#define   G_008018_DB_CLEAN(x)                                        (((x) >> 1) & 0x1)
+#define   C_008018_DB_CLEAN                                           0xFFFFFFFD
+#define   S_008018_CB_CLEAN(x)                                        (((x) & 0x1) << 2)
+#define   G_008018_CB_CLEAN(x)                                        (((x) >> 2) & 0x1)
+#define   C_008018_CB_CLEAN                                           0xFFFFFFFB
+#define   S_008018_BCI_BUSY(x)                                        (((x) & 0x1) << 22)
+#define   G_008018_BCI_BUSY(x)                                        (((x) >> 22) & 0x1)
+#define   C_008018_BCI_BUSY                                           0xFFBFFFFF
+#define   S_008018_VGT_BUSY(x)                                        (((x) & 0x1) << 23)
+#define   G_008018_VGT_BUSY(x)                                        (((x) >> 23) & 0x1)
+#define   C_008018_VGT_BUSY                                           0xFF7FFFFF
+#define   S_008018_PA_BUSY(x)                                         (((x) & 0x1) << 24)
+#define   G_008018_PA_BUSY(x)                                         (((x) >> 24) & 0x1)
+#define   C_008018_PA_BUSY                                            0xFEFFFFFF
+#define   S_008018_TA_BUSY(x)                                         (((x) & 0x1) << 25)
+#define   G_008018_TA_BUSY(x)                                         (((x) >> 25) & 0x1)
+#define   C_008018_TA_BUSY                                            0xFDFFFFFF
+#define   S_008018_SX_BUSY(x)                                         (((x) & 0x1) << 26)
+#define   G_008018_SX_BUSY(x)                                         (((x) >> 26) & 0x1)
+#define   C_008018_SX_BUSY                                            0xFBFFFFFF
+#define   S_008018_SPI_BUSY(x)                                        (((x) & 0x1) << 27)
+#define   G_008018_SPI_BUSY(x)                                        (((x) >> 27) & 0x1)
+#define   C_008018_SPI_BUSY                                           0xF7FFFFFF
+#define   S_008018_SC_BUSY(x)                                         (((x) & 0x1) << 29)
+#define   G_008018_SC_BUSY(x)                                         (((x) >> 29) & 0x1)
+#define   C_008018_SC_BUSY                                            0xDFFFFFFF
+#define   S_008018_DB_BUSY(x)                                         (((x) & 0x1) << 30)
+#define   G_008018_DB_BUSY(x)                                         (((x) >> 30) & 0x1)
+#define   C_008018_DB_BUSY                                            0xBFFFFFFF
+#define   S_008018_CB_BUSY(x)                                         (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed int into bit 31 is undefined behaviour */
+#define   G_008018_CB_BUSY(x)                                         (((x) >> 31) & 0x1)
+#define   C_008018_CB_BUSY                                            0x7FFFFFFF
+#define R_008038_GRBM_STATUS_SE2                                        0x008038 /* register offset; per field below: S_ packs a value into place, G_ extracts it, C_ is the AND-mask that clears it */
+#define   S_008038_DB_CLEAN(x)                                        (((x) & 0x1) << 1)
+#define   G_008038_DB_CLEAN(x)                                        (((x) >> 1) & 0x1)
+#define   C_008038_DB_CLEAN                                           0xFFFFFFFD
+#define   S_008038_CB_CLEAN(x)                                        (((x) & 0x1) << 2)
+#define   G_008038_CB_CLEAN(x)                                        (((x) >> 2) & 0x1)
+#define   C_008038_CB_CLEAN                                           0xFFFFFFFB
+#define   S_008038_BCI_BUSY(x)                                        (((x) & 0x1) << 22)
+#define   G_008038_BCI_BUSY(x)                                        (((x) >> 22) & 0x1)
+#define   C_008038_BCI_BUSY                                           0xFFBFFFFF
+#define   S_008038_VGT_BUSY(x)                                        (((x) & 0x1) << 23)
+#define   G_008038_VGT_BUSY(x)                                        (((x) >> 23) & 0x1)
+#define   C_008038_VGT_BUSY                                           0xFF7FFFFF
+#define   S_008038_PA_BUSY(x)                                         (((x) & 0x1) << 24)
+#define   G_008038_PA_BUSY(x)                                         (((x) >> 24) & 0x1)
+#define   C_008038_PA_BUSY                                            0xFEFFFFFF
+#define   S_008038_TA_BUSY(x)                                         (((x) & 0x1) << 25)
+#define   G_008038_TA_BUSY(x)                                         (((x) >> 25) & 0x1)
+#define   C_008038_TA_BUSY                                            0xFDFFFFFF
+#define   S_008038_SX_BUSY(x)                                         (((x) & 0x1) << 26)
+#define   G_008038_SX_BUSY(x)                                         (((x) >> 26) & 0x1)
+#define   C_008038_SX_BUSY                                            0xFBFFFFFF
+#define   S_008038_SPI_BUSY(x)                                        (((x) & 0x1) << 27)
+#define   G_008038_SPI_BUSY(x)                                        (((x) >> 27) & 0x1)
+#define   C_008038_SPI_BUSY                                           0xF7FFFFFF
+#define   S_008038_SC_BUSY(x)                                         (((x) & 0x1) << 29)
+#define   G_008038_SC_BUSY(x)                                         (((x) >> 29) & 0x1)
+#define   C_008038_SC_BUSY                                            0xDFFFFFFF
+#define   S_008038_DB_BUSY(x)                                         (((x) & 0x1) << 30)
+#define   G_008038_DB_BUSY(x)                                         (((x) >> 30) & 0x1)
+#define   C_008038_DB_BUSY                                            0xBFFFFFFF
+#define   S_008038_CB_BUSY(x)                                         (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed int into bit 31 is undefined behaviour */
+#define   G_008038_CB_BUSY(x)                                         (((x) >> 31) & 0x1)
+#define   C_008038_CB_BUSY                                            0x7FFFFFFF
+#define R_00803C_GRBM_STATUS_SE3                                        0x00803C /* register offset; per field below: S_ packs a value into place, G_ extracts it, C_ is the AND-mask that clears it */
+#define   S_00803C_DB_CLEAN(x)                                        (((x) & 0x1) << 1)
+#define   G_00803C_DB_CLEAN(x)                                        (((x) >> 1) & 0x1)
+#define   C_00803C_DB_CLEAN                                           0xFFFFFFFD
+#define   S_00803C_CB_CLEAN(x)                                        (((x) & 0x1) << 2)
+#define   G_00803C_CB_CLEAN(x)                                        (((x) >> 2) & 0x1)
+#define   C_00803C_CB_CLEAN                                           0xFFFFFFFB
+#define   S_00803C_BCI_BUSY(x)                                        (((x) & 0x1) << 22)
+#define   G_00803C_BCI_BUSY(x)                                        (((x) >> 22) & 0x1)
+#define   C_00803C_BCI_BUSY                                           0xFFBFFFFF
+#define   S_00803C_VGT_BUSY(x)                                        (((x) & 0x1) << 23)
+#define   G_00803C_VGT_BUSY(x)                                        (((x) >> 23) & 0x1)
+#define   C_00803C_VGT_BUSY                                           0xFF7FFFFF
+#define   S_00803C_PA_BUSY(x)                                         (((x) & 0x1) << 24)
+#define   G_00803C_PA_BUSY(x)                                         (((x) >> 24) & 0x1)
+#define   C_00803C_PA_BUSY                                            0xFEFFFFFF
+#define   S_00803C_TA_BUSY(x)                                         (((x) & 0x1) << 25)
+#define   G_00803C_TA_BUSY(x)                                         (((x) >> 25) & 0x1)
+#define   C_00803C_TA_BUSY                                            0xFDFFFFFF
+#define   S_00803C_SX_BUSY(x)                                         (((x) & 0x1) << 26)
+#define   G_00803C_SX_BUSY(x)                                         (((x) >> 26) & 0x1)
+#define   C_00803C_SX_BUSY                                            0xFBFFFFFF
+#define   S_00803C_SPI_BUSY(x)                                        (((x) & 0x1) << 27)
+#define   G_00803C_SPI_BUSY(x)                                        (((x) >> 27) & 0x1)
+#define   C_00803C_SPI_BUSY                                           0xF7FFFFFF
+#define   S_00803C_SC_BUSY(x)                                         (((x) & 0x1) << 29)
+#define   G_00803C_SC_BUSY(x)                                         (((x) >> 29) & 0x1)
+#define   C_00803C_SC_BUSY                                            0xDFFFFFFF
+#define   S_00803C_DB_BUSY(x)                                         (((x) & 0x1) << 30)
+#define   G_00803C_DB_BUSY(x)                                         (((x) >> 30) & 0x1)
+#define   C_00803C_DB_BUSY                                            0xBFFFFFFF
+#define   S_00803C_CB_BUSY(x)                                         (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed int into bit 31 is undefined behaviour */
+#define   G_00803C_CB_BUSY(x)                                         (((x) >> 31) & 0x1)
+#define   C_00803C_CB_BUSY                                            0x7FFFFFFF
+/* CIK */
+#define R_0300FC_CP_STRMOUT_CNTL                                        0x0300FC /* register offset; one 1-bit field is defined for it below */
+#define   S_0300FC_OFFSET_UPDATE_DONE(x)                              (((x) & 0x1) << 0) /* pack the value into bit 0 */
+#define   G_0300FC_OFFSET_UPDATE_DONE(x)                              (((x) >> 0) & 0x1) /* extract bit 0 */
+#define   C_0300FC_OFFSET_UPDATE_DONE                                 0xFFFFFFFE /* AND-mask that clears bit 0 */
 #define R_0301E4_CP_COHER_BASE_HI                                       0x0301E4
 #define   S_0301E4_COHER_BASE_HI_256B(x)                              (((x) & 0xFF) << 0)
 #define   G_0301E4_COHER_BASE_HI_256B(x)                              (((x) >> 0) & 0xFF)
 #define   C_0301E4_COHER_BASE_HI_256B                                 0xFFFFFF00
+#define R_0301EC_CP_COHER_START_DELAY                                   0x0301EC /* register offset; one 6-bit field is defined for it below */
+#define   S_0301EC_START_DELAY_COUNT(x)                               (((x) & 0x3F) << 0) /* pack the low 6 bits of x into bits 0-5 */
+#define   G_0301EC_START_DELAY_COUNT(x)                               (((x) >> 0) & 0x3F) /* extract bits 0-5 */
+#define   C_0301EC_START_DELAY_COUNT                                  0xFFFFFFC0 /* AND-mask that clears bits 0-5 */
 #define R_0301F0_CP_COHER_CNTL                                          0x0301F0
 #define   S_0301F0_DEST_BASE_0_ENA(x)                                 (((x) & 0x1) << 0)
 #define   G_0301F0_DEST_BASE_0_ENA(x)                                 (((x) >> 0) & 0x1)
@@ -289,6 +824,14 @@
 #define   S_0301F0_DEST_BASE_1_ENA(x)                                 (((x) & 0x1) << 1)
 #define   G_0301F0_DEST_BASE_1_ENA(x)                                 (((x) >> 1) & 0x1)
 #define   C_0301F0_DEST_BASE_1_ENA                                    0xFFFFFFFD
+/* VI: additional 1-bit fields at bits 2 and 3 */
+#define   S_0301F0_TC_SD_ACTION_ENA(x)                                (((x) & 0x1) << 2)
+#define   G_0301F0_TC_SD_ACTION_ENA(x)                                (((x) >> 2) & 0x1)
+#define   C_0301F0_TC_SD_ACTION_ENA                                   0xFFFFFFFB
+#define   S_0301F0_TC_NC_ACTION_ENA(x)                                (((x) & 0x1) << 3)
+#define   G_0301F0_TC_NC_ACTION_ENA(x)                                (((x) >> 3) & 0x1)
+#define   C_0301F0_TC_NC_ACTION_ENA                                   0xFFFFFFF7
+/* end of the VI fields above */
 #define   S_0301F0_CB0_DEST_BASE_ENA(x)                               (((x) & 0x1) << 6)
 #define   G_0301F0_CB0_DEST_BASE_ENA(x)                               (((x) >> 6) & 0x1)
 #define   C_0301F0_CB0_DEST_BASE_ENA                                  0xFFFFFFBF
@@ -319,7 +862,7 @@
 #define   S_0301F0_TCL1_VOL_ACTION_ENA(x)                             (((x) & 0x1) << 15)
 #define   G_0301F0_TCL1_VOL_ACTION_ENA(x)                             (((x) >> 15) & 0x1)
 #define   C_0301F0_TCL1_VOL_ACTION_ENA                                0xFFFF7FFF
-#define   S_0301F0_TC_VOL_ACTION_ENA(x)                               (((x) & 0x1) << 16)
+#define   S_0301F0_TC_VOL_ACTION_ENA(x)                               (((x) & 0x1) << 16) /* not on VI */
 #define   G_0301F0_TC_VOL_ACTION_ENA(x)                               (((x) >> 16) & 0x1)
 #define   C_0301F0_TC_VOL_ACTION_ENA                                  0xFFFEFFFF
 #define   S_0301F0_TC_WB_ACTION_ENA(x)                                (((x) & 0x1) << 18)
@@ -352,8 +895,389 @@
 #define   S_0301F0_SH_ICACHE_ACTION_ENA(x)                            (((x) & 0x1) << 29)
 #define   G_0301F0_SH_ICACHE_ACTION_ENA(x)                            (((x) >> 29) & 0x1)
 #define   C_0301F0_SH_ICACHE_ACTION_ENA                               0xDFFFFFFF
+/* VI */
+#define   S_0301F0_SH_KCACHE_WB_ACTION_ENA(x)                         (((x) & 0x1) << 30)
+#define   G_0301F0_SH_KCACHE_WB_ACTION_ENA(x)                         (((x) >> 30) & 0x1)
+#define   C_0301F0_SH_KCACHE_WB_ACTION_ENA                            0xBFFFFFFF
+#define   S_0301F0_SH_SD_ACTION_ENA(x)                                (((x) & 0x1) << 31)
+#define   G_0301F0_SH_SD_ACTION_ENA(x)                                (((x) >> 31) & 0x1)
+#define   C_0301F0_SH_SD_ACTION_ENA                                   0x7FFFFFFF
+/*    */
 #define R_0301F4_CP_COHER_SIZE                                          0x0301F4
 #define R_0301F8_CP_COHER_BASE                                          0x0301F8
+#define R_0301FC_CP_COHER_STATUS                                        0x0301FC
+#define   S_0301FC_MATCHING_GFX_CNTX(x)                               (((x) & 0xFF) << 0)
+#define   G_0301FC_MATCHING_GFX_CNTX(x)                               (((x) >> 0) & 0xFF)
+#define   C_0301FC_MATCHING_GFX_CNTX                                  0xFFFFFF00
+#define   S_0301FC_MEID(x)                                            (((x) & 0x03) << 24)
+#define   G_0301FC_MEID(x)                                            (((x) >> 24) & 0x03)
+#define   C_0301FC_MEID                                               0xFCFFFFFF
+#define   S_0301FC_PHASE1_STATUS(x)                                   (((x) & 0x1) << 30)
+#define   G_0301FC_PHASE1_STATUS(x)                                   (((x) >> 30) & 0x1)
+#define   C_0301FC_PHASE1_STATUS                                      0xBFFFFFFF
+#define   S_0301FC_STATUS(x)                                          (((x) & 0x1) << 31)
+#define   G_0301FC_STATUS(x)                                          (((x) >> 31) & 0x1)
+#define   C_0301FC_STATUS                                             0x7FFFFFFF
+#define R_008210_CP_CPC_STATUS                                          0x008210
+#define   S_008210_MEC1_BUSY(x)                                       (((x) & 0x1) << 0)
+#define   G_008210_MEC1_BUSY(x)                                       (((x) >> 0) & 0x1)
+#define   C_008210_MEC1_BUSY                                          0xFFFFFFFE
+#define   S_008210_MEC2_BUSY(x)                                       (((x) & 0x1) << 1)
+#define   G_008210_MEC2_BUSY(x)                                       (((x) >> 1) & 0x1)
+#define   C_008210_MEC2_BUSY                                          0xFFFFFFFD
+#define   S_008210_DC0_BUSY(x)                                        (((x) & 0x1) << 2)
+#define   G_008210_DC0_BUSY(x)                                        (((x) >> 2) & 0x1)
+#define   C_008210_DC0_BUSY                                           0xFFFFFFFB
+#define   S_008210_DC1_BUSY(x)                                        (((x) & 0x1) << 3)
+#define   G_008210_DC1_BUSY(x)                                        (((x) >> 3) & 0x1)
+#define   C_008210_DC1_BUSY                                           0xFFFFFFF7
+#define   S_008210_RCIU1_BUSY(x)                                      (((x) & 0x1) << 4)
+#define   G_008210_RCIU1_BUSY(x)                                      (((x) >> 4) & 0x1)
+#define   C_008210_RCIU1_BUSY                                         0xFFFFFFEF
+#define   S_008210_RCIU2_BUSY(x)                                      (((x) & 0x1) << 5)
+#define   G_008210_RCIU2_BUSY(x)                                      (((x) >> 5) & 0x1)
+#define   C_008210_RCIU2_BUSY                                         0xFFFFFFDF
+#define   S_008210_ROQ1_BUSY(x)                                       (((x) & 0x1) << 6)
+#define   G_008210_ROQ1_BUSY(x)                                       (((x) >> 6) & 0x1)
+#define   C_008210_ROQ1_BUSY                                          0xFFFFFFBF
+#define   S_008210_ROQ2_BUSY(x)                                       (((x) & 0x1) << 7)
+#define   G_008210_ROQ2_BUSY(x)                                       (((x) >> 7) & 0x1)
+#define   C_008210_ROQ2_BUSY                                          0xFFFFFF7F
+#define   S_008210_TCIU_BUSY(x)                                       (((x) & 0x1) << 10)
+#define   G_008210_TCIU_BUSY(x)                                       (((x) >> 10) & 0x1)
+#define   C_008210_TCIU_BUSY                                          0xFFFFFBFF
+#define   S_008210_SCRATCH_RAM_BUSY(x)                                (((x) & 0x1) << 11)
+#define   G_008210_SCRATCH_RAM_BUSY(x)                                (((x) >> 11) & 0x1)
+#define   C_008210_SCRATCH_RAM_BUSY                                   0xFFFFF7FF
+#define   S_008210_QU_BUSY(x)                                         (((x) & 0x1) << 12)
+#define   G_008210_QU_BUSY(x)                                         (((x) >> 12) & 0x1)
+#define   C_008210_QU_BUSY                                            0xFFFFEFFF
+#define   S_008210_ATCL2IU_BUSY(x)                                    (((x) & 0x1) << 13)
+#define   G_008210_ATCL2IU_BUSY(x)                                    (((x) >> 13) & 0x1)
+#define   C_008210_ATCL2IU_BUSY                                       0xFFFFDFFF
+#define   S_008210_CPG_CPC_BUSY(x)                                    (((x) & 0x1) << 29)
+#define   G_008210_CPG_CPC_BUSY(x)                                    (((x) >> 29) & 0x1)
+#define   C_008210_CPG_CPC_BUSY                                       0xDFFFFFFF
+#define   S_008210_CPF_CPC_BUSY(x)                                    (((x) & 0x1) << 30)
+#define   G_008210_CPF_CPC_BUSY(x)                                    (((x) >> 30) & 0x1)
+#define   C_008210_CPF_CPC_BUSY                                       0xBFFFFFFF
+#define   S_008210_CPC_BUSY(x)                                        (((x) & 0x1) << 31)
+#define   G_008210_CPC_BUSY(x)                                        (((x) >> 31) & 0x1)
+#define   C_008210_CPC_BUSY                                           0x7FFFFFFF
+#define R_008214_CP_CPC_BUSY_STAT                                       0x008214
+#define   S_008214_MEC1_LOAD_BUSY(x)                                  (((x) & 0x1) << 0)
+#define   G_008214_MEC1_LOAD_BUSY(x)                                  (((x) >> 0) & 0x1)
+#define   C_008214_MEC1_LOAD_BUSY                                     0xFFFFFFFE
+#define   S_008214_MEC1_SEMAPOHRE_BUSY(x)                             (((x) & 0x1) << 1)
+#define   G_008214_MEC1_SEMAPOHRE_BUSY(x)                             (((x) >> 1) & 0x1)
+#define   C_008214_MEC1_SEMAPOHRE_BUSY                                0xFFFFFFFD
+#define   S_008214_MEC1_MUTEX_BUSY(x)                                 (((x) & 0x1) << 2)
+#define   G_008214_MEC1_MUTEX_BUSY(x)                                 (((x) >> 2) & 0x1)
+#define   C_008214_MEC1_MUTEX_BUSY                                    0xFFFFFFFB
+#define   S_008214_MEC1_MESSAGE_BUSY(x)                               (((x) & 0x1) << 3)
+#define   G_008214_MEC1_MESSAGE_BUSY(x)                               (((x) >> 3) & 0x1)
+#define   C_008214_MEC1_MESSAGE_BUSY                                  0xFFFFFFF7
+#define   S_008214_MEC1_EOP_QUEUE_BUSY(x)                             (((x) & 0x1) << 4)
+#define   G_008214_MEC1_EOP_QUEUE_BUSY(x)                             (((x) >> 4) & 0x1)
+#define   C_008214_MEC1_EOP_QUEUE_BUSY                                0xFFFFFFEF
+#define   S_008214_MEC1_IQ_QUEUE_BUSY(x)                              (((x) & 0x1) << 5)
+#define   G_008214_MEC1_IQ_QUEUE_BUSY(x)                              (((x) >> 5) & 0x1)
+#define   C_008214_MEC1_IQ_QUEUE_BUSY                                 0xFFFFFFDF
+#define   S_008214_MEC1_IB_QUEUE_BUSY(x)                              (((x) & 0x1) << 6)
+#define   G_008214_MEC1_IB_QUEUE_BUSY(x)                              (((x) >> 6) & 0x1)
+#define   C_008214_MEC1_IB_QUEUE_BUSY                                 0xFFFFFFBF
+#define   S_008214_MEC1_TC_BUSY(x)                                    (((x) & 0x1) << 7)
+#define   G_008214_MEC1_TC_BUSY(x)                                    (((x) >> 7) & 0x1)
+#define   C_008214_MEC1_TC_BUSY                                       0xFFFFFF7F
+#define   S_008214_MEC1_DMA_BUSY(x)                                   (((x) & 0x1) << 8)
+#define   G_008214_MEC1_DMA_BUSY(x)                                   (((x) >> 8) & 0x1)
+#define   C_008214_MEC1_DMA_BUSY                                      0xFFFFFEFF
+#define   S_008214_MEC1_PARTIAL_FLUSH_BUSY(x)                         (((x) & 0x1) << 9)
+#define   G_008214_MEC1_PARTIAL_FLUSH_BUSY(x)                         (((x) >> 9) & 0x1)
+#define   C_008214_MEC1_PARTIAL_FLUSH_BUSY                            0xFFFFFDFF
+#define   S_008214_MEC1_PIPE0_BUSY(x)                                 (((x) & 0x1) << 10)
+#define   G_008214_MEC1_PIPE0_BUSY(x)                                 (((x) >> 10) & 0x1)
+#define   C_008214_MEC1_PIPE0_BUSY                                    0xFFFFFBFF
+#define   S_008214_MEC1_PIPE1_BUSY(x)                                 (((x) & 0x1) << 11)
+#define   G_008214_MEC1_PIPE1_BUSY(x)                                 (((x) >> 11) & 0x1)
+#define   C_008214_MEC1_PIPE1_BUSY                                    0xFFFFF7FF
+#define   S_008214_MEC1_PIPE2_BUSY(x)                                 (((x) & 0x1) << 12)
+#define   G_008214_MEC1_PIPE2_BUSY(x)                                 (((x) >> 12) & 0x1)
+#define   C_008214_MEC1_PIPE2_BUSY                                    0xFFFFEFFF
+#define   S_008214_MEC1_PIPE3_BUSY(x)                                 (((x) & 0x1) << 13)
+#define   G_008214_MEC1_PIPE3_BUSY(x)                                 (((x) >> 13) & 0x1)
+#define   C_008214_MEC1_PIPE3_BUSY                                    0xFFFFDFFF
+#define   S_008214_MEC2_LOAD_BUSY(x)                                  (((x) & 0x1) << 16)
+#define   G_008214_MEC2_LOAD_BUSY(x)                                  (((x) >> 16) & 0x1)
+#define   C_008214_MEC2_LOAD_BUSY                                     0xFFFEFFFF
+#define   S_008214_MEC2_SEMAPOHRE_BUSY(x)                             (((x) & 0x1) << 17)
+#define   G_008214_MEC2_SEMAPOHRE_BUSY(x)                             (((x) >> 17) & 0x1)
+#define   C_008214_MEC2_SEMAPOHRE_BUSY                                0xFFFDFFFF
+#define   S_008214_MEC2_MUTEX_BUSY(x)                                 (((x) & 0x1) << 18)
+#define   G_008214_MEC2_MUTEX_BUSY(x)                                 (((x) >> 18) & 0x1)
+#define   C_008214_MEC2_MUTEX_BUSY                                    0xFFFBFFFF
+#define   S_008214_MEC2_MESSAGE_BUSY(x)                               (((x) & 0x1) << 19)
+#define   G_008214_MEC2_MESSAGE_BUSY(x)                               (((x) >> 19) & 0x1)
+#define   C_008214_MEC2_MESSAGE_BUSY                                  0xFFF7FFFF
+#define   S_008214_MEC2_EOP_QUEUE_BUSY(x)                             (((x) & 0x1) << 20)
+#define   G_008214_MEC2_EOP_QUEUE_BUSY(x)                             (((x) >> 20) & 0x1)
+#define   C_008214_MEC2_EOP_QUEUE_BUSY                                0xFFEFFFFF
+#define   S_008214_MEC2_IQ_QUEUE_BUSY(x)                              (((x) & 0x1) << 21)
+#define   G_008214_MEC2_IQ_QUEUE_BUSY(x)                              (((x) >> 21) & 0x1)
+#define   C_008214_MEC2_IQ_QUEUE_BUSY                                 0xFFDFFFFF
+#define   S_008214_MEC2_IB_QUEUE_BUSY(x)                              (((x) & 0x1) << 22)
+#define   G_008214_MEC2_IB_QUEUE_BUSY(x)                              (((x) >> 22) & 0x1)
+#define   C_008214_MEC2_IB_QUEUE_BUSY                                 0xFFBFFFFF
+#define   S_008214_MEC2_TC_BUSY(x)                                    (((x) & 0x1) << 23)
+#define   G_008214_MEC2_TC_BUSY(x)                                    (((x) >> 23) & 0x1)
+#define   C_008214_MEC2_TC_BUSY                                       0xFF7FFFFF
+#define   S_008214_MEC2_DMA_BUSY(x)                                   (((x) & 0x1) << 24)
+#define   G_008214_MEC2_DMA_BUSY(x)                                   (((x) >> 24) & 0x1)
+#define   C_008214_MEC2_DMA_BUSY                                      0xFEFFFFFF
+#define   S_008214_MEC2_PARTIAL_FLUSH_BUSY(x)                         (((x) & 0x1) << 25)
+#define   G_008214_MEC2_PARTIAL_FLUSH_BUSY(x)                         (((x) >> 25) & 0x1)
+#define   C_008214_MEC2_PARTIAL_FLUSH_BUSY                            0xFDFFFFFF
+#define   S_008214_MEC2_PIPE0_BUSY(x)                                 (((x) & 0x1) << 26)
+#define   G_008214_MEC2_PIPE0_BUSY(x)                                 (((x) >> 26) & 0x1)
+#define   C_008214_MEC2_PIPE0_BUSY                                    0xFBFFFFFF
+#define   S_008214_MEC2_PIPE1_BUSY(x)                                 (((x) & 0x1) << 27)
+#define   G_008214_MEC2_PIPE1_BUSY(x)                                 (((x) >> 27) & 0x1)
+#define   C_008214_MEC2_PIPE1_BUSY                                    0xF7FFFFFF
+#define   S_008214_MEC2_PIPE2_BUSY(x)                                 (((x) & 0x1) << 28)
+#define   G_008214_MEC2_PIPE2_BUSY(x)                                 (((x) >> 28) & 0x1)
+#define   C_008214_MEC2_PIPE2_BUSY                                    0xEFFFFFFF
+#define   S_008214_MEC2_PIPE3_BUSY(x)                                 (((x) & 0x1) << 29)
+#define   G_008214_MEC2_PIPE3_BUSY(x)                                 (((x) >> 29) & 0x1)
+#define   C_008214_MEC2_PIPE3_BUSY                                    0xDFFFFFFF
+#define R_008218_CP_CPC_STALLED_STAT1                                   0x008218
+#define   S_008218_RCIU_TX_FREE_STALL(x)                              (((x) & 0x1) << 3)
+#define   G_008218_RCIU_TX_FREE_STALL(x)                              (((x) >> 3) & 0x1)
+#define   C_008218_RCIU_TX_FREE_STALL                                 0xFFFFFFF7
+#define   S_008218_RCIU_PRIV_VIOLATION(x)                             (((x) & 0x1) << 4)
+#define   G_008218_RCIU_PRIV_VIOLATION(x)                             (((x) >> 4) & 0x1)
+#define   C_008218_RCIU_PRIV_VIOLATION                                0xFFFFFFEF
+#define   S_008218_TCIU_TX_FREE_STALL(x)                              (((x) & 0x1) << 6)
+#define   G_008218_TCIU_TX_FREE_STALL(x)                              (((x) >> 6) & 0x1)
+#define   C_008218_TCIU_TX_FREE_STALL                                 0xFFFFFFBF
+#define   S_008218_MEC1_DECODING_PACKET(x)                            (((x) & 0x1) << 8)
+#define   G_008218_MEC1_DECODING_PACKET(x)                            (((x) >> 8) & 0x1)
+#define   C_008218_MEC1_DECODING_PACKET                               0xFFFFFEFF
+#define   S_008218_MEC1_WAIT_ON_RCIU(x)                               (((x) & 0x1) << 9)
+#define   G_008218_MEC1_WAIT_ON_RCIU(x)                               (((x) >> 9) & 0x1)
+#define   C_008218_MEC1_WAIT_ON_RCIU                                  0xFFFFFDFF
+#define   S_008218_MEC1_WAIT_ON_RCIU_READ(x)                          (((x) & 0x1) << 10)
+#define   G_008218_MEC1_WAIT_ON_RCIU_READ(x)                          (((x) >> 10) & 0x1)
+#define   C_008218_MEC1_WAIT_ON_RCIU_READ                             0xFFFFFBFF
+#define   S_008218_MEC1_WAIT_ON_ROQ_DATA(x)                           (((x) & 0x1) << 13)
+#define   G_008218_MEC1_WAIT_ON_ROQ_DATA(x)                           (((x) >> 13) & 0x1)
+#define   C_008218_MEC1_WAIT_ON_ROQ_DATA                              0xFFFFDFFF
+#define   S_008218_MEC2_DECODING_PACKET(x)                            (((x) & 0x1) << 16)
+#define   G_008218_MEC2_DECODING_PACKET(x)                            (((x) >> 16) & 0x1)
+#define   C_008218_MEC2_DECODING_PACKET                               0xFFFEFFFF
+#define   S_008218_MEC2_WAIT_ON_RCIU(x)                               (((x) & 0x1) << 17)
+#define   G_008218_MEC2_WAIT_ON_RCIU(x)                               (((x) >> 17) & 0x1)
+#define   C_008218_MEC2_WAIT_ON_RCIU                                  0xFFFDFFFF
+#define   S_008218_MEC2_WAIT_ON_RCIU_READ(x)                          (((x) & 0x1) << 18)
+#define   G_008218_MEC2_WAIT_ON_RCIU_READ(x)                          (((x) >> 18) & 0x1)
+#define   C_008218_MEC2_WAIT_ON_RCIU_READ                             0xFFFBFFFF
+#define   S_008218_MEC2_WAIT_ON_ROQ_DATA(x)                           (((x) & 0x1) << 21)
+#define   G_008218_MEC2_WAIT_ON_ROQ_DATA(x)                           (((x) >> 21) & 0x1)
+#define   C_008218_MEC2_WAIT_ON_ROQ_DATA                              0xFFDFFFFF
+#define   S_008218_ATCL2IU_WAITING_ON_FREE(x)                         (((x) & 0x1) << 22)
+#define   G_008218_ATCL2IU_WAITING_ON_FREE(x)                         (((x) >> 22) & 0x1)
+#define   C_008218_ATCL2IU_WAITING_ON_FREE                            0xFFBFFFFF
+#define   S_008218_ATCL2IU_WAITING_ON_TAGS(x)                         (((x) & 0x1) << 23)
+#define   G_008218_ATCL2IU_WAITING_ON_TAGS(x)                         (((x) >> 23) & 0x1)
+#define   C_008218_ATCL2IU_WAITING_ON_TAGS                            0xFF7FFFFF
+#define   S_008218_ATCL1_WAITING_ON_TRANS(x)                          (((x) & 0x1) << 24)
+#define   G_008218_ATCL1_WAITING_ON_TRANS(x)                          (((x) >> 24) & 0x1)
+#define   C_008218_ATCL1_WAITING_ON_TRANS                             0xFEFFFFFF
+#define R_00821C_CP_CPF_STATUS                                          0x00821C
+#define   S_00821C_POST_WPTR_GFX_BUSY(x)                              (((x) & 0x1) << 0)
+#define   G_00821C_POST_WPTR_GFX_BUSY(x)                              (((x) >> 0) & 0x1)
+#define   C_00821C_POST_WPTR_GFX_BUSY                                 0xFFFFFFFE
+#define   S_00821C_CSF_BUSY(x)                                        (((x) & 0x1) << 1)
+#define   G_00821C_CSF_BUSY(x)                                        (((x) >> 1) & 0x1)
+#define   C_00821C_CSF_BUSY                                           0xFFFFFFFD
+#define   S_00821C_ROQ_ALIGN_BUSY(x)                                  (((x) & 0x1) << 4)
+#define   G_00821C_ROQ_ALIGN_BUSY(x)                                  (((x) >> 4) & 0x1)
+#define   C_00821C_ROQ_ALIGN_BUSY                                     0xFFFFFFEF
+#define   S_00821C_ROQ_RING_BUSY(x)                                   (((x) & 0x1) << 5)
+#define   G_00821C_ROQ_RING_BUSY(x)                                   (((x) >> 5) & 0x1)
+#define   C_00821C_ROQ_RING_BUSY                                      0xFFFFFFDF
+#define   S_00821C_ROQ_INDIRECT1_BUSY(x)                              (((x) & 0x1) << 6)
+#define   G_00821C_ROQ_INDIRECT1_BUSY(x)                              (((x) >> 6) & 0x1)
+#define   C_00821C_ROQ_INDIRECT1_BUSY                                 0xFFFFFFBF
+#define   S_00821C_ROQ_INDIRECT2_BUSY(x)                              (((x) & 0x1) << 7)
+#define   G_00821C_ROQ_INDIRECT2_BUSY(x)                              (((x) >> 7) & 0x1)
+#define   C_00821C_ROQ_INDIRECT2_BUSY                                 0xFFFFFF7F
+#define   S_00821C_ROQ_STATE_BUSY(x)                                  (((x) & 0x1) << 8)
+#define   G_00821C_ROQ_STATE_BUSY(x)                                  (((x) >> 8) & 0x1)
+#define   C_00821C_ROQ_STATE_BUSY                                     0xFFFFFEFF
+#define   S_00821C_ROQ_CE_RING_BUSY(x)                                (((x) & 0x1) << 9)
+#define   G_00821C_ROQ_CE_RING_BUSY(x)                                (((x) >> 9) & 0x1)
+#define   C_00821C_ROQ_CE_RING_BUSY                                   0xFFFFFDFF
+#define   S_00821C_ROQ_CE_INDIRECT1_BUSY(x)                           (((x) & 0x1) << 10)
+#define   G_00821C_ROQ_CE_INDIRECT1_BUSY(x)                           (((x) >> 10) & 0x1)
+#define   C_00821C_ROQ_CE_INDIRECT1_BUSY                              0xFFFFFBFF
+#define   S_00821C_ROQ_CE_INDIRECT2_BUSY(x)                           (((x) & 0x1) << 11)
+#define   G_00821C_ROQ_CE_INDIRECT2_BUSY(x)                           (((x) >> 11) & 0x1)
+#define   C_00821C_ROQ_CE_INDIRECT2_BUSY                              0xFFFFF7FF
+#define   S_00821C_SEMAPHORE_BUSY(x)                                  (((x) & 0x1) << 12)
+#define   G_00821C_SEMAPHORE_BUSY(x)                                  (((x) >> 12) & 0x1)
+#define   C_00821C_SEMAPHORE_BUSY                                     0xFFFFEFFF
+#define   S_00821C_INTERRUPT_BUSY(x)                                  (((x) & 0x1) << 13)
+#define   G_00821C_INTERRUPT_BUSY(x)                                  (((x) >> 13) & 0x1)
+#define   C_00821C_INTERRUPT_BUSY                                     0xFFFFDFFF
+#define   S_00821C_TCIU_BUSY(x)                                       (((x) & 0x1) << 14)
+#define   G_00821C_TCIU_BUSY(x)                                       (((x) >> 14) & 0x1)
+#define   C_00821C_TCIU_BUSY                                          0xFFFFBFFF
+#define   S_00821C_HQD_BUSY(x)                                        (((x) & 0x1) << 15)
+#define   G_00821C_HQD_BUSY(x)                                        (((x) >> 15) & 0x1)
+#define   C_00821C_HQD_BUSY                                           0xFFFF7FFF
+#define   S_00821C_PRT_BUSY(x)                                        (((x) & 0x1) << 16)
+#define   G_00821C_PRT_BUSY(x)                                        (((x) >> 16) & 0x1)
+#define   C_00821C_PRT_BUSY                                           0xFFFEFFFF
+#define   S_00821C_ATCL2IU_BUSY(x)                                    (((x) & 0x1) << 17)
+#define   G_00821C_ATCL2IU_BUSY(x)                                    (((x) >> 17) & 0x1)
+#define   C_00821C_ATCL2IU_BUSY                                       0xFFFDFFFF
+#define   S_00821C_CPF_GFX_BUSY(x)                                    (((x) & 0x1) << 26)
+#define   G_00821C_CPF_GFX_BUSY(x)                                    (((x) >> 26) & 0x1)
+#define   C_00821C_CPF_GFX_BUSY                                       0xFBFFFFFF
+#define   S_00821C_CPF_CMP_BUSY(x)                                    (((x) & 0x1) << 27)
+#define   G_00821C_CPF_CMP_BUSY(x)                                    (((x) >> 27) & 0x1)
+#define   C_00821C_CPF_CMP_BUSY                                       0xF7FFFFFF
+#define   S_00821C_GRBM_CPF_STAT_BUSY(x)                              (((x) & 0x03) << 28)
+#define   G_00821C_GRBM_CPF_STAT_BUSY(x)                              (((x) >> 28) & 0x03)
+#define   C_00821C_GRBM_CPF_STAT_BUSY                                 0xCFFFFFFF
+#define   S_00821C_CPC_CPF_BUSY(x)                                    (((x) & 0x1) << 30)
+#define   G_00821C_CPC_CPF_BUSY(x)                                    (((x) >> 30) & 0x1)
+#define   C_00821C_CPC_CPF_BUSY                                       0xBFFFFFFF
+#define   S_00821C_CPF_BUSY(x)                                        (((x) & 0x1) << 31)
+#define   G_00821C_CPF_BUSY(x)                                        (((x) >> 31) & 0x1)
+#define   C_00821C_CPF_BUSY                                           0x7FFFFFFF
+#define R_008220_CP_CPF_BUSY_STAT                                       0x008220
+#define   S_008220_REG_BUS_FIFO_BUSY(x)                               (((x) & 0x1) << 0)
+#define   G_008220_REG_BUS_FIFO_BUSY(x)                               (((x) >> 0) & 0x1)
+#define   C_008220_REG_BUS_FIFO_BUSY                                  0xFFFFFFFE
+#define   S_008220_CSF_RING_BUSY(x)                                   (((x) & 0x1) << 1)
+#define   G_008220_CSF_RING_BUSY(x)                                   (((x) >> 1) & 0x1)
+#define   C_008220_CSF_RING_BUSY                                      0xFFFFFFFD
+#define   S_008220_CSF_INDIRECT1_BUSY(x)                              (((x) & 0x1) << 2)
+#define   G_008220_CSF_INDIRECT1_BUSY(x)                              (((x) >> 2) & 0x1)
+#define   C_008220_CSF_INDIRECT1_BUSY                                 0xFFFFFFFB
+#define   S_008220_CSF_INDIRECT2_BUSY(x)                              (((x) & 0x1) << 3)
+#define   G_008220_CSF_INDIRECT2_BUSY(x)                              (((x) >> 3) & 0x1)
+#define   C_008220_CSF_INDIRECT2_BUSY                                 0xFFFFFFF7
+#define   S_008220_CSF_STATE_BUSY(x)                                  (((x) & 0x1) << 4)
+#define   G_008220_CSF_STATE_BUSY(x)                                  (((x) >> 4) & 0x1)
+#define   C_008220_CSF_STATE_BUSY                                     0xFFFFFFEF
+#define   S_008220_CSF_CE_INDR1_BUSY(x)                               (((x) & 0x1) << 5)
+#define   G_008220_CSF_CE_INDR1_BUSY(x)                               (((x) >> 5) & 0x1)
+#define   C_008220_CSF_CE_INDR1_BUSY                                  0xFFFFFFDF
+#define   S_008220_CSF_CE_INDR2_BUSY(x)                               (((x) & 0x1) << 6)
+#define   G_008220_CSF_CE_INDR2_BUSY(x)                               (((x) >> 6) & 0x1)
+#define   C_008220_CSF_CE_INDR2_BUSY                                  0xFFFFFFBF
+#define   S_008220_CSF_ARBITER_BUSY(x)                                (((x) & 0x1) << 7)
+#define   G_008220_CSF_ARBITER_BUSY(x)                                (((x) >> 7) & 0x1)
+#define   C_008220_CSF_ARBITER_BUSY                                   0xFFFFFF7F
+#define   S_008220_CSF_INPUT_BUSY(x)                                  (((x) & 0x1) << 8)
+#define   G_008220_CSF_INPUT_BUSY(x)                                  (((x) >> 8) & 0x1)
+#define   C_008220_CSF_INPUT_BUSY                                     0xFFFFFEFF
+#define   S_008220_OUTSTANDING_READ_TAGS(x)                           (((x) & 0x1) << 9)
+#define   G_008220_OUTSTANDING_READ_TAGS(x)                           (((x) >> 9) & 0x1)
+#define   C_008220_OUTSTANDING_READ_TAGS                              0xFFFFFDFF
+#define   S_008220_HPD_PROCESSING_EOP_BUSY(x)                         (((x) & 0x1) << 11)
+#define   G_008220_HPD_PROCESSING_EOP_BUSY(x)                         (((x) >> 11) & 0x1)
+#define   C_008220_HPD_PROCESSING_EOP_BUSY                            0xFFFFF7FF
+#define   S_008220_HQD_DISPATCH_BUSY(x)                               (((x) & 0x1) << 12)
+#define   G_008220_HQD_DISPATCH_BUSY(x)                               (((x) >> 12) & 0x1)
+#define   C_008220_HQD_DISPATCH_BUSY                                  0xFFFFEFFF
+#define   S_008220_HQD_IQ_TIMER_BUSY(x)                               (((x) & 0x1) << 13)
+#define   G_008220_HQD_IQ_TIMER_BUSY(x)                               (((x) >> 13) & 0x1)
+#define   C_008220_HQD_IQ_TIMER_BUSY                                  0xFFFFDFFF
+#define   S_008220_HQD_DMA_OFFLOAD_BUSY(x)                            (((x) & 0x1) << 14)
+#define   G_008220_HQD_DMA_OFFLOAD_BUSY(x)                            (((x) >> 14) & 0x1)
+#define   C_008220_HQD_DMA_OFFLOAD_BUSY                               0xFFFFBFFF
+#define   S_008220_HQD_WAIT_SEMAPHORE_BUSY(x)                         (((x) & 0x1) << 15)
+#define   G_008220_HQD_WAIT_SEMAPHORE_BUSY(x)                         (((x) >> 15) & 0x1)
+#define   C_008220_HQD_WAIT_SEMAPHORE_BUSY                            0xFFFF7FFF
+#define   S_008220_HQD_SIGNAL_SEMAPHORE_BUSY(x)                       (((x) & 0x1) << 16)
+#define   G_008220_HQD_SIGNAL_SEMAPHORE_BUSY(x)                       (((x) >> 16) & 0x1)
+#define   C_008220_HQD_SIGNAL_SEMAPHORE_BUSY                          0xFFFEFFFF
+#define   S_008220_HQD_MESSAGE_BUSY(x)                                (((x) & 0x1) << 17)
+#define   G_008220_HQD_MESSAGE_BUSY(x)                                (((x) >> 17) & 0x1)
+#define   C_008220_HQD_MESSAGE_BUSY                                   0xFFFDFFFF
+#define   S_008220_HQD_PQ_FETCHER_BUSY(x)                             (((x) & 0x1) << 18)
+#define   G_008220_HQD_PQ_FETCHER_BUSY(x)                             (((x) >> 18) & 0x1)
+#define   C_008220_HQD_PQ_FETCHER_BUSY                                0xFFFBFFFF
+#define   S_008220_HQD_IB_FETCHER_BUSY(x)                             (((x) & 0x1) << 19)
+#define   G_008220_HQD_IB_FETCHER_BUSY(x)                             (((x) >> 19) & 0x1)
+#define   C_008220_HQD_IB_FETCHER_BUSY                                0xFFF7FFFF
+#define   S_008220_HQD_IQ_FETCHER_BUSY(x)                             (((x) & 0x1) << 20)
+#define   G_008220_HQD_IQ_FETCHER_BUSY(x)                             (((x) >> 20) & 0x1)
+#define   C_008220_HQD_IQ_FETCHER_BUSY                                0xFFEFFFFF
+#define   S_008220_HQD_EOP_FETCHER_BUSY(x)                            (((x) & 0x1) << 21)
+#define   G_008220_HQD_EOP_FETCHER_BUSY(x)                            (((x) >> 21) & 0x1)
+#define   C_008220_HQD_EOP_FETCHER_BUSY                               0xFFDFFFFF
+#define   S_008220_HQD_CONSUMED_RPTR_BUSY(x)                          (((x) & 0x1) << 22)
+#define   G_008220_HQD_CONSUMED_RPTR_BUSY(x)                          (((x) >> 22) & 0x1)
+#define   C_008220_HQD_CONSUMED_RPTR_BUSY                             0xFFBFFFFF
+#define   S_008220_HQD_FETCHER_ARB_BUSY(x)                            (((x) & 0x1) << 23)
+#define   G_008220_HQD_FETCHER_ARB_BUSY(x)                            (((x) >> 23) & 0x1)
+#define   C_008220_HQD_FETCHER_ARB_BUSY                               0xFF7FFFFF
+#define   S_008220_HQD_ROQ_ALIGN_BUSY(x)                              (((x) & 0x1) << 24)
+#define   G_008220_HQD_ROQ_ALIGN_BUSY(x)                              (((x) >> 24) & 0x1)
+#define   C_008220_HQD_ROQ_ALIGN_BUSY                                 0xFEFFFFFF
+#define   S_008220_HQD_ROQ_EOP_BUSY(x)                                (((x) & 0x1) << 25)
+#define   G_008220_HQD_ROQ_EOP_BUSY(x)                                (((x) >> 25) & 0x1)
+#define   C_008220_HQD_ROQ_EOP_BUSY                                   0xFDFFFFFF
+#define   S_008220_HQD_ROQ_IQ_BUSY(x)                                 (((x) & 0x1) << 26)
+#define   G_008220_HQD_ROQ_IQ_BUSY(x)                                 (((x) >> 26) & 0x1)
+#define   C_008220_HQD_ROQ_IQ_BUSY                                    0xFBFFFFFF
+#define   S_008220_HQD_ROQ_PQ_BUSY(x)                                 (((x) & 0x1) << 27)
+#define   G_008220_HQD_ROQ_PQ_BUSY(x)                                 (((x) >> 27) & 0x1)
+#define   C_008220_HQD_ROQ_PQ_BUSY                                    0xF7FFFFFF
+#define   S_008220_HQD_ROQ_IB_BUSY(x)                                 (((x) & 0x1) << 28)
+#define   G_008220_HQD_ROQ_IB_BUSY(x)                                 (((x) >> 28) & 0x1)
+#define   C_008220_HQD_ROQ_IB_BUSY                                    0xEFFFFFFF
+#define   S_008220_HQD_WPTR_POLL_BUSY(x)                              (((x) & 0x1) << 29)
+#define   G_008220_HQD_WPTR_POLL_BUSY(x)                              (((x) >> 29) & 0x1)
+#define   C_008220_HQD_WPTR_POLL_BUSY                                 0xDFFFFFFF
+#define   S_008220_HQD_PQ_BUSY(x)                                     (((x) & 0x1) << 30)
+#define   G_008220_HQD_PQ_BUSY(x)                                     (((x) >> 30) & 0x1)
+#define   C_008220_HQD_PQ_BUSY                                        0xBFFFFFFF
+#define   S_008220_HQD_IB_BUSY(x)                                     (((x) & 0x1) << 31)
+#define   G_008220_HQD_IB_BUSY(x)                                     (((x) >> 31) & 0x1)
+#define   C_008220_HQD_IB_BUSY                                        0x7FFFFFFF
+#define R_008224_CP_CPF_STALLED_STAT1                                   0x008224
+#define   S_008224_RING_FETCHING_DATA(x)                              (((x) & 0x1) << 0)
+#define   G_008224_RING_FETCHING_DATA(x)                              (((x) >> 0) & 0x1)
+#define   C_008224_RING_FETCHING_DATA                                 0xFFFFFFFE
+#define   S_008224_INDR1_FETCHING_DATA(x)                             (((x) & 0x1) << 1)
+#define   G_008224_INDR1_FETCHING_DATA(x)                             (((x) >> 1) & 0x1)
+#define   C_008224_INDR1_FETCHING_DATA                                0xFFFFFFFD
+#define   S_008224_INDR2_FETCHING_DATA(x)                             (((x) & 0x1) << 2)
+#define   G_008224_INDR2_FETCHING_DATA(x)                             (((x) >> 2) & 0x1)
+#define   C_008224_INDR2_FETCHING_DATA                                0xFFFFFFFB
+#define   S_008224_STATE_FETCHING_DATA(x)                             (((x) & 0x1) << 3)
+#define   G_008224_STATE_FETCHING_DATA(x)                             (((x) >> 3) & 0x1)
+#define   C_008224_STATE_FETCHING_DATA                                0xFFFFFFF7
+#define   S_008224_TCIU_WAITING_ON_FREE(x)                            (((x) & 0x1) << 5)
+#define   G_008224_TCIU_WAITING_ON_FREE(x)                            (((x) >> 5) & 0x1)
+#define   C_008224_TCIU_WAITING_ON_FREE                               0xFFFFFFDF
+#define   S_008224_TCIU_WAITING_ON_TAGS(x)                            (((x) & 0x1) << 6)
+#define   G_008224_TCIU_WAITING_ON_TAGS(x)                            (((x) >> 6) & 0x1)
+#define   C_008224_TCIU_WAITING_ON_TAGS                               0xFFFFFFBF
+#define   S_008224_ATCL2IU_WAITING_ON_FREE(x)                         (((x) & 0x1) << 7)
+#define   G_008224_ATCL2IU_WAITING_ON_FREE(x)                         (((x) >> 7) & 0x1)
+#define   C_008224_ATCL2IU_WAITING_ON_FREE                            0xFFFFFF7F
+#define   S_008224_ATCL2IU_WAITING_ON_TAGS(x)                         (((x) & 0x1) << 8)
+#define   G_008224_ATCL2IU_WAITING_ON_TAGS(x)                         (((x) >> 8) & 0x1)
+#define   C_008224_ATCL2IU_WAITING_ON_TAGS                            0xFFFFFEFF
+#define   S_008224_ATCL1_WAITING_ON_TRANS(x)                          (((x) & 0x1) << 9)
+#define   G_008224_ATCL1_WAITING_ON_TRANS(x)                          (((x) >> 9) & 0x1)
+#define   C_008224_ATCL1_WAITING_ON_TRANS                             0xFFFFFDFF
 #define R_030230_CP_COHER_SIZE_HI                                       0x030230
 #define   S_030230_COHER_SIZE_HI_256B(x)                              (((x) & 0xFF) << 0)
 #define   G_030230_COHER_SIZE_HI_256B(x)                              (((x) >> 0) & 0xFF)
@@ -375,10 +1299,6 @@
 #define   C_0088C4_ES_LIMIT                                           0xFFE0FFFF
 #define R_0088C8_VGT_ESGS_RING_SIZE                                     0x0088C8
 #define R_0088CC_VGT_GSVS_RING_SIZE                                     0x0088CC
-/* CIK */
-#define R_030900_VGT_ESGS_RING_SIZE                                     0x030900
-#define R_030904_VGT_GSVS_RING_SIZE                                     0x030904
-/*     */
 #define R_0088D4_VGT_GS_VERTEX_REUSE                                    0x0088D4
 #define   S_0088D4_VERT_REUSE(x)                                      (((x) & 0x1F) << 0)
 #define   G_0088D4_VERT_REUSE(x)                                      (((x) >> 0) & 0x1F)
@@ -461,7 +1381,293 @@
 #define   S_008B10_CURRENT_COUNT(x)                                   (((x) & 0xFF) << 8)
 #define   G_008B10_CURRENT_COUNT(x)                                   (((x) >> 8) & 0xFF)
 #define   C_008B10_CURRENT_COUNT                                      0xFFFF00FF
-/* CIK */
+#define R_008670_CP_STALLED_STAT3                                       0x008670 /* register offset; S_ packs, G_ extracts, C_ clears each 1-bit field (bits 0-7, 10-20) */
+#define   S_008670_CE_TO_CSF_NOT_RDY_TO_RCV(x)                        (((x) & 0x1) << 0)
+#define   G_008670_CE_TO_CSF_NOT_RDY_TO_RCV(x)                        (((x) >> 0) & 0x1)
+#define   C_008670_CE_TO_CSF_NOT_RDY_TO_RCV                           0xFFFFFFFE
+#define   S_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(x)           (((x) & 0x1) << 1)
+#define   G_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(x)           (((x) >> 1) & 0x1)
+#define   C_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV              0xFFFFFFFD
+#define   S_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(x)        (((x) & 0x1) << 2)
+#define   G_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(x)        (((x) >> 2) & 0x1)
+#define   C_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER           0xFFFFFFFB
+#define   S_008670_CE_TO_RAM_INIT_NOT_RDY(x)                          (((x) & 0x1) << 3)
+#define   G_008670_CE_TO_RAM_INIT_NOT_RDY(x)                          (((x) >> 3) & 0x1)
+#define   C_008670_CE_TO_RAM_INIT_NOT_RDY                             0xFFFFFFF7
+#define   S_008670_CE_TO_RAM_DUMP_NOT_RDY(x)                          (((x) & 0x1) << 4)
+#define   G_008670_CE_TO_RAM_DUMP_NOT_RDY(x)                          (((x) >> 4) & 0x1)
+#define   C_008670_CE_TO_RAM_DUMP_NOT_RDY                             0xFFFFFFEF
+#define   S_008670_CE_TO_RAM_WRITE_NOT_RDY(x)                         (((x) & 0x1) << 5)
+#define   G_008670_CE_TO_RAM_WRITE_NOT_RDY(x)                         (((x) >> 5) & 0x1)
+#define   C_008670_CE_TO_RAM_WRITE_NOT_RDY                            0xFFFFFFDF
+#define   S_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(x)                   (((x) & 0x1) << 6)
+#define   G_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(x)                   (((x) >> 6) & 0x1)
+#define   C_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV                      0xFFFFFFBF
+#define   S_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(x)                    (((x) & 0x1) << 7)
+#define   G_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(x)                    (((x) >> 7) & 0x1)
+#define   C_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV                       0xFFFFFF7F
+#define   S_008670_CE_WAITING_ON_BUFFER_DATA(x)                       (((x) & 0x1) << 10)
+#define   G_008670_CE_WAITING_ON_BUFFER_DATA(x)                       (((x) >> 10) & 0x1)
+#define   C_008670_CE_WAITING_ON_BUFFER_DATA                          0xFFFFFBFF
+#define   S_008670_CE_WAITING_ON_CE_BUFFER_FLAG(x)                    (((x) & 0x1) << 11)
+#define   G_008670_CE_WAITING_ON_CE_BUFFER_FLAG(x)                    (((x) >> 11) & 0x1)
+#define   C_008670_CE_WAITING_ON_CE_BUFFER_FLAG                       0xFFFFF7FF
+#define   S_008670_CE_WAITING_ON_DE_COUNTER(x)                        (((x) & 0x1) << 12)
+#define   G_008670_CE_WAITING_ON_DE_COUNTER(x)                        (((x) >> 12) & 0x1)
+#define   C_008670_CE_WAITING_ON_DE_COUNTER                           0xFFFFEFFF
+#define   S_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(x)              (((x) & 0x1) << 13)
+#define   G_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(x)              (((x) >> 13) & 0x1)
+#define   C_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW                 0xFFFFDFFF
+#define   S_008670_TCIU_WAITING_ON_FREE(x)                            (((x) & 0x1) << 14)
+#define   G_008670_TCIU_WAITING_ON_FREE(x)                            (((x) >> 14) & 0x1)
+#define   C_008670_TCIU_WAITING_ON_FREE                               0xFFFFBFFF
+#define   S_008670_TCIU_WAITING_ON_TAGS(x)                            (((x) & 0x1) << 15)
+#define   G_008670_TCIU_WAITING_ON_TAGS(x)                            (((x) >> 15) & 0x1)
+#define   C_008670_TCIU_WAITING_ON_TAGS                               0xFFFF7FFF
+#define   S_008670_CE_STALLED_ON_TC_WR_CONFIRM(x)                     (((x) & 0x1) << 16)
+#define   G_008670_CE_STALLED_ON_TC_WR_CONFIRM(x)                     (((x) >> 16) & 0x1)
+#define   C_008670_CE_STALLED_ON_TC_WR_CONFIRM                        0xFFFEFFFF
+#define   S_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(x)                   (((x) & 0x1) << 17)
+#define   G_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(x)                   (((x) >> 17) & 0x1)
+#define   C_008670_CE_STALLED_ON_ATOMIC_RTN_DATA                      0xFFFDFFFF
+#define   S_008670_ATCL2IU_WAITING_ON_FREE(x)                         (((x) & 0x1) << 18)
+#define   G_008670_ATCL2IU_WAITING_ON_FREE(x)                         (((x) >> 18) & 0x1)
+#define   C_008670_ATCL2IU_WAITING_ON_FREE                            0xFFFBFFFF
+#define   S_008670_ATCL2IU_WAITING_ON_TAGS(x)                         (((x) & 0x1) << 19)
+#define   G_008670_ATCL2IU_WAITING_ON_TAGS(x)                         (((x) >> 19) & 0x1)
+#define   C_008670_ATCL2IU_WAITING_ON_TAGS                            0xFFF7FFFF
+#define   S_008670_ATCL1_WAITING_ON_TRANS(x)                          (((x) & 0x1) << 20)
+#define   G_008670_ATCL1_WAITING_ON_TRANS(x)                          (((x) >> 20) & 0x1)
+#define   C_008670_ATCL1_WAITING_ON_TRANS                             0xFFEFFFFF
+#define R_008674_CP_STALLED_STAT1                                       0x008674 /* register offset; S_ packs, G_ extracts, C_ clears each 1-bit field (bits 0, 2, 4, 10-15, 23-29) */
+#define   S_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(x)                      (((x) & 0x1) << 0)
+#define   G_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(x)                      (((x) >> 0) & 0x1)
+#define   C_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV                         0xFFFFFFFE
+#define   S_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(x)                      (((x) & 0x1) << 2)
+#define   G_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(x)                      (((x) >> 2) & 0x1)
+#define   C_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV                         0xFFFFFFFB
+#define   S_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(x)                    (((x) & 0x1) << 4)
+#define   G_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(x)                    (((x) >> 4) & 0x1)
+#define   C_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV                       0xFFFFFFEF
+#define   S_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(x)                    (((x) & 0x1) << 10)
+#define   G_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(x)                    (((x) >> 10) & 0x1)
+#define   C_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG                       0xFFFFFBFF
+#define   S_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(x)                    (((x) & 0x1) << 11)
+#define   G_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(x)                    (((x) >> 11) & 0x1)
+#define   C_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG                       0xFFFFF7FF
+#define   S_008674_ME_STALLED_ON_TC_WR_CONFIRM(x)                     (((x) & 0x1) << 12)
+#define   G_008674_ME_STALLED_ON_TC_WR_CONFIRM(x)                     (((x) >> 12) & 0x1)
+#define   C_008674_ME_STALLED_ON_TC_WR_CONFIRM                        0xFFFFEFFF
+#define   S_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(x)                   (((x) & 0x1) << 13)
+#define   G_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(x)                   (((x) >> 13) & 0x1)
+#define   C_008674_ME_STALLED_ON_ATOMIC_RTN_DATA                      0xFFFFDFFF
+#define   S_008674_ME_WAITING_ON_TC_READ_DATA(x)                      (((x) & 0x1) << 14)
+#define   G_008674_ME_WAITING_ON_TC_READ_DATA(x)                      (((x) >> 14) & 0x1)
+#define   C_008674_ME_WAITING_ON_TC_READ_DATA                         0xFFFFBFFF
+#define   S_008674_ME_WAITING_ON_REG_READ_DATA(x)                     (((x) & 0x1) << 15)
+#define   G_008674_ME_WAITING_ON_REG_READ_DATA(x)                     (((x) >> 15) & 0x1)
+#define   C_008674_ME_WAITING_ON_REG_READ_DATA                        0xFFFF7FFF
+#define   S_008674_RCIU_WAITING_ON_GDS_FREE(x)                        (((x) & 0x1) << 23)
+#define   G_008674_RCIU_WAITING_ON_GDS_FREE(x)                        (((x) >> 23) & 0x1)
+#define   C_008674_RCIU_WAITING_ON_GDS_FREE                           0xFF7FFFFF
+#define   S_008674_RCIU_WAITING_ON_GRBM_FREE(x)                       (((x) & 0x1) << 24)
+#define   G_008674_RCIU_WAITING_ON_GRBM_FREE(x)                       (((x) >> 24) & 0x1)
+#define   C_008674_RCIU_WAITING_ON_GRBM_FREE                          0xFEFFFFFF
+#define   S_008674_RCIU_WAITING_ON_VGT_FREE(x)                        (((x) & 0x1) << 25)
+#define   G_008674_RCIU_WAITING_ON_VGT_FREE(x)                        (((x) >> 25) & 0x1)
+#define   C_008674_RCIU_WAITING_ON_VGT_FREE                           0xFDFFFFFF
+#define   S_008674_RCIU_STALLED_ON_ME_READ(x)                         (((x) & 0x1) << 26)
+#define   G_008674_RCIU_STALLED_ON_ME_READ(x)                         (((x) >> 26) & 0x1)
+#define   C_008674_RCIU_STALLED_ON_ME_READ                            0xFBFFFFFF
+#define   S_008674_RCIU_STALLED_ON_DMA_READ(x)                        (((x) & 0x1) << 27)
+#define   G_008674_RCIU_STALLED_ON_DMA_READ(x)                        (((x) >> 27) & 0x1)
+#define   C_008674_RCIU_STALLED_ON_DMA_READ                           0xF7FFFFFF
+#define   S_008674_RCIU_STALLED_ON_APPEND_READ(x)                     (((x) & 0x1) << 28)
+#define   G_008674_RCIU_STALLED_ON_APPEND_READ(x)                     (((x) >> 28) & 0x1)
+#define   C_008674_RCIU_STALLED_ON_APPEND_READ                        0xEFFFFFFF
+#define   S_008674_RCIU_HALTED_BY_REG_VIOLATION(x)                    (((x) & 0x1) << 29)
+#define   G_008674_RCIU_HALTED_BY_REG_VIOLATION(x)                    (((x) >> 29) & 0x1)
+#define   C_008674_RCIU_HALTED_BY_REG_VIOLATION                       0xDFFFFFFF
+#define R_008678_CP_STALLED_STAT2                                       0x008678 /* register offset; S_ packs, G_ extracts, C_ clears each 1-bit field (bits 0-2, 4-5, 8-31) */
+#define   S_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(x)                       (((x) & 0x1) << 0)
+#define   G_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(x)                       (((x) >> 0) & 0x1)
+#define   C_008678_PFP_TO_CSF_NOT_RDY_TO_RCV                          0xFFFFFFFE
+#define   S_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(x)                       (((x) & 0x1) << 1)
+#define   G_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(x)                       (((x) >> 1) & 0x1)
+#define   C_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV                          0xFFFFFFFD
+#define   S_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(x)                      (((x) & 0x1) << 2)
+#define   G_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(x)                      (((x) >> 2) & 0x1)
+#define   C_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV                         0xFFFFFFFB
+#define   S_008678_PFP_TO_VGT_WRITES_PENDING(x)                       (((x) & 0x1) << 4)
+#define   G_008678_PFP_TO_VGT_WRITES_PENDING(x)                       (((x) >> 4) & 0x1)
+#define   C_008678_PFP_TO_VGT_WRITES_PENDING                          0xFFFFFFEF
+#define   S_008678_PFP_RCIU_READ_PENDING(x)                           (((x) & 0x1) << 5)
+#define   G_008678_PFP_RCIU_READ_PENDING(x)                           (((x) >> 5) & 0x1)
+#define   C_008678_PFP_RCIU_READ_PENDING                              0xFFFFFFDF
+#define   S_008678_PFP_WAITING_ON_BUFFER_DATA(x)                      (((x) & 0x1) << 8)
+#define   G_008678_PFP_WAITING_ON_BUFFER_DATA(x)                      (((x) >> 8) & 0x1)
+#define   C_008678_PFP_WAITING_ON_BUFFER_DATA                         0xFFFFFEFF
+#define   S_008678_ME_WAIT_ON_CE_COUNTER(x)                           (((x) & 0x1) << 9)
+#define   G_008678_ME_WAIT_ON_CE_COUNTER(x)                           (((x) >> 9) & 0x1)
+#define   C_008678_ME_WAIT_ON_CE_COUNTER                              0xFFFFFDFF
+#define   S_008678_ME_WAIT_ON_AVAIL_BUFFER(x)                         (((x) & 0x1) << 10)
+#define   G_008678_ME_WAIT_ON_AVAIL_BUFFER(x)                         (((x) >> 10) & 0x1)
+#define   C_008678_ME_WAIT_ON_AVAIL_BUFFER                            0xFFFFFBFF
+#define   S_008678_GFX_CNTX_NOT_AVAIL_TO_ME(x)                        (((x) & 0x1) << 11)
+#define   G_008678_GFX_CNTX_NOT_AVAIL_TO_ME(x)                        (((x) >> 11) & 0x1)
+#define   C_008678_GFX_CNTX_NOT_AVAIL_TO_ME                           0xFFFFF7FF
+#define   S_008678_ME_RCIU_NOT_RDY_TO_RCV(x)                          (((x) & 0x1) << 12)
+#define   G_008678_ME_RCIU_NOT_RDY_TO_RCV(x)                          (((x) >> 12) & 0x1)
+#define   C_008678_ME_RCIU_NOT_RDY_TO_RCV                             0xFFFFEFFF
+#define   S_008678_ME_TO_CONST_NOT_RDY_TO_RCV(x)                      (((x) & 0x1) << 13)
+#define   G_008678_ME_TO_CONST_NOT_RDY_TO_RCV(x)                      (((x) >> 13) & 0x1)
+#define   C_008678_ME_TO_CONST_NOT_RDY_TO_RCV                         0xFFFFDFFF
+#define   S_008678_ME_WAITING_DATA_FROM_PFP(x)                        (((x) & 0x1) << 14)
+#define   G_008678_ME_WAITING_DATA_FROM_PFP(x)                        (((x) >> 14) & 0x1)
+#define   C_008678_ME_WAITING_DATA_FROM_PFP                           0xFFFFBFFF
+#define   S_008678_ME_WAITING_ON_PARTIAL_FLUSH(x)                     (((x) & 0x1) << 15)
+#define   G_008678_ME_WAITING_ON_PARTIAL_FLUSH(x)                     (((x) >> 15) & 0x1)
+#define   C_008678_ME_WAITING_ON_PARTIAL_FLUSH                        0xFFFF7FFF
+#define   S_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(x)                        (((x) & 0x1) << 16)
+#define   G_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(x)                        (((x) >> 16) & 0x1)
+#define   C_008678_MEQ_TO_ME_NOT_RDY_TO_RCV                           0xFFFEFFFF
+#define   S_008678_STQ_TO_ME_NOT_RDY_TO_RCV(x)                        (((x) & 0x1) << 17)
+#define   G_008678_STQ_TO_ME_NOT_RDY_TO_RCV(x)                        (((x) >> 17) & 0x1)
+#define   C_008678_STQ_TO_ME_NOT_RDY_TO_RCV                           0xFFFDFFFF
+#define   S_008678_ME_WAITING_DATA_FROM_STQ(x)                        (((x) & 0x1) << 18)
+#define   G_008678_ME_WAITING_DATA_FROM_STQ(x)                        (((x) >> 18) & 0x1)
+#define   C_008678_ME_WAITING_DATA_FROM_STQ                           0xFFFBFFFF
+#define   S_008678_PFP_STALLED_ON_TC_WR_CONFIRM(x)                    (((x) & 0x1) << 19)
+#define   G_008678_PFP_STALLED_ON_TC_WR_CONFIRM(x)                    (((x) >> 19) & 0x1)
+#define   C_008678_PFP_STALLED_ON_TC_WR_CONFIRM                       0xFFF7FFFF
+#define   S_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(x)                  (((x) & 0x1) << 20)
+#define   G_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(x)                  (((x) >> 20) & 0x1)
+#define   C_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA                     0xFFEFFFFF
+#define   S_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(x)                     (((x) & 0x1) << 21)
+#define   G_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(x)                     (((x) >> 21) & 0x1)
+#define   C_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE                        0xFFDFFFFF
+#define   S_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(x)                      (((x) & 0x1) << 22)
+#define   G_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(x)                      (((x) >> 22) & 0x1)
+#define   C_008678_EOPD_FIFO_NEEDS_WR_CONFIRM                         0xFFBFFFFF
+#define   S_008678_STRMO_WR_OF_PRIM_DATA_PENDING(x)                   (((x) & 0x1) << 23)
+#define   G_008678_STRMO_WR_OF_PRIM_DATA_PENDING(x)                   (((x) >> 23) & 0x1)
+#define   C_008678_STRMO_WR_OF_PRIM_DATA_PENDING                      0xFF7FFFFF
+#define   S_008678_PIPE_STATS_WR_DATA_PENDING(x)                      (((x) & 0x1) << 24)
+#define   G_008678_PIPE_STATS_WR_DATA_PENDING(x)                      (((x) >> 24) & 0x1)
+#define   C_008678_PIPE_STATS_WR_DATA_PENDING                         0xFEFFFFFF
+#define   S_008678_APPEND_RDY_WAIT_ON_CS_DONE(x)                      (((x) & 0x1) << 25)
+#define   G_008678_APPEND_RDY_WAIT_ON_CS_DONE(x)                      (((x) >> 25) & 0x1)
+#define   C_008678_APPEND_RDY_WAIT_ON_CS_DONE                         0xFDFFFFFF
+#define   S_008678_APPEND_RDY_WAIT_ON_PS_DONE(x)                      (((x) & 0x1) << 26)
+#define   G_008678_APPEND_RDY_WAIT_ON_PS_DONE(x)                      (((x) >> 26) & 0x1)
+#define   C_008678_APPEND_RDY_WAIT_ON_PS_DONE                         0xFBFFFFFF
+#define   S_008678_APPEND_WAIT_ON_WR_CONFIRM(x)                       (((x) & 0x1) << 27)
+#define   G_008678_APPEND_WAIT_ON_WR_CONFIRM(x)                       (((x) >> 27) & 0x1)
+#define   C_008678_APPEND_WAIT_ON_WR_CONFIRM                          0xF7FFFFFF
+#define   S_008678_APPEND_ACTIVE_PARTITION(x)                         (((x) & 0x1) << 28)
+#define   G_008678_APPEND_ACTIVE_PARTITION(x)                         (((x) >> 28) & 0x1)
+#define   C_008678_APPEND_ACTIVE_PARTITION                            0xEFFFFFFF
+#define   S_008678_APPEND_WAITING_TO_SEND_MEMWRITE(x)                 (((x) & 0x1) << 29)
+#define   G_008678_APPEND_WAITING_TO_SEND_MEMWRITE(x)                 (((x) >> 29) & 0x1)
+#define   C_008678_APPEND_WAITING_TO_SEND_MEMWRITE                    0xDFFFFFFF
+#define   S_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(x)                      (((x) & 0x1) << 30)
+#define   G_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(x)                      (((x) >> 30) & 0x1)
+#define   C_008678_SURF_SYNC_NEEDS_IDLE_CNTXS                         0xBFFFFFFF
+#define   S_008678_SURF_SYNC_NEEDS_ALL_CLEAN(x)                       (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed 1 into bit 31 is undefined in C */
+#define   G_008678_SURF_SYNC_NEEDS_ALL_CLEAN(x)                       (((x) >> 31) & 0x1)
+#define   C_008678_SURF_SYNC_NEEDS_ALL_CLEAN                          0x7FFFFFFF
+#define R_008680_CP_STAT                                                0x008680 /* register offset; S_ packs, G_ extracts, C_ clears each 1-bit field (bits 9-31) */
+#define   S_008680_ROQ_RING_BUSY(x)                                   (((x) & 0x1) << 9)
+#define   G_008680_ROQ_RING_BUSY(x)                                   (((x) >> 9) & 0x1)
+#define   C_008680_ROQ_RING_BUSY                                      0xFFFFFDFF
+#define   S_008680_ROQ_INDIRECT1_BUSY(x)                              (((x) & 0x1) << 10)
+#define   G_008680_ROQ_INDIRECT1_BUSY(x)                              (((x) >> 10) & 0x1)
+#define   C_008680_ROQ_INDIRECT1_BUSY                                 0xFFFFFBFF
+#define   S_008680_ROQ_INDIRECT2_BUSY(x)                              (((x) & 0x1) << 11)
+#define   G_008680_ROQ_INDIRECT2_BUSY(x)                              (((x) >> 11) & 0x1)
+#define   C_008680_ROQ_INDIRECT2_BUSY                                 0xFFFFF7FF
+#define   S_008680_ROQ_STATE_BUSY(x)                                  (((x) & 0x1) << 12)
+#define   G_008680_ROQ_STATE_BUSY(x)                                  (((x) >> 12) & 0x1)
+#define   C_008680_ROQ_STATE_BUSY                                     0xFFFFEFFF
+#define   S_008680_DC_BUSY(x)                                         (((x) & 0x1) << 13)
+#define   G_008680_DC_BUSY(x)                                         (((x) >> 13) & 0x1)
+#define   C_008680_DC_BUSY                                            0xFFFFDFFF
+#define   S_008680_ATCL2IU_BUSY(x)                                    (((x) & 0x1) << 14)
+#define   G_008680_ATCL2IU_BUSY(x)                                    (((x) >> 14) & 0x1)
+#define   C_008680_ATCL2IU_BUSY                                       0xFFFFBFFF
+#define   S_008680_PFP_BUSY(x)                                        (((x) & 0x1) << 15)
+#define   G_008680_PFP_BUSY(x)                                        (((x) >> 15) & 0x1)
+#define   C_008680_PFP_BUSY                                           0xFFFF7FFF
+#define   S_008680_MEQ_BUSY(x)                                        (((x) & 0x1) << 16)
+#define   G_008680_MEQ_BUSY(x)                                        (((x) >> 16) & 0x1)
+#define   C_008680_MEQ_BUSY                                           0xFFFEFFFF
+#define   S_008680_ME_BUSY(x)                                         (((x) & 0x1) << 17)
+#define   G_008680_ME_BUSY(x)                                         (((x) >> 17) & 0x1)
+#define   C_008680_ME_BUSY                                            0xFFFDFFFF
+#define   S_008680_QUERY_BUSY(x)                                      (((x) & 0x1) << 18)
+#define   G_008680_QUERY_BUSY(x)                                      (((x) >> 18) & 0x1)
+#define   C_008680_QUERY_BUSY                                         0xFFFBFFFF
+#define   S_008680_SEMAPHORE_BUSY(x)                                  (((x) & 0x1) << 19)
+#define   G_008680_SEMAPHORE_BUSY(x)                                  (((x) >> 19) & 0x1)
+#define   C_008680_SEMAPHORE_BUSY                                     0xFFF7FFFF
+#define   S_008680_INTERRUPT_BUSY(x)                                  (((x) & 0x1) << 20)
+#define   G_008680_INTERRUPT_BUSY(x)                                  (((x) >> 20) & 0x1)
+#define   C_008680_INTERRUPT_BUSY                                     0xFFEFFFFF
+#define   S_008680_SURFACE_SYNC_BUSY(x)                               (((x) & 0x1) << 21)
+#define   G_008680_SURFACE_SYNC_BUSY(x)                               (((x) >> 21) & 0x1)
+#define   C_008680_SURFACE_SYNC_BUSY                                  0xFFDFFFFF
+#define   S_008680_DMA_BUSY(x)                                        (((x) & 0x1) << 22)
+#define   G_008680_DMA_BUSY(x)                                        (((x) >> 22) & 0x1)
+#define   C_008680_DMA_BUSY                                           0xFFBFFFFF
+#define   S_008680_RCIU_BUSY(x)                                       (((x) & 0x1) << 23)
+#define   G_008680_RCIU_BUSY(x)                                       (((x) >> 23) & 0x1)
+#define   C_008680_RCIU_BUSY                                          0xFF7FFFFF
+#define   S_008680_SCRATCH_RAM_BUSY(x)                                (((x) & 0x1) << 24)
+#define   G_008680_SCRATCH_RAM_BUSY(x)                                (((x) >> 24) & 0x1)
+#define   C_008680_SCRATCH_RAM_BUSY                                   0xFEFFFFFF
+#define   S_008680_CPC_CPG_BUSY(x)                                    (((x) & 0x1) << 25)
+#define   G_008680_CPC_CPG_BUSY(x)                                    (((x) >> 25) & 0x1)
+#define   C_008680_CPC_CPG_BUSY                                       0xFDFFFFFF
+#define   S_008680_CE_BUSY(x)                                         (((x) & 0x1) << 26)
+#define   G_008680_CE_BUSY(x)                                         (((x) >> 26) & 0x1)
+#define   C_008680_CE_BUSY                                            0xFBFFFFFF
+#define   S_008680_TCIU_BUSY(x)                                       (((x) & 0x1) << 27)
+#define   G_008680_TCIU_BUSY(x)                                       (((x) >> 27) & 0x1)
+#define   C_008680_TCIU_BUSY                                          0xF7FFFFFF
+#define   S_008680_ROQ_CE_RING_BUSY(x)                                (((x) & 0x1) << 28)
+#define   G_008680_ROQ_CE_RING_BUSY(x)                                (((x) >> 28) & 0x1)
+#define   C_008680_ROQ_CE_RING_BUSY                                   0xEFFFFFFF
+#define   S_008680_ROQ_CE_INDIRECT1_BUSY(x)                           (((x) & 0x1) << 29)
+#define   G_008680_ROQ_CE_INDIRECT1_BUSY(x)                           (((x) >> 29) & 0x1)
+#define   C_008680_ROQ_CE_INDIRECT1_BUSY                              0xDFFFFFFF
+#define   S_008680_ROQ_CE_INDIRECT2_BUSY(x)                           (((x) & 0x1) << 30)
+#define   G_008680_ROQ_CE_INDIRECT2_BUSY(x)                           (((x) >> 30) & 0x1)
+#define   C_008680_ROQ_CE_INDIRECT2_BUSY                              0xBFFFFFFF
+#define   S_008680_CP_BUSY(x)                                         (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed 1 into bit 31 is undefined in C */
+#define   G_008680_CP_BUSY(x)                                         (((x) >> 31) & 0x1)
+#define   C_008680_CP_BUSY                                            0x7FFFFFFF
+/* CIK */
+#define R_030800_GRBM_GFX_INDEX                                         0x030800 /* register offset; 8-bit index fields at bits 0-7, 8-15, 16-23 and 1-bit flags at bits 29-31 */
+#define   S_030800_INSTANCE_INDEX(x)                                  (((x) & 0xFF) << 0)
+#define   G_030800_INSTANCE_INDEX(x)                                  (((x) >> 0) & 0xFF)
+#define   C_030800_INSTANCE_INDEX                                     0xFFFFFF00
+#define   S_030800_SH_INDEX(x)                                        (((x) & 0xFF) << 8)
+#define   G_030800_SH_INDEX(x)                                        (((x) >> 8) & 0xFF)
+#define   C_030800_SH_INDEX                                           0xFFFF00FF
+#define   S_030800_SE_INDEX(x)                                        (((x) & 0xFF) << 16)
+#define   G_030800_SE_INDEX(x)                                        (((x) >> 16) & 0xFF)
+#define   C_030800_SE_INDEX                                           0xFF00FFFF
+#define   S_030800_SH_BROADCAST_WRITES(x)                             (((x) & 0x1) << 29)
+#define   G_030800_SH_BROADCAST_WRITES(x)                             (((x) >> 29) & 0x1)
+#define   C_030800_SH_BROADCAST_WRITES                                0xDFFFFFFF
+#define   S_030800_INSTANCE_BROADCAST_WRITES(x)                       (((x) & 0x1) << 30)
+#define   G_030800_INSTANCE_BROADCAST_WRITES(x)                       (((x) >> 30) & 0x1)
+#define   C_030800_INSTANCE_BROADCAST_WRITES                          0xBFFFFFFF
+#define   S_030800_SE_BROADCAST_WRITES(x)                             (((unsigned)(x) & 0x1) << 31) /* unsigned cast: shifting a signed 1 into bit 31 is undefined in C */
+#define   G_030800_SE_BROADCAST_WRITES(x)                             (((x) >> 31) & 0x1)
+#define   C_030800_SE_BROADCAST_WRITES                                0x7FFFFFFF
+#define R_030900_VGT_ESGS_RING_SIZE                                     0x030900 /* register offset; no field macros accompany it here */
+#define R_030904_VGT_GSVS_RING_SIZE                                     0x030904 /* register offset; no field macros accompany it here */
 #define R_030908_VGT_PRIMITIVE_TYPE                                     0x030908
 #define   S_030908_PRIM_TYPE(x)                                       (((x) & 0x3F) << 0)
 #define   G_030908_PRIM_TYPE(x)                                       (((x) >> 0) & 0x3F)
@@ -530,6 +1736,34 @@
 #define   S_030A04_CURRENT_COUNT(x)                                   (((x) & 0xFF) << 8)
 #define   G_030A04_CURRENT_COUNT(x)                                   (((x) >> 8) & 0xFF)
 #define   C_030A04_CURRENT_COUNT                                      0xFFFF00FF
+#define R_030A10_PA_SC_SCREEN_EXTENT_MIN_0                              0x030A10 /* register offset; X is bits 0-15, Y is bits 16-31 */
+#define   S_030A10_X(x)                                               (((x) & 0xFFFF) << 0)
+#define   G_030A10_X(x)                                               (((x) >> 0) & 0xFFFF)
+#define   C_030A10_X                                                  0xFFFF0000
+#define   S_030A10_Y(x)                                               (((x) & 0xFFFF) << 16)
+#define   G_030A10_Y(x)                                               (((x) >> 16) & 0xFFFF)
+#define   C_030A10_Y                                                  0x0000FFFF
+#define R_030A14_PA_SC_SCREEN_EXTENT_MAX_0                              0x030A14 /* register offset; X is bits 0-15, Y is bits 16-31 */
+#define   S_030A14_X(x)                                               (((x) & 0xFFFF) << 0)
+#define   G_030A14_X(x)                                               (((x) >> 0) & 0xFFFF)
+#define   C_030A14_X                                                  0xFFFF0000
+#define   S_030A14_Y(x)                                               (((x) & 0xFFFF) << 16)
+#define   G_030A14_Y(x)                                               (((x) >> 16) & 0xFFFF)
+#define   C_030A14_Y                                                  0x0000FFFF
+#define R_030A18_PA_SC_SCREEN_EXTENT_MIN_1                              0x030A18 /* register offset; X is bits 0-15, Y is bits 16-31 */
+#define   S_030A18_X(x)                                               (((x) & 0xFFFF) << 0)
+#define   G_030A18_X(x)                                               (((x) >> 0) & 0xFFFF)
+#define   C_030A18_X                                                  0xFFFF0000
+#define   S_030A18_Y(x)                                               (((x) & 0xFFFF) << 16)
+#define   G_030A18_Y(x)                                               (((x) >> 16) & 0xFFFF)
+#define   C_030A18_Y                                                  0x0000FFFF
+#define R_030A2C_PA_SC_SCREEN_EXTENT_MAX_1                              0x030A2C /* register offset (not contiguous with MIN_1 at 0x030A18); X is bits 0-15, Y is bits 16-31 */
+#define   S_030A2C_X(x)                                               (((x) & 0xFFFF) << 0)
+#define   G_030A2C_X(x)                                               (((x) >> 0) & 0xFFFF)
+#define   C_030A2C_X                                                  0xFFFF0000
+#define   S_030A2C_Y(x)                                               (((x) & 0xFFFF) << 16)
+#define   G_030A2C_Y(x)                                               (((x) >> 16) & 0xFFFF)
+#define   C_030A2C_Y                                                  0x0000FFFF
 /*     */
 #define R_008BF0_PA_SC_ENHANCE                                          0x008BF0
 #define   S_008BF0_ENABLE_PA_SC_OUT_OF_ORDER(x)                       (((x) & 0x1) << 0)
@@ -608,6 +1842,32 @@
 #define     V_008DFC_SQ_VGPR                                        0x00
 /*     */
 #define R_008DFC_SQ_INST                                                0x008DFC
+#define R_030D20_SQC_CACHES                                             0x030D20 /* register offset; S_ packs, G_ extracts, C_ clears each 1-bit field (bits 0-4 and 16) */
+#define   S_030D20_TARGET_INST(x)                                     (((x) & 0x1) << 0)
+#define   G_030D20_TARGET_INST(x)                                     (((x) >> 0) & 0x1)
+#define   C_030D20_TARGET_INST                                        0xFFFFFFFE
+#define   S_030D20_TARGET_DATA(x)                                     (((x) & 0x1) << 1)
+#define   G_030D20_TARGET_DATA(x)                                     (((x) >> 1) & 0x1)
+#define   C_030D20_TARGET_DATA                                        0xFFFFFFFD
+#define   S_030D20_INVALIDATE(x)                                      (((x) & 0x1) << 2)
+#define   G_030D20_INVALIDATE(x)                                      (((x) >> 2) & 0x1)
+#define   C_030D20_INVALIDATE                                         0xFFFFFFFB
+#define   S_030D20_WRITEBACK(x)                                       (((x) & 0x1) << 3)
+#define   G_030D20_WRITEBACK(x)                                       (((x) >> 3) & 0x1)
+#define   C_030D20_WRITEBACK                                          0xFFFFFFF7
+#define   S_030D20_VOL(x)                                             (((x) & 0x1) << 4)
+#define   G_030D20_VOL(x)                                             (((x) >> 4) & 0x1)
+#define   C_030D20_VOL                                                0xFFFFFFEF
+#define   S_030D20_COMPLETE(x)                                        (((x) & 0x1) << 16)
+#define   G_030D20_COMPLETE(x)                                        (((x) >> 16) & 0x1)
+#define   C_030D20_COMPLETE                                           0xFFFEFFFF
+#define R_030D24_SQC_WRITEBACK                                          0x030D24 /* register offset; 1-bit fields DWB (bit 0) and DIRTY (bit 1) */
+#define   S_030D24_DWB(x)                                             (((x) & 0x1) << 0)
+#define   G_030D24_DWB(x)                                             (((x) >> 0) & 0x1)
+#define   C_030D24_DWB                                                0xFFFFFFFE
+#define   S_030D24_DIRTY(x)                                           (((x) & 0x1) << 1)
+#define   G_030D24_DIRTY(x)                                           (((x) >> 1) & 0x1)
+#define   C_030D24_DIRTY                                              0xFFFFFFFD
 #define R_008DFC_SQ_VOP1                                                0x008DFC
 #define   S_008DFC_SRC0(x)                                            (((x) & 0x1FF) << 0)
 #define   G_008DFC_SRC0(x)                                            (((x) >> 0) & 0x1FF)
@@ -3740,7 +5000,17 @@
 #define   C_008DFC_ENCODING                                           0x03FFFFFF
 #define     V_008DFC_SQ_ENC_MUBUF_FIELD                             0x38
 #endif
+#define R_030E00_TA_CS_BC_BASE_ADDR                                     0x030E00
+#define R_030E04_TA_CS_BC_BASE_ADDR_HI                                  0x030E04
+#define   S_030E04_ADDRESS(x)                                         (((x) & 0xFF) << 0)
+#define   G_030E04_ADDRESS(x)                                         (((x) >> 0) & 0xFF)
+#define   C_030E04_ADDRESS                                            0xFFFFFF00
+#define R_030F00_DB_OCCLUSION_COUNT0_LOW                                0x030F00
 #define R_008F00_SQ_BUF_RSRC_WORD0                                      0x008F00
+#define R_030F04_DB_OCCLUSION_COUNT0_HI                                 0x030F04
+#define   S_030F04_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
+#define   G_030F04_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
+#define   C_030F04_COUNT_HI                                           0x80000000
 #define R_008F04_SQ_BUF_RSRC_WORD1                                      0x008F04
 #define   S_008F04_BASE_ADDRESS_HI(x)                                 (((x) & 0xFFFF) << 0)
 #define   G_008F04_BASE_ADDRESS_HI(x)                                 (((x) >> 0) & 0xFFFF)
@@ -3754,7 +5024,12 @@
 #define   S_008F04_SWIZZLE_ENABLE(x)                                  (((x) & 0x1) << 31)
 #define   G_008F04_SWIZZLE_ENABLE(x)                                  (((x) >> 31) & 0x1)
 #define   C_008F04_SWIZZLE_ENABLE                                     0x7FFFFFFF
+#define R_030F08_DB_OCCLUSION_COUNT1_LOW                                0x030F08
 #define R_008F08_SQ_BUF_RSRC_WORD2                                      0x008F08
+#define R_030F0C_DB_OCCLUSION_COUNT1_HI                                 0x030F0C
+#define   S_030F0C_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
+#define   G_030F0C_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
+#define   C_030F0C_COUNT_HI                                           0x80000000
 #define R_008F0C_SQ_BUF_RSRC_WORD3                                      0x008F0C
 #define   S_008F0C_DST_SEL_X(x)                                       (((x) & 0x07) << 0)
 #define   G_008F0C_DST_SEL_X(x)                                       (((x) >> 0) & 0x07)
@@ -3862,7 +5137,12 @@
 #define     V_008F0C_SQ_RSRC_BUF_RSVD_1                             0x01
 #define     V_008F0C_SQ_RSRC_BUF_RSVD_2                             0x02
 #define     V_008F0C_SQ_RSRC_BUF_RSVD_3                             0x03
+#define R_030F10_DB_OCCLUSION_COUNT2_LOW                                0x030F10
 #define R_008F10_SQ_IMG_RSRC_WORD0                                      0x008F10
+#define R_030F14_DB_OCCLUSION_COUNT2_HI                                 0x030F14
+#define   S_030F14_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
+#define   G_030F14_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
+#define   C_030F14_COUNT_HI                                           0x80000000
 #define R_008F14_SQ_IMG_RSRC_WORD1                                      0x008F14
 #define   S_008F14_BASE_ADDRESS_HI(x)                                 (((x) & 0xFF) << 0)
 #define   G_008F14_BASE_ADDRESS_HI(x)                                 (((x) >> 0) & 0xFF)
@@ -3961,6 +5241,7 @@
 #define   G_008F14_MTYPE(x)                                           (((x) >> 30) & 0x03)
 #define   C_008F14_MTYPE                                              0x3FFFFFFF
 /*     */
+#define R_030F18_DB_OCCLUSION_COUNT3_LOW                                0x030F18
 #define R_008F18_SQ_IMG_RSRC_WORD2                                      0x008F18
 #define   S_008F18_WIDTH(x)                                           (((x) & 0x3FFF) << 0)
 #define   G_008F18_WIDTH(x)                                           (((x) >> 0) & 0x3FFF)
@@ -3974,6 +5255,10 @@
 #define   S_008F18_INTERLACED(x)                                      (((x) & 0x1) << 31)
 #define   G_008F18_INTERLACED(x)                                      (((x) >> 31) & 0x1)
 #define   C_008F18_INTERLACED                                         0x7FFFFFFF
+#define R_030F1C_DB_OCCLUSION_COUNT3_HI                                 0x030F1C
+#define   S_030F1C_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
+#define   G_030F1C_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
+#define   C_030F1C_COUNT_HI                                           0x80000000
 #define R_008F1C_SQ_IMG_RSRC_WORD3                                      0x008F1C
 #define   S_008F1C_DST_SEL_X(x)                                       (((x) & 0x07) << 0)
 #define   G_008F1C_DST_SEL_X(x)                                       (((x) >> 0) & 0x07)
@@ -4084,6 +5369,23 @@
 #define   G_008F28_LOD_HDW_CNT_EN(x)                                  (((x) >> 20) & 0x1)
 #define   C_008F28_LOD_HDW_CNT_EN                                     0xFFEFFFFF
 /*     */
+/* VI */
+#define   S_008F28_COMPRESSION_EN(x)                                  (((x) & 0x1) << 21)
+#define   G_008F28_COMPRESSION_EN(x)                                  (((x) >> 21) & 0x1)
+#define   C_008F28_COMPRESSION_EN                                     0xFFDFFFFF
+#define   S_008F28_ALPHA_IS_ON_MSB(x)                                 (((x) & 0x1) << 22)
+#define   G_008F28_ALPHA_IS_ON_MSB(x)                                 (((x) >> 22) & 0x1)
+#define   C_008F28_ALPHA_IS_ON_MSB                                    0xFFBFFFFF
+#define   S_008F28_COLOR_TRANSFORM(x)                                 (((x) & 0x1) << 23)
+#define   G_008F28_COLOR_TRANSFORM(x)                                 (((x) >> 23) & 0x1)
+#define   C_008F28_COLOR_TRANSFORM                                    0xFF7FFFFF
+#define   S_008F28_LOST_ALPHA_BITS(x)                                 (((x) & 0x0F) << 24)
+#define   G_008F28_LOST_ALPHA_BITS(x)                                 (((x) >> 24) & 0x0F)
+#define   C_008F28_LOST_ALPHA_BITS                                    0xF0FFFFFF
+#define   S_008F28_LOST_COLOR_BITS(x)                                 (((x) & 0x0F) << 28)
+#define   G_008F28_LOST_COLOR_BITS(x)                                 (((x) >> 28) & 0x0F)
+#define   C_008F28_LOST_COLOR_BITS                                    0x0FFFFFFF
+/*    */
 #define R_008F2C_SQ_IMG_RSRC_WORD7                                      0x008F2C
 #define R_008F30_SQ_IMG_SAMP_WORD0                                      0x008F30
 #define   S_008F30_CLAMP_X(x)                                         (((x) & 0x07) << 0)
@@ -4148,6 +5450,11 @@
 #define   S_008F30_FILTER_MODE(x)                                     (((x) & 0x03) << 29)
 #define   G_008F30_FILTER_MODE(x)                                     (((x) >> 29) & 0x03)
 #define   C_008F30_FILTER_MODE                                        0x9FFFFFFF
+/* VI */
+#define   S_008F30_COMPAT_MODE(x)                                     (((x) & 0x1) << 31)
+#define   G_008F30_COMPAT_MODE(x)                                     (((x) >> 31) & 0x1)
+#define   C_008F30_COMPAT_MODE                                        0x7FFFFFFF
+/*    */
 #define R_008F34_SQ_IMG_SAMP_WORD1                                      0x008F34
 #define   S_008F34_MIN_LOD(x)                                         (((x) & 0xFFF) << 0)
 #define   G_008F34_MIN_LOD(x)                                         (((x) >> 0) & 0xFFF)
@@ -4313,6 +5620,11 @@
 #define   G_008F44_OFFSET(x)                                          (((x) >> 0) & 0xFFFFFF)
 #define   C_008F44_OFFSET                                             0xFF000000
 /*     */
+#define R_030FF8_DB_ZPASS_COUNT_LOW                                     0x030FF8
+#define R_030FFC_DB_ZPASS_COUNT_HI                                      0x030FFC
+#define   S_030FFC_COUNT_HI(x)                                        (((x) & 0x7FFFFFFF) << 0)
+#define   G_030FFC_COUNT_HI(x)                                        (((x) >> 0) & 0x7FFFFFFF)
+#define   C_030FFC_COUNT_HI                                           0x80000000
 #define R_009100_SPI_CONFIG_CNTL                                        0x009100
 #define   S_009100_GPR_WRITE_PRIORITY(x)                              (((x) & 0x1FFFFF) << 0)
 #define   G_009100_GPR_WRITE_PRIORITY(x)                              (((x) >> 0) & 0x1FFFFF)
@@ -4437,6 +5749,34 @@
 #define   S_009858_MSAA16_Y(x)                                        (((x) & 0x03) << 18)
 #define   G_009858_MSAA16_Y(x)                                        (((x) >> 18) & 0x03)
 #define   C_009858_MSAA16_Y                                           0xFFF3FFFF
+#define R_0098F8_GB_ADDR_CONFIG                                         0x0098F8
+#define   S_0098F8_NUM_PIPES(x)                                       (((x) & 0x07) << 0)
+#define   G_0098F8_NUM_PIPES(x)                                       (((x) >> 0) & 0x07)
+#define   C_0098F8_NUM_PIPES                                          0xFFFFFFF8
+#define   S_0098F8_PIPE_INTERLEAVE_SIZE(x)                            (((x) & 0x07) << 4)
+#define   G_0098F8_PIPE_INTERLEAVE_SIZE(x)                            (((x) >> 4) & 0x07)
+#define   C_0098F8_PIPE_INTERLEAVE_SIZE                               0xFFFFFF8F
+#define   S_0098F8_BANK_INTERLEAVE_SIZE(x)                            (((x) & 0x07) << 8)
+#define   G_0098F8_BANK_INTERLEAVE_SIZE(x)                            (((x) >> 8) & 0x07)
+#define   C_0098F8_BANK_INTERLEAVE_SIZE                               0xFFFFF8FF
+#define   S_0098F8_NUM_SHADER_ENGINES(x)                              (((x) & 0x03) << 12)
+#define   G_0098F8_NUM_SHADER_ENGINES(x)                              (((x) >> 12) & 0x03)
+#define   C_0098F8_NUM_SHADER_ENGINES                                 0xFFFFCFFF
+#define   S_0098F8_SHADER_ENGINE_TILE_SIZE(x)                         (((x) & 0x07) << 16)
+#define   G_0098F8_SHADER_ENGINE_TILE_SIZE(x)                         (((x) >> 16) & 0x07)
+#define   C_0098F8_SHADER_ENGINE_TILE_SIZE                            0xFFF8FFFF
+#define   S_0098F8_NUM_GPUS(x)                                        (((x) & 0x07) << 20)
+#define   G_0098F8_NUM_GPUS(x)                                        (((x) >> 20) & 0x07)
+#define   C_0098F8_NUM_GPUS                                           0xFF8FFFFF
+#define   S_0098F8_MULTI_GPU_TILE_SIZE(x)                             (((x) & 0x03) << 24)
+#define   G_0098F8_MULTI_GPU_TILE_SIZE(x)                             (((x) >> 24) & 0x03)
+#define   C_0098F8_MULTI_GPU_TILE_SIZE                                0xFCFFFFFF
+#define   S_0098F8_ROW_SIZE(x)                                        (((x) & 0x03) << 28)
+#define   G_0098F8_ROW_SIZE(x)                                        (((x) >> 28) & 0x03)
+#define   C_0098F8_ROW_SIZE                                           0xCFFFFFFF
+#define   S_0098F8_NUM_LOWER_PIPES(x)                                 (((x) & 0x1) << 30)
+#define   G_0098F8_NUM_LOWER_PIPES(x)                                 (((x) >> 30) & 0x1)
+#define   C_0098F8_NUM_LOWER_PIPES                                    0xBFFFFFFF
 #define R_009910_GB_TILE_MODE0                                          0x009910
 #define   S_009910_MICRO_TILE_MODE(x)                                 (((x) & 0x03) << 0)
 #define   G_009910_MICRO_TILE_MODE(x)                                 (((x) >> 0) & 0x03)
@@ -4515,6 +5855,87 @@
 #define     V_009910_ADDR_SURF_4_BANK                               0x01
 #define     V_009910_ADDR_SURF_8_BANK                               0x02
 #define     V_009910_ADDR_SURF_16_BANK                              0x03
+#define   S_009910_MICRO_TILE_MODE_NEW(x)                             (((x) & 0x07) << 22)
+#define   G_009910_MICRO_TILE_MODE_NEW(x)                             (((x) >> 22) & 0x07)
+#define   C_009910_MICRO_TILE_MODE_NEW                                0xFE3FFFFF
+#define     V_009910_ADDR_SURF_DISPLAY_MICRO_TILING                 0x00
+#define     V_009910_ADDR_SURF_THIN_MICRO_TILING                    0x01
+#define     V_009910_ADDR_SURF_DEPTH_MICRO_TILING                   0x02
+#define     V_009910_ADDR_SURF_ROTATED_MICRO_TILING                 0x03
+#define   S_009910_SAMPLE_SPLIT(x)                                    (((x) & 0x03) << 25)
+#define   G_009910_SAMPLE_SPLIT(x)                                    (((x) >> 25) & 0x03)
+#define   C_009910_SAMPLE_SPLIT                                       0xF9FFFFFF
+#define R_009914_GB_TILE_MODE1                                          0x009914
+#define R_009918_GB_TILE_MODE2                                          0x009918
+#define R_00991C_GB_TILE_MODE3                                          0x00991C
+#define R_009920_GB_TILE_MODE4                                          0x009920
+#define R_009924_GB_TILE_MODE5                                          0x009924
+#define R_009928_GB_TILE_MODE6                                          0x009928
+#define R_00992C_GB_TILE_MODE7                                          0x00992C
+#define R_009930_GB_TILE_MODE8                                          0x009930
+#define R_009934_GB_TILE_MODE9                                          0x009934
+#define R_009938_GB_TILE_MODE10                                         0x009938
+#define R_00993C_GB_TILE_MODE11                                         0x00993C
+#define R_009940_GB_TILE_MODE12                                         0x009940
+#define R_009944_GB_TILE_MODE13                                         0x009944
+#define R_009948_GB_TILE_MODE14                                         0x009948
+#define R_00994C_GB_TILE_MODE15                                         0x00994C
+#define R_009950_GB_TILE_MODE16                                         0x009950
+#define R_009954_GB_TILE_MODE17                                         0x009954
+#define R_009958_GB_TILE_MODE18                                         0x009958
+#define R_00995C_GB_TILE_MODE19                                         0x00995C
+#define R_009960_GB_TILE_MODE20                                         0x009960
+#define R_009964_GB_TILE_MODE21                                         0x009964
+#define R_009968_GB_TILE_MODE22                                         0x009968
+#define R_00996C_GB_TILE_MODE23                                         0x00996C
+#define R_009970_GB_TILE_MODE24                                         0x009970
+#define R_009974_GB_TILE_MODE25                                         0x009974
+#define R_009978_GB_TILE_MODE26                                         0x009978
+#define R_00997C_GB_TILE_MODE27                                         0x00997C
+#define R_009980_GB_TILE_MODE28                                         0x009980
+#define R_009984_GB_TILE_MODE29                                         0x009984
+#define R_009988_GB_TILE_MODE30                                         0x009988
+#define R_00998C_GB_TILE_MODE31                                         0x00998C
+/* CIK */
+#define R_009990_GB_MACROTILE_MODE0                                     0x009990
+#define   S_009990_BANK_WIDTH(x)                                      (((x) & 0x03) << 0)
+#define   G_009990_BANK_WIDTH(x)                                      (((x) >> 0) & 0x03)
+#define   C_009990_BANK_WIDTH                                         0xFFFFFFFC
+#define   S_009990_BANK_HEIGHT(x)                                     (((x) & 0x03) << 2)
+#define   G_009990_BANK_HEIGHT(x)                                     (((x) >> 2) & 0x03)
+#define   C_009990_BANK_HEIGHT                                        0xFFFFFFF3
+#define   S_009990_MACRO_TILE_ASPECT(x)                               (((x) & 0x03) << 4)
+#define   G_009990_MACRO_TILE_ASPECT(x)                               (((x) >> 4) & 0x03)
+#define   C_009990_MACRO_TILE_ASPECT                                  0xFFFFFFCF
+#define   S_009990_NUM_BANKS(x)                                       (((x) & 0x03) << 6)
+#define   G_009990_NUM_BANKS(x)                                       (((x) >> 6) & 0x03)
+#define   C_009990_NUM_BANKS                                          0xFFFFFF3F
+#define R_009994_GB_MACROTILE_MODE1                                     0x009994
+#define R_009998_GB_MACROTILE_MODE2                                     0x009998
+#define R_00999C_GB_MACROTILE_MODE3                                     0x00999C
+#define R_0099A0_GB_MACROTILE_MODE4                                     0x0099A0
+#define R_0099A4_GB_MACROTILE_MODE5                                     0x0099A4
+#define R_0099A8_GB_MACROTILE_MODE6                                     0x0099A8
+#define R_0099AC_GB_MACROTILE_MODE7                                     0x0099AC
+#define R_0099B0_GB_MACROTILE_MODE8                                     0x0099B0
+#define R_0099B4_GB_MACROTILE_MODE9                                     0x0099B4
+#define R_0099B8_GB_MACROTILE_MODE10                                    0x0099B8
+#define R_0099BC_GB_MACROTILE_MODE11                                    0x0099BC
+#define R_0099C0_GB_MACROTILE_MODE12                                    0x0099C0
+#define R_0099C4_GB_MACROTILE_MODE13                                    0x0099C4
+#define R_0099C8_GB_MACROTILE_MODE14                                    0x0099C8
+#define R_0099CC_GB_MACROTILE_MODE15                                    0x0099CC
+/*     */
+#define R_00B000_SPI_SHADER_TBA_LO_PS                                   0x00B000
+#define R_00B004_SPI_SHADER_TBA_HI_PS                                   0x00B004
+#define   S_00B004_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B004_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B004_MEM_BASE                                           0xFFFFFF00
+#define R_00B008_SPI_SHADER_TMA_LO_PS                                   0x00B008
+#define R_00B00C_SPI_SHADER_TMA_HI_PS                                   0x00B00C
+#define   S_00B00C_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B00C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B00C_MEM_BASE                                           0xFFFFFF00
 /* CIK */
 #define R_00B01C_SPI_SHADER_PGM_RSRC3_PS                                0x00B01C
 #define   S_00B01C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
@@ -4575,6 +5996,9 @@
 #define   S_00B02C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B02C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B02C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B02C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6)
+#define   G_00B02C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1)
+#define   C_00B02C_TRAP_PRESENT                                       0xFFFFFFBF
 #define   S_00B02C_WAVE_CNT_EN(x)                                     (((x) & 0x1) << 7)
 #define   G_00B02C_WAVE_CNT_EN(x)                                     (((x) >> 7) & 0x1)
 #define   C_00B02C_WAVE_CNT_EN                                        0xFFFFFF7F
@@ -4584,6 +6008,9 @@
 #define   S_00B02C_EXCP_EN(x)                                         (((x) & 0x7F) << 16) /* mask is 0x1FF on CIK */
 #define   G_00B02C_EXCP_EN(x)                                         (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
 #define   C_00B02C_EXCP_EN                                            0xFF80FFFF /* mask is 0x1FF on CIK */
+#define   S_00B02C_EXCP_EN_CIK(x)                                     (((x) & 0x1FF) << 16)
+#define   G_00B02C_EXCP_EN_CIK(x)                                     (((x) >> 16) & 0x1FF)
+#define   C_00B02C_EXCP_EN_CIK                                        0xFE00FFFF
 #define R_00B030_SPI_SHADER_USER_DATA_PS_0                              0x00B030
 #define R_00B034_SPI_SHADER_USER_DATA_PS_1                              0x00B034
 #define R_00B038_SPI_SHADER_USER_DATA_PS_2                              0x00B038
@@ -4600,6 +6027,16 @@
 #define R_00B064_SPI_SHADER_USER_DATA_PS_13                             0x00B064
 #define R_00B068_SPI_SHADER_USER_DATA_PS_14                             0x00B068
 #define R_00B06C_SPI_SHADER_USER_DATA_PS_15                             0x00B06C
+#define R_00B100_SPI_SHADER_TBA_LO_VS                                   0x00B100
+#define R_00B104_SPI_SHADER_TBA_HI_VS                                   0x00B104
+#define   S_00B104_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B104_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B104_MEM_BASE                                           0xFFFFFF00
+#define R_00B108_SPI_SHADER_TMA_LO_VS                                   0x00B108
+#define R_00B10C_SPI_SHADER_TMA_HI_VS                                   0x00B10C
+#define   S_00B10C_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B10C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B10C_MEM_BASE                                           0xFFFFFF00
 /* CIK */
 #define R_00B118_SPI_SHADER_PGM_RSRC3_VS                                0x00B118
 #define   S_00B118_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
@@ -4667,6 +6104,9 @@
 #define   S_00B12C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B12C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B12C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B12C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6)
+#define   G_00B12C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1)
+#define   C_00B12C_TRAP_PRESENT                                       0xFFFFFFBF
 #define   S_00B12C_OC_LDS_EN(x)                                       (((x) & 0x1) << 7)
 #define   G_00B12C_OC_LDS_EN(x)                                       (((x) >> 7) & 0x1)
 #define   C_00B12C_OC_LDS_EN                                          0xFFFFFF7F
@@ -4688,6 +6128,14 @@
 #define   S_00B12C_EXCP_EN(x)                                         (((x) & 0x7F) << 13) /* mask is 0x1FF on CIK */
 #define   G_00B12C_EXCP_EN(x)                                         (((x) >> 13) & 0x7F) /* mask is 0x1FF on CIK */
 #define   C_00B12C_EXCP_EN                                            0xFFF01FFF /* mask is 0x1FF on CIK */
+#define   S_00B12C_EXCP_EN_CIK(x)                                     (((x) & 0x1FF) << 13)
+#define   G_00B12C_EXCP_EN_CIK(x)                                     (((x) >> 13) & 0x1FF)
+#define   C_00B12C_EXCP_EN_CIK                                        0xFFC01FFF
+/* VI */
+#define   S_00B12C_DISPATCH_DRAW_EN(x)                                (((x) & 0x1) << 24)
+#define   G_00B12C_DISPATCH_DRAW_EN(x)                                (((x) >> 24) & 0x1)
+#define   C_00B12C_DISPATCH_DRAW_EN                                   0xFEFFFFFF
+/*    */
 #define R_00B130_SPI_SHADER_USER_DATA_VS_0                              0x00B130
 #define R_00B134_SPI_SHADER_USER_DATA_VS_1                              0x00B134
 #define R_00B138_SPI_SHADER_USER_DATA_VS_2                              0x00B138
@@ -4704,6 +6152,16 @@
 #define R_00B164_SPI_SHADER_USER_DATA_VS_13                             0x00B164
 #define R_00B168_SPI_SHADER_USER_DATA_VS_14                             0x00B168
 #define R_00B16C_SPI_SHADER_USER_DATA_VS_15                             0x00B16C
+#define R_00B200_SPI_SHADER_TBA_LO_GS                                   0x00B200
+#define R_00B204_SPI_SHADER_TBA_HI_GS                                   0x00B204
+#define   S_00B204_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B204_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B204_MEM_BASE                                           0xFFFFFF00
+#define R_00B208_SPI_SHADER_TMA_LO_GS                                   0x00B208
+#define R_00B20C_SPI_SHADER_TMA_HI_GS                                   0x00B20C
+#define   S_00B20C_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B20C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B20C_MEM_BASE                                           0xFFFFFF00
 /* CIK */
 #define R_00B21C_SPI_SHADER_PGM_RSRC3_GS                                0x00B21C
 #define   S_00B21C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
@@ -4716,6 +6174,11 @@
 #define   G_00B21C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 22) & 0x0F)
 #define   C_00B21C_LOCK_LOW_THRESHOLD                                 0xFC3FFFFF
 /*     */
+/* VI */
+#define   S_00B21C_GROUP_FIFO_DEPTH(x)                                (((x) & 0x3F) << 26)
+#define   G_00B21C_GROUP_FIFO_DEPTH(x)                                (((x) >> 26) & 0x3F)
+#define   C_00B21C_GROUP_FIFO_DEPTH                                   0x03FFFFFF
+/*    */
 #define R_00B220_SPI_SHADER_PGM_LO_GS                                   0x00B220
 #define R_00B224_SPI_SHADER_PGM_HI_GS                                   0x00B224
 #define   S_00B224_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
@@ -4764,10 +6227,41 @@
 #define   S_00B22C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B22C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B22C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B22C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6)
+#define   G_00B22C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1)
+#define   C_00B22C_TRAP_PRESENT                                       0xFFFFFFBF
 #define   S_00B22C_EXCP_EN(x)                                         (((x) & 0x7F) << 7) /* mask is 0x1FF on CIK */
 #define   G_00B22C_EXCP_EN(x)                                         (((x) >> 7) & 0x7F) /* mask is 0x1FF on CIK */
 #define   C_00B22C_EXCP_EN                                            0xFFFFC07F /* mask is 0x1FF on CIK */
+#define   S_00B22C_EXCP_EN_CIK(x)                                     (((x) & 0x1FF) << 7)
+#define   G_00B22C_EXCP_EN_CIK(x)                                     (((x) >> 7) & 0x1FF)
+#define   C_00B22C_EXCP_EN_CIK                                        0xFFFF007F
 #define R_00B230_SPI_SHADER_USER_DATA_GS_0                              0x00B230
+#define R_00B234_SPI_SHADER_USER_DATA_GS_1                              0x00B234
+#define R_00B238_SPI_SHADER_USER_DATA_GS_2                              0x00B238
+#define R_00B23C_SPI_SHADER_USER_DATA_GS_3                              0x00B23C
+#define R_00B240_SPI_SHADER_USER_DATA_GS_4                              0x00B240
+#define R_00B244_SPI_SHADER_USER_DATA_GS_5                              0x00B244
+#define R_00B248_SPI_SHADER_USER_DATA_GS_6                              0x00B248
+#define R_00B24C_SPI_SHADER_USER_DATA_GS_7                              0x00B24C
+#define R_00B250_SPI_SHADER_USER_DATA_GS_8                              0x00B250
+#define R_00B254_SPI_SHADER_USER_DATA_GS_9                              0x00B254
+#define R_00B258_SPI_SHADER_USER_DATA_GS_10                             0x00B258
+#define R_00B25C_SPI_SHADER_USER_DATA_GS_11                             0x00B25C
+#define R_00B260_SPI_SHADER_USER_DATA_GS_12                             0x00B260
+#define R_00B264_SPI_SHADER_USER_DATA_GS_13                             0x00B264
+#define R_00B268_SPI_SHADER_USER_DATA_GS_14                             0x00B268
+#define R_00B26C_SPI_SHADER_USER_DATA_GS_15                             0x00B26C
+#define R_00B300_SPI_SHADER_TBA_LO_ES                                   0x00B300
+#define R_00B304_SPI_SHADER_TBA_HI_ES                                   0x00B304
+#define   S_00B304_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B304_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B304_MEM_BASE                                           0xFFFFFF00
+#define R_00B308_SPI_SHADER_TMA_LO_ES                                   0x00B308
+#define R_00B30C_SPI_SHADER_TMA_HI_ES                                   0x00B30C
+#define   S_00B30C_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
+#define   G_00B30C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF)
+#define   C_00B30C_MEM_BASE                                           0xFFFFFF00
 /* CIK */
 #define R_00B31C_SPI_SHADER_PGM_RSRC3_ES                                0x00B31C
 #define   S_00B31C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
@@ -4780,6 +6274,11 @@
 #define   G_00B31C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 22) & 0x0F)
 #define   C_00B31C_LOCK_LOW_THRESHOLD                                 0xFC3FFFFF
 /*     */
+/* VI */
+#define   S_00B31C_GROUP_FIFO_DEPTH(x)                                (((x) & 0x3F) << 26)
+#define   G_00B31C_GROUP_FIFO_DEPTH(x)                                (((x) >> 26) & 0x3F)
+#define   C_00B31C_GROUP_FIFO_DEPTH                                   0x03FFFFFF
+/*    */
 #define R_00B320_SPI_SHADER_PGM_LO_ES                                   0x00B320
 #define R_00B324_SPI_SHADER_PGM_HI_ES                                   0x00B324
 #define   S_00B324_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
@@ -4831,6 +6330,9 @@
 #define   S_00B32C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B32C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B32C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B32C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6)
+#define   G_00B32C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1)
+#define   C_00B32C_TRAP_PRESENT                                       0xFFFFFFBF
 #define   S_00B32C_OC_LDS_EN(x)                                       (((x) & 0x1) << 7)
 #define   G_00B32C_OC_LDS_EN(x)                                       (((x) >> 7) & 0x1)
 #define   C_00B32C_OC_LDS_EN                                          0xFFFFFF7F
@@ -4841,6 +6343,31 @@
 #define   G_00B32C_LDS_SIZE(x)                                        (((x) >> 20) & 0x1FF) /* CIK, for on-chip GS */
 #define   C_00B32C_LDS_SIZE                                           0xE00FFFFF /* CIK, for on-chip GS */
 #define R_00B330_SPI_SHADER_USER_DATA_ES_0                              0x00B330
+#define R_00B334_SPI_SHADER_USER_DATA_ES_1                              0x00B334 /* ES_1..ES_15 continue from ES_0 in 4-byte steps, ending at 0x00B36C */
+#define R_00B338_SPI_SHADER_USER_DATA_ES_2                              0x00B338
+#define R_00B33C_SPI_SHADER_USER_DATA_ES_3                              0x00B33C
+#define R_00B340_SPI_SHADER_USER_DATA_ES_4                              0x00B340
+#define R_00B344_SPI_SHADER_USER_DATA_ES_5                              0x00B344
+#define R_00B348_SPI_SHADER_USER_DATA_ES_6                              0x00B348
+#define R_00B34C_SPI_SHADER_USER_DATA_ES_7                              0x00B34C
+#define R_00B350_SPI_SHADER_USER_DATA_ES_8                              0x00B350
+#define R_00B354_SPI_SHADER_USER_DATA_ES_9                              0x00B354
+#define R_00B358_SPI_SHADER_USER_DATA_ES_10                             0x00B358
+#define R_00B35C_SPI_SHADER_USER_DATA_ES_11                             0x00B35C
+#define R_00B360_SPI_SHADER_USER_DATA_ES_12                             0x00B360
+#define R_00B364_SPI_SHADER_USER_DATA_ES_13                             0x00B364
+#define R_00B368_SPI_SHADER_USER_DATA_ES_14                             0x00B368
+#define R_00B36C_SPI_SHADER_USER_DATA_ES_15                             0x00B36C
+#define R_00B400_SPI_SHADER_TBA_LO_HS                                   0x00B400 /* LO half: no field macros are defined for it */
+#define R_00B404_SPI_SHADER_TBA_HI_HS                                   0x00B404 /* HI half: only MEM_BASE is described */
+#define   S_00B404_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack x into bits 7:0 */
+#define   G_00B404_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
+#define   C_00B404_MEM_BASE                                           0xFFFFFF00 /* ~0xFF */
+#define R_00B408_SPI_SHADER_TMA_LO_HS                                   0x00B408 /* LO half: no field macros are defined for it */
+#define R_00B40C_SPI_SHADER_TMA_HI_HS                                   0x00B40C /* HI half: only MEM_BASE is described */
+#define   S_00B40C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack x into bits 7:0 */
+#define   G_00B40C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
+#define   C_00B40C_MEM_BASE                                           0xFFFFFF00 /* ~0xFF */
 /* CIK */
 #define R_00B41C_SPI_SHADER_PGM_RSRC3_HS                                0x00B41C
 #define   S_00B41C_WAVE_LIMIT(x)                                      (((x) & 0x3F) << 0)
@@ -4850,6 +6377,11 @@
 #define   G_00B41C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 6) & 0x0F)
 #define   C_00B41C_LOCK_LOW_THRESHOLD                                 0xFFFFFC3F
 /*     */
+/* VI */
+#define   S_00B41C_GROUP_FIFO_DEPTH(x)                                (((x) & 0x3F) << 10) /* pack x into bits 15:10 (the 00B31C/00B51C variants use 31:26) */
+#define   G_00B41C_GROUP_FIFO_DEPTH(x)                                (((x) >> 10) & 0x3F) /* extract bits 15:10 */
+#define   C_00B41C_GROUP_FIFO_DEPTH                                   0xFFFF03FF /* ~(0x3F << 10) */
+/*    */
 #define R_00B420_SPI_SHADER_PGM_LO_HS                                   0x00B420
 #define R_00B424_SPI_SHADER_PGM_HI_HS                                   0x00B424
 #define   S_00B424_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
@@ -4895,6 +6427,9 @@
 #define   S_00B42C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B42C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B42C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B42C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack x into bit 6, between USER_SGPR (5:1) and OC_LDS_EN (7) */
+#define   G_00B42C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
+#define   C_00B42C_TRAP_PRESENT                                       0xFFFFFFBF /* ~(0x1 << 6) */
 #define   S_00B42C_OC_LDS_EN(x)                                       (((x) & 0x1) << 7)
 #define   G_00B42C_OC_LDS_EN(x)                                       (((x) >> 7) & 0x1)
 #define   C_00B42C_OC_LDS_EN                                          0xFFFFFF7F
@@ -4905,6 +6440,31 @@
 #define   G_00B42C_EXCP_EN(x)                                         (((x) >> 9) & 0x7F) /* mask is 0x1FF on CIK */
 #define   C_00B42C_EXCP_EN                                            0xFFFF01FF /* mask is 0x1FF on CIK */
 #define R_00B430_SPI_SHADER_USER_DATA_HS_0                              0x00B430
+#define R_00B434_SPI_SHADER_USER_DATA_HS_1                              0x00B434 /* HS_1..HS_15 continue from HS_0 in 4-byte steps, ending at 0x00B46C */
+#define R_00B438_SPI_SHADER_USER_DATA_HS_2                              0x00B438
+#define R_00B43C_SPI_SHADER_USER_DATA_HS_3                              0x00B43C
+#define R_00B440_SPI_SHADER_USER_DATA_HS_4                              0x00B440
+#define R_00B444_SPI_SHADER_USER_DATA_HS_5                              0x00B444
+#define R_00B448_SPI_SHADER_USER_DATA_HS_6                              0x00B448
+#define R_00B44C_SPI_SHADER_USER_DATA_HS_7                              0x00B44C
+#define R_00B450_SPI_SHADER_USER_DATA_HS_8                              0x00B450
+#define R_00B454_SPI_SHADER_USER_DATA_HS_9                              0x00B454
+#define R_00B458_SPI_SHADER_USER_DATA_HS_10                             0x00B458
+#define R_00B45C_SPI_SHADER_USER_DATA_HS_11                             0x00B45C
+#define R_00B460_SPI_SHADER_USER_DATA_HS_12                             0x00B460
+#define R_00B464_SPI_SHADER_USER_DATA_HS_13                             0x00B464
+#define R_00B468_SPI_SHADER_USER_DATA_HS_14                             0x00B468
+#define R_00B46C_SPI_SHADER_USER_DATA_HS_15                             0x00B46C
+#define R_00B500_SPI_SHADER_TBA_LO_LS                                   0x00B500 /* LO half: no field macros are defined for it */
+#define R_00B504_SPI_SHADER_TBA_HI_LS                                   0x00B504 /* HI half: only MEM_BASE is described */
+#define   S_00B504_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack x into bits 7:0 */
+#define   G_00B504_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
+#define   C_00B504_MEM_BASE                                           0xFFFFFF00 /* ~0xFF */
+#define R_00B508_SPI_SHADER_TMA_LO_LS                                   0x00B508 /* LO half: no field macros are defined for it */
+#define R_00B50C_SPI_SHADER_TMA_HI_LS                                   0x00B50C /* HI half: only MEM_BASE is described */
+#define   S_00B50C_MEM_BASE(x)                                        (((x) & 0xFF) << 0) /* pack x into bits 7:0 */
+#define   G_00B50C_MEM_BASE(x)                                        (((x) >> 0) & 0xFF) /* extract bits 7:0 */
+#define   C_00B50C_MEM_BASE                                           0xFFFFFF00 /* ~0xFF */
 /* CIK */
 #define R_00B51C_SPI_SHADER_PGM_RSRC3_LS                                0x00B51C
 #define   S_00B51C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
@@ -4917,6 +6477,11 @@
 #define   G_00B51C_LOCK_LOW_THRESHOLD(x)                              (((x) >> 22) & 0x0F)
 #define   C_00B51C_LOCK_LOW_THRESHOLD                                 0xFC3FFFFF
 /*     */
+/* VI */
+#define   S_00B51C_GROUP_FIFO_DEPTH(x)                                (((x) & 0x3F) << 26) /* pack x into bits 31:26 */
+#define   G_00B51C_GROUP_FIFO_DEPTH(x)                                (((x) >> 26) & 0x3F) /* extract bits 31:26 */
+#define   C_00B51C_GROUP_FIFO_DEPTH                                   0x03FFFFFF /* ~(0x3F << 26) */
+/*    */
 #define R_00B520_SPI_SHADER_PGM_LO_LS                                   0x00B520
 #define R_00B524_SPI_SHADER_PGM_HI_LS                                   0x00B524
 #define   S_00B524_MEM_BASE(x)                                        (((x) & 0xFF) << 0)
@@ -4965,6 +6530,9 @@
 #define   S_00B52C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B52C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B52C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B52C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack x into bit 6, between USER_SGPR (5:1) and LDS_SIZE (15:7) */
+#define   G_00B52C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
+#define   C_00B52C_TRAP_PRESENT                                       0xFFFFFFBF /* ~(0x1 << 6) */
 #define   S_00B52C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 7)
 #define   G_00B52C_LDS_SIZE(x)                                        (((x) >> 7) & 0x1FF)
 #define   C_00B52C_LDS_SIZE                                           0xFFFF007F
@@ -4972,6 +6540,21 @@
 #define   G_00B52C_EXCP_EN(x)                                         (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
 #define   C_00B52C_EXCP_EN                                            0xFF80FFFF /* mask is 0x1FF on CIK */
 #define R_00B530_SPI_SHADER_USER_DATA_LS_0                              0x00B530
+#define R_00B534_SPI_SHADER_USER_DATA_LS_1                              0x00B534 /* LS_1..LS_15 continue from LS_0 in 4-byte steps, ending at 0x00B56C */
+#define R_00B538_SPI_SHADER_USER_DATA_LS_2                              0x00B538
+#define R_00B53C_SPI_SHADER_USER_DATA_LS_3                              0x00B53C
+#define R_00B540_SPI_SHADER_USER_DATA_LS_4                              0x00B540
+#define R_00B544_SPI_SHADER_USER_DATA_LS_5                              0x00B544
+#define R_00B548_SPI_SHADER_USER_DATA_LS_6                              0x00B548
+#define R_00B54C_SPI_SHADER_USER_DATA_LS_7                              0x00B54C
+#define R_00B550_SPI_SHADER_USER_DATA_LS_8                              0x00B550
+#define R_00B554_SPI_SHADER_USER_DATA_LS_9                              0x00B554
+#define R_00B558_SPI_SHADER_USER_DATA_LS_10                             0x00B558
+#define R_00B55C_SPI_SHADER_USER_DATA_LS_11                             0x00B55C
+#define R_00B560_SPI_SHADER_USER_DATA_LS_12                             0x00B560
+#define R_00B564_SPI_SHADER_USER_DATA_LS_13                             0x00B564
+#define R_00B568_SPI_SHADER_USER_DATA_LS_14                             0x00B568
+#define R_00B56C_SPI_SHADER_USER_DATA_LS_15                             0x00B56C
 #define R_00B800_COMPUTE_DISPATCH_INITIATOR                             0x00B800
 #define   S_00B800_COMPUTE_SHADER_EN(x)                               (((x) & 0x1) << 0)
 #define   G_00B800_COMPUTE_SHADER_EN(x)                               (((x) >> 0) & 0x1)
@@ -5042,6 +6625,16 @@
 #define   S_00B82C_MAX_WAVE_ID(x)                                     (((x) & 0xFFF) << 0)
 #define   G_00B82C_MAX_WAVE_ID(x)                                     (((x) >> 0) & 0xFFF)
 #define   C_00B82C_MAX_WAVE_ID                                        0xFFFFF000
+/* CIK */
+#define R_00B828_COMPUTE_PIPELINESTAT_ENABLE                            0x00B828
+#define   S_00B828_PIPELINESTAT_ENABLE(x)                             (((x) & 0x1) << 0) /* pack x into bit 0 */
+#define   G_00B828_PIPELINESTAT_ENABLE(x)                             (((x) >> 0) & 0x1) /* extract bit 0 */
+#define   C_00B828_PIPELINESTAT_ENABLE                                0xFFFFFFFE /* ~0x1 */
+#define R_00B82C_COMPUTE_PERFCOUNT_ENABLE                               0x00B82C /* same 0x00B82C offset as the 00B82C_MAX_WAVE_ID field above; this name sits inside the CIK section */
+#define   S_00B82C_PERFCOUNT_ENABLE(x)                                (((x) & 0x1) << 0) /* pack x into bit 0 */
+#define   G_00B82C_PERFCOUNT_ENABLE(x)                                (((x) >> 0) & 0x1) /* extract bit 0 */
+#define   C_00B82C_PERFCOUNT_ENABLE                                   0xFFFFFFFE /* ~0x1 */
+/*     */
 #define R_00B830_COMPUTE_PGM_LO                                         0x00B830
 #define R_00B834_COMPUTE_PGM_HI                                         0x00B834
 #define   S_00B834_DATA(x)                                            (((x) & 0xFF) << 0)
@@ -5052,6 +6645,16 @@
 #define   G_00B834_INST_ATC(x)                                        (((x) >> 8) & 0x1)
 #define   C_00B834_INST_ATC                                           0xFFFFFEFF
 /*     */
+#define R_00B838_COMPUTE_TBA_LO                                         0x00B838 /* LO half: no field macros are defined for it */
+#define R_00B83C_COMPUTE_TBA_HI                                         0x00B83C /* HI half: only DATA is described */
+#define   S_00B83C_DATA(x)                                            (((x) & 0xFF) << 0) /* pack x into bits 7:0 */
+#define   G_00B83C_DATA(x)                                            (((x) >> 0) & 0xFF) /* extract bits 7:0 */
+#define   C_00B83C_DATA                                               0xFFFFFF00 /* ~0xFF */
+#define R_00B840_COMPUTE_TMA_LO                                         0x00B840 /* LO half: no field macros are defined for it */
+#define R_00B844_COMPUTE_TMA_HI                                         0x00B844 /* HI half: only DATA is described */
+#define   S_00B844_DATA(x)                                            (((x) & 0xFF) << 0) /* pack x into bits 7:0 */
+#define   G_00B844_DATA(x)                                            (((x) >> 0) & 0xFF) /* extract bits 7:0 */
+#define   C_00B844_DATA                                               0xFFFFFF00 /* ~0xFF */
 #define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
 #define   S_00B848_VGPRS(x)                                           (((x) & 0x3F) << 0)
 #define   G_00B848_VGPRS(x)                                           (((x) >> 0) & 0x3F)
@@ -5092,6 +6695,9 @@
 #define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
 #define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
 #define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
+#define   S_00B84C_TRAP_PRESENT(x)                                    (((x) & 0x1) << 6) /* pack x into bit 6, between USER_SGPR (5:1) and TGID_X_EN (7) */
+#define   G_00B84C_TRAP_PRESENT(x)                                    (((x) >> 6) & 0x1) /* extract bit 6 */
+#define   C_00B84C_TRAP_PRESENT                                       0xFFFFFFBF /* ~(0x1 << 6) */
 #define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
 #define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
 #define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
@@ -5118,6 +6724,10 @@
 #define   S_00B84C_EXCP_EN(x)                                         (((x) & 0x7F) << 24)
 #define   G_00B84C_EXCP_EN(x)                                         (((x) >> 24) & 0x7F)
 #define   C_00B84C_EXCP_EN                                            0x80FFFFFF
+#define R_00B850_COMPUTE_VMID                                           0x00B850 /* fills the 4-byte slot between 0x00B84C and 0x00B854 */
+#define   S_00B850_DATA(x)                                            (((x) & 0x0F) << 0) /* pack x into bits 3:0 */
+#define   G_00B850_DATA(x)                                            (((x) >> 0) & 0x0F) /* extract bits 3:0 */
+#define   C_00B850_DATA                                               0xFFFFFFF0 /* ~0x0F */
 #define R_00B854_COMPUTE_RESOURCE_LIMITS                                0x00B854
 #define   S_00B854_WAVES_PER_SH(x)                                    (((x) & 0x3F) << 0) /* mask is 0x3FF on CIK */
 #define   G_00B854_WAVES_PER_SH(x)                                    (((x) >> 0) & 0x3F) /* mask is 0x3FF on CIK */
@@ -5160,7 +6770,84 @@
 #define   S_00B860_WAVESIZE(x)                                        (((x) & 0x1FFF) << 12)
 #define   G_00B860_WAVESIZE(x)                                        (((x) >> 12) & 0x1FFF)
 #define   C_00B860_WAVESIZE                                           0xFE000FFF
+/* CIK */
+#define R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2                         0x00B864 /* two 16-bit halves: SH0_CU_EN (low) and SH1_CU_EN (high) */
+#define   S_00B864_SH0_CU_EN(x)                                       (((x) & 0xFFFF) << 0) /* pack x into bits 15:0 */
+#define   G_00B864_SH0_CU_EN(x)                                       (((x) >> 0) & 0xFFFF) /* extract bits 15:0 */
+#define   C_00B864_SH0_CU_EN                                          0xFFFF0000 /* ~0xFFFF */
+#define   S_00B864_SH1_CU_EN(x)                                       (((x) & 0xFFFF) << 16) /* pack x into bits 31:16 */
+#define   G_00B864_SH1_CU_EN(x)                                       (((x) >> 16) & 0xFFFF) /* extract bits 31:16 */
+#define   C_00B864_SH1_CU_EN                                          0x0000FFFF /* ~(0xFFFF << 16) */
+#define R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3                         0x00B868 /* same field layout as 00B864 */
+#define   S_00B868_SH0_CU_EN(x)                                       (((x) & 0xFFFF) << 0) /* pack x into bits 15:0 */
+#define   G_00B868_SH0_CU_EN(x)                                       (((x) >> 0) & 0xFFFF) /* extract bits 15:0 */
+#define   C_00B868_SH0_CU_EN                                          0xFFFF0000 /* ~0xFFFF */
+#define   S_00B868_SH1_CU_EN(x)                                       (((x) & 0xFFFF) << 16) /* pack x into bits 31:16 */
+#define   G_00B868_SH1_CU_EN(x)                                       (((x) >> 16) & 0xFFFF) /* extract bits 31:16 */
+#define   C_00B868_SH1_CU_EN                                          0x0000FFFF /* ~(0xFFFF << 16) */
+#define R_00B86C_COMPUTE_RESTART_X                                      0x00B86C /* RESTART_X/Y/Z: three consecutive offsets, no field macros */
+#define R_00B870_COMPUTE_RESTART_Y                                      0x00B870
+#define R_00B874_COMPUTE_RESTART_Z                                      0x00B874
+#define R_00B87C_COMPUTE_MISC_RESERVED                                  0x00B87C /* note: nothing in this hunk names 0x00B878 */
+#define   S_00B87C_SEND_SEID(x)                                       (((x) & 0x03) << 0) /* pack x into bits 1:0 */
+#define   G_00B87C_SEND_SEID(x)                                       (((x) >> 0) & 0x03) /* extract bits 1:0 */
+#define   C_00B87C_SEND_SEID                                          0xFFFFFFFC /* ~0x03 */
+#define   S_00B87C_RESERVED2(x)                                       (((x) & 0x1) << 2) /* pack x into bit 2 */
+#define   G_00B87C_RESERVED2(x)                                       (((x) >> 2) & 0x1) /* extract bit 2 */
+#define   C_00B87C_RESERVED2                                          0xFFFFFFFB /* ~(0x1 << 2) */
+#define   S_00B87C_RESERVED3(x)                                       (((x) & 0x1) << 3) /* pack x into bit 3 */
+#define   G_00B87C_RESERVED3(x)                                       (((x) >> 3) & 0x1) /* extract bit 3 */
+#define   C_00B87C_RESERVED3                                          0xFFFFFFF7 /* ~(0x1 << 3) */
+#define   S_00B87C_RESERVED4(x)                                       (((x) & 0x1) << 4) /* pack x into bit 4 */
+#define   G_00B87C_RESERVED4(x)                                       (((x) >> 4) & 0x1) /* extract bit 4 */
+#define   C_00B87C_RESERVED4                                          0xFFFFFFEF /* ~(0x1 << 4) */
+/* VI */
+#define   S_00B87C_WAVE_ID_BASE(x)                                    (((x) & 0xFFF) << 5) /* pack x into bits 16:5 */
+#define   G_00B87C_WAVE_ID_BASE(x)                                    (((x) >> 5) & 0xFFF) /* extract bits 16:5 */
+#define   C_00B87C_WAVE_ID_BASE                                       0xFFFE001F /* ~(0xFFF << 5) */
+#define R_00B880_COMPUTE_DISPATCH_ID                                    0x00B880
+#define R_00B884_COMPUTE_THREADGROUP_ID                                 0x00B884
+#define R_00B888_COMPUTE_RELAUNCH                                       0x00B888 /* PAYLOAD, IS_EVENT and IS_STATE together cover all 32 bits */
+#define   S_00B888_PAYLOAD(x)                                         (((x) & 0x3FFFFFFF) << 0) /* pack x into bits 29:0 */
+#define   G_00B888_PAYLOAD(x)                                         (((x) >> 0) & 0x3FFFFFFF) /* extract bits 29:0 */
+#define   C_00B888_PAYLOAD                                            0xC0000000 /* ~0x3FFFFFFF */
+#define   S_00B888_IS_EVENT(x)                                        (((x) & 0x1) << 30) /* pack x into bit 30 */
+#define   G_00B888_IS_EVENT(x)                                        (((x) >> 30) & 0x1) /* extract bit 30 */
+#define   C_00B888_IS_EVENT                                           0xBFFFFFFF /* ~(0x1 << 30) */
+#define   S_00B888_IS_STATE(x)                                        (((x) & 0x1) << 31) /* pack x into bit 31 */
+#define   G_00B888_IS_STATE(x)                                        (((x) >> 31) & 0x1) /* extract bit 31 */
+#define   C_00B888_IS_STATE                                           0x7FFFFFFF /* ~(0x1 << 31) */
+#define R_00B88C_COMPUTE_WAVE_RESTORE_ADDR_LO                           0x00B88C /* LO half: no field macros are defined for it */
+#define R_00B890_COMPUTE_WAVE_RESTORE_ADDR_HI                           0x00B890 /* HI half: only ADDR is described */
+#define   S_00B890_ADDR(x)                                            (((x) & 0xFFFF) << 0) /* pack x into bits 15:0 */
+#define   G_00B890_ADDR(x)                                            (((x) >> 0) & 0xFFFF) /* extract bits 15:0 */
+#define   C_00B890_ADDR                                               0xFFFF0000 /* ~0xFFFF */
+#define R_00B894_COMPUTE_WAVE_RESTORE_CONTROL                           0x00B894
+#define   S_00B894_ATC(x)                                             (((x) & 0x1) << 0) /* pack x into bit 0 */
+#define   G_00B894_ATC(x)                                             (((x) >> 0) & 0x1) /* extract bit 0 */
+#define   C_00B894_ATC                                                0xFFFFFFFE /* ~0x1 */
+#define   S_00B894_MTYPE(x)                                           (((x) & 0x03) << 1) /* pack x into bits 2:1 */
+#define   G_00B894_MTYPE(x)                                           (((x) >> 1) & 0x03) /* extract bits 2:1 */
+#define   C_00B894_MTYPE                                              0xFFFFFFF9 /* ~(0x03 << 1) */
+/*    */
+/*     */
 #define R_00B900_COMPUTE_USER_DATA_0                                    0x00B900
+#define R_00B904_COMPUTE_USER_DATA_1                                    0x00B904 /* USER_DATA_1..15 continue from USER_DATA_0 in 4-byte steps, ending at 0x00B93C */
+#define R_00B908_COMPUTE_USER_DATA_2                                    0x00B908
+#define R_00B90C_COMPUTE_USER_DATA_3                                    0x00B90C
+#define R_00B910_COMPUTE_USER_DATA_4                                    0x00B910
+#define R_00B914_COMPUTE_USER_DATA_5                                    0x00B914
+#define R_00B918_COMPUTE_USER_DATA_6                                    0x00B918
+#define R_00B91C_COMPUTE_USER_DATA_7                                    0x00B91C
+#define R_00B920_COMPUTE_USER_DATA_8                                    0x00B920
+#define R_00B924_COMPUTE_USER_DATA_9                                    0x00B924
+#define R_00B928_COMPUTE_USER_DATA_10                                   0x00B928
+#define R_00B92C_COMPUTE_USER_DATA_11                                   0x00B92C
+#define R_00B930_COMPUTE_USER_DATA_12                                   0x00B930
+#define R_00B934_COMPUTE_USER_DATA_13                                   0x00B934
+#define R_00B938_COMPUTE_USER_DATA_14                                   0x00B938
+#define R_00B93C_COMPUTE_USER_DATA_15                                   0x00B93C
+#define R_00B9FC_COMPUTE_NOWHERE                                        0x00B9FC /* 0xC0 bytes past USER_DATA_15; no field macros */
 #define R_028000_DB_RENDER_CONTROL                                      0x028000
 #define   S_028000_DEPTH_CLEAR_ENABLE(x)                              (((x) & 0x1) << 0)
 #define   G_028000_DEPTH_CLEAR_ENABLE(x)                              (((x) >> 0) & 0x1)
@@ -5189,6 +6876,11 @@
 #define   S_028000_COPY_SAMPLE(x)                                     (((x) & 0x0F) << 8)
 #define   G_028000_COPY_SAMPLE(x)                                     (((x) >> 8) & 0x0F)
 #define   C_028000_COPY_SAMPLE                                        0xFFFFF0FF
+/* VI */
+#define   S_028000_DECOMPRESS_ENABLE(x)                               (((x) & 0x1) << 12) /* pack x into bit 12, directly above COPY_SAMPLE (11:8) */
+#define   G_028000_DECOMPRESS_ENABLE(x)                               (((x) >> 12) & 0x1) /* extract bit 12 */
+#define   C_028000_DECOMPRESS_ENABLE                                  0xFFFFEFFF /* ~(0x1 << 12) */
+/*    */
 #define R_028004_DB_COUNT_CONTROL                                       0x028004
 #define   S_028004_ZPASS_INCREMENT_DISABLE(x)                         (((x) & 0x1) << 0)
 #define   G_028004_ZPASS_INCREMENT_DISABLE(x)                         (((x) >> 0) & 0x1)
@@ -5467,9 +7159,6 @@
 #define   S_028040_NUM_SAMPLES(x)                                     (((x) & 0x03) << 2)
 #define   G_028040_NUM_SAMPLES(x)                                     (((x) >> 2) & 0x03)
 #define   C_028040_NUM_SAMPLES                                        0xFFFFFFF3
-#define   S_028040_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK */
-#define   G_028040_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
-#define   C_028040_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
 /* CIK */
 #define   S_028040_TILE_SPLIT(x)                                      (((x) & 0x07) << 13)
 #define   G_028040_TILE_SPLIT(x)                                      (((x) >> 13) & 0x07)
@@ -5482,6 +7171,14 @@
 #define     V_028040_ADDR_SURF_TILE_SPLIT_2KB                       0x05
 #define     V_028040_ADDR_SURF_TILE_SPLIT_4KB                       0x06
 /*     */
+#define   S_028040_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK; bits 22:20, re-added here below TILE_SPLIT (bits 15:13) */
+#define   G_028040_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
+#define   C_028040_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
+/* VI */
+#define   S_028040_DECOMPRESS_ON_N_ZPLANES(x)                         (((x) & 0x0F) << 23) /* pack x into bits 26:23 */
+#define   G_028040_DECOMPRESS_ON_N_ZPLANES(x)                         (((x) >> 23) & 0x0F) /* extract bits 26:23 */
+#define   C_028040_DECOMPRESS_ON_N_ZPLANES                            0xF87FFFFF /* ~(0x0F << 23) */
+/*    */
 #define   S_028040_ALLOW_EXPCLEAR(x)                                  (((x) & 0x1) << 27)
 #define   G_028040_ALLOW_EXPCLEAR(x)                                  (((x) >> 27) & 0x1)
 #define   C_028040_ALLOW_EXPCLEAR                                     0xF7FFFFFF
@@ -5491,6 +7188,11 @@
 #define   S_028040_TILE_SURFACE_ENABLE(x)                             (((x) & 0x1) << 29)
 #define   G_028040_TILE_SURFACE_ENABLE(x)                             (((x) >> 29) & 0x1)
 #define   C_028040_TILE_SURFACE_ENABLE                                0xDFFFFFFF
+/* VI */
+#define   S_028040_CLEAR_DISALLOWED(x)                                (((x) & 0x1) << 30) /* pack x into bit 30, between TILE_SURFACE_ENABLE (29) and ZRANGE_PRECISION (31) */
+#define   G_028040_CLEAR_DISALLOWED(x)                                (((x) >> 30) & 0x1) /* extract bit 30 */
+#define   C_028040_CLEAR_DISALLOWED                                   0xBFFFFFFF /* ~(0x1 << 30) */
+/*    */
 #define   S_028040_ZRANGE_PRECISION(x)                                (((x) & 0x1) << 31)
 #define   G_028040_ZRANGE_PRECISION(x)                                (((x) >> 31) & 0x1)
 #define   C_028040_ZRANGE_PRECISION                                   0x7FFFFFFF
@@ -5500,9 +7202,6 @@
 #define   C_028044_FORMAT                                             0xFFFFFFFE
 #define     V_028044_STENCIL_INVALID                                0x00
 #define     V_028044_STENCIL_8                                      0x01
-#define   S_028044_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK */
-#define   G_028044_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
-#define   C_028044_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
 /* CIK */
 #define   S_028044_TILE_SPLIT(x)                                      (((x) & 0x07) << 13)
 #define   G_028044_TILE_SPLIT(x)                                      (((x) >> 13) & 0x07)
@@ -5515,12 +7214,20 @@
 #define     V_028044_ADDR_SURF_TILE_SPLIT_2KB                       0x05
 #define     V_028044_ADDR_SURF_TILE_SPLIT_4KB                       0x06
 /*     */
+#define   S_028044_TILE_MODE_INDEX(x)                                 (((x) & 0x07) << 20) /* not on CIK; bits 22:20, re-added here below TILE_SPLIT (bits 15:13) */
+#define   G_028044_TILE_MODE_INDEX(x)                                 (((x) >> 20) & 0x07) /* not on CIK */
+#define   C_028044_TILE_MODE_INDEX                                    0xFF8FFFFF /* not on CIK */
 #define   S_028044_ALLOW_EXPCLEAR(x)                                  (((x) & 0x1) << 27)
 #define   G_028044_ALLOW_EXPCLEAR(x)                                  (((x) >> 27) & 0x1)
 #define   C_028044_ALLOW_EXPCLEAR                                     0xF7FFFFFF
 #define   S_028044_TILE_STENCIL_DISABLE(x)                            (((x) & 0x1) << 29)
 #define   G_028044_TILE_STENCIL_DISABLE(x)                            (((x) >> 29) & 0x1)
 #define   C_028044_TILE_STENCIL_DISABLE                               0xDFFFFFFF
+/* VI */
+#define   S_028044_CLEAR_DISALLOWED(x)                                (((x) & 0x1) << 30) /* pack x into bit 30, directly above TILE_STENCIL_DISABLE (29) */
+#define   G_028044_CLEAR_DISALLOWED(x)                                (((x) >> 30) & 0x1) /* extract bit 30 */
+#define   C_028044_CLEAR_DISALLOWED                                   0xBFFFFFFF /* ~(0x1 << 30) */
+/*    */
 #define R_028048_DB_Z_READ_BASE                                         0x028048
 #define R_02804C_DB_STENCIL_READ_BASE                                   0x02804C
 #define R_028050_DB_Z_WRITE_BASE                                        0x028050
@@ -5542,7 +7249,13 @@
 #define   S_028084_ADDRESS(x)                                         (((x) & 0xFF) << 0)
 #define   G_028084_ADDRESS(x)                                         (((x) >> 0) & 0xFF)
 #define   C_028084_ADDRESS                                            0xFFFFFF00
-/* */
+#define R_0281E8_COHER_DEST_BASE_HI_0                                   0x0281E8 /* HI_0..HI_3: four consecutive 4-byte offsets, no field macros */
+#define R_0281EC_COHER_DEST_BASE_HI_1                                   0x0281EC
+#define R_0281F0_COHER_DEST_BASE_HI_2                                   0x0281F0
+#define R_0281F4_COHER_DEST_BASE_HI_3                                   0x0281F4
+/*     */
+#define R_0281F8_COHER_DEST_BASE_2                                      0x0281F8 /* only BASE_2/BASE_3 live here; BASE_0/BASE_1 are defined at 0x028248/0x02824C */
+#define R_0281FC_COHER_DEST_BASE_3                                      0x0281FC
 #define R_028200_PA_SC_WINDOW_OFFSET                                    0x028200
 #define   S_028200_WINDOW_X_OFFSET(x)                                 (((x) & 0xFFFF) << 0)
 #define   G_028200_WINDOW_X_OFFSET(x)                                 (((x) >> 0) & 0xFFFF)
@@ -5687,6 +7400,8 @@
 #define   S_028244_BR_Y(x)                                            (((x) & 0x7FFF) << 16)
 #define   G_028244_BR_Y(x)                                            (((x) >> 16) & 0x7FFF)
 #define   C_028244_BR_Y                                               0x8000FFFF
+#define R_028248_COHER_DEST_BASE_0                                      0x028248 /* BASE_0/BASE_1 fill the two slots between 0x028244 and 0x028250; BASE_2/BASE_3 are at 0x0281F8/0x0281FC */
+#define R_02824C_COHER_DEST_BASE_1                                      0x02824C
 #define R_028250_PA_SC_VPORT_SCISSOR_0_TL                               0x028250
 #define   S_028250_TL_X(x)                                            (((x) & 0x7FFF) << 0)
 #define   G_028250_TL_X(x)                                            (((x) >> 0) & 0x7FFF)
@@ -5704,8 +7419,68 @@
 #define   S_028254_BR_Y(x)                                            (((x) & 0x7FFF) << 16)
 #define   G_028254_BR_Y(x)                                            (((x) >> 16) & 0x7FFF)
 #define   C_028254_BR_Y                                               0x8000FFFF
+#define R_028258_PA_SC_VPORT_SCISSOR_1_TL                               0x028258 /* SCISSOR_1..15: TL/BR pairs, 8 bytes per index, ending at 0x0282CC; offsets only - presumably the 028250/028254 field macros are reused (confirm) */
+#define R_02825C_PA_SC_VPORT_SCISSOR_1_BR                               0x02825C
+#define R_028260_PA_SC_VPORT_SCISSOR_2_TL                               0x028260
+#define R_028264_PA_SC_VPORT_SCISSOR_2_BR                               0x028264
+#define R_028268_PA_SC_VPORT_SCISSOR_3_TL                               0x028268
+#define R_02826C_PA_SC_VPORT_SCISSOR_3_BR                               0x02826C
+#define R_028270_PA_SC_VPORT_SCISSOR_4_TL                               0x028270
+#define R_028274_PA_SC_VPORT_SCISSOR_4_BR                               0x028274
+#define R_028278_PA_SC_VPORT_SCISSOR_5_TL                               0x028278
+#define R_02827C_PA_SC_VPORT_SCISSOR_5_BR                               0x02827C
+#define R_028280_PA_SC_VPORT_SCISSOR_6_TL                               0x028280
+#define R_028284_PA_SC_VPORT_SCISSOR_6_BR                               0x028284
+#define R_028288_PA_SC_VPORT_SCISSOR_7_TL                               0x028288
+#define R_02828C_PA_SC_VPORT_SCISSOR_7_BR                               0x02828C
+#define R_028290_PA_SC_VPORT_SCISSOR_8_TL                               0x028290
+#define R_028294_PA_SC_VPORT_SCISSOR_8_BR                               0x028294
+#define R_028298_PA_SC_VPORT_SCISSOR_9_TL                               0x028298
+#define R_02829C_PA_SC_VPORT_SCISSOR_9_BR                               0x02829C
+#define R_0282A0_PA_SC_VPORT_SCISSOR_10_TL                              0x0282A0
+#define R_0282A4_PA_SC_VPORT_SCISSOR_10_BR                              0x0282A4
+#define R_0282A8_PA_SC_VPORT_SCISSOR_11_TL                              0x0282A8
+#define R_0282AC_PA_SC_VPORT_SCISSOR_11_BR                              0x0282AC
+#define R_0282B0_PA_SC_VPORT_SCISSOR_12_TL                              0x0282B0
+#define R_0282B4_PA_SC_VPORT_SCISSOR_12_BR                              0x0282B4
+#define R_0282B8_PA_SC_VPORT_SCISSOR_13_TL                              0x0282B8
+#define R_0282BC_PA_SC_VPORT_SCISSOR_13_BR                              0x0282BC
+#define R_0282C0_PA_SC_VPORT_SCISSOR_14_TL                              0x0282C0
+#define R_0282C4_PA_SC_VPORT_SCISSOR_14_BR                              0x0282C4
+#define R_0282C8_PA_SC_VPORT_SCISSOR_15_TL                              0x0282C8
+#define R_0282CC_PA_SC_VPORT_SCISSOR_15_BR                              0x0282CC
 #define R_0282D0_PA_SC_VPORT_ZMIN_0                                     0x0282D0
 #define R_0282D4_PA_SC_VPORT_ZMAX_0                                     0x0282D4
+#define R_0282D8_PA_SC_VPORT_ZMIN_1                                     0x0282D8 /* ZMIN/ZMAX_1..15: pairs, 8 bytes per index, ending at 0x02834C */
+#define R_0282DC_PA_SC_VPORT_ZMAX_1                                     0x0282DC
+#define R_0282E0_PA_SC_VPORT_ZMIN_2                                     0x0282E0
+#define R_0282E4_PA_SC_VPORT_ZMAX_2                                     0x0282E4
+#define R_0282E8_PA_SC_VPORT_ZMIN_3                                     0x0282E8
+#define R_0282EC_PA_SC_VPORT_ZMAX_3                                     0x0282EC
+#define R_0282F0_PA_SC_VPORT_ZMIN_4                                     0x0282F0
+#define R_0282F4_PA_SC_VPORT_ZMAX_4                                     0x0282F4
+#define R_0282F8_PA_SC_VPORT_ZMIN_5                                     0x0282F8
+#define R_0282FC_PA_SC_VPORT_ZMAX_5                                     0x0282FC
+#define R_028300_PA_SC_VPORT_ZMIN_6                                     0x028300
+#define R_028304_PA_SC_VPORT_ZMAX_6                                     0x028304
+#define R_028308_PA_SC_VPORT_ZMIN_7                                     0x028308
+#define R_02830C_PA_SC_VPORT_ZMAX_7                                     0x02830C
+#define R_028310_PA_SC_VPORT_ZMIN_8                                     0x028310
+#define R_028314_PA_SC_VPORT_ZMAX_8                                     0x028314
+#define R_028318_PA_SC_VPORT_ZMIN_9                                     0x028318
+#define R_02831C_PA_SC_VPORT_ZMAX_9                                     0x02831C
+#define R_028320_PA_SC_VPORT_ZMIN_10                                    0x028320
+#define R_028324_PA_SC_VPORT_ZMAX_10                                    0x028324
+#define R_028328_PA_SC_VPORT_ZMIN_11                                    0x028328
+#define R_02832C_PA_SC_VPORT_ZMAX_11                                    0x02832C
+#define R_028330_PA_SC_VPORT_ZMIN_12                                    0x028330
+#define R_028334_PA_SC_VPORT_ZMAX_12                                    0x028334
+#define R_028338_PA_SC_VPORT_ZMIN_13                                    0x028338
+#define R_02833C_PA_SC_VPORT_ZMAX_13                                    0x02833C
+#define R_028340_PA_SC_VPORT_ZMIN_14                                    0x028340
+#define R_028344_PA_SC_VPORT_ZMAX_14                                    0x028344
+#define R_028348_PA_SC_VPORT_ZMIN_15                                    0x028348
+#define R_02834C_PA_SC_VPORT_ZMAX_15                                    0x02834C
 #define R_028350_PA_SC_RASTER_CONFIG                                    0x028350
 #define   S_028350_RB_MAP_PKR0(x)                                     (((x) & 0x03) << 0)
 #define   G_028350_RB_MAP_PKR0(x)                                     (((x) >> 0) & 0x03)
@@ -5827,6 +7602,13 @@
 #define     V_028354_RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE        0x01
 #define     V_028354_RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE        0x02
 #define     V_028354_RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE        0x03
+#define R_028358_PA_SC_SCREEN_EXTENT_CONTROL                            0x028358
+#define   S_028358_SLICE_EVEN_ENABLE(x)                               (((x) & 0x03) << 0)
+#define   G_028358_SLICE_EVEN_ENABLE(x)                               (((x) >> 0) & 0x03)
+#define   C_028358_SLICE_EVEN_ENABLE                                  0xFFFFFFFC
+#define   S_028358_SLICE_ODD_ENABLE(x)                                (((x) & 0x03) << 2)
+#define   G_028358_SLICE_ODD_ENABLE(x)                                (((x) >> 2) & 0x03)
+#define   C_028358_SLICE_ODD_ENABLE                                   0xFFFFFFF3
 /*     */
 #define R_028400_VGT_MAX_VTX_INDX                                       0x028400
 #define R_028404_VGT_MIN_VTX_INDX                                       0x028404
@@ -5836,6 +7618,18 @@
 #define R_028418_CB_BLEND_GREEN                                         0x028418
 #define R_02841C_CB_BLEND_BLUE                                          0x02841C
 #define R_028420_CB_BLEND_ALPHA                                         0x028420
+/* VI */
+#define R_028424_CB_DCC_CONTROL                                         0x028424
+#define   S_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((x) & 0x1) << 0)
+#define   G_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((x) >> 0) & 0x1)
+#define   C_028424_OVERWRITE_COMBINER_DISABLE                         0xFFFFFFFE
+#define   S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((x) & 0x1) << 1)
+#define   G_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((x) >> 1) & 0x1)
+#define   C_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE             0xFFFFFFFD
+#define   S_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) & 0x1F) << 2)
+#define   G_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) >> 2) & 0x1F)
+#define   C_028424_OVERWRITE_COMBINER_WATERMARK                       0xFFFFFF83
+/*    */
 #define R_02842C_DB_STENCIL_CONTROL                                     0x02842C
 #define   S_02842C_STENCILFAIL(x)                                     (((x) & 0x0F) << 0)
 #define   G_02842C_STENCILFAIL(x)                                     (((x) >> 0) & 0x0F)
@@ -5977,12 +7771,102 @@
 #define   S_028434_STENCILOPVAL_BF(x)                                 (((x) & 0xFF) << 24)
 #define   G_028434_STENCILOPVAL_BF(x)                                 (((x) >> 24) & 0xFF)
 #define   C_028434_STENCILOPVAL_BF                                    0x00FFFFFF
-#define R_02843C_PA_CL_VPORT_XSCALE_0                                   0x02843C
-#define R_028440_PA_CL_VPORT_XOFFSET_0                                  0x028440
-#define R_028444_PA_CL_VPORT_YSCALE_0                                   0x028444
-#define R_028448_PA_CL_VPORT_YOFFSET_0                                  0x028448
-#define R_02844C_PA_CL_VPORT_ZSCALE_0                                   0x02844C
-#define R_028450_PA_CL_VPORT_ZOFFSET_0                                  0x028450
+#define R_02843C_PA_CL_VPORT_XSCALE                                     0x02843C
+#define R_028440_PA_CL_VPORT_XOFFSET                                    0x028440
+#define R_028444_PA_CL_VPORT_YSCALE                                     0x028444
+#define R_028448_PA_CL_VPORT_YOFFSET                                    0x028448
+#define R_02844C_PA_CL_VPORT_ZSCALE                                     0x02844C
+#define R_028450_PA_CL_VPORT_ZOFFSET                                    0x028450
+#define R_028454_PA_CL_VPORT_XSCALE_1                                   0x028454
+#define R_028458_PA_CL_VPORT_XOFFSET_1                                  0x028458
+#define R_02845C_PA_CL_VPORT_YSCALE_1                                   0x02845C
+#define R_028460_PA_CL_VPORT_YOFFSET_1                                  0x028460
+#define R_028464_PA_CL_VPORT_ZSCALE_1                                   0x028464
+#define R_028468_PA_CL_VPORT_ZOFFSET_1                                  0x028468
+#define R_02846C_PA_CL_VPORT_XSCALE_2                                   0x02846C
+#define R_028470_PA_CL_VPORT_XOFFSET_2                                  0x028470
+#define R_028474_PA_CL_VPORT_YSCALE_2                                   0x028474
+#define R_028478_PA_CL_VPORT_YOFFSET_2                                  0x028478
+#define R_02847C_PA_CL_VPORT_ZSCALE_2                                   0x02847C
+#define R_028480_PA_CL_VPORT_ZOFFSET_2                                  0x028480
+#define R_028484_PA_CL_VPORT_XSCALE_3                                   0x028484
+#define R_028488_PA_CL_VPORT_XOFFSET_3                                  0x028488
+#define R_02848C_PA_CL_VPORT_YSCALE_3                                   0x02848C
+#define R_028490_PA_CL_VPORT_YOFFSET_3                                  0x028490
+#define R_028494_PA_CL_VPORT_ZSCALE_3                                   0x028494
+#define R_028498_PA_CL_VPORT_ZOFFSET_3                                  0x028498
+#define R_02849C_PA_CL_VPORT_XSCALE_4                                   0x02849C
+#define R_0284A0_PA_CL_VPORT_XOFFSET_4                                  0x0284A0
+#define R_0284A4_PA_CL_VPORT_YSCALE_4                                   0x0284A4
+#define R_0284A8_PA_CL_VPORT_YOFFSET_4                                  0x0284A8
+#define R_0284AC_PA_CL_VPORT_ZSCALE_4                                   0x0284AC
+#define R_0284B0_PA_CL_VPORT_ZOFFSET_4                                  0x0284B0
+#define R_0284B4_PA_CL_VPORT_XSCALE_5                                   0x0284B4
+#define R_0284B8_PA_CL_VPORT_XOFFSET_5                                  0x0284B8
+#define R_0284BC_PA_CL_VPORT_YSCALE_5                                   0x0284BC
+#define R_0284C0_PA_CL_VPORT_YOFFSET_5                                  0x0284C0
+#define R_0284C4_PA_CL_VPORT_ZSCALE_5                                   0x0284C4
+#define R_0284C8_PA_CL_VPORT_ZOFFSET_5                                  0x0284C8
+#define R_0284CC_PA_CL_VPORT_XSCALE_6                                   0x0284CC
+#define R_0284D0_PA_CL_VPORT_XOFFSET_6                                  0x0284D0
+#define R_0284D4_PA_CL_VPORT_YSCALE_6                                   0x0284D4
+#define R_0284D8_PA_CL_VPORT_YOFFSET_6                                  0x0284D8
+#define R_0284DC_PA_CL_VPORT_ZSCALE_6                                   0x0284DC
+#define R_0284E0_PA_CL_VPORT_ZOFFSET_6                                  0x0284E0
+#define R_0284E4_PA_CL_VPORT_XSCALE_7                                   0x0284E4
+#define R_0284E8_PA_CL_VPORT_XOFFSET_7                                  0x0284E8
+#define R_0284EC_PA_CL_VPORT_YSCALE_7                                   0x0284EC
+#define R_0284F0_PA_CL_VPORT_YOFFSET_7                                  0x0284F0
+#define R_0284F4_PA_CL_VPORT_ZSCALE_7                                   0x0284F4
+#define R_0284F8_PA_CL_VPORT_ZOFFSET_7                                  0x0284F8
+#define R_0284FC_PA_CL_VPORT_XSCALE_8                                   0x0284FC
+#define R_028500_PA_CL_VPORT_XOFFSET_8                                  0x028500
+#define R_028504_PA_CL_VPORT_YSCALE_8                                   0x028504
+#define R_028508_PA_CL_VPORT_YOFFSET_8                                  0x028508
+#define R_02850C_PA_CL_VPORT_ZSCALE_8                                   0x02850C
+#define R_028510_PA_CL_VPORT_ZOFFSET_8                                  0x028510
+#define R_028514_PA_CL_VPORT_XSCALE_9                                   0x028514
+#define R_028518_PA_CL_VPORT_XOFFSET_9                                  0x028518
+#define R_02851C_PA_CL_VPORT_YSCALE_9                                   0x02851C
+#define R_028520_PA_CL_VPORT_YOFFSET_9                                  0x028520
+#define R_028524_PA_CL_VPORT_ZSCALE_9                                   0x028524
+#define R_028528_PA_CL_VPORT_ZOFFSET_9                                  0x028528
+#define R_02852C_PA_CL_VPORT_XSCALE_10                                  0x02852C
+#define R_028530_PA_CL_VPORT_XOFFSET_10                                 0x028530
+#define R_028534_PA_CL_VPORT_YSCALE_10                                  0x028534
+#define R_028538_PA_CL_VPORT_YOFFSET_10                                 0x028538
+#define R_02853C_PA_CL_VPORT_ZSCALE_10                                  0x02853C
+#define R_028540_PA_CL_VPORT_ZOFFSET_10                                 0x028540
+#define R_028544_PA_CL_VPORT_XSCALE_11                                  0x028544
+#define R_028548_PA_CL_VPORT_XOFFSET_11                                 0x028548
+#define R_02854C_PA_CL_VPORT_YSCALE_11                                  0x02854C
+#define R_028550_PA_CL_VPORT_YOFFSET_11                                 0x028550
+#define R_028554_PA_CL_VPORT_ZSCALE_11                                  0x028554
+#define R_028558_PA_CL_VPORT_ZOFFSET_11                                 0x028558
+#define R_02855C_PA_CL_VPORT_XSCALE_12                                  0x02855C
+#define R_028560_PA_CL_VPORT_XOFFSET_12                                 0x028560
+#define R_028564_PA_CL_VPORT_YSCALE_12                                  0x028564
+#define R_028568_PA_CL_VPORT_YOFFSET_12                                 0x028568
+#define R_02856C_PA_CL_VPORT_ZSCALE_12                                  0x02856C
+#define R_028570_PA_CL_VPORT_ZOFFSET_12                                 0x028570
+#define R_028574_PA_CL_VPORT_XSCALE_13                                  0x028574
+#define R_028578_PA_CL_VPORT_XOFFSET_13                                 0x028578
+#define R_02857C_PA_CL_VPORT_YSCALE_13                                  0x02857C
+#define R_028580_PA_CL_VPORT_YOFFSET_13                                 0x028580
+#define R_028584_PA_CL_VPORT_ZSCALE_13                                  0x028584
+#define R_028588_PA_CL_VPORT_ZOFFSET_13                                 0x028588
+#define R_02858C_PA_CL_VPORT_XSCALE_14                                  0x02858C
+#define R_028590_PA_CL_VPORT_XOFFSET_14                                 0x028590
+#define R_028594_PA_CL_VPORT_YSCALE_14                                  0x028594
+#define R_028598_PA_CL_VPORT_YOFFSET_14                                 0x028598
+#define R_02859C_PA_CL_VPORT_ZSCALE_14                                  0x02859C
+#define R_0285A0_PA_CL_VPORT_ZOFFSET_14                                 0x0285A0
+#define R_0285A4_PA_CL_VPORT_XSCALE_15                                  0x0285A4
+#define R_0285A8_PA_CL_VPORT_XOFFSET_15                                 0x0285A8
+#define R_0285AC_PA_CL_VPORT_YSCALE_15                                  0x0285AC
+#define R_0285B0_PA_CL_VPORT_YOFFSET_15                                 0x0285B0
+#define R_0285B4_PA_CL_VPORT_ZSCALE_15                                  0x0285B4
+#define R_0285B8_PA_CL_VPORT_ZOFFSET_15                                 0x0285B8
 #define R_0285BC_PA_CL_UCP_0_X                                          0x0285BC
 #define R_0285C0_PA_CL_UCP_0_Y                                          0x0285C0
 #define R_0285C4_PA_CL_UCP_0_Z                                          0x0285C4
@@ -6029,6 +7913,26 @@
 #define   G_028644_DUP(x)                                             (((x) >> 18) & 0x1)
 #define   C_028644_DUP                                                0xFFFBFFFF
 /*     */
+/* VI */
+#define   S_028644_FP16_INTERP_MODE(x)                                (((x) & 0x1) << 19)
+#define   G_028644_FP16_INTERP_MODE(x)                                (((x) >> 19) & 0x1)
+#define   C_028644_FP16_INTERP_MODE                                   0xFFF7FFFF
+#define   S_028644_USE_DEFAULT_ATTR1(x)                               (((x) & 0x1) << 20)
+#define   G_028644_USE_DEFAULT_ATTR1(x)                               (((x) >> 20) & 0x1)
+#define   C_028644_USE_DEFAULT_ATTR1                                  0xFFEFFFFF
+#define   S_028644_DEFAULT_VAL_ATTR1(x)                               (((x) & 0x03) << 21)
+#define   G_028644_DEFAULT_VAL_ATTR1(x)                               (((x) >> 21) & 0x03)
+#define   C_028644_DEFAULT_VAL_ATTR1                                  0xFF9FFFFF
+#define   S_028644_PT_SPRITE_TEX_ATTR1(x)                             (((x) & 0x1) << 23)
+#define   G_028644_PT_SPRITE_TEX_ATTR1(x)                             (((x) >> 23) & 0x1)
+#define   C_028644_PT_SPRITE_TEX_ATTR1                                0xFF7FFFFF
+#define   S_028644_ATTR0_VALID(x)                                     (((x) & 0x1) << 24)
+#define   G_028644_ATTR0_VALID(x)                                     (((x) >> 24) & 0x1)
+#define   C_028644_ATTR0_VALID                                        0xFEFFFFFF
+#define   S_028644_ATTR1_VALID(x)                                     (((x) & 0x1) << 25)
+#define   G_028644_ATTR1_VALID(x)                                     (((x) >> 25) & 0x1)
+#define   C_028644_ATTR1_VALID                                        0xFDFFFFFF
+/*    */
 #define R_028648_SPI_PS_INPUT_CNTL_1                                    0x028648
 #define R_02864C_SPI_PS_INPUT_CNTL_2                                    0x02864C
 #define R_028650_SPI_PS_INPUT_CNTL_3                                    0x028650
@@ -6552,6 +8456,10 @@
 #define R_028794_CB_BLEND5_CONTROL                                      0x028794
 #define R_028798_CB_BLEND6_CONTROL                                      0x028798
 #define R_02879C_CB_BLEND7_CONTROL                                      0x02879C
+#define R_0287CC_CS_COPY_STATE                                          0x0287CC
+#define   S_0287CC_SRC_STATE_ID(x)                                    (((x) & 0x07) << 0)
+#define   G_0287CC_SRC_STATE_ID(x)                                    (((x) >> 0) & 0x07)
+#define   C_0287CC_SRC_STATE_ID                                       0xFFFFFFF8
 #define R_0287D4_PA_CL_POINT_X_RAD                                      0x0287D4
 #define R_0287D8_PA_CL_POINT_Y_RAD                                      0x0287D8
 #define R_0287DC_PA_CL_POINT_SIZE                                       0x0287DC
@@ -6581,6 +8489,10 @@
 #define   G_0287F0_USE_OPAQUE(x)                                      (((x) >> 6) & 0x1)
 #define   C_0287F0_USE_OPAQUE                                         0xFFFFFFBF
 #define R_0287F4_VGT_IMMED_DATA                                         0x0287F4 /* not on CIK */
+#define R_0287F8_VGT_EVENT_ADDRESS_REG                                  0x0287F8
+#define   S_0287F8_ADDRESS_LOW(x)                                     (((x) & 0xFFFFFFF) << 0)
+#define   G_0287F8_ADDRESS_LOW(x)                                     (((x) >> 0) & 0xFFFFFFF)
+#define   C_0287F8_ADDRESS_LOW                                        0xF0000000
 #define R_028800_DB_DEPTH_CONTROL                                       0x028800
 #define   S_028800_STENCIL_ENABLE(x)                                  (((x) & 0x1) << 0)
 #define   G_028800_STENCIL_ENABLE(x)                                  (((x) >> 0) & 0x1)
@@ -6637,36 +8549,42 @@
 #define   G_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(x)              (((x) >> 31) & 0x1)
 #define   C_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS                 0x7FFFFFFF
 #define R_028804_DB_EQAA                                                0x028804
-#define   S_028804_MAX_ANCHOR_SAMPLES(x)		(((x) & 0x7) << 0)
-#define   G_028804_MAX_ANCHOR_SAMPLES(x)		(((x) >> 0) & 0x7)
-#define   C_028804_MAX_ANCHOR_SAMPLES			(~(((~0) & 0x7) << 0))
-#define   S_028804_PS_ITER_SAMPLES(x)			(((x) & 0x7) << 4)
-#define   G_028804_PS_ITER_SAMPLES(x)			(((x) >> 4) & 0x7)
-#define   C_028804_PS_ITER_SAMPLES			(~(((~0) & 0x7) << 4))
-#define   S_028804_MASK_EXPORT_NUM_SAMPLES(x)		(((x) & 0x7) << 8)
-#define   G_028804_MASK_EXPORT_NUM_SAMPLES(x)		(((x) >> 8) & 0x7)
-#define   C_028804_MASK_EXPORT_NUM_SAMPLES		(~(((~0) & 0x7) << 8))
-#define   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)		(((x) & 0x7) << 12)
-#define   G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)		(((x) >> 12) & 0x7)
-#define   C_028804_ALPHA_TO_MASK_NUM_SAMPLES		(~(((~0) & 0x7) << 12))
-#define   S_028804_HIGH_QUALITY_INTERSECTIONS(x)	(((x) & 0x1) << 16)
-#define   G_028804_HIGH_QUALITY_INTERSECTIONS(x)	(((x) >> 16) & 0x1)
-#define   C_028804_HIGH_QUALITY_INTERSECTIONS		(~(((~0) & 0x1) << 16))
-#define   S_028804_INCOHERENT_EQAA_READS(x)		(((x) & 0x1) << 17)
-#define   G_028804_INCOHERENT_EQAA_READS(x)		(((x) >> 17) & 0x1)
-#define   C_028804_INCOHERENT_EQAA_READS		(~(((~0) & 0x1) << 17))
-#define   S_028804_INTERPOLATE_COMP_Z(x)		(((x) & 0x1) << 18)
-#define   G_028804_INTERPOLATE_COMP_Z(x)		(((x) >> 18) & 0x1)
-#define   C_028804_INTERPOLATE_COMP_Z			(~(((~0) >> 18) & 0x1))
-#define   S_028804_INTERPOLATE_SRC_Z(x)			(((x) & 0x1) << 19)
-#define   G_028804_INTERPOLATE_SRC_Z(x)			(((x) >> 19) & 0x1)
-#define   C_028804_INTERPOLATE_SRC_Z			(~(((~0) & 0x1) << 19))
-#define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) & 0x1) << 20)
-#define   G_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) >> 20) & 0x1)
-#define   C_028804_STATIC_ANCHOR_ASSOCIATIONS		(~(((~0) & 0x1) << 20))
-#define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) & 0x1) << 21)
-#define   G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) >> 21) & 0x1)
-#define   C_028804_ALPHA_TO_MASK_EQAA_DISABLE		(~(((~0) & 0x1) << 21))
+#define   S_028804_MAX_ANCHOR_SAMPLES(x)                              (((x) & 0x7) << 0)
+#define   G_028804_MAX_ANCHOR_SAMPLES(x)                              (((x) >> 0) & 0x07)
+#define   C_028804_MAX_ANCHOR_SAMPLES                                 0xFFFFFFF8
+#define   S_028804_PS_ITER_SAMPLES(x)                                 (((x) & 0x7) << 4)
+#define   G_028804_PS_ITER_SAMPLES(x)                                 (((x) >> 4) & 0x07)
+#define   C_028804_PS_ITER_SAMPLES                                    0xFFFFFF8F
+#define   S_028804_MASK_EXPORT_NUM_SAMPLES(x)                         (((x) & 0x7) << 8)
+#define   G_028804_MASK_EXPORT_NUM_SAMPLES(x)                         (((x) >> 8) & 0x07)
+#define   C_028804_MASK_EXPORT_NUM_SAMPLES                            0xFFFFF8FF
+#define   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)                       (((x) & 0x7) << 12)
+#define   G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)                       (((x) >> 12) & 0x07)
+#define   C_028804_ALPHA_TO_MASK_NUM_SAMPLES                          0xFFFF8FFF
+#define   S_028804_HIGH_QUALITY_INTERSECTIONS(x)                      (((x) & 0x1) << 16)
+#define   G_028804_HIGH_QUALITY_INTERSECTIONS(x)                      (((x) >> 16) & 0x1)
+#define   C_028804_HIGH_QUALITY_INTERSECTIONS                         0xFFFEFFFF
+#define   S_028804_INCOHERENT_EQAA_READS(x)                           (((x) & 0x1) << 17)
+#define   G_028804_INCOHERENT_EQAA_READS(x)                           (((x) >> 17) & 0x1)
+#define   C_028804_INCOHERENT_EQAA_READS                              0xFFFDFFFF
+#define   S_028804_INTERPOLATE_COMP_Z(x)                              (((x) & 0x1) << 18)
+#define   G_028804_INTERPOLATE_COMP_Z(x)                              (((x) >> 18) & 0x1)
+#define   C_028804_INTERPOLATE_COMP_Z                                 0xFFFBFFFF
+#define   S_028804_INTERPOLATE_SRC_Z(x)                               (((x) & 0x1) << 19)
+#define   G_028804_INTERPOLATE_SRC_Z(x)                               (((x) >> 19) & 0x1)
+#define   C_028804_INTERPOLATE_SRC_Z                                  0xFFF7FFFF
+#define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)                      (((x) & 0x1) << 20)
+#define   G_028804_STATIC_ANCHOR_ASSOCIATIONS(x)                      (((x) >> 20) & 0x1)
+#define   C_028804_STATIC_ANCHOR_ASSOCIATIONS                         0xFFEFFFFF
+#define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)                      (((x) & 0x1) << 21)
+#define   G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)                      (((x) >> 21) & 0x1)
+#define   C_028804_ALPHA_TO_MASK_EQAA_DISABLE                         0xFFDFFFFF
+#define   S_028804_OVERRASTERIZATION_AMOUNT(x)                        (((x) & 0x07) << 24)
+#define   G_028804_OVERRASTERIZATION_AMOUNT(x)                        (((x) >> 24) & 0x07)
+#define   C_028804_OVERRASTERIZATION_AMOUNT                           0xF8FFFFFF
+#define   S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)                  (((x) & 0x1) << 27)
+#define   G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)                  (((x) >> 27) & 0x1)
+#define   C_028804_ENABLE_POSTZ_OVERRASTERIZATION                     0xF7FFFFFF
 #define R_028808_CB_COLOR_CONTROL                                       0x028808
 #define   S_028808_DEGAMMA_ENABLE(x)                                  (((x) & 0x1) << 3)
 #define   G_028808_DEGAMMA_ENABLE(x)                                  (((x) >> 3) & 0x1)
@@ -6970,6 +8888,11 @@
 #define   S_02881C_USE_VTX_GS_CUT_FLAG(x)                             (((x) & 0x1) << 25)
 #define   G_02881C_USE_VTX_GS_CUT_FLAG(x)                             (((x) >> 25) & 0x1)
 #define   C_02881C_USE_VTX_GS_CUT_FLAG                                0xFDFFFFFF
+/* VI */
+#define   S_02881C_USE_VTX_LINE_WIDTH(x)                              (((x) & 0x1) << 26)
+#define   G_02881C_USE_VTX_LINE_WIDTH(x)                              (((x) >> 26) & 0x1)
+#define   C_02881C_USE_VTX_LINE_WIDTH                                 0xFBFFFFFF
+/*    */
 #define R_028820_PA_CL_NANINF_CNTL                                      0x028820
 #define   S_028820_VTE_XY_INF_DISCARD(x)                              (((x) & 0x1) << 0)
 #define   G_028820_VTE_XY_INF_DISCARD(x)                              (((x) >> 0) & 0x1)
@@ -7440,9 +9363,21 @@
 #define   S_028A4C_PS_ITER_SAMPLE(x)                                  (((x) & 0x1) << 16)
 #define   G_028A4C_PS_ITER_SAMPLE(x)                                  (((x) >> 16) & 0x1)
 #define   C_028A4C_PS_ITER_SAMPLE                                     0xFFFEFFFF
-#define   S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x)                   (((x) & 0x1) << 17)
-#define   G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x)                   (((x) >> 17) & 0x1)
-#define   C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC                      0xFFFDFFFF
+#define   S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x)         (((x) & 0x1) << 17)
+#define   G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x)         (((x) >> 17) & 0x1)
+#define   C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE            0xFFFDFFFF
+#define   S_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x)                      (((x) & 0x1) << 18)
+#define   G_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x)                      (((x) >> 18) & 0x1)
+#define   C_028A4C_MULTI_GPU_SUPERTILE_ENABLE                         0xFFFBFFFF
+#define   S_028A4C_GPU_ID_OVERRIDE_ENABLE(x)                          (((x) & 0x1) << 19)
+#define   G_028A4C_GPU_ID_OVERRIDE_ENABLE(x)                          (((x) >> 19) & 0x1)
+#define   C_028A4C_GPU_ID_OVERRIDE_ENABLE                             0xFFF7FFFF
+#define   S_028A4C_GPU_ID_OVERRIDE(x)                                 (((x) & 0x0F) << 20)
+#define   G_028A4C_GPU_ID_OVERRIDE(x)                                 (((x) >> 20) & 0x0F)
+#define   C_028A4C_GPU_ID_OVERRIDE                                    0xFF0FFFFF
+#define   S_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x)                   (((x) & 0x1) << 24)
+#define   G_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x)                   (((x) >> 24) & 0x1)
+#define   C_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE                      0xFEFFFFFF
 #define   S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)                         (((x) & 0x1) << 25)
 #define   G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x)                         (((x) >> 25) & 0x1)
 #define   C_028A4C_FORCE_EOV_CNTDWN_ENABLE                            0xFDFFFFFF
@@ -7508,6 +9443,7 @@
 #define   C_028A7C_INDEX_TYPE                                         0xFFFFFFFC
 #define     V_028A7C_VGT_INDEX_16                                   0x00
 #define     V_028A7C_VGT_INDEX_32                                   0x01
+#define     V_028A7C_VGT_INDEX_8                                    0x02 /* VI */
 #define   S_028A7C_SWAP_MODE(x)                                       (((x) & 0x03) << 2)
 #define   G_028A7C_SWAP_MODE(x)                                       (((x) >> 2) & 0x03)
 #define   C_028A7C_SWAP_MODE                                          0xFFFFFFF3
@@ -7537,6 +9473,12 @@
 #define   G_028A7C_REQ_PATH(x)                                        (((x) >> 10) & 0x1)
 #define   C_028A7C_REQ_PATH                                           0xFFFFFBFF
 /*     */
+/* VI */
+#define   S_028A7C_MTYPE(x)                                           (((x) & 0x03) << 11)
+#define   G_028A7C_MTYPE(x)                                           (((x) >> 11) & 0x03)
+#define   C_028A7C_MTYPE                                              0xFFFFE7FF
+/*    */
+#define R_028A80_WD_ENHANCE                                             0x028A80
 #define R_028A84_VGT_PRIMITIVEID_EN                                     0x028A84
 #define   S_028A84_PRIMITIVEID_EN(x)                                  (((x) & 0x1) << 0)
 #define   G_028A84_PRIMITIVEID_EN(x)                                  (((x) >> 0) & 0x1)
@@ -7635,6 +9577,10 @@
 #define   S_028AA8_WD_SWITCH_ON_EOP(x)                                (((x) & 0x1) << 20)
 #define   G_028AA8_WD_SWITCH_ON_EOP(x)                                (((x) >> 20) & 0x1)
 #define   C_028AA8_WD_SWITCH_ON_EOP                                   0xFFEFFFFF
+/* VI */
+#define   S_028AA8_MAX_PRIMGRP_IN_WAVE(x)                             (((x) & 0x0F) << 28)
+#define   G_028AA8_MAX_PRIMGRP_IN_WAVE(x)                             (((x) >> 28) & 0x0F)
+#define   C_028AA8_MAX_PRIMGRP_IN_WAVE                                0x0FFFFFFF
 /*     */
 #define R_028AAC_VGT_ESGS_RING_ITEMSIZE                                 0x028AAC
 #define   S_028AAC_ITEMSIZE(x)                                        (((x) & 0x7FFF) << 0)
@@ -7674,6 +9620,11 @@
 #define   S_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x)                         (((x) & 0x1) << 16)
 #define   G_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x)                         (((x) >> 16) & 0x1)
 #define   C_028ABC_DST_OUTSIDE_ZERO_TO_ONE                            0xFFFEFFFF
+/* VI */
+#define   S_028ABC_TC_COMPATIBLE(x)                                   (((x) & 0x1) << 17)
+#define   G_028ABC_TC_COMPATIBLE(x)                                   (((x) >> 17) & 0x1)
+#define   C_028ABC_TC_COMPATIBLE                                      0xFFFDFFFF
+/*    */
 #define R_028AC0_DB_SRESULTS_COMPARE_STATE0                             0x028AC0
 #define   S_028AC0_COMPAREFUNC0(x)                                    (((x) & 0x07) << 0)
 #define   G_028AC0_COMPAREFUNC0(x)                                    (((x) >> 0) & 0x07)
@@ -7763,6 +9714,21 @@
 #define   S_028B38_MAX_VERT_OUT(x)                                    (((x) & 0x7FF) << 0)
 #define   G_028B38_MAX_VERT_OUT(x)                                    (((x) >> 0) & 0x7FF)
 #define   C_028B38_MAX_VERT_OUT                                       0xFFFFF800
+/* VI */
+#define R_028B50_VGT_TESS_DISTRIBUTION                                  0x028B50
+#define   S_028B50_ACCUM_ISOLINE(x)                                   (((x) & 0xFF) << 0)
+#define   G_028B50_ACCUM_ISOLINE(x)                                   (((x) >> 0) & 0xFF)
+#define   C_028B50_ACCUM_ISOLINE                                      0xFFFFFF00
+#define   S_028B50_ACCUM_TRI(x)                                       (((x) & 0xFF) << 8)
+#define   G_028B50_ACCUM_TRI(x)                                       (((x) >> 8) & 0xFF)
+#define   C_028B50_ACCUM_TRI                                          0xFFFF00FF
+#define   S_028B50_ACCUM_QUAD(x)                                      (((x) & 0xFF) << 16)
+#define   G_028B50_ACCUM_QUAD(x)                                      (((x) >> 16) & 0xFF)
+#define   C_028B50_ACCUM_QUAD                                         0xFF00FFFF
+#define   S_028B50_DONUT_SPLIT(x)                                     (((x) & 0xFF) << 24)
+#define   G_028B50_DONUT_SPLIT(x)                                     (((x) >> 24) & 0xFF)
+#define   C_028B50_DONUT_SPLIT                                        0x00FFFFFF
+/*    */
 #define R_028B54_VGT_SHADER_STAGES_EN                                   0x028B54
 #define   S_028B54_LS_EN(x)                                           (((x) & 0x03) << 0)
 #define   G_028B54_LS_EN(x)                                           (((x) >> 0) & 0x03)
@@ -7791,6 +9757,20 @@
 #define   S_028B54_DYNAMIC_HS(x)                                      (((x) & 0x1) << 8)
 #define   G_028B54_DYNAMIC_HS(x)                                      (((x) >> 8) & 0x1)
 #define   C_028B54_DYNAMIC_HS                                         0xFFFFFEFF
+/* VI */
+#define   S_028B54_DISPATCH_DRAW_EN(x)                                (((x) & 0x1) << 9)
+#define   G_028B54_DISPATCH_DRAW_EN(x)                                (((x) >> 9) & 0x1)
+#define   C_028B54_DISPATCH_DRAW_EN                                   0xFFFFFDFF
+#define   S_028B54_DIS_DEALLOC_ACCUM_0(x)                             (((x) & 0x1) << 10)
+#define   G_028B54_DIS_DEALLOC_ACCUM_0(x)                             (((x) >> 10) & 0x1)
+#define   C_028B54_DIS_DEALLOC_ACCUM_0                                0xFFFFFBFF
+#define   S_028B54_DIS_DEALLOC_ACCUM_1(x)                             (((x) & 0x1) << 11)
+#define   G_028B54_DIS_DEALLOC_ACCUM_1(x)                             (((x) >> 11) & 0x1)
+#define   C_028B54_DIS_DEALLOC_ACCUM_1                                0xFFFFF7FF
+#define   S_028B54_VS_WAVE_ID_EN(x)                                   (((x) & 0x1) << 12)
+#define   G_028B54_VS_WAVE_ID_EN(x)                                   (((x) >> 12) & 0x1)
+#define   C_028B54_VS_WAVE_ID_EN                                      0xFFFFEFFF
+/*    */
 #define R_028B58_VGT_LS_HS_CONFIG                                       0x028B58
 #define   S_028B58_NUM_PATCHES(x)                                     (((x) & 0xFF) << 0)
 #define   G_028B58_NUM_PATCHES(x)                                     (((x) >> 0) & 0xFF)
@@ -7841,6 +9821,9 @@
 #define   S_028B6C_RESERVED_REDUC_AXIS(x)                             (((x) & 0x1) << 8) /* not on CIK */
 #define   G_028B6C_RESERVED_REDUC_AXIS(x)                             (((x) >> 8) & 0x1) /* not on CIK */
 #define   C_028B6C_RESERVED_REDUC_AXIS                                0xFFFFFEFF /* not on CIK */
+#define   S_028B6C_DEPRECATED(x)                                      (((x) & 0x1) << 9)
+#define   G_028B6C_DEPRECATED(x)                                      (((x) >> 9) & 0x1)
+#define   C_028B6C_DEPRECATED                                         0xFFFFFDFF
 #define   S_028B6C_NUM_DS_WAVES_PER_SIMD(x)                           (((x) & 0x0F) << 10)
 #define   G_028B6C_NUM_DS_WAVES_PER_SIMD(x)                           (((x) >> 10) & 0x0F)
 #define   C_028B6C_NUM_DS_WAVES_PER_SIMD                              0xFFFFC3FF
@@ -7855,6 +9838,14 @@
 #define     V_028B6C_VGT_POLICY_STREAM                              0x01
 #define     V_028B6C_VGT_POLICY_BYPASS                              0x02
 /*     */
+/* VI */
+#define   S_028B6C_DISTRIBUTION_MODE(x)                               (((x) & 0x03) << 17)
+#define   G_028B6C_DISTRIBUTION_MODE(x)                               (((x) >> 17) & 0x03)
+#define   C_028B6C_DISTRIBUTION_MODE                                  0xFFF9FFFF
+#define   S_028B6C_MTYPE(x)                                           (((x) & 0x03) << 19)
+#define   G_028B6C_MTYPE(x)                                           (((x) >> 19) & 0x03)
+#define   C_028B6C_MTYPE                                              0xFFE7FFFF
+/*    */
 #define R_028B70_DB_ALPHA_TO_MASK                                       0x028B70
 #define   S_028B70_ALPHA_TO_MASK_ENABLE(x)                            (((x) & 0x1) << 0)
 #define   G_028B70_ALPHA_TO_MASK_ENABLE(x)                            (((x) >> 0) & 0x1)
@@ -7994,6 +9985,22 @@
 #define   S_028BDC_DX10_DIAMOND_TEST_ENA(x)                           (((x) & 0x1) << 12)
 #define   G_028BDC_DX10_DIAMOND_TEST_ENA(x)                           (((x) >> 12) & 0x1)
 #define   C_028BDC_DX10_DIAMOND_TEST_ENA                              0xFFFFEFFF
+#define R_028BE0_PA_SC_AA_CONFIG                                        0x028BE0
+#define   S_028BE0_MSAA_NUM_SAMPLES(x)                                (((x) & 0x7) << 0)
+#define   G_028BE0_MSAA_NUM_SAMPLES(x)                                (((x) >> 0) & 0x07)
+#define   C_028BE0_MSAA_NUM_SAMPLES                                   0xFFFFFFF8
+#define   S_028BE0_AA_MASK_CENTROID_DTMN(x)                           (((x) & 0x1) << 4)
+#define   G_028BE0_AA_MASK_CENTROID_DTMN(x)                           (((x) >> 4) & 0x1)
+#define   C_028BE0_AA_MASK_CENTROID_DTMN                              0xFFFFFFEF
+#define   S_028BE0_MAX_SAMPLE_DIST(x)                                 (((x) & 0xf) << 13)
+#define   G_028BE0_MAX_SAMPLE_DIST(x)                                 (((x) >> 13) & 0x0F)
+#define   C_028BE0_MAX_SAMPLE_DIST                                    0xFFFE1FFF
+#define   S_028BE0_MSAA_EXPOSED_SAMPLES(x)                            (((x) & 0x7) << 20)
+#define   G_028BE0_MSAA_EXPOSED_SAMPLES(x)                            (((x) >> 20) & 0x07)
+#define   C_028BE0_MSAA_EXPOSED_SAMPLES                               0xFF8FFFFF
+#define   S_028BE0_DETAIL_TO_EXPOSED_MODE(x)                          (((x) & 0x3) << 24)
+#define   G_028BE0_DETAIL_TO_EXPOSED_MODE(x)                          (((x) >> 24) & 0x03)
+#define   C_028BE0_DETAIL_TO_EXPOSED_MODE                             0xFCFFFFFF
 #define R_028BE4_PA_SU_VTX_CNTL                                         0x028BE4
 #define   S_028BE4_PIX_CENTER(x)                                      (((x) & 0x1) << 0)
 #define   G_028BE4_PIX_CENTER(x)                                      (((x) >> 0) & 0x1)
@@ -8562,6 +10569,17 @@
 #define   G_028C70_FMASK_COMPRESSION_DISABLE(x)                       (((x) >> 26) & 0x1)
 #define   C_028C70_FMASK_COMPRESSION_DISABLE                          0xFBFFFFFF
 /*     */
+/* VI */
+#define   S_028C70_FMASK_COMPRESS_1FRAG_ONLY(x)                       (((x) & 0x1) << 27)
+#define   G_028C70_FMASK_COMPRESS_1FRAG_ONLY(x)                       (((x) >> 27) & 0x1)
+#define   C_028C70_FMASK_COMPRESS_1FRAG_ONLY                          0xF7FFFFFF
+#define   S_028C70_DCC_ENABLE(x)                                      (((x) & 0x1) << 28)
+#define   G_028C70_DCC_ENABLE(x)                                      (((x) >> 28) & 0x1)
+#define   C_028C70_DCC_ENABLE                                         0xEFFFFFFF
+#define   S_028C70_CMASK_ADDR_TYPE(x)                                 (((x) & 0x03) << 29)
+#define   G_028C70_CMASK_ADDR_TYPE(x)                                 (((x) >> 29) & 0x03)
+#define   C_028C70_CMASK_ADDR_TYPE                                    0x9FFFFFFF
+/*    */
 #define R_028C74_CB_COLOR0_ATTRIB                                       0x028C74
 #define   S_028C74_TILE_MODE_INDEX(x)                                 (((x) & 0x1F) << 0)
 #define   G_028C74_TILE_MODE_INDEX(x)                                 (((x) >> 0) & 0x1F)
@@ -8569,7 +10587,9 @@
 #define   S_028C74_FMASK_TILE_MODE_INDEX(x)                           (((x) & 0x1F) << 5)
 #define   G_028C74_FMASK_TILE_MODE_INDEX(x)                           (((x) >> 5) & 0x1F)
 #define   C_028C74_FMASK_TILE_MODE_INDEX                              0xFFFFFC1F
-#define   S_028C74_FMASK_BANK_HEIGHT(x)				      (((x) & 0x3) << 10) /* SI errata */
+#define   S_028C74_FMASK_BANK_HEIGHT(x)                               (((x) & 0x03) << 10)
+#define   G_028C74_FMASK_BANK_HEIGHT(x)                               (((x) >> 10) & 0x03)
+#define   C_028C74_FMASK_BANK_HEIGHT                                  0xFFFFF3FF
 #define   S_028C74_NUM_SAMPLES(x)                                     (((x) & 0x07) << 12)
 #define   G_028C74_NUM_SAMPLES(x)                                     (((x) >> 12) & 0x07)
 #define   C_028C74_NUM_SAMPLES                                        0xFFFF8FFF
@@ -8579,6 +10599,36 @@
 #define   S_028C74_FORCE_DST_ALPHA_1(x)                               (((x) & 0x1) << 17)
 #define   G_028C74_FORCE_DST_ALPHA_1(x)                               (((x) >> 17) & 0x1)
 #define   C_028C74_FORCE_DST_ALPHA_1                                  0xFFFDFFFF
+/* VI */
+#define R_028C78_CB_COLOR0_DCC_CONTROL                                  0x028C78
+#define   S_028C78_OVERWRITE_COMBINER_DISABLE(x)                      (((x) & 0x1) << 0)
+#define   G_028C78_OVERWRITE_COMBINER_DISABLE(x)                      (((x) >> 0) & 0x1)
+#define   C_028C78_OVERWRITE_COMBINER_DISABLE                         0xFFFFFFFE
+#define   S_028C78_KEY_CLEAR_ENABLE(x)                                (((x) & 0x1) << 1)
+#define   G_028C78_KEY_CLEAR_ENABLE(x)                                (((x) >> 1) & 0x1)
+#define   C_028C78_KEY_CLEAR_ENABLE                                   0xFFFFFFFD
+#define   S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x)                     (((x) & 0x03) << 2)
+#define   G_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x)                     (((x) >> 2) & 0x03)
+#define   C_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE                        0xFFFFFFF3
+#define   S_028C78_MIN_COMPRESSED_BLOCK_SIZE(x)                       (((x) & 0x1) << 4)
+#define   G_028C78_MIN_COMPRESSED_BLOCK_SIZE(x)                       (((x) >> 4) & 0x1)
+#define   C_028C78_MIN_COMPRESSED_BLOCK_SIZE                          0xFFFFFFEF
+#define   S_028C78_MAX_COMPRESSED_BLOCK_SIZE(x)                       (((x) & 0x03) << 5)
+#define   G_028C78_MAX_COMPRESSED_BLOCK_SIZE(x)                       (((x) >> 5) & 0x03)
+#define   C_028C78_MAX_COMPRESSED_BLOCK_SIZE                          0xFFFFFF9F
+#define   S_028C78_COLOR_TRANSFORM(x)                                 (((x) & 0x03) << 7)
+#define   G_028C78_COLOR_TRANSFORM(x)                                 (((x) >> 7) & 0x03)
+#define   C_028C78_COLOR_TRANSFORM                                    0xFFFFFE7F
+#define   S_028C78_INDEPENDENT_64B_BLOCKS(x)                          (((x) & 0x1) << 9)
+#define   G_028C78_INDEPENDENT_64B_BLOCKS(x)                          (((x) >> 9) & 0x1)
+#define   C_028C78_INDEPENDENT_64B_BLOCKS                             0xFFFFFDFF
+#define   S_028C78_LOSSY_RGB_PRECISION(x)                             (((x) & 0x0F) << 10)
+#define   G_028C78_LOSSY_RGB_PRECISION(x)                             (((x) >> 10) & 0x0F)
+#define   C_028C78_LOSSY_RGB_PRECISION                                0xFFFFC3FF
+#define   S_028C78_LOSSY_ALPHA_PRECISION(x)                           (((x) & 0x0F) << 14)
+#define   G_028C78_LOSSY_ALPHA_PRECISION(x)                           (((x) >> 14) & 0x0F)
+#define   C_028C78_LOSSY_ALPHA_PRECISION                              0xFFFC3FFF
+/*    */
 #define R_028C7C_CB_COLOR0_CMASK                                        0x028C7C
 #define R_028C80_CB_COLOR0_CMASK_SLICE                                  0x028C80
 #define   S_028C80_TILE_MAX(x)                                        (((x) & 0x3FFF) << 0)
@@ -8591,90 +10641,105 @@
 #define   C_028C88_TILE_MAX                                           0xFFC00000
 #define R_028C8C_CB_COLOR0_CLEAR_WORD0                                  0x028C8C
 #define R_028C90_CB_COLOR0_CLEAR_WORD1                                  0x028C90
+#define R_028C94_CB_COLOR0_DCC_BASE                                     0x028C94 /* VI */
 #define R_028C9C_CB_COLOR1_BASE                                         0x028C9C
 #define R_028CA0_CB_COLOR1_PITCH                                        0x028CA0
 #define R_028CA4_CB_COLOR1_SLICE                                        0x028CA4
 #define R_028CA8_CB_COLOR1_VIEW                                         0x028CA8
 #define R_028CAC_CB_COLOR1_INFO                                         0x028CAC
 #define R_028CB0_CB_COLOR1_ATTRIB                                       0x028CB0
-#define R_028CD4_CB_COLOR1_CMASK                                        0x028CB8
+#define R_028CB4_CB_COLOR1_DCC_CONTROL                                  0x028CB4 /* VI */
+#define R_028CB8_CB_COLOR1_CMASK                                        0x028CB8
 #define R_028CBC_CB_COLOR1_CMASK_SLICE                                  0x028CBC
 #define R_028CC0_CB_COLOR1_FMASK                                        0x028CC0
 #define R_028CC4_CB_COLOR1_FMASK_SLICE                                  0x028CC4
 #define R_028CC8_CB_COLOR1_CLEAR_WORD0                                  0x028CC8
 #define R_028CCC_CB_COLOR1_CLEAR_WORD1                                  0x028CCC
+#define R_028CD0_CB_COLOR1_DCC_BASE                                     0x028CD0 /* VI */
 #define R_028CD8_CB_COLOR2_BASE                                         0x028CD8
 #define R_028CDC_CB_COLOR2_PITCH                                        0x028CDC
 #define R_028CE0_CB_COLOR2_SLICE                                        0x028CE0
 #define R_028CE4_CB_COLOR2_VIEW                                         0x028CE4
 #define R_028CE8_CB_COLOR2_INFO                                         0x028CE8
 #define R_028CEC_CB_COLOR2_ATTRIB                                       0x028CEC
+#define R_028CF0_CB_COLOR2_DCC_CONTROL                                  0x028CF0 /* VI */
 #define R_028CF4_CB_COLOR2_CMASK                                        0x028CF4
 #define R_028CF8_CB_COLOR2_CMASK_SLICE                                  0x028CF8
 #define R_028CFC_CB_COLOR2_FMASK                                        0x028CFC
 #define R_028D00_CB_COLOR2_FMASK_SLICE                                  0x028D00
 #define R_028D04_CB_COLOR2_CLEAR_WORD0                                  0x028D04
 #define R_028D08_CB_COLOR2_CLEAR_WORD1                                  0x028D08
+#define R_028D0C_CB_COLOR2_DCC_BASE                                     0x028D0C /* VI */
 #define R_028D14_CB_COLOR3_BASE                                         0x028D14
 #define R_028D18_CB_COLOR3_PITCH                                        0x028D18
 #define R_028D1C_CB_COLOR3_SLICE                                        0x028D1C
 #define R_028D20_CB_COLOR3_VIEW                                         0x028D20
 #define R_028D24_CB_COLOR3_INFO                                         0x028D24
 #define R_028D28_CB_COLOR3_ATTRIB                                       0x028D28
+#define R_028D2C_CB_COLOR3_DCC_CONTROL                                  0x028D2C /* VI */
 #define R_028D30_CB_COLOR3_CMASK                                        0x028D30
 #define R_028D34_CB_COLOR3_CMASK_SLICE                                  0x028D34
 #define R_028D38_CB_COLOR3_FMASK                                        0x028D38
 #define R_028D3C_CB_COLOR3_FMASK_SLICE                                  0x028D3C
 #define R_028D40_CB_COLOR3_CLEAR_WORD0                                  0x028D40
 #define R_028D44_CB_COLOR3_CLEAR_WORD1                                  0x028D44
+#define R_028D48_CB_COLOR3_DCC_BASE                                     0x028D48 /* VI */
 #define R_028D50_CB_COLOR4_BASE                                         0x028D50
 #define R_028D54_CB_COLOR4_PITCH                                        0x028D54
 #define R_028D58_CB_COLOR4_SLICE                                        0x028D58
 #define R_028D5C_CB_COLOR4_VIEW                                         0x028D5C
 #define R_028D60_CB_COLOR4_INFO                                         0x028D60
 #define R_028D64_CB_COLOR4_ATTRIB                                       0x028D64
+#define R_028D68_CB_COLOR4_DCC_CONTROL                                  0x028D68 /* VI */
 #define R_028D6C_CB_COLOR4_CMASK                                        0x028D6C
 #define R_028D70_CB_COLOR4_CMASK_SLICE                                  0x028D70
 #define R_028D74_CB_COLOR4_FMASK                                        0x028D74
 #define R_028D78_CB_COLOR4_FMASK_SLICE                                  0x028D78
 #define R_028D7C_CB_COLOR4_CLEAR_WORD0                                  0x028D7C
 #define R_028D80_CB_COLOR4_CLEAR_WORD1                                  0x028D80
+#define R_028D84_CB_COLOR4_DCC_BASE                                     0x028D84 /* VI */
 #define R_028D8C_CB_COLOR5_BASE                                         0x028D8C
 #define R_028D90_CB_COLOR5_PITCH                                        0x028D90
 #define R_028D94_CB_COLOR5_SLICE                                        0x028D94
 #define R_028D98_CB_COLOR5_VIEW                                         0x028D98
 #define R_028D9C_CB_COLOR5_INFO                                         0x028D9C
 #define R_028DA0_CB_COLOR5_ATTRIB                                       0x028DA0
+#define R_028DA4_CB_COLOR5_DCC_CONTROL                                  0x028DA4 /* VI */
 #define R_028DA8_CB_COLOR5_CMASK                                        0x028DA8
 #define R_028DAC_CB_COLOR5_CMASK_SLICE                                  0x028DAC
 #define R_028DB0_CB_COLOR5_FMASK                                        0x028DB0
 #define R_028DB4_CB_COLOR5_FMASK_SLICE                                  0x028DB4
 #define R_028DB8_CB_COLOR5_CLEAR_WORD0                                  0x028DB8
 #define R_028DBC_CB_COLOR5_CLEAR_WORD1                                  0x028DBC
+#define R_028DC0_CB_COLOR5_DCC_BASE                                     0x028DC0 /* VI */
 #define R_028DC8_CB_COLOR6_BASE                                         0x028DC8
 #define R_028DCC_CB_COLOR6_PITCH                                        0x028DCC
 #define R_028DD0_CB_COLOR6_SLICE                                        0x028DD0
 #define R_028DD4_CB_COLOR6_VIEW                                         0x028DD4
 #define R_028DD8_CB_COLOR6_INFO                                         0x028DD8
 #define R_028DDC_CB_COLOR6_ATTRIB                                       0x028DDC
+#define R_028DE0_CB_COLOR6_DCC_CONTROL                                  0x028DE0 /* VI */
 #define R_028DE4_CB_COLOR6_CMASK                                        0x028DE4
 #define R_028DE8_CB_COLOR6_CMASK_SLICE                                  0x028DE8
 #define R_028DEC_CB_COLOR6_FMASK                                        0x028DEC
 #define R_028DF0_CB_COLOR6_FMASK_SLICE                                  0x028DF0
 #define R_028DF4_CB_COLOR6_CLEAR_WORD0                                  0x028DF4
 #define R_028DF8_CB_COLOR6_CLEAR_WORD1                                  0x028DF8
+#define R_028DFC_CB_COLOR6_DCC_BASE                                     0x028DFC /* VI */
 #define R_028E04_CB_COLOR7_BASE                                         0x028E04
 #define R_028E08_CB_COLOR7_PITCH                                        0x028E08
 #define R_028E0C_CB_COLOR7_SLICE                                        0x028E0C
 #define R_028E10_CB_COLOR7_VIEW                                         0x028E10
 #define R_028E14_CB_COLOR7_INFO                                         0x028E14
 #define R_028E18_CB_COLOR7_ATTRIB                                       0x028E18
+#define R_028E1C_CB_COLOR7_DCC_CONTROL                                  0x028E1C /* VI */
 #define R_028E20_CB_COLOR7_CMASK                                        0x028E20
 #define R_028E24_CB_COLOR7_CMASK_SLICE                                  0x028E24
 #define R_028E28_CB_COLOR7_FMASK                                        0x028E28
 #define R_028E2C_CB_COLOR7_FMASK_SLICE                                  0x028E2C
 #define R_028E30_CB_COLOR7_CLEAR_WORD0                                  0x028E30
 #define R_028E34_CB_COLOR7_CLEAR_WORD1                                  0x028E34
+#define R_028E38_CB_COLOR7_DCC_BASE                                     0x028E38 /* VI */
 
 /* SI async DMA packets */
 #define SI_DMA_PACKET(cmd, sub_cmd, n) ((((cmd) & 0xF) << 28) |    \
@@ -8696,5 +10761,29 @@
 #define    SI_DMA_PACKET_CONSTANT_FILL             0xd
 #define    SI_DMA_PACKET_NOP                       0xf
 
+/* CIK async DMA packets */
+#define CIK_SDMA_PACKET(op, sub_op, n)   ((((n) & 0xFFFF) << 16) |	\
+					 (((sub_op) & 0xFF) << 8) |	\
+					 (((op) & 0xFF) << 0))
+/* CIK async DMA packet types */
+#define    CIK_SDMA_OPCODE_NOP                     0x0
+#define    CIK_SDMA_OPCODE_COPY                    0x1
+#define        CIK_SDMA_COPY_SUB_OPCODE_LINEAR            0x0
+#define        CIK_SDMA_COPY_SUB_OPCODE_TILED             0x1
+#define        CIK_SDMA_COPY_SUB_OPCODE_SOA               0x3
+#define        CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
+#define        CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
+#define        CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
+#define    CIK_SDMA_OPCODE_WRITE                   0x2
+#define        SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
+#define        SDMA_WRTIE_SUB_OPCODE_TILED                0x1
+#define    CIK_SDMA_OPCODE_INDIRECT_BUFFER         0x4
+#define    CIK_SDMA_PACKET_FENCE                   0x5
+#define    CIK_SDMA_PACKET_TRAP                    0x6
+#define    CIK_SDMA_PACKET_SEMAPHORE               0x7
+#define    CIK_SDMA_PACKET_CONSTANT_FILL           0xb
+#define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
+#define    CIK_SDMA_COPY_MAX_SIZE                  0x1fffff
+
 #endif /* _SID_H */
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_dma.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_dma.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_dma.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_dma.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,21 +30,6 @@
 
 #include "util/u_format.h"
 
-static unsigned si_array_mode(unsigned mode)
-{
-	switch (mode) {
-	case RADEON_SURF_MODE_LINEAR_ALIGNED:
-		return V_009910_ARRAY_LINEAR_ALIGNED;
-	case RADEON_SURF_MODE_1D:
-		return V_009910_ARRAY_1D_TILED_THIN1;
-	case RADEON_SURF_MODE_2D:
-		return V_009910_ARRAY_2D_TILED_THIN1;
-	default:
-	case RADEON_SURF_MODE_LINEAR:
-		return V_009910_ARRAY_LINEAR_GENERAL;
-	}
-}
-
 static uint32_t si_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode)
 {
 	if (sscreen->b.info.si_tile_mode_array_valid) {
@@ -240,11 +225,6 @@
 		goto fallback;
 	}
 
-	/* TODO: Implement DMA copy for CIK */
-	if (sctx->b.chip_class >= CIK) {
-		goto fallback;
-	}
-
 	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
 		si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
 		return;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_hw_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_hw_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_hw_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_hw_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,10 +30,33 @@
 void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 			boolean count_draw_in)
 {
+	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 	int i;
 
+	/* There are two memory usage counters in the winsys for all buffers
+	 * that have been added (cs_add_reloc) and two counters in the pipe
+	 * driver for those that haven't been added yet.
+	 * */
+	if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+		ctx->b.gtt = 0;
+		ctx->b.vram = 0;
+		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		return;
+	}
+	ctx->b.gtt = 0;
+	ctx->b.vram = 0;
+
+	/* If the CS is sufficiently large, don't count the space needed
+	 * and just flush if there are fewer than 8192 dwords left.
+	 */
+	if (cs->max_dw >= 24 * 1024) {
+		if (cs->cdw > cs->max_dw - 8 * 1024)
+			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		return;
+	}
+
 	/* The number of dwords we already used in the CS so far. */
-	num_dw += ctx->b.rings.gfx.cs->cdw;
+	num_dw += cs->cdw;
 
 	if (count_draw_in) {
 		for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
@@ -50,7 +73,8 @@
 	}
 
 	/* Count in queries_suspend. */
-	num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend;
+	num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
+		  ctx->b.num_cs_dw_timer_queries_suspend;
 
 	/* Count in streamout_end at the end of CS. */
 	if (ctx->b.streamout.begin_emitted) {
@@ -72,7 +96,7 @@
 #endif
 
 	/* Flush if there's not enough space. */
-	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+	if (num_dw > cs->max_dw) {
 		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 	}
 }
@@ -82,9 +106,16 @@
 {
 	struct si_context *ctx = context;
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+	struct radeon_winsys *ws = ctx->b.ws;
 
-	if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence)
+	if (cs->cdw == ctx->b.initial_gfx_cs_size &&
+	    (!fence || ctx->last_gfx_fence)) {
+		if (fence)
+			ws->fence_reference(fence, ctx->last_gfx_fence);
+		if (!(flags & RADEON_FLUSH_ASYNC))
+			ws->cs_sync_flush(cs);
 		return;
+	}
 
 	ctx->b.rings.gfx.flushing = true;
 
@@ -101,9 +132,13 @@
 	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
 	/* Flush the CS. */
-	ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++);
+	ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
+		     ctx->screen->b.cs_count++);
 	ctx->b.rings.gfx.flushing = false;
 
+	if (fence)
+		ws->fence_reference(fence, ctx->last_gfx_fence);
+
 #if SI_TRACE_CS
 	if (ctx->screen->b.trace_bo) {
 		struct si_screen *sscreen = ctx->screen;
@@ -111,7 +146,7 @@
 
 		for (i = 0; i < 10; i++) {
 			usleep(5);
-			if (!ctx->b.ws->buffer_is_busy(sscreen->b.trace_bo->buf, RADEON_USAGE_READWRITE)) {
+			if (!ws->buffer_is_busy(sscreen->b.trace_bo->buf, RADEON_USAGE_READWRITE)) {
 				break;
 			}
 		}
@@ -130,7 +165,8 @@
 void si_begin_new_cs(struct si_context *ctx)
 {
 	/* Flush read caches at the beginning of CS. */
-	ctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
+			SI_CONTEXT_INV_TC_L1 |
 			SI_CONTEXT_INV_TC_L2 |
 			SI_CONTEXT_INV_KCACHE |
 			SI_CONTEXT_INV_ICACHE;
@@ -143,24 +179,32 @@
 	/* The CS initialization should be emitted before everything else. */
 	si_pm4_emit(ctx, ctx->init_config);
 
-	ctx->clip_regs.dirty = true;
-	ctx->framebuffer.atom.dirty = true;
-	ctx->msaa_sample_locs.dirty = true;
-	ctx->msaa_config.dirty = true;
-	ctx->db_render_state.dirty = true;
-	ctx->b.streamout.enable_atom.dirty = true;
+	si_mark_atom_dirty(ctx, &ctx->clip_regs);
+	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+	si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
+	si_mark_atom_dirty(ctx, &ctx->msaa_config);
+	si_mark_atom_dirty(ctx, &ctx->db_render_state);
+	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
 	si_all_descriptors_begin_new_cs(ctx);
 
 	r600_postflush_resume_features(&ctx->b);
 
 	ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+
+	/* Invalidate various draw states so that they are emitted before
+	 * the first draw call. */
 	si_invalidate_draw_sh_constants(ctx);
 	ctx->last_primitive_restart_en = -1;
 	ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
 	ctx->last_gs_out_prim = -1;
 	ctx->last_prim = -1;
 	ctx->last_multi_vgt_param = -1;
+	ctx->last_ls_hs_config = -1;
 	ctx->last_rast_prim = -1;
 	ctx->last_sc_line_stipple = ~0;
 	ctx->emit_scratch_reloc = true;
+	ctx->last_ls = NULL;
+	ctx->last_tcs = NULL;
+	ctx->last_tes_sh_base = -1;
+	ctx->last_num_tcs_input_cp = -1;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_pipe.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_pipe.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_pipe.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_pipe.c	2015-09-16 14:36:09.000000000 +0000
@@ -36,32 +36,42 @@
 static void si_destroy_context(struct pipe_context *context)
 {
 	struct si_context *sctx = (struct si_context *)context;
+	int i;
 
 	si_release_all_descriptors(sctx);
 
 	pipe_resource_reference(&sctx->esgs_ring, NULL);
 	pipe_resource_reference(&sctx->gsvs_ring, NULL);
+	pipe_resource_reference(&sctx->tf_ring, NULL);
 	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
 	r600_resource_reference(&sctx->border_color_table, NULL);
 	r600_resource_reference(&sctx->scratch_buffer, NULL);
+	sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
 	si_pm4_free_state(sctx, sctx->init_config, ~0);
 	si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
-	si_pm4_delete_state(sctx, gs_onoff, sctx->gs_on);
-	si_pm4_delete_state(sctx, gs_onoff, sctx->gs_off);
+	si_pm4_delete_state(sctx, tf_ring, sctx->tf_state);
+	for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
+		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
 	if (sctx->pstipple_sampler_state)
 		sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
-	if (sctx->dummy_pixel_shader) {
+	if (sctx->dummy_pixel_shader)
 		sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
-	}
-	sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
-	sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
-	sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
-	sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
+	if (sctx->fixed_func_tcs_shader)
+		sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader);
+	if (sctx->custom_dsa_flush)
+		sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
+	if (sctx->custom_blend_resolve)
+		sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
+	if (sctx->custom_blend_decompress)
+		sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
+	if (sctx->custom_blend_fastclear)
+		sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
 	util_unreference_framebuffer_state(&sctx->framebuffer.state);
 
-	util_blitter_destroy(sctx->blitter);
+	if (sctx->blitter)
+		util_blitter_destroy(sctx->blitter);
 
 	si_pm4_cleanup(sctx);
 
@@ -74,6 +84,14 @@
 	FREE(sctx);
 }
 
+static enum pipe_reset_status
+si_amdgpu_get_reset_status(struct pipe_context *ctx)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+
+	return sctx->b.ws->ctx_query_reset_status(sctx->b.ctx);
+}
+
 static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
 {
 	struct si_context *sctx = CALLOC_STRUCT(si_context);
@@ -91,13 +109,18 @@
 	sctx->b.b.screen = screen; /* this must be set first */
 	sctx->b.b.priv = priv;
 	sctx->b.b.destroy = si_destroy_context;
+	sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
 	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
 
 	if (!r600_common_context_init(&sctx->b, &sscreen->b))
 		goto fail;
 
+	if (sscreen->b.info.drm_major == 3)
+		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;
+
 	si_init_blit_functions(sctx);
 	si_init_compute_functions(sctx);
+	si_init_cp_dma_functions(sctx);
 
 	if (sscreen->b.info.has_uvd) {
 		sctx->b.b.create_video_codec = si_uvd_create_decoder;
@@ -107,7 +130,7 @@
 		sctx->b.b.create_video_buffer = vl_video_buffer_create;
 	}
 
-	sctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, si_context_gfx_flush,
+	sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
 					     sctx, sscreen->b.trace_bo ?
 						sscreen->b.trace_bo->cs_buf : NULL);
 	sctx->b.rings.gfx.flush = si_context_gfx_flush;
@@ -127,17 +150,8 @@
 	sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
 	sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
 
-	switch (sctx->b.chip_class) {
-	case SI:
-	case CIK:
-		si_init_state_functions(sctx);
-		si_init_shader_functions(sctx);
-		si_init_config(sctx);
-		break;
-	default:
-		R600_ERR("Unsupported chip class %d.\n", sctx->b.chip_class);
-		goto fail;
-	}
+	si_init_state_functions(sctx);
+	si_init_shader_functions(sctx);
 
 	if (sscreen->b.debug_flags & DBG_FORCE_DMA)
 		sctx->b.b.resource_copy_region = sctx->b.dma_copy;
@@ -251,15 +265,28 @@
 	case PIPE_CAP_POLYGON_OFFSET_CLAMP:
 	case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
 	case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+	case PIPE_CAP_TGSI_TEXCOORD:
+	case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+	case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+	case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+	case PIPE_CAP_DEPTH_BOUNDS_TEST:
 		return 1;
 
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
 		return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
 
+	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+		return (sscreen->b.info.drm_major == 2 &&
+			sscreen->b.info.drm_minor >= 43) ||
+		       sscreen->b.info.drm_major == 3;
+
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 		/* 2D tiling on CIK is supported since DRM 2.35.0 */
 		return sscreen->b.chip_class < CIK ||
-		       sscreen->b.info.drm_minor >= 35;
+		       (sscreen->b.info.drm_major == 2 &&
+			sscreen->b.info.drm_minor >= 35) ||
+		       sscreen->b.info.drm_major == 3;
 
         case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
                 return R600_MAP_BUFFER_ALIGNMENT;
@@ -269,7 +296,7 @@
 		return 4;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
-		return 330;
+		return HAVE_LLVM >= 0x0307 ? 410 : 330;
 
 	case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
 		return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
@@ -286,16 +313,15 @@
 	case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
 	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
-	case PIPE_CAP_TGSI_TEXCOORD:
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-	case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
-	case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
 	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_VERTEXID_NOBASE:
-	case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
 		return 0;
 
+	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+		return 30;
+
 	case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
 		return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600;
 
@@ -314,7 +340,7 @@
 	case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
 		return 4095;
 	case PIPE_CAP_MAX_VERTEX_STREAMS:
-		return 1;
+		return 4;
 
 	case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
 		return 2048;
@@ -335,7 +361,7 @@
 		return 8;
 
 	case PIPE_CAP_MAX_VIEWPORTS:
-		return 1;
+		return 16;
 
 	/* Timer queries, present when the clock frequency is non zero. */
 	case PIPE_CAP_QUERY_TIMESTAMP:
@@ -375,6 +401,13 @@
 	case PIPE_SHADER_VERTEX:
 	case PIPE_SHADER_GEOMETRY:
 		break;
+	case PIPE_SHADER_TESS_CTRL:
+	case PIPE_SHADER_TESS_EVAL:
+		/* LLVM 3.6.2 is required for tessellation because of bug fixes there */
+		if (HAVE_LLVM < 0x0306 ||
+		    (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 2))
+			return 0;
+		break;
 	case PIPE_SHADER_COMPUTE:
 		switch (param) {
 		case PIPE_SHADER_CAP_PREFERRED_IR:
@@ -401,7 +434,6 @@
 		}
 		break;
 	default:
-		/* TODO: support tessellation */
 		return 0;
 	}
 
@@ -433,7 +465,7 @@
 		/* Indirection of geometry shader input dimension is not
 		 * handled yet
 		 */
-		return shader < PIPE_SHADER_GEOMETRY;
+		return shader != PIPE_SHADER_GEOMETRY;
 	case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
 	case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
 	case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
@@ -448,10 +480,12 @@
 	case PIPE_SHADER_CAP_PREFERRED_IR:
 		return PIPE_SHADER_IR_TGSI;
 	case PIPE_SHADER_CAP_DOUBLES:
+		return HAVE_LLVM >= 0x0307;
 	case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
 		return 0;
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+	case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
 		return 1;
 	}
 	return 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_pipe.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_pipe.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_pipe.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_pipe.h	2015-09-16 14:36:09.000000000 +0000
@@ -48,7 +48,8 @@
 
 #define SI_MAX_DRAW_CS_DWORDS \
 	(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
-	 /*draw regs:*/ 16 + /*draw packets:*/ 31)
+	 /*draw regs:*/ 18 + /*draw packets:*/ 31 +\
+	 /*derived tess state:*/ 19)
 
 /* Instruction cache. */
 #define SI_CONTEXT_INV_ICACHE		(R600_CONTEXT_PRIVATE_FLAG << 0)
@@ -135,6 +136,8 @@
 	void				*pstipple_sampler_state;
 	struct si_screen		*screen;
 	struct si_pm4_state		*init_config;
+	struct pipe_fence_handle	*last_gfx_fence;
+	struct si_shader_selector	*fixed_func_tcs_shader;
 
 	union {
 		struct {
@@ -160,6 +163,8 @@
 	struct si_shader_selector	*ps_shader;
 	struct si_shader_selector	*gs_shader;
 	struct si_shader_selector	*vs_shader;
+	struct si_shader_selector	*tcs_shader;
+	struct si_shader_selector	*tes_shader;
 	struct si_cs_shader_state	cs_shader_state;
 	struct si_shader_data		shader_userdata;
 	/* shader information */
@@ -187,13 +192,16 @@
 	/* With rasterizer discard, there doesn't have to be a pixel shader.
 	 * In that case, we bind this one: */
 	void			*dummy_pixel_shader;
-	struct si_pm4_state	*gs_on;
-	struct si_pm4_state	*gs_off;
-	struct si_pm4_state	*gs_rings;
 	struct r600_atom	cache_flush;
 	struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+
+	/* VGT states. */
+	struct si_pm4_state	*vgt_shader_config[4];
+	struct si_pm4_state	*gs_rings;
 	struct pipe_resource	*esgs_ring;
 	struct pipe_resource	*gsvs_ring;
+	struct si_pm4_state	*tf_state;
+	struct pipe_resource	*tf_ring;
 
 	LLVMTargetMachineRef		tm;
 
@@ -211,7 +219,7 @@
 	bool			db_depth_disable_expclear;
 	unsigned		ps_db_shader_control;
 
-	/* Draw state. */
+	/* Emitted draw state. */
 	int			last_base_vertex;
 	int			last_start_instance;
 	int			last_sh_base_reg;
@@ -220,6 +228,7 @@
 	int			last_gs_out_prim;
 	int			last_prim;
 	int			last_multi_vgt_param;
+	int			last_ls_hs_config;
 	int			last_rast_prim;
 	unsigned		last_sc_line_stipple;
 	int			current_rast_prim; /* primitive type after TES, GS */
@@ -228,8 +237,23 @@
 	boolean                 emit_scratch_reloc;
 	unsigned		scratch_waves;
 	unsigned		spi_tmpring_size;
+
+	/* Emitted derived tessellation state. */
+	struct si_shader	*last_ls; /* local shader (VS) */
+	struct si_shader_selector *last_tcs;
+	int			last_num_tcs_input_cp;
+	int			last_tes_sh_base;
 };
 
+/* cik_sdma.c */
+void cik_sdma_copy(struct pipe_context *ctx,
+		   struct pipe_resource *dst,
+		   unsigned dst_level,
+		   unsigned dstx, unsigned dsty, unsigned dstz,
+		   struct pipe_resource *src,
+		   unsigned src_level,
+		   const struct pipe_box *src_box);
+
 /* si_blit.c */
 void si_init_blit_functions(struct si_context *sctx);
 void si_flush_depth_textures(struct si_context *sctx,
@@ -244,6 +268,13 @@
 			     unsigned src_level,
 			     const struct pipe_box *src_box);
 
+/* si_cp_dma.c */
+void si_copy_buffer(struct si_context *sctx,
+		    struct pipe_resource *dst, struct pipe_resource *src,
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
+		    bool is_framebuffer);
+void si_init_cp_dma_functions(struct si_context *sctx);
+
 /* si_dma.c */
 void si_dma_copy(struct pipe_context *ctx,
 		 struct pipe_resource *dst,
@@ -277,7 +308,7 @@
  * common helpers
  */
 
-static INLINE struct r600_resource *
+static inline struct r600_resource *
 si_resource_create_custom(struct pipe_screen *screen,
 			  unsigned usage, unsigned size)
 {
@@ -286,7 +317,7 @@
 		PIPE_BIND_CUSTOM, usage, size));
 }
 
-static INLINE void
+static inline void
 si_invalidate_draw_sh_constants(struct si_context *sctx)
 {
 	sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
@@ -294,4 +325,18 @@
 	sctx->last_sh_base_reg = -1; /* reset to an unknown value */
 }
 
+static inline void
+si_set_atom_dirty(struct si_context *sctx,
+		  struct r600_atom *atom, bool dirty)
+{
+	atom->dirty = dirty;
+}
+
+static inline void
+si_mark_atom_dirty(struct si_context *sctx,
+		   struct r600_atom *atom)
+{
+	si_set_atom_dirty(sctx, atom, true);
+}
+
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_shader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_shader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_shader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_shader.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,6 +31,7 @@
 #include "gallivm/lp_bld_intr.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
 #include "gallivm/lp_bld_flow.h"
 #include "radeon/r600_cs.h"
 #include "radeon/radeon_llvm.h"
@@ -71,18 +72,25 @@
 	int param_streamout_write_index;
 	int param_streamout_offset[4];
 	int param_vertex_id;
+	int param_rel_auto_id;
+	int param_vs_prim_id;
 	int param_instance_id;
+	int param_tes_u;
+	int param_tes_v;
+	int param_tes_rel_patch_id;
+	int param_tes_patch_id;
+	int param_es2gs_offset;
 	LLVMTargetMachineRef tm;
 	LLVMValueRef const_md;
 	LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
-	LLVMValueRef ddxy_lds;
+	LLVMValueRef lds;
 	LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
 	LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
 	LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
 	LLVMValueRef so_buffers[4];
 	LLVMValueRef esgs_ring;
-	LLVMValueRef gsvs_ring;
-	LLVMValueRef gs_next_vertex;
+	LLVMValueRef gsvs_ring[4];
+	LLVMValueRef gs_next_vertex[4];
 };
 
 static struct si_shader_context * si_shader_context(
@@ -128,24 +136,30 @@
 	case TGSI_SEMANTIC_CLIPDIST:
 		assert(index <= 1);
 		return 2 + index;
-	case TGSI_SEMANTIC_CLIPVERTEX:
-		return 4;
-	case TGSI_SEMANTIC_COLOR:
-		assert(index <= 1);
-		return 5 + index;
-	case TGSI_SEMANTIC_BCOLOR:
-		assert(index <= 1);
-		return 7 + index;
-	case TGSI_SEMANTIC_FOG:
-		return 9;
-	case TGSI_SEMANTIC_EDGEFLAG:
-		return 10;
 	case TGSI_SEMANTIC_GENERIC:
-		assert(index <= 63-11);
-		return 11 + index;
+		if (index <= 63-4)
+			return 4 + index;
+		else
+			/* same explanation as in the default statement,
+			 * the only user hitting this is st/nine.
+			 */
+			return 0;
+
+	/* patch indices are completely separate and thus start from 0 */
+	case TGSI_SEMANTIC_TESSOUTER:
+		return 0;
+	case TGSI_SEMANTIC_TESSINNER:
+		return 1;
+	case TGSI_SEMANTIC_PATCH:
+		return 2 + index;
+
 	default:
-		assert(0);
-		return 63;
+		/* Don't fail here. The result of this function is only used
+		 * for LS, TCS, TES, and GS, where legacy GL semantics can't
+		 * occur, but this function is called for all vertex shaders
+		 * before it's known whether LS will be compiled or not.
+		 */
+		return 0;
 	}
 }
 
@@ -216,6 +230,136 @@
 	return value;
 }
 
+static LLVMValueRef get_rel_patch_id(struct si_shader_context *si_shader_ctx)
+{
+	switch (si_shader_ctx->type) {
+	case TGSI_PROCESSOR_TESS_CTRL:
+		return unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 0, 8);
+
+	case TGSI_PROCESSOR_TESS_EVAL:
+		return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				    si_shader_ctx->param_tes_rel_patch_id);
+
+	default:
+		assert(0);
+		return NULL;
+	}
+}
+
+/* Tessellation shaders pass outputs to the next shader using LDS.
+ *
+ * LS outputs = TCS inputs
+ * TCS outputs = TES inputs
+ *
+ * The LDS layout is:
+ * - TCS inputs for patch 0
+ * - TCS inputs for patch 1
+ * - TCS inputs for patch 2		= get_tcs_in_current_patch_offset (if RelPatchID==2)
+ * - ...
+ * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
+ * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
+ * - TCS outputs for patch 1
+ * - Per-patch TCS outputs for patch 1
+ * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
+ * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
+ * - ...
+ *
+ * All three shaders VS(LS), TCS, TES share the same LDS space.
+ */
+
+static LLVMValueRef
+get_tcs_in_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX)
+		return unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
+	else if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+		return unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
+	else {
+		assert(0);
+		return NULL;
+	}
+}
+
+static LLVMValueRef
+get_tcs_out_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+	return unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
+}
+
+static LLVMValueRef
+get_tcs_out_patch0_offset(struct si_shader_context *si_shader_ctx)
+{
+	return lp_build_mul_imm(&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld,
+				unpack_param(si_shader_ctx,
+					     SI_PARAM_TCS_OUT_OFFSETS,
+					     0, 16),
+				4);
+}
+
+static LLVMValueRef
+get_tcs_out_patch0_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+	return lp_build_mul_imm(&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld,
+				unpack_param(si_shader_ctx,
+					     SI_PARAM_TCS_OUT_OFFSETS,
+					     16, 16),
+				4);
+}
+
+static LLVMValueRef
+get_tcs_in_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+	struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+	LLVMValueRef patch_stride = get_tcs_in_patch_stride(si_shader_ctx);
+	LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
+
+	return LLVMBuildMul(gallivm->builder, patch_stride, rel_patch_id, "");
+}
+
+static LLVMValueRef
+get_tcs_out_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+	struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+	LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(si_shader_ctx);
+	LLVMValueRef patch_stride = get_tcs_out_patch_stride(si_shader_ctx);
+	LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
+
+	return LLVMBuildAdd(gallivm->builder, patch0_offset,
+			    LLVMBuildMul(gallivm->builder, patch_stride,
+					 rel_patch_id, ""),
+			    "");
+}
+
+static LLVMValueRef
+get_tcs_out_current_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+	struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+	LLVMValueRef patch0_patch_data_offset =
+		get_tcs_out_patch0_patch_data_offset(si_shader_ctx);
+	LLVMValueRef patch_stride = get_tcs_out_patch_stride(si_shader_ctx);
+	LLVMValueRef rel_patch_id = get_rel_patch_id(si_shader_ctx);
+
+	return LLVMBuildAdd(gallivm->builder, patch0_patch_data_offset,
+			    LLVMBuildMul(gallivm->builder, patch_stride,
+					 rel_patch_id, ""),
+			    "");
+}
+
+static void build_indexed_store(struct si_shader_context *si_shader_ctx,
+				LLVMValueRef base_ptr, LLVMValueRef index,
+				LLVMValueRef value)
+{
+	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef indices[2], pointer;
+
+	indices[0] = bld_base->uint_bld.zero;
+	indices[1] = index;
+
+	pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
+	LLVMBuildStore(gallivm->builder, value, pointer);
+}
+
 /**
  * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
  * It's equivalent to doing a load from &base_ptr[index].
@@ -319,7 +463,7 @@
 	args[0] = t_list;
 	args[1] = attribute_offset;
 	args[2] = buffer_index;
-	input = build_intrinsic(gallivm->builder,
+	input = lp_build_intrinsic(gallivm->builder,
 		"llvm.SI.vs.load.input", vec4_type, args, 3,
 		LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 
@@ -334,6 +478,285 @@
 	}
 }
 
+static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
+				     unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+
+	if (swizzle > 0)
+		return bld_base->uint_bld.zero;
+
+	switch (si_shader_ctx->type) {
+	case TGSI_PROCESSOR_VERTEX:
+		return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				    si_shader_ctx->param_vs_prim_id);
+	case TGSI_PROCESSOR_TESS_CTRL:
+		return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				    SI_PARAM_PATCH_ID);
+	case TGSI_PROCESSOR_TESS_EVAL:
+		return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				    si_shader_ctx->param_tes_patch_id);
+	case TGSI_PROCESSOR_GEOMETRY:
+		return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				    SI_PARAM_PRIMITIVE_ID);
+	default:
+		assert(0);
+		return bld_base->uint_bld.zero;
+	}
+}
+
+/**
+ * Return the value of tgsi_ind_register for indexing.
+ * This is the indirect index with the constant offset added to it.
+ */
+static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
+				       const struct tgsi_ind_register *ind,
+				       int rel_index)
+{
+	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	LLVMValueRef result;
+
+	result = si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
+	result = LLVMBuildLoad(gallivm->builder, result, "");
+	result = LLVMBuildAdd(gallivm->builder, result,
+			      lp_build_const_int32(gallivm, rel_index), "");
+	return result;
+}
+
+/**
+ * Calculate a dword address given an input or output register and a stride.
+ */
+static LLVMValueRef get_dw_address(struct si_shader_context *si_shader_ctx,
+				   const struct tgsi_full_dst_register *dst,
+				   const struct tgsi_full_src_register *src,
+				   LLVMValueRef vertex_dw_stride,
+				   LLVMValueRef base_addr)
+{
+	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+	ubyte *name, *index, *array_first;
+	int first, param;
+	struct tgsi_full_dst_register reg;
+
+	/* Set the register description. The address computation is the same
+	 * for sources and destinations. */
+	if (src) {
+		reg.Register.File = src->Register.File;
+		reg.Register.Index = src->Register.Index;
+		reg.Register.Indirect = src->Register.Indirect;
+		reg.Register.Dimension = src->Register.Dimension;
+		reg.Indirect = src->Indirect;
+		reg.Dimension = src->Dimension;
+		reg.DimIndirect = src->DimIndirect;
+	} else
+		reg = *dst;
+
+	/* If the register is 2-dimensional (e.g. an array of vertices
+	 * in a primitive), calculate the base address of the vertex. */
+	if (reg.Register.Dimension) {
+		LLVMValueRef index;
+
+		if (reg.Dimension.Indirect)
+			index = get_indirect_index(si_shader_ctx, &reg.DimIndirect,
+						   reg.Dimension.Index);
+		else
+			index = lp_build_const_int32(gallivm, reg.Dimension.Index);
+
+		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+					 LLVMBuildMul(gallivm->builder, index,
+						      vertex_dw_stride, ""), "");
+	}
+
+	/* Get information about the register. */
+	if (reg.Register.File == TGSI_FILE_INPUT) {
+		name = info->input_semantic_name;
+		index = info->input_semantic_index;
+		array_first = info->input_array_first;
+	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
+		name = info->output_semantic_name;
+		index = info->output_semantic_index;
+		array_first = info->output_array_first;
+	} else {
+		assert(0);
+		return NULL;
+	}
+
+	if (reg.Register.Indirect) {
+		/* Add the relative address of the element. */
+		LLVMValueRef ind_index;
+
+		if (reg.Indirect.ArrayID)
+			first = array_first[reg.Indirect.ArrayID];
+		else
+			first = reg.Register.Index;
+
+		ind_index = get_indirect_index(si_shader_ctx, &reg.Indirect,
+					   reg.Register.Index - first);
+
+		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+				    LLVMBuildMul(gallivm->builder, ind_index,
+						 lp_build_const_int32(gallivm, 4), ""), "");
+
+		param = si_shader_io_get_unique_index(name[first], index[first]);
+	} else {
+		param = si_shader_io_get_unique_index(name[reg.Register.Index],
+						      index[reg.Register.Index]);
+	}
+
+	/* Add the base address of the element. */
+	return LLVMBuildAdd(gallivm->builder, base_addr,
+			    lp_build_const_int32(gallivm, param * 4), "");
+}
+
+/**
+ * Load from LDS.
+ *
+ * \param type		output value type
+ * \param swizzle	offset (typically 0..3); it can be ~0, which loads a vec4
+ * \param dw_addr	address in dwords
+ */
+static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
+			     enum tgsi_opcode_type type, unsigned swizzle,
+			     LLVMValueRef dw_addr)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef value;
+
+	if (swizzle == ~0) {
+		LLVMValueRef values[TGSI_NUM_CHANNELS];
+
+		for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
+			values[chan] = lds_load(bld_base, type, chan, dw_addr);
+
+		return lp_build_gather_values(bld_base->base.gallivm, values,
+					      TGSI_NUM_CHANNELS);
+	}
+
+	dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+			    lp_build_const_int32(gallivm, swizzle));
+
+	value = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
+	return LLVMBuildBitCast(gallivm->builder, value,
+				tgsi2llvmtype(bld_base, type), "");
+}
+
+/**
+ * Store to LDS.
+ *
+ * \param swizzle	offset (typically 0..3)
+ * \param dw_addr	address in dwords
+ * \param value		value to store
+ */
+static void lds_store(struct lp_build_tgsi_context * bld_base,
+		      unsigned swizzle, LLVMValueRef dw_addr,
+		      LLVMValueRef value)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+	dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+			    lp_build_const_int32(gallivm, swizzle));
+
+	value = LLVMBuildBitCast(gallivm->builder, value,
+				 LLVMInt32TypeInContext(gallivm->context), "");
+	build_indexed_store(si_shader_ctx, si_shader_ctx->lds,
+			    dw_addr, value);
+}
+
+static LLVMValueRef fetch_input_tcs(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef dw_addr, stride;
+
+	stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+	dw_addr = get_tcs_in_current_patch_offset(si_shader_ctx);
+	dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
+
+	return lds_load(bld_base, type, swizzle, dw_addr);
+}
+
+static LLVMValueRef fetch_output_tcs(
+		struct lp_build_tgsi_context *bld_base,
+		const struct tgsi_full_src_register *reg,
+		enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef dw_addr, stride;
+
+	if (reg->Register.Dimension) {
+		stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+		dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
+	} else {
+		dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		dw_addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, dw_addr);
+	}
+
+	return lds_load(bld_base, type, swizzle, dw_addr);
+}
+
+static LLVMValueRef fetch_input_tes(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef dw_addr, stride;
+
+	if (reg->Register.Dimension) {
+		stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+		dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		dw_addr = get_dw_address(si_shader_ctx, NULL, reg, stride, dw_addr);
+	} else {
+		dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		dw_addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, dw_addr);
+	}
+
+	return lds_load(bld_base, type, swizzle, dw_addr);
+}
+
+static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
+			     const struct tgsi_full_instruction * inst,
+			     const struct tgsi_opcode_info * info,
+			     LLVMValueRef dst[4])
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+	unsigned chan_index;
+	LLVMValueRef dw_addr, stride;
+
+	/* Only handle per-patch and per-vertex outputs here.
+	 * Vectors will be lowered to scalars and this function will be called again.
+	 */
+	if (reg->Register.File != TGSI_FILE_OUTPUT ||
+	    (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) {
+		radeon_llvm_emit_store(bld_base, inst, info, dst);
+		return;
+	}
+
+	if (reg->Register.Dimension) {
+		stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+		dw_addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		dw_addr = get_dw_address(si_shader_ctx, reg, NULL, stride, dw_addr);
+	} else {
+		dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		dw_addr = get_dw_address(si_shader_ctx, reg, NULL, NULL, dw_addr);
+	}
+
+	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+		LLVMValueRef value = dst[chan_index];
+
+		if (inst->Instruction.Saturate)
+			value = radeon_llvm_saturate(bld_base, value);
+
+		lds_store(bld_base, chan_index, dw_addr, value);
+	}
+}
+
 static LLVMValueRef fetch_input_gs(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
@@ -353,13 +776,8 @@
 	unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
 	unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
 
-	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID) {
-		if (swizzle == 0)
-			return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
-					    SI_PARAM_PRIMITIVE_ID);
-		else
-			return uint->zero;
-	}
+	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
+		return get_primitive_id(bld_base, swizzle);
 
 	if (!reg->Register.Dimension)
 		return NULL;
@@ -391,7 +809,7 @@
 	args[1] = vtx_offset;
 	args[2] = lp_build_const_int32(gallivm,
 				       (get_param_index(semantic_name, semantic_index,
-							shader->selector->gs_used_inputs) * 4 +
+							shader->selector->inputs_read) * 4 +
 					swizzle) * 256);
 	args[3] = uint->zero;
 	args[4] = uint->one;  /* OFFEN */
@@ -401,13 +819,42 @@
 	args[8] = uint->zero; /* TFE */
 
 	return LLVMBuildBitCast(gallivm->builder,
-				build_intrinsic(gallivm->builder,
+				lp_build_intrinsic(gallivm->builder,
 						"llvm.SI.buffer.load.dword.i32.i32",
 						i32, args, 9,
 						LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
 				tgsi2llvmtype(bld_base, type), "");
 }
 
+static int lookup_interp_param_index(unsigned interpolate, unsigned location)
+{
+	switch (interpolate) {
+	case TGSI_INTERPOLATE_CONSTANT:
+		return 0;
+
+	case TGSI_INTERPOLATE_LINEAR:
+		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+			return SI_PARAM_LINEAR_SAMPLE;
+		else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+			return SI_PARAM_LINEAR_CENTROID;
+		else
+			return SI_PARAM_LINEAR_CENTER;
+		break;
+	case TGSI_INTERPOLATE_COLOR:
+	case TGSI_INTERPOLATE_PERSPECTIVE:
+		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+			return SI_PARAM_PERSP_SAMPLE;
+		else if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+			return SI_PARAM_PERSP_CENTROID;
+		else
+			return SI_PARAM_PERSP_CENTER;
+		break;
+	default:
+		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+		return -1;
+	}
+}
+
 static void declare_input_fs(
 	struct radeon_llvm_context *radeon_bld,
 	unsigned input_index,
@@ -422,7 +869,8 @@
 	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
 	LLVMValueRef main_fn = radeon_bld->main_fn;
 
-	LLVMValueRef interp_param;
+	LLVMValueRef interp_param = NULL;
+	int interp_param_idx;
 	const char * intr_name;
 
 	/* This value is:
@@ -471,31 +919,13 @@
 	attr_number = lp_build_const_int32(gallivm,
 					   shader->ps_input_param_offset[input_index]);
 
-	switch (decl->Interp.Interpolate) {
-	case TGSI_INTERPOLATE_CONSTANT:
-		interp_param = 0;
-		break;
-	case TGSI_INTERPOLATE_LINEAR:
-		if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
-			interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_SAMPLE);
-		else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
-			interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
-		else
-			interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
-		break;
-	case TGSI_INTERPOLATE_COLOR:
-	case TGSI_INTERPOLATE_PERSPECTIVE:
-		if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
-			interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_SAMPLE);
-		else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
-			interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
-		else
-			interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
-		break;
-	default:
-		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+	shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
+	interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
+						     decl->Interp.Location);
+	if (interp_param_idx == -1)
 		return;
-	}
+	else if (interp_param_idx)
+		interp_param = LLVMGetParam(main_fn, interp_param_idx);
 
 	/* fs.constant returns the param from the middle vertex, so it's not
 	 * really useful for flat shading. It's meant to be used for custom
@@ -533,12 +963,12 @@
 
 			args[0] = llvm_chan;
 			args[1] = attr_number;
-			front = build_intrinsic(gallivm->builder, intr_name,
+			front = lp_build_intrinsic(gallivm->builder, intr_name,
 						input_type, args, args[3] ? 4 : 3,
 						LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 
 			args[1] = back_attr_number;
-			back = build_intrinsic(gallivm->builder, intr_name,
+			back = lp_build_intrinsic(gallivm->builder, intr_name,
 					       input_type, args, args[3] ? 4 : 3,
 					       LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 
@@ -559,7 +989,7 @@
 		args[2] = params;
 		args[3] = interp_param;
 		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
-			build_intrinsic(gallivm->builder, intr_name,
+			lp_build_intrinsic(gallivm->builder, intr_name,
 					input_type, args, args[3] ? 4 : 3,
 					LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 		radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
@@ -577,7 +1007,7 @@
 			args[2] = params;
 			args[3] = interp_param;
 			radeon_bld->inputs[soa_index] =
-				build_intrinsic(gallivm->builder, intr_name,
+				lp_build_intrinsic(gallivm->builder, intr_name,
 						input_type, args, args[3] ? 4 : 3,
 						LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 		}
@@ -598,10 +1028,35 @@
 {
 	LLVMValueRef args[2] = {resource, offset};
 
-	return build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
+	return lp_build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
 			       LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
 }
 
+static LLVMValueRef load_sample_position(struct radeon_llvm_context *radeon_bld, LLVMValueRef sample_id)
+{
+	struct si_shader_context *si_shader_ctx =
+		si_shader_context(&radeon_bld->soa.bld_base);
+	struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
+	struct gallivm_state *gallivm = &radeon_bld->gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+	LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
+	LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
+
+	/* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
+	LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
+	LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
+
+	LLVMValueRef pos[4] = {
+		buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
+		buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
+		lp_build_const_float(gallivm, 0),
+		lp_build_const_float(gallivm, 0)
+	};
+
+	return lp_build_gather_values(gallivm, pos, 4);
+}
+
 static void declare_system_value(
 	struct radeon_llvm_context * radeon_bld,
 	unsigned index,
@@ -609,6 +1064,7 @@
 {
 	struct si_shader_context *si_shader_ctx =
 		si_shader_context(&radeon_bld->soa.bld_base);
+	struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
 	struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
 	struct gallivm_state *gallivm = &radeon_bld->gallivm;
 	LLVMValueRef value = 0;
@@ -637,30 +1093,23 @@
 				     SI_PARAM_BASE_VERTEX);
 		break;
 
+	case TGSI_SEMANTIC_INVOCATIONID:
+		if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+			value = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+		else if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY)
+			value = LLVMGetParam(radeon_bld->main_fn,
+					     SI_PARAM_GS_INSTANCE_ID);
+		else
+			assert(!"INVOCATIONID not implemented");
+		break;
+
 	case TGSI_SEMANTIC_SAMPLEID:
 		value = get_sample_id(radeon_bld);
 		break;
 
 	case TGSI_SEMANTIC_SAMPLEPOS:
-	{
-		LLVMBuilderRef builder = gallivm->builder;
-		LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
-		LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
-		LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
-
-		/* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
-		LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, get_sample_id(radeon_bld), 8);
-		LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
-
-		LLVMValueRef pos[4] = {
-			buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
-			buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
-			lp_build_const_float(gallivm, 0),
-			lp_build_const_float(gallivm, 0)
-		};
-		value = lp_build_gather_values(gallivm, pos, 4);
+		value = load_sample_position(radeon_bld, get_sample_id(radeon_bld));
 		break;
-	}
 
 	case TGSI_SEMANTIC_SAMPLEMASK:
 		/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
@@ -671,6 +1120,48 @@
 			value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
 		break;
 
+	case TGSI_SEMANTIC_TESSCOORD:
+	{
+		LLVMValueRef coord[4] = {
+			LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_u),
+			LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_v),
+			bld->zero,
+			bld->zero
+		};
+
+		/* For triangles, the vector should be (u, v, 1-u-v). */
+		if (si_shader_ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
+		    PIPE_PRIM_TRIANGLES)
+			coord[2] = lp_build_sub(bld, bld->one,
+						lp_build_add(bld, coord[0], coord[1]));
+
+		value = lp_build_gather_values(gallivm, coord, 4);
+		break;
+	}
+
+	case TGSI_SEMANTIC_VERTICESIN:
+		value = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
+		break;
+
+	case TGSI_SEMANTIC_TESSINNER:
+	case TGSI_SEMANTIC_TESSOUTER:
+	{
+		LLVMValueRef dw_addr;
+		int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);
+
+		dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		dw_addr = LLVMBuildAdd(gallivm->builder, dw_addr,
+				       lp_build_const_int32(gallivm, param * 4), "");
+
+		value = lds_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
+				 ~0, dw_addr);
+		break;
+	}
+
+	case TGSI_SEMANTIC_PRIMID:
+		value = get_primitive_id(&radeon_bld->soa.bld_base, 0);
+		break;
+
 	default:
 		assert(!"unknown system value");
 		return;
@@ -690,7 +1181,7 @@
 	const struct tgsi_ind_register *ireg = &reg->Indirect;
 	unsigned buf, idx;
 
-	LLVMValueRef addr;
+	LLVMValueRef addr, bufp;
 	LLVMValueRef result;
 
 	if (swizzle == LP_CHAN_ALL) {
@@ -705,8 +1196,24 @@
 	buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
 	idx = reg->Register.Index * 4 + swizzle;
 
-	if (!reg->Register.Indirect)
-		return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+	if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
+		if (type != TGSI_TYPE_DOUBLE)
+			return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+		else {
+			return radeon_llvm_emit_fetch_double(bld_base,
+							     si_shader_ctx->constants[buf][idx],
+							     si_shader_ctx->constants[buf][idx + 1]);
+		}
+	}
+
+	if (reg->Register.Dimension && reg->Dimension.Indirect) {
+		LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+		LLVMValueRef index;
+		index = get_indirect_index(si_shader_ctx, &reg->DimIndirect,
+						   reg->Dimension.Index);
+		bufp = build_indexed_load_const(si_shader_ctx, ptr, index);
+	} else
+		bufp = si_shader_ctx->const_resource[buf];
 
 	addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
 	addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
@@ -714,10 +1221,26 @@
 	addr = lp_build_add(&bld_base->uint_bld, addr,
 			    lp_build_const_int32(base->gallivm, idx * 4));
 
-	result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
-			    addr, base->elem_type);
+	result = buffer_load_const(base->gallivm->builder, bufp,
+				   addr, bld_base->base.elem_type);
 
-	return bitcast(bld_base, type, result);
+	if (type != TGSI_TYPE_DOUBLE)
+		result = bitcast(bld_base, type, result);
+	else {
+		/* The second dword of a double sits 4 bytes past the first, in
+		 * the same constant buffer (bufp), mirroring the idx / idx + 1
+		 * pair used by the direct path. */
+		LLVMValueRef addr2, result2;
+		addr2 = lp_build_add(&bld_base->uint_bld, addr,
+				     lp_build_const_int32(base->gallivm, 4));
+
+		result2 = buffer_load_const(base->gallivm->builder, bufp,
+				   addr2, bld_base->base.elem_type);
+
+		result = radeon_llvm_emit_fetch_double(bld_base,
+						       result, result2);
+	}
+	return result;
 }
 
 /* Initialize arguments for the shader export intrinsic */
@@ -756,7 +1279,7 @@
 			args[0] = values[2 * chan];
 			args[1] = values[2 * chan + 1];
 			args[chan + 5] =
-				build_intrinsic(base->gallivm->builder,
+				lp_build_intrinsic(base->gallivm->builder,
 						"llvm.SI.packf16",
 						LLVMInt32TypeInContext(base->gallivm->context),
 						args, 2,
@@ -838,12 +1361,12 @@
 					lp_build_const_float(gallivm, 1.0f),
 					lp_build_const_float(gallivm, -1.0f));
 
-		build_intrinsic(gallivm->builder,
+		lp_build_intrinsic(gallivm->builder,
 				"llvm.AMDGPU.kill",
 				LLVMVoidTypeInContext(gallivm->context),
 				&arg, 1, 0);
 	} else {
-		build_intrinsic(gallivm->builder,
+		lp_build_intrinsic(gallivm->builder,
 				"llvm.AMDGPU.kilp",
 				LLVMVoidTypeInContext(gallivm->context),
 				NULL, 0, 0);
@@ -864,7 +1387,7 @@
 				SI_PARAM_SAMPLE_COVERAGE);
 	coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
 
-	coverage = build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
+	coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
 				   bld_base->int_bld.elem_type,
 				   &coverage, 1, LLVMReadNoneAttribute);
 
@@ -994,16 +1517,16 @@
 
 	lp_build_intrinsic(gallivm->builder, name,
 			   LLVMVoidTypeInContext(gallivm->context),
-			   args, Elements(args));
+			   args, Elements(args), 0);
 }
 
-static void build_streamout_store(struct si_shader_context *shader,
-				  LLVMValueRef rsrc,
-				  LLVMValueRef vdata,
-				  unsigned num_channels,
-				  LLVMValueRef vaddr,
-				  LLVMValueRef soffset,
-				  unsigned inst_offset)
+static void build_tbuffer_store_dwords(struct si_shader_context *shader,
+				     LLVMValueRef rsrc,
+				     LLVMValueRef vdata,
+				     unsigned num_channels,
+				     LLVMValueRef vaddr,
+				     LLVMValueRef soffset,
+				     unsigned inst_offset)
 {
 	static unsigned dfmt[] = {
 		V_008F0C_BUF_DATA_FORMAT_32,
@@ -1036,13 +1559,16 @@
 	LLVMValueRef so_vtx_count =
 		unpack_param(shader, shader->param_streamout_config, 16, 7);
 
-	LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
+	LLVMValueRef tid = lp_build_intrinsic(builder, "llvm.SI.tid", i32,
 					   NULL, 0, LLVMReadNoneAttribute);
 
 	/* can_emit = tid < so_vtx_count; */
 	LLVMValueRef can_emit =
 		LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
 
+	LLVMValueRef stream_id =
+		unpack_param(shader, shader->param_streamout_config, 24, 2);
+
 	/* Emit the streamout code conditionally. This actually avoids
 	 * out-of-bounds buffer access. The hw tells us via the SGPR
 	 * (so_vtx_count) which threads are allowed to emit streamout data. */
@@ -1082,7 +1608,9 @@
 			unsigned reg = so->output[i].register_index;
 			unsigned start = so->output[i].start_component;
 			unsigned num_comps = so->output[i].num_components;
+			unsigned stream = so->output[i].stream;
 			LLVMValueRef out[4];
+			struct lp_build_if_state if_ctx_stream;
 
 			assert(num_comps && num_comps <= 4);
 			if (!num_comps || num_comps > 4)
@@ -1116,11 +1644,18 @@
 				break;
 			}
 
-			build_streamout_store(shader, shader->so_buffers[buf_idx],
-					      vdata, num_comps,
-					      so_write_offset[buf_idx],
-					      LLVMConstInt(i32, 0, 0),
-					      so->output[i].dst_offset*4);
+			LLVMValueRef can_emit_stream =
+				LLVMBuildICmp(builder, LLVMIntEQ,
+					      stream_id,
+					      lp_build_const_int32(gallivm, stream), "");
+
+			lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
+			build_tbuffer_store_dwords(shader, shader->so_buffers[buf_idx],
+						   vdata, num_comps,
+						   so_write_offset[buf_idx],
+						   LLVMConstInt(i32, 0, 0),
+						   so->output[i].dst_offset*4);
+			lp_build_endif(&if_ctx_stream);
 		}
 	}
 	lp_build_endif(&if_ctx);
@@ -1139,7 +1674,7 @@
 				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
 	LLVMValueRef args[9];
 	LLVMValueRef pos_args[4][9] = { { 0 } };
-	LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL;
+	LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
 	unsigned semantic_name, semantic_index;
 	unsigned target;
 	unsigned param_count = 0;
@@ -1165,7 +1700,12 @@
 			continue;
 		case TGSI_SEMANTIC_LAYER:
 			layer_value = outputs[i].values[0];
-			continue;
+			semantic_name = TGSI_SEMANTIC_GENERIC;
+			goto handle_semantic;
+		case TGSI_SEMANTIC_VIEWPORT_INDEX:
+			viewport_index_value = outputs[i].values[0];
+			semantic_name = TGSI_SEMANTIC_GENERIC;
+			goto handle_semantic;
 		case TGSI_SEMANTIC_POSITION:
 			target = V_008DFC_SQ_EXP_POS;
 			break;
@@ -1183,6 +1723,7 @@
 			continue;
 		case TGSI_SEMANTIC_PRIMID:
 		case TGSI_SEMANTIC_FOG:
+		case TGSI_SEMANTIC_TEXCOORD:
 		case TGSI_SEMANTIC_GENERIC:
 			target = V_008DFC_SQ_EXP_PARAM + param_count;
 			shader->vs_output_param_offset[i] = param_count;
@@ -1205,7 +1746,7 @@
 			lp_build_intrinsic(base->gallivm->builder,
 					   "llvm.SI.export",
 					   LLVMVoidTypeInContext(base->gallivm->context),
-					   args, 9);
+					   args, 9, 0);
 		}
 
 		if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
@@ -1214,6 +1755,8 @@
 		}
 	}
 
+	shader->nr_param_exports = param_count;
+
 	/* We need to add the position output manually if it's missing. */
 	if (!pos_args[0][0]) {
 		pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
@@ -1230,11 +1773,13 @@
 	/* Write the misc vector (point size, edgeflag, layer, viewport). */
 	if (shader->selector->info.writes_psize ||
 	    shader->selector->info.writes_edgeflag ||
+	    shader->selector->info.writes_viewport_index ||
 	    shader->selector->info.writes_layer) {
 		pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
 						      shader->selector->info.writes_psize |
 						      (shader->selector->info.writes_edgeflag << 1) |
-						      (shader->selector->info.writes_layer << 2));
+						      (shader->selector->info.writes_layer << 2) |
+						      (shader->selector->info.writes_viewport_index << 3));
 		pos_args[1][1] = uint->zero; /* EXEC mask */
 		pos_args[1][2] = uint->zero; /* last export? */
 		pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
@@ -1265,6 +1810,9 @@
 
 		if (shader->selector->info.writes_layer)
 			pos_args[1][7] = layer_value;
+
+		if (shader->selector->info.writes_viewport_index)
+			pos_args[1][8] = viewport_index_value;
 	}
 
 	for (i = 0; i < 4; i++)
@@ -1286,7 +1834,133 @@
 		lp_build_intrinsic(base->gallivm->builder,
 				   "llvm.SI.export",
 				   LLVMVoidTypeInContext(base->gallivm->context),
-				   pos_args[i], 9);
+				   pos_args[i], 9, 0);
+	}
+}
+
+/* This only writes the tessellation factor levels: invocation 0 loads them
+ * from LDS and stores them to the SI_RING_TESS_FACTOR buffer. */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	struct si_shader *shader = si_shader_ctx->shader;
+	unsigned tess_inner_index, tess_outer_index;
+	LLVMValueRef lds_base, lds_inner, lds_outer;
+	LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
+	LLVMValueRef out[6], vec0, vec1, invocation_id;
+	unsigned stride, outer_comps, inner_comps, i;
+	struct lp_build_if_state if_ctx;
+
+	/* Determine the layout of one tess factor element in the buffer.
+	 * This is plain C that emits no IR, so it runs before lp_build_if:
+	 * bailing out below must not leave the conditional unterminated. */
+	switch (shader->key.tcs.prim_mode) {
+	case PIPE_PRIM_LINES:
+		stride = 2; /* 2 dwords, 1 vec2 store */
+		outer_comps = 2;
+		inner_comps = 0;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		stride = 4; /* 4 dwords, 1 vec4 store */
+		outer_comps = 3;
+		inner_comps = 1;
+		break;
+	case PIPE_PRIM_QUADS:
+		stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
+		outer_comps = 4;
+		inner_comps = 2;
+		break;
+	default:
+		assert(0);
+		return; /* nothing has been emitted yet */
+	}
+
+	invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+
+	/* Do this only for invocation 0, because the tess levels are per-patch,
+	 * not per-vertex.
+	 *
+	 * This can't jump, because invocation 0 executes this. It should
+	 * at least mask out the loads and stores for other invocations.
+	 */
+	lp_build_if(&if_ctx, gallivm,
+		    LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+				  invocation_id, bld_base->uint_bld.zero, ""));
+
+	/* Load tess_inner and tess_outer from LDS.
+	 * Any invocation can write them, so we can't get them from a temporary.
+	 */
+	tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
+	tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
+
+	lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+	lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
+				 lp_build_const_int32(gallivm, tess_inner_index * 4), "");
+	lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
+				 lp_build_const_int32(gallivm, tess_outer_index * 4), "");
+
+	for (i = 0; i < outer_comps; i++)
+		out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+	for (i = 0; i < inner_comps; i++)
+		out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+
+	/* Convert the outputs to vectors for stores. */
+	vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+	vec1 = NULL;
+
+	if (stride > 4)
+		vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
+
+	/* Get the buffer. */
+	rw_buffers = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				  SI_PARAM_RW_BUFFERS);
+	buffer = build_indexed_load_const(si_shader_ctx, rw_buffers,
+			lp_build_const_int32(gallivm, SI_RING_TESS_FACTOR));
+
+	/* Get the offset. */
+	tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_TESS_FACTOR_OFFSET);
+	rel_patch_id = get_rel_patch_id(si_shader_ctx);
+	byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
+				  lp_build_const_int32(gallivm, 4 * stride), "");
+
+	/* Store the outputs. */
+	build_tbuffer_store_dwords(si_shader_ctx, buffer, vec0,
+				   MIN2(stride, 4), byteoffset, tf_base, 0);
+	if (vec1)
+		build_tbuffer_store_dwords(si_shader_ctx, buffer, vec1,
+					   stride - 4, byteoffset, tf_base, 16);
+	lp_build_endif(&if_ctx);
+}
+
+static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+	LLVMValueRef rel_vertex, dw_stride, vertex_base;
+	unsigned slot, comp;
+
+	/* Dword address of this vertex's outputs: vertex id * per-vertex stride. */
+	rel_vertex = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				  si_shader_ctx->param_rel_auto_id);
+	dw_stride = unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
+	vertex_base = LLVMBuildMul(builder, rel_vertex, dw_stride, "");
+
+	/* Copy every output to LDS, where the following shader stage
+	 * (TCS aka HS) picks them up as its inputs. */
+	for (slot = 0; slot < info->num_outputs; slot++) {
+		LLVMValueRef *channels = si_shader_ctx->radeon_bld.soa.outputs[slot];
+		int unique = si_shader_io_get_unique_index(info->output_semantic_name[slot],
+							   info->output_semantic_index[slot]);
+		LLVMValueRef slot_addr = LLVMBuildAdd(builder, vertex_base,
+					lp_build_const_int32(gallivm, unique * 4), "");
+
+		for (comp = 0; comp < 4; comp++) {
+			LLVMValueRef val = LLVMBuildLoad(builder, channels[comp], "");
+			lds_store(bld_base, comp, slot_addr, val);
+		}
+	}
+}
 
@@ -1298,17 +1972,25 @@
 	struct tgsi_shader_info *info = &es->selector->info;
 	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
 	LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
-					    SI_PARAM_ES2GS_OFFSET);
+					    si_shader_ctx->param_es2gs_offset);
+	uint64_t enabled_outputs = si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL ?
+					   es->key.tes.es_enabled_outputs :
+					   es->key.vs.es_enabled_outputs;
 	unsigned chan;
 	int i;
 
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr =
 			si_shader_ctx->radeon_bld.soa.outputs[i];
-		int param_index = get_param_index(info->output_semantic_name[i],
-						  info->output_semantic_index[i],
-						  es->key.vs.gs_used_inputs);
+		int param_index;
+
+		if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
+		    info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
+			continue;
 
+		param_index = get_param_index(info->output_semantic_name[i],
+					      info->output_semantic_index[i],
+					      enabled_outputs);
 		if (param_index < 0)
 			continue;
 
@@ -1336,7 +2018,7 @@
 
 	args[0] = lp_build_const_int32(gallivm,	SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE);
 	args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
-	build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+	lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
 			LLVMVoidTypeInContext(gallivm->context), args, 2,
 			LLVMNoUnwindAttribute);
 }
@@ -1349,7 +2031,7 @@
 	struct si_shader_output_values *outputs = NULL;
 	int i,j;
 
-	outputs = MALLOC(info->num_outputs * sizeof(outputs[0]));
+	outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
 
 	for (i = 0; i < info->num_outputs; i++) {
 		outputs[i].name = info->output_semantic_name[i];
@@ -1362,7 +2044,19 @@
 					      "");
 	}
 
-	si_llvm_export_vs(bld_base, outputs, info->num_outputs);
+	/* Export PrimitiveID when PS needs it. */
+	if (si_vs_exports_prim_id(si_shader_ctx->shader)) {
+		outputs[i].name = TGSI_SEMANTIC_PRIMID;
+		outputs[i].sid = 0;
+		outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+					       get_primitive_id(bld_base, 0));
+		outputs[i].values[1] = bld_base->base.undef;
+		outputs[i].values[2] = bld_base->base.undef;
+		outputs[i].values[3] = bld_base->base.undef;
+		i++;
+	}
+
+	si_llvm_export_vs(bld_base, outputs, i);
 	FREE(outputs);
 }
 
@@ -1427,7 +2121,7 @@
 				lp_build_intrinsic(base->gallivm->builder,
 						   "llvm.SI.export",
 						   LLVMVoidTypeInContext(base->gallivm->context),
-						   last_args, 9);
+						   last_args, 9, 0);
 			}
 
 			/* This instruction will be emitted at the end of the shader. */
@@ -1444,14 +2138,14 @@
 					lp_build_intrinsic(base->gallivm->builder,
 							   "llvm.SI.export",
 							   LLVMVoidTypeInContext(base->gallivm->context),
-							   args, 9);
+							   args, 9, 0);
 				}
 			}
 		} else {
 			lp_build_intrinsic(base->gallivm->builder,
 					   "llvm.SI.export",
 					   LLVMVoidTypeInContext(base->gallivm->context),
-					   args, 9);
+					   args, 9, 0);
 		}
 	}
 
@@ -1513,7 +2207,7 @@
 			lp_build_intrinsic(base->gallivm->builder,
 					   "llvm.SI.export",
 					   LLVMVoidTypeInContext(base->gallivm->context),
-					   args, 9);
+					   args, 9, 0);
 		else
 			memcpy(last_args, args, sizeof(args));
 	}
@@ -1544,7 +2238,7 @@
 	lp_build_intrinsic(base->gallivm->builder,
 			   "llvm.SI.export",
 			   LLVMVoidTypeInContext(base->gallivm->context),
-			   last_args, 9);
+			   last_args, 9, 0);
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
@@ -1573,15 +2267,46 @@
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	unsigned opcode = inst->Instruction.Opcode;
 	unsigned target = inst->Texture.Texture;
-	LLVMValueRef coords[5];
+	LLVMValueRef coords[5], derivs[6];
 	LLVMValueRef address[16];
 	int ref_pos;
 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
 	unsigned count = 0;
 	unsigned chan;
-	unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
-	unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+	unsigned sampler_src;
+	unsigned sampler_index;
+	unsigned num_deriv_channels = 0;
 	bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
+	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
+
+	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
+	sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+
+	if (emit_data->inst->Src[sampler_src].Register.Indirect) {
+		const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
+		LLVMValueRef ind_index;
+
+		ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
+
+		res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+		res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+
+		samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+		samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index);
+
+		if (target == TGSI_TEXTURE_2D_MSAA ||
+		    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+			/* Index the descriptor array itself; res_ptr is already a loaded descriptor. */
+			ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
+						 lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET), "");
+			fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+			fmask_ptr = build_indexed_load_const(si_shader_ctx, fmask_ptr, ind_index);
+		}
+	} else {
+		res_ptr = si_shader_ctx->resources[sampler_index];
+		samp_ptr = si_shader_ctx->samplers[sampler_index];
+		fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+	}
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
@@ -1590,7 +2315,7 @@
 		LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
 
 		/* Bitcast and truncate v8i32 to v16i8. */
-		LLVMValueRef res = si_shader_ctx->resources[sampler_index];
+		LLVMValueRef res = res_ptr;
 		res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
 		res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
 		res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
@@ -1659,18 +2384,13 @@
 		}
 	}
 
-	if (target == TGSI_TEXTURE_CUBE ||
-	    target == TGSI_TEXTURE_CUBE_ARRAY ||
-	    target == TGSI_TEXTURE_SHADOWCUBE ||
-	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
-		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-
 	/* Pack user derivatives */
 	if (opcode == TGSI_OPCODE_TXD) {
-		int num_deriv_channels, param;
+		int param, num_src_deriv_channels;
 
 		switch (target) {
 		case TGSI_TEXTURE_3D:
+			num_src_deriv_channels = 3;
 			num_deriv_channels = 3;
 			break;
 		case TGSI_TEXTURE_2D:
@@ -1679,27 +2399,44 @@
 		case TGSI_TEXTURE_SHADOWRECT:
 		case TGSI_TEXTURE_2D_ARRAY:
 		case TGSI_TEXTURE_SHADOW2D_ARRAY:
+			num_src_deriv_channels = 2;
+			num_deriv_channels = 2;
+			break;
 		case TGSI_TEXTURE_CUBE:
 		case TGSI_TEXTURE_SHADOWCUBE:
 		case TGSI_TEXTURE_CUBE_ARRAY:
 		case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+			/* Cube derivatives will be converted to 2D. */
+			num_src_deriv_channels = 3;
 			num_deriv_channels = 2;
 			break;
 		case TGSI_TEXTURE_1D:
 		case TGSI_TEXTURE_SHADOW1D:
 		case TGSI_TEXTURE_1D_ARRAY:
 		case TGSI_TEXTURE_SHADOW1D_ARRAY:
+			num_src_deriv_channels = 1;
 			num_deriv_channels = 1;
 			break;
 		default:
 			assert(0); /* no other targets are valid here */
 		}
 
-		for (param = 1; param <= 2; param++)
-			for (chan = 0; chan < num_deriv_channels; chan++)
-				address[count++] = lp_build_emit_fetch(bld_base, inst, param, chan);
+		for (param = 0; param < 2; param++)
+			for (chan = 0; chan < num_src_deriv_channels; chan++)
+				derivs[param * num_src_deriv_channels + chan] =
+					lp_build_emit_fetch(bld_base, inst, param+1, chan);
 	}
 
+	if (target == TGSI_TEXTURE_CUBE ||
+	    target == TGSI_TEXTURE_CUBE_ARRAY ||
+	    target == TGSI_TEXTURE_SHADOWCUBE ||
+	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
+		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, derivs);
+
+	if (opcode == TGSI_OPCODE_TXD)
+		for (int i = 0; i < num_deriv_channels * 2; i++)
+			address[count++] = derivs[i];
+
 	/* Pack texture coordinates */
 	address[count++] = coords[0];
 	if (num_coords > 1)
@@ -1766,7 +2503,7 @@
 		txf_emit_data.dst_type = LLVMVectorType(
 			LLVMInt32TypeInContext(gallivm->context), 4);
 		txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
-		txf_emit_data.args[1] = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+		txf_emit_data.args[1] = fmask_ptr;
 		txf_emit_data.args[2] = lp_build_const_int32(gallivm, inst.Texture.Texture);
 		txf_emit_data.arg_count = 3;
 
@@ -1797,8 +2534,7 @@
 		 * resource descriptor is 0 (invalid),
 		 */
 		LLVMValueRef fmask_desc =
-			LLVMBuildBitCast(gallivm->builder,
-					 si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index],
+			LLVMBuildBitCast(gallivm->builder, fmask_ptr,
 					 LLVMVectorType(uint_bld->elem_type, 8), "");
 
 		LLVMValueRef fmask_word1 =
@@ -1816,7 +2552,7 @@
 	}
 
 	/* Resource */
-	emit_data->args[1] = si_shader_ctx->resources[sampler_index];
+	emit_data->args[1] = res_ptr;
 
 	if (opcode == TGSI_OPCODE_TXF) {
 		/* add tex offsets */
@@ -1899,7 +2635,7 @@
 			dmask = 1 << gather_comp;
 		}
 
-		emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+		emit_data->args[2] = samp_ptr;
 		emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
 		emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
 		emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
@@ -1915,7 +2651,7 @@
 			LLVMFloatTypeInContext(gallivm->context),
 			4);
 	} else {
-		emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+		emit_data->args[2] = samp_ptr;
 		emit_data->args[3] = lp_build_const_int32(gallivm, target);
 		emit_data->arg_count = 4;
 
@@ -1950,7 +2686,7 @@
 				emit_data->inst->Texture.NumOffsets > 0 : false;
 
 	if (target == TGSI_TEXTURE_BUFFER) {
-		emit_data->output[emit_data->chan] = build_intrinsic(
+		emit_data->output[emit_data->chan] = lp_build_intrinsic(
 			base->gallivm->builder,
 			"llvm.SI.vs.load.input", emit_data->dst_type,
 			emit_data->args, emit_data->arg_count,
@@ -1999,7 +2735,7 @@
 			is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
 			LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
 
-		emit_data->output[emit_data->chan] = build_intrinsic(
+		emit_data->output[emit_data->chan] = lp_build_intrinsic(
 			base->gallivm->builder, intr_name, emit_data->dst_type,
 			emit_data->args, emit_data->arg_count,
 			LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -2046,7 +2782,7 @@
 		sprintf(intr_name, "%s.v%ui32", name,
 			LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
 
-		emit_data->output[emit_data->chan] = build_intrinsic(
+		emit_data->output[emit_data->chan] = lp_build_intrinsic(
 			base->gallivm->builder, intr_name, emit_data->dst_type,
 			emit_data->args, emit_data->arg_count,
 			LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -2060,17 +2796,47 @@
 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
 	unsigned target = inst->Texture.Texture;
+	LLVMValueRef res_ptr;
+
+	if (inst->Src[1].Register.Indirect) {
+		const struct tgsi_full_src_register *reg = &inst->Src[1];
+		LLVMValueRef ind_index;
+
+		ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
+
+		res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+		res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr,
+						   ind_index);
+	} else
+		res_ptr = si_shader_ctx->resources[inst->Src[1].Register.Index];
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
 		LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
 
 		/* Read the size from the buffer descriptor directly. */
-		LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
-		size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
-		size = LLVMBuildExtractElement(gallivm->builder, size,
-					      lp_build_const_int32(gallivm, 6), "");
+		LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+		LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+						lp_build_const_int32(gallivm, 6), "");
+
+		if (si_shader_ctx->screen->b.chip_class >= VI) {
+			/* On VI, the descriptor contains the size in bytes,
+			 * but TXQ must return the size in elements.
+			 * The stride is always non-zero for resources using TXQ.
+			 */
+			LLVMValueRef stride =
+				LLVMBuildExtractElement(builder, res,
+							lp_build_const_int32(gallivm, 5), "");
+			stride = LLVMBuildLShr(builder, stride,
+					       lp_build_const_int32(gallivm, 16), "");
+			stride = LLVMBuildAnd(builder, stride,
+					      lp_build_const_int32(gallivm, 0x3FFF), "");
+
+			size = LLVMBuildUDiv(builder, size, stride, "");
+		}
+
 		emit_data->args[0] = size;
 		return;
 	}
@@ -2079,7 +2845,7 @@
 	emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
 
 	/* Resource */
-	emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index];
+	emit_data->args[1] = res_ptr;
 
 	/* Texture target */
 	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
@@ -2126,6 +2892,35 @@
 	}
 }
 
+/*
+ * SI implements derivatives using the local data store (LDS)
+ * All writes to the LDS happen in all executing threads at
+ * the same time. TID is the Thread ID for the current
+ * thread and is a value between 0 and 63, representing
+ * the thread's position in the wavefront.
+ *
+ * For the pixel shader threads are grouped into quads of four pixels.
+ * The TIDs of the pixels of a quad are:
+ *
+ *  +------+------+
+ *  |4n + 0|4n + 1|
+ *  +------+------+
+ *  |4n + 2|4n + 3|
+ *  +------+------+
+ *
+ * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
+ * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
+ * the current pixel's column, and masking with 0xfffffffe yields the TID
+ * of the left pixel of the current pixel's row.
+ *
+ * Adding 1 yields the TID of the pixel to the right of the left pixel, and
+ * adding 2 yields the TID of the pixel below the top pixel.
+ */
+/* masks for thread ID. */
+#define TID_MASK_TOP_LEFT 0xfffffffc
+#define TID_MASK_TOP      0xfffffffd
+#define TID_MASK_LEFT     0xfffffffe
+
 static void si_llvm_emit_ddxy(
 	const struct lp_build_tgsi_action * action,
 	struct lp_build_tgsi_context * bld_base,
@@ -2142,25 +2937,34 @@
 	LLVMTypeRef i32;
 	unsigned swizzle[4];
 	unsigned c;
+	int idx;
+	unsigned mask;
 
 	i32 = LLVMInt32TypeInContext(gallivm->context);
 
 	indices[0] = bld_base->uint_bld.zero;
-	indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+	indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
 				     NULL, 0, LLVMReadNoneAttribute);
-	store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+	store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 				 indices, 2, "");
 
+	if (opcode == TGSI_OPCODE_DDX_FINE)
+		mask = TID_MASK_LEFT;
+	else if (opcode == TGSI_OPCODE_DDY_FINE)
+		mask = TID_MASK_TOP;
+	else
+		mask = TID_MASK_TOP_LEFT;
+
 	indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
-				  lp_build_const_int32(gallivm, 0xfffffffc), "");
-	load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+				  lp_build_const_int32(gallivm, mask), "");
+	load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 				 indices, 2, "");
 
+	/* for DDX we want the next X pixel, for DDY the next Y pixel. */
+	idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
 	indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
-				  lp_build_const_int32(gallivm,
-						       opcode == TGSI_OPCODE_DDX ? 1 : 2),
-				  "");
-	load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+				  lp_build_const_int32(gallivm, idx), "");
+	load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
 				 indices, 2, "");
 
 	for (c = 0; c < 4; ++c) {
@@ -2194,6 +2998,247 @@
 	emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
 }
 
+/*
+ * This takes an I,J coordinate pair
+ * and works out the X and Y derivatives.
+ * It returns DDX(I), DDX(J), DDY(I), DDY(J).
+ */
+static LLVMValueRef si_llvm_emit_ddxy_interp(
+	struct lp_build_tgsi_context *bld_base,
+	LLVMValueRef interp_ij)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	struct lp_build_context *base = &bld_base->base;
+	LLVMValueRef indices[2];
+	LLVMValueRef store_ptr, load_ptr_x, load_ptr_y, load_ptr_ddx, load_ptr_ddy, temp, temp2;
+	LLVMValueRef tl, tr, bl, result[4];
+	LLVMTypeRef i32;
+	unsigned c;
+
+	i32 = LLVMInt32TypeInContext(gallivm->context);
+
+	indices[0] = bld_base->uint_bld.zero;
+	indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+					NULL, 0, LLVMReadNoneAttribute);
+	store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+				 indices, 2, "");
+
+	temp = LLVMBuildAnd(gallivm->builder, indices[1],
+			    lp_build_const_int32(gallivm, TID_MASK_LEFT), "");
+
+	temp2 = LLVMBuildAnd(gallivm->builder, indices[1],
+			     lp_build_const_int32(gallivm, TID_MASK_TOP), "");
+
+	indices[1] = temp;
+	load_ptr_x = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+				  indices, 2, "");
+
+	indices[1] = temp2;
+	load_ptr_y = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+				  indices, 2, "");
+
+	indices[1] = LLVMBuildAdd(gallivm->builder, temp,
+				  lp_build_const_int32(gallivm, 1), "");
+	load_ptr_ddx = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+				   indices, 2, "");
+
+	indices[1] = LLVMBuildAdd(gallivm->builder, temp2,
+				  lp_build_const_int32(gallivm, 2), "");
+	load_ptr_ddy = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+				   indices, 2, "");
+
+	for (c = 0; c < 2; ++c) {
+		LLVMValueRef store_val;
+		LLVMValueRef c_ll = lp_build_const_int32(gallivm, c);
+
+		store_val = LLVMBuildExtractElement(gallivm->builder,
+						    interp_ij, c_ll, "");
+		LLVMBuildStore(gallivm->builder,
+			       store_val,
+			       store_ptr);
+
+		tl = LLVMBuildLoad(gallivm->builder, load_ptr_x, "");
+		tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
+
+		tr = LLVMBuildLoad(gallivm->builder, load_ptr_ddx, "");
+		tr = LLVMBuildBitCast(gallivm->builder, tr, base->elem_type, "");
+
+		result[c] = LLVMBuildFSub(gallivm->builder, tr, tl, "");
+
+		tl = LLVMBuildLoad(gallivm->builder, load_ptr_y, "");
+		tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
+
+		bl = LLVMBuildLoad(gallivm->builder, load_ptr_ddy, "");
+		bl = LLVMBuildBitCast(gallivm->builder, bl, base->elem_type, "");
+
+		result[c + 2] = LLVMBuildFSub(gallivm->builder, bl, tl, "");
+	}
+
+	return lp_build_gather_values(gallivm, result, 4);
+}
+
+static void interp_fetch_args(
+	struct lp_build_tgsi_context *bld_base,
+	struct lp_build_emit_data *emit_data)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	const struct tgsi_full_instruction *inst = emit_data->inst;
+
+	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
+		/* offset is in second src, first two channels */
+		emit_data->args[0] = lp_build_emit_fetch(bld_base,
+							 emit_data->inst, 1,
+							 0);
+		emit_data->args[1] = lp_build_emit_fetch(bld_base,
+							 emit_data->inst, 1,
+							 1);
+		emit_data->arg_count = 2;
+	} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+		LLVMValueRef sample_position;
+		LLVMValueRef sample_id;
+		LLVMValueRef halfval = lp_build_const_float(gallivm, 0.5f);
+
+		/* fetch sample ID, then fetch its sample position,
+		 * and place into first two channels.
+		 */
+		sample_id = lp_build_emit_fetch(bld_base,
+						emit_data->inst, 1, 0);
+		sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
+					     LLVMInt32TypeInContext(gallivm->context),
+					     "");
+		sample_position = load_sample_position(&si_shader_ctx->radeon_bld, sample_id);
+
+		emit_data->args[0] = LLVMBuildExtractElement(gallivm->builder,
+							     sample_position,
+							     lp_build_const_int32(gallivm, 0), "");
+
+		emit_data->args[0] = LLVMBuildFSub(gallivm->builder, emit_data->args[0], halfval, "");
+		emit_data->args[1] = LLVMBuildExtractElement(gallivm->builder,
+							     sample_position,
+							     lp_build_const_int32(gallivm, 1), "");
+		emit_data->args[1] = LLVMBuildFSub(gallivm->builder, emit_data->args[1], halfval, "");
+		emit_data->arg_count = 2;
+	}
+}
+
+static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
+				struct lp_build_tgsi_context *bld_base,
+				struct lp_build_emit_data *emit_data)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct si_shader *shader = si_shader_ctx->shader;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef interp_param;
+	const struct tgsi_full_instruction *inst = emit_data->inst;
+	const char *intr_name;
+	int input_index;
+	int chan;
+	int i;
+	LLVMValueRef attr_number;
+	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
+	LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
+	int interp_param_idx;
+	unsigned location;
+
+	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
+	input_index = inst->Src[0].Register.Index;
+
+	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
+		location = TGSI_INTERPOLATE_LOC_CENTER;
+	else
+		location = TGSI_INTERPOLATE_LOC_CENTROID;
+
+	interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index],
+						     location);
+	if (interp_param_idx == -1)
+		return;
+	else if (interp_param_idx)
+		interp_param = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, interp_param_idx);
+	else
+		interp_param = NULL;
+
+	attr_number = lp_build_const_int32(gallivm,
+					   shader->ps_input_param_offset[input_index]);
+
+	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+		LLVMValueRef ij_out[2];
+		LLVMValueRef ddxy_out = si_llvm_emit_ddxy_interp(bld_base, interp_param);
+
+		/*
+		 * take the I then J parameters, and the DDX/Y for it, and
+		 * calculate the IJ inputs for the interpolator.
+		 * temp1 = ddx * offset/sample.x + I;
+		 * interp_param.I = ddy * offset/sample.y + temp1;
+		 * temp1 = ddx * offset/sample.x + J;
+		 * interp_param.J = ddy * offset/sample.y + temp1;
+		 */
+		for (i = 0; i < 2; i++) {
+			LLVMValueRef ix_ll = lp_build_const_int32(gallivm, i);
+			LLVMValueRef iy_ll = lp_build_const_int32(gallivm, i + 2);
+			LLVMValueRef ddx_el = LLVMBuildExtractElement(gallivm->builder,
+								      ddxy_out, ix_ll, "");
+			LLVMValueRef ddy_el = LLVMBuildExtractElement(gallivm->builder,
+								      ddxy_out, iy_ll, "");
+			LLVMValueRef interp_el = LLVMBuildExtractElement(gallivm->builder,
+									 interp_param, ix_ll, "");
+			LLVMValueRef temp1, temp2;
+
+			interp_el = LLVMBuildBitCast(gallivm->builder, interp_el,
+						     LLVMFloatTypeInContext(gallivm->context), "");
+
+			temp1 = LLVMBuildFMul(gallivm->builder, ddx_el, emit_data->args[0], "");
+
+			temp1 = LLVMBuildFAdd(gallivm->builder, temp1, interp_el, "");
+
+			temp2 = LLVMBuildFMul(gallivm->builder, ddy_el, emit_data->args[1], "");
+
+			temp2 = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");
+
+			ij_out[i] = LLVMBuildBitCast(gallivm->builder,
+						     temp2,
+						     LLVMIntTypeInContext(gallivm->context, 32), "");
+		}
+		interp_param = lp_build_gather_values(bld_base->base.gallivm, ij_out, 2);
+	}
+
+	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
+	for (chan = 0; chan < 2; chan++) {
+		LLVMValueRef args[4];
+		LLVMValueRef llvm_chan;
+		unsigned schan;
+
+		schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
+		llvm_chan = lp_build_const_int32(gallivm, schan);
+
+		args[0] = llvm_chan;
+		args[1] = attr_number;
+		args[2] = params;
+		args[3] = interp_param;
+
+		emit_data->output[chan] =
+			lp_build_intrinsic(gallivm->builder, intr_name,
+					   input_type, args, args[3] ? 4 : 3,
+					   LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+	}
+}
+
+static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
+				       struct lp_build_emit_data *emit_data)
+{
+	LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+	struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
+	unsigned stream;
+
+	assert(src0.File == TGSI_FILE_IMMEDIATE);
+
+	stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3;
+	return stream;
+}
+
 /* Emit one vertex from the geometry shader */
 static void si_llvm_emit_vertex(
 	const struct lp_build_tgsi_action *action,
@@ -2213,9 +3258,14 @@
 	LLVMValueRef args[2];
 	unsigned chan;
 	int i;
+	unsigned stream;
+
+	stream = si_llvm_get_stream(bld_base, emit_data);
 
 	/* Write vertex attribute values to GSVS ring */
-	gs_next_vertex = LLVMBuildLoad(gallivm->builder, si_shader_ctx->gs_next_vertex, "");
+	gs_next_vertex = LLVMBuildLoad(gallivm->builder,
+				       si_shader_ctx->gs_next_vertex[stream],
+				       "");
 
 	/* If this thread has already emitted the declared maximum number of
 	 * vertices, kill it: excessive vertex emissions are not supposed to
@@ -2228,8 +3278,9 @@
 	kill = lp_build_select(&bld_base->base, can_emit,
 			       lp_build_const_float(gallivm, 1.0f),
 			       lp_build_const_float(gallivm, -1.0f));
-	build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-			LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
+
+	lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+			   LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
 
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr =
@@ -2247,7 +3298,7 @@
 			out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
 
 			build_tbuffer_store(si_shader_ctx,
-					    si_shader_ctx->gsvs_ring,
+					    si_shader_ctx->gsvs_ring[stream],
 					    out_val, 1,
 					    voffset, soffset, 0,
 					    V_008F0C_BUF_DATA_FORMAT_32,
@@ -2257,12 +3308,13 @@
 	}
 	gs_next_vertex = lp_build_add(uint, gs_next_vertex,
 				      lp_build_const_int32(gallivm, 1));
-	LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex);
+
+	LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex[stream]);
 
 	/* Signal vertex emission */
-	args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS);
+	args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS | (stream << 8));
 	args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
-	build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+	lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
 			LLVMVoidTypeInContext(gallivm->context), args, 2,
 			LLVMNoUnwindAttribute);
 }
@@ -2276,15 +3328,28 @@
 	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMValueRef args[2];
+	unsigned stream;
 
 	/* Signal primitive cut */
-	args[0] = lp_build_const_int32(gallivm,	SENDMSG_GS_OP_CUT | SENDMSG_GS);
+	stream = si_llvm_get_stream(bld_base, emit_data);
+	args[0] = lp_build_const_int32(gallivm,	SENDMSG_GS_OP_CUT | SENDMSG_GS | (stream << 8));
 	args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
-	build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+	lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
 			LLVMVoidTypeInContext(gallivm->context), args, 2,
 			LLVMNoUnwindAttribute);
 }
 
+static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
+				 struct lp_build_tgsi_context *bld_base,
+				 struct lp_build_emit_data *emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+	lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local",
+			LLVMVoidTypeInContext(gallivm->context), NULL, 0,
+			LLVMNoUnwindAttribute);
+}
+
 static const struct lp_build_tgsi_action tex_action = {
 	.fetch_args = tex_fetch_args,
 	.emit = build_tex_intrinsic,
@@ -2296,6 +3361,11 @@
 	.intr_name = "llvm.SI.resinfo"
 };
 
+static const struct lp_build_tgsi_action interp_action = {
+	.fetch_args = interp_fetch_args,
+	.emit = build_interp_intrinsic,
+};
+
 static void create_meta_data(struct si_shader_context *si_shader_ctx)
 {
 	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -2314,6 +3384,27 @@
 			       CONST_ADDR_SPACE);
 }
 
+static void declare_streamout_params(struct si_shader_context *si_shader_ctx,
+				     struct pipe_stream_output_info *so,
+				     LLVMTypeRef *params, LLVMTypeRef i32,
+				     unsigned *num_params)
+{
+	int i;
+
+	/* Streamout SGPRs. */
+	if (so->num_outputs) {
+		params[si_shader_ctx->param_streamout_config = (*num_params)++] = i32;
+		params[si_shader_ctx->param_streamout_write_index = (*num_params)++] = i32;
+	}
+	/* A streamout buffer offset is loaded if the stride is non-zero. */
+	for (i = 0; i < 4; i++) {
+		if (!so->stride[i])
+			continue;
+
+		params[si_shader_ctx->param_streamout_offset[i] = (*num_params)++] = i32;
+	}
+}
+
 static void create_function(struct si_shader_context *si_shader_ctx)
 {
 	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
@@ -2346,8 +3437,10 @@
 		num_params = SI_PARAM_START_INSTANCE+1;
 
 		if (shader->key.vs.as_es) {
-			params[SI_PARAM_ES2GS_OFFSET] = i32;
-			num_params++;
+			params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+		} else if (shader->key.vs.as_ls) {
+			params[SI_PARAM_LS_OUT_LAYOUT] = i32;
+			num_params = SI_PARAM_LS_OUT_LAYOUT+1;
 		} else {
 			if (shader->is_gs_copy_shader) {
 				last_array_pointer = SI_PARAM_CONST;
@@ -2355,30 +3448,52 @@
 			}
 
 			/* The locations of the other parameters are assigned dynamically. */
-
-			/* Streamout SGPRs. */
-			if (shader->selector->so.num_outputs) {
-				params[si_shader_ctx->param_streamout_config = num_params++] = i32;
-				params[si_shader_ctx->param_streamout_write_index = num_params++] = i32;
-			}
-			/* A streamout buffer offset is loaded if the stride is non-zero. */
-			for (i = 0; i < 4; i++) {
-				if (!shader->selector->so.stride[i])
-					continue;
-
-				params[si_shader_ctx->param_streamout_offset[i] = num_params++] = i32;
-			}
+			declare_streamout_params(si_shader_ctx, &shader->selector->so,
+						 params, i32, &num_params);
 		}
 
 		last_sgpr = num_params-1;
 
 		/* VGPRs */
 		params[si_shader_ctx->param_vertex_id = num_params++] = i32;
-		params[num_params++] = i32; /* unused*/
-		params[num_params++] = i32; /* unused */
+		params[si_shader_ctx->param_rel_auto_id = num_params++] = i32;
+		params[si_shader_ctx->param_vs_prim_id = num_params++] = i32;
 		params[si_shader_ctx->param_instance_id = num_params++] = i32;
 		break;
 
+	case TGSI_PROCESSOR_TESS_CTRL:
+		params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+		params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+		params[SI_PARAM_TCS_IN_LAYOUT] = i32;
+		params[SI_PARAM_TESS_FACTOR_OFFSET] = i32;
+		last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+
+		/* VGPRs */
+		params[SI_PARAM_PATCH_ID] = i32;
+		params[SI_PARAM_REL_IDS] = i32;
+		num_params = SI_PARAM_REL_IDS+1;
+		break;
+
+	case TGSI_PROCESSOR_TESS_EVAL:
+		params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+		params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+		num_params = SI_PARAM_TCS_OUT_LAYOUT+1;
+
+		if (shader->key.tes.as_es) {
+			params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+		} else {
+			declare_streamout_params(si_shader_ctx, &shader->selector->so,
+						 params, i32, &num_params);
+		}
+		last_sgpr = num_params - 1;
+
+		/* VGPRs */
+		params[si_shader_ctx->param_tes_u = num_params++] = f32;
+		params[si_shader_ctx->param_tes_v = num_params++] = f32;
+		params[si_shader_ctx->param_tes_rel_patch_id = num_params++] = i32;
+		params[si_shader_ctx->param_tes_patch_id = num_params++] = i32;
+		break;
+
 	case TGSI_PROCESSOR_GEOMETRY:
 		params[SI_PARAM_GS2VS_OFFSET] = i32;
 		params[SI_PARAM_GS_WAVE_ID] = i32;
@@ -2445,12 +3560,35 @@
 
 	if (bld_base->info &&
 	    (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
-		si_shader_ctx->ddxy_lds =
+	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
+	     bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
+	     bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
+	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
+	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
+		si_shader_ctx->lds =
 			LLVMAddGlobalInAddressSpace(gallivm->module,
 						    LLVMArrayType(i32, 64),
 						    "ddxy_lds",
 						    LOCAL_ADDR_SPACE);
+
+	if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
+	    si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
+	    si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
+		/* This is the upper bound, maximum is 32 inputs times 32 vertices */
+		unsigned vertex_data_dw_size = 32*32*4;
+		unsigned patch_data_dw_size = 32*4;
+		/* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+		unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+		unsigned lds_dwords = patch_dw_size;
+
+		/* The actual size is computed outside of the shader to reduce
+		 * the number of shader variants. */
+		si_shader_ctx->lds =
+			LLVMAddGlobalInAddressSpace(gallivm->module,
+						    LLVMArrayType(i32, lds_dwords),
+						    "tess_lds",
+						    LOCAL_ADDR_SPACE);
+	}
 }
 
 static void preload_constants(struct si_shader_context *si_shader_ctx)
@@ -2527,9 +3665,13 @@
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
 	unsigned i;
 
-	if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
-	    si_shader_ctx->shader->key.vs.as_es ||
-	    !si_shader_ctx->shader->selector->so.num_outputs)
+	/* Streamout can only be used if the shader is compiled as VS. */
+	if (!si_shader_ctx->shader->selector->so.num_outputs ||
+	    (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
+	     (si_shader_ctx->shader->key.vs.as_es ||
+	      si_shader_ctx->shader->key.vs.as_ls)) ||
+	    (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+	     si_shader_ctx->shader->key.tes.as_es))
 		return;
 
 	LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -2560,6 +3702,8 @@
 
 	if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
 	     si_shader_ctx->shader->key.vs.as_es) ||
+	    (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+	     si_shader_ctx->shader->key.tes.as_es) ||
 	    si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
 		LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
 
@@ -2567,13 +3711,21 @@
 			build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
 	}
 
-	if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY ||
-	    si_shader_ctx->shader->is_gs_copy_shader) {
+	if (si_shader_ctx->shader->is_gs_copy_shader) {
 		LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
 
-		si_shader_ctx->gsvs_ring =
+		si_shader_ctx->gsvs_ring[0] =
 			build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
 	}
+	if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+		int i;
+		for (i = 0; i < 4; i++) {
+			LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS + i);
+
+			si_shader_ctx->gsvs_ring[i] =
+				build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
+		}
+	}
 }
 
 void si_shader_binary_read_config(const struct si_screen *sscreen,
@@ -2673,11 +3825,9 @@
 	return 0;
 }
 
-int si_shader_binary_read(struct si_screen *sscreen,
-			struct si_shader *shader,
-			const struct radeon_shader_binary *binary)
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
 {
-
+	const struct radeon_shader_binary *binary = &shader->binary;
 	unsigned i;
 	bool dump  = r600_can_dump_shader(&sscreen->b,
 		shader->selector ? shader->selector->tokens : NULL);
@@ -2686,12 +3836,17 @@
 	si_shader_binary_upload(sscreen, shader);
 
 	if (dump) {
-		if (!binary->disassembled) {
-			fprintf(stderr, "SI CODE:\n");
-			for (i = 0; i < binary->code_size; i+=4 ) {
-				fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
-				binary->code[i + 2], binary->code[i + 1],
-				binary->code[i]);
+		if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
+			if (binary->disasm_string) {
+				fprintf(stderr, "\nShader Disassembly:\n\n");
+				fprintf(stderr, "%s\n", binary->disasm_string);
+			} else {
+				fprintf(stderr, "SI CODE:\n");
+				for (i = 0; i < binary->code_size; i+=4 ) {
+					fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
+					binary->code[i + 2], binary->code[i + 1],
+					binary->code[i]);
+				}
 			}
 		}
 
@@ -2708,15 +3863,16 @@
 		    LLVMTargetMachineRef tm, LLVMModuleRef mod)
 {
 	int r = 0;
-	bool dump = r600_can_dump_shader(&sscreen->b,
-			shader->selector ? shader->selector->tokens : NULL);
-	r = radeon_llvm_compile(mod, &shader->binary,
-		r600_get_llvm_processor_name(sscreen->b.family), dump, tm);
+	bool dump_asm = r600_can_dump_shader(&sscreen->b,
+				shader->selector ? shader->selector->tokens : NULL);
+	bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
 
-	if (r) {
+	r = radeon_llvm_compile(mod, &shader->binary,
+		r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
+	if (r)
 		return r;
-	}
-	r = si_shader_binary_read(sscreen, shader, &shader->binary);
+
+	r = si_shader_binary_read(sscreen, shader);
 
 	FREE(shader->binary.config);
 	FREE(shader->binary.rodata);
@@ -2724,7 +3880,8 @@
 	if (shader->scratch_bytes_per_wave == 0) {
 		FREE(shader->binary.code);
 		FREE(shader->binary.relocs);
-		memset(&shader->binary, 0, sizeof(shader->binary));
+		memset(&shader->binary, 0,
+		       offsetof(struct radeon_shader_binary, disasm_string));
 	}
 	return r;
 }
@@ -2756,7 +3913,7 @@
 	preload_streamout_buffers(si_shader_ctx);
 	preload_ring_buffers(si_shader_ctx);
 
-	args[0] = si_shader_ctx->gsvs_ring;
+	args[0] = si_shader_ctx->gsvs_ring[0];
 	args[1] = lp_build_mul_imm(uint,
 				   LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
 						si_shader_ctx->param_vertex_id),
@@ -2782,7 +3939,7 @@
 
 			outputs[i].values[chan] =
 				LLVMBuildBitCast(gallivm->builder,
-						 build_intrinsic(gallivm->builder,
+						 lp_build_intrinsic(gallivm->builder,
 								 "llvm.SI.buffer.load.dword.i32.i32",
 								 LLVMInt32TypeInContext(gallivm->context),
 								 args, 9,
@@ -2822,9 +3979,21 @@
 		fprintf(stderr, "}\n");
 
 		if (key->vs.as_es)
-			fprintf(stderr, "  gs_used_inputs = 0x%"PRIx64"\n",
-				key->vs.gs_used_inputs);
+			fprintf(stderr, "  es_enabled_outputs = 0x%"PRIx64"\n",
+				key->vs.es_enabled_outputs);
 		fprintf(stderr, "  as_es = %u\n", key->vs.as_es);
+		fprintf(stderr, "  as_ls = %u\n", key->vs.as_ls);
+		break;
+
+	case PIPE_SHADER_TESS_CTRL:
+		fprintf(stderr, "  prim_mode = %u\n", key->tcs.prim_mode);
+		break;
+
+	case PIPE_SHADER_TESS_EVAL:
+		if (key->tes.as_es)
+			fprintf(stderr, "  es_enabled_outputs = 0x%"PRIx64"\n",
+				key->tes.es_enabled_outputs);
+		fprintf(stderr, "  as_es = %u\n", key->tes.as_es);
 		break;
 
 	case PIPE_SHADER_GEOMETRY:
@@ -2866,7 +4035,7 @@
 
 	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
 	 * conversion fails. */
-	if (dump) {
+	if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
 		si_dump_key(sel->type, &shader->key);
 		tgsi_dump(tokens, 0);
 		si_dump_streamout(&sel->so);
@@ -2888,6 +4057,10 @@
 	bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
 	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
 
+	bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
+	bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
+	bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
+
 	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
@@ -2903,9 +4076,12 @@
 
 	bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
 	bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
+	bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
+	bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
 
 	bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
 	bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
+	bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 
 	if (HAVE_LLVM >= 0x0306) {
 		bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
@@ -2923,11 +4099,25 @@
 	switch (si_shader_ctx.type) {
 	case TGSI_PROCESSOR_VERTEX:
 		si_shader_ctx.radeon_bld.load_input = declare_input_vs;
-		if (shader->key.vs.as_es) {
+		if (shader->key.vs.as_ls)
+			bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
+		else if (shader->key.vs.as_es)
 			bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
-		} else {
+		else
+			bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+		break;
+	case TGSI_PROCESSOR_TESS_CTRL:
+		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
+		bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
+		bld_base->emit_store = store_output_tcs;
+		bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
+		break;
+	case TGSI_PROCESSOR_TESS_EVAL:
+		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
+		if (shader->key.tes.as_es)
+			bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
+		else
 			bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
-		}
 		break;
 	case TGSI_PROCESSOR_GEOMETRY:
 		bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
@@ -2961,9 +4151,12 @@
 	preload_ring_buffers(&si_shader_ctx);
 
 	if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
-		si_shader_ctx.gs_next_vertex =
-			lp_build_alloca(bld_base->base.gallivm,
-					bld_base->uint_bld.elem_type, "");
+		int i;
+		for (i = 0; i < 4; i++) {
+			si_shader_ctx.gs_next_vertex[i] =
+				lp_build_alloca(bld_base->base.gallivm,
+						bld_base->uint_bld.elem_type, "");
+		}
 	}
 
 	if (!lp_build_tgsi_llvm(bld_base, tokens)) {
@@ -3015,4 +4208,5 @@
 
 	FREE(shader->binary.code);
 	FREE(shader->binary.relocs);
+	FREE(shader->binary.disasm_string);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_shader.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,46 @@
  *      Christian König <christian.koenig@amd.com>
  */
 
+/* How linking tessellation shader inputs and outputs works.
+ *
+ * Inputs and outputs between shaders are stored in a buffer. This buffer
+ * lives in LDS (typical case for tessellation), but it can also live
+ * in memory. Each input or output has a fixed location within a vertex.
+ * The highest used input or output determines the stride between vertices.
+ *
+ * Since tessellation is only enabled in the OpenGL core profile,
+ * only these semantics are valid for per-vertex data:
+ *
+ *   Name             Location
+ *
+ *   POSITION         0
+ *   PSIZE            1
+ *   CLIPDIST0..1     2..3
+ *   CULLDIST0..1     (not implemented)
+ *   GENERIC0..31     4..35
+ *
+ * For example, a shader only writing GENERIC0 has the output stride of 5.
+ *
+ * Only these semantics are valid for per-patch data:
+ *
+ *   Name             Location
+ *
+ *   TESSOUTER        0
+ *   TESSINNER        1
+ *   PATCH0..29       2..31
+ *
+ * That's how independent shaders agree on input and output locations.
+ * The si_shader_io_get_unique_index function assigns the locations.
+ *
+ * Other required information for calculating the input and output addresses
+ * like the vertex stride, the patch stride, and the offsets where per-vertex
+ * and per-patch data start, is passed to the shader via user data SGPRs.
+ * The offsets and strides are calculated at draw time and aren't available
+ * at compile time.
+ *
+ * The same approach should be used for linking ES->GS in the future.
+ */
+
 #ifndef SI_SHADER_H
 #define SI_SHADER_H
 
@@ -43,9 +83,16 @@
 #define SI_SGPR_VERTEX_BUFFER	8  /* VS only */
 #define SI_SGPR_BASE_VERTEX	10 /* VS only */
 #define SI_SGPR_START_INSTANCE	11 /* VS only */
+#define SI_SGPR_LS_OUT_LAYOUT	12 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS	8  /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT	9  /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT	10 /* TCS only */
 #define SI_SGPR_ALPHA_REF	8  /* PS only */
 
 #define SI_VS_NUM_USER_SGPR	12
+#define SI_LS_NUM_USER_SGPR	13
+#define SI_TCS_NUM_USER_SGPR	11
+#define SI_TES_NUM_USER_SGPR	10
 #define SI_GS_NUM_USER_SGPR	8
 #define SI_GSCOPY_NUM_USER_SGPR	4
 #define SI_PS_NUM_USER_SGPR	9
@@ -62,8 +109,30 @@
 #define SI_PARAM_START_INSTANCE	6
 /* the other VS parameters are assigned dynamically */
 
-/* ES only parameters */
-#define SI_PARAM_ES2GS_OFFSET	7
+/* Offsets where TCS outputs and TCS patch outputs live in LDS:
+ *   [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
+ *   [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32
+ */
+#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+
+/* Layout of TCS outputs / TES inputs:
+ *   [0:12] = stride between output patches in dwords = num_outputs * num_vertices * 4, max = 32*32*4
+ *   [13:20] = stride between output vertices in dwords = num_outputs * 4, max = 32*4
+ *   [26:31] = gl_PatchVerticesIn, max = 32
+ */
+#define SI_PARAM_TCS_OUT_LAYOUT	5 /* for TCS & TES */
+
+/* Layout of LS outputs / TCS inputs
+ *   [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
+ *   [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
+ */
+#define SI_PARAM_TCS_IN_LAYOUT	6 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT	7 /* same value as TCS_IN_LAYOUT, LS only */
+
+/* TCS only parameters. */
+#define SI_PARAM_TESS_FACTOR_OFFSET 7
+#define SI_PARAM_PATCH_ID	8
+#define SI_PARAM_REL_IDS	9
 
 /* GS only parameters */
 #define SI_PARAM_GS2VS_OFFSET	4
@@ -115,9 +184,26 @@
 
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
-	uint64_t	gs_used_inputs; /* mask of "get_unique_index" bits */
+	unsigned	gs_num_invocations;
+
+	/* masks of "get_unique_index" bits */
+	uint64_t	inputs_read;
+	uint64_t	outputs_written;
+	uint32_t	patch_outputs_written;
+	uint32_t	ps_colors_written;
 };
 
+/* Valid shader configurations:
+ *
+ * API shaders       VS | TCS | TES | GS |pass| PS
+ * are compiled as:     |     |     |    |thru|
+ *                      |     |     |    |    |
+ * Only VS & PS:     VS | --  | --  | -- | -- | PS
+ * With GS:          ES | --  | --  | GS | VS | PS
+ * With Tessel.:     LS | HS  | VS  | -- | -- | PS
+ * With both:        LS | HS  | ES  | GS | VS | PS
+ */
+
 union si_shader_key {
 	struct {
 		unsigned	export_16bpc:8;
@@ -130,11 +216,25 @@
 	} ps;
 	struct {
 		unsigned	instance_divisors[SI_NUM_VERTEX_BUFFERS];
-		/* The mask of "get_unique_index" bits, needed for ES,
-		 * it describes how the ES->GS ring buffer is laid out. */
-		uint64_t	gs_used_inputs;
-		unsigned	as_es:1;
+		/* Mask of "get_unique_index" bits - which outputs are read
+		 * by the next stage (needed by ES).
+		 * This describes how outputs are laid out in memory. */
+		uint64_t	es_enabled_outputs;
+		unsigned	as_es:1; /* export shader */
+		unsigned	as_ls:1; /* local shader */
+		unsigned	export_prim_id; /* when PS needs it and GS is disabled */
 	} vs;
+	struct {
+		unsigned	prim_mode:3;
+	} tcs; /* tessellation control shader */
+	struct {
+		/* Mask of "get_unique_index" bits - which outputs are read
+		 * by the next stage (needed by ES).
+		 * This describes how outputs are laid out in memory. */
+		uint64_t	es_enabled_outputs;
+		unsigned	as_es:1; /* export shader */
+		unsigned	export_prim_id; /* when PS needs it and GS is disabled */
+	} tes; /* tessellation evaluation shader */
 };
 
 struct si_shader {
@@ -161,27 +261,47 @@
 	unsigned		nparam;
 	unsigned		vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
 	unsigned		ps_input_param_offset[PIPE_MAX_SHADER_INPUTS];
-
+	unsigned		ps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
 	bool			uses_instanceid;
 	unsigned		nr_pos_exports;
+	unsigned		nr_param_exports;
 	bool			is_gs_copy_shader;
 	bool			dx10_clamp_mode; /* convert NaNs to 0 */
+
+	unsigned		ls_rsrc1;
+	unsigned		ls_rsrc2;
 };
 
 static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
 {
-	return sctx->gs_shader ? &sctx->gs_shader->info
-                               : &sctx->vs_shader->info;
+	if (sctx->gs_shader)
+		return &sctx->gs_shader->info;
+	else if (sctx->tes_shader)
+		return &sctx->tes_shader->info;
+	else
+		return &sctx->vs_shader->info;
 }
 
 static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
 {
 	if (sctx->gs_shader)
 		return sctx->gs_shader->current->gs_copy_shader;
+	else if (sctx->tes_shader)
+		return sctx->tes_shader->current;
 	else
 		return sctx->vs_shader->current;
 }
 
+/* True if the VS or TES variant's key has export_prim_id set; false for other types. */
+static inline bool si_vs_exports_prim_id(struct si_shader *shader)
+{
+	switch (shader->selector->type) {
+	case PIPE_SHADER_VERTEX:    return shader->key.vs.export_prim_id;
+	case PIPE_SHADER_TESS_EVAL: return shader->key.tes.export_prim_id;
+	default:                    return false;
+	}
+}
+
 /* radeonsi_shader.c */
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		     struct si_shader *shader);
@@ -190,8 +310,7 @@
 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
-int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
-		const struct radeon_shader_binary *binary);
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
 			struct si_shader *shader,
 			uint64_t scratch_va);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -29,6 +29,7 @@
 #include "sid.h"
 #include "radeon/r600_cs.h"
 
+#include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
@@ -44,9 +45,24 @@
 	*list_elem = atom;
 }
 
+/* Map a RADEON_SURF_MODE_* value to the matching V_009910_ARRAY_* value. */
+unsigned si_array_mode(unsigned mode)
+{
+	switch (mode) {
+	case RADEON_SURF_MODE_2D:
+		return V_009910_ARRAY_2D_TILED_THIN1;
+	case RADEON_SURF_MODE_1D:
+		return V_009910_ARRAY_1D_TILED_THIN1;
+	case RADEON_SURF_MODE_LINEAR_ALIGNED:
+		return V_009910_ARRAY_LINEAR_ALIGNED;
+	default: /* RADEON_SURF_MODE_LINEAR and any unrecognized mode */
+		return V_009910_ARRAY_LINEAR_GENERAL;
+	}
+}
+
 uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
 {
-	if (sscreen->b.chip_class == CIK &&
+	if (sscreen->b.chip_class >= CIK &&
 	    sscreen->b.info.cik_macrotile_mode_array_valid) {
 		unsigned index, tileb;
 
@@ -218,8 +234,10 @@
  * - The COLOR1 format isn't INVALID because of possible dual-source blending,
  *   so COLOR1 is enabled pretty much all the time.
  * So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
  */
-static void si_update_fb_blend_state(struct si_context *sctx)
+void si_update_fb_blend_state(struct si_context *sctx)
 {
 	struct si_pm4_state *pm4;
 	struct si_state_blend *blend = sctx->queued.named.blend;
@@ -237,6 +255,16 @@
 			mask |= 0xf << (4*i);
 	mask &= blend->cb_target_mask;
 
+	/* Avoid a hang that happens when dual source blending is enabled
+	 * but there are not enough color outputs. This is undefined behavior,
+	 * so disable color writes completely.
+	 *
+	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
+	 */
+	if (blend->dual_src_blend &&
+	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+		mask = 0;
+
 	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 	si_pm4_set_state(sctx, fb_blend, pm4);
 }
@@ -328,6 +356,7 @@
 		return NULL;
 
 	blend->alpha_to_one = state->alpha_to_one;
+	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
 
 	if (state->logicop_enable) {
 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
@@ -474,11 +503,14 @@
 		S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
 		S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
 		S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
+	        S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
 		S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
 		S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
 					    info->writes_edgeflag ||
-					    info->writes_layer) |
+					    info->writes_layer ||
+					     info->writes_viewport_index) |
+		S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
 		(sctx->queued.named.rasterizer->clip_plane_enable &
 		 clipdist_mask));
 	r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -494,20 +526,26 @@
                                   const struct pipe_scissor_state *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
-	struct si_pm4_state *pm4 = &scissor->pm4;
-
-	if (scissor == NULL)
-		return;
+	struct si_state_scissor *scissor;
+	struct si_pm4_state *pm4;
+	int i;
 
-	scissor->scissor = *state;
-	si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
-		       S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
-		       S_028250_WINDOW_OFFSET_DISABLE(1));
-	si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
-		       S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
+	for (i = start_slot; i < start_slot + num_scissors; i++) {
+		int idx = i - start_slot;
+		int offset = i * 4 * 2;
 
-	si_pm4_set_state(sctx, scissor, scissor);
+		scissor = CALLOC_STRUCT(si_state_scissor);
+		if (scissor == NULL)
+			return;
+		pm4 = &scissor->pm4;
+		scissor->scissor = state[idx];
+		si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
+			       S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
+			       S_028250_WINDOW_OFFSET_DISABLE(1));
+		si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
+			       S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
+		si_pm4_set_state(sctx, scissor[i], scissor);
+	}
 }
 
 static void si_set_viewport_states(struct pipe_context *ctx,
@@ -516,21 +554,29 @@
                                    const struct pipe_viewport_state *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
-	struct si_pm4_state *pm4 = &viewport->pm4;
+	struct si_state_viewport *viewport;
+	struct si_pm4_state *pm4;
+	int i;
 
-	if (viewport == NULL)
-		return;
+	for (i = start_slot; i < start_slot + num_viewports; i++) {
+		int idx = i - start_slot;
+		int offset = i * 4 * 6;
 
-	viewport->viewport = *state;
-	si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
-	si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
-	si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
-	si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
-	si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
-	si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
+		viewport = CALLOC_STRUCT(si_state_viewport);
+		if (!viewport)
+			return;
+		pm4 = &viewport->pm4;
+
+		viewport->viewport = state[idx];
+		si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0]));
+		si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0]));
+		si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1]));
+		si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1]));
+		si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2]));
+		si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2]));
 
-	si_pm4_set_state(sctx, viewport, viewport);
+		si_pm4_set_state(sctx, viewport[i], viewport);
+	}
 }
 
 /*
@@ -636,18 +682,14 @@
 	rs->offset_units = state->offset_units;
 	rs->offset_scale = state->offset_scale * 16.0f;
 
-	tmp = S_0286D4_FLAT_SHADE_ENA(1);
-	if (state->sprite_coord_enable) {
-		tmp |= S_0286D4_PNT_SPRITE_ENA(1) |
-			S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
-			S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
-			S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
-			S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1);
-		if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
-			tmp |= S_0286D4_PNT_SPRITE_TOP_1(1);
-		}
-	}
-	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0, tmp);
+	si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
+		S_0286D4_FLAT_SHADE_ENA(1) |
+		S_0286D4_PNT_SPRITE_ENA(1) |
+		S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
+		S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
+		S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
+		S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
+		S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT));
 
 	/* point size 12.4 fixed point */
 	tmp = (unsigned)(state->point_size * 8.0);
@@ -707,12 +749,12 @@
 
 	if (sctx->framebuffer.nr_samples > 1 &&
 	    (!old_rs || old_rs->multisample_enable != rs->multisample_enable))
-		sctx->db_render_state.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 	si_pm4_bind_state(sctx, rasterizer, rs);
 	si_update_fb_rs_state(sctx);
 
-	sctx->clip_regs.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 }
 
 static void si_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -810,7 +852,8 @@
 
 	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
 		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
-		S_028800_ZFUNC(state->depth.func);
+		S_028800_ZFUNC(state->depth.func) |
+		S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
 
 	/* stencil */
 	if (state->stencil[0].enabled) {
@@ -839,9 +882,12 @@
 		dsa->alpha_func = PIPE_FUNC_ALWAYS;
 	}
 
-	/* misc */
 	si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
 	si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
+	if (state->depth.bounds_test) {
+		si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
+		si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
+	}
 
 	return dsa;
 }
@@ -877,7 +923,7 @@
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 
-	sctx->db_render_state.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 }
 
 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
@@ -1146,7 +1192,9 @@
 				       int first_non_void)
 {
 	struct si_screen *sscreen = (struct si_screen*)screen;
-	bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
+	bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
+					  sscreen->b.info.drm_minor >= 31) ||
+					 sscreen->b.info.drm_major == 3;
 	boolean uniform = TRUE;
 	int i;
 
@@ -1189,7 +1237,7 @@
 	}
 
 	if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
-		if (!enable_s3tc)
+		if (!enable_compressed_formats)
 			goto out_unknown;
 
 		switch (format) {
@@ -1209,7 +1257,7 @@
 	}
 
 	if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
-		if (!enable_s3tc)
+		if (!enable_compressed_formats)
 			goto out_unknown;
 
 		switch (format) {
@@ -1238,8 +1286,7 @@
 	}
 
 	if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
-
-		if (!enable_s3tc)
+		if (!enable_compressed_formats)
 			goto out_unknown;
 
 		if (!util_format_s3tc_enabled) {
@@ -1595,7 +1642,6 @@
                                unsigned sample_count,
                                unsigned usage)
 {
-	struct si_screen *sscreen = (struct si_screen *)screen;
 	unsigned retval = 0;
 
 	if (target >= PIPE_MAX_TEXTURE_TYPES) {
@@ -1607,8 +1653,7 @@
 		return FALSE;
 
 	if (sample_count > 1) {
-		/* 2D tiling on CIK is supported since DRM 2.35.0 */
-		if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
+		if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
 			return FALSE;
 
 		switch (sample_count) {
@@ -1815,6 +1860,9 @@
 	surf->cb_color_info = color_info;
 	surf->cb_color_attrib = color_attrib;
 
+	if (sctx->b.chip_class >= VI)
+		surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+
 	if (rtex->fmask.size) {
 		surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
 		surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
@@ -2012,7 +2060,7 @@
 				  util_format_is_pure_integer(state->cbufs[0]->format);
 
 	if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
-		sctx->db_render_state.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 	for (i = 0; i < state->nr_cbufs; i++) {
 		if (!state->cbufs[i])
@@ -2032,6 +2080,7 @@
 		if (rtex->fmask.size && rtex->cmask.size) {
 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
 		}
+		r600_context_add_resource_size(ctx, surf->base.texture);
 	}
 	/* Set the 16BPC export for possible dual-src blending. */
 	if (i == 1 && surf && surf->export_16bpc) {
@@ -2046,20 +2095,21 @@
 		if (!surf->depth_initialized) {
 			si_init_depth_surface(sctx, surf);
 		}
+		r600_context_add_resource_size(ctx, surf->base.texture);
 	}
 
 	si_update_fb_rs_state(sctx);
 	si_update_fb_blend_state(sctx);
 
-	sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
+	sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
 	sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
 	sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
 	sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
-	sctx->framebuffer.atom.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 
 	if (sctx->framebuffer.nr_samples != old_nr_samples) {
-		sctx->msaa_config.dirty = true;
-		sctx->db_render_state.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->msaa_config);
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 		/* Set sample locations as fragment shader constants. */
 		switch (sctx->framebuffer.nr_samples) {
@@ -2096,7 +2146,7 @@
 		     old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
 		    (sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
 		     old_nr_samples != 1))
-			sctx->msaa_sample_locs.dirty = true;
+			si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
 	}
 }
 
@@ -2130,20 +2180,24 @@
 				RADEON_PRIO_COLOR_META);
 		}
 
-		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+		r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+					   sctx->b.chip_class >= VI ? 14 : 13);
 		radeon_emit(cs, cb->cb_color_base);	/* R_028C60_CB_COLOR0_BASE */
 		radeon_emit(cs, cb->cb_color_pitch);	/* R_028C64_CB_COLOR0_PITCH */
 		radeon_emit(cs, cb->cb_color_slice);	/* R_028C68_CB_COLOR0_SLICE */
 		radeon_emit(cs, cb->cb_color_view);	/* R_028C6C_CB_COLOR0_VIEW */
 		radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
 		radeon_emit(cs, cb->cb_color_attrib);	/* R_028C74_CB_COLOR0_ATTRIB */
-		radeon_emit(cs, 0);			/* R_028C78 unused */
+		radeon_emit(cs, cb->cb_dcc_control);	/* R_028C78_CB_COLOR0_DCC_CONTROL */
 		radeon_emit(cs, tex->cmask.base_address_reg);	/* R_028C7C_CB_COLOR0_CMASK */
 		radeon_emit(cs, tex->cmask.slice_tile_max);	/* R_028C80_CB_COLOR0_CMASK_SLICE */
 		radeon_emit(cs, cb->cb_color_fmask);		/* R_028C84_CB_COLOR0_FMASK */
 		radeon_emit(cs, cb->cb_color_fmask_slice);	/* R_028C88_CB_COLOR0_FMASK_SLICE */
 		radeon_emit(cs, tex->color_clear_value[0]);	/* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
 		radeon_emit(cs, tex->color_clear_value[1]);	/* R_028C90_CB_COLOR0_CLEAR_WORD1 */
+
+		if (sctx->b.chip_class >= VI)
+			radeon_emit(cs, 0);	/* R_028C94_CB_COLOR0_DCC_BASE */
 	}
 	/* set CB_COLOR1_INFO for possible dual-src blending */
 	if (i == 1 && state->cbufs[0]) {
@@ -2238,22 +2292,35 @@
 	sctx->ps_iter_samples = min_samples;
 
 	if (sctx->framebuffer.nr_samples > 1)
-		sctx->msaa_config.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->msaa_config);
 }
 
 /*
  * Samplers
  */
 
-static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
-							struct pipe_resource *texture,
-							const struct pipe_sampler_view *state)
+/**
+ * Create a sampler view.
+ *
+ * @param ctx		context
+ * @param texture	texture
+ * @param state		sampler view template
+ * @param width0	width0 override (for compressed textures as int)
+ * @param height0	height0 override (for compressed textures as int)
+ * @param force_level   set the base address to the level (for compressed textures)
+ */
+struct pipe_sampler_view *
+si_create_sampler_view_custom(struct pipe_context *ctx,
+			      struct pipe_resource *texture,
+			      const struct pipe_sampler_view *state,
+			      unsigned width0, unsigned height0,
+			      unsigned force_level)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
 	struct r600_texture *tmp = (struct r600_texture*)texture;
 	const struct util_format_description *desc;
-	unsigned format, num_format;
+	unsigned format, num_format, base_level, first_level, last_level;
 	uint32_t pitch = 0;
 	unsigned char state_swizzle[4], swizzle[4];
 	unsigned height, depth, width;
@@ -2286,7 +2353,7 @@
 
 	/* Buffer resource. */
 	if (texture->target == PIPE_BUFFER) {
-		unsigned stride;
+		unsigned stride, num_records;
 
 		desc = util_format_description(state->format);
 		first_non_void = util_format_get_first_non_void_channel(state->format);
@@ -2295,10 +2362,16 @@
 		format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
 		num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
 
+		num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
+		num_records = MIN2(num_records, texture->width0 / stride);
+
+		if (sctx->b.chip_class >= VI)
+			num_records *= stride;
+
 		view->state[4] = va;
 		view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 				 S_008F04_STRIDE(stride);
-		view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
+		view->state[6] = num_records;
 		view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
 				 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
 				 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
@@ -2426,13 +2499,25 @@
 		format = 0;
 	}
 
-	/* not supported any more */
-	//endian = si_colorformat_endian_swap(format);
+	base_level = 0;
+	first_level = state->u.tex.first_level;
+	last_level = state->u.tex.last_level;
+	width = width0;
+	height = height0;
+	depth = texture->depth0;
+
+	if (force_level) {
+		assert(force_level == first_level &&
+		       force_level == last_level);
+		base_level = force_level;
+		first_level = 0;
+		last_level = 0;
+		width = u_minify(width, force_level);
+		height = u_minify(height, force_level);
+		depth = u_minify(depth, force_level);
+	}
 
-	width = surflevel[0].npix_x;
-	height = surflevel[0].npix_y;
-	depth = surflevel[0].npix_z;
-	pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
+	pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
 
 	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
 	        height = 1;
@@ -2442,8 +2527,7 @@
 	} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
 		depth = texture->array_size / 6;
 
-	va = tmp->resource.gpu_address + surflevel[0].offset;
-	va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size;
+	va = tmp->resource.gpu_address + surflevel[base_level].offset;
 
 	view->state[0] = va >> 8;
 	view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
@@ -2456,11 +2540,11 @@
 			  S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
 			  S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
 			  S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
-						      0 : state->u.tex.first_level - tmp->mipmap_shift) |
+						      0 : first_level) |
 			  S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
 						      util_logbase2(texture->nr_samples) :
-						      state->u.tex.last_level - tmp->mipmap_shift) |
-			  S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
+						      last_level) |
+			  S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
 			  S_008F1C_POW2_PAD(texture->last_level > 0) |
 			  S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
 	view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
@@ -2512,6 +2596,16 @@
 	return &view->base;
 }
 
+/* Sampler view with the texture's own width0/height0 (0 if NULL), no forced level. */
+static struct pipe_sampler_view *
+si_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture,
+		       const struct pipe_sampler_view *state)
+{
+	unsigned width0 = texture ? texture->width0 : 0;
+	unsigned height0 = texture ? texture->height0 : 0;
+	return si_create_sampler_view_custom(ctx, texture, state, width0, height0, 0);
+}
+
 static void si_sampler_view_destroy(struct pipe_context *ctx,
 				    struct pipe_sampler_view *state)
 {
@@ -2754,6 +2848,7 @@
 			pipe_resource_reference(&dsti->buffer, src->buffer);
 			dsti->buffer_offset = src->buffer_offset;
 			dsti->stride = src->stride;
+			r600_context_add_resource_size(ctx, src->buffer);
 		}
 	} else {
 		for (i = 0; i < count; i++) {
@@ -2771,6 +2866,7 @@
 	if (ib) {
 		pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
 	        memcpy(&sctx->index_buffer, ib, sizeof(*ib));
+		r600_context_add_resource_size(ctx, ib->buffer);
 	} else {
 		pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
 	}
@@ -2834,6 +2930,30 @@
 	}
 }
 
+/* Upload the default outer[4]/inner[2] tess levels as a TESS_CTRL driver const buffer. */
+static void si_set_tess_state(struct pipe_context *ctx,
+			      const float default_outer_level[4],
+			      const float default_inner_level[2])
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct pipe_constant_buffer cb;
+	float array[8] = {0}; /* zeroed: all 8 floats are uploaded, only 6 are set below */
+
+	memcpy(array, default_outer_level, sizeof(float) * 4);
+	memcpy(array+4, default_inner_level, sizeof(float) * 2);
+
+	cb.buffer = NULL;
+	cb.user_buffer = NULL;
+	cb.buffer_size = sizeof(array);
+	si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
+			       (void*)array, sizeof(array),
+			       &cb.buffer_offset);
+
+	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL,
+				 SI_DRIVER_STATE_CONST_BUF, &cb);
+	pipe_resource_reference(&cb.buffer, NULL); /* drop the local reference */
+}
+
 static void si_texture_barrier(struct pipe_context *ctx)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -2859,6 +2979,8 @@
 	si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
 }
 
+static void si_init_config(struct si_context *sctx);
+
 void si_init_state_functions(struct si_context *sctx)
 {
 	si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
@@ -2909,30 +3031,43 @@
 	sctx->b.b.texture_barrier = si_texture_barrier;
 	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.b.set_min_samples = si_set_min_samples;
+	sctx->b.b.set_tess_state = si_set_tess_state;
 
-	sctx->b.dma_copy = si_dma_copy;
 	sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
 	sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
 
 	sctx->b.b.draw_vbo = si_draw_vbo;
+
+	if (sctx->b.chip_class >= CIK) {
+		sctx->b.dma_copy = cik_sdma_copy;
+	} else {
+		sctx->b.dma_copy = si_dma_copy;
+	}
+
+	si_init_config(sctx);
 }
 
 static void
 si_write_harvested_raster_configs(struct si_context *sctx,
 				  struct si_pm4_state *pm4,
-				  unsigned raster_config)
+				  unsigned raster_config,
+				  unsigned raster_config_1)
 {
 	unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
 	unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
-	unsigned num_rb = sctx->screen->b.info.r600_num_backends;
-	unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
+	unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+	unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
 	unsigned rb_per_se = num_rb / num_se;
-	unsigned se0_mask = (1 << rb_per_se) - 1;
-	unsigned se1_mask = se0_mask << rb_per_se;
+	unsigned se_mask[4];
 	unsigned se;
 
-	assert(num_se == 1 || num_se == 2);
+	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+	assert(num_se == 1 || num_se == 2 || num_se == 4);
 	assert(sh_per_se == 1 || sh_per_se == 2);
 	assert(rb_per_pkr == 1 || rb_per_pkr == 2);
 
@@ -2940,17 +3075,16 @@
 	 * fields are for, so I'm leaving them as their default
 	 * values. */
 
-	se0_mask &= rb_mask;
-	se1_mask &= rb_mask;
-	if (num_se == 2 && (!se0_mask || !se1_mask)) {
-		raster_config &= C_028350_SE_MAP;
-
-		if (!se0_mask) {
-			raster_config |=
-				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+			     (!se_mask[2] && !se_mask[3]))) {
+		raster_config_1 &= C_028354_SE_PAIR_MAP;
+
+		if (!se_mask[0] && !se_mask[1]) {
+			raster_config_1 |=
+				S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
 		} else {
-			raster_config |=
-				S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+			raster_config_1 |=
+				S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
 		}
 	}
 
@@ -2958,10 +3092,23 @@
 		unsigned raster_config_se = raster_config;
 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+		int idx = (se / 2) * 2;
+
+		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+			raster_config_se &= C_028350_SE_MAP;
+
+			if (!se_mask[idx]) {
+				raster_config_se |=
+					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+			} else {
+				raster_config_se |=
+					S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+			}
+		}
 
 		pkr0_mask &= rb_mask;
 		pkr1_mask &= rb_mask;
-		if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) {
+		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
 			raster_config_se &= C_028350_PKR_MAP;
 
 			if (!pkr0_mask) {
@@ -2973,7 +3120,7 @@
 			}
 		}
 
-		if (rb_per_pkr == 2) {
+		if (rb_per_se >= 2) {
 			unsigned rb0_mask = 1 << (se * rb_per_se);
 			unsigned rb1_mask = rb0_mask << 1;
 
@@ -2991,7 +3138,7 @@
 				}
 			}
 
-			if (sh_per_se == 2) {
+			if (rb_per_se > 2) {
 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
 				rb1_mask = rb0_mask << 1;
 				rb0_mask &= rb_mask;
@@ -3010,44 +3157,48 @@
 			}
 		}
 
-		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
-			       SE_INDEX(se) | SH_BROADCAST_WRITES |
-			       INSTANCE_BROADCAST_WRITES);
+		/* GRBM_GFX_INDEX is privileged on VI */
+		if (sctx->b.chip_class <= CIK)
+			si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+				       SE_INDEX(se) | SH_BROADCAST_WRITES |
+				       INSTANCE_BROADCAST_WRITES);
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
+		if (sctx->b.chip_class >= CIK)
+			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
 	}
 
-	si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
-		       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
-		       INSTANCE_BROADCAST_WRITES);
+	/* GRBM_GFX_INDEX is privileged on VI */
+	if (sctx->b.chip_class <= CIK)
+		si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+			       SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
+			       INSTANCE_BROADCAST_WRITES);
 }
 
-void si_init_config(struct si_context *sctx)
+static void si_init_config(struct si_context *sctx)
 {
+	unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+	unsigned raster_config, raster_config_1;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+	int i;
 
 	if (pm4 == NULL)
 		return;
 
 	si_cmd_context_control(pm4);
 
-	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
-	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
+	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+	si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
 
 	/* FIXME calculate these values somehow ??? */
 	si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
 	si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
 	si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
 
-	si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
 	si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
 	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
 	si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
 
-	si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
-	si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
-	si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
-	si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
-
 	si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
 	si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
 	si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
@@ -3060,62 +3211,83 @@
 
 	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
-	if (sctx->b.chip_class >= CIK) {
-		switch (sctx->screen->b.family) {
-		case CHIP_BONAIRE:
-			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
-			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
-			break;
-		case CHIP_HAWAII:
-			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
-			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
-			break;
-		case CHIP_KAVERI:
-			/* XXX todo */
-		case CHIP_KABINI:
-			/* XXX todo */
-		case CHIP_MULLINS:
-			/* XXX todo */
-		default:
-			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0);
-			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
-			break;
-		}
-	} else {
-		unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
-		unsigned num_rb = sctx->screen->b.info.r600_num_backends;
-		unsigned raster_config;
-
-		switch (sctx->screen->b.family) {
-		case CHIP_TAHITI:
-		case CHIP_PITCAIRN:
-			raster_config = 0x2a00126a;
-			break;
-		case CHIP_VERDE:
-			raster_config = 0x0000124a;
-			break;
-		case CHIP_OLAND:
-			raster_config = 0x00000082;
-			break;
-		case CHIP_HAINAN:
-			raster_config = 0;
-			break;
-		default:
-			fprintf(stderr,
-				"radeonsi: Unknown GPU, using 0 for raster_config\n");
-			raster_config = 0;
-			break;
-		}
+	for (i = 0; i < 16; i++) {
+		si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+		si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+	}
+
+	switch (sctx->screen->b.family) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+		raster_config = 0x2a00126a;
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_VERDE:
+		raster_config = 0x0000124a;
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_OLAND:
+		raster_config = 0x00000082;
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_HAINAN:
+		raster_config = 0x00000000;
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_BONAIRE:
+		raster_config = 0x16000012;
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_HAWAII:
+		raster_config = 0x3a00161a;
+		raster_config_1 = 0x0000002e;
+		break;
+	case CHIP_FIJI:
+		/* Fiji should be same as Hawaii, but that causes corruption in some cases */
+		raster_config = 0x16000012; /* 0x3a00161a */
+		raster_config_1 = 0x0000002a; /* 0x0000002e */
+		break;
+	case CHIP_TONGA:
+		raster_config = 0x16000012;
+		raster_config_1 = 0x0000002a;
+		break;
+	case CHIP_ICELAND:
+		raster_config = 0x00000002;
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_CARRIZO:
+		raster_config = 0x00000002;
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_KAVERI:
+		/* KV should be 0x00000002, but that causes problems with radeon */
+		raster_config = 0x00000000; /* 0x00000002 */
+		raster_config_1 = 0x00000000;
+		break;
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+		raster_config = 0x00000000;
+		raster_config_1 = 0x00000000;
+		break;
+	default:
+		fprintf(stderr,
+			"radeonsi: Unknown GPU, using 0 for raster_config\n");
+		raster_config = 0x00000000;
+		raster_config_1 = 0x00000000;
+		break;
+	}
 
-		/* Always use the default config when all backends are enabled
-		 * (or when we failed to determine the enabled backends).
-		 */
-		if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
-			si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
-				       raster_config);
-		} else {
-			si_write_harvested_raster_configs(sctx, pm4, raster_config);
-		}
+	/* Always use the default config when all backends are enabled
+	 * (or when we failed to determine the enabled backends).
+	 */
+	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
+			       raster_config);
+		if (sctx->b.chip_class >= CIK)
+			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
+				       raster_config_1);
+	} else {
+		si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
 	}
 
 	si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
@@ -3130,15 +3302,11 @@
 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
 	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
 	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
-	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
-	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
 	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
 	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
-	si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0);
-	si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0);
 	si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
 	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
 	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
@@ -3157,10 +3325,21 @@
 	si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
 
 	if (sctx->b.chip_class >= CIK) {
+		si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffc));
+		si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
+		si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xfffe));
+		si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
 		si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
 		si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
 		si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
 	}
 
+	if (sctx->b.chip_class >= VI) {
+		si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
+			       S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
+		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
+	}
+
 	sctx->init_config = pm4;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,6 +31,7 @@
 
 #include "util/u_index_modify.h"
 #include "util/u_upload_mgr.h"
+#include "util/u_prim.h"
 
 static void si_decompress_textures(struct si_context *sctx)
 {
@@ -64,6 +65,7 @@
 		[PIPE_PRIM_LINE_STRIP_ADJACENCY]	= V_008958_DI_PT_LINESTRIP_ADJ,
 		[PIPE_PRIM_TRIANGLES_ADJACENCY]		= V_008958_DI_PT_TRILIST_ADJ,
 		[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]	= V_008958_DI_PT_TRISTRIP_ADJ,
+		[PIPE_PRIM_PATCHES]			= V_008958_DI_PT_PATCH,
 		[R600_PRIM_RECTANGLE_LIST]		= V_008958_DI_PT_RECTLIST
         };
 	assert(mode < Elements(prim_conv));
@@ -87,6 +89,7 @@
 		[PIPE_PRIM_LINE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
 		[PIPE_PRIM_TRIANGLES_ADJACENCY]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
 		[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_PATCHES]			= V_028A6C_OUTPRIM_TYPE_POINTLIST,
 		[R600_PRIM_RECTANGLE_LIST]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP
 	};
 	assert(mode < Elements(prim_conv));
@@ -94,8 +97,128 @@
 	return prim_conv[mode];
 }
 
+/**
+ * This calculates the LDS size for tessellation shaders (VS, TCS, TES).
+ * LS.LDS_SIZE is shared by all 3 shader stages.
+ *
+ * The information about LDS and other non-compile-time parameters is then
+ * written to userdata SGPRs of LS, HS (TCS) and TES. *num_patches is always set (currently hard-coded to 1); all register writes are skipped when the cached sctx->last_* values already match.
+ */
+static void si_emit_derived_tess_state(struct si_context *sctx,
+				       const struct pipe_draw_info *info,
+				       unsigned *num_patches)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct si_shader_selector *ls = sctx->vs_shader;
+	/* The TES pointer will only be used for sctx->last_tcs.
+	 * It would be wrong to think that TCS = TES. */
+	struct si_shader_selector *tcs =
+		sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
+	unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
+	unsigned num_tcs_input_cp = info->vertices_per_patch;
+	unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
+	unsigned num_tcs_patch_outputs;
+	unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
+	unsigned input_patch_size, output_patch_size, output_patch0_offset;
+	unsigned perpatch_output_offset, lds_size, ls_rsrc2;
+	unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
+
+	*num_patches = 1; /* TODO: calculate this */
+
+	if (sctx->last_ls == ls->current &&
+	    sctx->last_tcs == tcs &&
+	    sctx->last_tes_sh_base == tes_sh_base &&
+	    sctx->last_num_tcs_input_cp == num_tcs_input_cp)
+		return; /* every input of the derived state is unchanged; emit nothing */
+
+	sctx->last_ls = ls->current; /* remember the inputs used for this emission */
+	sctx->last_tcs = tcs;
+	sctx->last_tes_sh_base = tes_sh_base;
+	sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
+	/* This calculates how shader inputs and outputs among VS, TCS, and TES
+	 * are laid out in LDS. */
+	num_tcs_inputs = util_last_bit64(ls->outputs_written);
+
+	if (sctx->tcs_shader) {
+		num_tcs_outputs = util_last_bit64(tcs->outputs_written);
+		num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+		num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
+	} else {
+		/* No TCS. Route varyings from LS to TES. */
+		num_tcs_outputs = num_tcs_inputs;
+		num_tcs_output_cp = num_tcs_input_cp;
+		num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
+	}
+
+	input_vertex_size = num_tcs_inputs * 16; /* 16 per slot; presumably bytes of one vec4 - TODO confirm */
+	output_vertex_size = num_tcs_outputs * 16;
+
+	input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+	pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+	output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+	output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0; /* outputs start after all input patches only when a TCS is bound; otherwise at offset 0 */
+	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; /* per-patch outputs follow the per-vertex outputs of patch 0 */
+
+	lds_size = output_patch0_offset + output_patch_size * *num_patches;
+	ls_rsrc2 = ls->current->ls_rsrc2;
+
+	if (sctx->b.chip_class >= CIK) {
+		assert(lds_size <= 65536);
+		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512); /* field is programmed in units of 512 on CIK and newer */
+	} else {
+		assert(lds_size <= 32768);
+		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256); /* field is programmed in units of 256 before CIK */
+	}
+
+	/* Due to a hw bug, RSRC2_LS must be written twice with another
+	 * LS register written in between. */
+	if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
+		si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); /* first write; the RSRC1+RSRC2 sequence below provides the second */
+	si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+	radeon_emit(cs, ls->current->ls_rsrc1);
+	radeon_emit(cs, ls_rsrc2);
+
+	/* Compute userdata SGPRs. The asserts check that every value fits the bit-field it is packed into below. */
+	assert(((input_vertex_size / 4) & ~0xff) == 0);
+	assert(((output_vertex_size / 4) & ~0xff) == 0);
+	assert(((input_patch_size / 4) & ~0x1fff) == 0);
+	assert(((output_patch_size / 4) & ~0x1fff) == 0);
+	assert(((output_patch0_offset / 16) & ~0xffff) == 0);
+	assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
+	assert(num_tcs_input_cp <= 32);
+	assert(num_tcs_output_cp <= 32);
+
+	tcs_in_layout = (input_patch_size / 4) |
+			((input_vertex_size / 4) << 13); /* patch size in the low 13 bits, vertex size from bit 13 */
+	tcs_out_layout = (output_patch_size / 4) |
+			 ((output_vertex_size / 4) << 13); /* same packing as tcs_in_layout */
+	tcs_out_offsets = (output_patch0_offset / 16) |
+			  ((perpatch_output_offset / 16) << 16); /* two 16-bit offsets, each divided by 16 */
+
+	/* Set them for LS. */
+	si_write_sh_reg(cs,
+		R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
+		tcs_in_layout);
+
+	/* Set them for TCS. */
+	si_write_sh_reg_seq(cs,
+		R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
+	radeon_emit(cs, tcs_out_offsets);
+	radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26)); /* TCS gets the input control point count in bits 26 and up */
+	radeon_emit(cs, tcs_in_layout);
+
+	/* Set them for TES. */
+	si_write_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+	radeon_emit(cs, tcs_out_offsets);
+	radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26)); /* TES gets the output control point count in bits 26 and up */
+}
+
 static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
-					  const struct pipe_draw_info *info)
+					  const struct pipe_draw_info *info,
+					  unsigned num_patches)
 {
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	unsigned prim = info->mode;
@@ -104,11 +227,41 @@
 	/* SWITCH_ON_EOP(0) is always preferable. */
 	bool wd_switch_on_eop = false;
 	bool ia_switch_on_eop = false;
+	bool ia_switch_on_eoi = false;
 	bool partial_vs_wave = false;
+	bool partial_es_wave = false;
 
 	if (sctx->gs_shader)
 		primgroup_size = 64; /* recommended with a GS */
 
+	if (sctx->tes_shader) {
+		unsigned num_cp_out =
+			sctx->tcs_shader ?
+			sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+			info->vertices_per_patch;
+		unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out);
+
+		primgroup_size = MIN2(primgroup_size, max_size);
+
+		/* primgroup_size must be set to a multiple of NUM_PATCHES */
+		primgroup_size = (primgroup_size / num_patches) * num_patches;
+
+		/* SWITCH_ON_EOI must be set if PrimID is used.
+		 * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+		if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) ||
+		    sctx->tes_shader->info.uses_primid) {
+			ia_switch_on_eoi = true;
+			partial_es_wave = true;
+		}
+
+		/* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+		if ((sctx->b.family == CHIP_TAHITI ||
+		     sctx->b.family == CHIP_PITCAIRN ||
+		     sctx->b.family == CHIP_BONAIRE) &&
+		    sctx->gs_shader)
+			partial_vs_wave = true;
+	}
+
 	/* This is a hardware requirement. */
 	if ((rs && rs->line_stipple_enable) ||
 	    (sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) {
@@ -147,10 +300,44 @@
 		assert(wd_switch_on_eop || !ia_switch_on_eop);
 	}
 
+	/* Hw bug with single-primitive instances and SWITCH_ON_EOI
+	 * on multi-SE chips. */
+	if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
+	    (info->indirect ||
+	     (info->instance_count > 1 &&
+	      u_prims_for_vertices(info->mode, info->count) <= 1)))
+		sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+
+	/* Instancing bug on 2 SE chips. */
+	if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi &&
+	    (info->indirect || info->instance_count > 1))
+		partial_vs_wave = true;
+
 	return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
+		S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
 		S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
+		S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
 		S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
-		S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0);
+		S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
+		S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
+}
+
+static unsigned si_get_ls_hs_config(struct si_context *sctx,
+				    const struct pipe_draw_info *info,
+				    unsigned num_patches)
+{
+	unsigned num_output_cp; /* output control points per patch; this function returns the packed VGT_LS_HS_CONFIG value */
+
+	if (!sctx->tes_shader)
+		return 0; /* no TES bound: the config value is 0 */
+
+	num_output_cp = sctx->tcs_shader ?
+		sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+		info->vertices_per_patch; /* without a TCS the output count equals the draw's vertices_per_patch */
+
+	return S_028B58_NUM_PATCHES(num_patches) |
+		S_028B58_HS_NUM_INPUT_CP(info->vertices_per_patch) |
+		S_028B58_HS_NUM_OUTPUT_CP(num_output_cp);
 }
 
 static void si_emit_scratch_reloc(struct si_context *sctx)
@@ -206,22 +393,31 @@
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	unsigned prim = si_conv_pipe_prim(info->mode);
 	unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
-	unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info);
+	unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
+
+	if (sctx->tes_shader)
+		si_emit_derived_tess_state(sctx, info, &num_patches);
+
+	ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
+	ls_hs_config = si_get_ls_hs_config(sctx, info, num_patches);
 
 	/* Draw state. */
 	if (prim != sctx->last_prim ||
-	    ia_multi_vgt_param != sctx->last_multi_vgt_param) {
+	    ia_multi_vgt_param != sctx->last_multi_vgt_param ||
+	    ls_hs_config != sctx->last_ls_hs_config) {
 		if (sctx->b.chip_class >= CIK) {
 			radeon_emit(cs, PKT3(PKT3_DRAW_PREAMBLE, 2, 0));
 			radeon_emit(cs, prim); /* VGT_PRIMITIVE_TYPE */
 			radeon_emit(cs, ia_multi_vgt_param); /* IA_MULTI_VGT_PARAM */
-			radeon_emit(cs, 0); /* VGT_LS_HS_CONFIG */
+			radeon_emit(cs, ls_hs_config); /* VGT_LS_HS_CONFIG */
 		} else {
 			r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
 			r600_write_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+			r600_write_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
 		}
 		sctx->last_prim = prim;
 		sctx->last_multi_vgt_param = ia_multi_vgt_param;
+		sctx->last_ls_hs_config = ls_hs_config;
 	}
 
 	if (gs_out_prim != sctx->last_gs_out_prim) {
@@ -278,12 +474,24 @@
 	if (info->indexed) {
 		radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
 
-		if (ib->index_size == 4) {
-			radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ?
-					V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
-		} else {
-			radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ?
-					V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+		/* index type */
+		switch (ib->index_size) {
+		case 1:
+			radeon_emit(cs, V_028A7C_VGT_INDEX_8);
+			break;
+		case 2:
+			radeon_emit(cs, V_028A7C_VGT_INDEX_16 |
+				    (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+					     V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+			break;
+		case 4:
+			radeon_emit(cs, V_028A7C_VGT_INDEX_32 |
+				    (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+					     V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
+			break;
+		default:
+			assert(!"unreachable");
+			return;
 		}
 	}
 
@@ -409,9 +617,14 @@
 
 	if (sctx->flags & SI_CONTEXT_INV_TC_L1)
 		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-	if (sctx->flags & SI_CONTEXT_INV_TC_L2)
+	if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
 		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
 
+		/* TODO: this might not be needed. */
+		if (sctx->chip_class >= VI)
+			cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+	}
+
 	if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
 		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
 				 S_0085F0_CB0_DEST_BASE_ENA(1) |
@@ -523,8 +736,14 @@
 	    (info->indexed || !info->count_from_stream_output))
 		return;
 
-	if (!sctx->ps_shader || !sctx->vs_shader)
+	if (!sctx->ps_shader || !sctx->vs_shader) {
+		assert(0);
+		return;
+	}
+	if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) {
+		assert(0);
 		return;
+	}
 
 	si_decompress_textures(sctx);
 
@@ -535,6 +754,9 @@
 	 * current_rast_prim for this draw_vbo call. */
 	if (sctx->gs_shader)
 		sctx->current_rast_prim = sctx->gs_shader->gs_output_prim;
+	else if (sctx->tes_shader)
+		sctx->current_rast_prim =
+			sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 	else
 		sctx->current_rast_prim = info->mode;
 
@@ -550,7 +772,8 @@
 		ib.offset = sctx->index_buffer.offset;
 
 		/* Translate or upload, if needed. */
-		if (ib.index_size == 1) {
+		/* 8-bit indices are supported on VI. */
+		if (sctx->b.chip_class <= CIK && ib.index_size == 1) {
 			struct pipe_resource *out_buffer = NULL;
 			unsigned out_offset, start, count, start_offset;
 			void *ptr;
@@ -585,6 +808,8 @@
 		}
 	}
 
+	/* TODO: VI should read index buffers through TC, so this shouldn't be
+	 * needed on VI. */
 	if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
 		sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
 		r600_resource(ib.buffer)->TC_L2_dirty = false;
@@ -592,7 +817,7 @@
 
 	/* Check flush flags. */
 	if (sctx->b.flags)
-		sctx->atoms.s.cache_flush->dirty = true;
+		si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
 
 	si_need_cs_space(sctx, 0, TRUE);
 
@@ -618,7 +843,7 @@
 
 	/* Workaround for a VGT hang when streamout is enabled.
 	 * It must be done after drawing. */
-	if (sctx->b.family == CHIP_HAWAII &&
+	if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
 	    (sctx->b.streamout.streamout_enabled ||
 	     sctx->b.streamout.prims_gen_query_enabled)) {
 		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state.h	2015-09-16 14:36:09.000000000 +0000
@@ -30,7 +30,7 @@
 #include "si_pm4.h"
 #include "radeon/r600_pipe_common.h"
 
-#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1)
+#define SI_NUM_SHADERS (PIPE_SHADER_TESS_EVAL+1)
 
 struct si_screen;
 struct si_shader;
@@ -39,6 +39,7 @@
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
 	bool			alpha_to_one;
+	bool			dual_src_blend;
 };
 
 struct si_state_sample_mask {
@@ -94,18 +95,21 @@
 		struct si_pm4_state		*blend_color;
 		struct si_pm4_state		*clip;
 		struct si_state_sample_mask	*sample_mask;
-		struct si_state_scissor		*scissor;
-		struct si_state_viewport	*viewport;
+		struct si_state_scissor		*scissor[16];
+		struct si_state_viewport	*viewport[16];
 		struct si_state_rasterizer	*rasterizer;
 		struct si_state_dsa		*dsa;
 		struct si_pm4_state		*fb_rs;
 		struct si_pm4_state		*fb_blend;
 		struct si_pm4_state		*dsa_stencil_ref;
 		struct si_pm4_state		*ta_bordercolor_base;
+		struct si_pm4_state		*ls;
+		struct si_pm4_state		*hs;
 		struct si_pm4_state		*es;
 		struct si_pm4_state		*gs;
 		struct si_pm4_state		*gs_rings;
-		struct si_pm4_state		*gs_onoff;
+		struct si_pm4_state		*tf_ring;
+		struct si_pm4_state		*vgt_shader_config;
 		struct si_pm4_state		*vs;
 		struct si_pm4_state		*ps;
 		struct si_pm4_state		*spi;
@@ -142,9 +146,13 @@
  * Ring buffers:        0..1
  * Streamout buffers:   2..5
  */
-#define SI_RING_ESGS		0
-#define SI_RING_GSVS		1
-#define SI_NUM_RING_BUFFERS	2
+#define SI_RING_TESS_FACTOR	0 /* for HS (TCS)  */
+#define SI_RING_ESGS		0 /* for ES, GS */
+#define SI_RING_GSVS		1 /* for GS, VS */
+#define SI_RING_GSVS_1		2 /* 1, 2, 3 for GS */
+#define SI_RING_GSVS_2		3
+#define SI_RING_GSVS_3		4
+#define SI_NUM_RING_BUFFERS	5
 #define SI_SO_BUF_OFFSET	SI_NUM_RING_BUFFERS
 #define SI_NUM_RW_BUFFERS	(SI_SO_BUF_OFFSET + 4)
 
@@ -232,14 +240,11 @@
 			struct pipe_resource *buffer,
 			unsigned stride, unsigned num_records,
 			bool add_tid, bool swizzle,
-			unsigned element_size, unsigned index_stride);
+			unsigned element_size, unsigned index_stride, uint64_t offset);
 void si_init_all_descriptors(struct si_context *sctx);
 bool si_upload_shader_descriptors(struct si_context *sctx);
 void si_release_all_descriptors(struct si_context *sctx);
 void si_all_descriptors_begin_new_cs(struct si_context *sctx);
-void si_copy_buffer(struct si_context *sctx,
-		    struct pipe_resource *dst, struct pipe_resource *src,
-		    uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
 			    const uint8_t *ptr, unsigned size, uint32_t *const_offset);
 void si_shader_change_notify(struct si_context *sctx);
@@ -247,19 +252,26 @@
 /* si_state.c */
 struct si_shader_selector;
 
+void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                                enum pipe_format format,
                                enum pipe_texture_target target,
                                unsigned sample_count,
                                unsigned usage);
 void si_init_state_functions(struct si_context *sctx);
-void si_init_config(struct si_context *sctx);
 unsigned cik_bank_wh(unsigned bankwh);
 unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode);
 unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect);
 unsigned cik_tile_split(unsigned tile_split);
+unsigned si_array_mode(unsigned mode);
 uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
 unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil);
+struct pipe_sampler_view *
+si_create_sampler_view_custom(struct pipe_context *ctx,
+			      struct pipe_resource *texture,
+			      const struct pipe_sampler_view *state,
+			      unsigned width0, unsigned height0,
+			      unsigned force_level);
 
 /* si_state_shader.c */
 void si_update_shaders(struct si_context *sctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state_shaders.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/radeonsi/si_state_shaders.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/radeonsi/si_state_shaders.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,9 +30,135 @@
 #include "sid.h"
 
 #include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
 #include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
 
+static void si_set_tesseval_regs(struct si_shader *shader,
+				 struct si_pm4_state *pm4)
+{
+	struct tgsi_shader_info *info = &shader->selector->info; /* TES properties are translated into one VGT_TF_PARAM write on pm4 */
+	unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
+	unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
+	bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
+	bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
+	unsigned type, partitioning, topology;
+
+	switch (tes_prim_mode) { /* primitive mode -> TYPE field */
+	case PIPE_PRIM_LINES:
+		type = V_028B6C_TESS_ISOLINE;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		type = V_028B6C_TESS_TRIANGLE;
+		break;
+	case PIPE_PRIM_QUADS:
+		type = V_028B6C_TESS_QUAD;
+		break;
+	default:
+		assert(0); /* any other primitive mode is not handled */
+		return; /* VGT_TF_PARAM is not written in this case */
+	}
+
+	switch (tes_spacing) { /* spacing -> PARTITIONING field */
+	case PIPE_TESS_SPACING_FRACTIONAL_ODD:
+		partitioning = V_028B6C_PART_FRAC_ODD;
+		break;
+	case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
+		partitioning = V_028B6C_PART_FRAC_EVEN;
+		break;
+	case PIPE_TESS_SPACING_EQUAL:
+		partitioning = V_028B6C_PART_INTEGER;
+		break;
+	default:
+		assert(0); /* any other spacing value is not handled */
+		return; /* VGT_TF_PARAM is not written in this case */
+	}
+
+	if (tes_point_mode) /* point mode takes precedence over the primitive mode */
+		topology = V_028B6C_OUTPUT_POINT;
+	else if (tes_prim_mode == PIPE_PRIM_LINES)
+		topology = V_028B6C_OUTPUT_LINE;
+	else if (tes_vertex_order_cw)
+		/* for some reason, this must be the other way around */
+		topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
+	else
+		topology = V_028B6C_OUTPUT_TRIANGLE_CW;
+
+	si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
+		       S_028B6C_TYPE(type) |
+		       S_028B6C_PARTITIONING(partitioning) |
+		       S_028B6C_TOPOLOGY(topology));
+}
+
+static void si_shader_ls(struct si_shader *shader)
+{
+	struct si_pm4_state *pm4; /* state built here for a vertex shader compiled as LS */
+	unsigned num_sgprs, num_user_sgprs;
+	unsigned vgpr_comp_cnt;
+	uint64_t va;
+
+	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+	if (pm4 == NULL)
+		return; /* shader->pm4 stays NULL; nothing else is set up */
+
+	va = shader->bo->gpu_address;
+	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+	/* We need at least 2 components for LS.
+	 * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
+	vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1;
+
+	num_user_sgprs = SI_LS_NUM_USER_SGPR;
+	num_sgprs = shader->num_sgprs;
+	if (num_user_sgprs > num_sgprs) {
+		/* Last 2 reserved SGPRs are used for VCC */
+		num_sgprs = num_user_sgprs + 2;
+	}
+	assert(num_sgprs <= 104);
+
+	si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); /* address bits 8 and up */
+	si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40); /* address bits 40 and up */
+
+	shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) | /* RSRC1/RSRC2 are stored on the shader rather than in pm4; si_emit_derived_tess_state emits them after adding LDS_SIZE */
+			   S_00B528_SGPRS((num_sgprs - 1) / 8) |
+		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
+	shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+			   S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
+}
+
+static void si_shader_hs(struct si_shader *shader)
+{
+	struct si_pm4_state *pm4; /* state built here for a tessellation control shader (HS): PGM_LO/HI and RSRC1/RSRC2 */
+	unsigned num_sgprs, num_user_sgprs;
+	uint64_t va;
+
+	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
+	if (pm4 == NULL)
+		return; /* shader->pm4 stays NULL; nothing else is set up */
+
+	va = shader->bo->gpu_address;
+	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+	num_user_sgprs = SI_TCS_NUM_USER_SGPR;
+	num_sgprs = shader->num_sgprs;
+	/* One SGPR after user SGPRs is pre-loaded with tessellation factor
+	 * buffer offset. */
+	if ((num_user_sgprs + 1) > num_sgprs) {
+		/* Last 2 reserved SGPRs are used for VCC */
+		num_sgprs = num_user_sgprs + 1 + 2;
+	}
+	assert(num_sgprs <= 104);
+
+	si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); /* address bits 8 and up */
+	si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); /* address bits 40 and up */
+	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
+		       S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
+		       S_00B428_SGPRS((num_sgprs - 1) / 8));
+	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
+		       S_00B42C_USER_SGPR(num_user_sgprs) |
+		       S_00B42C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0)); /* scratch is enabled only when the shader uses scratch bytes */
+}
+
 static void si_shader_es(struct si_shader *shader)
 {
 	struct si_pm4_state *pm4;
@@ -48,9 +174,15 @@
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
 
-	vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+	if (shader->selector->type == PIPE_SHADER_VERTEX) {
+		vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+		num_user_sgprs = SI_VS_NUM_USER_SGPR;
+	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+		vgpr_comp_cnt = 3; /* all components are needed for TES */
+		num_user_sgprs = SI_TES_NUM_USER_SGPR;
+	} else
+		assert(0);
 
-	num_user_sgprs = SI_VS_NUM_USER_SGPR;
 	num_sgprs = shader->num_sgprs;
 	/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
 	if ((num_user_sgprs + 1) > num_sgprs) {
@@ -69,17 +201,37 @@
 	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
 		       S_00B32C_USER_SGPR(num_user_sgprs) |
 		       S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+
+	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+		si_set_tesseval_regs(shader, pm4);
+}
+
+static unsigned si_gs_get_max_stream(struct si_shader *shader)
+{
+	struct pipe_stream_output_info *so = &shader->selector->so; /* stream-output info of the shader's selector */
+	unsigned max_stream = 0, i; /* highest stream index seen so far; returned to the caller */
+
+	if (so->num_outputs == 0)
+		return 0; /* no stream-output entries at all */
+
+	for (i = 0; i < so->num_outputs; i++) {
+		if (so->output[i].stream > max_stream)
+			max_stream = so->output[i].stream;
+	}
+	return max_stream;
 }
 
 static void si_shader_gs(struct si_shader *shader)
 {
-	unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
+	unsigned gs_vert_itemsize = shader->selector->info.num_outputs * 16;
 	unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
-	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+	unsigned gsvs_itemsize = (gs_vert_itemsize * gs_max_vert_out) >> 2;
+	unsigned gs_num_invocations = shader->selector->gs_num_invocations;
 	unsigned cut_mode;
 	struct si_pm4_state *pm4;
 	unsigned num_sgprs, num_user_sgprs;
 	uint64_t va;
+	unsigned max_stream = si_gs_get_max_stream(shader);
 
 	/* The GSVS_RING_ITEMSIZE register takes 15 bits */
 	assert(gsvs_itemsize < (1 << 15));
@@ -107,16 +259,23 @@
 		       S_028A40_GS_WRITE_OPTIMIZE(1));
 
 	si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
-	si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
-	si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
+	si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
+	si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
 
 	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-		       util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2));
-	si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
+		       util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
+	si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
 
 	si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
 
-	si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
+	si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize >> 2);
+	si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) ? gs_vert_itemsize >> 2 : 0);
+	si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, (max_stream >= 2) ? gs_vert_itemsize >> 2 : 0);
+	si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, (max_stream >= 3) ? gs_vert_itemsize >> 2 : 0);
+
+	si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
+		       S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+		       S_028B90_ENABLE(gs_num_invocations > 0));
 
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
@@ -143,19 +302,29 @@
 
 static void si_shader_vs(struct si_shader *shader)
 {
-	struct tgsi_shader_info *info = &shader->selector->info;
 	struct si_pm4_state *pm4;
 	unsigned num_sgprs, num_user_sgprs;
-	unsigned nparams, i, vgpr_comp_cnt;
+	unsigned nparams, vgpr_comp_cnt;
 	uint64_t va;
 	unsigned window_space =
 	   shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+	bool enable_prim_id = si_vs_exports_prim_id(shader);
 
 	pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
 
 	if (pm4 == NULL)
 		return;
 
+	/* If this is the GS copy shader, the GS state writes this register.
+	 * Otherwise, the VS state writes it.
+	 */
+	if (!shader->is_gs_copy_shader) {
+		si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
+			       S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
+		si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
+	} else
+		si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
+
 	va = shader->bo->gpu_address;
 	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
 
@@ -163,8 +332,11 @@
 		vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
 		num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
 	} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
-		vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+		vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
 		num_user_sgprs = SI_VS_NUM_USER_SGPR;
+	} else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+		vgpr_comp_cnt = 3; /* all components are needed for TES */
+		num_user_sgprs = SI_TES_NUM_USER_SGPR;
 	} else
 		assert(0);
 
@@ -175,23 +347,8 @@
 	}
 	assert(num_sgprs <= 104);
 
-	/* Certain attributes (position, psize, etc.) don't count as params.
-	 * VS is required to export at least one param and r600_shader_from_tgsi()
-	 * takes care of adding a dummy export.
-	 */
-	for (nparams = 0, i = 0 ; i < info->num_outputs; i++) {
-		switch (info->output_semantic_name[i]) {
-		case TGSI_SEMANTIC_CLIPVERTEX:
-		case TGSI_SEMANTIC_POSITION:
-		case TGSI_SEMANTIC_PSIZE:
-			break;
-		default:
-			nparams++;
-		}
-	}
-	if (nparams < 1)
-		nparams = 1;
-
+	/* VS is required to export at least one param. */
+	nparams = MAX2(shader->nr_param_exports, 1);
 	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
 		       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
 
@@ -231,6 +388,9 @@
 			       S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
 			       S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
 			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
+
+	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+		si_set_tesseval_regs(shader, pm4);
 }
 
 static void si_shader_ps(struct si_shader *shader)
@@ -328,7 +488,18 @@
 
 	switch (shader->selector->type) {
 	case PIPE_SHADER_VERTEX:
-		if (shader->key.vs.as_es)
+		if (shader->key.vs.as_ls)
+			si_shader_ls(shader);
+		else if (shader->key.vs.as_es)
+			si_shader_es(shader);
+		else
+			si_shader_vs(shader);
+		break;
+	case PIPE_SHADER_TESS_CTRL:
+		si_shader_hs(shader);
+		break;
+	case PIPE_SHADER_TESS_EVAL:
+		if (shader->key.tes.as_es)
 			si_shader_es(shader);
 		else
 			si_shader_vs(shader);
@@ -346,26 +517,47 @@
 }
 
 /* Compute the key for the hw shader variant */
-static INLINE void si_shader_selector_key(struct pipe_context *ctx,
+static inline void si_shader_selector_key(struct pipe_context *ctx,
 					  struct si_shader_selector *sel,
 					  union si_shader_key *key)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	memset(key, 0, sizeof(*key));
+	unsigned i;
 
-	if (sel->type == PIPE_SHADER_VERTEX) {
-		unsigned i;
-		if (!sctx->vertex_elements)
-			return;
-
-		for (i = 0; i < sctx->vertex_elements->count; ++i)
-			key->vs.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor;
+	memset(key, 0, sizeof(*key));
 
-		if (sctx->gs_shader) {
+	switch (sel->type) {
+	case PIPE_SHADER_VERTEX:
+		if (sctx->vertex_elements)
+			for (i = 0; i < sctx->vertex_elements->count; ++i)
+				key->vs.instance_divisors[i] =
+					sctx->vertex_elements->elements[i].instance_divisor;
+
+		if (sctx->tes_shader)
+			key->vs.as_ls = 1;
+		else if (sctx->gs_shader) {
 			key->vs.as_es = 1;
-			key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs;
+			key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
 		}
-	} else if (sel->type == PIPE_SHADER_FRAGMENT) {
+
+		if (!sctx->gs_shader && sctx->ps_shader &&
+		    sctx->ps_shader->info.uses_primid)
+			key->vs.export_prim_id = 1;
+		break;
+	case PIPE_SHADER_TESS_CTRL:
+		key->tcs.prim_mode =
+			sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+		break;
+	case PIPE_SHADER_TESS_EVAL:
+		if (sctx->gs_shader) {
+			key->tes.as_es = 1;
+			key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
+		} else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
+			key->tes.export_prim_id = 1;
+		break;
+	case PIPE_SHADER_GEOMETRY:
+		break;
+	case PIPE_SHADER_FRAGMENT: {
 		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
 		if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
@@ -393,11 +585,14 @@
 		}
 
 		key->ps.alpha_func = PIPE_FUNC_ALWAYS;
-
 		/* Alpha-test should be disabled if colorbuffer 0 is integer. */
 		if (sctx->queued.named.dsa &&
 		    !sctx->framebuffer.cb0_is_integer)
 			key->ps.alpha_func = sctx->queued.named.dsa->alpha_func;
+		break;
+	}
+	default:
+		assert(0);
 	}
 }
 
@@ -456,6 +651,7 @@
 		}
 		si_shader_init_pm4_state(shader);
 		sel->num_shaders++;
+		p_atomic_inc(&sctx->screen->b.num_compilations);
 	}
 
 	return 0;
@@ -473,6 +669,7 @@
 	sel->tokens = tgsi_dup_tokens(state->tokens);
 	sel->so = state->stream_output;
 	tgsi_scan_shader(state->tokens, &sel->info);
+	p_atomic_inc(&sscreen->b.num_shaders_created);
 
 	switch (pipe_shader_type) {
 	case PIPE_SHADER_GEOMETRY:
@@ -480,6 +677,8 @@
 			sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
 		sel->gs_max_out_vertices =
 			sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+		sel->gs_num_invocations =
+			sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
 
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			unsigned name = sel->info.input_semantic_name[i];
@@ -489,10 +688,40 @@
 			case TGSI_SEMANTIC_PRIMID:
 				break;
 			default:
-				sel->gs_used_inputs |=
+				sel->inputs_read |=
 					1llu << si_shader_io_get_unique_index(name, index);
 			}
 		}
+		break;
+
+	case PIPE_SHADER_VERTEX:
+	case PIPE_SHADER_TESS_CTRL:
+		for (i = 0; i < sel->info.num_outputs; i++) {
+			unsigned name = sel->info.output_semantic_name[i];
+			unsigned index = sel->info.output_semantic_index[i];
+
+			switch (name) {
+			case TGSI_SEMANTIC_TESSINNER:
+			case TGSI_SEMANTIC_TESSOUTER:
+			case TGSI_SEMANTIC_PATCH:
+				sel->patch_outputs_written |=
+					1llu << si_shader_io_get_unique_index(name, index);
+				break;
+			default:
+				sel->outputs_written |=
+					1llu << si_shader_io_get_unique_index(name, index);
+			}
+		}
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		for (i = 0; i < sel->info.num_outputs; i++) {
+			unsigned name = sel->info.output_semantic_name[i];
+			unsigned index = sel->info.output_semantic_index[i];
+
+			if (name == TGSI_SEMANTIC_COLOR)
+				sel->ps_colors_written |= 1 << index;
+		}
+		break;
 	}
 
 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -519,6 +748,18 @@
 	return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
 }
 
+static void *si_create_tcs_state(struct pipe_context *ctx,
+				 const struct pipe_shader_state *state)
+{
+	return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_CTRL); /* common create path, typed as TCS */
+}
+
+static void *si_create_tes_state(struct pipe_context *ctx,
+				 const struct pipe_shader_state *state)
+{
+	return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL); /* common create path, typed as TES */
+}
+
 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -528,7 +769,7 @@
 		return;
 
 	sctx->vs_shader = sel;
-	sctx->clip_regs.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 }
 
 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
@@ -541,13 +782,47 @@
 		return;
 
 	sctx->gs_shader = sel;
-	sctx->clip_regs.dirty = true;
+	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 	sctx->last_rast_prim = -1; /* reset this so that it gets updated */
 
 	if (enable_changed)
 		si_shader_change_notify(sctx);
 }
 
+static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_shader_selector *old_sel = sctx->tcs_shader;
+	struct si_shader_selector *new_sel = state;
+
+	if (new_sel == old_sel)
+		return; /* rebinding the same selector changes nothing */
+
+	sctx->tcs_shader = new_sel;
+	/* A TCS appearing or disappearing invalidates derived tess state. */
+	if ((old_sel != NULL) != (new_sel != NULL))
+		sctx->last_tcs = NULL;
+}
+
+static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_shader_selector *new_sel = state;
+	bool toggled = (sctx->tes_shader != NULL) != (new_sel != NULL);
+
+	if (new_sel == sctx->tes_shader)
+		return; /* same selector already bound */
+
+	sctx->tes_shader = new_sel;
+	si_mark_atom_dirty(sctx, &sctx->clip_regs);
+	sctx->last_rast_prim = -1; /* -1 forces it to be updated again */
+	/* TES switched on or off: notify and drop derived tess state. */
+	if (toggled) {
+		si_shader_change_notify(sctx);
+		sctx->last_tes_sh_base = -1;
+	}
+}
+
 static void si_make_dummy_ps(struct si_context *sctx)
 {
 	if (!sctx->dummy_pixel_shader) {
@@ -574,6 +849,7 @@
 	}
 
 	sctx->ps_shader = sel;
+	si_update_fb_blend_state(sctx);
 }
 
 static void si_delete_shader_selector(struct pipe_context *ctx,
@@ -584,15 +860,33 @@
 
 	while (p) {
 		c = p->next_variant;
-		if (sel->type == PIPE_SHADER_GEOMETRY) {
+		switch (sel->type) {
+		case PIPE_SHADER_VERTEX:
+			if (p->key.vs.as_ls)
+				si_pm4_delete_state(sctx, ls, p->pm4);
+			else if (p->key.vs.as_es)
+				si_pm4_delete_state(sctx, es, p->pm4);
+			else
+				si_pm4_delete_state(sctx, vs, p->pm4);
+			break;
+		case PIPE_SHADER_TESS_CTRL:
+			si_pm4_delete_state(sctx, hs, p->pm4);
+			break;
+		case PIPE_SHADER_TESS_EVAL:
+			if (p->key.tes.as_es)
+				si_pm4_delete_state(sctx, es, p->pm4);
+			else
+				si_pm4_delete_state(sctx, vs, p->pm4);
+			break;
+		case PIPE_SHADER_GEOMETRY:
 			si_pm4_delete_state(sctx, gs, p->pm4);
 			si_pm4_delete_state(sctx, vs, p->gs_copy_shader->pm4);
-		} else if (sel->type == PIPE_SHADER_FRAGMENT)
+			break;
+		case PIPE_SHADER_FRAGMENT:
 			si_pm4_delete_state(sctx, ps, p->pm4);
-		else if (p->key.vs.as_es)
-			si_pm4_delete_state(sctx, es, p->pm4);
-		else
-			si_pm4_delete_state(sctx, vs, p->pm4);
+			break;
+		}
+
 		si_shader_destroy(ctx, p);
 		free(p);
 		p = c;
@@ -638,6 +932,30 @@
 	si_delete_shader_selector(ctx, sel);
 }
 
+static void si_delete_tcs_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_shader_selector *doomed = state;
+	struct si_context *sctx = (struct si_context *)ctx;
+
+	/* Unbind first so the context never refers to a deleted selector. */
+	if (doomed == sctx->tcs_shader)
+		sctx->tcs_shader = NULL;
+
+	si_delete_shader_selector(ctx, doomed);
+}
+
+static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_shader_selector *doomed = state;
+	struct si_context *sctx = (struct si_context *)ctx;
+
+	/* Unbind first so the context never refers to a deleted selector. */
+	if (doomed == sctx->tes_shader)
+		sctx->tes_shader = NULL;
+
+	si_delete_shader_selector(ctx, doomed);
+}
+
 static void si_update_spi_map(struct si_context *sctx)
 {
 	struct si_shader *ps = sctx->ps_shader->current;
@@ -665,8 +983,9 @@
 		    (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
 			tmp |= S_028644_FLAT_SHADE(1);
 
-		if (name == TGSI_SEMANTIC_GENERIC &&
-		    sctx->sprite_coord_enable & (1 << index)) {
+		if (name == TGSI_SEMANTIC_PCOORD ||
+		    (name == TGSI_SEMANTIC_TEXCOORD &&
+		     sctx->sprite_coord_enable & (1 << index))) {
 			tmp |= S_028644_PT_SPRITE_TEX(1);
 		}
 
@@ -678,7 +997,10 @@
 			}
 		}
 
-		if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
+		if (name == TGSI_SEMANTIC_PRIMID)
+			/* PrimID is written after the last output. */
+			tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+		else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
 			/* No corresponding output found, load defaults into input.
 			 * Don't set any other bits.
 			 * (FLAT_SHADE=1 completely changes behavior) */
@@ -704,7 +1026,7 @@
 static void si_init_gs_rings(struct si_context *sctx)
 {
 	unsigned esgs_ring_size = 128 * 1024;
-	unsigned gsvs_ring_size = 64 * 1024 * 1024;
+	unsigned gsvs_ring_size = 60 * 1024 * 1024;
 
 	assert(!sctx->gs_rings);
 	sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
@@ -716,6 +1038,12 @@
 					     PIPE_USAGE_DEFAULT, gsvs_ring_size);
 
 	if (sctx->b.chip_class >= CIK) {
+		if (sctx->b.chip_class >= VI) {
+			/* The maximum sizes are 63.999 MB on VI, because
+			 * the register fields only have 18 bits. */
+			assert(esgs_ring_size / 256 < (1 << 18));
+			assert(gsvs_ring_size / 256 < (1 << 18));
+		}
 		si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
 			       esgs_ring_size / 256);
 		si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
@@ -729,15 +1057,42 @@
 
 	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
 			   sctx->esgs_ring, 0, esgs_ring_size,
-			   true, true, 4, 64);
+			   true, true, 4, 64, 0);
 	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
 			   sctx->esgs_ring, 0, esgs_ring_size,
-			   false, false, 0, 0);
+			   false, false, 0, 0, 0);
 	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
 			   sctx->gsvs_ring, 0, gsvs_ring_size,
-			   false, false, 0, 0);
+			   false, false, 0, 0, 0);
 }
 
+static void si_update_gs_rings(struct si_context *sctx)
+{
+	unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16; /* 16 per output; presumably bytes -- confirm */
+	unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
+	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+	uint64_t offset;
+	/* Set up SI_RING_GSVS and SI_RING_GSVS_1..3; ring k is passed k * gsvs_itemsize * 64 as last argument. */
+	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
+			   sctx->gsvs_ring, gsvs_itemsize,
+			   64, true, true, 4, 16, 0);
+	/* NOTE(review): the four rings presumably correspond to GS vertex streams 0-3 -- confirm. */
+	offset = gsvs_itemsize * 64; /* product is computed in unsigned, then widened */
+	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_1,
+			   sctx->gsvs_ring, gsvs_itemsize,
+			   64, true, true, 4, 16, offset);
+
+	offset = (gsvs_itemsize * 2) * 64;
+	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_2,
+			   sctx->gsvs_ring, gsvs_itemsize,
+			   64, true, true, 4, 16, offset);
+
+	offset = (gsvs_itemsize * 3) * 64;
+	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3,
+			   sctx->gsvs_ring, gsvs_itemsize,
+			   64, true, true, 4, 16, offset);
+}
+
 /**
  * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
  *          otherwise.
@@ -796,10 +1151,14 @@
 
 static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
 {
+	unsigned bytes = 0;
 
-	return MAX3(si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader),
-			si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader),
-			si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tcs_shader));
+	bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tes_shader));
+	return bytes;
 }
 
 static void si_update_spi_tmpring_size(struct si_context *sctx)
@@ -833,15 +1192,29 @@
 			si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
 		if (si_update_scratch_buffer(sctx, sctx->gs_shader))
 			si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+		if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
 
-		/* VS can be bound as ES or VS. */
-		if (sctx->gs_shader) {
+		/* VS can be bound as LS, ES, or VS. */
+		if (sctx->tes_shader) {
+			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+				si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+		} else if (sctx->gs_shader) {
 			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
 				si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
 		} else {
 			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
 				si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
 		}
+
+		/* TES can be bound as ES or VS. */
+		if (sctx->gs_shader) {
+			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+				si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+		} else {
+			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+				si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+		}
 	}
 
 	/* The LLVM shader backend should be reporting aligned scratch_sizes. */
@@ -852,60 +1225,187 @@
 				S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
 }
 
+static void si_init_tess_factor_ring(struct si_context *sctx)
+{
+	assert(!sctx->tf_state); /* one-time setup: must not have run before */
+	sctx->tf_state = CALLOC_STRUCT(si_pm4_state); /* NOTE(review): result is not NULL-checked */
+	/* Create the tess factor ring (32768 * info.max_se) and clear it with fui(0). */
+	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
+					   PIPE_USAGE_DEFAULT,
+					   32768 * sctx->screen->b.info.max_se);
+	sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
+			     sctx->tf_ring->width0, fui(0), false);
+	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
+	/* Ring size (width0 / 4) and base (gpu_address >> 8) go to different registers on CIK+ and older chips. */
+	if (sctx->b.chip_class >= CIK) {
+		si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
+			       S_030938_SIZE(sctx->tf_ring->width0 / 4));
+		si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
+			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
+	} else {
+		si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
+			       S_008988_SIZE(sctx->tf_ring->width0 / 4));
+		si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
+			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
+	}
+	si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
+		      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+	si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);
+	/* Also hand the whole buffer to the TCS stage as SI_RING_TESS_FACTOR. */
+	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
+			   SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
+			   sctx->tf_ring->width0, false, false, 0, 0, 0);
+
+	sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; /* request a VGT flush via the context flags */
+}
+
+/**
+ * This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
+ * VS passes its outputs to TES directly, so the fixed-function shader only
+ * has to write TESSOUTER and TESSINNER.
+ */
+static void si_generate_fixed_func_tcs(struct si_context *sctx)
+{
+	struct ureg_src const0, const1;
+	struct ureg_dst tessouter, tessinner;
+	struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL);
+
+	if (!ureg)
+		return; /* ureg_create failed; NOTE(review): si_update_shaders() still dereferences fixed_func_tcs_shader */
+
+	assert(!sctx->fixed_func_tcs_shader);
+	/* Constants 0 and 1 of SI_DRIVER_STATE_CONST_BUF supply the outer and inner levels. */
+	ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF);
+	const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
+				    SI_DRIVER_STATE_CONST_BUF);
+	const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 1),
+				    SI_DRIVER_STATE_CONST_BUF);
+
+	tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
+	tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
+	/* The whole program: copy const[0] to TESSOUTER and const[1] to TESSINNER. */
+	ureg_MOV(ureg, tessouter, const0);
+	ureg_MOV(ureg, tessinner, const1);
+	ureg_END(ureg);
+	/* Store the result on the context; ureg is also destroyed by this call (per its name). */
+	sctx->fixed_func_tcs_shader =
+		ureg_create_shader_and_destroy(ureg, &sctx->b.b);
+	assert(sctx->fixed_func_tcs_shader);
+}
+
+static void si_update_vgt_shader_config(struct si_context *sctx)
+{
+	/* Bind the cached VGT_SHADER_STAGES_EN state for the active stages.
+	 * Cache index: 0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS */
+	unsigned index = 2*!!sctx->tes_shader + !!sctx->gs_shader;
+	struct si_pm4_state **pm4 = &sctx->vgt_shader_config[index];
+	/* Each of the four states is built the first time its combination is used. */
+	if (!*pm4) {
+		uint32_t stages = 0;
+		*pm4 = CALLOC_STRUCT(si_pm4_state);
+		if (!*pm4)
+			return; /* allocation failed: bind nothing now, retry on the next call */
+		if (sctx->tes_shader) {
+			stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
+				  S_028B54_HS_EN(1);
+
+			if (sctx->gs_shader)
+				stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
+					  S_028B54_GS_EN(1) |
+					  S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+			else
+				stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+		} else if (sctx->gs_shader) {
+			stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+				  S_028B54_GS_EN(1) |
+				  S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+		}
+
+		si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
+	}
+	si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
+}
+
+static void si_update_so(struct si_context *sctx, struct si_shader_selector *shader)
+{
+	struct pipe_stream_output_info *so = &shader->so;
+	uint32_t mask = 0;
+	int n = 0;
+	/* One 4-bit group per stream; one bit per output buffer inside it. */
+	for (; n < so->num_outputs; n++)
+		mask |= (1 << so->output[n].output_buffer) << (so->output[n].stream * 4);
+
+	sctx->b.streamout.stride_in_dw = so->stride;
+	sctx->b.streamout.enabled_stream_buffers_mask = mask;
+}
+
 void si_update_shaders(struct si_context *sctx)
 {
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 
-	if (sctx->gs_shader) {
-		si_shader_select(ctx, sctx->gs_shader);
-		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
-		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
+	/* Update stages before GS. */
+	if (sctx->tes_shader) {
+		if (!sctx->tf_state)
+			si_init_tess_factor_ring(sctx);
+
+		/* VS as LS */
+		si_shader_select(ctx, sctx->vs_shader);
+		si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
 
-		sctx->b.streamout.stride_in_dw = sctx->gs_shader->so.stride;
+		if (sctx->tcs_shader) {
+			si_shader_select(ctx, sctx->tcs_shader);
+			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+		} else {
+			if (!sctx->fixed_func_tcs_shader)
+				si_generate_fixed_func_tcs(sctx);
+			si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+			si_pm4_bind_state(sctx, hs,
+					  sctx->fixed_func_tcs_shader->current->pm4);
+		}
 
+		si_shader_select(ctx, sctx->tes_shader);
+		if (sctx->gs_shader) {
+			/* TES as ES */
+			si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+		} else {
+			/* TES as VS */
+			si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+			si_update_so(sctx, sctx->tes_shader);
+		}
+	} else if (sctx->gs_shader) {
+		/* VS as ES */
 		si_shader_select(ctx, sctx->vs_shader);
 		si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+	} else {
+		/* VS as VS */
+		si_shader_select(ctx, sctx->vs_shader);
+		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+		si_update_so(sctx, sctx->vs_shader);
+	}
+
+	/* Update GS. */
+	if (sctx->gs_shader) {
+		si_shader_select(ctx, sctx->gs_shader);
+		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
+		si_update_so(sctx, sctx->gs_shader);
 
 		if (!sctx->gs_rings)
 			si_init_gs_rings(sctx);
+
 		if (sctx->emitted.named.gs_rings != sctx->gs_rings)
 			sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
 		si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
 
-		si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
-				   sctx->gsvs_ring,
-				   sctx->gs_shader->gs_max_out_vertices *
-				   sctx->gs_shader->info.num_outputs * 16,
-				   64, true, true, 4, 16);
-
-		if (!sctx->gs_on) {
-			sctx->gs_on = CALLOC_STRUCT(si_pm4_state);
-
-			si_pm4_set_reg(sctx->gs_on, R_028B54_VGT_SHADER_STAGES_EN,
-				       S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
-				       S_028B54_GS_EN(1) |
-				       S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER));
-		}
-		si_pm4_bind_state(sctx, gs_onoff, sctx->gs_on);
+		si_update_gs_rings(sctx);
 	} else {
-		si_shader_select(ctx, sctx->vs_shader);
-		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
-
-		sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
-
-		if (!sctx->gs_off) {
-			sctx->gs_off = CALLOC_STRUCT(si_pm4_state);
-
-			si_pm4_set_reg(sctx->gs_off, R_028A40_VGT_GS_MODE, 0);
-			si_pm4_set_reg(sctx->gs_off, R_028B54_VGT_SHADER_STAGES_EN, 0);
-		}
-		si_pm4_bind_state(sctx, gs_onoff, sctx->gs_off);
 		si_pm4_bind_state(sctx, gs_rings, NULL);
 		si_pm4_bind_state(sctx, gs, NULL);
 		si_pm4_bind_state(sctx, es, NULL);
 	}
 
+	si_update_vgt_shader_config(sctx);
+
 	si_shader_select(ctx, sctx->ps_shader);
 
 	if (!sctx->ps_shader->current) {
@@ -935,29 +1435,35 @@
 
 	if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
 		sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
-		sctx->db_render_state.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
 
 	if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
 		sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
-		sctx->msaa_config.dirty = true;
+		si_mark_atom_dirty(sctx, &sctx->msaa_config);
 
 		if (sctx->b.chip_class == SI)
-			sctx->db_render_state.dirty = true;
+			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
 }
 
 void si_init_shader_functions(struct si_context *sctx)
 {
 	sctx->b.b.create_vs_state = si_create_vs_state;
+	sctx->b.b.create_tcs_state = si_create_tcs_state;
+	sctx->b.b.create_tes_state = si_create_tes_state;
 	sctx->b.b.create_gs_state = si_create_gs_state;
 	sctx->b.b.create_fs_state = si_create_fs_state;
 
 	sctx->b.b.bind_vs_state = si_bind_vs_shader;
+	sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
+	sctx->b.b.bind_tes_state = si_bind_tes_shader;
 	sctx->b.b.bind_gs_state = si_bind_gs_shader;
 	sctx->b.b.bind_fs_state = si_bind_ps_shader;
 
 	sctx->b.b.delete_vs_state = si_delete_vs_shader;
+	sctx->b.b.delete_tcs_state = si_delete_tcs_shader;
+	sctx->b.b.delete_tes_state = si_delete_tes_shader;
 	sctx->b.b.delete_gs_state = si_delete_gs_shader;
 	sctx->b.b.delete_fs_state = si_delete_ps_shader;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_context.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -79,7 +79,7 @@
    struct rbug_list shaders;
 };
 
-static INLINE struct rbug_context *
+static inline struct rbug_context *
 rbug_context(struct pipe_context *pipe)
 {
    return (struct rbug_context *)pipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_objects.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_objects.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_objects.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_objects.h	2015-09-16 14:36:09.000000000 +0000
@@ -93,7 +93,7 @@
 };
 
 
-static INLINE struct rbug_resource *
+static inline struct rbug_resource *
 rbug_resource(struct pipe_resource *_resource)
 {
    if (!_resource)
@@ -102,7 +102,7 @@
    return (struct rbug_resource *)_resource;
 }
 
-static INLINE struct rbug_sampler_view *
+static inline struct rbug_sampler_view *
 rbug_sampler_view(struct pipe_sampler_view *_sampler_view)
 {
    if (!_sampler_view)
@@ -111,7 +111,7 @@
    return (struct rbug_sampler_view *)_sampler_view;
 }
 
-static INLINE struct rbug_surface *
+static inline struct rbug_surface *
 rbug_surface(struct pipe_surface *_surface)
 {
    if (!_surface)
@@ -120,7 +120,7 @@
    return (struct rbug_surface *)_surface;
 }
 
-static INLINE struct rbug_transfer *
+static inline struct rbug_transfer *
 rbug_transfer(struct pipe_transfer *_transfer)
 {
    if (!_transfer)
@@ -129,7 +129,7 @@
    return (struct rbug_transfer *)_transfer;
 }
 
-static INLINE struct rbug_shader *
+static inline struct rbug_shader *
 rbug_shader(void *_state)
 {
    if (!_state)
@@ -137,7 +137,7 @@
    return (struct rbug_shader *)_state;
 }
 
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 rbug_resource_unwrap(struct pipe_resource *_resource)
 {
    if (!_resource)
@@ -145,7 +145,7 @@
    return rbug_resource(_resource)->resource;
 }
 
-static INLINE struct pipe_sampler_view *
+static inline struct pipe_sampler_view *
 rbug_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view)
 {
    if (!_sampler_view)
@@ -153,7 +153,7 @@
    return rbug_sampler_view(_sampler_view)->sampler_view;
 }
 
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
 rbug_surface_unwrap(struct pipe_surface *_surface)
 {
    if (!_surface)
@@ -161,7 +161,7 @@
    return rbug_surface(_surface)->surface;
 }
 
-static INLINE struct pipe_transfer *
+static inline struct pipe_transfer *
 rbug_transfer_unwrap(struct pipe_transfer *_transfer)
 {
    if (!_transfer)
@@ -169,7 +169,7 @@
    return rbug_transfer(_transfer)->transfer;
 }
 
-static INLINE void *
+static inline void *
 rbug_shader_unwrap(void *_state)
 {
    struct rbug_shader *shader;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -226,17 +226,6 @@
 }
 
 static boolean
-rbug_screen_fence_signalled(struct pipe_screen *_screen,
-                            struct pipe_fence_handle *fence)
-{
-   struct rbug_screen *rb_screen = rbug_screen(_screen);
-   struct pipe_screen *screen = rb_screen->screen;
-
-   return screen->fence_signalled(screen,
-                                  fence);
-}
-
-static boolean
 rbug_screen_fence_finish(struct pipe_screen *_screen,
                          struct pipe_fence_handle *fence,
                          uint64_t timeout)
@@ -288,7 +277,6 @@
    rb_screen->base.resource_destroy = rbug_screen_resource_destroy;
    rb_screen->base.flush_frontbuffer = rbug_screen_flush_frontbuffer;
    rb_screen->base.fence_reference = rbug_screen_fence_reference;
-   rb_screen->base.fence_signalled = rbug_screen_fence_signalled;
    rb_screen->base.fence_finish = rbug_screen_fence_finish;
 
    rb_screen->screen = screen;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/rbug/rbug_screen.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/rbug/rbug_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -60,7 +60,7 @@
    struct rbug_list transfers;
 };
 
-static INLINE struct rbug_screen *
+static inline struct rbug_screen *
 rbug_screen(struct pipe_screen *screen)
 {
    return (struct rbug_screen *)screen;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_context.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -203,7 +203,7 @@
 };
 
 
-static INLINE struct softpipe_context *
+static inline struct softpipe_context *
 softpipe_context( struct pipe_context *pipe )
 {
    return (struct softpipe_context *)pipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_fence.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_fence.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_fence.c	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_fence.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,15 +41,6 @@
 
 
 static boolean
-softpipe_fence_signalled(struct pipe_screen *screen,
-                         struct pipe_fence_handle *fence)
-{
-   assert(fence);
-   return TRUE;
-}
-
-
-static boolean
 softpipe_fence_finish(struct pipe_screen *screen,
                       struct pipe_fence_handle *fence,
                       uint64_t timeout)
@@ -64,5 +55,4 @@
 {
    screen->fence_reference = softpipe_fence_reference;
    screen->fence_finish = softpipe_fence_finish;
-   screen->fence_signalled = softpipe_fence_signalled;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_fs_exec.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_fs_exec.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_fs_exec.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_fs_exec.c	2015-09-16 14:36:09.000000000 +0000
@@ -52,7 +52,7 @@
 
 
 /** cast wrapper */
-static INLINE struct sp_exec_fragment_shader *
+static inline struct sp_exec_fragment_shader *
 sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var)
 {
    return (struct sp_exec_fragment_shader *) var;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_prim_vbuf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_prim_vbuf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_prim_vbuf.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_prim_vbuf.c	2015-09-16 14:36:09.000000000 +0000
@@ -145,7 +145,7 @@
 }
 
 
-static INLINE cptrf4 get_vert( const void *vertex_buffer,
+static inline cptrf4 get_vert( const void *vertex_buffer,
                                int index,
                                int stride )
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_quad_blend.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_quad_blend.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_quad_blend.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_quad_blend.c	2015-09-16 14:36:09.000000000 +0000
@@ -63,7 +63,7 @@
 
 
 /** cast wrapper */
-static INLINE struct blend_quad_stage *
+static inline struct blend_quad_stage *
 blend_quad_stage(struct quad_stage *stage)
 {
    return (struct blend_quad_stage *) stage;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_quad_fs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_quad_fs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_quad_fs.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_quad_fs.c	2015-09-16 14:36:09.000000000 +0000
@@ -56,7 +56,7 @@
 
 
 /** cast wrapper */
-static INLINE struct quad_shade_stage *
+static inline struct quad_shade_stage *
 quad_shade_stage(struct quad_stage *qs)
 {
    return (struct quad_shade_stage *) qs;
@@ -67,7 +67,7 @@
  * Execute fragment shader for the four fragments in the quad.
  * \return TRUE if quad is alive, FALSE if all four pixels are killed
  */
-static INLINE boolean
+static inline boolean
 shade_quad(struct quad_stage *qs, struct quad_header *quad)
 {
    struct softpipe_context *softpipe = qs->softpipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -277,7 +277,7 @@
    b = pipe->get_query_result(pipe, sp->render_cond_query, wait,
                               (void*)&result);
    if (b)
-      return (!result == sp->render_cond_cond);
+      return (!result) == sp->render_cond_cond;
    else
       return TRUE;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -191,7 +191,9 @@
    case PIPE_CAP_ENDIANNESS:
       return PIPE_ENDIAN_NATIVE;
    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+      return 4;
    case PIPE_CAP_TEXTURE_GATHER_SM5:
+      return 1;
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
    case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_SAMPLE_SHADING:
@@ -206,8 +208,9 @@
    case PIPE_CAP_FAKE_SW_MSAA:
       return 1;
    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+      return -32;
    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
-      return 0;
+      return 31;
    case PIPE_CAP_DRAW_INDIRECT:
       return 1;
 
@@ -231,6 +234,8 @@
    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
       return 1;
    case PIPE_CAP_CLIP_HALFZ:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
       return 1;
    case PIPE_CAP_VERTEXID_NOBASE:
       return 0;
@@ -239,6 +244,8 @@
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
       return 0;
    }
    /* should only get here on unhandled cases */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_screen.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -49,7 +49,7 @@
    boolean use_llvm;
 };
 
-static INLINE struct softpipe_screen *
+static inline struct softpipe_screen *
 softpipe_screen( struct pipe_screen *pipe )
 {
    return (struct softpipe_screen *)pipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_setup.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_setup.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_setup.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_setup.c	2015-09-16 14:36:09.000000000 +0000
@@ -125,7 +125,7 @@
 /**
  * Clip setup->quad against the scissor/surface bounds.
  */
-static INLINE void
+static inline void
 quad_clip(struct setup_context *setup, struct quad_header *quad)
 {
    const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
@@ -156,7 +156,7 @@
 /**
  * Emit a quad (pass to next stage) with clipping.
  */
-static INLINE void
+static inline void
 clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
 {
    quad_clip( setup, quad );
@@ -178,14 +178,14 @@
  * Given an X or Y coordinate, return the block/quad coordinate that it
  * belongs to.
  */
-static INLINE int
+static inline int
 block(int x)
 {
    return x & ~(2-1);
 }
 
 
-static INLINE int
+static inline int
 block_x(int x)
 {
    return x & ~(16-1);
@@ -1039,7 +1039,7 @@
 /**
  * Plot a pixel in a line segment.
  */
-static INLINE void
+static inline void
 plot(struct setup_context *setup, int x, int y)
 {
    const int iy = y & 1;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_sample.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_sample.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_sample.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_sample.c	2015-09-16 14:36:09.000000000 +0000
@@ -58,7 +58,7 @@
  * of improperly weighted linear-filtered textures.
  * The tests/texwrap.c demo is a good test.
  */
-static INLINE float
+static inline float
 frac(float f)
 {
    return f - floorf(f);
@@ -69,7 +69,7 @@
 /**
  * Linear interpolation macro
  */
-static INLINE float
+static inline float
 lerp(float a, float v0, float v1)
 {
    return v0 + a * (v1 - v0);
@@ -84,7 +84,7 @@
  * optimization!  If we find that's not true on some systems, convert
  * to a macro.
  */
-static INLINE float
+static inline float
 lerp_2d(float a, float b,
         float v00, float v10, float v01, float v11)
 {
@@ -97,7 +97,7 @@
 /**
  * As above, but 3D interpolation of 8 values.
  */
-static INLINE float
+static inline float
 lerp_3d(float a, float b, float c,
         float v000, float v100, float v010, float v110,
         float v001, float v101, float v011, float v111)
@@ -115,7 +115,7 @@
  * value.  To avoid that problem we add a large multiple of the size
  * (rather than using a conditional).
  */
-static INLINE int
+static inline int
 repeat(int coord, unsigned size)
 {
    return (coord + size * 1024) % size;
@@ -131,68 +131,80 @@
  * \param icoord  returns the integer texcoords
  */
 static void
-wrap_nearest_repeat(float s, unsigned size, int *icoord)
+wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [0,1) */
    /* i limited to [0,size-1] */
    int i = util_ifloor(s * size);
-   *icoord = repeat(i, size);
+   *icoord = repeat(i + offset, size);
 }
 
 
 static void
-wrap_nearest_clamp(float s, unsigned size, int *icoord)
+wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [0,1] */
    /* i limited to [0,size-1] */
+   s *= size;
+   s += offset;
    if (s <= 0.0F)
       *icoord = 0;
-   else if (s >= 1.0F)
+   else if (s >= size)
       *icoord = size - 1;
    else
-      *icoord = util_ifloor(s * size);
+      *icoord = util_ifloor(s);
 }
 
 
 static void
-wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
+wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [min,max] */
    /* i limited to [0, size-1] */
-   const float min = 1.0F / (2.0F * size);
-   const float max = 1.0F - min;
+   const float min = 0.5F;
+   const float max = (float)size - 0.5F;
+
+   s *= size;
+   s += offset;
+
    if (s < min)
       *icoord = 0;
    else if (s > max)
       *icoord = size - 1;
    else
-      *icoord = util_ifloor(s * size);
+      *icoord = util_ifloor(s);
 }
 
 
 static void
-wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
+wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [min,max] */
    /* i limited to [-1, size] */
-   const float min = -1.0F / (2.0F * size);
-   const float max = 1.0F - min;
+   const float min = -0.5F;
+   const float max = size + 0.5F;
+
+   s *= size;
+   s += offset;
    if (s <= min)
       *icoord = -1;
    else if (s >= max)
       *icoord = size;
    else
-      *icoord = util_ifloor(s * size);
+      *icoord = util_ifloor(s);
 }
 
-
 static void
-wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
 {
    const float min = 1.0F / (2.0F * size);
    const float max = 1.0F - min;
-   const int flr = util_ifloor(s);
-   float u = frac(s);
+   int flr;
+   float u;
+
+   s += (float)offset / size;
+   flr = util_ifloor(s);
+   u = frac(s);
    if (flr & 1)
       u = 1.0F - u;
    if (u < min)
@@ -205,51 +217,52 @@
 
 
 static void
-wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [0,1] */
    /* i limited to [0,size-1] */
-   const float u = fabsf(s);
+   const float u = fabsf(s * size + offset);
    if (u <= 0.0F)
       *icoord = 0;
-   else if (u >= 1.0F)
+   else if (u >= size)
       *icoord = size - 1;
    else
-      *icoord = util_ifloor(u * size);
+      *icoord = util_ifloor(u);
 }
 
 
 static void
-wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 {
    /* s limited to [min,max] */
    /* i limited to [0, size-1] */
-   const float min = 1.0F / (2.0F * size);
-   const float max = 1.0F - min;
-   const float u = fabsf(s);
+   const float min = 0.5F;
+   const float max = (float)size - 0.5F;
+   const float u = fabsf(s * size + offset);
+
    if (u < min)
       *icoord = 0;
    else if (u > max)
       *icoord = size - 1;
    else
-      *icoord = util_ifloor(u * size);
+      *icoord = util_ifloor(u);
 }
 
 
 static void
-wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
+wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 {
-   /* s limited to [min,max] */
-   /* i limited to [0, size-1] */
-   const float min = -1.0F / (2.0F * size);
-   const float max = 1.0F - min;
-   const float u = fabsf(s);
+   /* u limited to [-0.5, size-0.5] */
+   const float min = -0.5F;
+   const float max = (float)size + 0.5F;
+   const float u = fabsf(s * size + offset);
+
    if (u < min)
       *icoord = -1;
    else if (u > max)
       *icoord = size;
    else
-      *icoord = util_ifloor(u * size);
+      *icoord = util_ifloor(u);
 }
 
 
@@ -264,22 +277,23 @@
  * \param icoord  returns the computed integer texture coord
  */
 static void
-wrap_linear_repeat(float s, unsigned size,
+wrap_linear_repeat(float s, unsigned size, int offset,
                    int *icoord0, int *icoord1, float *w)
 {
    float u = s * size - 0.5F;
-   *icoord0 = repeat(util_ifloor(u), size);
+   *icoord0 = repeat(util_ifloor(u) + offset, size);
    *icoord1 = repeat(*icoord0 + 1, size);
    *w = frac(u);
 }
 
 
 static void
-wrap_linear_clamp(float s, unsigned size,
+wrap_linear_clamp(float s, unsigned size, int offset,
                   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s, 0.0F, 1.0F);
-   u = u * size - 0.5f;
+   float u = CLAMP(s * size + offset, 0.0F, (float)size);
+
+   u = u - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -287,11 +301,11 @@
 
 
 static void
-wrap_linear_clamp_to_edge(float s, unsigned size,
+wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
                           int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s, 0.0F, 1.0F);
-   u = u * size - 0.5f;
+   float u = CLAMP(s * size + offset, 0.0F, (float)size);
+   u = u - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    if (*icoord0 < 0)
@@ -303,13 +317,13 @@
 
 
 static void
-wrap_linear_clamp_to_border(float s, unsigned size,
+wrap_linear_clamp_to_border(float s, unsigned size, int offset,
                             int *icoord0, int *icoord1, float *w)
 {
-   const float min = -1.0F / (2.0F * size);
-   const float max = 1.0F - min;
-   float u = CLAMP(s, min, max);
-   u = u * size - 0.5f;
+   const float min = -0.5F;
+   const float max = (float)size + 0.5F;
+   float u = CLAMP(s * size + offset, min, max);
+   u = u - 0.5f;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -317,11 +331,15 @@
 
 
 static void
-wrap_linear_mirror_repeat(float s, unsigned size,
+wrap_linear_mirror_repeat(float s, unsigned size, int offset,
                           int *icoord0, int *icoord1, float *w)
 {
-   const int flr = util_ifloor(s);
-   float u = frac(s);
+   int flr;
+   float u;
+
+   s += (float)offset / size;
+   flr = util_ifloor(s);
+   u = frac(s);
    if (flr & 1)
       u = 1.0F - u;
    u = u * size - 0.5F;
@@ -336,14 +354,12 @@
 
 
 static void
-wrap_linear_mirror_clamp(float s, unsigned size,
+wrap_linear_mirror_clamp(float s, unsigned size, int offset,
                          int *icoord0, int *icoord1, float *w)
 {
-   float u = fabsf(s);
-   if (u >= 1.0F)
+   float u = fabsf(s * size + offset);
+   if (u >= size)
       u = (float) size;
-   else
-      u *= size;
    u -= 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
@@ -352,14 +368,12 @@
 
 
 static void
-wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
+wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset,
                                  int *icoord0, int *icoord1, float *w)
 {
-   float u = fabsf(s);
-   if (u >= 1.0F)
+   float u = fabsf(s * size + offset);
+   if (u >= size)
       u = (float) size;
-   else
-      u *= size;
    u -= 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
@@ -372,18 +386,16 @@
 
 
 static void
-wrap_linear_mirror_clamp_to_border(float s, unsigned size,
+wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
                                    int *icoord0, int *icoord1, float *w)
 {
-   const float min = -1.0F / (2.0F * size);
-   const float max = 1.0F - min;
-   float u = fabsf(s);
+   const float min = -0.5F;
+   const float max = size + 0.5F;
+   float u = fabsf(s * size + offset);
    if (u <= min)
-      u = min * size;
+      u = min;
    else if (u >= max)
-      u = max * size;
-   else
-      u *= size;
+      u = max;
    u -= 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
@@ -395,10 +407,10 @@
  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
  */
 static void
-wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
+wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
 {
    int i = util_ifloor(s);
-   *icoord = CLAMP(i, 0, (int) size-1);
+   *icoord = CLAMP(i + offset, 0, (int) size-1);
 }
 
 
@@ -406,9 +418,9 @@
  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
  */
 static void
-wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
+wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord)
 {
-   *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) );
+   *icoord = util_ifloor( CLAMP(s + offset, -0.5F, (float) size + 0.5F) );
 }
 
 
@@ -416,9 +428,9 @@
  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
  */
 static void
-wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
+wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
 {
-   *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
+   *icoord = util_ifloor( CLAMP(s + offset, 0.5F, (float) size - 0.5F) );
 }
 
 
@@ -426,11 +438,11 @@
  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
  */
 static void
-wrap_linear_unorm_clamp(float s, unsigned size,
+wrap_linear_unorm_clamp(float s, unsigned size, int offset,
                         int *icoord0, int *icoord1, float *w)
 {
    /* Not exactly what the spec says, but it matches NVIDIA output */
-   float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
+   float u = CLAMP(s + offset - 0.5F, 0.0f, (float) size - 1.0f);
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
    *w = frac(u);
@@ -441,10 +453,10 @@
  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
  */
 static void
-wrap_linear_unorm_clamp_to_border(float s, unsigned size,
+wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
                                   int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s, -0.5F, (float) size + 0.5F);
+   float u = CLAMP(s + offset, -0.5F, (float) size + 0.5F);
    u -= 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
@@ -458,10 +470,10 @@
  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
  */
 static void
-wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
+wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
                                 int *icoord0, int *icoord1, float *w)
 {
-   float u = CLAMP(s, +0.5F, (float) size - 0.5F);
+   float u = CLAMP(s + offset, +0.5F, (float) size - 0.5F);
    u -= 0.5F;
    *icoord0 = util_ifloor(u);
    *icoord1 = *icoord0 + 1;
@@ -474,7 +486,7 @@
 /**
  * Do coordinate to array index conversion.  For array textures.
  */
-static INLINE int
+static inline int
 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
 {
    int c = util_ifloor(coord + 0.5F);
@@ -575,7 +587,7 @@
 
 
 
-static INLINE const float *
+static inline const float *
 get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
                        union tex_tile_address addr, int x, int y)
 {
@@ -591,7 +603,7 @@
 }
 
 
-static INLINE const float *
+static inline const float *
 get_texel_2d(const struct sp_sampler_view *sp_sview,
              const struct sp_sampler *sp_samp,
              union tex_tile_address addr, int x, int y)
@@ -683,7 +695,7 @@
      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
 };
 
-static INLINE unsigned
+static inline unsigned
 get_next_face(unsigned face, int idx)
 {
    return face_array[face][idx];
@@ -693,7 +705,7 @@
  * return a new xcoord based on old face, old coords, cube size
  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
  */
-static INLINE int
+static inline int
 get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 {
    if ((face == 0 && fall_off_index != 1) ||
@@ -731,7 +743,7 @@
  * return a new ycoord based on old face, old coords, cube size
  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
  */
-static INLINE int
+static inline int
 get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 {
    if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
@@ -759,7 +771,7 @@
 
 /* Gather a quad of adjacent texels within a tile:
  */
-static INLINE void
+static inline void
 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
                                         union tex_tile_address addr,
                                         unsigned x, unsigned y,
@@ -783,7 +795,7 @@
 
 /* Gather a quad of potentially non-adjacent texels:
  */
-static INLINE void
+static inline void
 get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
                             union tex_tile_address addr,
                             int x0, int y0,
@@ -798,7 +810,7 @@
 
 /* Can involve a lot of unnecessary checks for border color:
  */
-static INLINE void
+static inline void
 get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
                   const struct sp_sampler *sp_samp,
                   union tex_tile_address addr,
@@ -816,7 +828,7 @@
 
 /* 3d variants:
  */
-static INLINE const float *
+static inline const float *
 get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
                        union tex_tile_address addr, int x, int y, int z)
 {
@@ -834,7 +846,7 @@
 }
 
 
-static INLINE const float *
+static inline const float *
 get_texel_3d(const struct sp_sampler_view *sp_sview,
              const struct sp_sampler *sp_samp,
              union tex_tile_address addr, int x, int y, int z)
@@ -854,7 +866,7 @@
 
 
 /* Get texel pointer for 1D array texture */
-static INLINE const float *
+static inline const float *
 get_texel_1d_array(const struct sp_sampler_view *sp_sview,
                    const struct sp_sampler *sp_samp,
                    union tex_tile_address addr, int x, int y)
@@ -872,7 +884,7 @@
 
 
 /* Get texel pointer for 2D array texture */
-static INLINE const float *
+static inline const float *
 get_texel_2d_array(const struct sp_sampler_view *sp_sview,
                    const struct sp_sampler *sp_samp,
                    union tex_tile_address addr, int x, int y, int layer)
@@ -893,7 +905,7 @@
 }
 
 
-static INLINE const float *
+static inline const float *
 get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
                         union tex_tile_address addr, int x, int y,
                         float *corner, int layer, unsigned face)
@@ -948,7 +960,7 @@
 
 
 /* Get texel pointer for cube array texture */
-static INLINE const float *
+static inline const float *
 get_texel_cube_array(const struct sp_sampler_view *sp_sview,
                      const struct sp_sampler *sp_samp,
                      union tex_tile_address addr, int x, int y, int layer)
@@ -974,7 +986,7 @@
  * If level = 2, then we'll return 64 (the width at level=2).
  * Return 1 if level > base_pot.
  */
-static INLINE unsigned
+static inline unsigned
 pot_level_size(unsigned base_pot, unsigned level)
 {
    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
@@ -1004,25 +1016,21 @@
 
 /* Some image-filter fastpaths:
  */
-static INLINE void
+static inline void
 img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
                                 struct sp_sampler *sp_samp,
-                                float s,
-                                float t,
-                                float p,
-                                unsigned level,
-                                unsigned face_id,
+                                const struct img_filter_args *args,
                                 float *rgba)
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, level);
+   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
    int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
    int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
    union tex_tile_address addr;
    int c;
 
-   float u = s * xpot - 0.5F;
-   float v = t * ypot - 0.5F;
+   float u = (args->s * xpot - 0.5F) + args->offset[0];
+   float v = (args->t * ypot - 0.5F) + args->offset[1];
 
    int uflr = util_ifloor(u);
    int vflr = util_ifloor(v);
@@ -1036,7 +1044,7 @@
    const float *tx[4];
       
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    /* Can we fetch all four at once:
     */
@@ -1062,24 +1070,20 @@
 }
 
 
-static INLINE void
+static inline void
 img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
                                  struct sp_sampler *sp_samp,
-                                 float s,
-                                 float t,
-                                 float p,
-                                 unsigned level,
-                                 unsigned face_id,
+                                 const struct img_filter_args *args,
                                  float rgba[TGSI_QUAD_SIZE])
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, level);
+   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
    const float *out;
    union tex_tile_address addr;
    int c;
 
-   float u = s * xpot;
-   float v = t * ypot;
+   float u = args->s * xpot + args->offset[0];
+   float v = args->t * ypot + args->offset[1];
 
    int uflr = util_ifloor(u);
    int vflr = util_ifloor(v);
@@ -1088,7 +1092,7 @@
    int y0 = vflr & (ypot - 1);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1100,29 +1104,25 @@
 }
 
 
-static INLINE void
+static inline void
 img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
                                 struct sp_sampler *sp_samp,
-                                float s,
-                                float t,
-                                float p,
-                                unsigned level,
-                                unsigned face_id,
+                                const struct img_filter_args *args,
                                 float rgba[TGSI_QUAD_SIZE])
 {
-   unsigned xpot = pot_level_size(sp_sview->xpot, level);
-   unsigned ypot = pot_level_size(sp_sview->ypot, level);
+   unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
+   unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
    union tex_tile_address addr;
    int c;
 
-   float u = s * xpot;
-   float v = t * ypot;
+   float u = args->s * xpot + args->offset[0];
+   float v = args->t * ypot + args->offset[1];
 
    int x0, y0;
    const float *out;
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    x0 = util_ifloor(u);
    if (x0 < 0) 
@@ -1149,11 +1149,7 @@
 static void
 img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
                       struct sp_sampler *sp_samp,
-                      float s,
-                      float t,
-                      float p,
-                      unsigned level,
-                      unsigned face_id,
+                      const struct img_filter_args *args,
                       float rgba[TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1163,14 +1159,14 @@
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, level);
+   width = u_minify(texture->width0, args->level);
 
    assert(width > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->nearest_texcoord_s(s, width, &x);
+   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
 
    out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1185,11 +1181,7 @@
 static void
 img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
                             struct sp_sampler *sp_samp,
-                            float s,
-                            float t,
-                            float p,
-                            unsigned level,
-                            unsigned face_id,
+                            const struct img_filter_args *args,
                             float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1199,15 +1191,15 @@
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, level);
+   width = u_minify(texture->width0, args->level);
 
    assert(width > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->nearest_texcoord_s(s, width, &x);
-   layer = coord_to_layer(t, sp_sview->base.u.tex.first_layer,
+   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+   layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
                           sp_sview->base.u.tex.last_layer);
 
    out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
@@ -1223,11 +1215,7 @@
 static void
 img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
                       struct sp_sampler *sp_samp,
-                      float s,
-                      float t,
-                      float p,
-                      unsigned level,
-                      unsigned face_id,
+                      const struct img_filter_args *args,
                       float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1237,17 +1225,17 @@
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
  
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->nearest_texcoord_s(s, width, &x);
-   sp_samp->nearest_texcoord_t(t, height, &y);
+   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
 
    out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1262,11 +1250,7 @@
 static void
 img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
                             struct sp_sampler *sp_samp,
-                            float s,
-                            float t,
-                            float p,
-                            unsigned level,
-                            unsigned face_id,
+                            const struct img_filter_args *args,
                             float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1276,18 +1260,18 @@
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
  
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->nearest_texcoord_s(s, width, &x);
-   sp_samp->nearest_texcoord_t(t, height, &y);
-   layer = coord_to_layer(p, sp_sview->base.u.tex.first_layer,
+   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
+   layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
                           sp_sview->base.u.tex.last_layer);
 
    out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
@@ -1303,11 +1287,7 @@
 static void
 img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
                         struct sp_sampler *sp_samp,
-                        float s,
-                        float t,
-                        float p,
-                        unsigned level,
-                        unsigned face_id,
+                        const struct img_filter_args *args,
                         float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1317,29 +1297,29 @@
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
  
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    /*
     * If NEAREST filtering is done within a miplevel, always apply wrap
     * mode CLAMP_TO_EDGE.
     */
    if (sp_samp->base.seamless_cube_map) {
-      wrap_nearest_clamp_to_edge(s, width, &x);
-      wrap_nearest_clamp_to_edge(t, height, &y);
+      wrap_nearest_clamp_to_edge(args->s, width, args->offset[0], &x);
+      wrap_nearest_clamp_to_edge(args->t, height, args->offset[1], &y);
    } else {
       /* Would probably make sense to ignore mode and just do edge clamp */
-      sp_samp->nearest_texcoord_s(s, width, &x);
-      sp_samp->nearest_texcoord_t(t, height, &y);
+      sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+      sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
    }
 
-   layerface = face_id + sp_sview->base.u.tex.first_layer;
+   layerface = args->face_id + sp_sview->base.u.tex.first_layer;
    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
       rgba[TGSI_NUM_CHANNELS*c] = out[c];
@@ -1352,11 +1332,7 @@
 static void
 img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
                               struct sp_sampler *sp_samp,
-                              float s,
-                              float t,
-                              float p,
-                              unsigned level,
-                              unsigned face_id,
+                              const struct img_filter_args *args,
                               float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1366,20 +1342,20 @@
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
  
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->nearest_texcoord_s(s, width, &x);
-   sp_samp->nearest_texcoord_t(t, height, &y);
-   layerface = coord_to_layer(6 * p + sp_sview->base.u.tex.first_layer,
+   sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
+   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
+   layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
                               sp_sview->base.u.tex.first_layer,
-                              sp_sview->base.u.tex.last_layer - 5) + face_id;
+                              sp_sview->base.u.tex.last_layer - 5) + args->face_id;
 
    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1393,11 +1369,7 @@
 static void
 img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
                       struct sp_sampler *sp_samp,
-                      float s,
-                      float t,
-                      float p,
-                      unsigned level,
-                      unsigned face_id,
+                      const struct img_filter_args *args,
                       float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1407,20 +1379,20 @@
    const float *out;
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
-   depth = u_minify(texture->depth0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
+   depth = u_minify(texture->depth0, args->level);
 
    assert(width > 0);
    assert(height > 0);
    assert(depth > 0);
 
-   sp_samp->nearest_texcoord_s(s, width,  &x);
-   sp_samp->nearest_texcoord_t(t, height, &y);
-   sp_samp->nearest_texcoord_p(p, depth,  &z);
+   sp_samp->nearest_texcoord_s(args->s, width,  args->offset[0], &x);
+   sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
+   sp_samp->nearest_texcoord_p(args->p, depth,  args->offset[2], &z);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
    for (c = 0; c < TGSI_QUAD_SIZE; c++)
@@ -1431,11 +1403,7 @@
 static void
 img_filter_1d_linear(struct sp_sampler_view *sp_sview,
                      struct sp_sampler *sp_samp,
-                     float s,
-                     float t,
-                     float p,
-                     unsigned level,
-                     unsigned face_id,
+                     const struct img_filter_args *args,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1446,14 +1414,14 @@
    const float *tx0, *tx1;
    int c;
 
-   width = u_minify(texture->width0, level);
+   width = u_minify(texture->width0, args->level);
 
    assert(width > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
+   sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
 
    tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
    tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
@@ -1467,11 +1435,7 @@
 static void
 img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
                            struct sp_sampler *sp_samp,
-                           float s,
-                           float t,
-                           float p,
-                           unsigned level,
-                           unsigned face_id,
+                           const struct img_filter_args *args,
                            float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1482,15 +1446,15 @@
    const float *tx0, *tx1;
    int c;
 
-   width = u_minify(texture->width0, level);
+   width = u_minify(texture->width0, args->level);
 
    assert(width > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
-   layer = coord_to_layer(t, sp_sview->base.u.tex.first_layer,
+   sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
+   layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
                           sp_sview->base.u.tex.last_layer);
 
    tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
@@ -1501,15 +1465,77 @@
       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
 }
 
+/*
+ * Return one gathered value for output slot chan_in: pick one of the four
+ * fetched texels in tx[] and read from it the component that comp_sel
+ * selects through the sampler view swizzle (ZERO/ONE give 0.0/1.0).
+ */
+static float
+get_gather_value(const struct sp_sampler_view *sp_sview,
+                 int chan_in, int comp_sel,
+                 const float *tx[4])
+{
+   int chan;
+   unsigned swizzle;
+
+   /*
+    * softpipe fetches the four texels in a different order
+    * than TGSI expects, so remap the requested output slot
+    * to the index of the matching texel in tx[].
+    */
+   switch (chan_in) {
+   case 0:
+      chan = 2;
+      break;
+   case 1:
+      chan = 3;
+      break;
+   case 2:
+      chan = 1;
+      break;
+   case 3:
+      chan = 0;
+      break;
+   default: /* chan_in outside 0..3: assert, then return 0.0 */
+      assert(0);
+      return 0.0;
+   }
+
+   /* comp_sel (0..3) picks the view's R, G, B or A swizzle entry */
+   switch (comp_sel) {
+   case 0:
+      swizzle = sp_sview->base.swizzle_r;
+      break;
+   case 1:
+      swizzle = sp_sview->base.swizzle_g;
+      break;
+   case 2:
+      swizzle = sp_sview->base.swizzle_b;
+      break;
+   case 3:
+      swizzle = sp_sview->base.swizzle_a;
+      break;
+   default: /* comp_sel outside 0..3: assert, then return 0.0 */
+      assert(0);
+      return 0.0;
+   }
+
+   /* constant swizzles return 0.0/1.0; otherwise index the chosen texel */
+   switch (swizzle) {
+   case PIPE_SWIZZLE_ZERO:
+      return 0.0;
+   case PIPE_SWIZZLE_ONE:
+      return 1.0;
+   default:
+      return tx[chan][swizzle];
+   }
+}
+
 
 static void
 img_filter_2d_linear(struct sp_sampler_view *sp_sview,
                      struct sp_sampler *sp_samp,
-                     float s,
-                     float t,
-                     float p,
-                     unsigned level,
-                     unsigned face_id,
+                     const struct img_filter_args *args,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1517,42 +1543,45 @@
    int x0, y0, x1, y1;
    float xw, yw; /* weights */
    union tex_tile_address addr;
-   const float *tx0, *tx1, *tx2, *tx3;
+   const float *tx[4];
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
-
-   sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
-   sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
+   addr.bits.level = args->level;
 
-   tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
-   tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
-   tx2 = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
-   tx3 = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
+   sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
+   sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
 
-   /* interpolate R, G, B, A */
-   for (c = 0; c < TGSI_QUAD_SIZE; c++)
-      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
-                                          tx0[c], tx1[c],
-                                          tx2[c], tx3[c]);
+   tx[0] = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
+   tx[1] = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
+   tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
+   tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
+
+   if (args->gather_only) {
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+                                                      args->gather_comp,
+                                                      tx);
+   } else {
+      /* interpolate R, G, B, A */
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+                                             tx[0][c], tx[1][c],
+                                             tx[2][c], tx[3][c]);
+   }
 }
 
 
 static void
 img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
                            struct sp_sampler *sp_samp,
-                           float s,
-                           float t,
-                           float p,
-                           unsigned level,
-                           unsigned face_id,
+                           const struct img_filter_args *args,
                            float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1560,44 +1589,47 @@
    int x0, y0, x1, y1, layer;
    float xw, yw; /* weights */
    union tex_tile_address addr;
-   const float *tx0, *tx1, *tx2, *tx3;
+   const float *tx[4];
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
-   sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
-   sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
-   layer = coord_to_layer(p, sp_sview->base.u.tex.first_layer,
+   sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
+   sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
+   layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
                           sp_sview->base.u.tex.last_layer);
 
-   tx0 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
-   tx1 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
-   tx2 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
-   tx3 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
-
-   /* interpolate R, G, B, A */
-   for (c = 0; c < TGSI_QUAD_SIZE; c++)
-      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
-                                          tx0[c], tx1[c],
-                                          tx2[c], tx3[c]);
+   tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
+   tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
+   tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
+   tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
+
+   if (args->gather_only) {
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+                                                      args->gather_comp,
+                                                      tx);
+   } else {
+      /* interpolate R, G, B, A */
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+                                             tx[0][c], tx[1][c],
+                                             tx[2][c], tx[3][c]);
+   }
 }
 
 
 static void
 img_filter_cube_linear(struct sp_sampler_view *sp_sview,
                        struct sp_sampler *sp_samp,
-                       float s,
-                       float t,
-                       float p,
-                       unsigned level,
-                       unsigned face_id,
+                       const struct img_filter_args *args,
                        float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1605,19 +1637,19 @@
    int x0, y0, x1, y1, layer;
    float xw, yw; /* weights */
    union tex_tile_address addr;
-   const float *tx0, *tx1, *tx2, *tx3;
+   const float *tx[4];
    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    /*
     * For seamless if LINEAR filtering is done within a miplevel,
@@ -1625,44 +1657,47 @@
     */
    if (sp_samp->base.seamless_cube_map) {
       /* Note this is a bit overkill, actual clamping is not required */
-      wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
-      wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
+      wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
+      wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
    } else {
       /* Would probably make sense to ignore mode and just do edge clamp */
-      sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
-      sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
+      sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
+      sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
    }
 
    layer = sp_sview->base.u.tex.first_layer;
 
    if (sp_samp->base.seamless_cube_map) {
-      tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, face_id);
-      tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, face_id);
-      tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, face_id);
-      tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, face_id);
+      tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
+      tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
+      tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
+      tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
    } else {
-      tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + face_id);
-      tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + face_id);
-      tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + face_id);
-      tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + face_id);
+      tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
+      tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
+      tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
+      tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
    }
 
-   /* interpolate R, G, B, A */
-   for (c = 0; c < TGSI_QUAD_SIZE; c++)
-      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
-                                          tx0[c], tx1[c],
-                                          tx2[c], tx3[c]);
+   if (args->gather_only) {
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+                                                      args->gather_comp,
+                                                      tx);
+   } else {
+      /* interpolate R, G, B, A */
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+                                             tx[0][c], tx[1][c],
+                                             tx[2][c], tx[3][c]);
+   }
 }
 
 
 static void
 img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
                              struct sp_sampler *sp_samp,
-                             float s,
-                             float t,
-                             float p,
-                             unsigned level,
-                             unsigned face_id,
+                             const struct img_filter_args *args,
                              float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1670,19 +1705,19 @@
    int x0, y0, x1, y1, layer;
    float xw, yw; /* weights */
    union tex_tile_address addr;
-   const float *tx0, *tx1, *tx2, *tx3;
+   const float *tx[4];
    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
 
    assert(width > 0);
    assert(height > 0);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    /*
     * For seamless if LINEAR filtering is done within a miplevel,
@@ -1690,45 +1725,48 @@
     */
    if (sp_samp->base.seamless_cube_map) {
       /* Note this is a bit overkill, actual clamping is not required */
-      wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
-      wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
+      wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
+      wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
    } else {
       /* Would probably make sense to ignore mode and just do edge clamp */
-      sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
-      sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
+      sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
+      sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
    }
 
-   layer = coord_to_layer(6 * p + sp_sview->base.u.tex.first_layer,
+   layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
                           sp_sview->base.u.tex.first_layer,
                           sp_sview->base.u.tex.last_layer - 5);
 
    if (sp_samp->base.seamless_cube_map) {
-      tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, face_id);
-      tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, face_id);
-      tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, face_id);
-      tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, face_id);
+      tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
+      tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
+      tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
+      tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
    } else {
-      tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + face_id);
-      tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + face_id);
-      tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + face_id);
-      tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + face_id);
+      tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
+      tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
+      tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
+      tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
    }
 
-   /* interpolate R, G, B, A */
-   for (c = 0; c < TGSI_QUAD_SIZE; c++)
-      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
-                                          tx0[c], tx1[c],
-                                          tx2[c], tx3[c]);
+   if (args->gather_only) {
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
+                                                      args->gather_comp,
+                                                      tx);
+   } else {
+      /* interpolate R, G, B, A */
+      for (c = 0; c < TGSI_QUAD_SIZE; c++)
+         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
+                                             tx[0][c], tx[1][c],
+                                             tx[2][c], tx[3][c]);
+   }
 }
 
 static void
 img_filter_3d_linear(struct sp_sampler_view *sp_sview,
                      struct sp_sampler *sp_samp,
-                     float s,
-                     float t,
-                     float p,
-                     unsigned level,
-                     unsigned face_id,
+                     const struct img_filter_args *args,
                      float *rgba)
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -1739,21 +1777,20 @@
    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
    int c;
 
-   width = u_minify(texture->width0, level);
-   height = u_minify(texture->height0, level);
-   depth = u_minify(texture->depth0, level);
+   width = u_minify(texture->width0, args->level);
+   height = u_minify(texture->height0, args->level);
+   depth = u_minify(texture->depth0, args->level);
 
    addr.value = 0;
-   addr.bits.level = level;
+   addr.bits.level = args->level;
 
    assert(width > 0);
    assert(height > 0);
    assert(depth > 0);
 
-   sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
-   sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
-   sp_samp->linear_texcoord_p(p, depth,  &z0, &z1, &zw);
-
+   sp_samp->linear_texcoord_s(args->s, width,  args->offset[0], &x0, &x1, &xw);
+   sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
+   sp_samp->linear_texcoord_p(args->p, depth,  args->offset[2], &z0, &z1, &zw);
 
    tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
    tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
@@ -1782,7 +1819,7 @@
  * \param lod_in per-fragment lod_bias or explicit_lod.
  * \param lod returns the per-fragment lod.
  */
-static INLINE void
+static inline void
 compute_lod(const struct pipe_sampler_state *sampler,
             enum tgsi_sampler_control control,
             const float biased_lambda,
@@ -1822,7 +1859,7 @@
  * \param lod_in per-fragment lod_bias or explicit_lod.
  * \param lod results per-fragment lod.
  */
-static INLINE void
+static inline void
 compute_lambda_lod(struct sp_sampler_view *sp_sview,
                    struct sp_sampler *sp_samp,
                    const float s[TGSI_QUAD_SIZE],
@@ -1859,6 +1896,7 @@
       }
       break;
    case tgsi_sampler_lod_zero:
+   case tgsi_sampler_gather:
       /* this is all static state in the sampler really need clamp here? */
       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
       break;
@@ -1868,6 +1906,12 @@
    }
 }
 
+static inline unsigned
+get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
+{
+   /* lod_in[0] holds the gather component as raw unsigned bits; mask to 0..3 */
+   return (*(unsigned int *)lod_in) & 0x3;
+}
 
 static void
 mip_filter_linear(struct sp_sampler_view *sp_sview,
@@ -1879,36 +1923,45 @@
                   const float p[TGSI_QUAD_SIZE],
                   const float c0[TGSI_QUAD_SIZE],
                   const float lod_in[TGSI_QUAD_SIZE],
-                  enum tgsi_sampler_control control,
+                  const struct filter_args *filt_args,
                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    const struct pipe_sampler_view *psview = &sp_sview->base;
    int j;
    float lod[TGSI_QUAD_SIZE];
+   struct img_filter_args args;
 
-   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
+
+   args.offset = filt_args->offset;
+   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_comp = get_gather_component(lod_in);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
       int level0 = psview->u.tex.first_level + (int)lod[j];
 
-      if (lod[j] < 0.0)
-         mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                    psview->u.tex.first_level,
-                    sp_sview->faces[j], &rgba[0][j]);
-
-      else if (level0 >= (int) psview->u.tex.last_level)
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level,
-                    sp_sview->faces[j], &rgba[0][j]);
-
+      args.s = s[j];
+      args.t = t[j];
+      args.p = p[j];
+      args.face_id = sp_sview->faces[j];
+
+      if (lod[j] < 0.0) {
+         args.level = psview->u.tex.first_level;
+         mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+      }
+      else if (level0 >= (int) psview->u.tex.last_level) {
+         args.level = psview->u.tex.last_level;
+         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+      }
       else {
          float levelBlend = frac(lod[j]);
          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
          int c;
 
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0,
-                    sp_sview->faces[j], &rgbax[0][0]);
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0+1,
-                    sp_sview->faces[j], &rgbax[0][1]);
+         args.level = level0;
+         min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]);
+         args.level = level0+1;
+         min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]);
 
          for (c = 0; c < 4; c++) {
             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
@@ -1937,25 +1990,33 @@
                    const float p[TGSI_QUAD_SIZE],
                    const float c0[TGSI_QUAD_SIZE],
                    const float lod_in[TGSI_QUAD_SIZE],
-                   enum tgsi_sampler_control control,
+                   const struct filter_args *filt_args,
                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    const struct pipe_sampler_view *psview = &sp_sview->base;
    float lod[TGSI_QUAD_SIZE];
    int j;
+   struct img_filter_args args;
 
-   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+   args.offset = filt_args->offset;
+   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   args.gather_comp = get_gather_component(lod_in);
+
+   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      if (lod[j] < 0.0)
-         mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                    psview->u.tex.first_level,
-                    sp_sview->faces[j], &rgba[0][j]);
-      else {
+      args.s = s[j];
+      args.t = t[j];
+      args.p = p[j];
+      args.face_id = sp_sview->faces[j];
+
+      if (lod[j] < 0.0) {
+         args.level = psview->u.tex.first_level;
+         mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+      } else {
          int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
-         level = MIN2(level, (int)psview->u.tex.last_level);
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                    level, sp_sview->faces[j], &rgba[0][j]);
+         args.level = MIN2(level, (int)psview->u.tex.last_level);
+         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
    }
 
@@ -1975,24 +2036,29 @@
                 const float p[TGSI_QUAD_SIZE],
                 const float c0[TGSI_QUAD_SIZE],
                 const float lod_in[TGSI_QUAD_SIZE],
-                enum tgsi_sampler_control control,
+                const struct filter_args *filt_args,
                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    float lod[TGSI_QUAD_SIZE];
    int j;
+   struct img_filter_args args;
 
-   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+   args.level = sp_sview->base.u.tex.first_level;
+   args.offset = filt_args->offset;
+   args.gather_only = filt_args->control == tgsi_sampler_gather;
+
+   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      if (lod[j] < 0.0) { 
-         mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                    sp_sview->base.u.tex.first_level,
-                    sp_sview->faces[j], &rgba[0][j]);
+      args.s = s[j];
+      args.t = t[j];
+      args.p = p[j];
+      args.face_id = sp_sview->faces[j];
+      if (lod[j] < 0.0) {
+         mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
       else {
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                    sp_sview->base.u.tex.first_level,
-                    sp_sview->faces[j], &rgba[0][j]);
+         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
       }
    }
 }
@@ -2008,15 +2074,21 @@
                                  const float p[TGSI_QUAD_SIZE],
                                  const float c0[TGSI_QUAD_SIZE],
                                  const float lod_in[TGSI_QUAD_SIZE],
-                                 enum tgsi_sampler_control control,
+                                 const struct filter_args *filt_args,
                                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    int j;
-
-   for (j = 0; j < TGSI_QUAD_SIZE; j++)
-      mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                 sp_sview->base.u.tex.first_level,
-                 sp_sview->faces[j], &rgba[0][j]);
+   struct img_filter_args args;
+   args.level = sp_sview->base.u.tex.first_level;
+   args.offset = filt_args->offset;
+   args.gather_only = filt_args->control == tgsi_sampler_gather;
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      args.s = s[j];
+      args.t = t[j];
+      args.p = p[j];
+      args.face_id = sp_sview->faces[j];
+      mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+   }
 }
 
 
@@ -2072,7 +2144,7 @@
    float scaling = 1.0f / (1 << level0);
    int width = u_minify(texture->width0, level0);
    int height = u_minify(texture->height0, level0);
-
+   struct img_filter_args args;
    float ux = dudx * scaling;
    float vx = dvdx * scaling;
    float uy = dudy * scaling;
@@ -2122,7 +2194,8 @@
     * full, then the pixel values are read from the image.
     */
    ddq = 2 * A;
-   
+
+   args.level = level;
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
       /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
@@ -2139,6 +2212,8 @@
       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
       buffer_next = 0;
       den = 0;
+      args.face_id = sp_sview->faces[j];
+
       U = u0 - tex_u;
       for (v = v0; v <= v1; ++v) {
          float V = v - tex_v;
@@ -2170,8 +2245,10 @@
                    * accelerated img_filter_2d_nearest_XXX functions.
                    */
                   for (jj = 0; jj < buffer_next; jj++) {
-                     min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj],
-                                level, sp_sview->faces[j], &rgba_temp[0][jj]);
+                     args.s = s_buffer[jj];
+                     args.t = t_buffer[jj];
+                     args.p = p[jj];
+                     min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
                      num[0] += weight_buffer[jj] * rgba_temp[0][jj];
                      num[1] += weight_buffer[jj] * rgba_temp[1][jj];
                      num[2] += weight_buffer[jj] * rgba_temp[2][jj];
@@ -2198,8 +2275,10 @@
           * accelerated img_filter_2d_nearest_XXX functions.
           */
          for (jj = 0; jj < buffer_next; jj++) {
-            min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj],
-                       level, sp_sview->faces[j], &rgba_temp[0][jj]);
+            args.s = s_buffer[jj];
+            args.t = t_buffer[jj];
+            args.p = p[jj];
+            min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
             num[0] += weight_buffer[jj] * rgba_temp[0][jj];
             num[1] += weight_buffer[jj] * rgba_temp[1][jj];
             num[2] += weight_buffer[jj] * rgba_temp[2][jj];
@@ -2218,8 +2297,10 @@
          rgba[2]=0;
          rgba[3]=0;*/
          /* not enough pixels in resampling, resort to direct interpolation */
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level,
-                    sp_sview->faces[j], &rgba_temp[0][j]);
+         args.s = s[j];
+         args.t = t[j];
+         args.p = p[j];
+         min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]);
          den = 1;
          num[0] = rgba_temp[0][j];
          num[1] = rgba_temp[1][j];
@@ -2248,7 +2329,7 @@
                         const float p[TGSI_QUAD_SIZE],
                         const float c0[TGSI_QUAD_SIZE],
                         const float lod_in[TGSI_QUAD_SIZE],
-                        enum tgsi_sampler_control control,
+                        const struct filter_args *filt_args,
                         float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    const struct pipe_resource *texture = sp_sview->base.texture;
@@ -2263,11 +2344,12 @@
    float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
    float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
    float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
-   
-   if (control == tgsi_sampler_lod_bias ||
-       control == tgsi_sampler_lod_none ||
+   struct img_filter_args args;
+
+   if (filt_args->control == tgsi_sampler_lod_bias ||
+       filt_args->control == tgsi_sampler_lod_none ||
        /* XXX FIXME */
-       control == tgsi_sampler_derivs_explicit) {
+       filt_args->control == tgsi_sampler_derivs_explicit) {
       /* note: instead of working with Px and Py, we will use the 
        * squared length instead, to avoid sqrt.
        */
@@ -2304,12 +2386,12 @@
        * this since 0.5*log(x) = log(sqrt(x))
        */
       lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
-      compute_lod(&sp_samp->base, control, lambda, lod_in, lod);
+      compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
    }
    else {
-      assert(control == tgsi_sampler_lod_explicit ||
-             control == tgsi_sampler_lod_zero);
-      compute_lod(&sp_samp->base, control, sp_samp->base.lod_bias, lod_in, lod);
+      assert(filt_args->control == tgsi_sampler_lod_explicit ||
+             filt_args->control == tgsi_sampler_lod_zero);
+      compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
    }
    
    /* XXX: Take into account all lod values.
@@ -2322,9 +2404,14 @@
     */
    if (level0 >= (int) psview->u.tex.last_level) {
       int j;
-      for (j = 0; j < TGSI_QUAD_SIZE; j++)
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level,
-                    sp_sview->faces[j], &rgba[0][j]);
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+         args.s = s[j];
+         args.t = t[j];
+         args.p = p[j];
+         args.level = psview->u.tex.last_level;
+         args.face_id = sp_sview->faces[j];
+         min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
+      }
    }
    else {
       /* don't bother interpolating between multiple LODs; it doesn't
@@ -2356,29 +2443,33 @@
    const float p[TGSI_QUAD_SIZE],
    const float c0[TGSI_QUAD_SIZE],
    const float lod_in[TGSI_QUAD_SIZE],
-   enum tgsi_sampler_control control,
+   const struct filter_args *filt_args,
    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    const struct pipe_sampler_view *psview = &sp_sview->base;
    int j;
    float lod[TGSI_QUAD_SIZE];
 
-   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
+   compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
 
    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
       int level0 = psview->u.tex.first_level + (int)lod[j];
-
+      struct img_filter_args args;
       /* Catches both negative and large values of level0:
        */
+      args.s = s[j];
+      args.t = t[j];
+      args.p = p[j];
+      args.face_id = sp_sview->faces[j];
+      args.offset = filt_args->offset;
+      args.gather_only = filt_args->control == tgsi_sampler_gather;
       if ((unsigned)level0 >= psview->u.tex.last_level) {
          if (level0 < 0)
-            img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
-                                            psview->u.tex.first_level,
-                                            sp_sview->faces[j], &rgba[0][j]);
+            args.level = psview->u.tex.first_level;
          else
-            img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
-                                            psview->u.tex.last_level,
-                                            sp_sview->faces[j], &rgba[0][j]);
+            args.level = psview->u.tex.last_level;
+         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args,
+                                         &rgba[0][j]);
 
       }
       else {
@@ -2386,10 +2477,10 @@
          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
          int c;
 
-         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0,
-                                         sp_sview->faces[j], &rgbax[0][0]);
-         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0+1,
-                                         sp_sview->faces[j], &rgbax[0][1]);
+         args.level = level0;
+         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]);
+         args.level = level0+1;
+         img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]);
 
          for (c = 0; c < TGSI_NUM_CHANNELS; c++)
             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
@@ -2417,11 +2508,12 @@
                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    const struct pipe_sampler_state *sampler = &sp_samp->base;
-   int j;
-   int k[4];
+   int j, v;
+   int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
    float pc[4];
    const struct util_format_description *format_desc;
    unsigned chan_type;
+   bool is_gather = (control == tgsi_sampler_gather);
 
    /**
     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -2465,65 +2557,74 @@
       pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
    }
 
-   /* compare four texcoords vs. four texture samples */
-   switch (sampler->compare_func) {
-   case PIPE_FUNC_LESS:
-      k[0] = pc[0] < rgba[0][0];
-      k[1] = pc[1] < rgba[0][1];
-      k[2] = pc[2] < rgba[0][2];
-      k[3] = pc[3] < rgba[0][3];
-      break;
-   case PIPE_FUNC_LEQUAL:
-      k[0] = pc[0] <= rgba[0][0];
-      k[1] = pc[1] <= rgba[0][1];
-      k[2] = pc[2] <= rgba[0][2];
-      k[3] = pc[3] <= rgba[0][3];
-      break;
-   case PIPE_FUNC_GREATER:
-      k[0] = pc[0] > rgba[0][0];
-      k[1] = pc[1] > rgba[0][1];
-      k[2] = pc[2] > rgba[0][2];
-      k[3] = pc[3] > rgba[0][3];
-      break;
-   case PIPE_FUNC_GEQUAL:
-      k[0] = pc[0] >= rgba[0][0];
-      k[1] = pc[1] >= rgba[0][1];
-      k[2] = pc[2] >= rgba[0][2];
-      k[3] = pc[3] >= rgba[0][3];
-      break;
-   case PIPE_FUNC_EQUAL:
-      k[0] = pc[0] == rgba[0][0];
-      k[1] = pc[1] == rgba[0][1];
-      k[2] = pc[2] == rgba[0][2];
-      k[3] = pc[3] == rgba[0][3];
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      k[0] = pc[0] != rgba[0][0];
-      k[1] = pc[1] != rgba[0][1];
-      k[2] = pc[2] != rgba[0][2];
-      k[3] = pc[3] != rgba[0][3];
-      break;
-   case PIPE_FUNC_ALWAYS:
-      k[0] = k[1] = k[2] = k[3] = 1;
-      break;
-   case PIPE_FUNC_NEVER:
-      k[0] = k[1] = k[2] = k[3] = 0;
-      break;
-   default:
-      k[0] = k[1] = k[2] = k[3] = 0;
-      assert(0);
-      break;
+   for (v = 0; v < (is_gather ? TGSI_NUM_CHANNELS : 1); v++) {
+      /* compare four texcoords vs. four texture samples */
+      switch (sampler->compare_func) {
+      case PIPE_FUNC_LESS:
+         k[v][0] = pc[0] < rgba[v][0];
+         k[v][1] = pc[1] < rgba[v][1];
+         k[v][2] = pc[2] < rgba[v][2];
+         k[v][3] = pc[3] < rgba[v][3];
+         break;
+      case PIPE_FUNC_LEQUAL:
+         k[v][0] = pc[0] <= rgba[v][0];
+         k[v][1] = pc[1] <= rgba[v][1];
+         k[v][2] = pc[2] <= rgba[v][2];
+         k[v][3] = pc[3] <= rgba[v][3];
+         break;
+      case PIPE_FUNC_GREATER:
+         k[v][0] = pc[0] > rgba[v][0];
+         k[v][1] = pc[1] > rgba[v][1];
+         k[v][2] = pc[2] > rgba[v][2];
+         k[v][3] = pc[3] > rgba[v][3];
+         break;
+      case PIPE_FUNC_GEQUAL:
+         k[v][0] = pc[0] >= rgba[v][0];
+         k[v][1] = pc[1] >= rgba[v][1];
+         k[v][2] = pc[2] >= rgba[v][2];
+         k[v][3] = pc[3] >= rgba[v][3];
+         break;
+      case PIPE_FUNC_EQUAL:
+         k[v][0] = pc[0] == rgba[v][0];
+         k[v][1] = pc[1] == rgba[v][1];
+         k[v][2] = pc[2] == rgba[v][2];
+         k[v][3] = pc[3] == rgba[v][3];
+         break;
+      case PIPE_FUNC_NOTEQUAL:
+         k[v][0] = pc[0] != rgba[v][0];
+         k[v][1] = pc[1] != rgba[v][1];
+         k[v][2] = pc[2] != rgba[v][2];
+         k[v][3] = pc[3] != rgba[v][3];
+         break;
+      case PIPE_FUNC_ALWAYS:
+         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1;
+         break;
+      case PIPE_FUNC_NEVER:
+         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
+         break;
+      default:
+         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
+         assert(0);
+         break;
+      }
    }
 
-   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      rgba[0][j] = k[j];
-      rgba[1][j] = k[j];
-      rgba[2][j] = k[j];
-      rgba[3][j] = 1.0F;
+   if (is_gather) {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+         for (v = 0; v < TGSI_NUM_CHANNELS; v++) {
+            rgba[v][j] = k[v][j];
+         }
+      }
+   } else {
+      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+         rgba[0][j] = k[0][j];
+         rgba[1][j] = k[0][j];
+         rgba[2][j] = k[0][j];
+         rgba[3][j] = 1.0F;
+      }
    }
 }
 
-
 static void
 do_swizzling(const struct pipe_sampler_view *sview,
              float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
@@ -2688,7 +2789,7 @@
 /**
  * Is swizzling needed for the given state key?
  */
-static INLINE bool
+static inline bool
 any_swizzle(const struct pipe_sampler_view *view)
 {
    return (view->swizzle_r != PIPE_SWIZZLE_RED ||
@@ -2701,7 +2802,7 @@
 static img_filter_func
 get_img_filter(const struct sp_sampler_view *sp_sview,
                const struct pipe_sampler_state *sampler,
-               unsigned filter)
+               unsigned filter, bool gather)
 {
    switch (sp_sview->base.target) {
    case PIPE_BUFFER:
@@ -2721,7 +2822,7 @@
    case PIPE_TEXTURE_RECT:
       /* Try for fast path:
        */
-      if (sp_sview->pot2d &&
+      if (!gather && sp_sview->pot2d &&
           sampler->wrap_s == sampler->wrap_t &&
           sampler->normalized_coords) 
       {
@@ -2791,35 +2892,38 @@
            const float p[TGSI_QUAD_SIZE],
            const float c0[TGSI_QUAD_SIZE],
            const float lod[TGSI_QUAD_SIZE],
-           enum tgsi_sampler_control control,
+           const struct filter_args *filt_args,
            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    mip_filter_func mip_filter;
    img_filter_func min_img_filter = NULL;
    img_filter_func mag_img_filter = NULL;
 
-   if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
+   if (filt_args->control == tgsi_sampler_gather) {
+      mip_filter = mip_filter_nearest;
+      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
+   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
       mip_filter = mip_filter_linear_2d_linear_repeat_POT;
    }
    else {
       mip_filter = sp_samp->mip_filter;
-      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter);
+      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
       if (sp_samp->min_mag_equal) {
          mag_img_filter = min_img_filter;
       }
       else {
-         mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter);
+         mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
       }
    }
 
    mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
-              s, t, p, c0, lod, control, rgba);
+              s, t, p, c0, lod, filt_args, rgba);
 
    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
-      sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, control, rgba);
+      sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
    }
 
-   if (sp_sview->need_swizzle) {
+   if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
       do_swizzling(&sp_sview->base, rgba_temp, rgba);
@@ -2840,7 +2944,7 @@
             const float p[TGSI_QUAD_SIZE],
             const float c0[TGSI_QUAD_SIZE],
             const float c1[TGSI_QUAD_SIZE],
-            enum tgsi_sampler_control control,
+            const struct filter_args *filt_args,
             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    unsigned j;
@@ -2918,7 +3022,7 @@
       }
    }
 
-   sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, control, rgba);
+   sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba);
 }
 
 
@@ -3287,7 +3391,7 @@
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 {
    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
-
+   struct filter_args filt_args;
    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
    assert(sampler_index < PIPE_MAX_SAMPLERS);
    assert(sp_samp->sp_sampler[sampler_index]);
@@ -3301,9 +3405,12 @@
       }
       return;
    }
+
+   filt_args.control = control;
+   filt_args.offset = offset;
    sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
                                               sp_samp->sp_sampler[sampler_index],
-                                              s, t, p, c0, lod, control, rgba);
+                                              s, t, p, c0, lod, &filt_args, rgba);
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_sample.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_sample.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_sample.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_sample.h	2015-09-16 14:36:09.000000000 +0000
@@ -38,10 +38,12 @@
 
 typedef void (*wrap_nearest_func)(float s,
                                   unsigned size,
+                                  int offset,
                                   int *icoord);
 
 typedef void (*wrap_linear_func)(float s, 
                                  unsigned size,
+                                 int offset,
                                  int *icoord0,
                                  int *icoord1,
                                  float *w);
@@ -51,15 +53,27 @@
                                      const float t[TGSI_QUAD_SIZE],
                                      const float p[TGSI_QUAD_SIZE]);
 
+struct img_filter_args {
+   float s;
+   float t;
+   float p;
+   unsigned level;
+   unsigned face_id;
+   const int8_t *offset;
+   bool gather_only;
+   int gather_comp;
+};
+
 typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview,
                                 struct sp_sampler *sp_samp,
-                                float s,
-                                float t,
-                                float p,
-                                unsigned level,
-                                unsigned face_id,
+                                const struct img_filter_args *args,
                                 float *rgba);
 
+struct filter_args {
+   enum tgsi_sampler_control control;
+   const int8_t *offset;
+};
+
 typedef void (*mip_filter_func)(struct sp_sampler_view *sp_sview,
                                 struct sp_sampler *sp_samp,
                                 img_filter_func min_filter,
@@ -69,7 +83,7 @@
                                 const float p[TGSI_QUAD_SIZE],
                                 const float c0[TGSI_QUAD_SIZE],
                                 const float lod[TGSI_QUAD_SIZE],
-                                enum tgsi_sampler_control control,
+                                const struct filter_args *args,
                                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
 
 
@@ -80,7 +94,7 @@
                             const float p[TGSI_QUAD_SIZE],
                             const float c0[TGSI_QUAD_SIZE],
                             const float lod[TGSI_QUAD_SIZE],
-                            enum tgsi_sampler_control control,
+                            const struct filter_args *args,
                             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_tile_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_tile_cache.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_tile_cache.c	2015-09-16 14:36:09.000000000 +0000
@@ -185,7 +185,7 @@
  * This is basically a direct-map cache.
  * XXX There's probably lots of ways in which we can improve this.
  */
-static INLINE uint
+static inline uint
 tex_cache_pos( union tex_tile_address addr )
 {
    uint entry = (addr.bits.x + 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_tile_cache.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tex_tile_cache.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tex_tile_cache.h	2015-09-16 14:36:09.000000000 +0000
@@ -127,7 +127,7 @@
 sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, 
                         union tex_tile_address addr );
 
-static INLINE union tex_tile_address
+static inline union tex_tile_address
 tex_tile_address( unsigned x,
                   unsigned y,
                   unsigned z,
@@ -147,7 +147,7 @@
 
 /* Quickly retrieve tile if it matches last lookup.
  */
-static INLINE const struct softpipe_tex_cached_tile *
+static inline const struct softpipe_tex_cached_tile *
 sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc, 
                        union tex_tile_address addr )
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_texture.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_texture.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_texture.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_texture.h	2015-09-16 14:36:09.000000000 +0000
@@ -81,13 +81,13 @@
 
 
 /** cast wrappers */
-static INLINE struct softpipe_resource *
+static inline struct softpipe_resource *
 softpipe_resource(struct pipe_resource *pt)
 {
    return (struct softpipe_resource *) pt;
 }
 
-static INLINE struct softpipe_transfer *
+static inline struct softpipe_transfer *
 softpipe_transfer(struct pipe_transfer *pt)
 {
    return (struct softpipe_transfer *) pt;
@@ -99,7 +99,7 @@
  * This is a short-cut instead of using map()/unmap(), which should
  * probably be fixed.
  */
-static INLINE void *
+static inline void *
 softpipe_resource_data(struct pipe_resource *pt)
 {
    if (!pt)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tile_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tile_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tile_cache.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tile_cache.c	2015-09-16 14:36:09.000000000 +0000
@@ -52,7 +52,7 @@
    (((x) + (y) * 5 + (l) * 10) % NUM_ENTRIES)
 
 
-static INLINE int addr_to_clear_pos(union tile_address addr)
+static inline int addr_to_clear_pos(union tile_address addr)
 {
    int pos;
    pos = addr.bits.layer * (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE);
@@ -63,7 +63,7 @@
 /**
  * Is the tile at (x,y) in cleared state?
  */
-static INLINE uint
+static inline uint
 is_clear_flag_set(const uint *bitvec, union tile_address addr, unsigned max)
 {
    int pos, bit;
@@ -77,7 +77,7 @@
 /**
  * Mark the tile at (x,y) as not cleared.
  */
-static INLINE void
+static inline void
 clear_clear_flag(uint *bitvec, union tile_address addr, unsigned max)
 {
    int pos;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tile_cache.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tile_cache.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/softpipe/sp_tile_cache.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/softpipe/sp_tile_cache.h	2015-09-16 14:36:09.000000000 +0000
@@ -128,7 +128,7 @@
                     union tile_address addr );
 
 
-static INLINE union tile_address
+static inline union tile_address
 tile_address( unsigned x,
               unsigned y, unsigned layer )
 {
@@ -143,7 +143,7 @@
 
 /* Quickly retrieve tile if it matches last lookup.
  */
-static INLINE struct softpipe_cached_tile *
+static inline struct softpipe_cached_tile *
 sp_get_cached_tile(struct softpipe_tile_cache *tc, 
                    int x, int y, int layer )
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/include/svga3d_shaderdefs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/include/svga3d_shaderdefs.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/include/svga3d_shaderdefs.h	2015-09-16 14:36:09.000000000 +0000
@@ -507,7 +507,7 @@
  *----------------------------------------------------------------------
  */
 
-static INLINE SVGA3dShaderRegType
+static inline SVGA3dShaderRegType
 SVGA3dShaderGetRegType(uint32 token)
 {
    SVGA3dShaderSrcToken src;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/include/svga_overlay.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/include/svga_overlay.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/include/svga_overlay.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/include/svga_overlay.h	2015-09-16 14:36:09.000000000 +0000
@@ -133,7 +133,7 @@
  *----------------------------------------------------------------------
  */
 
-static INLINE Bool
+static inline Bool
 VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
                          uint32 *width,                     // IN / OUT
                          uint32 *height,                    // IN / OUT
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -20,8 +20,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/SConscript	2015-09-16 14:36:09.000000000 +0000
@@ -11,7 +11,6 @@
 if env['gcc'] or env['clang']:
 	env.Append(CPPDEFINES = [
 		'HAVE_STDINT_H', 
-		'HAVE_SYS_TYPES_H',
 	])
 	
 env.Prepend(CPPPATH = [
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_cmd.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_cmd.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_cmd.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_cmd.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,7 @@
  *----------------------------------------------------------------------
  */
 
-static INLINE void
+static inline void
 surface_to_surfaceid(struct svga_winsys_context *swc, // IN
                      struct pipe_surface *surface,    // IN
                      SVGA3dSurfaceImageId *id,        // OUT
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -485,20 +485,20 @@
  * Inline conversion functions.  These are better-typed than the
  * macros used previously:
  */
-static INLINE struct svga_context *
+static inline struct svga_context *
 svga_context( struct pipe_context *pipe )
 {
    return (struct svga_context *)pipe;
 }
 
 
-static INLINE boolean
+static inline boolean
 svga_have_gb_objects(const struct svga_context *svga)
 {
    return svga_screen(svga->pipe.screen)->sws->have_gb_objects;
 }
 
-static INLINE boolean
+static inline boolean
 svga_have_gb_dma(const struct svga_context *svga)
 {
    return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_debug.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_debug.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_debug.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_debug.h	2015-09-16 14:36:09.000000000 +0000
@@ -53,7 +53,7 @@
 #define DBSTR(x) ""
 #endif
 
-static INLINE void
+static inline void
 SVGA_DBG( unsigned flag, const char *fmt, ... )
 {
 #ifdef DEBUG 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_draw_private.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_draw_private.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_draw_private.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_draw_private.h	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,7 @@
  * PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON.  We convert
  * those to other types of primitives with index/translation code.
  */
-static INLINE unsigned
+static inline unsigned
 svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
 {
    switch (mode) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svgadump/svga_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svgadump/svga_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svgadump/svga_shader.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svgadump/svga_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -56,7 +56,7 @@
    unsigned is_reg:1;
 };
 
-static INLINE unsigned
+static inline unsigned
 sh_reg_type( struct sh_reg reg )
 {
    return reg.type_lo | (reg.type_hi << 3);
@@ -138,7 +138,7 @@
    unsigned is_reg:1;
 };
 
-static INLINE unsigned
+static inline unsigned
 sh_dstreg_type( struct sh_dstreg reg )
 {
    return reg.type_lo | (reg.type_hi << 3);
@@ -169,7 +169,7 @@
    unsigned is_reg:1;
 };
 
-static INLINE unsigned
+static inline unsigned
 sh_srcreg_type( struct sh_srcreg reg )
 {
    return reg.type_lo | (reg.type_hi << 3);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_blend.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_blend.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_blend.c	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_blend.c	2015-09-16 14:36:09.000000000 +0000
@@ -33,7 +33,7 @@
 #include "svga_hw_reg.h"
 
 
-static INLINE unsigned
+static inline unsigned
 svga_translate_blend_factor(unsigned factor)
 {
    switch (factor) {
@@ -58,7 +58,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 svga_translate_blend_func(unsigned mode)
 {
    switch (mode) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_depthstencil.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_depthstencil.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_depthstencil.c	2012-05-02 13:56:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_depthstencil.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,7 +32,7 @@
 #include "svga_hw_reg.h"
 
 
-static INLINE unsigned
+static inline unsigned
 svga_translate_compare_func(unsigned func)
 {
    switch (func) {
@@ -50,7 +50,7 @@
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 svga_translate_stencil_op(unsigned op)
 {
    switch (op) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_query.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -59,7 +59,7 @@
 
 
 /** cast wrapper */
-static INLINE struct svga_query *
+static inline struct svga_query *
 svga_query( struct pipe_query *q )
 {
    return (struct svga_query *)q;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_sampler.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_sampler.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_pipe_sampler.c	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_pipe_sampler.c	2015-09-16 14:36:09.000000000 +0000
@@ -35,7 +35,7 @@
 
 #include "svga_debug.h"
 
-static INLINE unsigned
+static inline unsigned
 translate_wrap_mode(unsigned wrap)
 {
    switch (wrap) {
@@ -68,7 +68,7 @@
    }
 }
 
-static INLINE unsigned translate_img_filter( unsigned filter )
+static inline unsigned translate_img_filter( unsigned filter )
 {
    switch (filter) {
    case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
@@ -79,7 +79,7 @@
    }
 }
 
-static INLINE unsigned translate_mip_filter( unsigned filter )
+static inline unsigned translate_mip_filter( unsigned filter )
 {
    switch (filter) {
    case PIPE_TEX_MIPFILTER_NONE:    return SVGA3D_TEX_FILTER_NONE;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_resource_buffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_resource_buffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_resource_buffer.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_resource_buffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
  * Vertex and index buffers need hardware backing.  Constant buffers
  * do not.  No other types of buffers currently supported.
  */
-static INLINE boolean
+static inline boolean
 svga_buffer_needs_hw_storage(unsigned usage)
 {
    return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_resource_buffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_resource_buffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_resource_buffer.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_resource_buffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -190,7 +190,7 @@
 };
 
 
-static INLINE struct svga_buffer *
+static inline struct svga_buffer *
 svga_buffer(struct pipe_resource *buffer)
 {
    if (buffer) {
@@ -205,7 +205,7 @@
  * Returns TRUE for user buffers.  We may
  * decide to use an alternate upload path for these buffers.
  */
-static INLINE boolean 
+static inline boolean 
 svga_buffer_is_user_buffer( struct pipe_resource *buffer )
 {
    if (buffer) {
@@ -219,7 +219,7 @@
  * Returns a pointer to a struct svga_winsys_screen given a
  * struct svga_buffer.
  */
-static INLINE struct svga_winsys_screen *
+static inline struct svga_winsys_screen *
 svga_buffer_winsys_screen(struct svga_buffer *sbuf)
 {
    return svga_screen(sbuf->b.b.screen)->sws;
@@ -230,7 +230,7 @@
  * Returns whether a buffer has hardware storage that is
  * visible to the GPU.
  */
-static INLINE boolean
+static inline boolean
 svga_buffer_has_hw_storage(struct svga_buffer *sbuf)
 {
    if (svga_buffer_winsys_screen(sbuf)->have_gb_objects)
@@ -242,7 +242,7 @@
 /**
  * Map the hardware storage of a buffer.
  */
-static INLINE void *
+static inline void *
 svga_buffer_hw_storage_map(struct svga_context *svga,
                            struct svga_buffer *sbuf,
                            unsigned flags, boolean *retry)
@@ -259,7 +259,7 @@
 /**
  * Unmap the hardware storage of a buffer.
  */
-static INLINE void
+static inline void
 svga_buffer_hw_storage_unmap(struct svga_context *svga,
                              struct svga_buffer *sbuf)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_resource_texture.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_resource_texture.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_resource_texture.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_resource_texture.h	2015-09-16 14:36:09.000000000 +0000
@@ -106,7 +106,7 @@
 };
 
 
-static INLINE struct svga_texture *svga_texture( struct pipe_resource *resource )
+static inline struct svga_texture *svga_texture( struct pipe_resource *resource )
 {
    struct svga_texture *tex = (struct svga_texture *)resource;
    assert(tex == NULL || tex->b.vtbl == &svga_texture_vtbl);
@@ -114,7 +114,7 @@
 }
 
 
-static INLINE struct svga_transfer *
+static inline struct svga_transfer *
 svga_transfer(struct pipe_transfer *transfer)
 {
    assert(transfer);
@@ -127,7 +127,7 @@
  * This is used to track updates to textures when we draw into
  * them via a surface.
  */
-static INLINE void
+static inline void
 svga_age_texture_view(struct svga_texture *tex, unsigned level)
 {
    assert(level < Elements(tex->view_age));
@@ -138,7 +138,7 @@
 /**
  * Mark the given texture face/level as being defined.
  */
-static INLINE void
+static inline void
 svga_define_texture_level(struct svga_texture *tex,
                           unsigned face,unsigned level)
 {
@@ -148,7 +148,7 @@
 }
 
 
-static INLINE bool
+static inline bool
 svga_is_texture_level_defined(const struct svga_texture *tex,
                               unsigned face, unsigned level)
 {
@@ -177,7 +177,7 @@
 }
 
 
-static INLINE void
+static inline void
 svga_set_texture_rendered_to(struct svga_texture *tex,
                              unsigned face, unsigned level)
 {
@@ -186,7 +186,7 @@
 }
 
 
-static INLINE void
+static inline void
 svga_clear_texture_rendered_to(struct svga_texture *tex,
                                unsigned face, unsigned level)
 {
@@ -195,7 +195,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 svga_was_texture_rendered_to(const struct svga_texture *tex,
                              unsigned face, unsigned level)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_sampler_view.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_sampler_view.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_sampler_view.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_sampler_view.h	2015-09-16 14:36:09.000000000 +0000
@@ -86,7 +86,7 @@
 void
 svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv);
 
-static INLINE void
+static inline void
 svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
 {
    struct svga_sampler_view *old = *ptr;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -309,6 +309,10 @@
    case PIPE_CAP_UMA:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
       return 0;
    }
 
@@ -377,6 +381,7 @@
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
          return 0;
       }
       /* If we get here, we failed to handle a cap above */
@@ -434,6 +439,7 @@
       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
          return 0;
       }
       /* If we get here, we failed to handle a cap above */
@@ -441,7 +447,9 @@
       return 0;
    case PIPE_SHADER_GEOMETRY:
    case PIPE_SHADER_COMPUTE:
-      /* no support for geometry or compute shaders at this time */
+   case PIPE_SHADER_TESS_CTRL:
+   case PIPE_SHADER_TESS_EVAL:
+      /* no support for geometry, tess or compute shaders at this time */
       return 0;
    default:
       debug_printf("Unexpected shader type (%u) query\n", shader);
@@ -541,21 +549,15 @@
 
 
 static boolean
-svga_fence_signalled(struct pipe_screen *screen,
-                     struct pipe_fence_handle *fence)
-{
-   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
-   return sws->fence_signalled(sws, fence, 0) == 0;
-}
-
-
-static boolean
 svga_fence_finish(struct pipe_screen *screen,
                   struct pipe_fence_handle *fence,
                   uint64_t timeout)
 {
    struct svga_winsys_screen *sws = svga_screen(screen)->sws;
 
+   if (!timeout)
+      return sws->fence_signalled(sws, fence, 0) == 0;
+
    SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
             __FUNCTION__, fence);
 
@@ -643,7 +645,6 @@
    screen->is_format_supported = svga_is_format_supported;
    screen->context_create = svga_context_create;
    screen->fence_reference = svga_fence_reference;
-   screen->fence_signalled = svga_fence_signalled;
    screen->fence_finish = svga_fence_finish;
    screen->get_driver_query_info = svga_get_driver_query_info;
    svgascreen->sws = sws;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_screen_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_screen_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_screen_cache.c	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_screen_cache.c	2015-09-16 14:36:09.000000000 +0000
@@ -76,7 +76,7 @@
 /**
  * Compute the bucket for this key.
  */
-static INLINE unsigned
+static inline unsigned
 svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
 {
    return util_hash_crc32(key, sizeof *key) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_screen.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -82,7 +82,7 @@
 
 #ifndef DEBUG
 /** cast wrapper */
-static INLINE struct svga_screen *
+static inline struct svga_screen *
 svga_screen(struct pipe_screen *pscreen)
 {
    return (struct svga_screen *) pscreen;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_shader.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
 /**
  * Check if a shader's bytecode exceeds the device limits.
  */
-static INLINE boolean
+static inline boolean
 svga_shader_too_large(const struct svga_context *svga,
                       const struct svga_shader_variant *variant)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_fs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_fs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_fs.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_fs.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,7 @@
 
 
 
-static INLINE int
+static inline int
 compare_fs_keys(const struct svga_fs_compile_key *a,
                 const struct svga_fs_compile_key *b)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_rss.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_rss.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_rss.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_rss.c	2015-09-16 14:36:09.000000000 +0000
@@ -61,7 +61,7 @@
 } while (0)
 
 
-static INLINE void
+static inline void
 svga_queue_rs( struct rs_queue *q,
                unsigned rss,
                unsigned value )
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_tss.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_tss.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_tss.c	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_tss.c	2015-09-16 14:36:09.000000000 +0000
@@ -274,7 +274,7 @@
 } while (0)
 
 
-static INLINE void 
+static inline void 
 svga_queue_tss( struct ts_queue *q,
                 unsigned unit,
                 unsigned tss,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_vs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_vs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_state_vs.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_state_vs.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,7 @@
 #include "svga_hw_reg.h"
 
 
-static INLINE int
+static inline int
 compare_vs_keys(const struct svga_vs_compile_key *a,
                 const struct svga_vs_compile_key *b)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_surface.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_surface.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_surface.h	2012-08-30 05:23:51.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_surface.h	2015-09-16 14:36:09.000000000 +0000
@@ -84,7 +84,7 @@
                          unsigned width, unsigned height, unsigned depth);
 
 
-static INLINE struct svga_surface *
+static inline struct svga_surface *
 svga_surface(struct pipe_surface *surface)
 {
    assert(surface);
@@ -92,7 +92,7 @@
 }
 
 
-static INLINE const struct svga_surface *
+static inline const struct svga_surface *
 svga_surface_const(const struct pipe_surface *surface)
 {
    assert(surface);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_swtnl_private.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_swtnl_private.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_swtnl_private.h	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_swtnl_private.h	2015-09-16 14:36:09.000000000 +0000
@@ -76,7 +76,7 @@
 /**
  * Basically a cast wrapper.
  */
-static INLINE struct svga_vbuf_render *
+static inline struct svga_vbuf_render *
 svga_vbuf_render( struct vbuf_render *render )
 {
    assert(render);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi.c	2015-09-16 14:36:09.000000000 +0000
@@ -84,7 +84,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 reserve(struct svga_shader_emitter *emit, unsigned nr_dwords)
 {
    if (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi_emit.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi_emit.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi_emit.h	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi_emit.h	2015-09-16 14:36:09.000000000 +0000
@@ -167,7 +167,7 @@
 
 
 /** Emit the given SVGA3dShaderInstToken opcode */
-static INLINE boolean
+static inline boolean
 emit_instruction(struct svga_shader_emitter *emit,
                  SVGA3dShaderInstToken opcode)
 {
@@ -176,7 +176,7 @@
 
 
 /** Generate a SVGA3dShaderInstToken for the given SVGA3D shader opcode */
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
 inst_token(unsigned opcode)
 {
    SVGA3dShaderInstToken inst;
@@ -192,7 +192,7 @@
  * Generate a SVGA3dShaderInstToken for the given SVGA3D shader opcode
  * with the predication flag set.
  */
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
 inst_token_predicated(unsigned opcode)
 {
    SVGA3dShaderInstToken inst;
@@ -209,7 +209,7 @@
  * Generate a SVGA3dShaderInstToken for a SETP instruction (set predicate)
  * using the given comparison operator (one of SVGA3DOPCOMP_xx).
  */
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
 inst_token_setp(unsigned operator)
 {
    SVGA3dShaderInstToken inst;
@@ -227,7 +227,7 @@
  * Note that this function is used to create tokens for output registers,
  * temp registers AND constants (see emit_def_const()).
  */
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
 dst_register(unsigned file, int number)
 {
    SVGA3dShaderDestToken dest;
@@ -255,7 +255,7 @@
  * Apply a writemask to the given SVGA3dShaderDestToken, returning a
  * new SVGA3dShaderDestToken.
  */
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
 writemask(SVGA3dShaderDestToken dest, unsigned mask)
 {
    assert(dest.mask & mask);
@@ -265,7 +265,7 @@
 
 
 /** Create a SVGA3dShaderSrcToken given a register file and number */
-static INLINE SVGA3dShaderSrcToken
+static inline SVGA3dShaderSrcToken
 src_token(unsigned file, int number)
 {
    SVGA3dShaderSrcToken src;
@@ -289,7 +289,7 @@
 
 
 /** Create a src_register given a register file and register number */
-static INLINE struct src_register
+static inline struct src_register
 src_register(unsigned file, int number)
 {
    struct src_register src;
@@ -301,7 +301,7 @@
 }
 
 /** Translate src_register into SVGA3dShaderDestToken */
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
 dst(struct src_register src)
 {
    return dst_register(SVGA3dShaderGetRegType(src.base.value), src.base.num);
@@ -309,7 +309,7 @@
 
 
 /** Translate SVGA3dShaderDestToken to a src_register */
-static INLINE struct src_register
+static inline struct src_register
 src(SVGA3dShaderDestToken dst)
 {
    return src_register(SVGA3dShaderGetRegType(dst.value), dst.num);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi.h	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi.h	2015-09-16 14:36:09.000000000 +0000
@@ -124,7 +124,7 @@
  * The real use of this information is matching vertex elements to
  * fragment shader inputs in the case where vertex shader is disabled.
  */
-static INLINE void svga_generate_vdecl_semantics( unsigned idx,
+static inline void svga_generate_vdecl_semantics( unsigned idx,
                                                   unsigned *usage,
                                                   unsigned *usage_index )
 {
@@ -140,12 +140,12 @@
 
 
 
-static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
+static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
 {
    return sizeof *key;
 }
 
-static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
+static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
 {
    return (const char *)&key->tex[key->num_textures] - (const char *)key;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi_insn.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi_insn.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/svga/svga_tgsi_insn.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/svga/svga_tgsi_insn.c	2015-09-16 14:36:09.000000000 +0000
@@ -1900,7 +1900,7 @@
                       emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
                       emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
 
-   boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE;
+   boolean saturate = insn->Instruction.Saturate;
 
    /* If doing compare processing or tex swizzle or saturation, we need to put
     * the fetched color into a temporary so it can be used as a source later on.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -49,13 +49,13 @@
 };
 
 
-static INLINE struct trace_query *
+static inline struct trace_query *
 trace_query(struct pipe_query *query) {
    return (struct trace_query *)query;
 }
 
 
-static INLINE struct pipe_query *
+static inline struct pipe_query *
 trace_query_unwrap(struct pipe_query *query)
 {
    if (query) {
@@ -66,7 +66,7 @@
 }
 
 
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 trace_resource_unwrap(struct trace_context *tr_ctx,
                      struct pipe_resource *resource)
 {
@@ -82,7 +82,7 @@
 }
 
 
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
 trace_surface_unwrap(struct trace_context *tr_ctx,
                      struct pipe_surface *surface)
 {
@@ -105,7 +105,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_draw_vbo(struct pipe_context *_pipe,
                        const struct pipe_draw_info *info)
 {
@@ -125,7 +125,7 @@
 }
 
 
-static INLINE struct pipe_query *
+static inline struct pipe_query *
 trace_context_create_query(struct pipe_context *_pipe,
                            unsigned query_type,
                            unsigned index)
@@ -163,7 +163,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_destroy_query(struct pipe_context *_pipe,
                             struct pipe_query *_query)
 {
@@ -185,7 +185,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 trace_context_begin_query(struct pipe_context *_pipe,
                           struct pipe_query *query)
 {
@@ -207,7 +207,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_end_query(struct pipe_context *_pipe,
                         struct pipe_query *query)
 {
@@ -227,7 +227,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 trace_context_get_query_result(struct pipe_context *_pipe,
                                struct pipe_query *_query,
                                boolean wait,
@@ -262,7 +262,7 @@
 }
 
 
-static INLINE void *
+static inline void *
 trace_context_create_blend_state(struct pipe_context *_pipe,
                                  const struct pipe_blend_state *state)
 {
@@ -285,7 +285,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_bind_blend_state(struct pipe_context *_pipe,
                                void *state)
 {
@@ -303,7 +303,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_delete_blend_state(struct pipe_context *_pipe,
                                  void *state)
 {
@@ -321,7 +321,7 @@
 }
 
 
-static INLINE void *
+static inline void *
 trace_context_create_sampler_state(struct pipe_context *_pipe,
                                    const struct pipe_sampler_state *state)
 {
@@ -344,7 +344,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_bind_sampler_states(struct pipe_context *_pipe,
                                   unsigned shader,
                                   unsigned start,
@@ -371,7 +371,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_delete_sampler_state(struct pipe_context *_pipe,
                                    void *state)
 {
@@ -389,7 +389,7 @@
 }
 
 
-static INLINE void *
+static inline void *
 trace_context_create_rasterizer_state(struct pipe_context *_pipe,
                                       const struct pipe_rasterizer_state *state)
 {
@@ -412,7 +412,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
                                     void *state)
 {
@@ -430,7 +430,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
                                       void *state)
 {
@@ -448,7 +448,7 @@
 }
 
 
-static INLINE void *
+static inline void *
 trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
                                                const struct pipe_depth_stencil_alpha_state *state)
 {
@@ -471,7 +471,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
                                              void *state)
 {
@@ -489,7 +489,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
                                                void *state)
 {
@@ -508,7 +508,7 @@
 
 
 #define TRACE_SHADER_STATE(shader_type) \
-   static INLINE void * \
+   static inline void * \
    trace_context_create_##shader_type##_state(struct pipe_context *_pipe, \
                                  const struct pipe_shader_state *state) \
    { \
@@ -524,7 +524,7 @@
       return result; \
    } \
     \
-   static INLINE void \
+   static inline void \
    trace_context_bind_##shader_type##_state(struct pipe_context *_pipe, \
                                void *state) \
    { \
@@ -537,7 +537,7 @@
       trace_dump_call_end(); \
    } \
     \
-   static INLINE void \
+   static inline void \
    trace_context_delete_##shader_type##_state(struct pipe_context *_pipe, \
                                  void *state) \
    { \
@@ -559,7 +559,7 @@
 #undef TRACE_SHADER_STATE
 
 
-static INLINE void *
+static inline void *
 trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
                                            unsigned num_elements,
                                            const struct  pipe_vertex_element *elements)
@@ -587,7 +587,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
                                          void *state)
 {
@@ -605,7 +605,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
                                            void *state)
 {
@@ -623,7 +623,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_blend_color(struct pipe_context *_pipe,
                               const struct pipe_blend_color *state)
 {
@@ -641,7 +641,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_stencil_ref(struct pipe_context *_pipe,
                               const struct pipe_stencil_ref *state)
 {
@@ -659,7 +659,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_clip_state(struct pipe_context *_pipe,
                              const struct pipe_clip_state *state)
 {
@@ -676,7 +676,7 @@
    trace_dump_call_end();
 }
 
-static INLINE void
+static inline void
 trace_context_set_sample_mask(struct pipe_context *_pipe,
                               unsigned sample_mask)
 {
@@ -693,7 +693,7 @@
    trace_dump_call_end();
 }
 
-static INLINE void
+static inline void
 trace_context_set_constant_buffer(struct pipe_context *_pipe,
                                   uint shader, uint index,
                                   struct pipe_constant_buffer *constant_buffer)
@@ -721,7 +721,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_framebuffer_state(struct pipe_context *_pipe,
                                     const struct pipe_framebuffer_state *state)
 {
@@ -751,7 +751,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_polygon_stipple(struct pipe_context *_pipe,
                                   const struct pipe_poly_stipple *state)
 {
@@ -769,7 +769,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_scissor_states(struct pipe_context *_pipe,
                                  unsigned start_slot,
                                  unsigned num_scissors,
@@ -791,7 +791,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_viewport_states(struct pipe_context *_pipe,
                                   unsigned start_slot,
                                   unsigned num_viewports,
@@ -938,7 +938,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_sampler_views(struct pipe_context *_pipe,
                                 unsigned shader,
                                 unsigned start,
@@ -974,7 +974,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_vertex_buffers(struct pipe_context *_pipe,
                                  unsigned start_slot, unsigned num_buffers,
                                  const struct pipe_vertex_buffer *buffers)
@@ -1008,7 +1008,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_index_buffer(struct pipe_context *_pipe,
                                const struct pipe_index_buffer *ib)
 {
@@ -1033,7 +1033,7 @@
 }
 
 
-static INLINE struct pipe_stream_output_target *
+static inline struct pipe_stream_output_target *
 trace_context_create_stream_output_target(struct pipe_context *_pipe,
                                           struct pipe_resource *res,
                                           unsigned buffer_offset,
@@ -1063,7 +1063,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_stream_output_target_destroy(
    struct pipe_context *_pipe,
    struct pipe_stream_output_target *target)
@@ -1082,7 +1082,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_set_stream_output_targets(struct pipe_context *_pipe,
                                         unsigned num_targets,
                                         struct pipe_stream_output_target **tgs,
@@ -1104,7 +1104,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_resource_copy_region(struct pipe_context *_pipe,
                                    struct pipe_resource *dst,
                                    unsigned dst_level,
@@ -1139,7 +1139,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_blit(struct pipe_context *_pipe,
                    const struct pipe_blit_info *_info)
 {
@@ -1181,7 +1181,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_clear(struct pipe_context *_pipe,
                     unsigned buffers,
                     const union pipe_color_union *color,
@@ -1210,7 +1210,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_clear_render_target(struct pipe_context *_pipe,
                                   struct pipe_surface *dst,
                                   const union pipe_color_union *color,
@@ -1237,7 +1237,7 @@
    trace_dump_call_end();
 }
 
-static INLINE void
+static inline void
 trace_context_clear_depth_stencil(struct pipe_context *_pipe,
                                   struct pipe_surface *dst,
                                   unsigned clear_flags,
@@ -1269,7 +1269,7 @@
    trace_dump_call_end();
 }
 
-static INLINE void
+static inline void
 trace_context_flush(struct pipe_context *_pipe,
                     struct pipe_fence_handle **fence,
                     unsigned flags)
@@ -1291,7 +1291,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_context_destroy(struct pipe_context *_pipe)
 {
    struct trace_context *tr_ctx = trace_context(_pipe);
@@ -1511,8 +1511,8 @@
 
 
 static void trace_context_set_tess_state(struct pipe_context *_context,
-                                         float default_outer_level[4],
-                                         float default_inner_level[2])
+                                         const float default_outer_level[4],
+                                         const float default_inner_level[2])
 {
    struct trace_context *tr_context = trace_context(_context);
    struct pipe_context *context = tr_context->pipe;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_context.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -54,7 +54,7 @@
 trace_context_check(const struct pipe_context *pipe);
 
 
-static INLINE struct trace_context *
+static inline struct trace_context *
 trace_context(struct pipe_context *pipe)
 {
    assert(pipe);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_dump.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_dump.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_dump.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_dump.c	2015-09-16 14:36:09.000000000 +0000
@@ -64,7 +64,7 @@
 static boolean dumping = FALSE;
 
 
-static INLINE void
+static inline void
 trace_dump_write(const char *buf, size_t size)
 {
    if (stream) {
@@ -73,14 +73,14 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_writes(const char *s)
 {
    trace_dump_write(s, strlen(s));
 }
 
 
-static INLINE void
+static inline void
 trace_dump_writef(const char *format, ...)
 {
    static char buf[1024];
@@ -93,7 +93,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_escape(const char *str)
 {
    const unsigned char *p = (const unsigned char *)str;
@@ -117,7 +117,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_indent(unsigned level)
 {
    unsigned i;
@@ -126,14 +126,14 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_newline(void)
 {
    trace_dump_writes("\n");
 }
 
 
-static INLINE void
+static inline void
 trace_dump_tag(const char *name)
 {
    trace_dump_writes("<");
@@ -142,7 +142,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_tag_begin(const char *name)
 {
    trace_dump_writes("<");
@@ -150,7 +150,7 @@
    trace_dump_writes(">");
 }
 
-static INLINE void
+static inline void
 trace_dump_tag_begin1(const char *name,
                       const char *attr1, const char *value1)
 {
@@ -164,7 +164,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_tag_begin2(const char *name,
                       const char *attr1, const char *value1,
                       const char *attr2, const char *value2)
@@ -183,7 +183,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_tag_begin3(const char *name,
                       const char *attr1, const char *value1,
                       const char *attr2, const char *value2,
@@ -207,7 +207,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_tag_end(const char *name)
 {
    trace_dump_writes("</");
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_dump_defines.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_dump_defines.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_dump_defines.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_dump_defines.h	2015-09-16 14:36:09.000000000 +0000
@@ -34,7 +34,7 @@
 #include "tr_dump.h"
 
 
-static INLINE void
+static inline void
 trace_dump_format(enum pipe_format format)
 {
    if (!trace_dumping_enabled_locked())
@@ -44,7 +44,7 @@
 }
 
 
-static INLINE void
+static inline void
 trace_dump_query_type(unsigned value)
 {
    if (!trace_dumping_enabled_locked())
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_public.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_public.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_public.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_public.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,6 +28,8 @@
 #ifndef TR_PUBLIC_H
 #define TR_PUBLIC_H
 
+#include "pipe/p_compiler.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -370,29 +370,6 @@
 
 
 static boolean
-trace_screen_fence_signalled(struct pipe_screen *_screen,
-                             struct pipe_fence_handle *fence)
-{
-   struct trace_screen *tr_scr = trace_screen(_screen);
-   struct pipe_screen *screen = tr_scr->screen;
-   int result;
-
-   trace_dump_call_begin("pipe_screen", "fence_signalled");
-
-   trace_dump_arg(ptr, screen);
-   trace_dump_arg(ptr, fence);
-
-   result = screen->fence_signalled(screen, fence);
-
-   trace_dump_ret(bool, result);
-
-   trace_dump_call_end();
-
-   return result;
-}
-
-
-static boolean
 trace_screen_fence_finish(struct pipe_screen *_screen,
                           struct pipe_fence_handle *fence,
                           uint64_t timeout)
@@ -503,7 +480,6 @@
    tr_scr->base.resource_get_handle = trace_screen_resource_get_handle;
    tr_scr->base.resource_destroy = trace_screen_resource_destroy;
    tr_scr->base.fence_reference = trace_screen_fence_reference;
-   tr_scr->base.fence_signalled = trace_screen_fence_signalled;
    tr_scr->base.fence_finish = trace_screen_fence_finish;
    tr_scr->base.flush_frontbuffer = trace_screen_flush_frontbuffer;
    tr_scr->base.get_timestamp = trace_screen_get_timestamp;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_texture.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_texture.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/trace/tr_texture.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/trace/tr_texture.h	2015-09-16 14:36:09.000000000 +0000
@@ -85,7 +85,7 @@
 };
 
 
-static INLINE struct trace_resource *
+static inline struct trace_resource *
 trace_resource(struct pipe_resource *texture)
 {
    if(!texture)
@@ -95,7 +95,7 @@
 }
 
 
-static INLINE struct trace_surface *
+static inline struct trace_surface *
 trace_surface(struct pipe_surface *surface)
 {
    if(!surface)
@@ -105,7 +105,7 @@
 }
 
 
-static INLINE struct trace_sampler_view *
+static inline struct trace_sampler_view *
 trace_sampler_view(struct pipe_sampler_view *sampler_view)
 {
    if (!sampler_view)
@@ -114,7 +114,7 @@
 }
 
 
-static INLINE struct trace_transfer *
+static inline struct trace_transfer *
 trace_transfer(struct pipe_transfer *transfer)
 {
    if(!transfer)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/Android.mk	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,37 @@
+# Copyright (C) 2014 Emil Velikov <emil.l.velikov@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# Makefile.sources provides C_SOURCES, which becomes LOCAL_SRC_FILES below.
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := \
+	$(C_SOURCES)
+
+LOCAL_SHARED_LIBRARIES := libdrm
+# We need libmesa_glsl to get NIR's generated include directories.
+LOCAL_STATIC_LIBRARIES := libmesa_glsl
+LOCAL_MODULE := libmesa_pipe_vc4
+# GALLIUM_COMMON_MK is defined outside this file; the module is built as a static library.
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/Makefile.am	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/Makefile.am	1970-01-01 00:00:00.000000000 +0000
@@ -1,40 +0,0 @@
-# Copyright © 2014 Broadcom
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include Makefile.sources
-include $(top_srcdir)/src/gallium/Automake.inc
-
-if USE_VC4_SIMULATOR
-SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1
-endif
-
-AM_CFLAGS = \
-	$(LIBDRM_CFLAGS) \
-	$(GALLIUM_DRIVER_CFLAGS) \
-	$(SIM_CFLAGS) \
-	-I$(top_srcdir)/src/mesa/ \
-	-I$(srcdir)/../ \
-	$()
-
-noinst_LTLIBRARIES = libvc4_kernel.la
-
-libvc4_kernel_la_SOURCES = $(C_SOURCES)
-libvc4_kernel_la_LDFLAGS = $(SIM_LDFLAGS)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/Makefile.sources	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/Makefile.sources	1970-01-01 00:00:00.000000000 +0000
@@ -1,6 +0,0 @@
-C_SOURCES := \
-	vc4_drv.h \
-	vc4_gem.c \
-	vc4_validate.c \
-	vc4_validate_shaders.c \
-	$()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_drv.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_drv.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_drv.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_drv.h	2015-09-16 14:36:09.000000000 +0000
@@ -26,19 +26,6 @@
 
 #include "vc4_simulator_validate.h"
 
-enum vc4_bo_mode {
-	VC4_MODE_UNDECIDED,
-	VC4_MODE_TILE_ALLOC,
-	VC4_MODE_TSDA,
-	VC4_MODE_RENDER,
-	VC4_MODE_SHADER,
-};
-
-struct vc4_bo_exec_state {
-	struct drm_gem_cma_object *bo;
-	enum vc4_bo_mode mode;
-};
-
 struct vc4_exec_info {
 	/* Sequence number for this bin/render job. */
 	uint64_t seqno;
@@ -49,9 +36,14 @@
 	/* This is the array of BOs that were looked up at the start of exec.
 	 * Command validation will use indices into this array.
 	 */
-	struct vc4_bo_exec_state *bo;
+	struct drm_gem_cma_object **bo;
 	uint32_t bo_count;
 
+	/* List of other BOs used in the job that need to be released
+	 * once the job is complete.
+	 */
+	struct list_head unref_list;
+
 	/* Current unvalidated indices into @bo loaded by the non-hardware
 	 * VC4_PACKET_GEM_HANDLES.
 	 */
@@ -69,7 +61,6 @@
 	 * command lists.
 	 */
 	struct vc4_shader_state {
-		uint8_t packet;
 		uint32_t addr;
 		/* Maximum vertex index referenced by any primitive using this
 		 * shader state.
@@ -83,14 +74,12 @@
 	uint32_t shader_state_count;
 
 	bool found_tile_binning_mode_config_packet;
-	bool found_tile_rendering_mode_config_packet;
 	bool found_start_tile_binning_packet;
 	bool found_increment_semaphore_packet;
-	bool found_wait_on_semaphore_packet;
+	bool found_flush;
 	uint8_t bin_tiles_x, bin_tiles_y;
-	uint32_t fb_width, fb_height;
-	uint32_t tile_alloc_init_block_size;
-	struct drm_gem_cma_object *tile_alloc_bo;
+	struct drm_gem_cma_object *tile_bo;
+	uint32_t tile_alloc_offset;
 
 	/**
 	 * Computed addresses pointing into exec_bo where we start the
@@ -99,6 +88,9 @@
 	uint32_t ct0ca, ct0ea;
 	uint32_t ct1ca, ct1ea;
 
+	/* Pointer to the unvalidated bin CL (if present). */
+	void *bin_u;
+
 	/* Pointers to the shader recs.  These paddr gets incremented as CL
 	 * packets are relocated in validate_gl_shader_state, and the vaddrs
 	 * (u and v) get incremented and size decremented as the shader recs
@@ -157,13 +149,10 @@
 
 /* vc4_validate.c */
 int
-vc4_validate_cl(struct drm_device *dev,
-                void *validated,
-                void *unvalidated,
-                uint32_t len,
-                bool is_bin,
-                bool has_bin,
-                struct vc4_exec_info *exec);
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec);
 
 int
 vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
@@ -171,4 +160,14 @@
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
 
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+				      uint32_t hindex);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+			struct drm_gem_cma_object *fbo,
+			uint32_t offset, uint8_t tiling_format,
+			uint32_t width, uint32_t height, uint8_t cpp);
+
 #endif /* VC4_DRV_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_gem.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_gem.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_gem.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_gem.c	2015-09-16 14:36:09.000000000 +0000
@@ -25,24 +25,26 @@
 
 #include "vc4_drv.h"
 
-int
-vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
+/*
+ * Copies in the user's binning command list and generates the validated bin
+ * CL, along with associated data (shader records, uniforms).
+ */
+static int
+vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 {
 	struct drm_vc4_submit_cl *args = exec->args;
 	void *temp = NULL;
-	void *bin, *render;
+	void *bin;
 	int ret = 0;
 	uint32_t bin_offset = 0;
-	uint32_t render_offset = bin_offset + args->bin_cl_size;
-	uint32_t shader_rec_offset = roundup(render_offset +
-					     args->render_cl_size, 16);
+	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
+					     16);
 	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
 	uint32_t exec_size = uniforms_offset + args->uniforms_size;
 	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
 					  args->shader_rec_count);
 
-	if (shader_rec_offset < render_offset ||
-	    uniforms_offset < shader_rec_offset ||
+	if (uniforms_offset < shader_rec_offset ||
 	    exec_size < uniforms_offset ||
 	    args->shader_rec_count >= (UINT_MAX /
 					  sizeof(struct vc4_shader_state)) ||
@@ -66,7 +68,6 @@
 		goto fail;
 	}
 	bin = temp + bin_offset;
-	render = temp + render_offset;
 	exec->shader_rec_u = temp + shader_rec_offset;
 	exec->uniforms_u = temp + uniforms_offset;
 	exec->shader_state = temp + exec_size;
@@ -80,14 +81,6 @@
 		goto fail;
 	}
 
-	ret = copy_from_user(render,
-			     (void __user *)(uintptr_t)args->render_cl,
-			     args->render_cl_size);
-	if (ret) {
-		DRM_ERROR("Failed to copy in render cl\n");
-		goto fail;
-	}
-
 	ret = copy_from_user(exec->shader_rec_u,
 			     (void __user *)(uintptr_t)args->shader_rec,
 			     args->shader_rec_size);
@@ -114,8 +107,12 @@
 	}
 #endif
 
+	list_addtail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
+		     &exec->unref_list);
+
 	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
-	exec->ct1ca = exec->exec_bo->paddr + render_offset;
+
+	exec->bin_u = bin;
 
 	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
 	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
@@ -125,23 +122,10 @@
 	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
 	exec->uniforms_size = args->uniforms_size;
 
-	ret = vc4_validate_cl(dev,
-			      exec->exec_bo->vaddr + bin_offset,
-			      bin,
-			      args->bin_cl_size,
-			      true,
-			      args->bin_cl_size != 0,
-			      exec);
-	if (ret)
-		goto fail;
-
-	ret = vc4_validate_cl(dev,
-			      exec->exec_bo->vaddr + render_offset,
-			      render,
-			      args->render_cl_size,
-			      false,
-			      args->bin_cl_size != 0,
-			      exec);
+	ret = vc4_validate_bin_cl(dev,
+				  exec->exec_bo->vaddr + bin_offset,
+				  bin,
+				  exec);
 	if (ret)
 		goto fail;
 
@@ -152,4 +136,25 @@
 	return ret;
 }
 
+int
+vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	int ret = 0;
+	/* The bin CL is optional: process it if given, else zero ct0ca/ct0ea. */
+	if (exec->args->bin_cl_size != 0) {
+		ret = vc4_get_bcl(dev, exec);
+		if (ret)
+			goto fail;
+	} else {
+		exec->ct0ca = exec->ct0ea = 0;
+	}
+	/* vc4_get_rcl() runs for every submit that reaches this point. */
+	ret = vc4_get_rcl(dev, exec);
+	if (ret)
+		goto fail;
+	/* Success reaches the label too, with ret == 0; errors return the helper's code. */
+fail:
+	return ret;
+}
+
 #endif /* USE_VC4_SIMULATOR */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_packet.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_packet.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_packet.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_packet.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,393 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_PACKET_H
+#define VC4_PACKET_H
+
+enum vc4_packet {
+        VC4_PACKET_HALT = 0,
+        VC4_PACKET_NOP = 1,
+
+        VC4_PACKET_FLUSH = 4,
+        VC4_PACKET_FLUSH_ALL = 5,
+        VC4_PACKET_START_TILE_BINNING = 6,
+        VC4_PACKET_INCREMENT_SEMAPHORE = 7,
+        VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
+
+        VC4_PACKET_BRANCH = 16,
+        VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
+
+        VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
+        VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
+        VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
+        VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
+        VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
+        VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
+
+        VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
+        VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
+
+        VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
+        VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
+
+        VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
+
+        VC4_PACKET_GL_SHADER_STATE = 64,
+        VC4_PACKET_NV_SHADER_STATE = 65,
+        VC4_PACKET_VG_SHADER_STATE = 66,
+
+        VC4_PACKET_CONFIGURATION_BITS = 96,
+        VC4_PACKET_FLAT_SHADE_FLAGS = 97,
+        VC4_PACKET_POINT_SIZE = 98,
+        VC4_PACKET_LINE_WIDTH = 99,
+        VC4_PACKET_RHT_X_BOUNDARY = 100,
+        VC4_PACKET_DEPTH_OFFSET = 101,
+        VC4_PACKET_CLIP_WINDOW = 102,
+        VC4_PACKET_VIEWPORT_OFFSET = 103,
+        VC4_PACKET_Z_CLIPPING = 104,
+        VC4_PACKET_CLIPPER_XY_SCALING = 105,
+        VC4_PACKET_CLIPPER_Z_SCALING = 106,
+
+        VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
+        VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
+        VC4_PACKET_CLEAR_COLORS = 114,
+        VC4_PACKET_TILE_COORDINATES = 115,
+
+        /* Not an actual hardware packet -- this is what we use to put
+         * references to GEM bos in the command stream, since we need the u32
+         * in the actual address packet in order to store the offset from the
+         * start of the BO.
+         */
+        VC4_PACKET_GEM_HANDLES = 254,
+} __attribute__ ((__packed__));
+
+#define VC4_PACKET_HALT_SIZE						1
+#define VC4_PACKET_NOP_SIZE						1
+#define VC4_PACKET_FLUSH_SIZE						1
+#define VC4_PACKET_FLUSH_ALL_SIZE					1
+#define VC4_PACKET_START_TILE_BINNING_SIZE				1
+#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE				1
+#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE				1
+#define VC4_PACKET_BRANCH_SIZE						5
+#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE				5
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE				1
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE			1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE			5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE			5
+#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE			7
+#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE			7
+#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE				14
+#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE				10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE				1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE			1
+#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE				2
+#define VC4_PACKET_GL_SHADER_STATE_SIZE					5
+#define VC4_PACKET_NV_SHADER_STATE_SIZE					5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE					5
+#define VC4_PACKET_CONFIGURATION_BITS_SIZE				4
+#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE				5
+#define VC4_PACKET_POINT_SIZE_SIZE					5
+#define VC4_PACKET_LINE_WIDTH_SIZE					5
+#define VC4_PACKET_RHT_X_BOUNDARY_SIZE					3
+#define VC4_PACKET_DEPTH_OFFSET_SIZE					5
+#define VC4_PACKET_CLIP_WINDOW_SIZE					9
+#define VC4_PACKET_VIEWPORT_OFFSET_SIZE					5
+#define VC4_PACKET_Z_CLIPPING_SIZE					9
+#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE				9
+#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE				9
+#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE			16
+#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE			11
+#define VC4_PACKET_CLEAR_COLORS_SIZE					14
+#define VC4_PACKET_TILE_COORDINATES_SIZE				3
+#define VC4_PACKET_GEM_HANDLES_SIZE					9
+
+#define VC4_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low)) /* bits high..low set; NOTE(review): signed int shift, overflows with 32-bit int when high == 31 -- consider 1u */
+/* Shifts value to field##_SHIFT and asserts it fits field##_MASK; uses the GNU statement expression extension */
+#define VC4_SET_FIELD(value, field)                                       \
+        ({                                                                \
+                uint32_t fieldval = (value) << field ## _SHIFT;		  \
+                assert((fieldval & ~ field ## _MASK) == 0);               \
+                fieldval & field ## _MASK;                                \
+         })
+/* Extracts a field: masks word with field##_MASK, then shifts right by field##_SHIFT. */
+#define VC4_GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
+
+/** @{
+ * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
+ */
+#define VC4_TILING_FORMAT_LINEAR    0
+#define VC4_TILING_FORMAT_T         1
+#define VC4_TILING_FORMAT_LT        2
+/** @} */
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF                     (1 << 3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL       (1 << 2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS              (1 << 1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR           (1 << 0)
+/** @} */
+/** @{
+ *
+ * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
+ */
+
+#define VC4_LOADSTORE_TILE_BUFFER_EOF                  (1 << 3)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS      (1 << 1)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR   (1 << 0)
+
+/** @} */
+
+/** @{
+ *
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR     (1 << 14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR  (1 << 13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP         (1 << 12)
+
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK      VC4_MASK(9, 8)
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT     8
+#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888         0
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER    1
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565           2
+/** @} */
+
+/** @{
+ *
+ * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_MODE_MASK            VC4_MASK(7, 6)
+#define VC4_STORE_TILE_BUFFER_MODE_SHIFT           6
+#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0         (0 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4     (1 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16    (2 << 6)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK      VC4_MASK(5, 4)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT     4
+
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK      VC4_MASK(2, 0)
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT     0
+#define VC4_LOADSTORE_TILE_BUFFER_NONE             0
+#define VC4_LOADSTORE_TILE_BUFFER_COLOR            1
+#define VC4_LOADSTORE_TILE_BUFFER_ZS               2
+#define VC4_LOADSTORE_TILE_BUFFER_Z                3
+#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK          4
+#define VC4_LOADSTORE_TILE_BUFFER_FULL             5
+/** @} */
+
+#define VC4_INDEX_BUFFER_U8                        (0 << 4)
+#define VC4_INDEX_BUFFER_U16                       (1 << 4)
+
+/* This flag is only present in NV shader state. */
+#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS         (1 << 3)
+#define VC4_SHADER_FLAG_ENABLE_CLIPPING            (1 << 2)
+#define VC4_SHADER_FLAG_VS_POINT_SIZE              (1 << 1)
+#define VC4_SHADER_FLAG_FS_SINGLE_THREAD           (1 << 0)
+
+/** @{ byte 2 of config bits. */
+#define VC4_CONFIG_BITS_EARLY_Z_UPDATE             (1 << 1)
+#define VC4_CONFIG_BITS_EARLY_Z                    (1 << 0)
+/** @} */
+
+/** @{ byte 1 of config bits. */
+#define VC4_CONFIG_BITS_Z_UPDATE                   (1 << 7)
+/** same values in this 3-bit field as PIPE_FUNC_* */
+#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT           4
+#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE        (1 << 3)
+
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO    (0 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD        (1 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR         (2 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO       (3 << 1)
+
+#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT       (1 << 0)
+/** @} */
+
+/** @{ byte 0 of config bits. */
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X   (1 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X  (2 << 6)
+
+#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES        (1 << 4)
+#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET        (1 << 3)
+#define VC4_CONFIG_BITS_CW_PRIMITIVES              (1 << 2)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK           (1 << 1)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT          (1 << 0)
+/** @} */
+
+/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
+#define VC4_BIN_CONFIG_DB_NON_MS                   (1 << 7)
+
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK       VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT      5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32         0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64         1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128        2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256        3
+
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK  VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32    0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64    1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128   2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256   3
+
+#define VC4_BIN_CONFIG_AUTO_INIT_TSDA              (1 << 2)
+#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT           (1 << 1)
+#define VC4_BIN_CONFIG_MS_MODE_4X                  (1 << 0)
+/** @} */
+
+/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
+#define VC4_RENDER_CONFIG_DB_NON_MS                (1 << 12)
+#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11)
+#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G      (1 << 10)
+#define VC4_RENDER_CONFIG_COVERAGE_MODE            (1 << 9)
+#define VC4_RENDER_CONFIG_ENABLE_VG_MASK           (1 << 8)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK       VC4_MASK(7, 6)
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT      6
+
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X         (0 << 4) /* pre-shifted */
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X         (1 << 4) /* pre-shifted */
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X        (2 << 4) /* pre-shifted */
+
+#define VC4_RENDER_CONFIG_FORMAT_MASK              VC4_MASK(3, 2)
+#define VC4_RENDER_CONFIG_FORMAT_SHIFT             2
+#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED   0 /* unshifted field value */
+#define VC4_RENDER_CONFIG_FORMAT_RGBA8888          1 /* unshifted field value */
+#define VC4_RENDER_CONFIG_FORMAT_BGR565            2 /* unshifted field value */
+
+#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT        (1 << 1)
+#define VC4_RENDER_CONFIG_MS_MODE_4X               (1 << 0)
+
+#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX         (1 << 4) /* pre-shifted */
+#define VC4_PRIMITIVE_LIST_FORMAT_32_XY            (3 << 4) /* pre-shifted */
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS      (0 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES       (1 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES   (2 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT         (3 << 0)
+
+enum vc4_texture_data_type { /* numeric texture format codes, 0..17 */
+        VC4_TEXTURE_TYPE_RGBA8888 = 0,
+        VC4_TEXTURE_TYPE_RGBX8888 = 1,
+        VC4_TEXTURE_TYPE_RGBA4444 = 2,
+        VC4_TEXTURE_TYPE_RGBA5551 = 3,
+        VC4_TEXTURE_TYPE_RGB565 = 4,
+        VC4_TEXTURE_TYPE_LUMINANCE = 5,
+        VC4_TEXTURE_TYPE_ALPHA = 6,
+        VC4_TEXTURE_TYPE_LUMALPHA = 7,
+        VC4_TEXTURE_TYPE_ETC1 = 8,
+        VC4_TEXTURE_TYPE_S16F = 9,
+        VC4_TEXTURE_TYPE_S8 = 10,
+        VC4_TEXTURE_TYPE_S16 = 11,
+        VC4_TEXTURE_TYPE_BW1 = 12,
+        VC4_TEXTURE_TYPE_A4 = 13,
+        VC4_TEXTURE_TYPE_A1 = 14,
+        VC4_TEXTURE_TYPE_RGBA64 = 15, /* largest code that fits in 4 bits */
+        VC4_TEXTURE_TYPE_RGBA32R = 16, /* 16+ need a 5th bit -- TODO confirm where */
+        VC4_TEXTURE_TYPE_YUV422R = 17,
+};
+
+#define VC4_TEX_P0_OFFSET_MASK                     VC4_MASK(31, 12)
+#define VC4_TEX_P0_OFFSET_SHIFT                    12
+#define VC4_TEX_P0_CSWIZ_MASK                      VC4_MASK(11, 10)
+#define VC4_TEX_P0_CSWIZ_SHIFT                     10
+#define VC4_TEX_P0_CMMODE_MASK                     VC4_MASK(9, 9)
+#define VC4_TEX_P0_CMMODE_SHIFT                    9
+#define VC4_TEX_P0_FLIPY_MASK                      VC4_MASK(8, 8)
+#define VC4_TEX_P0_FLIPY_SHIFT                     8
+#define VC4_TEX_P0_TYPE_MASK                       VC4_MASK(7, 4)
+#define VC4_TEX_P0_TYPE_SHIFT                      4 /* field is 4 bits wide */
+#define VC4_TEX_P0_MIPLVLS_MASK                    VC4_MASK(3, 0)
+#define VC4_TEX_P0_MIPLVLS_SHIFT                   0
+
+#define VC4_TEX_P1_TYPE4_MASK                      VC4_MASK(31, 31)
+#define VC4_TEX_P1_TYPE4_SHIFT                     31 /* presumably type bit 4 */
+#define VC4_TEX_P1_HEIGHT_MASK                     VC4_MASK(30, 20)
+#define VC4_TEX_P1_HEIGHT_SHIFT                    20
+#define VC4_TEX_P1_ETCFLIP_MASK                    VC4_MASK(19, 19)
+#define VC4_TEX_P1_ETCFLIP_SHIFT                   19
+#define VC4_TEX_P1_WIDTH_MASK                      VC4_MASK(18, 8)
+#define VC4_TEX_P1_WIDTH_SHIFT                     8
+
+#define VC4_TEX_P1_MAGFILT_MASK                    VC4_MASK(7, 7)
+#define VC4_TEX_P1_MAGFILT_SHIFT                   7
+# define VC4_TEX_P1_MAGFILT_LINEAR                 0 /* unshifted value */
+# define VC4_TEX_P1_MAGFILT_NEAREST                1 /* unshifted value */
+
+#define VC4_TEX_P1_MINFILT_MASK                    VC4_MASK(6, 4)
+#define VC4_TEX_P1_MINFILT_SHIFT                   4
+# define VC4_TEX_P1_MINFILT_LINEAR                 0 /* unshifted value */
+# define VC4_TEX_P1_MINFILT_NEAREST                1
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR          2
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN           3
+# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR           4
+# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN            5
+
+#define VC4_TEX_P1_WRAP_T_MASK                     VC4_MASK(3, 2)
+#define VC4_TEX_P1_WRAP_T_SHIFT                    2
+#define VC4_TEX_P1_WRAP_S_MASK                     VC4_MASK(1, 0)
+#define VC4_TEX_P1_WRAP_S_SHIFT                    0
+# define VC4_TEX_P1_WRAP_REPEAT                    0 /* presumably for S and T */
+# define VC4_TEX_P1_WRAP_CLAMP                     1
+# define VC4_TEX_P1_WRAP_MIRROR                    2
+# define VC4_TEX_P1_WRAP_BORDER                    3
+
+#define VC4_TEX_P2_PTYPE_MASK                      VC4_MASK(31, 30)
+#define VC4_TEX_P2_PTYPE_SHIFT                     30
+# define VC4_TEX_P2_PTYPE_IGNORED                  0 /* unshifted value */
+# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE          1
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS   2
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS      3
+
+/* P2 fields used with VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE */
+#define VC4_TEX_P2_CMST_MASK                       VC4_MASK(29, 12)
+#define VC4_TEX_P2_CMST_SHIFT                      12
+#define VC4_TEX_P2_BSLOD_MASK                      VC4_MASK(0, 0)
+#define VC4_TEX_P2_BSLOD_SHIFT                     0
+
+/* P2 fields used with VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
+#define VC4_TEX_P2_CHEIGHT_MASK                    VC4_MASK(22, 12)
+#define VC4_TEX_P2_CHEIGHT_SHIFT                   12
+#define VC4_TEX_P2_CWIDTH_MASK                     VC4_MASK(10, 0)
+#define VC4_TEX_P2_CWIDTH_SHIFT                    0
+
+/* P2 fields used with VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
+#define VC4_TEX_P2_CYOFF_MASK                      VC4_MASK(22, 12)
+#define VC4_TEX_P2_CYOFF_SHIFT                     12
+#define VC4_TEX_P2_CXOFF_MASK                      VC4_MASK(10, 0)
+#define VC4_TEX_P2_CXOFF_SHIFT                     0
+
+#endif /* VC4_PACKET_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_render_cl.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_render_cl.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_render_cl.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,449 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Render command list generation
+ *
+ * In the VC4 driver, render command list generation is performed by the
+ * kernel instead of userspace.  We do this because validating a
+ * user-submitted command list is hard to get right and has high CPU overhead,
+ * while the number of valid configurations for render command lists is
+ * actually fairly low.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+struct vc4_rcl_setup { /* state shared while one RCL is being generated */
+	struct drm_gem_cma_object *color_read; /* color load source, or NULL */
+	struct drm_gem_cma_object *color_ms_write; /* color store target, or NULL */
+	struct drm_gem_cma_object *zs_read; /* Z/S load source, or NULL */
+	struct drm_gem_cma_object *zs_write; /* Z/S store target, or NULL */
+
+	struct drm_gem_cma_object *rcl; /* BO the packets are written into */
+	u32 next_offset; /* byte offset in rcl of the next packet write */
+};
+
+static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
+{	/* Store one byte at the RCL write cursor, then advance it by 1. */
+	*(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
+	setup->next_offset += 1;
+}
+
+static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
+{	/* Store a u16 at the RCL write cursor, then advance it by 2. */
+	*(u16 *)(setup->rcl->vaddr + setup->next_offset) = val; /* no alignment check */
+	setup->next_offset += 2;
+}
+
+static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
+{	/* Store a u32 at the RCL write cursor, then advance it by 4. */
+	*(u32 *)(setup->rcl->vaddr + setup->next_offset) = val; /* no alignment check */
+	setup->next_offset += 4;
+}
+
+
+/*
+ * Emits a no-op STORE_TILE_BUFFER_GENERAL (buffer NONE, address 0).
+ *
+ * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
+ * some sort before another load is triggered.  All clear-disable bits are set.
+ */
+static void vc4_store_before_load(struct vc4_rcl_setup *setup)
+{
+	rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+	rcl_u16(setup,
+		VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
+			      VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+		VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
+		VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
+		VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
+	rcl_u32(setup, 0); /* no address, since we're in None mode */
+}
+
+/*
+ * Emits a PACKET_TILE_COORDINATES for tile (x, y); it is always emitted.
+ *
+ * The tile coordinates packet triggers a pending load if there is one, is
+ * used for clipping during rendering, and determines where loads/stores happen
+ * relative to their base address.  x and y are each written as one byte.
+ */
+static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
+				 uint32_t x, uint32_t y)
+{
+	rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
+	rcl_u8(setup, x);
+	rcl_u8(setup, y);
+}
+
+static void emit_tile(struct vc4_exec_info *exec,
+		      struct vc4_rcl_setup *setup,
+		      uint8_t x, uint8_t y, bool first, bool last)
+{	/* Appends the packets for tile (x, y) at setup->next_offset. */
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0; /* a bin CL was submitted */
+
+	/* Queue the color load.  Note that the load doesn't actually
+	 * occur until the tile coords packet is processed, and only one
+	 * load may be outstanding at a time.
+	 */
+	if (setup->color_read) {
+		rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, args->color_read.bits);
+		rcl_u32(setup,
+			setup->color_read->paddr + args->color_read.offset);
+	}
+
+	if (setup->zs_read) {
+		if (setup->color_read) {
+			/* Exec the queued color load before queuing Z/S. */
+			vc4_tile_coordinates(setup, x, y);
+			vc4_store_before_load(setup);
+		}
+
+		rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, args->zs_read.bits);
+		rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset);
+	}
+
+	/* Clipping depends on tile coordinates having been
+	 * emitted, so we always need one here.
+	 */
+	vc4_tile_coordinates(setup, x, y);
+
+	/* Wait for the binner before jumping to the first
+	 * tile's lists.
+	 */
+	if (first && has_bin)
+		rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
+	if (has_bin) {
+		rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
+		rcl_u32(setup, (exec->tile_bo->paddr +
+				exec->tile_alloc_offset +
+				(y * exec->bin_tiles_x + x) * 32)); /* 32-byte slots */
+	}
+
+	if (setup->zs_write) {
+		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, args->zs_write.bits |
+			(setup->color_ms_write ?
+			 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
+		rcl_u32(setup,
+			(setup->zs_write->paddr + args->zs_write.offset) |
+			((last && !setup->color_ms_write) ? /* EOF only if final store */
+			 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+	}
+
+	if (setup->color_ms_write) {
+		if (setup->zs_write) {
+			/* Reset after previous store */
+			vc4_tile_coordinates(setup, x, y);
+		}
+
+		if (last)
+			rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
+		else
+			rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
+	}
+}
+
+/* Sizes, allocates and fills the RCL BO for the submitted tile range. */
+static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
+			     struct vc4_rcl_setup *setup)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+	uint8_t min_x_tile = args->min_x_tile;
+	uint8_t min_y_tile = args->min_y_tile;
+	uint8_t max_x_tile = args->max_x_tile;
+	uint8_t max_y_tile = args->max_y_tile;
+	/* Wider than u8: a span may be 256 tiles and x/y must reach 256. */
+	uint32_t xtiles = max_x_tile - min_x_tile + 1;
+	uint32_t ytiles = max_y_tile - min_y_tile + 1;
+	uint32_t x, y, size, loop_body_size;
+
+	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; /* emitted once */
+	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; /* emitted per tile */
+
+	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+		size += VC4_PACKET_CLEAR_COLORS_SIZE +
+			VC4_PACKET_TILE_COORDINATES_SIZE +
+			VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+	}
+
+	if (setup->color_read)
+		loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+	if (setup->zs_read) {
+		if (setup->color_read) {
+			loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+			loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+		}
+		loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+	}
+
+	if (has_bin) {
+		size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE; /* first tile only */
+		loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
+	}
+
+	if (setup->zs_write)
+		loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+	if (setup->color_ms_write) {
+		if (setup->zs_write)
+			loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+		loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
+	}
+	size += xtiles * ytiles * loop_body_size;
+
+	setup->rcl = drm_gem_cma_create(dev, size);
+	if (!setup->rcl)
+		return -ENOMEM;
+	list_addtail(&to_vc4_bo(&setup->rcl->base)->unref_head,
+		     &exec->unref_list);
+
+	rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+	rcl_u32(setup,
+		(setup->color_ms_write ?
+		 (setup->color_ms_write->paddr +
+		  args->color_ms_write.offset) :
+		 0));
+	rcl_u16(setup, args->width);
+	rcl_u16(setup, args->height);
+	rcl_u16(setup, args->color_ms_write.bits);
+
+	/* The tile buffer gets cleared when the previous tile is stored.  If
+	 * the clear values changed between frames, then the tile buffer has
+	 * stale clear values in it, so we have to do a store in None mode (no
+	 * writes) so that we trigger the tile buffer clear.
+	 */
+	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+		rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
+		rcl_u32(setup, args->clear_color[0]);
+		rcl_u32(setup, args->clear_color[1]);
+		rcl_u32(setup, args->clear_z);
+		rcl_u8(setup, args->clear_s);
+
+		vc4_tile_coordinates(setup, 0, 0);
+
+		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
+		rcl_u32(setup, 0); /* no address, since we're in None mode */
+	}
+
+	for (y = min_y_tile; y <= max_y_tile; y++) {
+		for (x = min_x_tile; x <= max_x_tile; x++) {
+			bool first = (x == min_x_tile && y == min_y_tile);
+			bool last = (x == max_x_tile && y == max_y_tile);
+			emit_tile(exec, setup, x, y, first, last);
+		}
+	}
+
+	BUG_ON(setup->next_offset != size); /* sizing must match what was emitted */
+	exec->ct1ca = setup->rcl->paddr;
+	exec->ct1ea = setup->rcl->paddr + setup->next_offset;
+
+	return 0;
+}
+
+static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
+				 struct drm_gem_cma_object **obj,
+				 struct drm_vc4_submit_rcl_surface *surf)
+{	/* Validates a load/store surface; 0 on success, -EINVAL otherwise. */
+	uint8_t tiling = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_TILING);
+	uint8_t buffer = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+	uint8_t format = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+	int cpp; /* passed to vc4_check_tex_size(); presumably bytes per pixel */
+
+	if (surf->pad != 0) { /* pad must be zero */
+		DRM_ERROR("Padding unset\n");
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0) /* surface unused: *obj is left untouched */
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
+			   VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
+			   VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
+		DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
+			  surf->bits);
+		return -EINVAL;
+	}
+
+	if (tiling > VC4_TILING_FORMAT_LT) {
+		DRM_ERROR("Bad tiling format\n");
+		return -EINVAL;
+	}
+
+	if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
+		if (format != 0) {
+			DRM_ERROR("No color format should be set for ZS\n");
+			return -EINVAL;
+		}
+		cpp = 4;
+	} else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
+		switch (format) {
+		case VC4_LOADSTORE_TILE_BUFFER_BGR565:
+		case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
+			cpp = 2;
+			break;
+		case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
+			cpp = 4;
+			break;
+		default:
+			DRM_ERROR("Bad tile buffer format\n");
+			return -EINVAL;
+		}
+	} else { /* only the ZS and COLOR buffers are accepted */
+		DRM_ERROR("Bad load/store buffer %d.\n", buffer);
+		return -EINVAL;
+	}
+
+	if (surf->offset & 0xf) { /* low four offset bits must be clear */
+		DRM_ERROR("load/store buffer must be 16b aligned.\n");
+		return -EINVAL;
+	}
+
+	if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+				exec->args->width, exec->args->height, cpp)) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
+			 struct drm_gem_cma_object **obj,
+			 struct drm_vc4_submit_rcl_surface *surf)
+{	/* Validates a render-config surface; 0 on success, -EINVAL otherwise. */
+	uint8_t tiling = VC4_GET_FIELD(surf->bits,
+				       VC4_RENDER_CONFIG_MEMORY_FORMAT);
+	uint8_t format = VC4_GET_FIELD(surf->bits,
+				       VC4_RENDER_CONFIG_FORMAT);
+	int cpp; /* passed to vc4_check_tex_size(); presumably bytes per pixel */
+
+	if (surf->pad != 0) { /* pad must be zero */
+		DRM_ERROR("Padding unset\n");
+		return -EINVAL;
+	}
+
+	if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
+			   VC4_RENDER_CONFIG_FORMAT_MASK)) { /* checked even if unused */
+		DRM_ERROR("Unknown bits in render config: 0x%04x\n",
+			  surf->bits);
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0) /* surface unused: *obj is left untouched */
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (tiling > VC4_TILING_FORMAT_LT) {
+		DRM_ERROR("Bad tiling format\n");
+		return -EINVAL;
+	}
+
+	switch (format) {
+	case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
+	case VC4_RENDER_CONFIG_FORMAT_BGR565:
+		cpp = 2;
+		break;
+	case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
+		cpp = 4;
+		break;
+	default:
+		DRM_ERROR("Bad tile buffer format\n");
+		return -EINVAL;
+	}
+
+	if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+				exec->args->width, exec->args->height, cpp)) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Validates the tile range and surfaces of a submit, then builds its RCL. */
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_rcl_setup setup = {0};
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+	int ret;
+
+	if (args->min_x_tile > args->max_x_tile ||
+	    args->min_y_tile > args->max_y_tile) {
+		DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
+			  args->min_x_tile, args->min_y_tile,
+			  args->max_x_tile, args->max_y_tile);
+		return -EINVAL;
+	}
+
+	/* max_*_tile are inclusive indices, bin_tiles_* are tile counts. */
+	if (has_bin &&
+	    (args->max_x_tile >= exec->bin_tiles_x ||
+	     args->max_y_tile >= exec->bin_tiles_y)) {
+		DRM_ERROR("Render tiles (%d,%d) outside of bin config (%d,%d)\n",
+			  args->max_x_tile, args->max_y_tile,
+			  exec->bin_tiles_x, exec->bin_tiles_y);
+		return -EINVAL;
+	}
+
+	ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_ms_surface_setup(exec, &setup.color_ms_write,
+				       &args->color_ms_write);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
+	if (ret)
+		return ret;
+
+	/* A job with no surface to write out should never reach us. */
+	if (!setup.color_ms_write && !setup.zs_write) {
+		DRM_ERROR("RCL requires color or Z/S write\n");
+		return -EINVAL;
+	}
+
+	return vc4_create_rcl_bo(dev, exec, &setup);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_validate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_validate.c	2015-09-16 14:36:09.000000000 +0000
@@ -94,42 +94,42 @@
 		height <= 4 * utile_height(cpp));
 }
 
-static bool
-vc4_use_bo(struct vc4_exec_info *exec,
-	   uint32_t hindex,
-	   enum vc4_bo_mode mode,
-	   struct drm_gem_cma_object **obj)
+struct drm_gem_cma_object *
+vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
 {
-	*obj = NULL;
+	struct drm_gem_cma_object *obj;
+	struct drm_vc4_bo *bo;
 
 	if (hindex >= exec->bo_count) {
 		DRM_ERROR("BO index %d greater than BO count %d\n",
 			  hindex, exec->bo_count);
-		return false;
+		return NULL;
 	}
+	obj = exec->bo[hindex];
+	bo = to_vc4_bo(&obj->base);
 
-	if (exec->bo[hindex].mode != mode) {
-		if (exec->bo[hindex].mode == VC4_MODE_UNDECIDED) {
-			exec->bo[hindex].mode = mode;
-		} else {
-			DRM_ERROR("BO index %d reused with mode %d vs %d\n",
-				  hindex, exec->bo[hindex].mode, mode);
-			return false;
-		}
+	if (bo->validated_shader) {
+		DRM_ERROR("Trying to use shader BO as something other than "
+			  "a shader\n");
+		return NULL;
 	}
 
-	*obj = exec->bo[hindex].bo;
-	return true;
+	return obj;
+}
+
+static struct drm_gem_cma_object *
+vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
+{
+	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
 }
 
 static bool
-vc4_use_handle(struct vc4_exec_info *exec,
-	       uint32_t gem_handles_packet_index,
-	       enum vc4_bo_mode mode,
-	       struct drm_gem_cma_object **obj)
+validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
 {
-	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index],
-			  mode, obj);
+	/* Note that the untrusted pointer passed to these functions is
+	 * incremented past the packet byte.
+	 */
+	return (untrusted - 1 == exec->bin_u + pos);
 }
 
 static uint32_t
@@ -147,33 +147,39 @@
 		return 36 + attribute_count * 8;
 }
 
-static bool
-check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
-	       uint32_t offset, uint8_t tiling_format,
-	       uint32_t width, uint32_t height, uint8_t cpp)
+bool
+vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
+		   uint32_t offset, uint8_t tiling_format,
+		   uint32_t width, uint32_t height, uint8_t cpp)
 {
 	uint32_t aligned_width, aligned_height, stride, size;
 	uint32_t utile_w = utile_width(cpp);
 	uint32_t utile_h = utile_height(cpp);
 
-	/* The values are limited by the packet/texture parameter bitfields,
-	 * so we don't need to worry as much about integer overflow.
+	/* The shaded vertex format stores signed 12.4 fixed point
+	 * (-2048,2047) offsets from the viewport center, so we should
+	 * never have a render target larger than 4096.  The texture
+	 * unit can only sample from 2048x2048, so it's even more
+	 * restricted.  This lets us avoid worrying about overflow in
+	 * our math.
 	 */
-	BUG_ON(width > 65535);
-	BUG_ON(height > 65535);
+	if (width > 4096 || height > 4096) {
+		DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
+		return false;
+	}
 
 	switch (tiling_format) {
 	case VC4_TILING_FORMAT_LINEAR:
-		aligned_width = roundup(width, utile_w);
+		aligned_width = round_up(width, utile_w);
 		aligned_height = height;
 		break;
 	case VC4_TILING_FORMAT_T:
-		aligned_width = roundup(width, utile_w * 8);
-		aligned_height = roundup(height, utile_h * 8);
+		aligned_width = round_up(width, utile_w * 8);
+		aligned_height = round_up(height, utile_h * 8);
 		break;
 	case VC4_TILING_FORMAT_LT:
-		aligned_width = roundup(width, utile_w);
-		aligned_height = roundup(height, utile_h);
+		aligned_width = round_up(width, utile_w);
+		aligned_height = round_up(height, utile_h);
 		break;
 	default:
 		DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
@@ -181,13 +187,6 @@
 	}
 
 	stride = aligned_width * cpp;
-
-	if (INT_MAX / stride < aligned_height) {
-		DRM_ERROR("Overflow in fbo size (%dx%d -> %dx%d)\n",
-			  width, height,
-			  aligned_width, aligned_height);
-		return false;
-	}
 	size = stride * aligned_height;
 
 	if (size + offset < size ||
@@ -202,14 +201,15 @@
 	return true;
 }
 
+
 static int
-validate_flush_all(VALIDATE_ARGS)
+validate_flush(VALIDATE_ARGS)
 {
-	if (exec->found_increment_semaphore_packet) {
-		DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
-			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
+		DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
 		return -EINVAL;
 	}
+	exec->found_flush = true;
 
 	return 0;
 }
@@ -234,132 +234,12 @@
 static int
 validate_increment_semaphore(VALIDATE_ARGS)
 {
-	if (exec->found_increment_semaphore_packet) {
-		DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
-		return -EINVAL;
-	}
-	exec->found_increment_semaphore_packet = true;
-
-	/* Once we've found the semaphore increment, there should be one FLUSH
-	 * then the end of the command list.  The FLUSH actually triggers the
-	 * increment, so we only need to make sure there
-	 */
-
-	return 0;
-}
-
-static int
-validate_wait_on_semaphore(VALIDATE_ARGS)
-{
-	if (exec->found_wait_on_semaphore_packet) {
-		DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n");
-		return -EINVAL;
-	}
-	exec->found_wait_on_semaphore_packet = true;
-
-	if (!exec->found_increment_semaphore_packet) {
-		DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without "
+	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
+		DRM_ERROR("Bin CL must end with "
 			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
 		return -EINVAL;
 	}
-
-	return 0;
-}
-
-static int
-validate_branch_to_sublist(VALIDATE_ARGS)
-{
-	struct drm_gem_cma_object *target;
-	uint32_t offset;
-
-	if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &target))
-		return -EINVAL;
-
-	if (target != exec->tile_alloc_bo) {
-		DRM_ERROR("Jumping to BOs other than tile alloc unsupported\n");
-		return -EINVAL;
-	}
-
-	if (!exec->found_wait_on_semaphore_packet) {
-		DRM_ERROR("Jumping to tile alloc before binning finished.\n");
-		return -EINVAL;
-	}
-
-	offset = *(uint32_t *)(untrusted + 0);
-	if (offset % exec->tile_alloc_init_block_size ||
-	    offset / exec->tile_alloc_init_block_size >=
-	    exec->bin_tiles_x * exec->bin_tiles_y) {
-		DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial "
-			  "tile allocation space.\n");
-		return -EINVAL;
-	}
-
-	*(uint32_t *)(validated + 0) = target->paddr + offset;
-
-	return 0;
-}
-
-/**
- * validate_loadstore_tile_buffer_general() - Validation for
- * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL and
- * VC4_PACKET_STORE_TILE_BUFFER_GENERAL.
- *
- * The two packets are nearly the same, except for the TLB-clearing management
- * bits not being present for loads.  Additionally, while stores are executed
- * immediately (using the current tile coordinates), loads are queued to be
- * executed when the tile coordinates packet occurs.
- *
- * Note that coordinates packets are validated to be within the declared
- * bin_x/y, which themselves are verified to match the rendering-configuration
- * FB width and height (which the hardware uses to clip loads and stores).
- */
-static int
-validate_loadstore_tile_buffer_general(VALIDATE_ARGS)
-{
-	uint32_t packet_b0 = *(uint8_t *)(untrusted + 0);
-	uint32_t packet_b1 = *(uint8_t *)(untrusted + 1);
-	struct drm_gem_cma_object *fbo;
-	uint32_t buffer_type = packet_b0 & 0xf;
-	uint32_t untrusted_address, offset, cpp;
-
-	switch (buffer_type) {
-	case VC4_LOADSTORE_TILE_BUFFER_NONE:
-		return 0;
-	case VC4_LOADSTORE_TILE_BUFFER_COLOR:
-		if ((packet_b1 & VC4_LOADSTORE_TILE_BUFFER_MASK) ==
-		    VC4_LOADSTORE_TILE_BUFFER_RGBA8888) {
-			cpp = 4;
-		} else {
-			cpp = 2;
-		}
-		break;
-
-	case VC4_LOADSTORE_TILE_BUFFER_Z:
-	case VC4_LOADSTORE_TILE_BUFFER_ZS:
-		cpp = 4;
-		break;
-
-	default:
-		DRM_ERROR("Load/store type %d unsupported\n", buffer_type);
-		return -EINVAL;
-	}
-
-	if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo))
-		return -EINVAL;
-
-	untrusted_address = *(uint32_t *)(untrusted + 2);
-	offset = untrusted_address & ~0xf;
-
-	if (!check_tex_size(exec, fbo, offset,
-			    ((packet_b0 &
-			      VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK) >>
-			     VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT),
-			    exec->fb_width, exec->fb_height, cpp)) {
-		return -EINVAL;
-	}
-
-	*(uint32_t *)(validated + 2) = (offset + fbo->paddr +
-					(untrusted_address & 0xf));
+	exec->found_increment_semaphore_packet = true;
 
 	return 0;
 }
@@ -374,11 +254,6 @@
 	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
 	struct vc4_shader_state *shader_state;
 
-	if (exec->found_increment_semaphore_packet) {
-		DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
-		return -EINVAL;
-	}
-
 	/* Check overflow condition */
 	if (exec->shader_state_count == 0) {
 		DRM_ERROR("shader state must precede primitives\n");
@@ -389,7 +264,8 @@
 	if (max_index > shader_state->max_index)
 		shader_state->max_index = max_index;
 
-	if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &ib))
+	ib = vc4_use_handle(exec, 0);
+	if (!ib)
 		return -EINVAL;
 
 	if (offset > ib->base.size ||
@@ -412,11 +288,6 @@
 	uint32_t max_index;
 	struct vc4_shader_state *shader_state;
 
-	if (exec->found_increment_semaphore_packet) {
-		DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
-		return -EINVAL;
-	}
-
 	/* Check overflow condition */
 	if (exec->shader_state_count == 0) {
 		DRM_ERROR("shader state must precede primitives\n");
@@ -446,7 +317,6 @@
 		return -EINVAL;
 	}
 
-	exec->shader_state[i].packet = VC4_PACKET_GL_SHADER_STATE;
 	exec->shader_state[i].addr = *(uint32_t *)untrusted;
 	exec->shader_state[i].max_index = 0;
 
@@ -465,41 +335,12 @@
 }
 
 static int
-validate_nv_shader_state(VALIDATE_ARGS)
-{
-	uint32_t i = exec->shader_state_count++;
-
-	if (i >= exec->shader_state_size) {
-		DRM_ERROR("More requests for shader states than declared\n");
-		return -EINVAL;
-	}
-
-	exec->shader_state[i].packet = VC4_PACKET_NV_SHADER_STATE;
-	exec->shader_state[i].addr = *(uint32_t *)untrusted;
-
-	if (exec->shader_state[i].addr & 15) {
-		DRM_ERROR("NV shader state address 0x%08x misaligned\n",
-			  exec->shader_state[i].addr);
-		return -EINVAL;
-	}
-
-	*(uint32_t *)validated = (exec->shader_state[i].addr +
-				  exec->shader_rec_p);
-
-	return 0;
-}
-
-static int
 validate_tile_binning_config(VALIDATE_ARGS)
 {
-	struct drm_gem_cma_object *tile_allocation;
-	struct drm_gem_cma_object *tile_state_data_array;
+	struct drm_device *dev = exec->exec_bo->base.dev;
 	uint8_t flags;
-	uint32_t tile_allocation_size;
-
-	if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) ||
-	    !vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array))
-		return -EINVAL;
+	uint32_t tile_state_size, tile_alloc_size;
+	uint32_t tile_count;
 
 	if (exec->found_tile_binning_mode_config_packet) {
 		DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
@@ -509,6 +350,7 @@
 
 	exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
 	exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
+	tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
 	flags = *(uint8_t *)(untrusted + 14);
 
 	if (exec->bin_tiles_x == 0 ||
@@ -518,15 +360,6 @@
 		return -EINVAL;
 	}
 
-	/* Our validation relies on the user not getting to set up their own
-	 * tile state/tile allocation BO contents.
-	 */
-	if (!(flags & VC4_BIN_CONFIG_AUTO_INIT_TSDA)) {
-		DRM_ERROR("binning config missing "
-			  "VC4_BIN_CONFIG_AUTO_INIT_TSDA\n");
-		return -EINVAL;
-	}
-
 	if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
 		     VC4_BIN_CONFIG_TILE_BUFFER_64BIT |
 		     VC4_BIN_CONFIG_MS_MODE_4X)) {
@@ -534,94 +367,52 @@
 		return -EINVAL;
 	}
 
-	if (*(uint32_t *)(untrusted + 0) != 0) {
-		DRM_ERROR("tile allocation offset != 0 unsupported\n");
-		return -EINVAL;
-	}
-	tile_allocation_size = *(uint32_t *)(untrusted + 4);
-	if (tile_allocation_size > tile_allocation->base.size) {
-		DRM_ERROR("tile allocation size %d > BO size %d\n",
-			  tile_allocation_size, tile_allocation->base.size);
-		return -EINVAL;
-	}
-	*(uint32_t *)validated = tile_allocation->paddr;
-	exec->tile_alloc_bo = tile_allocation;
-
-	exec->tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3));
-	if (exec->bin_tiles_x * exec->bin_tiles_y *
-	    exec->tile_alloc_init_block_size > tile_allocation_size) {
-		DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n",
-			  exec->bin_tiles_x * exec->bin_tiles_y *
-			  exec->tile_alloc_init_block_size,
-			  tile_allocation_size);
-		return -EINVAL;
-	}
-	if (*(uint32_t *)(untrusted + 8) != 0) {
-		DRM_ERROR("TSDA offset != 0 unsupported\n");
-		return -EINVAL;
-	}
-	if (exec->bin_tiles_x * exec->bin_tiles_y * 48 >
-	    tile_state_data_array->base.size) {
-		DRM_ERROR("TSDA of %db too small for %dx%d bin config\n",
-			  tile_state_data_array->base.size,
-			  exec->bin_tiles_x, exec->bin_tiles_y);
-	}
-	*(uint32_t *)(validated + 8) = tile_state_data_array->paddr;
-
-	return 0;
-}
-
-static int
-validate_tile_rendering_mode_config(VALIDATE_ARGS)
-{
-	struct drm_gem_cma_object *fbo;
-	uint32_t flags, offset, cpp;
-
-	if (exec->found_tile_rendering_mode_config_packet) {
-		DRM_ERROR("Duplicate VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
-		return -EINVAL;
-	}
-	exec->found_tile_rendering_mode_config_packet = true;
-
-	if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &fbo))
-		return -EINVAL;
-
-	exec->fb_width = *(uint16_t *)(untrusted + 4);
-	exec->fb_height = *(uint16_t *)(untrusted + 6);
-
-	flags = *(uint16_t *)(untrusted + 8);
-	if ((flags & VC4_RENDER_CONFIG_FORMAT_MASK) ==
-	    VC4_RENDER_CONFIG_FORMAT_RGBA8888) {
-		cpp = 4;
-	} else {
-		cpp = 2;
-	}
-
-	offset = *(uint32_t *)untrusted;
-	if (!check_tex_size(exec, fbo, offset,
-			    ((flags &
-			      VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK) >>
-			     VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT),
-			    exec->fb_width, exec->fb_height, cpp)) {
-		return -EINVAL;
-	}
+	/* The tile state data array is 48 bytes per tile, and we put it at
+	 * the start of a BO containing both it and the tile alloc.
+	 */
+	tile_state_size = 48 * tile_count;
 
-	*(uint32_t *)validated = fbo->paddr + offset;
+	/* Since the tile alloc array will follow us, align. */
+	exec->tile_alloc_offset = roundup(tile_state_size, 4096);
 
-	return 0;
-}
+	*(uint8_t *)(validated + 14) =
+		((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
+			    VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
+		 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
+		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
+			       VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
+		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
+			       VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
+
+	/* Initial block size. */
+	tile_alloc_size = 32 * tile_count;
+
+	/*
+	 * The initial allocation gets rounded to the next 256 bytes before
+	 * the hardware starts fulfilling further allocations.
+	 */
+	tile_alloc_size = roundup(tile_alloc_size, 256);
 
-static int
-validate_tile_coordinates(VALIDATE_ARGS)
-{
-	uint8_t tile_x = *(uint8_t *)(untrusted + 0);
-	uint8_t tile_y = *(uint8_t *)(untrusted + 1);
+	/* Add space for the extra allocations.  This is what gets used first,
+	 * before overflow memory.  It must have at least 4096 bytes, but we
+	 * want to avoid overflow memory usage if possible.
+	 */
+	tile_alloc_size += 1024 * 1024;
 
-	if (tile_x * 64 >= exec->fb_width || tile_y * 64 >= exec->fb_height) {
-		DRM_ERROR("Tile coordinates %d,%d > render config %dx%d\n",
-			  tile_x, tile_y, exec->fb_width, exec->fb_height);
-		return -EINVAL;
-	}
+	exec->tile_bo = drm_gem_cma_create(dev, exec->tile_alloc_offset +
+					   tile_alloc_size);
+	if (!exec->tile_bo)
+		return -ENOMEM;
+	list_addtail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
+		     &exec->unref_list);
+
+	/* tile alloc address. */
+	*(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
+					exec->tile_alloc_offset);
+	/* tile alloc size. */
+	*(uint32_t *)(validated + 4) = tile_alloc_size;
+	/* tile state address. */
+	*(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
 
 	return 0;
 }
@@ -633,78 +424,60 @@
 	return 0;
 }
 
+#define VC4_DEFINE_PACKET(packet, name, func) \
+	[packet] = { packet ## _SIZE, name, func }
+
 static const struct cmd_info {
-	bool bin;
-	bool render;
 	uint16_t len;
 	const char *name;
 	int (*func)(struct vc4_exec_info *exec, void *validated,
 		    void *untrusted);
 } cmd_info[] = {
-	[VC4_PACKET_HALT] = { 1, 1, 1, "halt", NULL },
-	[VC4_PACKET_NOP] = { 1, 1, 1, "nop", NULL },
-	[VC4_PACKET_FLUSH] = { 1, 1, 1, "flush", NULL },
-	[VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", validate_flush_all },
-	[VC4_PACKET_START_TILE_BINNING] = { 1, 0, 1, "start tile binning", validate_start_tile_binning },
-	[VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore },
-	[VC4_PACKET_WAIT_ON_SEMAPHORE] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore },
-	/* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but
-	 * we only use it from the render CL in order to jump into the tile
-	 * allocation BO.
-	 */
-	[VC4_PACKET_BRANCH_TO_SUB_LIST] = { 0, 1, 5, "branch to sublist", validate_branch_to_sublist },
-	[VC4_PACKET_STORE_MS_TILE_BUFFER] = { 0, 1, 1, "store MS resolved tile color buffer", NULL },
-	[VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF] = { 0, 1, 1, "store MS resolved tile color buffer and EOF", NULL },
+	VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", validate_flush),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
+	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
 
-	[VC4_PACKET_STORE_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Store Tile Buffer General", validate_loadstore_tile_buffer_general },
-	[VC4_PACKET_LOAD_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Load Tile Buffer General", validate_loadstore_tile_buffer_general },
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list),
 
-	[VC4_PACKET_GL_INDEXED_PRIMITIVE] = { 1, 1, 14, "Indexed Primitive List", validate_indexed_prim_list },
-
-	[VC4_PACKET_GL_ARRAY_PRIMITIVE] = { 1, 1, 10, "Vertex Array Primitives", validate_gl_array_primitive },
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive),
 
 	/* This is only used by clipped primitives (packets 48 and 49), which
 	 * we don't support parsing yet.
 	 */
-	[VC4_PACKET_PRIMITIVE_LIST_FORMAT] = { 1, 1, 2, "primitive list format", NULL },
+	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
 
-	[VC4_PACKET_GL_SHADER_STATE] = { 1, 1, 5, "GL Shader State", validate_gl_shader_state },
-	[VC4_PACKET_NV_SHADER_STATE] = { 1, 1, 5, "NV Shader State", validate_nv_shader_state },
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
+	/* We don't support validating NV shader states. */
 
-	[VC4_PACKET_CONFIGURATION_BITS] = { 1, 1, 4, "configuration bits", NULL },
-	[VC4_PACKET_FLAT_SHADE_FLAGS] = { 1, 1, 5, "flat shade flags", NULL },
-	[VC4_PACKET_POINT_SIZE] = { 1, 1, 5, "point size", NULL },
-	[VC4_PACKET_LINE_WIDTH] = { 1, 1, 5, "line width", NULL },
-	[VC4_PACKET_RHT_X_BOUNDARY] = { 1, 1, 3, "RHT X boundary", NULL },
-	[VC4_PACKET_DEPTH_OFFSET] = { 1, 1, 5, "Depth Offset", NULL },
-	[VC4_PACKET_CLIP_WINDOW] = { 1, 1, 9, "Clip Window", NULL },
-	[VC4_PACKET_VIEWPORT_OFFSET] = { 1, 1, 5, "Viewport Offset", NULL },
-	[VC4_PACKET_CLIPPER_XY_SCALING] = { 1, 1, 9, "Clipper XY Scaling", NULL },
+	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL),
 	/* Note: The docs say this was also 105, but it was 106 in the
 	 * initial userland code drop.
 	 */
-	[VC4_PACKET_CLIPPER_Z_SCALING] = { 1, 1, 9, "Clipper Z Scale and Offset", NULL },
-
-	[VC4_PACKET_TILE_BINNING_MODE_CONFIG] = { 1, 0, 16, "tile binning configuration", validate_tile_binning_config },
-
-	[VC4_PACKET_TILE_RENDERING_MODE_CONFIG] = { 0, 1, 11, "tile rendering mode configuration", validate_tile_rendering_mode_config},
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL),
 
-	[VC4_PACKET_CLEAR_COLORS] = { 0, 1, 14, "Clear Colors", NULL },
+	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config),
 
-	[VC4_PACKET_TILE_COORDINATES] = { 0, 1, 3, "Tile Coordinates", validate_tile_coordinates },
-
-	[VC4_PACKET_GEM_HANDLES] = { 1, 1, 9, "GEM handles", validate_gem_handles },
+	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles),
 };
 
 int
-vc4_validate_cl(struct drm_device *dev,
-		void *validated,
-		void *unvalidated,
-		uint32_t len,
-		bool is_bin,
-		bool has_bin,
-		struct vc4_exec_info *exec)
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec)
 {
+	uint32_t len = exec->args->bin_cl_size;
 	uint32_t dst_offset = 0;
 	uint32_t src_offset = 0;
 
@@ -714,7 +487,7 @@
 		u8 cmd = *(uint8_t *)src_pkt;
 		const struct cmd_info *info;
 
-		if (cmd > ARRAY_SIZE(cmd_info)) {
+		if (cmd >= ARRAY_SIZE(cmd_info)) {
 			DRM_ERROR("0x%08x: packet %d out of bounds\n",
 				  src_offset, cmd);
 			return -EINVAL;
@@ -732,14 +505,6 @@
 			 src_offset, cmd, info->name, info->len);
 #endif
 
-		if ((is_bin && !info->bin) ||
-		    (!is_bin && !info->render)) {
-			DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n",
-				  src_offset, cmd, info->name,
-				  is_bin ? "binner" : "render");
-			return -EINVAL;
-		}
-
 		if (src_offset + info->len > len) {
 			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
 				  "exceeds bounds (0x%08x)\n",
@@ -770,30 +535,24 @@
 			break;
 	}
 
-	if (is_bin) {
-		exec->ct0ea = exec->ct0ca + dst_offset;
+	exec->ct0ea = exec->ct0ca + dst_offset;
 
-		if (has_bin && !exec->found_start_tile_binning_packet) {
-			DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
-			return -EINVAL;
-		}
-	} else {
-		if (!exec->found_tile_rendering_mode_config_packet) {
-			DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
-			return -EINVAL;
-		}
+	if (!exec->found_start_tile_binning_packet) {
+		DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
+		return -EINVAL;
+	}
 
-		/* Make sure that they actually consumed the semaphore
-		 * increment from the bin CL.  Otherwise a later submit would
-		 * have render execute immediately.
-		 */
-		if (exec->found_wait_on_semaphore_packet != has_bin) {
-			DRM_ERROR("Render CL %s VC4_PACKET_WAIT_ON_SEMAPHORE\n",
-				  exec->found_wait_on_semaphore_packet ?
-				  "has" : "missing");
-			return -EINVAL;
-		}
-		exec->ct1ea = exec->ct1ca + dst_offset;
+	/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
+	 * semaphore is used to trigger the render CL to start up, and the
+	 * FLUSH is what caps the bin lists with
+	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
+	 * render CL when they get called to) and actually triggers the queued
+	 * semaphore increment.
+	 */
+	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
+		DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+			  "VC4_PACKET_FLUSH\n");
+		return -EINVAL;
 	}
 
 	return 0;
@@ -814,27 +573,28 @@
 	uint32_t p3 = (sample->p_offset[3] != ~0 ?
 		       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
 	uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
-	uint32_t offset = p0 & ~0xfff;
-	uint32_t miplevels = (p0 & 15);
-	uint32_t width = (p1 >> 8) & 2047;
-	uint32_t height = (p1 >> 20) & 2047;
+	uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
+	uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
+	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
+	uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
 	uint32_t cpp, tiling_format, utile_w, utile_h;
 	uint32_t i;
 	uint32_t cube_map_stride = 0;
 	enum vc4_texture_data_type type;
 
-	if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, &tex))
+	tex = vc4_use_bo(exec, texture_handle_index);
+	if (!tex)
 		return false;
 
 	if (sample->is_direct) {
 		uint32_t remaining_size = tex->base.size - p0;
 		if (p0 > tex->base.size - 4) {
 			DRM_ERROR("UBO offset greater than UBO size\n");
-			return false;
+			goto fail;
 		}
 		if (p1 > remaining_size - 4) {
 			DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
-			return false;
+			goto fail;
 		}
 		*validated_p0 = tex->paddr + p0;
 		return true;
@@ -845,24 +605,27 @@
 	if (height == 0)
 		height = 2048;
 
-	if (p0 & (1 << 9)) {
-		if ((p2 & (3 << 30)) == (1 << 30))
-			cube_map_stride = p2 & 0x3ffff000;
-		if ((p3 & (3 << 30)) == (1 << 30)) {
+	if (p0 & VC4_TEX_P0_CMMODE_MASK) {
+		if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
+		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
+			cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
+		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
+		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
 			if (cube_map_stride) {
 				DRM_ERROR("Cube map stride set twice\n");
-				return false;
+				goto fail;
 			}
 
-			cube_map_stride = p3 & 0x3ffff000;
+			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
 		}
 		if (!cube_map_stride) {
 			DRM_ERROR("Cube map stride not set\n");
-			return false;
+			goto fail;
 		}
 	}
 
-	type = ((p0 >> 4) & 15) | ((p1 >> 31) << 4);
+	type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
+		(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
 
 	switch (type) {
 	case VC4_TEXTURE_TYPE_RGBA8888:
@@ -891,7 +654,7 @@
 	case VC4_TEXTURE_TYPE_YUV422R:
 	default:
 		DRM_ERROR("Texture format %d unsupported\n", type);
-		return false;
+		goto fail;
 	}
 	utile_w = utile_width(cpp);
 	utile_h = utile_height(cpp);
@@ -905,9 +668,9 @@
 			tiling_format = VC4_TILING_FORMAT_T;
 	}
 
-	if (!check_tex_size(exec, tex, offset + cube_map_stride * 5,
-			    tiling_format, width, height, cpp)) {
-		return false;
+	if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
+				tiling_format, width, height, cpp)) {
+		goto fail;
 	}
 
 	/* The mipmap levels are stored before the base of the texture.  Make
@@ -927,15 +690,15 @@
 
 		switch (tiling_format) {
 		case VC4_TILING_FORMAT_T:
-			aligned_width = roundup(level_width, utile_w * 8);
-			aligned_height = roundup(level_height, utile_h * 8);
+			aligned_width = round_up(level_width, utile_w * 8);
+			aligned_height = round_up(level_height, utile_h * 8);
 			break;
 		case VC4_TILING_FORMAT_LT:
-			aligned_width = roundup(level_width, utile_w);
-			aligned_height = roundup(level_height, utile_h);
+			aligned_width = round_up(level_width, utile_w);
+			aligned_height = round_up(level_height, utile_h);
 			break;
 		default:
-			aligned_width = roundup(level_width, utile_w);
+			aligned_width = round_up(level_width, utile_w);
 			aligned_height = level_height;
 			break;
 		}
@@ -948,7 +711,7 @@
 				  i, level_width, level_height,
 				  aligned_width, aligned_height,
 				  level_size, offset);
-			return false;
+			goto fail;
 		}
 
 		offset -= level_size;
@@ -957,54 +720,37 @@
 	*validated_p0 = tex->paddr + p0;
 
 	return true;
+ fail:
+	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+	DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
+	DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
+	DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
+	return false;
 }
 
 static int
-validate_shader_rec(struct drm_device *dev,
-		    struct vc4_exec_info *exec,
-		    struct vc4_shader_state *state)
+validate_gl_shader_rec(struct drm_device *dev,
+		       struct vc4_exec_info *exec,
+		       struct vc4_shader_state *state)
 {
 	uint32_t *src_handles;
 	void *pkt_u, *pkt_v;
-	enum shader_rec_reloc_type {
-		RELOC_CODE,
-		RELOC_VBO,
-	};
-	struct shader_rec_reloc {
-		enum shader_rec_reloc_type type;
-		uint32_t offset;
+	static const uint32_t shader_reloc_offsets[] = {
+		4, /* fs */
+		16, /* vs */
+		28, /* cs */
 	};
-	static const struct shader_rec_reloc gl_relocs[] = {
-		{ RELOC_CODE, 4 },  /* fs */
-		{ RELOC_CODE, 16 }, /* vs */
-		{ RELOC_CODE, 28 }, /* cs */
-	};
-	static const struct shader_rec_reloc nv_relocs[] = {
-		{ RELOC_CODE, 4 }, /* fs */
-		{ RELOC_VBO, 12 }
-	};
-	const struct shader_rec_reloc *relocs;
-	struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
-	uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
+	uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
+	struct drm_gem_cma_object *bo[shader_reloc_count + 8];
+	uint32_t nr_attributes, nr_relocs, packet_size;
 	int i;
-	struct vc4_validated_shader_info *validated_shader = NULL;
-
-	if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
-		relocs = nv_relocs;
-		nr_fixed_relocs = ARRAY_SIZE(nv_relocs);
 
-		packet_size = 16;
-	} else {
-		relocs = gl_relocs;
-		nr_fixed_relocs = ARRAY_SIZE(gl_relocs);
-
-		nr_attributes = state->addr & 0x7;
-		if (nr_attributes == 0)
-			nr_attributes = 8;
-		packet_size = gl_shader_rec_size(state->addr);
-	}
-	nr_relocs = nr_fixed_relocs + nr_attributes;
+	nr_attributes = state->addr & 0x7;
+	if (nr_attributes == 0)
+		nr_attributes = 8;
+	packet_size = gl_shader_rec_size(state->addr);
 
+	nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
 	if (nr_relocs * 4 > exec->shader_rec_size) {
 		DRM_ERROR("overflowed shader recs reading %d handles "
 			  "from %d bytes left\n",
@@ -1034,21 +780,30 @@
 	exec->shader_rec_v += roundup(packet_size, 16);
 	exec->shader_rec_size -= packet_size;
 
-	for (i = 0; i < nr_relocs; i++) {
-		enum vc4_bo_mode mode;
-
-		if (i < nr_fixed_relocs && relocs[i].type == RELOC_CODE)
-			mode = VC4_MODE_SHADER;
-		else
-			mode = VC4_MODE_RENDER;
+	if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
+		DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
+		return -EINVAL;
+	}
 
-		if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
-			return false;
+	for (i = 0; i < shader_reloc_count; i++) {
+		if (src_handles[i] >= exec->bo_count) {
+			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
+			return -EINVAL;
 		}
+		/* NOTE(review): assumes exec->bo[] holds bo_count entries. */
+		bo[i] = exec->bo[src_handles[i]];
+		if (!bo[i])
+			return -EINVAL;
+	}
+	for (i = shader_reloc_count; i < nr_relocs; i++) {
+		bo[i] = vc4_use_bo(exec, src_handles[i]);
+		if (!bo[i])
+			return -EINVAL;
 	}
 
-	for (i = 0; i < nr_fixed_relocs; i++) {
-		uint32_t o = relocs[i].offset;
+	for (i = 0; i < shader_reloc_count; i++) {
+		struct vc4_validated_shader_info *validated_shader;
+		uint32_t o = shader_reloc_offsets[i];
 		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 		uint32_t *texture_handles_u;
 		void *uniform_data_u;
@@ -1056,58 +811,50 @@
 
 		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
 
-		switch (relocs[i].type) {
-		case RELOC_CODE:
-			if (src_offset != 0) {
-				DRM_ERROR("Shaders must be at offset 0 of "
-					  "the BO.\n");
-				goto fail;
-			}
+		if (src_offset != 0) {
+			DRM_ERROR("Shaders must be at offset 0 of "
+				  "the BO.\n");
+			return -EINVAL;
+		}
 
-			kfree(validated_shader);
-			validated_shader = vc4_validate_shader(bo[i]);
-			if (!validated_shader)
-				goto fail;
+		validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+		if (!validated_shader)
+			return -EINVAL;
 
-			if (validated_shader->uniforms_src_size >
-			    exec->uniforms_size) {
-				DRM_ERROR("Uniforms src buffer overflow\n");
-				goto fail;
-			}
+		if (validated_shader->uniforms_src_size >
+		    exec->uniforms_size) {
+			DRM_ERROR("Uniforms src buffer overflow\n");
+			return -EINVAL;
+		}
 
-			texture_handles_u = exec->uniforms_u;
-			uniform_data_u = (texture_handles_u +
-					  validated_shader->num_texture_samples);
-
-			memcpy(exec->uniforms_v, uniform_data_u,
-			       validated_shader->uniforms_size);
-
-			for (tex = 0;
-			     tex < validated_shader->num_texture_samples;
-			     tex++) {
-				if (!reloc_tex(exec,
-					       uniform_data_u,
-					       &validated_shader->texture_samples[tex],
-					       texture_handles_u[tex])) {
-					goto fail;
-				}
+		texture_handles_u = exec->uniforms_u;
+		uniform_data_u = (texture_handles_u +
+				  validated_shader->num_texture_samples);
+
+		memcpy(exec->uniforms_v, uniform_data_u,
+		       validated_shader->uniforms_size);
+
+		for (tex = 0;
+		     tex < validated_shader->num_texture_samples;
+		     tex++) {
+			if (!reloc_tex(exec,
+				       uniform_data_u,
+				       &validated_shader->texture_samples[tex],
+				       texture_handles_u[tex])) {
+				return -EINVAL;
 			}
+		}
 
-			*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
-
-			exec->uniforms_u += validated_shader->uniforms_src_size;
-			exec->uniforms_v += validated_shader->uniforms_size;
-			exec->uniforms_p += validated_shader->uniforms_size;
-
-			break;
+		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
 
-		case RELOC_VBO:
-			break;
-		}
+		exec->uniforms_u += validated_shader->uniforms_src_size;
+		exec->uniforms_v += validated_shader->uniforms_size;
+		exec->uniforms_p += validated_shader->uniforms_size;
 	}
 
 	for (i = 0; i < nr_attributes; i++) {
-		struct drm_gem_cma_object *vbo = bo[nr_fixed_relocs + i];
+		struct drm_gem_cma_object *vbo =
+			bo[ARRAY_SIZE(shader_reloc_offsets) + i];
 		uint32_t o = 36 + i * 8;
 		uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
 		uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
@@ -1137,13 +884,7 @@
 		*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
 	}
 
-	kfree(validated_shader);
-
 	return 0;
-
-fail:
-	kfree(validated_shader);
-	return -EINVAL;
 }
 
 int
@@ -1154,7 +895,7 @@
 	int ret = 0;
 
 	for (i = 0; i < exec->shader_state_count; i++) {
-		ret = validate_shader_rec(dev, exec, &exec->shader_state[i]);
+		ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
 		if (ret)
 			return ret;
 	}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c	2015-09-16 14:36:09.000000000 +0000
@@ -58,7 +58,8 @@
 	 *
 	 * This is used for the validation of direct address memory reads.
 	 */
-	uint32_t live_clamp_offsets[32 + 32 + 4];
+	uint32_t live_min_clamp_offsets[32 + 32 + 4];
+	bool live_max_clamp_regs[32 + 32 + 4];
 };
 
 static uint32_t
@@ -77,6 +78,25 @@
 	}
 }
 
+static uint32_t
+raddr_add_a_to_live_reg_index(uint64_t inst)
+{
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	/* Returns the live_* array slot for ADD's A operand, or ~0 if none. */
+	if (add_a == QPU_MUX_A) {
+		return raddr_a; /* A-mux reads map directly to raddr_a */
+	} else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
+		return 32 + raddr_b; /* B-mux reads sit 32 slots later */
+	} else if (add_a <= QPU_MUX_R3) {
+		return 64 + add_a; /* remaining muxes up to R3 start at 64 */
+	} else {
+		return ~0; /* anything else has no tracked slot */
+	}
+}
+
 static bool
 is_tmu_submit(uint32_t waddr)
 {
@@ -136,9 +156,8 @@
 	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
 	if (is_direct) {
-		uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
 		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
-		uint32_t clamp_offset = ~0;
+		uint32_t clamp_reg, clamp_offset;
 
 		if (sig == QPU_SIG_SMALL_IMM) {
 			DRM_ERROR("direct TMU read used small immediate\n");
@@ -159,14 +178,13 @@
 		 * This is arbitrary, but simpler than supporting flipping the
 		 * two either way.
 		 */
-		if (add_a == QPU_MUX_A) {
-			clamp_offset = validation_state->live_clamp_offsets[raddr_a];
-		} else if (add_a == QPU_MUX_B) {
-			clamp_offset = validation_state->live_clamp_offsets[32 + raddr_b];
-		} else if (add_a <= QPU_MUX_R4) {
-			clamp_offset = validation_state->live_clamp_offsets[64 + add_a];
+		clamp_reg = raddr_add_a_to_live_reg_index(inst);
+		if (clamp_reg == ~0) {
+			DRM_ERROR("direct TMU load wasn't clamped\n");
+			return false;
 		}
 
+		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
 		if (clamp_offset == ~0) {
 			DRM_ERROR("direct TMU load wasn't clamped\n");
 			return false;
@@ -229,8 +247,6 @@
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
-	bool is_b = is_mul != ((inst & QPU_WS) != 0);
-	uint32_t live_reg_index;
 
 	switch (waddr) {
 	case QPU_W_UNIFORMS_ADDRESS:
@@ -285,14 +301,6 @@
                 return true;
 	}
 
-	/* Clear out the live offset clamp tracking for the written register.
-	 * If this particular instruction is setting up an offset clamp, it'll
-	 * get tracked immediately after we return.
-	 */
-	live_reg_index = waddr_to_live_reg_index(waddr, is_b);
-	if (live_reg_index != ~0)
-		validation_state->live_clamp_offsets[live_reg_index] = ~0;
-
 	return true;
 }
 
@@ -301,26 +309,72 @@
 		  struct vc4_validated_shader_info *validated_shader,
 		  struct vc4_shader_validation_state *validation_state)
 {
+	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
 	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
 	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
 	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
 	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
 	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
-	bool is_b = inst & QPU_WS;
-	uint32_t live_reg_index;
+	bool ws = inst & QPU_WS;
+	uint32_t lri_add_a, lri_add, lri_mul;
+	bool add_a_is_min_0;
 
-	if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN)
+	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
+	 * before we clear previous live state.
+	 */
+	lri_add_a = raddr_add_a_to_live_reg_index(inst);
+	add_a_is_min_0 = (lri_add_a != ~0 &&
+			  validation_state->live_max_clamp_regs[lri_add_a]);
+
+	/* Clear live state for registers written by our instruction. */
+	lri_add = waddr_to_live_reg_index(waddr_add, ws);
+	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
+	if (lri_mul != ~0) {
+		validation_state->live_max_clamp_regs[lri_mul] = false;
+		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
+	}
+	if (lri_add != ~0) {
+		validation_state->live_max_clamp_regs[lri_add] = false;
+		validation_state->live_min_clamp_offsets[lri_add] = ~0;
+	} else {
+		/* Nothing further to do for live tracking, since only ADDs
+		 * generate new live clamp registers.
+		 */
 		return;
+	}
+
+	/* Now, handle remaining live clamp tracking for the ADD operation. */
 
-	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
-	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
-	      sig != QPU_SIG_SMALL_IMM)) {
+	if (cond_add != QPU_COND_ALWAYS)
 		return;
-	}
 
-	live_reg_index = waddr_to_live_reg_index(waddr_add, is_b);
-	if (live_reg_index != ~0) {
-		validation_state->live_clamp_offsets[live_reg_index] =
+	if (op_add == QPU_A_MAX) {
+		/* Track live clamps of a value to a minimum of 0 (in either
+		 * arg).
+		 */
+		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
+		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
+			return;
+		}
+
+		validation_state->live_max_clamp_regs[lri_add] = true;
+	} else if (op_add == QPU_A_MIN) {
+		/* Track live clamps of a value clamped to a minimum of 0 and
+		 * a maximum of some uniform's offset.
+		 */
+		if (!add_a_is_min_0)
+			return;
+
+		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
+		      sig != QPU_SIG_SMALL_IMM)) {
+			return;
+		}
+
+		validation_state->live_min_clamp_offsets[lri_add] =
 			validated_shader->uniforms_size;
 	}
 }
@@ -382,8 +436,8 @@
 
 	for (i = 0; i < 8; i++)
 		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
-	for (i = 0; i < ARRAY_SIZE(validation_state.live_clamp_offsets); i++)
-		validation_state.live_clamp_offsets[i] = ~0;
+	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
+		validation_state.live_min_clamp_offsets[i] = ~0;
 
 	shader = shader_obj->vaddr;
 	max_ip = shader_obj->base.size / sizeof(uint64_t);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -19,8 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-SUBDIRS = kernel
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
@@ -34,11 +32,10 @@
 	$(LIBDRM_CFLAGS) \
 	$(GALLIUM_DRIVER_CFLAGS) \
 	$(SIM_CFLAGS) \
-	-I$(top_srcdir)/src/mesa/ \
 	$()
 
 noinst_LTLIBRARIES = libvc4.la
 
 libvc4_la_SOURCES = $(C_SOURCES)
-libvc4_la_LIBADD = $(SIM_LIB) kernel/libvc4_kernel.la
+libvc4_la_LIBADD = $(SIM_LIB)
 libvc4_la_LDFLAGS = $(SIM_LDFLAGS)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -1,4 +1,10 @@
 C_SOURCES := \
+	kernel/vc4_drv.h \
+	kernel/vc4_gem.c \
+	kernel/vc4_packet.h \
+	kernel/vc4_render_cl.c \
+	kernel/vc4_validate.c \
+	kernel/vc4_validate_shaders.c \
 	vc4_blit.c \
 	vc4_bufmgr.c \
 	vc4_bufmgr.h \
@@ -13,6 +19,8 @@
 	vc4_fence.c \
 	vc4_formats.c \
 	vc4_job.c \
+	vc4_nir_lower_blend.c \
+	vc4_nir_lower_io.c \
 	vc4_opt_algebraic.c \
 	vc4_opt_constant_folding.c \
 	vc4_opt_copy_propagation.c \
@@ -20,7 +28,6 @@
 	vc4_opt_dead_code.c \
 	vc4_opt_small_immediates.c \
 	vc4_opt_vpm_writes.c \
-	vc4_packet.h \
 	vc4_program.c \
 	vc4_qir.c \
 	vc4_qir_lower_uniforms.c \
@@ -44,4 +51,5 @@
 	vc4_state.c \
 	vc4_tiling.c \
 	vc4_tiling.h \
+	vc4_uniforms.c \
 	$()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,86 +26,7 @@
 #include "util/u_blitter.h"
 #include "vc4_context.h"
 
-static void
-vc4_tile_blit_color_rcl(struct vc4_context *vc4,
-                        struct vc4_surface *dst_surf,
-                        struct vc4_surface *src_surf)
-{
-        struct vc4_resource *src = vc4_resource(src_surf->base.texture);
-        struct vc4_resource *dst = vc4_resource(dst_surf->base.texture);
-
-        uint32_t min_x_tile = 0;
-        uint32_t min_y_tile = 0;
-        uint32_t max_x_tile = (dst_surf->base.width - 1) / 64;
-        uint32_t max_y_tile = (dst_surf->base.height - 1) / 64;
-        uint32_t xtiles = max_x_tile - min_x_tile + 1;
-        uint32_t ytiles = max_y_tile - min_y_tile + 1;
-        uint32_t reloc_size = 9;
-        uint32_t config_size = 11 + reloc_size;
-        uint32_t loadstore_size = 7 + reloc_size;
-        uint32_t tilecoords_size = 3;
-        cl_ensure_space(&vc4->rcl,
-                        config_size +
-                        xtiles * ytiles * (loadstore_size * 2 +
-                                           tilecoords_size * 1));
-        cl_ensure_space(&vc4->bo_handles, 2 * sizeof(uint32_t));
-        cl_ensure_space(&vc4->bo_pointers, 2 * sizeof(struct vc4_bo *));
-
-        cl_start_reloc(&vc4->rcl, 1);
-        cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
-        cl_reloc(vc4, &vc4->rcl, dst->bo, dst_surf->offset);
-        cl_u16(&vc4->rcl, dst_surf->base.width);
-        cl_u16(&vc4->rcl, dst_surf->base.height);
-        cl_u16(&vc4->rcl, ((dst_surf->tiling <<
-                            VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
-                           (vc4_rt_format_is_565(dst_surf->base.format) ?
-                            VC4_RENDER_CONFIG_FORMAT_BGR565 :
-                            VC4_RENDER_CONFIG_FORMAT_RGBA8888)));
-
-        uint32_t src_hindex = vc4_gem_hindex(vc4, src->bo);
-
-        for (int y = min_y_tile; y <= max_y_tile; y++) {
-                for (int x = min_x_tile; x <= max_x_tile; x++) {
-                        bool end_of_frame = (x == max_x_tile &&
-                                             y == max_y_tile);
-
-                        cl_start_reloc(&vc4->rcl, 1);
-                        cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
-                        cl_u8(&vc4->rcl,
-                              VC4_LOADSTORE_TILE_BUFFER_COLOR |
-                              (src_surf->tiling <<
-                               VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
-                        cl_u8(&vc4->rcl,
-                              vc4_rt_format_is_565(src_surf->base.format) ?
-                              VC4_LOADSTORE_TILE_BUFFER_BGR565 :
-                              VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
-                        cl_reloc_hindex(&vc4->rcl, src_hindex,
-                                        src_surf->offset);
-
-                        cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
-                        cl_u8(&vc4->rcl, x);
-                        cl_u8(&vc4->rcl, y);
-
-                        if (end_of_frame) {
-                                cl_u8(&vc4->rcl,
-                                      VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
-                        } else {
-                                cl_u8(&vc4->rcl,
-                                      VC4_PACKET_STORE_MS_TILE_BUFFER);
-                        }
-                }
-        }
-
-        vc4->draw_min_x = 0;
-        vc4->draw_min_y = 0;
-        vc4->draw_max_x = dst_surf->base.width;
-        vc4->draw_max_y = dst_surf->base.height;
-
-        dst->writes++;
-        vc4->needs_flush = true;
-}
-
-static struct vc4_surface *
+static struct pipe_surface *
 vc4_get_blit_surface(struct pipe_context *pctx,
                      struct pipe_resource *prsc, unsigned level)
 {
@@ -117,7 +38,7 @@
         tmpl.u.tex.first_layer = 0;
         tmpl.u.tex.last_layer = 0;
 
-        return vc4_surface(pctx->create_surface(pctx, prsc, &tmpl));
+        return pctx->create_surface(pctx, prsc, &tmpl);
 }
 
 static bool
@@ -141,17 +62,28 @@
         if (info->dst.resource->format != info->src.resource->format)
                 return false;
 
-        struct vc4_surface *dst_surf =
+        vc4_flush(pctx);
+
+        struct pipe_surface *dst_surf =
                 vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level);
-        struct vc4_surface *src_surf =
+        struct pipe_surface *src_surf =
                 vc4_get_blit_surface(pctx, info->src.resource, info->src.level);
 
-        vc4_flush(pctx);
-        vc4_tile_blit_color_rcl(vc4, dst_surf, src_surf);
+        pipe_surface_reference(&vc4->color_read, src_surf);
+        pipe_surface_reference(&vc4->color_write, dst_surf);
+        pipe_surface_reference(&vc4->zs_read, NULL);
+        pipe_surface_reference(&vc4->zs_write, NULL);
+        vc4->draw_min_x = 0;
+        vc4->draw_min_y = 0;
+        vc4->draw_max_x = dst_surf->width;
+        vc4->draw_max_y = dst_surf->height;
+        vc4->draw_width = dst_surf->width;
+        vc4->draw_height = dst_surf->height;
+        vc4->needs_flush = true;
         vc4_job_submit(vc4);
 
-        pctx->surface_destroy(pctx, &dst_surf->base);
-        pctx->surface_destroy(pctx, &src_surf->base);
+        pipe_surface_reference(&dst_surf, NULL);
+        pipe_surface_reference(&src_surf, NULL);
 
         return true;
 }
@@ -162,7 +94,7 @@
         struct vc4_context *vc4 = vc4_context(ctx);
 
         if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
-                fprintf(stderr, "blit unsupported %s -> %s",
+                fprintf(stderr, "blit unsupported %s -> %s\n",
                     util_format_short_name(info->src.resource->format),
                     util_format_short_name(info->dst.resource->format));
                 return false;
@@ -203,7 +135,7 @@
             info.dst.resource->nr_samples <= 1 &&
             !util_format_is_depth_or_stencil(info.src.resource->format) &&
             !util_format_is_pure_integer(info.src.resource->format)) {
-                fprintf(stderr, "color resolve unimplemented");
+                fprintf(stderr, "color resolve unimplemented\n");
                 return;
         }
 
@@ -215,7 +147,7 @@
         }
 
         if (info.mask & PIPE_MASK_S) {
-                fprintf(stderr, "cannot blit stencil, skipping");
+                fprintf(stderr, "cannot blit stencil, skipping\n");
                 info.mask &= ~PIPE_MASK_S;
         }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_bufmgr.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_bufmgr.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_bufmgr.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_bufmgr.c	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2014 Broadcom
+ * Copyright © 2014-2015 Broadcom
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -34,8 +34,46 @@
 #include "vc4_context.h"
 #include "vc4_screen.h"
 
-#define container_of(ptr, type, field) \
-   (type*)((char*)ptr - offsetof(type, field))
+static bool dump_stats = false;
+
+static void
+vc4_bo_dump_stats(struct vc4_screen *screen)
+{
+        struct vc4_bo_cache *cache = &screen->bo_cache;
+        /* Sizes are tracked in bytes; divide by 1024 to print them as kb. */
+        fprintf(stderr, "  BOs allocated:   %d\n", screen->bo_count);
+        fprintf(stderr, "  BOs size:        %dkb\n", screen->bo_size / 1024);
+        fprintf(stderr, "  BOs cached:      %d\n", cache->bo_count);
+        fprintf(stderr, "  BOs cached size: %dkb\n", cache->bo_size / 1024);
+
+        if (!list_empty(&cache->time_list)) {
+                struct vc4_bo *first = LIST_ENTRY(struct vc4_bo,
+                                                  cache->time_list.next,
+                                                  time_list);
+                struct vc4_bo *last = LIST_ENTRY(struct vc4_bo,
+                                                  cache->time_list.prev,
+                                                  time_list);
+
+                fprintf(stderr, "  oldest cache time: %ld\n",
+                        (long)first->free_time);
+                fprintf(stderr, "  newest cache time: %ld\n",
+                        (long)last->free_time);
+
+                struct timespec time;
+                clock_gettime(CLOCK_MONOTONIC, &time);
+                fprintf(stderr, "  now:               %ld\n",
+                        (long)time.tv_sec);
+        }
+}
+
+static void
+vc4_bo_remove_from_cache(struct vc4_bo_cache *cache, struct vc4_bo *bo)
+{
+        list_del(&bo->time_list);
+        list_del(&bo->size_list);
+        cache->bo_count--;
+        cache->bo_size -= bo->size;
+}
 
 static struct vc4_bo *
 vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
@@ -48,12 +86,21 @@
 
         struct vc4_bo *bo = NULL;
         pipe_mutex_lock(cache->lock);
-        if (!is_empty_list(&cache->size_list[page_index])) {
-                struct simple_node *node = last_elem(&cache->size_list[page_index]);
-                bo = container_of(node, struct vc4_bo, size_list);
+        if (!list_empty(&cache->size_list[page_index])) {
+                bo = LIST_ENTRY(struct vc4_bo, cache->size_list[page_index].next,
+                                size_list);
+
+                /* Check that the BO has gone idle.  If not, then we want to
+                 * allocate something new instead, since we assume that the
+                 * user will proceed to CPU map it and fill it with stuff.
+                 */
+                if (!vc4_bo_wait(bo, 0, NULL)) {
+                        pipe_mutex_unlock(cache->lock);
+                        return NULL;
+                }
+
                 pipe_reference_init(&bo->reference, 1);
-                remove_from_list(&bo->time_list);
-                remove_from_list(&bo->size_list);
+                vc4_bo_remove_from_cache(cache, bo);
 
                 bo->name = name;
         }
@@ -70,8 +117,14 @@
         size = align(size, 4096);
 
         bo = vc4_bo_from_cache(screen, size, name);
-        if (bo)
+        if (bo) {
+                if (dump_stats) {
+                        fprintf(stderr, "Allocated %s %dkb from cache:\n",
+                                name, size / 1024);
+                        vc4_bo_dump_stats(screen);
+                }
                 return bo;
+        }
 
         bo = CALLOC_STRUCT(vc4_bo);
         if (!bo)
@@ -108,6 +161,13 @@
                 abort();
         }
 
+        screen->bo_count++;
+        screen->bo_size += bo->size;
+        if (dump_stats) {
+                fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
+                vc4_bo_dump_stats(screen);
+        }
+
         return bo;
 }
 
@@ -145,26 +205,47 @@
         if (ret != 0)
                 fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
 
+        screen->bo_count--;
+        screen->bo_size -= bo->size;
+
+        if (dump_stats) {
+                fprintf(stderr, "Freed %s%s%dkb:\n",
+                        bo->name ? bo->name : "",
+                        bo->name ? " " : "",
+                        bo->size / 1024);
+                vc4_bo_dump_stats(screen);
+        }
+
         free(bo);
 }
 
 static void
 free_stale_bos(struct vc4_screen *screen, time_t time)
 {
-        while (!is_empty_list(&screen->bo_cache.time_list)) {
-                struct simple_node *node =
-                        first_elem(&screen->bo_cache.time_list);
-                struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
+        struct vc4_bo_cache *cache = &screen->bo_cache;
+        bool freed_any = false;
+
+        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
+                                 time_list) {
 
                 /* If it's more than a second old, free it. */
                 if (time - bo->free_time > 2) {
-                        remove_from_list(&bo->time_list);
-                        remove_from_list(&bo->size_list);
+                        if (dump_stats && !freed_any) {
+                                fprintf(stderr, "Freeing stale BOs:\n");
+                                vc4_bo_dump_stats(screen);
+                                freed_any = true;
+                        }
+                        vc4_bo_remove_from_cache(cache, bo);
                         vc4_bo_free(bo);
                 } else {
                         break;
                 }
         }
+
+        if (dump_stats && freed_any) {
+                fprintf(stderr, "Freed stale BOs:\n");
+                vc4_bo_dump_stats(screen);
+        }
 }
 
 void
@@ -180,16 +261,16 @@
         }
 
         if (cache->size_list_size <= page_index) {
-                struct simple_node *new_list =
-                        ralloc_array(screen, struct simple_node, page_index + 1);
+                struct list_head *new_list =
+                        ralloc_array(screen, struct list_head, page_index + 1);
 
                 /* Move old list contents over (since the array has moved, and
-                 * therefore the pointers to the list heads have to change.
+                 * therefore the pointers to the list heads have to change).
                  */
                 for (int i = 0; i < cache->size_list_size; i++) {
-                        struct simple_node *old_head = &cache->size_list[i];
-                        if (is_empty_list(old_head))
-                                make_empty_list(&new_list[i]);
+                        struct list_head *old_head = &cache->size_list[i];
+                        if (list_empty(old_head))
+                                list_inithead(&new_list[i]);
                         else {
                                 new_list[i].next = old_head->next;
                                 new_list[i].prev = old_head->prev;
@@ -198,15 +279,23 @@
                         }
                 }
                 for (int i = cache->size_list_size; i < page_index + 1; i++)
-                        make_empty_list(&new_list[i]);
+                        list_inithead(&new_list[i]);
 
                 cache->size_list = new_list;
                 cache->size_list_size = page_index + 1;
         }
 
         bo->free_time = time;
-        insert_at_tail(&cache->size_list[page_index], &bo->size_list);
-        insert_at_tail(&cache->time_list, &bo->time_list);
+        list_addtail(&bo->size_list, &cache->size_list[page_index]);
+        list_addtail(&bo->time_list, &cache->time_list);
+        cache->bo_count++;
+        cache->bo_size += bo->size;
+        if (dump_stats) {
+                fprintf(stderr, "Freed %s %dkb to cache:\n",
+                        bo->name, bo->size / 1024);
+                vc4_bo_dump_stats(screen);
+        }
+        bo->name = NULL;
 
         free_stale_bos(screen, time);
 }
@@ -286,20 +375,63 @@
                         bo->handle);
                 return -1;
         }
+        bo->private = false;
 
         return fd;
 }
 
 struct vc4_bo *
-vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, uint32_t size,
-                 const char *name)
+vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data, uint32_t size)
 {
-        void *map;
         struct vc4_bo *bo;
+        int ret;
+
+        bo = CALLOC_STRUCT(vc4_bo);
+        if (!bo)
+                return NULL;
+
+        pipe_reference_init(&bo->reference, 1);
+        bo->screen = screen;
+        bo->size = align(size, 4096);
+        bo->name = "code";
+        bo->private = false; /* Make sure it doesn't go back to the cache. */
+
+        if (!using_vc4_simulator) {
+                struct drm_vc4_create_shader_bo create = {
+                        .size = size,
+                        .data = (uintptr_t)data,
+                };
+
+                ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_CREATE_SHADER_BO,
+                               &create);
+                bo->handle = create.handle;
+        } else {
+                struct drm_mode_create_dumb create;
+                memset(&create, 0, sizeof(create));
+
+                create.width = 128;
+                create.bpp = 8;
+                create.height = (size + 127) / 128;
+
+                ret = drmIoctl(screen->fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+                bo->handle = create.handle;
+                assert(create.size >= size);
+
+                vc4_bo_map(bo);
+                memcpy(bo->map, data, size);
+        }
+        if (ret != 0) {
+                fprintf(stderr, "create shader ioctl failure\n");
+                abort();
+        }
+
+        screen->bo_count++;
+        screen->bo_size += bo->size;
+        if (dump_stats) {
+                fprintf(stderr, "Allocated shader %dkb:\n", size / 1024);
+                vc4_bo_dump_stats(screen);
+        }
 
-        bo = vc4_bo_alloc(screen, size, name);
-        map = vc4_bo_map(bo);
-        memcpy(map, data, size);
         return bo;
 }
 
@@ -323,60 +455,91 @@
         return true;
 }
 
+static int vc4_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
+{
+        if (using_vc4_simulator)
+                return 0;
+
+        struct drm_vc4_wait_seqno wait = {
+                .seqno = seqno,
+                .timeout_ns = timeout_ns,
+        };
+        int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
+        if (ret == -1)
+                return -errno;
+        else
+                return 0;
+
+}
+
 bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+               const char *reason)
 {
         if (screen->finished_seqno >= seqno)
                 return true;
 
-        struct drm_vc4_wait_seqno wait;
-        memset(&wait, 0, sizeof(wait));
-        wait.seqno = seqno;
-        wait.timeout_ns = timeout_ns;
-
-        int ret;
-        if (!using_vc4_simulator)
-                ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
-        else {
-                wait.seqno = screen->finished_seqno;
-                ret = 0;
+        if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+                if (vc4_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
+                        fprintf(stderr, "Blocking on seqno %lld for %s\n",
+                                (long long)seqno, reason);
+                }
         }
 
-        if (ret == -ETIME) {
+        int ret = vc4_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
+        if (ret) {
+                if (ret != -ETIME) {
+                        fprintf(stderr, "wait failed: %d\n", ret);
+                        abort();
+                }
+
                 return false;
-        } else if (ret != 0) {
-                fprintf(stderr, "wait failed\n");
-                abort();
-        } else {
-                screen->finished_seqno = wait.seqno;
-                return true;
         }
+
+        screen->finished_seqno = seqno;
+        return true;
+}
+
+static int vc4_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+        if (using_vc4_simulator)
+                return 0;
+
+        struct drm_vc4_wait_bo wait = {
+                .handle = handle,
+                .timeout_ns = timeout_ns,
+        };
+        int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
+        if (ret == -1)
+                return -errno;
+        else
+                return 0;
+
 }
 
 bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason)
 {
         struct vc4_screen *screen = bo->screen;
 
-        struct drm_vc4_wait_bo wait;
-        memset(&wait, 0, sizeof(wait));
-        wait.handle = bo->handle;
-        wait.timeout_ns = timeout_ns;
+        if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+                if (vc4_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
+                        fprintf(stderr, "Blocking on %s BO for %s\n",
+                                bo->name, reason);
+                }
+        }
 
-        int ret;
-        if (!using_vc4_simulator)
-                ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
-        else
-                ret = 0;
+        int ret = vc4_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
+        if (ret) {
+                if (ret != -ETIME) {
+                        fprintf(stderr, "wait failed: %d\n", ret);
+                        abort();
+                }
 
-        if (ret == -ETIME) {
                 return false;
-        } else if (ret != 0) {
-                fprintf(stderr, "wait failed\n");
-                abort();
-        } else {
-                return true;
         }
+
+        return true;
 }
 
 void *
@@ -422,7 +585,7 @@
 {
         void *map = vc4_bo_map_unsynchronized(bo);
 
-        bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE);
+        bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");
         if (!ok) {
                 fprintf(stderr, "BO wait for map failed\n");
                 abort();
@@ -437,12 +600,14 @@
         struct vc4_screen *screen = vc4_screen(pscreen);
         struct vc4_bo_cache *cache = &screen->bo_cache;
 
-        while (!is_empty_list(&cache->time_list)) {
-                struct simple_node *node = first_elem(&cache->time_list);
-                struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
-
-                remove_from_list(&bo->time_list);
-                remove_from_list(&bo->size_list);
+        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
+                                 time_list) {
+                vc4_bo_remove_from_cache(cache, bo);
                 vc4_bo_free(bo);
         }
+
+        if (dump_stats) {
+                fprintf(stderr, "BO stats after screen destroy:\n");
+                vc4_bo_dump_stats(screen);
+        }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_bufmgr.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_bufmgr.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_bufmgr.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_bufmgr.h	2015-09-16 14:36:09.000000000 +0000
@@ -44,9 +44,9 @@
 #endif
 
         /** Entry in the linked list of buffers freed, by age. */
-        struct simple_node time_list;
+        struct list_head time_list;
         /** Entry in the per-page-count linked list of buffers freed (by age). */
-        struct simple_node size_list;
+        struct list_head size_list;
         /** Approximate second when the bo was freed. */
         time_t free_time;
         /**
@@ -58,8 +58,8 @@
 
 struct vc4_bo *vc4_bo_alloc(struct vc4_screen *screen, uint32_t size,
                             const char *name);
-struct vc4_bo *vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data,
-                                uint32_t size, const char *name);
+struct vc4_bo *vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data,
+                                   uint32_t size);
 void vc4_bo_last_unreference(struct vc4_bo *bo);
 void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time);
 struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
@@ -113,10 +113,11 @@
 vc4_bo_map_unsynchronized(struct vc4_bo *bo);
 
 bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason);
 
 bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+               const char *reason);
 
 void
 vc4_bufmgr_destroy(struct pipe_screen *pscreen);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_cl.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_cl.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_cl.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_cl.c	2015-09-16 14:36:09.000000000 +0000
@@ -36,11 +36,12 @@
 void
 cl_ensure_space(struct vc4_cl *cl, uint32_t space)
 {
-        if ((cl->next - cl->base) + space <= cl->size)
+        uint32_t offset = cl_offset(cl);
+
+        if (offset + space <= cl->size)
                 return;
 
         uint32_t size = MAX2(cl->size + space, cl->size * 2);
-        uint32_t offset = cl->next -cl->base;
 
         cl->base = reralloc(ralloc_parent(cl->base), cl->base, uint8_t, size);
         cl->size = size;
@@ -60,15 +61,20 @@
         uint32_t hindex;
         uint32_t *current_handles = vc4->bo_handles.base;
 
-        for (hindex = 0;
-             hindex < (vc4->bo_handles.next - vc4->bo_handles.base) / 4;
-             hindex++) {
+        for (hindex = 0; hindex < cl_offset(&vc4->bo_handles) / 4; hindex++) {
                 if (current_handles[hindex] == bo->handle)
                         return hindex;
         }
 
-        cl_u32(&vc4->bo_handles, bo->handle);
-        cl_ptr(&vc4->bo_pointers, vc4_bo_reference(bo));
+        struct vc4_cl_out *out;
+
+        out = cl_start(&vc4->bo_handles);
+        cl_u32(&out, bo->handle);
+        cl_end(&vc4->bo_handles, out);
+
+        out = cl_start(&vc4->bo_pointers);
+        cl_ptr(&out, vc4_bo_reference(bo));
+        cl_end(&vc4->bo_pointers, out);
 
         return hindex;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_cl_dump.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_cl_dump.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_cl_dump.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_cl_dump.c	2015-09-16 14:36:09.000000000 +0000
@@ -34,7 +34,7 @@
         void *f = cl + offset;
 
         fprintf(stderr, "0x%08x 0x%08x:      %f (0x%08x)\n",
-                offset, hw_offset, *(float *)f, *(uint32_t *)f);
+                offset, hw_offset, uif(*(uint32_t *)f), *(uint32_t *)f);
 }
 
 static void
@@ -47,7 +47,33 @@
 }
 
 static void
-dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+dump_loadstore_full(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+        uint32_t bits = *(uint32_t *)(cl + offset);
+
+        fprintf(stderr, "0x%08x 0x%08x:      addr 0x%08x%s%s%s%s\n",
+                offset, hw_offset,
+                bits & ~0xf,
+                (bits & VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL) ? "" : " clear",
+                (bits & VC4_LOADSTORE_FULL_RES_DISABLE_ZS) ? "" : " zs",
+                (bits & VC4_LOADSTORE_FULL_RES_DISABLE_COLOR) ? "" : " color",
+                (bits & VC4_LOADSTORE_FULL_RES_EOF) ? " eof" : "");
+}
+
+static void
+dump_VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+        dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_STORE_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+        dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_loadstore_general(void *cl, uint32_t offset, uint32_t hw_offset)
 {
         uint8_t *bytes = cl + offset;
         uint32_t *addr = cl + offset + 2;
@@ -125,6 +151,18 @@
 }
 
 static void
+dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+        dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+        dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
 dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
 {
         uint32_t *bits = cl + offset;
@@ -174,6 +212,37 @@
 }
 
 static void
+dump_VC4_PACKET_TILE_BINNING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+        uint32_t *tile_alloc_addr = cl + offset;
+        uint32_t *tile_alloc_size = cl + offset + 4;
+        uint32_t *tile_state_addr = cl + offset + 8;
+        uint8_t *bin_x = cl + offset + 12;
+        uint8_t *bin_y = cl + offset + 13;
+        uint8_t *flags = cl + offset + 14;
+
+        fprintf(stderr, "0x%08x 0x%08x:       tile alloc addr 0x%08x\n",
+                offset, hw_offset,
+                *tile_alloc_addr);
+
+        fprintf(stderr, "0x%08x 0x%08x:       tile alloc size %db\n",
+                offset + 4, hw_offset + 4,
+                *tile_alloc_size);
+
+        fprintf(stderr, "0x%08x 0x%08x:       tile state addr 0x%08x\n",
+                offset + 8, hw_offset + 8,
+                *tile_state_addr);
+
+        fprintf(stderr, "0x%08x 0x%08x:       tiles (%d, %d)\n",
+                offset + 12, hw_offset + 12,
+                *bin_x, *bin_y);
+
+        fprintf(stderr, "0x%08x 0x%08x:       flags 0x%02x\n",
+                offset + 14, hw_offset + 14,
+                *flags);
+}
+
+static void
 dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(void *cl, uint32_t offset, uint32_t hw_offset)
 {
         uint32_t *render_offset = cl + offset;
@@ -260,63 +329,63 @@
                 offset, hw_offset, handles[0], handles[1]);
 }
 
-#define PACKET_DUMP(name, size) [name] = { #name, size, dump_##name }
-#define PACKET(name, size) [name] = { #name, size, NULL }
+#define PACKET_DUMP(name) [name] = { #name, name ## _SIZE, dump_##name }
+#define PACKET(name) [name] = { #name, name ## _SIZE, NULL }
 
 static const struct packet_info {
         const char *name;
         uint8_t size;
         void (*dump_func)(void *cl, uint32_t offset, uint32_t hw_offset);
 } packet_info[] = {
-        PACKET(VC4_PACKET_HALT, 1),
-        PACKET(VC4_PACKET_NOP, 1),
+        PACKET(VC4_PACKET_HALT),
+        PACKET(VC4_PACKET_NOP),
 
-        PACKET(VC4_PACKET_FLUSH, 1),
-        PACKET(VC4_PACKET_FLUSH_ALL, 1),
-        PACKET(VC4_PACKET_START_TILE_BINNING, 1),
-        PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1),
-        PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 1),
-
-        PACKET(VC4_PACKET_BRANCH, 5),
-        PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST, 5),
-
-        PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 1),
-        PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 1),
-        PACKET(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER, 5),
-        PACKET(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER, 5),
-        PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 7),
-        PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 7),
-
-        PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 14),
-        PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 10),
-
-        PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE, 48),
-        PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE, 49),
-
-        PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 2),
-
-        PACKET(VC4_PACKET_GL_SHADER_STATE, 5),
-        PACKET(VC4_PACKET_NV_SHADER_STATE, 5),
-        PACKET(VC4_PACKET_VG_SHADER_STATE, 5),
-
-        PACKET(VC4_PACKET_CONFIGURATION_BITS, 4),
-        PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS, 5),
-        PACKET_DUMP(VC4_PACKET_POINT_SIZE, 5),
-        PACKET_DUMP(VC4_PACKET_LINE_WIDTH, 5),
-        PACKET(VC4_PACKET_RHT_X_BOUNDARY, 3),
-        PACKET(VC4_PACKET_DEPTH_OFFSET, 5),
-        PACKET(VC4_PACKET_CLIP_WINDOW, 9),
-        PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET, 5),
-        PACKET(VC4_PACKET_Z_CLIPPING, 9),
-        PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING, 9),
-        PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING, 9),
-
-        PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16),
-        PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 11),
-        PACKET(VC4_PACKET_CLEAR_COLORS, 14),
-        PACKET_DUMP(VC4_PACKET_TILE_COORDINATES, 3),
+        PACKET(VC4_PACKET_FLUSH),
+        PACKET(VC4_PACKET_FLUSH_ALL),
+        PACKET(VC4_PACKET_START_TILE_BINNING),
+        PACKET(VC4_PACKET_INCREMENT_SEMAPHORE),
+        PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE),
+
+        PACKET(VC4_PACKET_BRANCH),
+        PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST),
+
+        PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER),
+        PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF),
+        PACKET_DUMP(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
+        PACKET_DUMP(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
+        PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
+        PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
+
+        PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
+        PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
+
+        PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
+        PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),
+
+        PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT),
+
+        PACKET(VC4_PACKET_GL_SHADER_STATE),
+        PACKET(VC4_PACKET_NV_SHADER_STATE),
+        PACKET(VC4_PACKET_VG_SHADER_STATE),
+
+        PACKET(VC4_PACKET_CONFIGURATION_BITS),
+        PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS),
+        PACKET_DUMP(VC4_PACKET_POINT_SIZE),
+        PACKET_DUMP(VC4_PACKET_LINE_WIDTH),
+        PACKET(VC4_PACKET_RHT_X_BOUNDARY),
+        PACKET(VC4_PACKET_DEPTH_OFFSET),
+        PACKET(VC4_PACKET_CLIP_WINDOW),
+        PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET),
+        PACKET(VC4_PACKET_Z_CLIPPING),
+        PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING),
+        PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING),
+
+        PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG),
+        PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG),
+        PACKET(VC4_PACKET_CLEAR_COLORS),
+        PACKET_DUMP(VC4_PACKET_TILE_COORDINATES),
 
-        PACKET_DUMP(VC4_PACKET_GEM_HANDLES, 9),
+        PACKET_DUMP(VC4_PACKET_GEM_HANDLES),
 };
 
 void
@@ -328,7 +397,7 @@
         while (offset < size) {
                 uint8_t header = cmds[offset];
 
-                if (header > ARRAY_SIZE(packet_info) ||
+                if (header >= ARRAY_SIZE(packet_info) ||
                     !packet_info[header].name) {
                         fprintf(stderr, "0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
                                 offset, hw_offset, header, header);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_cl.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_cl.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_cl.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_cl.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,16 +29,24 @@
 #include "util/u_math.h"
 #include "util/macros.h"
 
-#include "vc4_packet.h"
+#include "kernel/vc4_packet.h"
 
 struct vc4_bo;
 
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc4_cl_out;
+
 struct vc4_cl {
         void *base;
-        void *next;
+        struct vc4_cl_out *next;
+        struct vc4_cl_out *reloc_next;
         uint32_t size;
-        uint32_t reloc_next;
+#ifdef DEBUG
         uint32_t reloc_count;
+#endif
 };
 
 void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl);
@@ -49,135 +57,149 @@
 struct PACKED unaligned_16 { uint16_t x; };
 struct PACKED unaligned_32 { uint32_t x; };
 
-static inline void
-put_unaligned_32(void *ptr, uint32_t val)
+static inline uint32_t cl_offset(struct vc4_cl *cl)
 {
-        struct unaligned_32 *p = ptr;
-        p->x = val;
+        return (char *)cl->next - (char *)cl->base;
 }
 
 static inline void
-put_unaligned_16(void *ptr, uint16_t val)
+cl_advance(struct vc4_cl_out **cl, uint32_t n)
 {
-        struct unaligned_16 *p = ptr;
-        p->x = val;
+        (*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
 }
 
-static inline void
-cl_u8(struct vc4_cl *cl, uint8_t n)
+static inline struct vc4_cl_out *
+cl_start(struct vc4_cl *cl)
 {
-        assert((cl->next - cl->base) + 1 <= cl->size);
-
-        *(uint8_t *)cl->next = n;
-        cl->next++;
+        return cl->next;
 }
 
 static inline void
-cl_u16(struct vc4_cl *cl, uint16_t n)
+cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
 {
-        assert((cl->next - cl->base) + 2 <= cl->size);
+        cl->next = next;
+        assert(cl_offset(cl) <= cl->size);
+}
 
-        put_unaligned_16(cl->next, n);
-        cl->next += 2;
+
+static inline void
+put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
+{
+        struct unaligned_32 *p = (void *)ptr;
+        p->x = val;
 }
 
 static inline void
-cl_u32(struct vc4_cl *cl, uint32_t n)
+put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
 {
-        assert((cl->next - cl->base) + 4 <= cl->size);
+        struct unaligned_16 *p = (void *)ptr;
+        p->x = val;
+}
 
-        put_unaligned_32(cl->next, n);
-        cl->next += 4;
+static inline void
+cl_u8(struct vc4_cl_out **cl, uint8_t n)
+{
+        *(uint8_t *)(*cl) = n;
+        cl_advance(cl, 1);
 }
 
 static inline void
-cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+cl_u16(struct vc4_cl_out **cl, uint16_t n)
 {
-        assert((cl->next - cl->base) + 4 <= cl->size);
+        put_unaligned_16(*cl, n);
+        cl_advance(cl, 2);
+}
 
-        *(uint32_t *)cl->next = n;
-        cl->next += 4;
+static inline void
+cl_u32(struct vc4_cl_out **cl, uint32_t n)
+{
+        put_unaligned_32(*cl, n);
+        cl_advance(cl, 4);
 }
 
 static inline void
-cl_ptr(struct vc4_cl *cl, void *ptr)
+cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
 {
-        assert((cl->next - cl->base) + sizeof(void *) <= cl->size);
+        *(uint32_t *)(*cl) = n;
+        cl_advance(cl, 4);
+}
 
-        *(void **)cl->next = ptr;
-        cl->next += sizeof(void *);
+static inline void
+cl_ptr(struct vc4_cl_out **cl, void *ptr)
+{
+        *(struct vc4_cl_out **)(*cl) = ptr;
+        cl_advance(cl, sizeof(void *));
 }
 
 static inline void
-cl_f(struct vc4_cl *cl, float f)
+cl_f(struct vc4_cl_out **cl, float f)
 {
         cl_u32(cl, fui(f));
 }
 
 static inline void
-cl_aligned_f(struct vc4_cl *cl, float f)
+cl_aligned_f(struct vc4_cl_out **cl, float f)
 {
         cl_aligned_u32(cl, fui(f));
 }
 
 static inline void
-cl_start_reloc(struct vc4_cl *cl, uint32_t n)
+cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
 {
         assert(n == 1 || n == 2);
+#ifdef DEBUG
         assert(cl->reloc_count == 0);
         cl->reloc_count = n;
+#endif
 
-        cl_u8(cl, VC4_PACKET_GEM_HANDLES);
-        cl->reloc_next = cl->next - cl->base;
-        cl_u32(cl, 0); /* Space where hindex will be written. */
-        cl_u32(cl, 0); /* Space where hindex will be written. */
+        cl_u8(out, VC4_PACKET_GEM_HANDLES);
+        cl->reloc_next = *out;
+        cl_u32(out, 0); /* Space where hindex will be written. */
+        cl_u32(out, 0); /* Space where hindex will be written. */
 }
 
-static inline void
+static inline struct vc4_cl_out *
 cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
 {
+#ifdef DEBUG
         assert(cl->reloc_count == 0);
         cl->reloc_count = n;
-        cl->reloc_next = cl->next - cl->base;
+#endif
+        cl->reloc_next = cl->next;
+
+        /* Reserve the space where hindex will be written. */
+        cl_advance(&cl->next, n * 4);
 
-        /* Space where hindex will be written. */
-        cl->next += n * 4;
+        return cl->next;
 }
 
 static inline void
-cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
+         struct vc4_bo *bo, uint32_t offset)
 {
-        *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
-        cl->reloc_next += 4;
+        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+        cl_advance(&cl->reloc_next, 4);
 
+#ifdef DEBUG
         cl->reloc_count--;
+#endif
 
-        cl_u32(cl, offset);
+        cl_u32(cl_out, offset);
 }
 
 static inline void
-cl_aligned_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+                 struct vc4_cl_out **cl_out,
+                 struct vc4_bo *bo, uint32_t offset)
 {
-        *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
-        cl->reloc_next += 4;
+        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+        cl_advance(&cl->reloc_next, 4);
 
+#ifdef DEBUG
         cl->reloc_count--;
+#endif
 
-        cl_aligned_u32(cl, offset);
-}
-
-static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
-         struct vc4_bo *bo, uint32_t offset)
-{
-        cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
-}
-
-static inline void
-cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
-         struct vc4_bo *bo, uint32_t offset)
-{
-        cl_aligned_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+        cl_aligned_u32(cl_out, offset);
 }
 
 void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -29,6 +29,7 @@
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_primconvert.h"
 #include "pipe/p_screen.h"
 
@@ -36,270 +37,12 @@
 #include "vc4_context.h"
 #include "vc4_resource.h"
 
-/**
- * Emits a no-op STORE_TILE_BUFFER_GENERAL.
- *
- * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
- * some sort before another load is triggered.
- */
-static void
-vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
-{
-        if (!*coords_emitted)
-                return;
-
-        cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
-        cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
-        cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
-                          VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
-                          VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
-        cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
-
-        *coords_emitted = false;
-}
-
-/**
- * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
- *
- * The tile coordinates packet triggers a pending load if there is one, are
- * used for clipping during rendering, and determine where loads/stores happen
- * relative to their base address.
- */
-static void
-vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
-                       bool *coords_emitted)
-{
-        if (*coords_emitted)
-                return;
-
-        cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
-        cl_u8(&vc4->rcl, x);
-        cl_u8(&vc4->rcl, y);
-
-        *coords_emitted = true;
-}
-
-static void
-vc4_setup_rcl(struct vc4_context *vc4)
-{
-        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
-        struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
-        struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
-        struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
-
-        if (!csurf)
-                vc4->resolve &= ~PIPE_CLEAR_COLOR0;
-        if (!zsurf)
-                vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
-        uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
-        uint32_t width = vc4->framebuffer.width;
-        uint32_t height = vc4->framebuffer.height;
-        uint32_t stride_in_tiles = align(width, 64) / 64;
-
-        assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
-        uint32_t min_x_tile = vc4->draw_min_x / 64;
-        uint32_t min_y_tile = vc4->draw_min_y / 64;
-        uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
-        uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
-        uint32_t xtiles = max_x_tile - min_x_tile + 1;
-        uint32_t ytiles = max_y_tile - min_y_tile + 1;
-
-#if 0
-        fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
-                vc4->resolve,
-                vc4->cleared,
-                resolve_uncleared);
-#endif
-
-        uint32_t reloc_size = 9;
-        uint32_t clear_size = 14;
-        uint32_t config_size = 11 + reloc_size;
-        uint32_t loadstore_size = 7 + reloc_size;
-        uint32_t tilecoords_size = 3;
-        uint32_t branch_size = 5 + reloc_size;
-        uint32_t color_store_size = 1;
-        uint32_t semaphore_size = 1;
-        cl_ensure_space(&vc4->rcl,
-                        clear_size +
-                        config_size +
-                        loadstore_size +
-                        semaphore_size +
-                        xtiles * ytiles * (loadstore_size * 4 +
-                                           tilecoords_size * 3 +
-                                           branch_size +
-                                           color_store_size));
-
-        if (vc4->cleared) {
-                cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
-                cl_u32(&vc4->rcl, vc4->clear_color[0]);
-                cl_u32(&vc4->rcl, vc4->clear_color[1]);
-                cl_u32(&vc4->rcl, vc4->clear_depth);
-                cl_u8(&vc4->rcl, vc4->clear_stencil);
-        }
-
-        /* The rendering mode config determines the pointer that's used for
-         * VC4_PACKET_STORE_MS_TILE_BUFFER address computations.  The kernel
-         * could handle a no-relocation rendering mode config and deny those
-         * packets, but instead we just tell the kernel we're doing our color
-         * rendering to the Z buffer, and just don't emit any of those
-         * packets.
-         */
-        struct vc4_surface *render_surf = csurf ? csurf : zsurf;
-        struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
-        cl_start_reloc(&vc4->rcl, 1);
-        cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
-        cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
-        cl_u16(&vc4->rcl, width);
-        cl_u16(&vc4->rcl, height);
-        cl_u16(&vc4->rcl, ((render_surf->tiling <<
-                            VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
-                           (vc4_rt_format_is_565(render_surf->base.format) ?
-                            VC4_RENDER_CONFIG_FORMAT_BGR565 :
-                            VC4_RENDER_CONFIG_FORMAT_RGBA8888)));
-
-        /* The tile buffer normally gets cleared when the previous tile is
-         * stored.  If the clear values changed between frames, then the tile
-         * buffer has stale clear values in it, so we have to do a store in
-         * None mode (no writes) so that we trigger the tile buffer clear.
-         *
-         * Excess clearing is only a performance cost, since per-tile contents
-         * will be loaded/stored in the loop below.
-         */
-        if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
-                            PIPE_CLEAR_DEPTH |
-                            PIPE_CLEAR_STENCIL)) {
-                cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
-                cl_u8(&vc4->rcl, 0);
-                cl_u8(&vc4->rcl, 0);
-
-                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
-                cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
-                cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
-        }
-
-        uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
-        uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
-        uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
-
-        for (int y = min_y_tile; y <= max_y_tile; y++) {
-                for (int x = min_x_tile; x <= max_x_tile; x++) {
-                        bool end_of_frame = (x == max_x_tile &&
-                                             y == max_y_tile);
-                        bool coords_emitted = false;
-
-                        /* Note that the load doesn't actually occur until the
-                         * tile coords packet is processed, and only one load
-                         * may be outstanding at a time.
-                         */
-                        if (resolve_uncleared & PIPE_CLEAR_COLOR) {
-                                vc4_store_before_load(vc4, &coords_emitted);
-
-                                cl_start_reloc(&vc4->rcl, 1);
-                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
-                                cl_u8(&vc4->rcl,
-                                      VC4_LOADSTORE_TILE_BUFFER_COLOR |
-                                      (csurf->tiling <<
-                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
-                                cl_u8(&vc4->rcl,
-                                      vc4_rt_format_is_565(csurf->base.format) ?
-                                      VC4_LOADSTORE_TILE_BUFFER_BGR565 :
-                                      VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
-                                cl_reloc_hindex(&vc4->rcl, color_hindex,
-                                                csurf->offset);
-
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-                        }
-
-                        if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
-                                vc4_store_before_load(vc4, &coords_emitted);
-
-                                cl_start_reloc(&vc4->rcl, 1);
-                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
-                                cl_u8(&vc4->rcl,
-                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
-                                      (zsurf->tiling <<
-                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
-                                cl_u8(&vc4->rcl, 0);
-                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
-                                                zsurf->offset);
-
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-                        }
-
-                        /* Clipping depends on tile coordinates having been
-                         * emitted, so make sure it's happened even if
-                         * everything was cleared to start.
-                         */
-                        vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-
-                        /* Wait for the binner before jumping to the first
-                         * tile's lists.
-                         */
-                        if (x == min_x_tile && y == min_y_tile)
-                                cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
-
-                        cl_start_reloc(&vc4->rcl, 1);
-                        cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
-                        cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
-                                        (y * stride_in_tiles + x) * 32);
-
-                        if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-
-                                cl_start_reloc(&vc4->rcl, 1);
-                                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
-                                cl_u8(&vc4->rcl,
-                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
-                                      (zsurf->tiling <<
-                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
-                                cl_u8(&vc4->rcl,
-                                      VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
-                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
-                                                zsurf->offset |
-                                                ((end_of_frame &&
-                                                  !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
-                                                 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
-
-                                coords_emitted = false;
-                        }
-
-                        if (vc4->resolve & PIPE_CLEAR_COLOR0) {
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-                                if (end_of_frame) {
-                                        cl_u8(&vc4->rcl,
-                                              VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
-                                } else {
-                                        cl_u8(&vc4->rcl,
-                                              VC4_PACKET_STORE_MS_TILE_BUFFER);
-                                }
-
-                                coords_emitted = false;
-                        }
-
-                        /* One of the bits needs to have been set that would
-                         * have triggered an EOF.
-                         */
-                        assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
-                                               PIPE_CLEAR_DEPTH |
-                                               PIPE_CLEAR_STENCIL));
-                        /* Any coords emitted must also have been consumed by
-                         * a store.
-                         */
-                        assert(!coords_emitted);
-                }
-        }
-
-        if (vc4->resolve & PIPE_CLEAR_COLOR0)
-                ctex->writes++;
-
-        if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
-                ztex->writes++;
-}
-
 void
 vc4_flush(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
+        struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
+        struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
 
         if (!vc4->needs_flush)
                 return;
@@ -318,11 +61,37 @@
          * FLUSH completes.
          */
         cl_ensure_space(&vc4->bcl, 8);
-        cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+        cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
         /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
-        cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
+        cl_u8(&bcl, VC4_PACKET_FLUSH);
+        cl_end(&vc4->bcl, bcl);
 
-        vc4_setup_rcl(vc4);
+        if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
+                pipe_surface_reference(&vc4->color_write, cbuf);
+                if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) {
+                        pipe_surface_reference(&vc4->color_read, cbuf);
+                } else {
+                        pipe_surface_reference(&vc4->color_read, NULL);
+                }
+
+        } else {
+                pipe_surface_reference(&vc4->color_write, NULL);
+                pipe_surface_reference(&vc4->color_read, NULL);
+        }
+
+        if (vc4->framebuffer.zsbuf &&
+            (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+                pipe_surface_reference(&vc4->zs_write, zsbuf);
+                if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+                        pipe_surface_reference(&vc4->zs_read, zsbuf);
+                } else {
+                        pipe_surface_reference(&vc4->zs_read, NULL);
+                }
+        } else {
+                pipe_surface_reference(&vc4->zs_write, NULL);
+                pipe_surface_reference(&vc4->zs_read, NULL);
+        }
 
         vc4_job_submit(vc4);
 }
@@ -336,8 +105,10 @@
         vc4_flush(pctx);
 
         if (fence) {
+                struct pipe_screen *screen = pctx->screen;
                 struct vc4_fence *f = vc4_fence_create(vc4->screen,
                                                        vc4->last_emit_seqno);
+                screen->fence_reference(screen, fence, NULL);
                 *fence = (struct pipe_fence_handle *)f;
         }
 }
@@ -359,8 +130,7 @@
          * they match.
          */
         struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
-        for (int i = 0; i < (vc4->bo_handles.next -
-                             vc4->bo_handles.base) / 4; i++) {
+        for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
                 if (referenced_bos[i] == bo) {
                         return true;
                 }
@@ -410,12 +180,13 @@
         if (vc4->primconvert)
                 util_primconvert_destroy(vc4->primconvert);
 
+        if (vc4->uploader)
+                u_upload_destroy(vc4->uploader);
+
         util_slab_destroy(&vc4->transfer_pool);
 
         pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
         pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
-        vc4_bo_unreference(&vc4->tile_alloc);
-        vc4_bo_unreference(&vc4->tile_state);
 
         vc4_program_fini(pctx);
 
@@ -466,6 +237,9 @@
         if (!vc4->primconvert)
                 goto fail;
 
+        vc4->uploader = u_upload_create(pctx, 16 * 1024, 4,
+                                        PIPE_BIND_INDEX_BUFFER);
+
         vc4_debug |= saved_shaderdb_flag;
 
         return &vc4->base;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -67,7 +67,20 @@
 #define VC4_DIRTY_CLIP          (1 << 20)
 #define VC4_DIRTY_UNCOMPILED_VS (1 << 21)
 #define VC4_DIRTY_UNCOMPILED_FS (1 << 22)
-#define VC4_DIRTY_COMPILED_FS   (1 << 24)
+#define VC4_DIRTY_COMPILED_CS   (1 << 23)
+#define VC4_DIRTY_COMPILED_VS   (1 << 24)
+#define VC4_DIRTY_COMPILED_FS   (1 << 25)
+
+struct vc4_sampler_view {
+        struct pipe_sampler_view base;
+        uint32_t texture_p0;
+        uint32_t texture_p1;
+};
+
+struct vc4_sampler_state {
+        struct pipe_sampler_state base;
+        uint32_t texture_p1;
+};
 
 struct vc4_texture_stateobj {
         struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
@@ -121,6 +134,12 @@
         struct vc4_ubo_range *ubo_ranges;
         uint32_t num_ubo_ranges;
         uint32_t ubo_size;
+        /**
+         * VC4_DIRTY_* flags that, when set in vc4->dirty, mean that the
+         * uniforms have to be rewritten (and therefore the shader state
+         * reemitted).
+         */
+        uint32_t uniform_dirty_bits;
 
         /** bitmask of which inputs are color inputs, for flat shade handling. */
         uint32_t color_inputs;
@@ -178,12 +197,18 @@
         struct vc4_screen *screen;
 
         struct vc4_cl bcl;
-        struct vc4_cl rcl;
         struct vc4_cl shader_rec;
         struct vc4_cl uniforms;
         struct vc4_cl bo_handles;
         struct vc4_cl bo_pointers;
         uint32_t shader_rec_count;
+
+        /** @{ Surfaces to submit rendering for. */
+        struct pipe_surface *color_read;
+        struct pipe_surface *color_write;
+        struct pipe_surface *zs_read;
+        struct pipe_surface *zs_write;
+        /** @} */
         /** @{
          * Bounding box of the scissor across all queued drawing.
          *
@@ -194,9 +219,13 @@
         uint32_t draw_max_x;
         uint32_t draw_max_y;
         /** @} */
-
-        struct vc4_bo *tile_alloc;
-        struct vc4_bo *tile_state;
+        /** @{
+         * Width/height of the color framebuffer being rendered to,
+         * for VC4_TILE_RENDERING_MODE_CONFIG.
+        */
+        uint32_t draw_width;
+        uint32_t draw_height;
+        /** @} */
 
         struct util_slab_mempool transfer_pool;
         struct blitter_context *blitter;
@@ -228,6 +257,11 @@
          */
         bool draw_call_queued;
 
+        /** Maximum index buffer valid for the current shader_rec. */
+        uint32_t max_index;
+        /** Last index bias baked into the current shader_rec. */
+        uint32_t last_index_bias;
+
         struct primconvert_context *primconvert;
 
         struct hash_table *fs_cache, *vs_cache;
@@ -236,6 +270,8 @@
 
         struct ra_regs *regs;
         unsigned int reg_class_any;
+        unsigned int reg_class_a_or_b_or_acc;
+        unsigned int reg_class_r4_or_a;
         unsigned int reg_class_a;
 
         uint8_t prim_mode;
@@ -243,6 +279,8 @@
         /** Seqno of the last CL flush's job. */
         uint64_t last_emit_seqno;
 
+        struct u_upload_mgr *uploader;
+
         /** @{ Current pipeline state objects */
         struct pipe_scissor_state scissor;
         struct pipe_blend_state *blend;
@@ -314,6 +352,18 @@
         return (struct vc4_context *)pcontext;
 }
 
+static inline struct vc4_sampler_view *
+vc4_sampler_view(struct pipe_sampler_view *psview)
+{
+        return (struct vc4_sampler_view *)psview;
+}
+
+static inline struct vc4_sampler_state *
+vc4_sampler_state(struct pipe_sampler_state *psampler)
+{
+        return (struct vc4_sampler_state *)psampler;
+}
+
 struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
                                         void *priv);
 void vc4_draw_init(struct pipe_context *pctx);
@@ -325,6 +375,7 @@
 int vc4_simulator_flush(struct vc4_context *vc4,
                         struct drm_vc4_submit_cl *args);
 
+void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader);
 void vc4_write_uniforms(struct vc4_context *vc4,
                         struct vc4_compiled_shader *shader,
                         struct vc4_constbuf_stateobj *cb,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -71,64 +71,40 @@
         uint32_t height = vc4->framebuffer.height;
         uint32_t tilew = align(width, 64) / 64;
         uint32_t tileh = align(height, 64) / 64;
-
-        /* Tile alloc memory setup: We use an initial alloc size of 32b.  The
-         * hardware then aligns that to 256b (we use 4096, because all of our
-         * BO allocations align to that anyway), then for some reason the
-         * simulator wants an extra page available, even if you have overflow
-         * memory set up.
-         *
-         * XXX: The binner only does 28-bit addressing math, so the tile alloc
-         * and tile state should be in the same BO and that BO needs to not
-         * cross a 256MB boundary, somehow.
-         */
-        uint32_t tile_alloc_size = 32 * tilew * tileh;
-        tile_alloc_size = align(tile_alloc_size, 4096);
-        tile_alloc_size += 4096;
-        uint32_t tile_state_size = 48 * tilew * tileh;
-        if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) {
-                vc4_bo_unreference(&vc4->tile_alloc);
-                vc4->tile_alloc = vc4_bo_alloc(vc4->screen, tile_alloc_size,
-                                               "tile_alloc");
-        }
-        if (!vc4->tile_state || vc4->tile_state->size < tile_state_size) {
-                vc4_bo_unreference(&vc4->tile_state);
-                vc4->tile_state = vc4_bo_alloc(vc4->screen, tile_state_size,
-                                               "tile_state");
-        }
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
 
         //   Tile state data is 48 bytes per tile, I think it can be thrown away
         //   as soon as binning is finished.
-        cl_start_reloc(&vc4->bcl, 2);
-        cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
-        cl_reloc(vc4, &vc4->bcl, vc4->tile_alloc, 0);
-        cl_u32(&vc4->bcl, vc4->tile_alloc->size);
-        cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0);
-        cl_u8(&vc4->bcl, tilew);
-        cl_u8(&vc4->bcl, tileh);
-        cl_u8(&vc4->bcl,
-              VC4_BIN_CONFIG_AUTO_INIT_TSDA |
-              VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 |
-              VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32);
+        cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
+        cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
+        cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
+        cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
+        cl_u8(&bcl, tilew);
+        cl_u8(&bcl, tileh);
+        cl_u8(&bcl, 0); /* flags, filled by kernel. */
 
         /* START_TILE_BINNING resets the statechange counters in the hardware,
          * which are what is used when a primitive is binned to a tile to
          * figure out what new state packets need to be written to that tile's
          * command list.
          */
-        cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
+        cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
 
         /* Reset the current compressed primitives format.  This gets modified
          * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
          * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
          * of every tile.
          */
-        cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
-        cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
-                          VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+        cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
+        cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
+                     VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
 
         vc4->needs_flush = true;
         vc4->draw_call_queued = true;
+        vc4->draw_width = width;
+        vc4->draw_height = height;
+
+        cl_end(&vc4->bcl, bcl);
 }
 
 static void
@@ -146,96 +122,67 @@
 }
 
 static void
-vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info)
 {
-        struct vc4_context *vc4 = vc4_context(pctx);
-
-        if (info->mode >= PIPE_PRIM_QUADS) {
-                util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
-                util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
-                util_primconvert_draw_vbo(vc4->primconvert, info);
-                perf_debug("Fallback conversion for %d %s vertices\n",
-                           info->count, u_prim_name(info->mode));
-                return;
-        }
-
-        /* Before setting up the draw, do any fixup blits necessary. */
-        vc4_update_shadow_textures(pctx, &vc4->verttex);
-        vc4_update_shadow_textures(pctx, &vc4->fragtex);
-
-        vc4_get_draw_cl_space(vc4);
-
+        /* VC4_DIRTY_VTXSTATE */
         struct vc4_vertex_stateobj *vtx = vc4->vtx;
+        /* VC4_DIRTY_VTXBUF */
         struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
 
-        if (vc4->prim_mode != info->mode) {
-                vc4->prim_mode = info->mode;
-                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
-        }
-
-        vc4_start_draw(vc4);
-        vc4_update_compiled_shaders(vc4, info->mode);
-
-        vc4_emit_state(pctx);
-        vc4->dirty = 0;
-
-        vc4_write_uniforms(vc4, vc4->prog.fs,
-                           &vc4->constbuf[PIPE_SHADER_FRAGMENT],
-                           &vc4->fragtex);
-        vc4_write_uniforms(vc4, vc4->prog.vs,
-                           &vc4->constbuf[PIPE_SHADER_VERTEX],
-                           &vc4->verttex);
-        vc4_write_uniforms(vc4, vc4->prog.cs,
-                           &vc4->constbuf[PIPE_SHADER_VERTEX],
-                           &vc4->verttex);
-
         /* The simulator throws a fit if VS or CS don't read an attribute, so
          * we emit a dummy read.
          */
         uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
         /* Emit the shader record. */
-        cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
-        cl_u16(&vc4->shader_rec,
+        struct vc4_cl_out *shader_rec =
+                cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
+        /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
+        cl_u16(&shader_rec,
                VC4_SHADER_FLAG_ENABLE_CLIPPING |
+               VC4_SHADER_FLAG_FS_SINGLE_THREAD |
                ((info->mode == PIPE_PRIM_POINTS &&
                  vc4->rasterizer->base.point_size_per_vertex) ?
                 VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
-        cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
-        cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
-        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
-        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
-        cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
-        cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
-        cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
-        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
-        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
-        cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
-        cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
-        cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
-        cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
-        cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
+
+        /* VC4_DIRTY_COMPILED_FS */
+        cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
+        cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
+        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
+        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+        /* VC4_DIRTY_COMPILED_VS */
+        cl_u16(&shader_rec, 0); /* vs num uniforms */
+        cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
+        cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
+        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
+        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+        /* VC4_DIRTY_COMPILED_CS */
+        cl_u16(&shader_rec, 0); /* cs num uniforms */
+        cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
+        cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
+        cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
+        cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
 
         uint32_t max_index = 0xffff;
-        uint32_t vpm_offset = 0;
         for (int i = 0; i < vtx->num_elements; i++) {
                 struct pipe_vertex_element *elem = &vtx->pipe[i];
                 struct pipe_vertex_buffer *vb =
                         &vertexbuf->vb[elem->vertex_buffer_index];
                 struct vc4_resource *rsc = vc4_resource(vb->buffer);
-                uint32_t offset = vb->buffer_offset + elem->src_offset;
+                /* not vc4->dirty tracked: vc4->last_index_bias */
+                uint32_t offset = (vb->buffer_offset +
+                                   elem->src_offset +
+                                   vb->stride * info->index_bias);
                 uint32_t vb_size = rsc->bo->size - offset;
                 uint32_t elem_size =
                         util_format_get_blocksize(elem->src_format);
 
-                cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
-                cl_u8(&vc4->shader_rec, elem_size - 1);
-                cl_u8(&vc4->shader_rec, vb->stride);
-                cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
-                cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
-
-                vpm_offset += align(elem_size, 4);
+                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
+                cl_u8(&shader_rec, elem_size - 1);
+                cl_u8(&shader_rec, vb->stride);
+                cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
+                cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
 
                 if (vb->stride > 0) {
                         max_index = MIN2(max_index,
@@ -246,50 +193,122 @@
         if (vtx->num_elements == 0) {
                 assert(num_elements_emit == 1);
                 struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
-                cl_reloc(vc4, &vc4->shader_rec, bo, 0);
-                cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
-                cl_u8(&vc4->shader_rec, 0); /* stride */
-                cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
-                cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
+                cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
+                cl_u8(&shader_rec, 16 - 1); /* element size */
+                cl_u8(&shader_rec, 0); /* stride */
+                cl_u8(&shader_rec, 0); /* VS VPM offset */
+                cl_u8(&shader_rec, 0); /* CS VPM offset */
                 vc4_bo_unreference(&bo);
         }
+        cl_end(&vc4->shader_rec, shader_rec);
 
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
         /* the actual draw call. */
-        cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
+        cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
         assert(vtx->num_elements <= 8);
         /* Note that number of attributes == 0 in the packet means 8
          * attributes.  This field also contains the offset into shader_rec.
          */
-        cl_u32(&vc4->bcl, num_elements_emit & 0x7);
+        cl_u32(&bcl, num_elements_emit & 0x7);
+        cl_end(&vc4->bcl, bcl);
+
+        vc4_write_uniforms(vc4, vc4->prog.fs,
+                           &vc4->constbuf[PIPE_SHADER_FRAGMENT],
+                           &vc4->fragtex);
+        vc4_write_uniforms(vc4, vc4->prog.vs,
+                           &vc4->constbuf[PIPE_SHADER_VERTEX],
+                           &vc4->verttex);
+        vc4_write_uniforms(vc4, vc4->prog.cs,
+                           &vc4->constbuf[PIPE_SHADER_VERTEX],
+                           &vc4->verttex);
+
+        vc4->last_index_bias = info->index_bias;
+        vc4->max_index = max_index;
+}
+
+static void
+vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+
+        if (info->mode >= PIPE_PRIM_QUADS) {
+                util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
+                util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
+                util_primconvert_draw_vbo(vc4->primconvert, info);
+                perf_debug("Fallback conversion for %d %s vertices\n",
+                           info->count, u_prim_name(info->mode));
+                return;
+        }
+
+        /* Before setting up the draw, do any fixup blits necessary. */
+        vc4_update_shadow_textures(pctx, &vc4->verttex);
+        vc4_update_shadow_textures(pctx, &vc4->fragtex);
+
+        vc4_get_draw_cl_space(vc4);
+
+        if (vc4->prim_mode != info->mode) {
+                vc4->prim_mode = info->mode;
+                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
+        }
+
+        vc4_start_draw(vc4);
+        vc4_update_compiled_shaders(vc4, info->mode);
+
+        vc4_emit_state(pctx);
+
+        if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
+                           VC4_DIRTY_VTXSTATE |
+                           VC4_DIRTY_PRIM_MODE |
+                           VC4_DIRTY_RASTERIZER |
+                           VC4_DIRTY_COMPILED_CS |
+                           VC4_DIRTY_COMPILED_VS |
+                           VC4_DIRTY_COMPILED_FS |
+                           vc4->prog.cs->uniform_dirty_bits |
+                           vc4->prog.vs->uniform_dirty_bits |
+                           vc4->prog.fs->uniform_dirty_bits)) ||
+            vc4->last_index_bias != info->index_bias) {
+                vc4_emit_gl_shader_state(vc4, info);
+        }
+
+        vc4->dirty = 0;
 
         /* Note that the primitive type fields match with OpenGL/gallium
          * definitions, up to but not including QUADS.
          */
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
         if (info->indexed) {
-                struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer);
                 uint32_t offset = vc4->indexbuf.offset;
                 uint32_t index_size = vc4->indexbuf.index_size;
-                if (rsc->shadow_parent) {
-                        vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf);
-                        offset = 0;
+                struct pipe_resource *prsc;
+                if (vc4->indexbuf.index_size == 4) {
+                        prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
+                                                           info->count, &offset);
+                        index_size = 2;
+                } else {
+                        prsc = vc4->indexbuf.buffer;
                 }
+                struct vc4_resource *rsc = vc4_resource(prsc);
 
-                cl_start_reloc(&vc4->bcl, 1);
-                cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
-                cl_u8(&vc4->bcl,
+                cl_start_reloc(&vc4->bcl, &bcl, 1);
+                cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
+                cl_u8(&bcl,
                       info->mode |
                       (index_size == 2 ?
                        VC4_INDEX_BUFFER_U16:
                        VC4_INDEX_BUFFER_U8));
-                cl_u32(&vc4->bcl, info->count);
-                cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
-                cl_u32(&vc4->bcl, max_index);
+                cl_u32(&bcl, info->count);
+                cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
+                cl_u32(&bcl, vc4->max_index);
+
+                if (vc4->indexbuf.index_size == 4)
+                        pipe_resource_reference(&prsc, NULL);
         } else {
-                cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
-                cl_u8(&vc4->bcl, info->mode);
-                cl_u32(&vc4->bcl, info->count);
-                cl_u32(&vc4->bcl, info->start);
+                cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
+                cl_u8(&bcl, info->mode);
+                cl_u32(&bcl, info->count);
+                cl_u32(&bcl, info->start);
         }
+        cl_end(&vc4->bcl, bcl);
 
         if (vc4->zsa && vc4->zsa->base.depth.enabled) {
                 vc4->resolve |= PIPE_CLEAR_DEPTH;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_drm.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_drm.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_drm.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_drm.h	2015-09-16 14:36:09.000000000 +0000
@@ -31,13 +31,24 @@
 #define DRM_VC4_WAIT_BO                           0x02
 #define DRM_VC4_CREATE_BO                         0x03
 #define DRM_VC4_MMAP_BO                           0x04
+#define DRM_VC4_CREATE_SHADER_BO                  0x05
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
 #define DRM_IOCTL_VC4_WAIT_BO             DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
 #define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
 #define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+#define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
 
+struct drm_vc4_submit_rcl_surface {
+	uint32_t hindex; /* Handle index, or ~0 if not present. */
+	uint32_t offset; /* Offset to start of buffer. */
+	/*
+         * Bits for either render config (color_ms_write) or load/store packet.
+	 */
+	uint16_t bits;
+	uint16_t pad;
+};
 
 /**
  * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
@@ -62,16 +73,6 @@
 	 */
 	uint64_t bin_cl;
 
-	/* Pointer to the render command list.
-	 *
-	 * The render command list contains a set of packets to load the
-	 * current tile's state (reading from memory, or just clearing it)
-	 * into the GPU, then call into the tile allocation BO to run the
-	 * stored rendering for that tile, then store the tile's state back to
-	 * memory.
-	 */
-	uint64_t render_cl;
-
 	/* Pointer to the shader records.
 	 *
 	 * Shader records are the structures read by the hardware that contain
@@ -102,8 +103,6 @@
 
 	/* Size in bytes of the binner command list. */
 	uint32_t bin_cl_size;
-	/* Size in bytes of the render command list */
-	uint32_t render_cl_size;
 	/* Size in bytes of the set of shader records. */
 	uint32_t shader_rec_size;
 	/* Number of shader records.
@@ -119,8 +118,25 @@
 	/* Number of BO handles passed in (size is that times 4). */
 	uint32_t bo_handle_count;
 
+	/* RCL setup: */
+	uint16_t width;
+	uint16_t height;
+	uint8_t min_x_tile;
+	uint8_t min_y_tile;
+	uint8_t max_x_tile;
+	uint8_t max_y_tile;
+	struct drm_vc4_submit_rcl_surface color_read;
+	struct drm_vc4_submit_rcl_surface color_ms_write;
+	struct drm_vc4_submit_rcl_surface zs_read;
+	struct drm_vc4_submit_rcl_surface zs_write;
+	uint32_t clear_color[2];
+	uint32_t clear_z;
+	uint8_t clear_s;
+
+	uint32_t pad:24;
+
+#define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
 	uint32_t flags;
-	uint32_t pad;
 
 	/* Returned value of the seqno of this render job (for the
 	 * wait ioctl).
@@ -168,6 +184,29 @@
 	uint32_t pad;
 };
 
+/**
+ * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
+ * shader BOs.
+ *
+ * Since allowing a shader to be overwritten while it's also being
+ * executed from would allow privilege escalation, shaders must be
+ * created using this ioctl, and they can't be mmapped later.
+ */
+struct drm_vc4_create_shader_bo {
+	/* Size of the data argument. */
+	uint32_t size;
+	/* Flags, currently must be 0. */
+	uint32_t flags;
+
+	/* Pointer to the data. */
+	uint64_t data;
+
+	/** Returned GEM handle for the BO. */
+	uint32_t handle;
+	/* Pad, must be 0. */
+	uint32_t pad;
+};
+
 /**
  * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
  *
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_emit.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_emit.c	2015-09-16 14:36:09.000000000 +0000
@@ -28,23 +28,24 @@
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
+        struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
         if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
                 float *vpscale = vc4->viewport.scale;
                 float *vptranslate = vc4->viewport.translate;
-                float vp_minx = -fabs(vpscale[0]) + vptranslate[0];
-                float vp_maxx = fabs(vpscale[0]) + vptranslate[0];
-                float vp_miny = -fabs(vpscale[1]) + vptranslate[1];
-                float vp_maxy = fabs(vpscale[1]) + vptranslate[1];
+                float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+                float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+                float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+                float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
                 uint32_t minx = MAX2(vc4->scissor.minx, vp_minx);
                 uint32_t miny = MAX2(vc4->scissor.miny, vp_miny);
                 uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
                 uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
-                cl_u16(&vc4->bcl, minx);
-                cl_u16(&vc4->bcl, miny);
-                cl_u16(&vc4->bcl, maxx - minx);
-                cl_u16(&vc4->bcl, maxy - miny);
+                cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
+                cl_u16(&bcl, minx);
+                cl_u16(&bcl, miny);
+                cl_u16(&bcl, maxx - minx);
+                cl_u16(&bcl, maxy - miny);
 
                 vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
                 vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
@@ -53,47 +54,49 @@
         }
 
         if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
-                cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS);
-                cl_u8(&vc4->bcl,
+                cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
+                cl_u8(&bcl,
                       vc4->rasterizer->config_bits[0] |
                       vc4->zsa->config_bits[0]);
-                cl_u8(&vc4->bcl,
+                cl_u8(&bcl,
                       vc4->rasterizer->config_bits[1] |
                       vc4->zsa->config_bits[1]);
-                cl_u8(&vc4->bcl,
+                cl_u8(&bcl,
                       vc4->rasterizer->config_bits[2] |
                       vc4->zsa->config_bits[2]);
         }
 
         if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
-                cl_u8(&vc4->bcl, VC4_PACKET_DEPTH_OFFSET);
-                cl_u16(&vc4->bcl, vc4->rasterizer->offset_factor);
-                cl_u16(&vc4->bcl, vc4->rasterizer->offset_units);
+                cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
+                cl_u16(&bcl, vc4->rasterizer->offset_factor);
+                cl_u16(&bcl, vc4->rasterizer->offset_units);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_POINT_SIZE);
-                cl_f(&vc4->bcl, vc4->rasterizer->point_size);
+                cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
+                cl_f(&bcl, vc4->rasterizer->point_size);
 
-                cl_u8(&vc4->bcl, VC4_PACKET_LINE_WIDTH);
-                cl_f(&vc4->bcl, vc4->rasterizer->base.line_width);
+                cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
+                cl_f(&bcl, vc4->rasterizer->base.line_width);
         }
 
         if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
-                cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING);
-                cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f);
-                cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f);
-
-                cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING);
-                cl_f(&vc4->bcl, vc4->viewport.translate[2]);
-                cl_f(&vc4->bcl, vc4->viewport.scale[2]);
-
-                cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET);
-                cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]);
-                cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]);
+                cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
+                cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
+                cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
+
+                cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
+                cl_f(&bcl, vc4->viewport.translate[2]);
+                cl_f(&bcl, vc4->viewport.scale[2]);
+
+                cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
+                cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
+                cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
         }
 
         if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
-                cl_u8(&vc4->bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
-                cl_u32(&vc4->bcl, vc4->rasterizer->base.flatshade ?
+                cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
+                cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
                        vc4->prog.fs->color_inputs : 0);
         }
+
+        cl_end(&vc4->bcl, bcl);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_fence.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_fence.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_fence.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_fence.c	2015-09-16 14:36:09.000000000 +0000
@@ -60,16 +60,6 @@
 }
 
 static boolean
-vc4_fence_signalled(struct pipe_screen *pscreen,
-                    struct pipe_fence_handle *pf)
-{
-        struct vc4_screen *screen = vc4_screen(pscreen);
-        struct vc4_fence *f = (struct vc4_fence *)pf;
-
-        return vc4_wait_seqno(screen, f->seqno, 0);
-}
-
-static boolean
 vc4_fence_finish(struct pipe_screen *pscreen,
                  struct pipe_fence_handle *pf,
                  uint64_t timeout_ns)
@@ -77,7 +67,7 @@
         struct vc4_screen *screen = vc4_screen(pscreen);
         struct vc4_fence *f = (struct vc4_fence *)pf;
 
-        return vc4_wait_seqno(screen, f->seqno, timeout_ns);
+        return vc4_wait_seqno(screen, f->seqno, timeout_ns, "fence wait");
 }
 
 struct vc4_fence *
@@ -98,6 +88,5 @@
 vc4_fence_init(struct vc4_screen *screen)
 {
         screen->base.fence_reference = vc4_fence_reference;
-        screen->base.fence_signalled = vc4_fence_signalled;
         screen->base.fence_finish = vc4_fence_finish;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_formats.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_formats.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_formats.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_formats.c	2015-09-16 14:36:09.000000000 +0000
@@ -108,7 +108,7 @@
 static const struct vc4_format *
 get_format(enum pipe_format f)
 {
-        if (f > ARRAY_SIZE(vc4_format_table) ||
+        if (f >= ARRAY_SIZE(vc4_format_table) ||
             !vc4_format_table[f].present)
                 return NULL;
         else
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_job.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_job.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_job.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_job.c	2015-09-16 14:36:09.000000000 +0000
@@ -33,7 +33,6 @@
 vc4_job_init(struct vc4_context *vc4)
 {
         vc4_init_cl(vc4, &vc4->bcl);
-        vc4_init_cl(vc4, &vc4->rcl);
         vc4_init_cl(vc4, &vc4->shader_rec);
         vc4_init_cl(vc4, &vc4->uniforms);
         vc4_init_cl(vc4, &vc4->bo_handles);
@@ -45,12 +44,10 @@
 vc4_job_reset(struct vc4_context *vc4)
 {
         struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
-        for (int i = 0; i < (vc4->bo_handles.next -
-                             vc4->bo_handles.base) / 4; i++) {
+        for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
                 vc4_bo_unreference(&referenced_bos[i]);
         }
         vc4_reset_cl(&vc4->bcl);
-        vc4_reset_cl(&vc4->rcl);
         vc4_reset_cl(&vc4->shader_rec);
         vc4_reset_cl(&vc4->uniforms);
         vc4_reset_cl(&vc4->bo_handles);
@@ -75,6 +72,70 @@
         vc4->draw_max_y = 0;
 }
 
+static void
+vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
+                             struct drm_vc4_submit_rcl_surface *submit_surf,
+                             struct pipe_surface *psurf,
+                             bool is_depth, bool is_write)
+{
+        struct vc4_surface *surf = vc4_surface(psurf);
+
+        if (!surf) {
+                submit_surf->hindex = ~0;
+                return;
+        }
+
+        struct vc4_resource *rsc = vc4_resource(psurf->texture);
+        submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+        submit_surf->offset = surf->offset;
+
+        if (is_depth) {
+                submit_surf->bits =
+                        VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
+                                      VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+
+        } else {
+                submit_surf->bits =
+                        VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
+                                      VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+                        VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
+                                      VC4_LOADSTORE_TILE_BUFFER_BGR565 :
+                                      VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
+                                      VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+        }
+        submit_surf->bits |=
+                VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING);
+
+        if (is_write)
+                rsc->writes++;
+}
+
+static void
+vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4,
+                                struct drm_vc4_submit_rcl_surface *submit_surf,
+                                struct pipe_surface *psurf)
+{
+        struct vc4_surface *surf = vc4_surface(psurf);
+
+        if (!surf) {
+                submit_surf->hindex = ~0;
+                return;
+        }
+
+        struct vc4_resource *rsc = vc4_resource(psurf->texture);
+        submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
+        submit_surf->offset = surf->offset;
+
+        submit_surf->bits =
+                VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
+                              VC4_RENDER_CONFIG_FORMAT_BGR565 :
+                              VC4_RENDER_CONFIG_FORMAT_RGBA8888,
+                              VC4_RENDER_CONFIG_FORMAT) |
+                VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
+
+        rsc->writes++;
+}
+
 /**
  * Submits the job to the kernel and then reinitializes it.
  */
@@ -83,26 +144,48 @@
 {
         if (vc4_debug & VC4_DEBUG_CL) {
                 fprintf(stderr, "BCL:\n");
-                vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false);
-                fprintf(stderr, "RCL:\n");
-                vc4_dump_cl(vc4->rcl.base, vc4->rcl.next - vc4->rcl.base, true);
+                vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
         }
 
         struct drm_vc4_submit_cl submit;
         memset(&submit, 0, sizeof(submit));
 
+        cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t));
+        cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *));
+
+        vc4_submit_setup_rcl_surface(vc4, &submit.color_read,
+                                     vc4->color_read, false, false);
+        vc4_submit_setup_ms_rcl_surface(vc4, &submit.color_ms_write,
+                                        vc4->color_write);
+        vc4_submit_setup_rcl_surface(vc4, &submit.zs_read,
+                                     vc4->zs_read, true, false);
+        vc4_submit_setup_rcl_surface(vc4, &submit.zs_write,
+                                     vc4->zs_write, true, true);
+
         submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
-        submit.bo_handle_count = (vc4->bo_handles.next -
-                                  vc4->bo_handles.base) / 4;
+        submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
         submit.bin_cl = (uintptr_t)vc4->bcl.base;
-        submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
-        submit.render_cl = (uintptr_t)vc4->rcl.base;
-        submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
+        submit.bin_cl_size = cl_offset(&vc4->bcl);
         submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
-        submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
+        submit.shader_rec_size = cl_offset(&vc4->shader_rec);
         submit.shader_rec_count = vc4->shader_rec_count;
         submit.uniforms = (uintptr_t)vc4->uniforms.base;
-        submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
+        submit.uniforms_size = cl_offset(&vc4->uniforms);
+
+        assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
+        submit.min_x_tile = vc4->draw_min_x / 64;
+        submit.min_y_tile = vc4->draw_min_y / 64;
+        submit.max_x_tile = (vc4->draw_max_x - 1) / 64;
+        submit.max_y_tile = (vc4->draw_max_y - 1) / 64;
+        submit.width = vc4->draw_width;
+        submit.height = vc4->draw_height;
+        if (vc4->cleared) {
+                submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
+                submit.clear_color[0] = vc4->clear_color[0];
+                submit.clear_color[1] = vc4->clear_color[1];
+                submit.clear_z = vc4->clear_depth;
+                submit.clear_s = vc4->clear_stencil;
+        }
 
         if (!(vc4_debug & VC4_DEBUG_NORAST)) {
                 int ret;
@@ -122,7 +205,7 @@
 
         if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
                 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
-                                    PIPE_TIMEOUT_INFINITE)) {
+                                    PIPE_TIMEOUT_INFINITE, "sync")) {
                         fprintf(stderr, "Wait failed.\n");
                         abort();
                 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_nir_lower_blend.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_nir_lower_blend.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_nir_lower_blend.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,431 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Implements most of the fixed function fragment pipeline in shader code.
+ *
+ * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
+ * or color mask.  Instead, you read the current contents of the destination
+ * from the tile buffer after having waited for the scoreboard (which is
+ * handled by vc4_qpu_emit.c), then do math using your output color and that
+ * destination value, and update the output color appropriately.
+ */
+
+/**
+ * Lowers fixed-function blending to a load of the destination color and a
+ * series of ALU operations before the store of the output.
+ */
+#include "util/u_format.h"
+#include "vc4_qir.h"
+#include "glsl/nir/nir_builder.h"
+#include "vc4_context.h"
+
+/** Emits a load of the previous fragment color from the tile buffer and returns the loaded SSA def. */
+static nir_ssa_def *
+vc4_nir_get_dst_color(nir_builder *b)
+{
+        nir_intrinsic_instr *load =
+                nir_intrinsic_instr_create(b->shader,
+                                           nir_intrinsic_load_input);
+        load->num_components = 1; /* a single value; the blend lowering unpacks it as unorm 4x8 */
+        load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT; /* selects the tile buffer color read */
+        nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+        nir_builder_instr_insert(b, &load->instr); /* emitted at the builder's current insertion point */
+        return &load->dest.ssa;
+}
+
+static  nir_ssa_def *
+vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb) /* one sRGB-encoded channel -> linear */
+{
+        nir_ssa_def *is_low = nir_flt(b, srgb, nir_imm_float(b, 0.04045)); /* srgb < 0.04045 */
+        nir_ssa_def *low = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92)); /* srgb / 12.92 */
+        nir_ssa_def *high = nir_fpow(b, /* ((srgb + 0.055) / 1.055) ^ 2.4 */
+                                     nir_fmul(b,
+                                              nir_fadd(b, srgb,
+                                                       nir_imm_float(b, 0.055)),
+                                              nir_imm_float(b, 1.0 / 1.055)),
+                                     nir_imm_float(b, 2.4));
+
+        return nir_bcsel(b, is_low, low, high); /* 'low' where is_low holds, otherwise 'high' */
+}
+
+static  nir_ssa_def *
+vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear) /* one linear channel -> sRGB-encoded */
+{
+        nir_ssa_def *is_low = nir_flt(b, linear, nir_imm_float(b, 0.0031308)); /* linear < 0.0031308 */
+        nir_ssa_def *low = nir_fmul(b, linear, nir_imm_float(b, 12.92)); /* linear * 12.92 */
+        nir_ssa_def *high = nir_fsub(b, /* 1.055 * linear ^ 0.41666 - 0.055 */
+                                     nir_fmul(b,
+                                              nir_imm_float(b, 1.055),
+                                              nir_fpow(b,
+                                                       linear,
+                                                       nir_imm_float(b, 0.41666))), /* approximately 1 / 2.4 */
+                                     nir_imm_float(b, 0.055));
+
+        return nir_bcsel(b, is_low, low, high); /* 'low' where is_low holds, otherwise 'high' */
+}
+
+static nir_ssa_def *
+vc4_blend_channel(nir_builder *b,
+                  nir_ssa_def **src,
+                  nir_ssa_def **dst,
+                  unsigned factor,
+                  int channel) /* returns the multiplier that blend factor 'factor' yields for 'channel' */
+{
+        switch(factor) {
+        case PIPE_BLENDFACTOR_ONE:
+                return nir_imm_float(b, 1.0);
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return src[channel];
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return src[3]; /* index 3 is the alpha channel of the 4-entry arrays */
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return dst[3];
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return dst[channel];
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                if (channel != 3) { /* color channels: min(src alpha, 1 - dst alpha) */
+                        return nir_fmin(b,
+                                        src[3],
+                                        nir_fsub(b,
+                                                 nir_imm_float(b, 1.0),
+                                                 dst[3]));
+                } else { /* alpha channel: factor is 1.0 */
+                        return nir_imm_float(b, 1.0);
+                }
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel); /* X + channel picks the matching constant component */
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
+        case PIPE_BLENDFACTOR_ZERO:
+                return nir_imm_float(b, 0.0);
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR: /* every INV_* factor is 1.0 minus its non-inverted value */
+                return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return nir_fsub(b, nir_imm_float(b, 1.0),
+                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return nir_fsub(b, nir_imm_float(b, 1.0),
+                                vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));
+
+        default:
+        case PIPE_BLENDFACTOR_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_SRC1_ALPHA:
+        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+                /* Unsupported (SRC1 factors and unknown values): log and use a factor of 1.0. */
+                fprintf(stderr, "Unknown blend factor %d\n", factor);
+                return nir_imm_float(b, 1.0);
+        }
+}
+
+/**
+ * Combines the factor-weighted source and destination terms using the
+ * PIPE_BLEND_* equation selected by func.
+ */
+static nir_ssa_def *
+vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+               unsigned func)
+{
+        if (func == PIPE_BLEND_ADD)
+                return nir_fadd(b, src, dst);
+        if (func == PIPE_BLEND_SUBTRACT)
+                return nir_fsub(b, src, dst);
+        if (func == PIPE_BLEND_REVERSE_SUBTRACT)
+                return nir_fsub(b, dst, src);
+        if (func == PIPE_BLEND_MIN)
+                return nir_fmin(b, src, dst);
+        if (func == PIPE_BLEND_MAX)
+                return nir_fmax(b, src, dst);
+
+        /* Unsupported equation: report it and pass the source term through. */
+        fprintf(stderr, "Unknown blend func %d\n", func);
+        return src;
+}
+
+static void
+vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+                nir_ssa_def **src_color, nir_ssa_def **dst_color) /* writes the four blended channels to result[0..3] */
+{
+        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+
+        if (!blend->blend_enable) { /* blending off: the source color passes through unchanged */
+                for (int i = 0; i < 4; i++)
+                        result[i] = src_color[i];
+                return;
+        }
+
+        /* Clamp the src color to [0, 1].  Dest is already clamped. */
+        for (int i = 0; i < 4; i++)
+                src_color[i] = nir_fsat(b, src_color[i]); /* NOTE: overwrites the caller's src_color[] entries */
+
+        nir_ssa_def *src_blend[4], *dst_blend[4];
+        for (int i = 0; i < 4; i++) { /* channels 0-2 use the rgb factors, channel 3 the alpha factors */
+                int src_factor = ((i != 3) ? blend->rgb_src_factor :
+                                  blend->alpha_src_factor);
+                int dst_factor = ((i != 3) ? blend->rgb_dst_factor :
+                                  blend->alpha_dst_factor);
+                src_blend[i] = nir_fmul(b, src_color[i], /* source term = src * src factor */
+                                        vc4_blend_channel(b,
+                                                          src_color, dst_color,
+                                                          src_factor, i));
+                dst_blend[i] = nir_fmul(b, dst_color[i], /* destination term = dst * dst factor */
+                                        vc4_blend_channel(b,
+                                                          src_color, dst_color,
+                                                          dst_factor, i));
+        }
+
+        for (int i = 0; i < 4; i++) { /* combine both terms with the rgb or alpha blend equation */
+                result[i] = vc4_blend_func(b, src_blend[i], dst_blend[i],
+                                           ((i != 3) ? blend->rgb_func :
+                                            blend->alpha_func));
+        }
+}
+
+static nir_ssa_def *
+vc4_logicop(nir_builder *b, int logicop_func,
+            nir_ssa_def *src, nir_ssa_def *dst) /* bitwise combine of src and dst; the caller in this file passes packed colors */
+{
+        switch (logicop_func) {
+        case PIPE_LOGICOP_CLEAR:
+                return nir_imm_int(b, 0); /* all bits clear */
+        case PIPE_LOGICOP_NOR:
+                return nir_inot(b, nir_ior(b, src, dst));
+        case PIPE_LOGICOP_AND_INVERTED:
+                return nir_iand(b, nir_inot(b, src), dst); /* "inverted" ops negate src */
+        case PIPE_LOGICOP_COPY_INVERTED:
+                return nir_inot(b, src);
+        case PIPE_LOGICOP_AND_REVERSE:
+                return nir_iand(b, src, nir_inot(b, dst)); /* "reverse" ops negate dst */
+        case PIPE_LOGICOP_INVERT:
+                return nir_inot(b, dst);
+        case PIPE_LOGICOP_XOR:
+                return nir_ixor(b, src, dst);
+        case PIPE_LOGICOP_NAND:
+                return nir_inot(b, nir_iand(b, src, dst));
+        case PIPE_LOGICOP_AND:
+                return nir_iand(b, src, dst);
+        case PIPE_LOGICOP_EQUIV:
+                return nir_inot(b, nir_ixor(b, src, dst));
+        case PIPE_LOGICOP_NOOP:
+                return dst; /* destination is left as it was */
+        case PIPE_LOGICOP_OR_INVERTED:
+                return nir_ior(b, nir_inot(b, src), dst);
+        case PIPE_LOGICOP_OR_REVERSE:
+                return nir_ior(b, src, nir_inot(b, dst));
+        case PIPE_LOGICOP_OR:
+                return nir_ior(b, src, dst);
+        case PIPE_LOGICOP_SET:
+                return nir_imm_int(b, ~0); /* all bits set */
+        default:
+                fprintf(stderr, "Unknown logic op %d\n", logicop_func);
+                /* FALLTHROUGH: unknown ops behave like COPY */
+        case PIPE_LOGICOP_COPY:
+                return src;
+        }
+}
+
+static nir_ssa_def *
+vc4_nir_pipe_compare_func(nir_builder *b, int func,
+                          nir_ssa_def *src0, nir_ssa_def *src1)
+{
+        switch (func) { /* emits "src0 <func> src1"; GREATER and LEQUAL swap the operands of flt/fge */
+        case PIPE_FUNC_LESS:
+                return nir_flt(b, src0, src1);
+        case PIPE_FUNC_LEQUAL:
+                return nir_fge(b, src1, src0);
+        case PIPE_FUNC_GREATER:
+                return nir_flt(b, src1, src0);
+        case PIPE_FUNC_GEQUAL:
+                return nir_fge(b, src0, src1);
+        case PIPE_FUNC_EQUAL:
+                return nir_feq(b, src0, src1);
+        case PIPE_FUNC_NOTEQUAL:
+                return nir_fne(b, src0, src1);
+        case PIPE_FUNC_ALWAYS:
+                return nir_imm_int(b, ~0);
+        case PIPE_FUNC_NEVER:
+                return nir_imm_int(b, 0);
+        default: /* unknown function: report it, then behave like NEVER */
+                fprintf(stderr, "Unknown compare func %d\n", func);
+                return nir_imm_int(b, 0);
+        }
+}
+
+static void
+vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
+                                nir_ssa_def *alpha) /* alpha: the value compared against the alpha reference */
+{
+        if (!c->fs_key->alpha_test) /* alpha test disabled in the shader key: emit nothing */
+                return;
+
+        nir_ssa_def *alpha_ref =
+                vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
+        nir_ssa_def *condition = /* result of "alpha <alpha_test_func> alpha_ref" */
+                vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
+                                          alpha, alpha_ref);
+
+        nir_intrinsic_instr *discard =
+                nir_intrinsic_instr_create(b->shader,
+                                           nir_intrinsic_discard_if);
+        discard->num_components = 1;
+        discard->src[0] = nir_src_for_ssa(nir_inot(b, condition)); /* discard when the comparison does NOT hold */
+        nir_builder_instr_insert(b, &discard->instr);
+}
+
+static void
+vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
+                          nir_intrinsic_instr *intr) /* intr: the vec4 color store_output being lowered */
+{
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+        /* Pull out the float src/dst color components (dst arrives packed as unorm 4x8). */
+        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
+        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
+        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
+        for (unsigned i = 0; i < 4; i++) {
+                src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
+                unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+        }
+
+        /* Unswizzle the destination color using the render target format's swizzle. */
+        nir_ssa_def *dst_color[4];
+        for (unsigned i = 0; i < 4; i++) {
+                dst_color[i] = vc4_nir_get_swizzled_channel(b,
+                                                            unpacked_dst_color,
+                                                            format_swiz[i]);
+        }
+
+        vc4_nir_emit_alpha_test_discard(c, b, src_color[3]); /* alpha test uses the source alpha */
+
+        /* Turn dst color to linear (color channels only; alpha is left as is). */
+        if (util_format_is_srgb(color_format)) {
+                for (int i = 0; i < 3; i++)
+                        dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
+        }
+
+        nir_ssa_def *blend_color[4];
+        vc4_do_blending(c, b, blend_color, src_color, dst_color);
+
+        /* sRGB encode the output color (color channels only). */
+        if (util_format_is_srgb(color_format)) {
+                for (int i = 0; i < 3; i++)
+                        blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
+        }
+
+        nir_ssa_def *swizzled_outputs[4]; /* blended channels put back into the format's order */
+        for (int i = 0; i < 4; i++) {
+                swizzled_outputs[i] =
+                        vc4_nir_get_swizzled_channel(b, blend_color,
+                                                     format_swiz[i]);
+        }
+
+        nir_ssa_def *packed_color =
+                nir_pack_unorm_4x8(b,
+                                   nir_vec4(b,
+                                            swizzled_outputs[0],
+                                            swizzled_outputs[1],
+                                            swizzled_outputs[2],
+                                            swizzled_outputs[3]));
+
+        packed_color = vc4_logicop(b, c->fs_key->logicop_func, /* logic op works on the packed words */
+                                   packed_color, packed_dst_color);
+
+        /* Keep the original dst byte for every channel whose colormask bit is
+         * clear.  The byte mask is unsigned so the shift for byte 3 is defined.
+         */
+        uint32_t colormask = 0xffffffff;
+        for (int i = 0; i < 4; i++) {
+                if (format_swiz[i] < 4 &&
+                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
+                        colormask &= ~(0xffu << (i * 8));
+                }
+        }
+        packed_color = nir_ior(b, /* (new & mask) | (dst & ~mask) */
+                               nir_iand(b, packed_color,
+                                        nir_imm_int(b, colormask)),
+                               nir_iand(b, packed_dst_color,
+                                        nir_imm_int(b, ~colormask)));
+
+        /* Turn the old vec4 output into a store of the packed color. */
+        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
+                              nir_src_for_ssa(packed_color));
+        intr->num_components = 1;
+}
+
+static bool
+vc4_nir_lower_blend_block(nir_block *block, void *state) /* per-block callback; state is the struct vc4_compile */
+{
+        struct vc4_compile *c = state;
+
+        nir_foreach_instr(block, instr) {
+                if (instr->type != nir_instr_type_intrinsic)
+                        continue;
+                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+                if (intr->intrinsic != nir_intrinsic_store_output) /* only output stores are candidates */
+                        continue;
+
+                nir_variable *output_var = NULL;
+                foreach_list_typed(nir_variable, var, node, &c->s->outputs) { /* find the variable behind this store */
+                        if (var->data.driver_location == intr->const_index[0]) {
+                                output_var = var;
+                                break;
+                        }
+                }
+                assert(output_var);
+                unsigned semantic_name = output_var->data.location;
+
+                if (semantic_name != TGSI_SEMANTIC_COLOR) /* only color outputs are lowered here */
+                        continue;
+
+                nir_function_impl *impl =
+                        nir_cf_node_get_function(&block->cf_node);
+                nir_builder b;
+                nir_builder_init(&b, impl);
+                nir_builder_insert_before_instr(&b, &intr->instr); /* new code goes in front of the store */
+                vc4_nir_lower_blend_instr(c, &b, intr);
+        }
+        return true; /* always returns true */
+}
+
+void
+vc4_nir_lower_blend(struct vc4_compile *c) /* entry point: runs the blend lowering over the shader c->s */
+{
+        nir_foreach_overload(c->s, overload) {
+                if (overload->impl) { /* overloads without an implementation are skipped */
+                        nir_foreach_block(overload->impl,
+                                          vc4_nir_lower_blend_block, c);
+
+                        nir_metadata_preserve(overload->impl, /* this pass adds/rewrites instructions only */
+                                              nir_metadata_block_index |
+                                              nir_metadata_dominance);
+                }
+        }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_nir_lower_io.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_nir_lower_io.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_nir_lower_io.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_nir_lower_io.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,291 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vc4_qir.h"
+#include "tgsi/tgsi_info.h"
+#include "glsl/nir/nir_builder.h"
+
+/**
+ * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
+ * something amenable to the VC4 architecture.
+ *
+ * Currently, it splits inputs, outputs, and uniforms into scalars, drops any
+ * non-position outputs in coordinate shaders, and fixes up the addressing on
+ * indirect uniform loads.
+ */
+
+static void
+replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
+                            nir_ssa_def **comps) /* comps[0..3]: the scalar replacements for intr's result */
+{
+
+        /* Batch things back together into a vec4.  This will get split by the
+         * later ALU scalarization pass.
+         */
+        nir_ssa_def *vec = nir_vec4(b, comps[0], comps[1], comps[2], comps[3]);
+
+        /* Replace the old intrinsic with a reference to our reconstructed
+         * vec4.
+         */
+        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec),
+                                 ralloc_parent(b->impl));
+        nir_instr_remove(&intr->instr); /* all uses now read 'vec', so the original is removed */
+}
+
+static void
+vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
+                    nir_intrinsic_instr *intr) /* intr: a load_input intrinsic */
+{
+        nir_builder_insert_before_instr(b, &intr->instr); /* replacement code goes in front of the load */
+
+        if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
+            VC4_NIR_TLB_COLOR_READ_INPUT) {
+                /* This doesn't need any lowering. */
+                return;
+        }
+
+        nir_variable *input_var = NULL;
+        foreach_list_typed(nir_variable, var, node, &c->s->inputs) { /* find the variable behind this load */
+                if (var->data.driver_location == intr->const_index[0]) {
+                        input_var = var;
+                        break;
+                }
+        }
+        assert(input_var);
+        int semantic_name = input_var->data.location;
+        int semantic_index = input_var->data.index;
+
+        /* All TGSI-to-NIR inputs are vec4. */
+        assert(intr->num_components == 4);
+
+        /* Generate scalar loads equivalent to the original VEC4. */
+        nir_ssa_def *dests[4];
+        for (unsigned i = 0; i < intr->num_components; i++) {
+                nir_intrinsic_instr *intr_comp =
+                        nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
+                intr_comp->num_components = 1;
+                intr_comp->const_index[0] = intr->const_index[0] * 4 + i; /* vec4 slot -> scalar index */
+                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+                nir_builder_instr_insert(b, &intr_comp->instr);
+
+                dests[i] = &intr_comp->dest.ssa;
+        }
+
+        switch (c->stage) { /* per-stage fixups of the loaded channels */
+        case QSTAGE_FRAG:
+                switch (semantic_name) {
+                case TGSI_SEMANTIC_FACE: /* x = 1.0 - 2.0 * float(loaded x); yzw = (0, 0, 1) */
+                        dests[0] = nir_fsub(b,
+                                            nir_imm_float(b, 1.0),
+                                            nir_fmul(b,
+                                                     nir_i2f(b, dests[0]),
+                                                     nir_imm_float(b, 2.0)));
+                        dests[1] = nir_imm_float(b, 0.0);
+                        dests[2] = nir_imm_float(b, 0.0);
+                        dests[3] = nir_imm_float(b, 1.0);
+                        break;
+                case TGSI_SEMANTIC_GENERIC:
+                        if (c->fs_key->point_sprite_mask &
+                            (1 << semantic_index)) { /* this generic input is flagged as a point sprite coord */
+                                if (!c->fs_key->is_points) { /* not drawing points: x and y are forced to 0 */
+                                        dests[0] = nir_imm_float(b, 0.0);
+                                        dests[1] = nir_imm_float(b, 0.0);
+                                }
+                                if (c->fs_key->point_coord_upper_left) { /* flip y: y = 1.0 - y */
+                                        dests[1] = nir_fsub(b,
+                                                            nir_imm_float(b, 1.0),
+                                                            dests[1]);
+                                }
+                                dests[2] = nir_imm_float(b, 0.0); /* zw are always (0, 1) */
+                                dests[3] = nir_imm_float(b, 1.0);
+                        }
+                        break;
+                }
+                break;
+        case QSTAGE_COORD:
+        case QSTAGE_VERT: /* no fixups for the vertex and coordinate stages */
+                break;
+        }
+
+        replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+static void
+vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
+                     nir_intrinsic_instr *intr) /* intr: a store_output intrinsic */
+{
+        nir_variable *output_var = NULL;
+        foreach_list_typed(nir_variable, var, node, &c->s->outputs) { /* find the variable behind this store */
+                if (var->data.driver_location == intr->const_index[0]) {
+                        output_var = var;
+                        break;
+                }
+        }
+        assert(output_var);
+        unsigned semantic_name = output_var->data.location;
+
+        if (c->stage == QSTAGE_COORD && /* coordinate shaders keep only position and point size */
+            (semantic_name != TGSI_SEMANTIC_POSITION &&
+             semantic_name != TGSI_SEMANTIC_PSIZE)) {
+                nir_instr_remove(&intr->instr);
+                return;
+        }
+
+        /* Color output is lowered by vc4_nir_lower_blend(). */
+        if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
+                intr->const_index[0] *= 4; /* only rescale the index (vec4 slot -> scalar index) */
+                return;
+        }
+
+        /* All TGSI-to-NIR outputs are VEC4. */
+        assert(intr->num_components == 4);
+
+        nir_builder_insert_before_instr(b, &intr->instr);
+
+        for (unsigned i = 0; i < intr->num_components; i++) { /* one scalar store per channel */
+                nir_intrinsic_instr *intr_comp =
+                        nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
+                intr_comp->num_components = 1;
+                intr_comp->const_index[0] = intr->const_index[0] * 4 + i; /* vec4 slot -> scalar index */
+
+                assert(intr->src[0].is_ssa);
+                intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b, /* channel i of the stored value */
+                                                                intr->src[0].ssa,
+                                                                &i, 1, false));
+                nir_builder_instr_insert(b, &intr_comp->instr);
+        }
+
+        nir_instr_remove(&intr->instr); /* the vec4 store is replaced by the scalar stores */
+}
+
+static void
+vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
+                      nir_intrinsic_instr *intr) /* intr: load_uniform or load_uniform_indirect */
+{
+        /* All TGSI-to-NIR uniform loads are vec4, but we may create dword
+         * loads in our lowering passes.
+         */
+        if (intr->num_components == 1) /* already scalar: nothing to do */
+                return;
+        assert(intr->num_components == 4);
+
+        nir_builder_insert_before_instr(b, &intr->instr);
+
+        /* Generate scalar loads equivalent to the original VEC4. */
+        nir_ssa_def *dests[4];
+        for (unsigned i = 0; i < intr->num_components; i++) {
+                nir_intrinsic_instr *intr_comp =
+                        nir_intrinsic_instr_create(c->s, intr->intrinsic); /* same intrinsic kind as the original */
+                intr_comp->num_components = 1;
+                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+
+                if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
+                        /* Convert the variable TGSI register index to a byte
+                         * offset.
+                         */
+                        intr_comp->src[0] =
+                                nir_src_for_ssa(nir_ishl(b,
+                                                         intr->src[0].ssa,
+                                                         nir_imm_int(b, 4))); /* << 4 multiplies by 16 */
+
+                        /* Convert the offset to be a byte index, too. */
+                        intr_comp->const_index[0] = (intr->const_index[0] * 16 +
+                                                     i * 4);
+                } else {
+                        /* We want a dword index for non-indirect uniform
+                         * loads.
+                         */
+                        intr_comp->const_index[0] = (intr->const_index[0] * 4 +
+                                                     i);
+                }
+
+                dests[i] = &intr_comp->dest.ssa;
+
+                nir_builder_instr_insert(b, &intr_comp->instr);
+        }
+
+        replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+/**
+ * Routes one instruction to the matching I/O lowering helper; instructions
+ * that are not input/output/uniform intrinsics are left untouched.
+ */
+static void
+vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
+                       struct nir_instr *instr)
+{
+        nir_intrinsic_instr *io;
+
+        /* Only intrinsics can be I/O accesses; leave everything else. */
+        if (instr->type != nir_instr_type_intrinsic)
+                return;
+        io = nir_instr_as_intrinsic(instr);
+
+        /* Any intrinsic not listed below is left untouched. */
+        if (io->intrinsic == nir_intrinsic_load_input) {
+                vc4_nir_lower_input(c, b, io);
+        } else if (io->intrinsic == nir_intrinsic_store_output) {
+                vc4_nir_lower_output(c, b, io);
+        } else if (io->intrinsic == nir_intrinsic_load_uniform ||
+                   io->intrinsic == nir_intrinsic_load_uniform_indirect) {
+                /* Direct and indirect uniform loads share a helper. */
+                vc4_nir_lower_uniform(c, b, io);
+        }
+}
+
+static bool
+vc4_nir_lower_io_block(nir_block *block, void *arg) /* per-block callback; arg is the struct vc4_compile */
+{
+        struct vc4_compile *c = arg;
+        nir_function_impl *impl =
+                nir_cf_node_get_function(&block->cf_node);
+
+        nir_builder b;
+        nir_builder_init(&b, impl);
+
+        nir_foreach_instr_safe(block, instr) /* _safe variant: the lowering helpers remove the current instruction */
+                vc4_nir_lower_io_instr(c, &b, instr);
+
+        return true; /* always returns true */
+}
+
+static bool
+vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl) /* lowers I/O in every block of impl */
+{
+        nir_foreach_block(impl, vc4_nir_lower_io_block, c);
+
+        nir_metadata_preserve(impl, nir_metadata_block_index | /* this pass adds/removes instructions only */
+                              nir_metadata_dominance);
+
+        return true; /* always returns true; the caller in this file ignores it */
+}
+
+void
+vc4_nir_lower_io(struct vc4_compile *c) /* entry point: runs the I/O lowering over the shader c->s */
+{
+        nir_foreach_overload(c->s, overload) {
+                if (overload->impl) /* overloads without an implementation are skipped */
+                        vc4_nir_lower_io_impl(c, overload->impl);
+        }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_algebraic.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_algebraic.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_algebraic.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_algebraic.c	2015-09-16 14:36:09.000000000 +0000
@@ -136,25 +136,13 @@
 qir_opt_algebraic(struct vc4_compile *c)
 {
         bool progress = false;
-        struct simple_node *node;
-
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
 
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 switch (inst->op) {
                 case QOP_SEL_X_Y_ZS:
                 case QOP_SEL_X_Y_ZC:
                 case QOP_SEL_X_Y_NS:
                 case QOP_SEL_X_Y_NC:
-                        if (qir_reg_equals(inst->src[0], inst->src[1])) {
-                                /* Turn "dst = (sf == x) ? a : a)" into
-                                 * "dst = a"
-                                 */
-                                replace_with_mov(c, inst, inst->src[1]);
-                                progress = true;
-                                break;
-                        }
-
                         if (is_zero(c, inst->src[1])) {
                                 /* Replace references to a 0 uniform value
                                  * with the SEL_X_0 equivalent.
@@ -210,6 +198,7 @@
 
                         /* FADD(a, FSUB(0, b)) -> FSUB(a, b) */
                         if (inst->src[1].file == QFILE_TEMP &&
+                            c->defs[inst->src[1].index] &&
                             c->defs[inst->src[1].index]->op == QOP_FSUB) {
                                 struct qinst *fsub = c->defs[inst->src[1].index];
                                 if (is_zero(c, fsub->src[0])) {
@@ -224,6 +213,7 @@
 
                         /* FADD(FSUB(0, b), a) -> FSUB(a, b) */
                         if (inst->src[0].file == QFILE_TEMP &&
+                            c->defs[inst->src[0].index] &&
                             c->defs[inst->src[0].index]->op == QOP_FSUB) {
                                 struct qinst *fsub = c->defs[inst->src[0].index];
                                 if (is_zero(c, fsub->src[0])) {
@@ -239,18 +229,20 @@
                         break;
 
                 case QOP_FMUL:
-                        if (replace_x_0_with_0(c, inst, 0) ||
-                            replace_x_0_with_0(c, inst, 1) ||
-                            fmul_replace_one(c, inst, 0) ||
-                            fmul_replace_one(c, inst, 1)) {
+                        if (!inst->dst.pack &&
+                            (replace_x_0_with_0(c, inst, 0) ||
+                             replace_x_0_with_0(c, inst, 1) ||
+                             fmul_replace_one(c, inst, 0) ||
+                             fmul_replace_one(c, inst, 1))) {
                                 progress = true;
                                 break;
                         }
                         break;
 
                 case QOP_MUL24:
-                        if (replace_x_0_with_0(c, inst, 0) ||
-                            replace_x_0_with_0(c, inst, 1)) {
+                        if (!inst->dst.pack &&
+                            (replace_x_0_with_0(c, inst, 0) ||
+                             replace_x_0_with_0(c, inst, 1))) {
                                 progress = true;
                                 break;
                         }
@@ -281,6 +273,14 @@
                                 progress = true;
                                 break;
                         }
+                        break;
+
+                case QOP_RCP:
+                        if (is_1f(c, inst->src[0])) {
+                                replace_with_mov(c, inst, inst->src[0]);
+                                progress = true;
+                                break;
+                        }
                         break;
 
                 default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_constant_folding.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_constant_folding.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_constant_folding.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_constant_folding.c	2015-09-16 14:36:09.000000000 +0000
@@ -98,10 +98,8 @@
 qir_opt_constant_folding(struct vc4_compile *c)
 {
         bool progress = false;
-        struct simple_node *node;
 
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 if (constant_fold(c, inst))
                         progress = true;
         }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c	2015-09-16 14:36:09.000000000 +0000
@@ -38,25 +38,28 @@
 qir_opt_copy_propagation(struct vc4_compile *c)
 {
         bool progress = false;
-        struct simple_node *node;
         bool debug = false;
-        struct qreg *movs = calloc(c->num_temps, sizeof(struct qreg));
-
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
 
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                         int index = inst->src[i].index;
                         if (inst->src[i].file == QFILE_TEMP &&
-                            (movs[index].file == QFILE_TEMP ||
-                             movs[index].file == QFILE_UNIF)) {
+                            c->defs[index] &&
+                            c->defs[index]->op == QOP_MOV &&
+                            (c->defs[index]->src[0].file == QFILE_TEMP ||
+                             c->defs[index]->src[0].file == QFILE_UNIF)) {
+                                /* If it has a pack, it shouldn't be an SSA
+                                 * def.
+                                 */
+                                assert(!c->defs[index]->dst.pack);
+
                                 if (debug) {
                                         fprintf(stderr, "Copy propagate: ");
                                         qir_dump_inst(c, inst);
                                         fprintf(stderr, "\n");
                                 }
 
-                                inst->src[i] = movs[index];
+                                inst->src[i] = c->defs[index]->src[0];
 
                                 if (debug) {
                                         fprintf(stderr, "to: ");
@@ -67,17 +70,6 @@
                                 progress = true;
                         }
                 }
-
-                if (inst->op == QOP_MOV &&
-                    inst->dst.file == QFILE_TEMP &&
-                    inst->src[0].file != QFILE_VPM &&
-                    !(inst->src[0].file == QFILE_TEMP &&
-                      (c->defs[inst->src[0].index]->op == QOP_TEX_RESULT ||
-                       c->defs[inst->src[0].index]->op == QOP_TLB_COLOR_READ))) {
-                        movs[inst->dst.index] = inst->src[0];
-                }
         }
-
-        free(movs);
         return progress;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_cse.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_cse.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_cse.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_cse.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,8 +46,7 @@
         struct qreg src[4];
         /**
          * If the instruction depends on the flags, how many SFs have been
-         * seen before this instruction, or if it depends on r4, how many r4
-         * writes have been seen.
+         * seen before this instruction.
          */
         uint32_t implicit_arg_update_count;
 };
@@ -63,8 +62,7 @@
 
 static struct qinst *
 vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
-             struct qinst *inst, uint32_t sf_count,
-             uint32_t r4_count)
+             struct qinst *inst, uint32_t sf_count)
 {
         if (inst->dst.file != QFILE_TEMP ||
             inst->op == QOP_MOV ||
@@ -79,8 +77,6 @@
                qir_get_op_nsrc(inst->op) * sizeof(key.src[0]));
         if (qir_depends_on_flags(inst))
                 key.implicit_arg_update_count = sf_count;
-        if (qir_reads_r4(inst))
-                key.implicit_arg_update_count = r4_count;
 
         uint32_t hash = _mesa_hash_data(&key, sizeof(key));
         struct hash_entry *entry =
@@ -121,27 +117,24 @@
 qir_opt_cse(struct vc4_compile *c)
 {
         bool progress = false;
-        struct simple_node *node, *t;
-        uint32_t sf_count = 0, r4_count = 0;
+        uint32_t sf_count = 0;
 
         struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
                                                         inst_key_equals);
         if (!ht)
                 return false;
 
-        foreach_s(node, t, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
-
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 if (qir_has_side_effects(c, inst) ||
-                    qir_has_side_effect_reads(c, inst)) {
+                    qir_has_side_effect_reads(c, inst) ||
+                    inst->op == QOP_TLB_COLOR_READ) {
                         continue;
                 }
 
                 if (inst->sf) {
                         sf_count++;
                 } else {
-                        struct qinst *cse = vc4_find_cse(c, ht, inst,
-                                                         sf_count, r4_count);
+                        struct qinst *cse = vc4_find_cse(c, ht, inst, sf_count);
                         if (cse) {
                                 inst->src[0] = cse->dst;
                                 for (int i = 1; i < qir_get_op_nsrc(inst->op);
@@ -157,9 +150,6 @@
                                 }
                         }
                 }
-
-                if (qir_writes_r4(inst))
-                        r4_count++;
         }
 
         ralloc_free(ht);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_dead_code.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_dead_code.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_dead_code.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_dead_code.c	2015-09-16 14:36:09.000000000 +0000
@@ -86,7 +86,7 @@
         /* Whether we're eliminating texture setup currently. */
         bool dce_tex = false;
 
-        struct simple_node *node, *t;
+        struct list_head *node, *t;
         for (node = c->instructions.prev, t = node->prev;
              &c->instructions != node;
              node = t, t = t->prev) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_small_immediates.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_small_immediates.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_small_immediates.c	2015-09-16 14:36:09.000000000 +0000
@@ -37,11 +37,8 @@
 qir_opt_small_immediates(struct vc4_compile *c)
 {
         bool progress = false;
-        struct simple_node *node;
-
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
 
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 /* The small immediate value sits in the raddr B field, so we
                  * can't have 2 small immediates in one instruction (unless
                  * they're the same value, but that should be optimized away
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_opt_vpm_writes.c	2015-09-16 14:36:09.000000000 +0000
@@ -37,15 +37,12 @@
                 return false;
 
         bool progress = false;
-        struct simple_node *node;
         struct qinst *vpm_writes[64] = { 0 };
         uint32_t use_count[c->num_temps];
         uint32_t vpm_write_count = 0;
         memset(&use_count, 0, sizeof(use_count));
 
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
-
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 switch (inst->dst.file) {
                 case QFILE_VPM:
                         vpm_writes[vpm_write_count++] = inst;
@@ -71,7 +68,7 @@
                         continue;
 
                 struct qinst *inst = c->defs[temp];
-                if (qir_is_multi_instruction(inst))
+                if (!inst || qir_is_multi_instruction(inst))
                         continue;
 
                 if (qir_depends_on_flags(inst) || inst->sf)
@@ -82,27 +79,12 @@
                         continue;
                 }
 
-                /* A QOP_TEX_RESULT destination is r4, so we can't move
-                 * accesses to it past another QOP_TEX_RESULT which would
-                 * update it.
-                 */
-                int src;
-                for (src = 0; src < qir_get_op_nsrc(inst->op); src++) {
-                        if (inst->src[src].file == QFILE_TEMP) {
-                                if (c->defs[inst->src[src].index]->op ==
-                                    QOP_TEX_RESULT) {
-                                        break;
-                                }
-                        }
-                }
-                if (src != qir_get_op_nsrc(inst->op))
-                        continue;
-
                 /* Move the generating instruction to the end of the program
                  * to maintain the order of the VPM writes.
                  */
                 assert(!vpm_writes[i]->sf);
-                move_to_tail(&vpm_writes[i]->link, &inst->link);
+                list_del(&inst->link);
+                list_addtail(&inst->link, &vpm_writes[i]->link);
                 qir_remove_instruction(c, vpm_writes[i]);
 
                 c->defs[inst->dst.index] = NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_packet.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_packet.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_packet.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_packet.h	1970-01-01 00:00:00.000000000 +0000
@@ -1,335 +0,0 @@
-/*
- * Copyright © 2014 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef VC4_PACKET_H
-#define VC4_PACKET_H
-
-enum vc4_packet {
-        VC4_PACKET_HALT = 0,
-        VC4_PACKET_NOP = 1,
-
-        VC4_PACKET_FLUSH = 4,
-        VC4_PACKET_FLUSH_ALL = 5,
-        VC4_PACKET_START_TILE_BINNING = 6,
-        VC4_PACKET_INCREMENT_SEMAPHORE = 7,
-        VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
-
-        VC4_PACKET_BRANCH = 16,
-        VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
-
-        VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
-        VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
-        VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
-        VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
-        VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
-        VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
-
-        VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
-        VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
-
-        VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
-        VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
-
-        VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
-
-        VC4_PACKET_GL_SHADER_STATE = 64,
-        VC4_PACKET_NV_SHADER_STATE = 65,
-        VC4_PACKET_VG_SHADER_STATE = 66,
-
-        VC4_PACKET_CONFIGURATION_BITS = 96,
-        VC4_PACKET_FLAT_SHADE_FLAGS = 97,
-        VC4_PACKET_POINT_SIZE = 98,
-        VC4_PACKET_LINE_WIDTH = 99,
-        VC4_PACKET_RHT_X_BOUNDARY = 100,
-        VC4_PACKET_DEPTH_OFFSET = 101,
-        VC4_PACKET_CLIP_WINDOW = 102,
-        VC4_PACKET_VIEWPORT_OFFSET = 103,
-        VC4_PACKET_Z_CLIPPING = 104,
-        VC4_PACKET_CLIPPER_XY_SCALING = 105,
-        VC4_PACKET_CLIPPER_Z_SCALING = 106,
-
-        VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
-        VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
-        VC4_PACKET_CLEAR_COLORS = 114,
-        VC4_PACKET_TILE_COORDINATES = 115,
-
-        /* Not an actual hardware packet -- this is what we use to put
-         * references to GEM bos in the command stream, since we need the u32
-         * int the actual address packet in order to store the offset from the
-         * start of the BO.
-         */
-        VC4_PACKET_GEM_HANDLES = 254,
-} __attribute__ ((__packed__));
-
-
-#define VC4_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
-/* Using the GNU statement expression extension */
-#define VC4_SET_FIELD(value, field)                                       \
-        ({                                                                \
-                uint32_t fieldval = (value) << field ## _SHIFT;		  \
-                assert((fieldval & ~ field ## _MASK) == 0);               \
-                fieldval & field ## _MASK;                                \
-         })
-
-#define VC4_GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
-
-/** @{
- * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
- * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
-*/
-#define VC4_TILING_FORMAT_LINEAR    0
-#define VC4_TILING_FORMAT_T         1
-#define VC4_TILING_FORMAT_LT        2
-/** @} */
-
-/** @{
- *
- * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
- * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
- */
-
-#define VC4_LOADSTORE_TILE_BUFFER_EOF                  (1 << 3)
-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2)
-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS      (1 << 1)
-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR   (1 << 0)
-
-/** @} */
-
-/** @{
- *
- * byte 1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
- * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
- */
-#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 7)
-#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR     (1 << 6)
-#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR  (1 << 5)
-#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP         (1 << 4)
-
-#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888         (0 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER    (1 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_BGR565           (2 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_MASK             (3 << 0)
-/** @} */
-
-/** @{
- *
- * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
- * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
- */
-#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0         (0 << 6)
-#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4     (1 << 6)
-#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16    (2 << 6)
-
-/** The values of the field are VC4_TILING_FORMAT_* */
-#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK      (3 << 4)
-#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT     4
-
-
-#define VC4_LOADSTORE_TILE_BUFFER_NONE             (0 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_COLOR            (1 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_ZS               (2 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_Z                (3 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK          (4 << 0)
-#define VC4_LOADSTORE_TILE_BUFFER_FULL             (5 << 0)
-/** @} */
-
-#define VC4_INDEX_BUFFER_U8                        (0 << 4)
-#define VC4_INDEX_BUFFER_U16                       (1 << 4)
-
-/* This flag is only present in NV shader state. */
-#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS         (1 << 3)
-#define VC4_SHADER_FLAG_ENABLE_CLIPPING            (1 << 2)
-#define VC4_SHADER_FLAG_VS_POINT_SIZE              (1 << 1)
-#define VC4_SHADER_FLAG_FS_SINGLE_THREAD           (1 << 0)
-
-/** @{ byte 2 of config bits. */
-#define VC4_CONFIG_BITS_EARLY_Z_UPDATE             (1 << 1)
-#define VC4_CONFIG_BITS_EARLY_Z                    (1 << 0)
-/** @} */
-
-/** @{ byte 1 of config bits. */
-#define VC4_CONFIG_BITS_Z_UPDATE                   (1 << 7)
-/** same values in this 3-bit field as PIPE_FUNC_* */
-#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT           4
-#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE        (1 << 3)
-
-#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO    (0 << 1)
-#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD        (1 << 1)
-#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR         (2 << 1)
-#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO       (3 << 1)
-
-#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT       (1 << 0)
-/** @} */
-
-/** @{ byte 0 of config bits. */
-#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
-#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X   (1 << 6)
-#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X  (2 << 6)
-
-#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES        (1 << 4)
-#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET        (1 << 3)
-#define VC4_CONFIG_BITS_CW_PRIMITIVES              (1 << 2)
-#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK           (1 << 1)
-#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT          (1 << 0)
-/** @} */
-
-/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
-#define VC4_BIN_CONFIG_DB_NON_MS                   (1 << 7)
-
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32         (0 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64         (1 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128        (2 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256        (3 << 5)
-
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32    (0 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64    (1 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128   (2 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256   (3 << 3)
-
-#define VC4_BIN_CONFIG_AUTO_INIT_TSDA              (1 << 2)
-#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT           (1 << 1)
-#define VC4_BIN_CONFIG_MS_MODE_4X                  (1 << 0)
-/** @} */
-
-/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
-#define VC4_RENDER_CONFIG_DB_NON_MS                (1 << 12)
-#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11)
-#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G      (1 << 10)
-#define VC4_RENDER_CONFIG_COVERAGE_MODE            (1 << 9)
-#define VC4_RENDER_CONFIG_ENABLE_VG_MASK           (1 << 8)
-
-/** The values of the field are VC4_TILING_FORMAT_* */
-#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK       (3 << 6)
-#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT      6
-
-#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X         (0 << 4)
-#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X         (1 << 4)
-#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X        (2 << 4)
-
-#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED   (0 << 2)
-#define VC4_RENDER_CONFIG_FORMAT_RGBA8888          (1 << 2)
-#define VC4_RENDER_CONFIG_FORMAT_BGR565            (2 << 2)
-#define VC4_RENDER_CONFIG_FORMAT_MASK              (3 << 2)
-
-#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT        (1 << 1)
-#define VC4_RENDER_CONFIG_MS_MODE_4X               (1 << 0)
-
-#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX         (1 << 4)
-#define VC4_PRIMITIVE_LIST_FORMAT_32_XY            (3 << 4)
-#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS      (0 << 0)
-#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES       (1 << 0)
-#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES   (2 << 0)
-#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT         (3 << 0)
-
-enum vc4_texture_data_type {
-        VC4_TEXTURE_TYPE_RGBA8888 = 0,
-        VC4_TEXTURE_TYPE_RGBX8888 = 1,
-        VC4_TEXTURE_TYPE_RGBA4444 = 2,
-        VC4_TEXTURE_TYPE_RGBA5551 = 3,
-        VC4_TEXTURE_TYPE_RGB565 = 4,
-        VC4_TEXTURE_TYPE_LUMINANCE = 5,
-        VC4_TEXTURE_TYPE_ALPHA = 6,
-        VC4_TEXTURE_TYPE_LUMALPHA = 7,
-        VC4_TEXTURE_TYPE_ETC1 = 8,
-        VC4_TEXTURE_TYPE_S16F = 9,
-        VC4_TEXTURE_TYPE_S8 = 10,
-        VC4_TEXTURE_TYPE_S16 = 11,
-        VC4_TEXTURE_TYPE_BW1 = 12,
-        VC4_TEXTURE_TYPE_A4 = 13,
-        VC4_TEXTURE_TYPE_A1 = 14,
-        VC4_TEXTURE_TYPE_RGBA64 = 15,
-        VC4_TEXTURE_TYPE_RGBA32R = 16,
-        VC4_TEXTURE_TYPE_YUV422R = 17,
-};
-
-#define VC4_TEX_P0_OFFSET_MASK                     VC4_MASK(31, 12)
-#define VC4_TEX_P0_OFFSET_SHIFT                    12
-#define VC4_TEX_P0_CSWIZ_MASK                      VC4_MASK(11, 10)
-#define VC4_TEX_P0_CSWIZ_SHIFT                     10
-#define VC4_TEX_P0_CMMODE_MASK                     VC4_MASK(9, 9)
-#define VC4_TEX_P0_CMMODE_SHIFT                    9
-#define VC4_TEX_P0_FLIPY_MASK                      VC4_MASK(8, 8)
-#define VC4_TEX_P0_FLIPY_SHIFT                     8
-#define VC4_TEX_P0_TYPE_MASK                       VC4_MASK(7, 4)
-#define VC4_TEX_P0_TYPE_SHIFT                      4
-#define VC4_TEX_P0_MIPLVLS_MASK                    VC4_MASK(3, 0)
-#define VC4_TEX_P0_MIPLVLS_SHIFT                   0
-
-#define VC4_TEX_P1_TYPE4_MASK                      VC4_MASK(31, 31)
-#define VC4_TEX_P1_TYPE4_SHIFT                     31
-#define VC4_TEX_P1_HEIGHT_MASK                     VC4_MASK(30, 20)
-#define VC4_TEX_P1_HEIGHT_SHIFT                    20
-#define VC4_TEX_P1_ETCFLIP_MASK                    VC4_MASK(19, 19)
-#define VC4_TEX_P1_ETCFLIP_SHIFT                   19
-#define VC4_TEX_P1_WIDTH_MASK                      VC4_MASK(18, 8)
-#define VC4_TEX_P1_WIDTH_SHIFT                     8
-
-#define VC4_TEX_P1_MAGFILT_MASK                    VC4_MASK(7, 7)
-#define VC4_TEX_P1_MAGFILT_SHIFT                   7
-# define VC4_TEX_P1_MAGFILT_LINEAR                 0
-# define VC4_TEX_P1_MAGFILT_NEAREST                1
-
-#define VC4_TEX_P1_MINFILT_MASK                    VC4_MASK(6, 4)
-#define VC4_TEX_P1_MINFILT_SHIFT                   4
-# define VC4_TEX_P1_MINFILT_LINEAR                 0
-# define VC4_TEX_P1_MINFILT_NEAREST                1
-# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR          2
-# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN           3
-# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR           4
-# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN            5
-
-#define VC4_TEX_P1_WRAP_T_MASK                     VC4_MASK(3, 2)
-#define VC4_TEX_P1_WRAP_T_SHIFT                    2
-#define VC4_TEX_P1_WRAP_S_MASK                     VC4_MASK(1, 0)
-#define VC4_TEX_P1_WRAP_S_SHIFT                    0
-# define VC4_TEX_P1_WRAP_REPEAT                    0
-# define VC4_TEX_P1_WRAP_CLAMP                     1
-# define VC4_TEX_P1_WRAP_MIRROR                    2
-# define VC4_TEX_P1_WRAP_BORDER                    3
-
-#define VC4_TEX_P2_PTYPE_MASK                      VC4_MASK(31, 30)
-#define VC4_TEX_P2_PTYPE_SHIFT                     30
-# define VC4_TEX_P2_PTYPE_IGNORED                  0
-# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE          1
-# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS   2
-# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS      3
-
-/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
-#define VC4_TEX_P2_CMST_MASK                       VC4_MASK(29, 12)
-#define VC4_TEX_P2_CMST_SHIFT                      12
-#define VC4_TEX_P2_BSLOD_MASK                      VC4_MASK(0, 0)
-#define VC4_TEX_P2_BSLOD_SHIFT                     0
-
-/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
-#define VC4_TEX_P2_CHEIGHT_MASK                    VC4_MASK(22, 12)
-#define VC4_TEX_P2_CHEIGHT_SHIFT                   12
-#define VC4_TEX_P2_CWIDTH_MASK                     VC4_MASK(10, 0)
-#define VC4_TEX_P2_CWIDTH_SHIFT                    0
-
-/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
-#define VC4_TEX_P2_CYOFF_MASK                      VC4_MASK(22, 12)
-#define VC4_TEX_P2_CYOFF_SHIFT                     12
-#define VC4_TEX_P2_CXOFF_MASK                      VC4_MASK(10, 0)
-#define VC4_TEX_P2_CXOFF_SHIFT                     0
-
-#endif /* VC4_PACKET_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_program.c	2015-09-16 14:36:09.000000000 +0000
@@ -23,21 +23,19 @@
  */
 
 #include <inttypes.h>
-#include "pipe/p_state.h"
 #include "util/u_format.h"
 #include "util/u_hash.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "util/format_srgb.h"
 #include "util/ralloc.h"
 #include "util/hash_table.h"
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_lowering.h"
 #include "tgsi/tgsi_parse.h"
+#include "glsl/nir/nir.h"
+#include "glsl/nir/nir_builder.h"
 #include "nir/tgsi_to_nir.h"
-
 #include "vc4_context.h"
 #include "vc4_qpu.h"
 #include "vc4_qir.h"
@@ -45,51 +43,8 @@
 #include "simpenrose/simpenrose.h"
 #endif
 
-struct vc4_key {
-        struct vc4_uncompiled_shader *shader_state;
-        struct {
-                enum pipe_format format;
-                unsigned compare_mode:1;
-                unsigned compare_func:3;
-                unsigned wrap_s:3;
-                unsigned wrap_t:3;
-                uint8_t swizzle[4];
-        } tex[VC4_MAX_TEXTURE_SAMPLERS];
-        uint8_t ucp_enables;
-};
-
-struct vc4_fs_key {
-        struct vc4_key base;
-        enum pipe_format color_format;
-        bool depth_enabled;
-        bool stencil_enabled;
-        bool stencil_twoside;
-        bool stencil_full_writemasks;
-        bool is_points;
-        bool is_lines;
-        bool alpha_test;
-        bool point_coord_upper_left;
-        bool light_twoside;
-        uint8_t alpha_test_func;
-        uint8_t logicop_func;
-        uint32_t point_sprite_mask;
-
-        struct pipe_rt_blend_state blend;
-};
-
-struct vc4_vs_key {
-        struct vc4_key base;
-
-        /**
-         * This is a proxy for the array of FS input semantics, which is
-         * larger than we would want to put in the key.
-         */
-        uint64_t compiled_fs_id;
-
-        enum pipe_format attr_formats[8];
-        bool is_coord;
-        bool per_vertex_point_size;
-};
+static struct qreg
+ntq_get_src(struct vc4_compile *c, nir_src src, int i);
 
 static void
 resize_qreg_array(struct vc4_compile *c,
@@ -113,10 +68,10 @@
 }
 
 static struct qreg
-indirect_uniform_load(struct vc4_compile *c,
-                      struct qreg indirect_offset,
-                      unsigned offset)
+indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
 {
+        struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0);
+        uint32_t offset = intr->const_index[0];
         struct vc4_compiler_ubo_range *range = NULL;
         unsigned i;
         for (i = 0; i < c->num_uniform_ranges; i++) {
@@ -138,38 +93,83 @@
         };
 
         offset -= range->src_offset;
-        /* Translate the user's TGSI register index from the TGSI register
-         * base to a byte offset.
-         */
-        indirect_offset = qir_SHL(c, indirect_offset, qir_uniform_ui(c, 4));
 
         /* Adjust for where we stored the TGSI register base. */
         indirect_offset = qir_ADD(c, indirect_offset,
                                   qir_uniform_ui(c, (range->dst_offset +
                                                      offset)));
+
+        /* Clamp to [0, array size).  Note that MIN/MAX are signed. */
+        indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
         indirect_offset = qir_MIN(c, indirect_offset,
                                   qir_uniform_ui(c, (range->dst_offset +
                                                      range->size - 4)));
 
         qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
-        struct qreg r4 = qir_TEX_RESULT(c);
         c->num_texture_samples++;
-        return qir_MOV(c, r4);
+        return qir_TEX_RESULT(c);
 }
 
-static struct qreg *
-ntq_get_dest(struct vc4_compile *c, nir_dest dest)
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+                                       enum quniform_contents contents)
 {
-        assert(!dest.is_ssa);
-        nir_register *reg = dest.reg.reg;
-        struct hash_entry *entry = _mesa_hash_table_search(c->def_ht, reg);
-        assert(reg->num_array_elems == 0);
-        assert(dest.reg.base_offset == 0);
+        nir_intrinsic_instr *intr =
+                nir_intrinsic_instr_create(b->shader,
+                                           nir_intrinsic_load_uniform);
+        intr->const_index[0] = VC4_NIR_STATE_UNIFORM_OFFSET + contents;
+        intr->num_components = 1;
+        nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+        nir_builder_instr_insert(b, &intr->instr);
+        return &intr->dest.ssa;
+}
 
-        struct qreg *qregs = entry->data;
+nir_ssa_def *
+vc4_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
+{
+        switch (swiz) {
+        default:
+        case UTIL_FORMAT_SWIZZLE_NONE:
+                fprintf(stderr, "warning: unknown swizzle\n");
+                /* FALLTHROUGH */
+        case UTIL_FORMAT_SWIZZLE_0:
+                return nir_imm_float(b, 0.0);
+        case UTIL_FORMAT_SWIZZLE_1:
+                return nir_imm_float(b, 1.0);
+        case UTIL_FORMAT_SWIZZLE_X:
+        case UTIL_FORMAT_SWIZZLE_Y:
+        case UTIL_FORMAT_SWIZZLE_Z:
+        case UTIL_FORMAT_SWIZZLE_W:
+                return srcs[swiz];
+        }
+}
+
+static struct qreg *
+ntq_init_ssa_def(struct vc4_compile *c, nir_ssa_def *def)
+{
+        struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
+                                          def->num_components);
+        _mesa_hash_table_insert(c->def_ht, def, qregs);
         return qregs;
 }
 
+static struct qreg *
+ntq_get_dest(struct vc4_compile *c, nir_dest *dest)
+{
+        if (dest->is_ssa) {
+                struct qreg *qregs = ntq_init_ssa_def(c, &dest->ssa);
+                for (int i = 0; i < dest->ssa.num_components; i++)
+                        qregs[i] = c->undef;
+                return qregs;
+        } else {
+                nir_register *reg = dest->reg.reg;
+                assert(dest->reg.base_offset == 0);
+                assert(reg->num_array_elems == 0);
+                struct hash_entry *entry =
+                        _mesa_hash_table_search(c->def_ht, reg);
+                return entry->data;
+        }
+}
+
 static struct qreg
 ntq_get_src(struct vc4_compile *c, nir_src src, int i)
 {
@@ -279,22 +279,6 @@
 }
 
 static struct qreg
-qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
-{
-        struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
-        struct qreg high = qir_FSUB(c,
-                                    qir_FMUL(c,
-                                             qir_uniform_f(c, 1.055),
-                                             qir_POW(c,
-                                                     linear,
-                                                     qir_uniform_f(c, 0.41666))),
-                                    qir_uniform_f(c, 0.055));
-
-        qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
-        return qir_SEL_X_Y_NS(c, low, high);
-}
-
-static struct qreg
 ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
 {
         struct qreg src0_hi = qir_SHR(c, src0,
@@ -322,7 +306,9 @@
                 switch (instr->src[i].src_type) {
                 case nir_tex_src_coord:
                         s = ntq_get_src(c, instr->src[i].src, 0);
-                        if (instr->sampler_dim != GLSL_SAMPLER_DIM_1D)
+                        if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
+                                t = qir_uniform_f(c, 0.5);
+                        else
                                 t = ntq_get_src(c, instr->src[i].src, 1);
                         if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
                                 r = ntq_get_src(c, instr->src[i].src, 2);
@@ -405,13 +391,13 @@
         qir_TEX_S(c, s, texture_u[next_texture_u++]);
 
         c->num_texture_samples++;
-        struct qreg r4 = qir_TEX_RESULT(c);
+        struct qreg tex = qir_TEX_RESULT(c);
 
         enum pipe_format format = c->key->tex[unit].format;
 
         struct qreg unpacked[4];
         if (util_format_is_depth_or_stencil(format)) {
-                struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
+                struct qreg depthf = qir_ITOF(c, qir_SHR(c, tex,
                                                          qir_uniform_ui(c, 8)));
                 struct qreg normalized = qir_FMUL(c, depthf,
                                                   qir_uniform_f(c, 1.0f/0xffffff));
@@ -463,7 +449,7 @@
                         unpacked[i] = depth_output;
         } else {
                 for (int i = 0; i < 4; i++)
-                        unpacked[i] = qir_R4_UNPACK(c, r4, i);
+                        unpacked[i] = qir_UNPACK_8_F(c, tex, i);
         }
 
         const uint8_t *format_swiz = vc4_get_format_swizzle(format);
@@ -479,7 +465,7 @@
                                                             texture_output[i]);
         }
 
-        struct qreg *dest = ntq_get_dest(c, instr->dest);
+        struct qreg *dest = ntq_get_dest(c, &instr->dest);
         for (int i = 0; i < 4; i++) {
                 dest[i] = get_swizzled_channel(c, texture_output,
                                                c->key->tex[unit].swizzle[i]);
@@ -553,7 +539,7 @@
         struct qreg scaled_x =
                 qir_FMUL(c,
                          src,
-                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
+                         qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
 
         struct qreg x = qir_FADD(c,
                                  ntq_ffract(c, scaled_x),
@@ -751,26 +737,6 @@
         c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
 }
 
-static void
-emit_point_coord_input(struct vc4_compile *c, int attr)
-{
-        if (c->point_x.file == QFILE_NULL) {
-                c->point_x = qir_uniform_f(c, 0.0);
-                c->point_y = qir_uniform_f(c, 0.0);
-        }
-
-        c->inputs[attr * 4 + 0] = c->point_x;
-        if (c->fs_key->point_coord_upper_left) {
-                c->inputs[attr * 4 + 1] = qir_FSUB(c,
-                                                   qir_uniform_f(c, 1.0),
-                                                   c->point_y);
-        } else {
-                c->inputs[attr * 4 + 1] = c->point_y;
-        }
-        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
-        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
 static struct qreg
 emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
                       uint8_t index, uint8_t swizzle)
@@ -812,19 +778,6 @@
 }
 
 static void
-emit_face_input(struct vc4_compile *c, int attr)
-{
-        c->inputs[attr * 4 + 0] = qir_FSUB(c,
-                                           qir_uniform_f(c, 1.0),
-                                           qir_FMUL(c,
-                                                    qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
-                                                    qir_uniform_f(c, 2.0)));
-        c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
-        c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
-        c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
-static void
 add_output(struct vc4_compile *c,
            uint32_t decl_offset,
            uint8_t semantic_name,
@@ -865,6 +818,72 @@
         c->ubo_ranges[array_id].used = false;
 }
 
+static bool
+ntq_src_is_only_ssa_def_user(nir_src *src)
+{
+        if (!src->is_ssa)
+                return false;
+
+        if (!list_empty(&src->ssa->if_uses))
+                return false;
+
+        return (src->ssa->uses.next == &src->use_link &&
+                src->ssa->uses.next->next == &src->ssa->uses);
+}
+
+/**
+ * In general, emits a nir_pack_unorm_4x8 as a series of MOVs with the pack
+ * bit set.
+ *
+ * However, as an optimization, it tries to find the instructions generating
+ * the sources to be packed and just emit the pack flag there, if possible.
+ */
+static void
+ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
+{
+        struct qreg result = qir_get_temp(c);
+        struct nir_alu_instr *vec4 = NULL;
+
+        /* If packing from a vec4 op (as expected), identify it so that we can
+         * peek back at what generated its sources.
+         */
+        if (instr->src[0].src.is_ssa &&
+            instr->src[0].src.ssa->parent_instr->type == nir_instr_type_alu &&
+            nir_instr_as_alu(instr->src[0].src.ssa->parent_instr)->op ==
+            nir_op_vec4) {
+                vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+        }
+
+        for (int i = 0; i < 4; i++) {
+                int swiz = instr->src[0].swizzle[i];
+                struct qreg src;
+                if (vec4) {
+                        src = ntq_get_src(c, vec4->src[swiz].src,
+                                          vec4->src[swiz].swizzle[0]);
+                } else {
+                        src = ntq_get_src(c, instr->src[0].src, swiz);
+                }
+
+                if (vec4 &&
+                    ntq_src_is_only_ssa_def_user(&vec4->src[swiz].src) &&
+                    src.file == QFILE_TEMP &&
+                    c->defs[src.index] &&
+                    qir_is_mul(c->defs[src.index]) &&
+                    !c->defs[src.index]->dst.pack) {
+                        struct qinst *rewrite = c->defs[src.index];
+                        c->defs[src.index] = NULL;
+                        rewrite->dst = result;
+                        rewrite->dst.pack = QPU_PACK_MUL_8A + i;
+                        continue;
+                }
+
+                qir_PACK_8_F(c, result, src, i);
+        }
+
+        struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+        *dest = result;
+}
+
 static void
 ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
 {
@@ -879,12 +898,28 @@
                 for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
                         srcs[i] = ntq_get_src(c, instr->src[i].src,
                                               instr->src[i].swizzle[0]);
-                struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+                struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
                 for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
                         dest[i] = srcs[i];
                 return;
         }
 
+        if (instr->op == nir_op_pack_unorm_4x8) {
+                ntq_emit_pack_unorm_4x8(c, instr);
+                return;
+        }
+
+        if (instr->op == nir_op_unpack_unorm_4x8) {
+                struct qreg src = ntq_get_src(c, instr->src[0].src,
+                                              instr->src[0].swizzle[0]);
+                struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+                for (int i = 0; i < 4; i++) {
+                        if (instr->dest.write_mask & (1 << i))
+                                dest[i] = qir_UNPACK_8_F(c, src, i);
+                }
+                return;
+        }
+
         /* General case: We can just grab the one used channel per src. */
         struct qreg src[nir_op_infos[instr->op].num_inputs];
         for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
@@ -893,7 +928,7 @@
 
         /* Pick the channel to store the output in. */
         assert(!instr->dest.saturate);
-        struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+        struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
         assert(util_is_power_of_two(instr->dest.write_mask));
         dest += ffs(instr->dest.write_mask) - 1;
 
@@ -1087,167 +1122,6 @@
         }
 }
 
-static struct qreg
-vc4_blend_channel(struct vc4_compile *c,
-                  struct qreg *dst,
-                  struct qreg *src,
-                  struct qreg val,
-                  unsigned factor,
-                  int channel)
-{
-        switch(factor) {
-        case PIPE_BLENDFACTOR_ONE:
-                return val;
-        case PIPE_BLENDFACTOR_SRC_COLOR:
-                return qir_FMUL(c, val, src[channel]);
-        case PIPE_BLENDFACTOR_SRC_ALPHA:
-                return qir_FMUL(c, val, src[3]);
-        case PIPE_BLENDFACTOR_DST_ALPHA:
-                return qir_FMUL(c, val, dst[3]);
-        case PIPE_BLENDFACTOR_DST_COLOR:
-                return qir_FMUL(c, val, dst[channel]);
-        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-                if (channel != 3) {
-                        return qir_FMUL(c,
-                                        val,
-                                        qir_FMIN(c,
-                                                 src[3],
-                                                 qir_FSUB(c,
-                                                          qir_uniform_f(c, 1.0),
-                                                          dst[3])));
-                } else {
-                        return val;
-                }
-        case PIPE_BLENDFACTOR_CONST_COLOR:
-                return qir_FMUL(c, val,
-                                qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR,
-                                            channel));
-        case PIPE_BLENDFACTOR_CONST_ALPHA:
-                return qir_FMUL(c, val,
-                                qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3));
-        case PIPE_BLENDFACTOR_ZERO:
-                return qir_uniform_f(c, 0.0);
-        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
-                                                 src[channel]));
-        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
-                                                 src[3]));
-        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
-                                                 dst[3]));
-        case PIPE_BLENDFACTOR_INV_DST_COLOR:
-                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
-                                                 dst[channel]));
-        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-                return qir_FMUL(c, val,
-                                qir_FSUB(c, qir_uniform_f(c, 1.0),
-                                         qir_uniform(c,
-                                                     QUNIFORM_BLEND_CONST_COLOR,
-                                                     channel)));
-        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-                return qir_FMUL(c, val,
-                                qir_FSUB(c, qir_uniform_f(c, 1.0),
-                                         qir_uniform(c,
-                                                     QUNIFORM_BLEND_CONST_COLOR,
-                                                     3)));
-
-        default:
-        case PIPE_BLENDFACTOR_SRC1_COLOR:
-        case PIPE_BLENDFACTOR_SRC1_ALPHA:
-        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-                /* Unsupported. */
-                fprintf(stderr, "Unknown blend factor %d\n", factor);
-                return val;
-        }
-}
-
-static struct qreg
-vc4_blend_func(struct vc4_compile *c,
-               struct qreg src, struct qreg dst,
-               unsigned func)
-{
-        switch (func) {
-        case PIPE_BLEND_ADD:
-                return qir_FADD(c, src, dst);
-        case PIPE_BLEND_SUBTRACT:
-                return qir_FSUB(c, src, dst);
-        case PIPE_BLEND_REVERSE_SUBTRACT:
-                return qir_FSUB(c, dst, src);
-        case PIPE_BLEND_MIN:
-                return qir_FMIN(c, src, dst);
-        case PIPE_BLEND_MAX:
-                return qir_FMAX(c, src, dst);
-
-        default:
-                /* Unsupported. */
-                fprintf(stderr, "Unknown blend func %d\n", func);
-                return src;
-
-        }
-}
-
-/**
- * Implements fixed function blending in shader code.
- *
- * VC4 doesn't have any hardware support for blending.  Instead, you read the
- * current contents of the destination from the tile buffer after having
- * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
- * math using your output color and that destination value, and update the
- * output color appropriately.
- */
-static void
-vc4_blend(struct vc4_compile *c, struct qreg *result,
-          struct qreg *dst_color, struct qreg *src_color)
-{
-        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
-
-        if (!blend->blend_enable) {
-                for (int i = 0; i < 4; i++)
-                        result[i] = src_color[i];
-                return;
-        }
-
-        struct qreg clamped_src[4];
-        struct qreg clamped_dst[4];
-        for (int i = 0; i < 4; i++) {
-                clamped_src[i] = qir_SAT(c, src_color[i]);
-                clamped_dst[i] = qir_SAT(c, dst_color[i]);
-        }
-        src_color = clamped_src;
-        dst_color = clamped_dst;
-
-        struct qreg src_blend[4], dst_blend[4];
-        for (int i = 0; i < 3; i++) {
-                src_blend[i] = vc4_blend_channel(c,
-                                                 dst_color, src_color,
-                                                 src_color[i],
-                                                 blend->rgb_src_factor, i);
-                dst_blend[i] = vc4_blend_channel(c,
-                                                 dst_color, src_color,
-                                                 dst_color[i],
-                                                 blend->rgb_dst_factor, i);
-        }
-        src_blend[3] = vc4_blend_channel(c,
-                                         dst_color, src_color,
-                                         src_color[3],
-                                         blend->alpha_src_factor, 3);
-        dst_blend[3] = vc4_blend_channel(c,
-                                         dst_color, src_color,
-                                         dst_color[3],
-                                         blend->alpha_dst_factor, 3);
-
-        for (int i = 0; i < 3; i++) {
-                result[i] = vc4_blend_func(c,
-                                           src_blend[i], dst_blend[i],
-                                           blend->rgb_func);
-        }
-        result[3] = vc4_blend_func(c,
-                                   src_blend[3], dst_blend[3],
-                                   blend->alpha_func);
-}
-
 static void
 clip_distance_discard(struct vc4_compile *c)
 {
@@ -1271,167 +1145,15 @@
 }
 
 static void
-alpha_test_discard(struct vc4_compile *c)
-{
-        struct qreg src_alpha;
-        struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0);
-
-        if (!c->fs_key->alpha_test)
-                return;
-
-        if (c->output_color_index != -1)
-                src_alpha = c->outputs[c->output_color_index + 3];
-        else
-                src_alpha = qir_uniform_f(c, 1.0);
-
-        if (c->discard.file == QFILE_NULL)
-                c->discard = qir_uniform_ui(c, 0);
-
-        switch (c->fs_key->alpha_test_func) {
-        case PIPE_FUNC_NEVER:
-                c->discard = qir_uniform_ui(c, ~0);
-                break;
-        case PIPE_FUNC_ALWAYS:
-                break;
-        case PIPE_FUNC_EQUAL:
-                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
-                c->discard = qir_SEL_X_Y_ZS(c, c->discard,
-                                            qir_uniform_ui(c, ~0));
-                break;
-        case PIPE_FUNC_NOTEQUAL:
-                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
-                c->discard = qir_SEL_X_Y_ZC(c, c->discard,
-                                            qir_uniform_ui(c, ~0));
-                break;
-        case PIPE_FUNC_GREATER:
-                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
-                c->discard = qir_SEL_X_Y_NC(c, c->discard,
-                                            qir_uniform_ui(c, ~0));
-                break;
-        case PIPE_FUNC_GEQUAL:
-                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
-                c->discard = qir_SEL_X_Y_NS(c, c->discard,
-                                            qir_uniform_ui(c, ~0));
-                break;
-        case PIPE_FUNC_LESS:
-                qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
-                c->discard = qir_SEL_X_Y_NS(c, c->discard,
-                                            qir_uniform_ui(c, ~0));
-                break;
-        case PIPE_FUNC_LEQUAL:
-                qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
-                c->discard = qir_SEL_X_Y_NC(c, c->discard,
-                                            qir_uniform_ui(c, ~0));
-                break;
-        }
-}
-
-static struct qreg
-vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
-{
-        switch (c->fs_key->logicop_func) {
-        case PIPE_LOGICOP_CLEAR:
-                return qir_uniform_f(c, 0.0);
-        case PIPE_LOGICOP_NOR:
-                return qir_NOT(c, qir_OR(c, src, dst));
-        case PIPE_LOGICOP_AND_INVERTED:
-                return qir_AND(c, qir_NOT(c, src), dst);
-        case PIPE_LOGICOP_COPY_INVERTED:
-                return qir_NOT(c, src);
-        case PIPE_LOGICOP_AND_REVERSE:
-                return qir_AND(c, src, qir_NOT(c, dst));
-        case PIPE_LOGICOP_INVERT:
-                return qir_NOT(c, dst);
-        case PIPE_LOGICOP_XOR:
-                return qir_XOR(c, src, dst);
-        case PIPE_LOGICOP_NAND:
-                return qir_NOT(c, qir_AND(c, src, dst));
-        case PIPE_LOGICOP_AND:
-                return qir_AND(c, src, dst);
-        case PIPE_LOGICOP_EQUIV:
-                return qir_NOT(c, qir_XOR(c, src, dst));
-        case PIPE_LOGICOP_NOOP:
-                return dst;
-        case PIPE_LOGICOP_OR_INVERTED:
-                return qir_OR(c, qir_NOT(c, src), dst);
-        case PIPE_LOGICOP_OR_REVERSE:
-                return qir_OR(c, src, qir_NOT(c, dst));
-        case PIPE_LOGICOP_OR:
-                return qir_OR(c, src, dst);
-        case PIPE_LOGICOP_SET:
-                return qir_uniform_ui(c, ~0);
-        case PIPE_LOGICOP_COPY:
-        default:
-                return src;
-        }
-}
-
-static void
 emit_frag_end(struct vc4_compile *c)
 {
         clip_distance_discard(c);
-        alpha_test_discard(c);
-
-        enum pipe_format color_format = c->fs_key->color_format;
-        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
-        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
-        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
-        struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
-        struct qreg packed_dst_color = c->undef;
-
-        if (c->fs_key->blend.blend_enable ||
-            c->fs_key->blend.colormask != 0xf ||
-            c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
-                struct qreg r4 = qir_TLB_COLOR_READ(c);
-                for (int i = 0; i < 4; i++)
-                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
-                for (int i = 0; i < 4; i++) {
-                        dst_color[i] = get_swizzled_channel(c,
-                                                            tlb_read_color,
-                                                            format_swiz[i]);
-                        if (util_format_is_srgb(color_format) && i != 3) {
-                                linear_dst_color[i] =
-                                        qir_srgb_decode(c, dst_color[i]);
-                        } else {
-                                linear_dst_color[i] = dst_color[i];
-                        }
-                }
 
-                /* Save the packed value for logic ops.  Can't reuse r4
-                 * because other things might smash it (like sRGB)
-                 */
-                packed_dst_color = qir_MOV(c, r4);
-        }
-
-        struct qreg blend_color[4];
-        struct qreg undef_array[4] = {
-                c->undef, c->undef, c->undef, c->undef
-        };
-        vc4_blend(c, blend_color, linear_dst_color,
-                  (c->output_color_index != -1 ?
-                   c->outputs + c->output_color_index :
-                   undef_array));
-
-        if (util_format_is_srgb(color_format)) {
-                for (int i = 0; i < 3; i++)
-                        blend_color[i] = qir_srgb_encode(c, blend_color[i]);
-        }
-
-        /* Debug: Sometimes you're getting a black output and just want to see
-         * if the FS is getting executed at all.  Spam magenta into the color
-         * output.
-         */
-        if (0) {
-                blend_color[0] = qir_uniform_f(c, 1.0);
-                blend_color[1] = qir_uniform_f(c, 0.0);
-                blend_color[2] = qir_uniform_f(c, 1.0);
-                blend_color[3] = qir_uniform_f(c, 0.5);
-        }
-
-        struct qreg swizzled_outputs[4];
-        for (int i = 0; i < 4; i++) {
-                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
-                                                           format_swiz[i]);
+        struct qreg color;
+        if (c->output_color_index != -1) {
+                color = c->outputs[c->output_color_index];
+        } else {
+                color = qir_uniform_ui(c, 0);
         }
 
         if (c->discard.file != QFILE_NULL)
@@ -1458,66 +1180,30 @@
                 qir_TLB_Z_WRITE(c, z);
         }
 
-        struct qreg packed_color = c->undef;
-        for (int i = 0; i < 4; i++) {
-                if (swizzled_outputs[i].file == QFILE_NULL)
-                        continue;
-                if (packed_color.file == QFILE_NULL) {
-                        packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
-                } else {
-                        packed_color = qir_PACK_8_F(c,
-                                                    packed_color,
-                                                    swizzled_outputs[i],
-                                                    i);
-                }
-        }
-
-        if (packed_color.file == QFILE_NULL)
-                packed_color = qir_uniform_ui(c, 0);
-
-        if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
-                packed_color = vc4_logicop(c, packed_color, packed_dst_color);
-        }
-
-        /* If the bit isn't set in the color mask, then just return the
-         * original dst color, instead.
-         */
-        uint32_t colormask = 0xffffffff;
-        for (int i = 0; i < 4; i++) {
-                if (format_swiz[i] < 4 &&
-                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
-                        colormask &= ~(0xff << (i * 8));
-                }
-        }
-        if (colormask != 0xffffffff) {
-                packed_color = qir_OR(c,
-                                      qir_AND(c, packed_color,
-                                              qir_uniform_ui(c, colormask)),
-                                      qir_AND(c, packed_dst_color,
-                                              qir_uniform_ui(c, ~colormask)));
-        }
-
-        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
-                             packed_color, c->undef));
+        qir_TLB_COLOR_WRITE(c, color);
 }
 
 static void
 emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
 {
-        struct qreg xyi[2];
+        struct qreg packed = qir_get_temp(c);
 
         for (int i = 0; i < 2; i++) {
                 struct qreg scale =
                         qir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
 
-                xyi[i] = qir_FTOI(c, qir_FMUL(c,
-                                              qir_FMUL(c,
-                                                       c->outputs[c->output_position_index + i],
-                                                       scale),
-                                              rcp_w));
+                struct qreg packed_chan = packed;
+                packed_chan.pack = QPU_PACK_A_16A + i;
+
+                qir_FTOI_dest(c, packed_chan,
+                              qir_FMUL(c,
+                                       qir_FMUL(c,
+                                                c->outputs[c->output_position_index + i],
+                                                scale),
+                                       rcp_w));
         }
 
-        qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
+        qir_VPM_WRITE(c, packed);
 }
 
 static void
@@ -1690,6 +1376,7 @@
                 progress = nir_opt_peephole_select(s) || progress;
                 progress = nir_opt_algebraic(s) || progress;
                 progress = nir_opt_constant_folding(s) || progress;
+                progress = nir_opt_undef(s) || progress;
         } while (progress);
 }
 
@@ -1731,6 +1418,7 @@
                 unsigned loc = var->data.driver_location;
 
                 assert(array_len == 1);
+                (void)array_len;
                 resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                   (loc + 1) * 4);
 
@@ -1738,11 +1426,12 @@
                         if (semantic_name == TGSI_SEMANTIC_POSITION) {
                                 emit_fragcoord_input(c, loc);
                         } else if (semantic_name == TGSI_SEMANTIC_FACE) {
-                                emit_face_input(c, loc);
+                                c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
                         } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
                                    (c->fs_key->point_sprite_mask &
                                     (1 << semantic_index))) {
-                                emit_point_coord_input(c, loc);
+                                c->inputs[loc * 4 + 0] = c->point_x;
+                                c->inputs[loc * 4 + 1] = c->point_y;
                         } else {
                                 emit_fragment_input(c, loc,
                                                     semantic_name,
@@ -1765,6 +1454,13 @@
                 unsigned loc = var->data.driver_location * 4;
 
                 assert(array_len == 1);
+                (void)array_len;
+
+                /* NIR hack to pass through
+                 * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS */
+                if (semantic_name == TGSI_SEMANTIC_COLOR &&
+                    semantic_index == -1)
+                        semantic_index = 0;
 
                 for (int i = 0; i < 4; i++) {
                         add_output(c,
@@ -1829,8 +1525,7 @@
 static void
 ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
 {
-        struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
-                                          instr->def.num_components);
+        struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
         for (int i = 0; i < instr->def.num_components; i++)
                 qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
 
@@ -1838,53 +1533,59 @@
 }
 
 static void
+ntq_emit_ssa_undef(struct vc4_compile *c, nir_ssa_undef_instr *instr)
+{
+        struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
+
+        /* QIR needs there to be *some* value, so pick 0 (same as for
+         * ntq_setup_registers()).
+         */
+        for (int i = 0; i < instr->def.num_components; i++)
+                qregs[i] = qir_uniform_ui(c, 0);
+}
+
+static void
 ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
 {
         const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
         struct qreg *dest = NULL;
 
         if (info->has_dest) {
-                dest = ntq_get_dest(c, instr->dest);
+                dest = ntq_get_dest(c, &instr->dest);
         }
 
         switch (instr->intrinsic) {
         case nir_intrinsic_load_uniform:
-                assert(instr->const_index[1] == 1);
-
-                for (int i = 0; i < instr->num_components; i++) {
-                        dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
-                                              instr->const_index[0] * 4 + i);
+                assert(instr->num_components == 1);
+                if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) {
+                        *dest = qir_uniform(c, QUNIFORM_UNIFORM,
+                                            instr->const_index[0]);
+                } else {
+                        *dest = qir_uniform(c, instr->const_index[0] -
+                                            VC4_NIR_STATE_UNIFORM_OFFSET,
+                                            0);
                 }
                 break;
 
         case nir_intrinsic_load_uniform_indirect:
-                assert(instr->const_index[1] == 1);
-
-                for (int i = 0; i < instr->num_components; i++) {
-                        dest[i] = indirect_uniform_load(c,
-                                                        ntq_get_src(c, instr->src[0], 0),
-                                                        (instr->const_index[0] *
-                                                         4 + i) * sizeof(float));
-                }
+                *dest = indirect_uniform_load(c, instr);
 
                 break;
 
         case nir_intrinsic_load_input:
-                assert(instr->const_index[1] == 1);
-
-                for (int i = 0; i < instr->num_components; i++)
-                        dest[i] = c->inputs[instr->const_index[0] * 4 + i];
-
+                assert(instr->num_components == 1);
+                if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
+                        *dest = qir_TLB_COLOR_READ(c);
+                } else {
+                        *dest = c->inputs[instr->const_index[0]];
+                }
                 break;
 
         case nir_intrinsic_store_output:
-                for (int i = 0; i < instr->num_components; i++) {
-                        c->outputs[instr->const_index[0] * 4 + i] =
-                                qir_MOV(c, ntq_get_src(c, instr->src[0], i));
-                }
-                c->num_outputs = MAX2(c->num_outputs,
-                                      instr->const_index[0] * 4 +
-                                      instr->num_components + 1);
+                assert(instr->num_components == 1);
+                c->outputs[instr->const_index[0]] =
+                        qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
+                c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
                 break;
 
         case nir_intrinsic_discard:
@@ -1928,6 +1629,10 @@
                 ntq_emit_load_const(c, nir_instr_as_load_const(instr));
                 break;
 
+        case nir_instr_type_ssa_undef:
+                ntq_emit_ssa_undef(c, nir_instr_as_ssa_undef(instr));
+                break;
+
         case nir_instr_type_tex:
                 ntq_emit_tex(c, nir_instr_as_tex(instr));
                 break;
@@ -2085,13 +1790,17 @@
         c->s = tgsi_to_nir(tokens, &nir_options);
         nir_opt_global_to_local(c->s);
         nir_convert_to_ssa(c->s);
+        if (stage == QSTAGE_FRAG)
+                vc4_nir_lower_blend(c);
+        vc4_nir_lower_io(c);
         nir_lower_idiv(c->s);
+        nir_lower_load_const_to_scalar(c->s);
 
         vc4_optimize_nir(c->s);
 
         nir_remove_dead_variables(c->s);
 
-        nir_convert_from_ssa(c->s);
+        nir_convert_from_ssa(c->s, true);
 
         if (vc4_debug & VC4_DEBUG_SHADERDB) {
                 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
@@ -2188,6 +1897,8 @@
         memcpy(uinfo->contents, c->uniform_contents,
                count * sizeof(*uinfo->contents));
         uinfo->num_texture_samples = c->num_texture_samples;
+
+        vc4_set_shader_uniform_dirty_flags(shader);
 }
 
 static struct vc4_compiled_shader *
@@ -2215,11 +1926,9 @@
         shader->program_id = vc4->next_compiled_program_id++;
         if (stage == QSTAGE_FRAG) {
                 bool input_live[c->num_input_semantics];
-                struct simple_node *node;
 
                 memset(input_live, 0, sizeof(input_live));
-                foreach(node, &c->instructions) {
-                        struct qinst *inst = (struct qinst *)node;
+                list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                         for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                                 if (inst->src[i].file == QFILE_VARY)
                                         input_live[inst->src[i].index] = true;
@@ -2262,9 +1971,8 @@
         }
 
         copy_uniform_state_to_shader(shader, c);
-        shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
-                                      c->qpu_inst_count * sizeof(uint64_t),
-                                      "code");
+        shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts,
+                                         c->qpu_inst_count * sizeof(uint64_t));
 
         /* Copy the compiler UBO range state to the compiled shader, dropping
          * out arrays that were never referenced by an indirect load.
@@ -2291,10 +1999,12 @@
                 }
         }
         if (shader->ubo_size) {
-                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
-                        qir_get_stage_name(c->stage),
-                        c->program_id, c->variant_id,
-                        shader->ubo_size / 4);
+                if (vc4_debug & VC4_DEBUG_SHADERDB) {
+                        fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
+                                qir_get_stage_name(c->stage),
+                                c->program_id, c->variant_id,
+                                shader->ubo_size / 4);
+                }
         }
 
         qir_compile_destroy(c);
@@ -2424,9 +2134,20 @@
                 (prim_mode == PIPE_PRIM_POINTS &&
                  vc4->rasterizer->base.point_size_per_vertex);
 
-        vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+        struct vc4_compiled_shader *vs =
+                vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+        if (vs != vc4->prog.vs) {
+                vc4->prog.vs = vs;
+                vc4->dirty |= VC4_DIRTY_COMPILED_VS;
+        }
+
         key->is_coord = true;
-        vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+        struct vc4_compiled_shader *cs =
+                vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+        if (cs != vc4->prog.cs) {
+                vc4->prog.cs = cs;
+                vc4->dirty |= VC4_DIRTY_COMPILED_CS;
+        }
 }
 
 void
@@ -2493,305 +2214,6 @@
         free(so);
 }
 
-static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
-{
-        switch (p_wrap) {
-        case PIPE_TEX_WRAP_REPEAT:
-                return 0;
-        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-                return 1;
-        case PIPE_TEX_WRAP_MIRROR_REPEAT:
-                return 2;
-        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-                return 3;
-        case PIPE_TEX_WRAP_CLAMP:
-                return (using_nearest ? 1 : 3);
-        default:
-                fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
-                assert(!"not reached");
-                return 0;
-        }
-}
-
-static void
-write_texture_p0(struct vc4_context *vc4,
-                 struct vc4_texture_stateobj *texstate,
-                 uint32_t unit)
-{
-        struct pipe_sampler_view *texture = texstate->textures[unit];
-        struct vc4_resource *rsc = vc4_resource(texture->texture);
-
-        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
-                 VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
-                 VC4_SET_FIELD(texture->u.tex.last_level -
-                               texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
-                 VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
-                               VC4_TEX_P0_CMMODE) |
-                 VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE));
-}
-
-static void
-write_texture_p1(struct vc4_context *vc4,
-                 struct vc4_texture_stateobj *texstate,
-                 uint32_t unit)
-{
-        struct pipe_sampler_view *texture = texstate->textures[unit];
-        struct vc4_resource *rsc = vc4_resource(texture->texture);
-        struct pipe_sampler_state *sampler = texstate->samplers[unit];
-        static const uint8_t minfilter_map[6] = {
-                VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
-                VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
-                VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
-                VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
-                VC4_TEX_P1_MINFILT_NEAREST,
-                VC4_TEX_P1_MINFILT_LINEAR,
-        };
-        static const uint32_t magfilter_map[] = {
-                [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
-                [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
-        };
-
-        bool either_nearest =
-                (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
-                 sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
-
-        cl_aligned_u32(&vc4->uniforms,
-               VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
-               VC4_SET_FIELD(texture->texture->height0 & 2047,
-                             VC4_TEX_P1_HEIGHT) |
-               VC4_SET_FIELD(texture->texture->width0 & 2047,
-                             VC4_TEX_P1_WIDTH) |
-               VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter],
-                             VC4_TEX_P1_MAGFILT) |
-               VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 +
-                                           sampler->min_img_filter],
-                             VC4_TEX_P1_MINFILT) |
-               VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest),
-                             VC4_TEX_P1_WRAP_S) |
-               VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest),
-                             VC4_TEX_P1_WRAP_T));
-}
-
-static void
-write_texture_p2(struct vc4_context *vc4,
-                 struct vc4_texture_stateobj *texstate,
-                 uint32_t data)
-{
-        uint32_t unit = data & 0xffff;
-        struct pipe_sampler_view *texture = texstate->textures[unit];
-        struct vc4_resource *rsc = vc4_resource(texture->texture);
-
-        cl_aligned_u32(&vc4->uniforms,
-               VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
-                             VC4_TEX_P2_PTYPE) |
-               VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
-               VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
-}
-
-
-#define SWIZ(x,y,z,w) {          \
-        UTIL_FORMAT_SWIZZLE_##x, \
-        UTIL_FORMAT_SWIZZLE_##y, \
-        UTIL_FORMAT_SWIZZLE_##z, \
-        UTIL_FORMAT_SWIZZLE_##w  \
-}
-
-static void
-write_texture_border_color(struct vc4_context *vc4,
-                           struct vc4_texture_stateobj *texstate,
-                           uint32_t unit)
-{
-        struct pipe_sampler_state *sampler = texstate->samplers[unit];
-        struct pipe_sampler_view *texture = texstate->textures[unit];
-        struct vc4_resource *rsc = vc4_resource(texture->texture);
-        union util_color uc;
-
-        const struct util_format_description *tex_format_desc =
-                util_format_description(texture->format);
-
-        float border_color[4];
-        for (int i = 0; i < 4; i++)
-                border_color[i] = sampler->border_color.f[i];
-        if (util_format_is_srgb(texture->format)) {
-                for (int i = 0; i < 3; i++)
-                        border_color[i] =
-                                util_format_linear_to_srgb_float(border_color[i]);
-        }
-
-        /* Turn the border color into the layout of channels that it would
-         * have when stored as texture contents.
-         */
-        float storage_color[4];
-        util_format_unswizzle_4f(storage_color,
-                                 border_color,
-                                 tex_format_desc->swizzle);
-
-        /* Now, pack so that when the vc4_format-sampled texture contents are
-         * replaced with our border color, the vc4_get_format_swizzle()
-         * swizzling will get the right channels.
-         */
-        if (util_format_is_depth_or_stencil(texture->format)) {
-                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
-                                       sampler->border_color.f[0]) << 8;
-        } else {
-                switch (rsc->vc4_format) {
-                default:
-                case VC4_TEXTURE_TYPE_RGBA8888:
-                        util_pack_color(storage_color,
-                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
-                        break;
-                case VC4_TEXTURE_TYPE_RGBA4444:
-                        util_pack_color(storage_color,
-                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
-                        break;
-                case VC4_TEXTURE_TYPE_RGB565:
-                        util_pack_color(storage_color,
-                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
-                        break;
-                case VC4_TEXTURE_TYPE_ALPHA:
-                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
-                        break;
-                case VC4_TEXTURE_TYPE_LUMALPHA:
-                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
-                                    (float_to_ubyte(storage_color[0]) << 0));
-                        break;
-                }
-        }
-
-        cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
-}
-
-static uint32_t
-get_texrect_scale(struct vc4_texture_stateobj *texstate,
-                  enum quniform_contents contents,
-                  uint32_t data)
-{
-        struct pipe_sampler_view *texture = texstate->textures[data];
-        uint32_t dim;
-
-        if (contents == QUNIFORM_TEXRECT_SCALE_X)
-                dim = texture->texture->width0;
-        else
-                dim = texture->texture->height0;
-
-        return fui(1.0f / dim);
-}
-
-static struct vc4_bo *
-vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
-               const uint32_t *gallium_uniforms)
-{
-        if (!shader->ubo_size)
-                return NULL;
-
-        struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
-        uint32_t *data = vc4_bo_map(ubo);
-        for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
-                memcpy(data + shader->ubo_ranges[i].dst_offset,
-                       gallium_uniforms + shader->ubo_ranges[i].src_offset,
-                       shader->ubo_ranges[i].size);
-        }
-
-        return ubo;
-}
-
-void
-vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
-                   struct vc4_constbuf_stateobj *cb,
-                   struct vc4_texture_stateobj *texstate)
-{
-        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
-        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
-        struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
-
-        cl_ensure_space(&vc4->uniforms, (uinfo->count +
-                                         uinfo->num_texture_samples) * 4);
-
-        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
-
-        for (int i = 0; i < uinfo->count; i++) {
-
-                switch (uinfo->contents[i]) {
-                case QUNIFORM_CONSTANT:
-                        cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
-                        break;
-                case QUNIFORM_UNIFORM:
-                        cl_aligned_u32(&vc4->uniforms,
-                                       gallium_uniforms[uinfo->data[i]]);
-                        break;
-                case QUNIFORM_VIEWPORT_X_SCALE:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
-                        break;
-                case QUNIFORM_VIEWPORT_Y_SCALE:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
-                        break;
-
-                case QUNIFORM_VIEWPORT_Z_OFFSET:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
-                        break;
-                case QUNIFORM_VIEWPORT_Z_SCALE:
-                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
-                        break;
-
-                case QUNIFORM_USER_CLIP_PLANE:
-                        cl_aligned_f(&vc4->uniforms,
-                                     vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
-                        break;
-
-                case QUNIFORM_TEXTURE_CONFIG_P0:
-                        write_texture_p0(vc4, texstate, uinfo->data[i]);
-                        break;
-
-                case QUNIFORM_TEXTURE_CONFIG_P1:
-                        write_texture_p1(vc4, texstate, uinfo->data[i]);
-                        break;
-
-                case QUNIFORM_TEXTURE_CONFIG_P2:
-                        write_texture_p2(vc4, texstate, uinfo->data[i]);
-                        break;
-
-                case QUNIFORM_UBO_ADDR:
-                        cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
-                        break;
-
-                case QUNIFORM_TEXTURE_BORDER_COLOR:
-                        write_texture_border_color(vc4, texstate, uinfo->data[i]);
-                        break;
-
-                case QUNIFORM_TEXRECT_SCALE_X:
-                case QUNIFORM_TEXRECT_SCALE_Y:
-                        cl_aligned_u32(&vc4->uniforms,
-                                       get_texrect_scale(texstate,
-                                                         uinfo->contents[i],
-                                                         uinfo->data[i]));
-                        break;
-
-                case QUNIFORM_BLEND_CONST_COLOR:
-                        cl_aligned_f(&vc4->uniforms,
-                                     CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
-                        break;
-
-                case QUNIFORM_STENCIL:
-                        cl_aligned_u32(&vc4->uniforms,
-                                       vc4->zsa->stencil_uniforms[uinfo->data[i]] |
-                                       (uinfo->data[i] <= 1 ?
-                                        (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
-                                        0));
-                        break;
-
-                case QUNIFORM_ALPHA_REF:
-                        cl_aligned_f(&vc4->uniforms,
-                                     vc4->zsa->base.alpha.ref_value);
-                        break;
-                }
-#if 0
-                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
-                fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
-                        shader, i, written_val, uif(written_val));
-#endif
-        }
-}
-
 static void
 vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qir.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qir.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qir.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qir.c	2015-09-16 14:36:09.000000000 +0000
@@ -22,7 +22,6 @@
  */
 
 #include "util/u_memory.h"
-#include "util/simple_list.h"
 #include "util/ralloc.h"
 
 #include "vc4_qir.h"
@@ -72,12 +71,11 @@
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
         [QOP_EXP2] = { "exp2", 1, 2, false, true },
         [QOP_LOG2] = { "log2", 1, 2, false, true },
-        [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
-        [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
-        [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
-        [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
-        [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
-        [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
+        [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 },
+        [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 },
+        [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
+        [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
+        [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
         [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
@@ -97,10 +95,6 @@
         [QOP_TEX_B] = { "tex_b", 0, 2 },
         [QOP_TEX_DIRECT] = { "tex_direct", 0, 2 },
         [QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
-        [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 1 },
-        [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 1 },
-        [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 1 },
-        [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 1 },
         [QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 },
         [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
         [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
@@ -174,6 +168,18 @@
 }
 
 bool
+qir_is_mul(struct qinst *inst)
+{
+        /* Only these two multiply opcodes are reported as MUL ops. */
+        if (inst->op == QOP_FMUL)
+                return true;
+        if (inst->op == QOP_MUL24)
+                return true;
+
+        return false;
+}
+
+bool
 qir_is_tex(struct qinst *inst)
 {
         return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT;
@@ -235,20 +241,6 @@
         }
 }
 
-bool
-qir_reads_r4(struct qinst *inst)
-{
-        switch (inst->op) {
-        case QOP_R4_UNPACK_A:
-        case QOP_R4_UNPACK_B:
-        case QOP_R4_UNPACK_C:
-        case QOP_R4_UNPACK_D:
-                return true;
-        default:
-                return false;
-        }
-}
-
 static void
 qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
 {
@@ -292,6 +284,14 @@
                 inst->sf ? ".sf" : "");
 
         qir_print_reg(c, inst->dst, true);
+        /* Pack suffix: qir_is_mul() ops use the pack_mul disassembler,
+         * everything else pack_a. */
+        if (inst->dst.pack) {
+                if (qir_is_mul(inst))
+                        vc4_qpu_disasm_pack_mul(stderr, inst->dst.pack);
+                else
+                        vc4_qpu_disasm_pack_a(stderr, inst->dst.pack);
+        }
         for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                 fprintf(stderr, ", ");
                 qir_print_reg(c, inst->src[i], false);
@@ -301,10 +301,7 @@
 void
 qir_dump(struct vc4_compile *c)
 {
-        struct simple_node *node;
-
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 qir_dump_inst(c, inst);
                 fprintf(stderr, "\n");
         }
@@ -317,6 +314,7 @@
 
         reg.file = QFILE_TEMP;
         reg.index = c->num_temps++;
+        reg.pack = 0;
 
         if (c->num_temps > c->defs_array_size) {
                 uint32_t old_size = c->defs_array_size;
@@ -370,7 +368,7 @@
         if (inst->dst.file == QFILE_TEMP)
                 c->defs[inst->dst.index] = inst;
 
-        insert_at_tail(&c->instructions, &inst->link);
+        qir_emit_nodef(c, inst);
 }
 
 bool
@@ -384,7 +382,7 @@
 {
         struct vc4_compile *c = rzalloc(NULL, struct vc4_compile);
 
-        make_empty_list(&c->instructions);
+        list_inithead(&c->instructions);
 
         c->output_position_index = -1;
         c->output_clipvertex_index = -1;
@@ -403,7 +401,7 @@
         if (qinst->dst.file == QFILE_TEMP)
                 c->defs[qinst->dst.index] = NULL;
 
-        remove_from_list(&qinst->link);
+        list_del(&qinst->link);
         free(qinst->src);
         free(qinst);
 }
@@ -411,8 +409,11 @@
 struct qreg
 qir_follow_movs(struct vc4_compile *c, struct qreg reg)
 {
-        while (reg.file == QFILE_TEMP && c->defs[reg.index]->op == QOP_MOV)
+        while (reg.file == QFILE_TEMP &&
+               c->defs[reg.index] &&
+               c->defs[reg.index]->op == QOP_MOV) {
                 reg = c->defs[reg.index]->src[0];
+        }
 
         return reg;
 }
@@ -420,9 +421,9 @@
 void
 qir_compile_destroy(struct vc4_compile *c)
 {
-        while (!is_empty_list(&c->instructions)) {
+        while (!list_empty(&c->instructions)) {
                 struct qinst *qinst =
-                        (struct qinst *)first_elem(&c->instructions);
+                        (struct qinst *)c->instructions.next;
                 qir_remove_instruction(c, qinst);
         }
 
@@ -478,7 +479,7 @@
 qir_SF(struct vc4_compile *c, struct qreg src)
 {
         struct qinst *last_inst = NULL;
-        if (!is_empty_list(&c->instructions))
+        if (!list_empty(&c->instructions))
                 last_inst = (struct qinst *)c->instructions.prev;
 
         if (!last_inst ||
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qir.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qir.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qir.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qir.h	2015-09-16 14:36:09.000000000 +0000
@@ -33,9 +33,14 @@
 
 #include "util/macros.h"
 #include "glsl/nir/nir.h"
-#include "util/simple_list.h"
+#include "util/list.h"
 #include "util/u_math.h"
 
+#include "vc4_screen.h"
+#include "pipe/p_state.h"
+
+struct nir_builder;
+
 enum qfile {
         QFILE_NULL,
         QFILE_TEMP,
@@ -53,6 +58,7 @@
 struct qreg {
         enum qfile file;
         uint32_t index;
+        int pack;
 };
 
 enum qop {
@@ -99,7 +105,6 @@
         QOP_LOG2,
         QOP_VW_SETUP,
         QOP_VR_SETUP,
-        QOP_PACK_SCALED,
         QOP_PACK_8888_F,
         QOP_PACK_8A_F,
         QOP_PACK_8B_F,
@@ -155,19 +160,15 @@
          * the destination
          */
         QOP_TEX_RESULT,
-        QOP_R4_UNPACK_A,
-        QOP_R4_UNPACK_B,
-        QOP_R4_UNPACK_C,
-        QOP_R4_UNPACK_D
 };
 
 struct queued_qpu_inst {
-        struct simple_node link;
+        struct list_head link;
         uint64_t inst;
 };
 
 struct qinst {
-        struct simple_node link;
+        struct list_head link;
 
         enum qop op;
         struct qreg dst;
@@ -243,7 +244,11 @@
 
         QUNIFORM_TEXTURE_BORDER_COLOR,
 
-        QUNIFORM_BLEND_CONST_COLOR,
+        QUNIFORM_BLEND_CONST_COLOR_X,
+        QUNIFORM_BLEND_CONST_COLOR_Y,
+        QUNIFORM_BLEND_CONST_COLOR_Z,
+        QUNIFORM_BLEND_CONST_COLOR_W,
+
         QUNIFORM_STENCIL,
 
         QUNIFORM_ALPHA_REF,
@@ -280,6 +285,52 @@
         bool used;
 };
 
+struct vc4_key {
+        struct vc4_uncompiled_shader *shader_state;
+        struct {
+                enum pipe_format format;
+                unsigned compare_mode:1;
+                unsigned compare_func:3;
+                unsigned wrap_s:3;
+                unsigned wrap_t:3;
+                uint8_t swizzle[4];
+        } tex[VC4_MAX_TEXTURE_SAMPLERS];
+        uint8_t ucp_enables; /* presumably user clip plane enables; confirm */
+};
+
+struct vc4_fs_key {
+        struct vc4_key base;
+        enum pipe_format color_format;
+        bool depth_enabled;
+        bool stencil_enabled;
+        bool stencil_twoside;
+        bool stencil_full_writemasks;
+        bool is_points;
+        bool is_lines;
+        bool alpha_test;
+        bool point_coord_upper_left;
+        bool light_twoside;
+        uint8_t alpha_test_func;
+        uint8_t logicop_func;
+        uint32_t point_sprite_mask; /* bit N: GENERIC[N] is a point coord */
+
+        struct pipe_rt_blend_state blend;
+};
+
+struct vc4_vs_key {
+        struct vc4_key base;
+
+        /**
+         * This is a proxy for the array of FS input semantics, which is
+         * larger than we would want to put in the key.
+         */
+        uint64_t compiled_fs_id;
+
+        enum pipe_format attr_formats[8];
+        bool is_coord; /* set to true before compiling for QSTAGE_COORD */
+        bool per_vertex_point_size;
+};
+
 struct vc4_compile {
         struct vc4_context *vc4;
         nir_shader *s;
@@ -356,10 +407,10 @@
         struct qreg undef;
         enum qstage stage;
         uint32_t num_temps;
-        struct simple_node instructions;
+        struct list_head instructions;
         uint32_t immediates[1024];
 
-        struct simple_node qpu_inst_list;
+        struct list_head qpu_inst_list;
         uint64_t *qpu_insts;
         uint32_t qpu_inst_count;
         uint32_t qpu_inst_size;
@@ -369,6 +420,16 @@
         uint32_t variant_id;
 };
 
+/* Special nir_load_input intrinsic index for loading the current TLB
+ * destination color.
+ */
+#define VC4_NIR_TLB_COLOR_READ_INPUT		2000000000
+
+/* Special offset for nir_load_uniform values to get a QUNIFORM_*
+ * state-dependent value.
+ */
+#define VC4_NIR_STATE_UNIFORM_OFFSET		2000000000
+
 struct vc4_compile *qir_compile_init(void);
 void qir_compile_destroy(struct vc4_compile *c);
 struct qinst *qir_inst(enum qop op, struct qreg dst,
@@ -383,17 +444,23 @@
                         enum quniform_contents contents,
                         uint32_t data);
 void qir_reorder_uniforms(struct vc4_compile *c);
+
 void qir_emit(struct vc4_compile *c, struct qinst *inst);
+static inline void qir_emit_nodef(struct vc4_compile *c, struct qinst *inst)
+{
+        list_addtail(&inst->link, &c->instructions); /* c->defs[] untouched */
+}
+
 struct qreg qir_get_temp(struct vc4_compile *c);
 int qir_get_op_nsrc(enum qop qop);
 bool qir_reg_equals(struct qreg a, struct qreg b);
 bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
 bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
 bool qir_is_multi_instruction(struct qinst *inst);
+bool qir_is_mul(struct qinst *inst);
 bool qir_is_tex(struct qinst *inst);
 bool qir_depends_on_flags(struct qinst *inst);
 bool qir_writes_r4(struct qinst *inst);
-bool qir_reads_r4(struct qinst *inst);
 bool qir_src_needs_a_file(struct qinst *inst);
 struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
 
@@ -409,6 +476,12 @@
 bool qir_opt_dead_code(struct vc4_compile *c);
 bool qir_opt_small_immediates(struct vc4_compile *c);
 bool qir_opt_vpm_writes(struct vc4_compile *c);
+void vc4_nir_lower_blend(struct vc4_compile *c);
+void vc4_nir_lower_io(struct vc4_compile *c);
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+                                       enum quniform_contents contents);
+nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
+                                          nir_ssa_def **srcs, int swiz);
 void qir_lower_uniforms(struct vc4_compile *c);
 
 void qpu_schedule_instructions(struct vc4_compile *c);
@@ -443,6 +516,12 @@
         struct qreg t = qir_get_temp(c);                                 \
         qir_emit(c, qir_inst(QOP_##name, t, a, c->undef));               \
         return t;                                                        \
+}                                                                        \
+static inline void                                                       \
+qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
+                  struct qreg a)                                         \
+{                                                                        \
+        qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef));      \
 }
 
 #define QIR_ALU2(name)                                                   \
@@ -452,6 +531,12 @@
         struct qreg t = qir_get_temp(c);                                 \
         qir_emit(c, qir_inst(QOP_##name, t, a, b));                      \
         return t;                                                        \
+}                                                                        \
+static inline void                                                       \
+qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
+                  struct qreg a, struct qreg b)                          \
+{                                                                        \
+        qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, b));             \
 }
 
 #define QIR_NODST_1(name)                                               \
@@ -468,6 +553,14 @@
         qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));       \
 }
 
+#define QIR_PACK(name)                                                   \
+static inline struct qreg                                                \
+qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a)       \
+{                                                                        \
+        qir_emit_nodef(c, qir_inst(QOP_##name, dest, a, c->undef));      \
+        return dest;                                                     \
+}
+
 QIR_ALU1(MOV)
 QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
@@ -504,12 +597,11 @@
 QIR_ALU1(RSQ)
 QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
-QIR_ALU2(PACK_SCALED)
 QIR_ALU1(PACK_8888_F)
-QIR_ALU2(PACK_8A_F)
-QIR_ALU2(PACK_8B_F)
-QIR_ALU2(PACK_8C_F)
-QIR_ALU2(PACK_8D_F)
+QIR_PACK(PACK_8A_F)
+QIR_PACK(PACK_8B_F)
+QIR_PACK(PACK_8C_F)
+QIR_PACK(PACK_8D_F)
 QIR_ALU1(VARY_ADD_C)
 QIR_NODST_2(TEX_S)
 QIR_NODST_2(TEX_T)
@@ -523,27 +615,12 @@
 QIR_ALU0(FRAG_REV_FLAG)
 QIR_ALU0(TEX_RESULT)
 QIR_ALU0(TLB_COLOR_READ)
+QIR_NODST_1(TLB_COLOR_WRITE)
 QIR_NODST_1(TLB_Z_WRITE)
 QIR_NODST_1(TLB_DISCARD_SETUP)
 QIR_NODST_1(TLB_STENCIL_SETUP)
 
 static inline struct qreg
-qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
-{
-        struct qreg t = qir_get_temp(c);
-        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
-        return t;
-}
-
-static inline struct qreg
-qir_SEL_X_0_COND(struct vc4_compile *c, int i)
-{
-        struct qreg t = qir_get_temp(c);
-        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
-        return t;
-}
-
-static inline struct qreg
 qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
 {
         struct qreg t = qir_get_temp(c);
@@ -576,11 +653,12 @@
 }
 
 static inline struct qreg
-qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
+qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
 {
-        struct qreg t = qir_get_temp(c);
-        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
-        return t;
+        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
+        if (dest.file == QFILE_TEMP)
+                c->defs[dest.index] = NULL;
+        return dest;
 }
 
 static inline struct qreg
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c	2015-09-16 14:36:09.000000000 +0000
@@ -52,7 +52,7 @@
 add_uniform(struct hash_table *ht, struct qreg reg)
 {
         struct hash_entry *entry;
-        void *key = (void *)(uintptr_t)reg.index;
+        void *key = (void *)(uintptr_t)(reg.index + 1);
 
         entry = _mesa_hash_table_search(ht, key);
         if (entry) {
@@ -66,7 +66,7 @@
 remove_uniform(struct hash_table *ht, struct qreg reg)
 {
         struct hash_entry *entry;
-        void *key = (void *)(uintptr_t)reg.index;
+        void *key = (void *)(uintptr_t)(reg.index + 1);
 
         entry = _mesa_hash_table_search(ht, key);
         assert(entry);
@@ -88,7 +88,6 @@
 void
 qir_lower_uniforms(struct vc4_compile *c)
 {
-        struct simple_node *node;
         struct hash_table *ht =
                 _mesa_hash_table_create(c, index_hash, index_compare);
 
@@ -96,8 +95,7 @@
          * than one uniform referenced, and add those uniform values to the
          * ht.
          */
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 uint32_t nsrc = qir_get_op_nsrc(inst->op);
 
                 uint32_t count = 0;
@@ -124,7 +122,7 @@
                 struct hash_entry *entry;
                 hash_table_foreach(ht, entry) {
                         uint32_t count = (uintptr_t)entry->data;
-                        uint32_t index = (uintptr_t)entry->key;
+                        uint32_t index = (uintptr_t)entry->key - 1;
                         if (count > max_count) {
                                 max_count = count;
                                 max_index = index;
@@ -137,10 +135,9 @@
                 struct qreg temp = qir_get_temp(c);
                 struct qreg unif = { QFILE_UNIF, max_index };
                 struct qinst *mov = qir_inst(QOP_MOV, temp, unif, c->undef);
-                insert_at_head(&c->instructions, &mov->link);
+                list_add(&mov->link, &c->instructions);
                 c->defs[temp.index] = mov;
-                foreach(node, &c->instructions) {
-                        struct qinst *inst = (struct qinst *)node;
+                list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                         uint32_t nsrc = qir_get_op_nsrc(inst->op);
 
                         uint32_t count = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_disasm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_disasm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_disasm.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_disasm.c	2015-09-16 14:36:09.000000000 +0000
@@ -225,7 +225,7 @@
 };
 
 #define DESC(array, index)                                        \
-        ((index > ARRAY_SIZE(array) || !(array)[index]) ?         \
+        ((index >= ARRAY_SIZE(array) || !(array)[index]) ?         \
          "???" : (array)[index])
 
 static const char *
@@ -245,6 +245,18 @@
         return special_write[reg];
 }
 
+void
+vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack)
+{
+        fprintf(out, ".%s", DESC(qpu_pack_mul, pack));
+}
+
+void
+vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack)
+{
+        fprintf(out, "%s", DESC(qpu_pack_a, pack));
+}
+
 static void
 print_alu_dst(uint64_t inst, bool is_mul)
 {
@@ -263,9 +275,9 @@
                 fprintf(stderr, "%s%d?", file, waddr);
 
         if (is_mul && (inst & QPU_PM)) {
-                fprintf(stderr, ".%s", DESC(qpu_pack_mul, pack));
+                vc4_qpu_disasm_pack_mul(stderr, pack);
         } else if (is_a && !(inst & QPU_PM)) {
-                fprintf(stderr, "%s", DESC(qpu_pack_a, pack));
+                vc4_qpu_disasm_pack_a(stderr, pack);
         }
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_emit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_emit.c	2015-09-16 14:36:09.000000000 +0000
@@ -47,14 +47,14 @@
 {
         struct queued_qpu_inst *q = rzalloc(c, struct queued_qpu_inst);
         q->inst = inst;
-        insert_at_tail(&c->qpu_inst_list, &q->link);
+        list_addtail(&q->link, &c->qpu_inst_list);
 }
 
 static uint64_t *
 last_inst(struct vc4_compile *c)
 {
         struct queued_qpu_inst *q =
-                (struct queued_qpu_inst *)last_elem(&c->qpu_inst_list);
+                (struct queued_qpu_inst *)c->qpu_inst_list.prev;
         return &q->inst;
 }
 
@@ -117,11 +117,11 @@
                 return;
 
         if (mux0 == QPU_MUX_A) {
-                queue(c, qpu_a_MOV(qpu_rb(31), *src1));
-                *src1 = qpu_rb(31);
+                queue(c, qpu_a_MOV(qpu_rb(31), *src0));
+                *src0 = qpu_rb(31);
         } else {
-                queue(c, qpu_a_MOV(qpu_ra(31), *src1));
-                *src1 = qpu_ra(31);
+                queue(c, qpu_a_MOV(qpu_ra(31), *src0));
+                *src0 = qpu_ra(31);
         }
 }
 
@@ -144,7 +144,7 @@
                 QPU_UNPACK_16B_TO_F32,
         };
 
-        make_empty_list(&c->qpu_inst_list);
+        list_inithead(&c->qpu_inst_list);
 
         switch (c->stage) {
         case QSTAGE_VERT:
@@ -170,10 +170,7 @@
                 break;
         }
 
-        struct simple_node *node;
-        foreach(node, &c->instructions) {
-                struct qinst *qinst = (struct qinst *)node;
-
+        list_for_each_entry(struct qinst, qinst, &c->instructions, link) {
 #if 0
                 fprintf(stderr, "translating qinst to qpu: ");
                 qir_dump_inst(qinst);
@@ -182,10 +179,9 @@
 
                 static const struct {
                         uint32_t op;
-                        bool is_mul;
                 } translate[] = {
-#define A(name) [QOP_##name] = {QPU_A_##name, false}
-#define M(name) [QOP_##name] = {QPU_M_##name, true}
+#define A(name) [QOP_##name] = {QPU_A_##name}
+#define M(name) [QOP_##name] = {QPU_M_##name}
                         A(FADD),
                         A(FSUB),
                         A(FMIN),
@@ -237,6 +233,7 @@
                         case QFILE_VPM:
                                 assert((int)qinst->src[i].index >=
                                        last_vpm_read_index);
+                                (void)last_vpm_read_index;
                                 last_vpm_read_index = qinst->src[i].index;
                                 src[i] = qpu_ra(QPU_R_VPM);
                                 break;
@@ -322,7 +319,8 @@
                                 abort();
                         }
 
-                        queue(c, qpu_a_MOV(dst, qpu_r4()));
+                        if (dst.mux != QPU_MUX_R4)
+                                queue(c, qpu_a_MOV(dst, qpu_r4()));
 
                         break;
 
@@ -337,28 +335,12 @@
                 case QOP_PACK_8B_F:
                 case QOP_PACK_8C_F:
                 case QOP_PACK_8D_F:
-                        /* If dst doesn't happen to already contain src[0],
-                         * then we have to move it in.
-                         */
-                        if (qinst->src[0].file != QFILE_NULL &&
-                            (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
-                                /* Don't overwrite src1 while setting up
-                                 * the dst!
-                                 */
-                                if (dst.mux == src[1].mux &&
-                                    dst.addr == src[1].addr) {
-                                        queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
-                                        src[1] = qpu_rb(31);
-                                }
-
-                                queue(c, qpu_m_MOV(dst, src[0]));
-                        }
-
-                        queue(c, qpu_m_MOV(dst, src[1]));
-                        *last_inst(c) |= QPU_PM;
-                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
-                                                       qinst->op - QOP_PACK_8A_F,
-                                                       QPU_PACK);
+                        queue(c,
+                              qpu_m_MOV(dst, src[0]) |
+                              QPU_PM |
+                              QPU_SET_FIELD(QPU_PACK_MUL_8A +
+                                            qinst->op - QOP_PACK_8A_F,
+                                            QPU_PACK));
                         break;
 
                 case QOP_FRAG_X:
@@ -405,6 +387,8 @@
                         *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                     QPU_SIG_COLOR_LOAD);
 
+                        if (dst.mux != QPU_MUX_R4)
+                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                         break;
 
                 case QOP_TLB_COLOR_WRITE:
@@ -418,24 +402,6 @@
                         queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
                         break;
 
-                case QOP_PACK_SCALED: {
-                        uint64_t a = (qpu_a_MOV(dst, src[0]) |
-                                      QPU_SET_FIELD(QPU_PACK_A_16A,
-                                                    QPU_PACK));
-                        uint64_t b = (qpu_a_MOV(dst, src[1]) |
-                                      QPU_SET_FIELD(QPU_PACK_A_16B,
-                                                    QPU_PACK));
-
-                        if (dst.mux == src[1].mux && dst.addr == src[1].addr) {
-                                queue(c, b);
-                                queue(c, a);
-                        } else {
-                                queue(c, a);
-                                queue(c, b);
-                        }
-                        break;
-                }
-
                 case QOP_TEX_S:
                 case QOP_TEX_T:
                 case QOP_TEX_R:
@@ -454,21 +420,8 @@
                         queue(c, qpu_NOP());
                         *last_inst(c) = qpu_set_sig(*last_inst(c),
                                                     QPU_SIG_LOAD_TMU0);
-
-                        break;
-
-                case QOP_R4_UNPACK_A:
-                case QOP_R4_UNPACK_B:
-                case QOP_R4_UNPACK_C:
-                case QOP_R4_UNPACK_D:
-                        assert(src[0].mux == QPU_MUX_R4);
-                        queue(c, qpu_a_MOV(dst, src[0]));
-                        *last_inst(c) |= QPU_PM;
-                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
-                                                       (qinst->op -
-                                                        QOP_R4_UNPACK_A),
-                                                       QPU_UNPACK);
-
+                        if (dst.mux != QPU_MUX_R4)
+                                queue(c, qpu_a_MOV(dst, qpu_r4()));
                         break;
 
                 case QOP_UNPACK_8A_F:
@@ -477,20 +430,30 @@
                 case QOP_UNPACK_8D_F:
                 case QOP_UNPACK_16A_F:
                 case QOP_UNPACK_16B_F: {
-                        assert(src[0].mux == QPU_MUX_A);
+                        if (src[0].mux == QPU_MUX_R4) {
+                                queue(c, qpu_a_MOV(dst, src[0]));
+                                *last_inst(c) |= QPU_PM;
+                                *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
+                                                               (qinst->op -
+                                                                QOP_UNPACK_8A_F),
+                                                               QPU_UNPACK);
+                        } else {
+                                assert(src[0].mux == QPU_MUX_A);
 
-                        /* Since we're setting the pack bits, if the
-                         * destination is in A it would get re-packed.
-                         */
-                        queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
-                                             qpu_rb(31) : dst),
-                                            src[0], src[0]));
-                        *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
-                                                                  QOP_UNPACK_8A_F],
-                                                       QPU_UNPACK);
+                                /* Since we're setting the pack bits, if the
+                                 * destination is in A it would get re-packed.
+                                 */
+                                queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
+                                                     qpu_rb(31) : dst),
+                                                    src[0], src[0]));
+                                *last_inst(c) |=
+                                        QPU_SET_FIELD(unpack_map[qinst->op -
+                                                                 QOP_UNPACK_8A_F],
+                                                      QPU_UNPACK);
 
-                        if (dst.mux == QPU_MUX_A) {
-                                queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+                                if (dst.mux == QPU_MUX_A) {
+                                        queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+                                }
                         }
                 }
                         break;
@@ -531,14 +494,24 @@
 
                         fixup_raddr_conflict(c, dst, &src[0], &src[1]);
 
-                        if (translate[qinst->op].is_mul) {
+                        if (qir_is_mul(qinst)) {
                                 queue(c, qpu_m_alu2(translate[qinst->op].op,
                                                     dst,
                                                     src[0], src[1]));
+                                if (qinst->dst.pack) {
+                                        *last_inst(c) |= QPU_PM;
+                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
+                                                                       QPU_PACK);
+                                }
                         } else {
                                 queue(c, qpu_a_alu2(translate[qinst->op].op,
                                                     dst,
                                                     src[0], src[1]));
+                                if (qinst->dst.pack) {
+                                        assert(dst.mux == QPU_MUX_A);
+                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
+                                                                       QPU_PACK);
+                                }
                         }
 
                         break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu.h	2015-09-16 14:36:09.000000000 +0000
@@ -24,6 +24,7 @@
 #ifndef VC4_QPU_H
 #define VC4_QPU_H
 
+#include <stdio.h>
 #include <stdint.h>
 
 #include "util/u_math.h"
@@ -122,23 +123,23 @@
 static inline struct qpu_reg qpu_r4(void) { return qpu_rn(4); }
 static inline struct qpu_reg qpu_r5(void) { return qpu_rn(5); }
 
-uint64_t qpu_NOP(void);
-uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src);
-uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src);
+uint64_t qpu_NOP(void) ATTRIBUTE_CONST;
+uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
+uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
 uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst,
-                    struct qpu_reg src0, struct qpu_reg src1);
+                    struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
 uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
-                    struct qpu_reg src0, struct qpu_reg src1);
-uint64_t qpu_merge_inst(uint64_t a, uint64_t b);
-uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val);
-uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
-uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
-uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
-uint32_t qpu_encode_small_immediate(uint32_t i);
-
-bool qpu_waddr_is_tlb(uint32_t waddr);
-bool qpu_inst_is_tlb(uint64_t inst);
-int qpu_num_sf_accesses(uint64_t inst);
+                    struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
+uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint32_t qpu_encode_small_immediate(uint32_t i) ATTRIBUTE_CONST;
+
+bool qpu_waddr_is_tlb(uint32_t waddr) ATTRIBUTE_CONST;
+bool qpu_inst_is_tlb(uint64_t inst) ATTRIBUTE_CONST;
+int qpu_num_sf_accesses(uint64_t inst) ATTRIBUTE_CONST;
 void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst);
 
 static inline uint64_t
@@ -206,6 +207,12 @@
 vc4_qpu_disasm(const uint64_t *instructions, int num_instructions);
 
 void
+vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack);
+
+void
+vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack);
+
+void
 vc4_qpu_validate(uint64_t *insts, uint32_t num_inst);
 
 #endif /* VC4_QPU_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_schedule.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_schedule.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_schedule.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_schedule.c	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
 struct schedule_node_child;
 
 struct schedule_node {
-        struct simple_node link;
+        struct list_head link;
         struct queued_qpu_inst *inst;
         struct schedule_node_child *children;
         uint32_t child_count;
@@ -400,22 +400,21 @@
 }
 
 static void
-calculate_forward_deps(struct vc4_compile *c, struct simple_node *schedule_list)
+calculate_forward_deps(struct vc4_compile *c, struct list_head *schedule_list)
 {
-        struct simple_node *node;
         struct schedule_state state;
 
         memset(&state, 0, sizeof(state));
         state.dir = F;
 
-        foreach(node, schedule_list)
-                calculate_deps(&state, (struct schedule_node *)node);
+        list_for_each_entry(struct schedule_node, node, schedule_list, link)
+                calculate_deps(&state, node);
 }
 
 static void
-calculate_reverse_deps(struct vc4_compile *c, struct simple_node *schedule_list)
+calculate_reverse_deps(struct vc4_compile *c, struct list_head *schedule_list)
 {
-        struct simple_node *node;
+        struct list_head *node;
         struct schedule_state state;
 
         memset(&state, 0, sizeof(state));
@@ -507,15 +506,13 @@
 
 static struct schedule_node *
 choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
-                               struct simple_node *schedule_list,
+                               struct list_head *schedule_list,
                                struct schedule_node *prev_inst)
 {
         struct schedule_node *chosen = NULL;
-        struct simple_node *node;
         int chosen_prio = 0;
 
-        foreach(node, schedule_list) {
-                struct schedule_node *n = (struct schedule_node *)node;
+        list_for_each_entry(struct schedule_node, n, schedule_list, link) {
                 uint64_t inst = n->inst->inst;
 
                 /* "An instruction must not read from a location in physical
@@ -596,14 +593,11 @@
 }
 
 static void
-dump_state(struct simple_node *schedule_list)
+dump_state(struct list_head *schedule_list)
 {
-        struct simple_node *node;
-
         uint32_t i = 0;
-        foreach(node, schedule_list) {
-                struct schedule_node *n = (struct schedule_node *)node;
 
+        list_for_each_entry(struct schedule_node, n, schedule_list, link) {
                 fprintf(stderr, "%3d: ", i++);
                 vc4_qpu_disasm(&n->inst->inst, 1);
                 fprintf(stderr, "\n");
@@ -639,7 +633,7 @@
 }
 
 static void
-mark_instruction_scheduled(struct simple_node *schedule_list,
+mark_instruction_scheduled(struct list_head *schedule_list,
                            struct schedule_node *node,
                            bool war_only)
 {
@@ -658,16 +652,15 @@
 
                 child->parent_count--;
                 if (child->parent_count == 0)
-                        insert_at_head(schedule_list, &child->link);
+                        list_add(&child->link, schedule_list);
 
                 node->children[i].node = NULL;
         }
 }
 
 static void
-schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
+schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
 {
-        struct simple_node *node, *t;
         struct choose_scoreboard scoreboard;
 
         /* We reorder the uniforms as we schedule instructions, so save the
@@ -693,14 +686,12 @@
         }
 
         /* Remove non-DAG heads from the list. */
-        foreach_s(node, t, schedule_list) {
-                struct schedule_node *n = (struct schedule_node *)node;
-
+        list_for_each_entry_safe(struct schedule_node, n, schedule_list, link) {
                 if (n->parent_count != 0)
-                        remove_from_list(&n->link);
+                        list_del(&n->link);
         }
 
-        while (!is_empty_list(schedule_list)) {
+        while (!list_empty(schedule_list)) {
                 struct schedule_node *chosen =
                         choose_instruction_to_schedule(&scoreboard,
                                                        schedule_list,
@@ -724,7 +715,7 @@
                  * find an instruction to pair with it.
                  */
                 if (chosen) {
-                        remove_from_list(&chosen->link);
+                        list_del(&chosen->link);
                         mark_instruction_scheduled(schedule_list, chosen, true);
                         if (chosen->uniform != -1) {
                                 c->uniform_data[next_uniform] =
@@ -738,7 +729,7 @@
                                                                schedule_list,
                                                                chosen);
                         if (merge) {
-                                remove_from_list(&merge->link);
+                                list_del(&merge->link);
                                 inst = qpu_merge_inst(inst, merge->inst->inst);
                                 assert(inst != 0);
                                 if (merge->uniform != -1) {
@@ -813,16 +804,14 @@
 qpu_schedule_instructions(struct vc4_compile *c)
 {
         void *mem_ctx = ralloc_context(NULL);
-        struct simple_node schedule_list;
-        struct simple_node *node;
+        struct list_head schedule_list;
 
-        make_empty_list(&schedule_list);
+        list_inithead(&schedule_list);
 
         if (debug) {
                 fprintf(stderr, "Pre-schedule instructions\n");
-                foreach(node, &c->qpu_inst_list) {
-                        struct queued_qpu_inst *q =
-                                (struct queued_qpu_inst *)node;
+                list_for_each_entry(struct queued_qpu_inst, q,
+                                    &c->qpu_inst_list, link) {
                         vc4_qpu_disasm(&q->inst, 1);
                         fprintf(stderr, "\n");
                 }
@@ -831,7 +820,7 @@
 
         /* Wrap each instruction in a scheduler structure. */
         uint32_t next_uniform = 0;
-        while (!is_empty_list(&c->qpu_inst_list)) {
+        while (!list_empty(&c->qpu_inst_list)) {
                 struct queued_qpu_inst *inst =
                         (struct queued_qpu_inst *)c->qpu_inst_list.next;
                 struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node);
@@ -844,16 +833,15 @@
                 } else {
                         n->uniform = -1;
                 }
-                remove_from_list(&inst->link);
-                insert_at_tail(&schedule_list, &n->link);
+                list_del(&inst->link);
+                list_addtail(&n->link, &schedule_list);
         }
         assert(next_uniform == c->num_uniforms);
 
         calculate_forward_deps(c, &schedule_list);
         calculate_reverse_deps(c, &schedule_list);
 
-        foreach(node, &schedule_list) {
-                struct schedule_node *n = (struct schedule_node *)node;
+        list_for_each_entry(struct schedule_node, n, &schedule_list, link) {
                 compute_delay(n);
         }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_qpu_validate.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_qpu_validate.c	2015-09-16 14:36:09.000000000 +0000
@@ -23,6 +23,13 @@
 
 #include "vc4_qpu.h"
 
+#ifdef NDEBUG
+/* Since most of our code is used in assert()s, don't warn about dead code. */
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
 static bool
 writes_reg(uint64_t inst, uint32_t w)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_query.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_query.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_query.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_query.c	2015-09-16 14:36:09.000000000 +0000
@@ -50,9 +50,10 @@
         free(query);
 }
 
-static void
+static boolean
 vc4_begin_query(struct pipe_context *ctx, struct pipe_query *query)
 {
+        return true;
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_register_allocate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_register_allocate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_register_allocate.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_register_allocate.c	2015-09-16 14:36:09.000000000 +0000
@@ -113,9 +113,12 @@
         if (vc4->regs)
                 return;
 
-        vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs));
+        vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);
 
         vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+        vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs);
+        vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
+        vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
         for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
                 /* Reserve ra31/rb31 for spilling fixup_raddr_conflict() in
                  * vc4_qpu_emit.c
@@ -126,15 +129,20 @@
                 /* R4 can't be written as a general purpose register. (it's
                  * TMU_NOSWAP as a write address).
                  */
-                if (vc4_regs[i].mux == QPU_MUX_R4)
+                if (vc4_regs[i].mux == QPU_MUX_R4) {
+                        ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+                        ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
                         continue;
+                }
 
                 ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
+                ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i);
         }
 
-        vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
-        for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2)
+        for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
                 ra_class_add_reg(vc4->regs, vc4->reg_class_a, i);
+                ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+        }
 
         ra_set_finalize(vc4->regs, NULL);
 }
@@ -153,6 +161,10 @@
         return a->priority - b->priority;
 }
 
+#define CLASS_BIT_A			(1 << 0)
+#define CLASS_BIT_B_OR_ACC		(1 << 1)
+#define CLASS_BIT_R4			(1 << 2)
+
 /**
  * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
  *
@@ -161,14 +173,15 @@
 struct qpu_reg *
 vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
 {
-        struct simple_node *node;
         struct node_to_temp_map map[c->num_temps];
         uint32_t temp_to_node[c->num_temps];
         uint32_t def[c->num_temps];
         uint32_t use[c->num_temps];
+        uint8_t class_bits[c->num_temps];
         struct qpu_reg *temp_registers = calloc(c->num_temps,
                                                 sizeof(*temp_registers));
-        memset(def, 0, sizeof(def));
+        for (int i = 0; i < ARRAY_SIZE(def); i++)
+                def[i] = ~0;
         memset(use, 0, sizeof(use));
 
         /* If things aren't ever written (undefined values), just read from
@@ -182,18 +195,12 @@
         struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
                                                          c->num_temps);
 
-        for (uint32_t i = 0; i < c->num_temps; i++) {
-                ra_set_node_class(g, i, vc4->reg_class_any);
-        }
-
         /* Compute the live ranges so we can figure out interference.
          */
         uint32_t ip = 0;
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
-
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 if (inst->dst.file == QFILE_TEMP) {
-                        def[inst->dst.index] = ip;
+                        def[inst->dst.index] = MIN2(ip, def[inst->dst.index]);
                         use[inst->dst.index] = ip;
                 }
 
@@ -226,9 +233,32 @@
                 temp_to_node[map[i].temp] = i;
         }
 
-        /* Figure out our register classes and preallocated registers*/
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
+        /* Figure out our register classes and preallocated registers.  We
+         * start with any temp being able to be in any file, then instructions
+         * incrementally remove bits that the temp definitely can't be in.
+         */
+        memset(class_bits,
+               CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4,
+               sizeof(class_bits));
+
+        ip = 0;
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+                if (qir_writes_r4(inst)) {
+                        /* This instruction writes r4 (and optionally moves
+                         * its result to a temp), so nothing else can be
+                         * stored in r4 across it.
+                         */
+                        for (int i = 0; i < c->num_temps; i++) {
+                                if (def[i] < ip && use[i] > ip)
+                                        class_bits[i] &= ~CLASS_BIT_R4;
+                        }
+                } else {
+                        /* R4 can't be written as a general purpose
+                         * register. (it's TMU_NOSWAP as a write address).
+                         */
+                        if (inst->dst.file == QFILE_TEMP)
+                                class_bits[inst->dst.index] &= ~CLASS_BIT_R4;
+                }
 
                 switch (inst->op) {
                 case QOP_FRAG_Z:
@@ -241,26 +271,58 @@
                                         AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
                         break;
 
-                case QOP_TEX_RESULT:
-                case QOP_TLB_COLOR_READ:
-                        assert(vc4_regs[ACC_INDEX + 4].mux == QPU_MUX_R4);
-                        ra_set_node_reg(g, temp_to_node[inst->dst.index],
-                                        ACC_INDEX + 4);
-                        break;
-
-                case QOP_PACK_SCALED:
-                        /* The pack flags require an A-file dst register. */
-                        ra_set_node_class(g, temp_to_node[inst->dst.index],
-                                          vc4->reg_class_a);
-                        break;
-
                 default:
                         break;
                 }
 
+                if (inst->dst.pack && !qir_is_mul(inst)) {
+                        /* The non-MUL pack flags require an A-file dst
+                         * register.
+                         */
+                        class_bits[inst->dst.index] &= CLASS_BIT_A;
+                }
+
                 if (qir_src_needs_a_file(inst)) {
-                        ra_set_node_class(g, temp_to_node[inst->src[0].index],
-                                          vc4->reg_class_a);
+                        switch (inst->op) {
+                        case QOP_UNPACK_8A_F:
+                        case QOP_UNPACK_8B_F:
+                        case QOP_UNPACK_8C_F:
+                        case QOP_UNPACK_8D_F:
+                                /* Special case: these can be done as R4
+                                 * unpacks, as well.
+                                 */
+                                class_bits[inst->src[0].index] &= (CLASS_BIT_A |
+                                                                   CLASS_BIT_R4);
+                                break;
+                        default:
+                                class_bits[inst->src[0].index] &= CLASS_BIT_A;
+                                break;
+                        }
+                }
+                ip++;
+        }
+
+        for (uint32_t i = 0; i < c->num_temps; i++) {
+                int node = temp_to_node[i];
+
+                switch (class_bits[i]) {
+                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
+                        ra_set_node_class(g, node, vc4->reg_class_any);
+                        break;
+                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+                        ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc);
+                        break;
+                case CLASS_BIT_A | CLASS_BIT_R4:
+                        ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
+                        break;
+                case CLASS_BIT_A:
+                        ra_set_node_class(g, node, vc4->reg_class_a);
+                        break;
+                default:
+                        fprintf(stderr, "temp %d: bad class bits: 0x%x\n",
+                                i, class_bits[i]);
+                        abort();
+                        break;
                 }
         }
 
@@ -275,7 +337,11 @@
         }
 
         bool ok = ra_allocate(g);
-        assert(ok);
+        if (!ok) {
+                fprintf(stderr, "Failed to register allocate:\n");
+                qir_dump(c);
+                abort();
+        }
 
         for (uint32_t i = 0; i < c->num_temps; i++) {
                 temp_registers[i] = vc4_regs[ra_get_node_reg(g, temp_to_node[i])];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_reorder_uniforms.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_reorder_uniforms.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_reorder_uniforms.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_reorder_uniforms.c	2015-09-16 14:36:09.000000000 +0000
@@ -42,10 +42,8 @@
         uint32_t *uniform_index = NULL;
         uint32_t uniform_index_size = 0;
         uint32_t next_uniform = 0;
-        struct simple_node *node;
-        foreach(node, &c->instructions) {
-                struct qinst *inst = (struct qinst *)node;
 
+        list_for_each_entry(struct qinst, inst, &c->instructions, link) {
                 for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                         if (inst->src[i].file != QFILE_UNIF)
                                 continue;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_resource.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_resource.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_resource.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_resource.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,7 @@
 #include "util/u_format.h"
 #include "util/u_inlines.h"
 #include "util/u_surface.h"
+#include "util/u_upload_mgr.h"
 
 #include "vc4_screen.h"
 #include "vc4_context.h"
@@ -101,6 +102,12 @@
 
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
                 vc4_resource_bo_alloc(rsc);
+
+                /* If it might be bound as one of our vertex buffers, make
+                 * sure we re-emit vertex buffer state.
+                 */
+                if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                        vc4->dirty |= VC4_DIRTY_VTXBUF;
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 if (vc4_cl_references_bo(pctx, rsc->bo)) {
                         if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
@@ -109,6 +116,8 @@
                             prsc->height0 == box->height &&
                             prsc->depth0 == box->depth) {
                                 vc4_resource_bo_alloc(rsc);
+                                if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                        vc4->dirty |= VC4_DIRTY_VTXBUF;
                         } else {
                                 vc4_flush(pctx);
                         }
@@ -161,6 +170,8 @@
                 /* We need to align the box to utile boundaries, since that's
                  * what load/store operate on.
                  */
+                uint32_t orig_width = ptrans->box.width;
+                uint32_t orig_height = ptrans->box.height;
                 uint32_t box_start_x = ptrans->box.x & (utile_w - 1);
                 uint32_t box_start_y = ptrans->box.y & (utile_h - 1);
                 ptrans->box.width += box_start_x;
@@ -174,7 +185,9 @@
                 ptrans->layer_stride = ptrans->stride;
 
                 trans->map = malloc(ptrans->stride * ptrans->box.height);
-                if (usage & PIPE_TRANSFER_READ) {
+                if (usage & PIPE_TRANSFER_READ ||
+                    ptrans->box.width != orig_width ||
+                    ptrans->box.height != orig_height) {
                         vc4_load_tiled_image(trans->map, ptrans->stride,
                                              buf + slice->offset +
                                              box->z * rsc->cube_map_stride,
@@ -638,41 +651,37 @@
  * was in user memory, it would be nice to not have uploaded it to a VBO
  * before translating.
  */
-void
-vc4_update_shadow_index_buffer(struct pipe_context *pctx,
-                               const struct pipe_index_buffer *ib)
+struct pipe_resource *
+vc4_get_shadow_index_buffer(struct pipe_context *pctx,
+                            const struct pipe_index_buffer *ib,
+                            uint32_t count,
+                            uint32_t *shadow_offset)
 {
-        struct vc4_resource *shadow = vc4_resource(ib->buffer);
-        struct vc4_resource *orig = vc4_resource(shadow->shadow_parent);
-        uint32_t count = shadow->base.b.width0 / 2;
-
-        if (shadow->writes == orig->writes)
-                return;
-
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct vc4_resource *orig = vc4_resource(ib->buffer);
         perf_debug("Fallback conversion for %d uint indices\n", count);
 
+        void *data;
+        struct pipe_resource *shadow_rsc = NULL;
+        u_upload_alloc(vc4->uploader, 0, count * 2,
+                       shadow_offset, &shadow_rsc, &data);
+        uint16_t *dst = data;
+
         struct pipe_transfer *src_transfer;
         uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b,
                                               ib->offset,
                                               count * 4,
                                               PIPE_TRANSFER_READ, &src_transfer);
 
-        struct pipe_transfer *dst_transfer;
-        uint16_t *dst = pipe_buffer_map_range(pctx, &shadow->base.b,
-                                              0,
-                                              count * 2,
-                                              PIPE_TRANSFER_WRITE, &dst_transfer);
-
         for (int i = 0; i < count; i++) {
                 uint32_t src_index = src[i];
                 assert(src_index <= 0xffff);
                 dst[i] = src_index;
         }
 
-        pctx->transfer_unmap(pctx, dst_transfer);
         pctx->transfer_unmap(pctx, src_transfer);
 
-        shadow->writes = orig->writes;
+        return shadow_rsc;
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_resource.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_resource.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_resource.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_resource.h	2015-09-16 14:36:09.000000000 +0000
@@ -26,7 +26,7 @@
 #define VC4_RESOURCE_H
 
 #include "vc4_screen.h"
-#include "vc4_packet.h"
+#include "kernel/vc4_packet.h"
 #include "util/u_transfer.h"
 
 struct vc4_transfer {
@@ -45,7 +45,6 @@
 struct vc4_surface {
         struct pipe_surface base;
         uint32_t offset;
-        uint32_t stride;
         uint8_t tiling;
 };
 
@@ -83,19 +82,19 @@
         struct pipe_resource *shadow_parent;
 };
 
-static INLINE struct vc4_resource *
+static inline struct vc4_resource *
 vc4_resource(struct pipe_resource *prsc)
 {
         return (struct vc4_resource *)prsc;
 }
 
-static INLINE struct vc4_surface *
+static inline struct vc4_surface *
 vc4_surface(struct pipe_surface *psurf)
 {
         return (struct vc4_surface *)psurf;
 }
 
-static INLINE struct vc4_transfer *
+static inline struct vc4_transfer *
 vc4_transfer(struct pipe_transfer *ptrans)
 {
         return (struct vc4_transfer *)ptrans;
@@ -107,8 +106,10 @@
                                           const struct pipe_resource *tmpl);
 void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
                                          struct pipe_sampler_view *view);
-void vc4_update_shadow_index_buffer(struct pipe_context *pctx,
-                                    const struct pipe_index_buffer *ib);
+struct pipe_resource *vc4_get_shadow_index_buffer(struct pipe_context *pctx,
+                                                  const struct pipe_index_buffer *ib,
+                                                  uint32_t count,
+                                                  uint32_t *offset);
 void vc4_dump_surface(struct pipe_surface *psurf);
 
 #endif /* VC4_RESOURCE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -176,6 +176,10 @@
         case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
         case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
         case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+        case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+        case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+        case PIPE_CAP_DEPTH_BOUNDS_TEST:
                 return 0;
 
                 /* Stream output. */
@@ -323,6 +327,7 @@
         case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
         case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+        case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
                 return 0;
         case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
         case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
@@ -459,7 +464,7 @@
         pscreen->is_format_supported = vc4_screen_is_format_supported;
 
         screen->fd = fd;
-        make_empty_list(&screen->bo_cache.time_list);
+        list_inithead(&screen->bo_cache.time_list);
 
         vc4_fence_init(screen);
 
@@ -488,6 +493,12 @@
 {
         whandle->stride = stride;
 
+        /* If we're passing some reference to our BO out to some other part of
+         * the system, then we can't do any optimizations about only us being
+         * the ones seeing it (like BO caching or shadow update avoidance).
+         */
+        bo->private = false;
+
         switch (whandle->type) {
         case DRM_API_HANDLE_TYPE_SHARED:
                 return vc4_bo_flink(bo, &whandle->handle);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -27,7 +27,7 @@
 #include "pipe/p_screen.h"
 #include "os/os_thread.h"
 #include "state_tracker/drm_driver.h"
-#include "vc4_qir.h"
+#include "util/list.h"
 
 struct vc4_bo;
 
@@ -61,13 +61,19 @@
 
         struct vc4_bo_cache {
                 /** List of struct vc4_bo freed, by age. */
-                struct simple_node time_list;
+                struct list_head time_list;
                 /** List of struct vc4_bo freed, per size, by age. */
-                struct simple_node *size_list;
+                struct list_head *size_list;
                 uint32_t size_list_size;
 
                 pipe_mutex lock;
+
+                uint32_t bo_size;
+                uint32_t bo_count;
         } bo_cache;
+
+        uint32_t bo_size;
+        uint32_t bo_count;
 };
 
 static inline struct vc4_screen *
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_simulator.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_simulator.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_simulator.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_simulator.c	2015-09-16 14:36:09.000000000 +0000
@@ -39,11 +39,13 @@
 {
         struct vc4_context *vc4 = dev->vc4;
         struct vc4_screen *screen = vc4->screen;
-        struct drm_gem_cma_object *obj = CALLOC_STRUCT(drm_gem_cma_object);
+        struct drm_vc4_bo *drm_bo = CALLOC_STRUCT(drm_vc4_bo);
+        struct drm_gem_cma_object *obj = &drm_bo->base;
         uint32_t size = align(bo->size, 4096);
 
-        obj->bo = bo;
+        drm_bo->bo = bo;
         obj->base.size = size;
+        obj->base.dev = dev;
         obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
         obj->paddr = simpenrose_hw_addr(obj->vaddr);
 
@@ -72,11 +74,12 @@
         struct vc4_bo **bos = vc4->bo_pointers.base;
 
         exec->bo_count = args->bo_handle_count;
-        exec->bo = calloc(exec->bo_count, sizeof(struct vc4_bo_exec_state));
+        exec->bo = calloc(exec->bo_count, sizeof(void *));
         for (int i = 0; i < exec->bo_count; i++) {
                 struct vc4_bo *bo = bos[i];
                 struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
 
+                struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
 #if 0
                 fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
 #endif
@@ -84,7 +87,16 @@
                 vc4_bo_map(bo);
                 memcpy(obj->vaddr, bo->map, bo->size);
 
-                exec->bo[i].bo = obj;
+                exec->bo[i] = obj;
+
+                /* The kernel does this validation at shader create ioctl
+                 * time.
+                 */
+                if (strcmp(bo->name, "code") == 0) {
+                        drm_bo->validated_shader = vc4_validate_shader(obj);
+                        if (!drm_bo->validated_shader)
+                                abort();
+                }
         }
         return 0;
 }
@@ -93,8 +105,8 @@
 vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
 {
         for (int i = 0; i < exec->bo_count; i++) {
-                struct drm_gem_cma_object *obj = exec->bo[i].bo;
-                struct vc4_bo *bo = obj->bo;
+                struct drm_gem_cma_object *obj = exec->bo[i];
+                struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
 
                 memcpy(bo->map, obj->vaddr, bo->size);
 
@@ -124,6 +136,7 @@
         int ret;
 
         memset(&exec, 0, sizeof(exec));
+        list_inithead(&exec.unref_list);
 
         if (ctex && ctex->bo->simulator_winsys_map) {
 #if 0
@@ -176,8 +189,12 @@
         if (ret)
                 return ret;
 
-        vc4_bo_unreference(&exec.exec_bo->bo);
-        free(exec.exec_bo);
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list,
+                                 unref_head) {
+                list_del(&bo->unref_head);
+                vc4_bo_unreference(&bo->bo);
+                free(bo);
+        }
 
         if (ctex && ctex->bo->simulator_winsys_map) {
                 for (int y = 0; y < ctex->base.b.height0; y++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_simulator_validate.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_simulator_validate.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_simulator_validate.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_simulator_validate.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,6 +43,7 @@
 #define kfree(ptr) free(ptr)
 #define krealloc(ptr, size, args) realloc(ptr, size)
 #define roundup(x, y) align(x, y)
+#define round_up(x, y) align(x, y)
 #define max(x, y) MAX2(x, y)
 #define min(x, y) MiN2(x, y)
 #define BUG_ON(condition) assert(!(condition))
@@ -63,16 +64,28 @@
         uint32_t simulator_mem_next;
 };
 
-struct drm_gem_cma_object {
-        struct vc4_bo *bo;
+struct drm_gem_object {
+        uint32_t size;
+        struct drm_device *dev;
+};
 
-        struct {
-                uint32_t size;
-        } base;
+struct drm_gem_cma_object {
+        struct drm_gem_object base;
         uint32_t paddr;
         void *vaddr;
 };
 
+struct drm_vc4_bo {
+        struct drm_gem_cma_object base;
+        struct vc4_bo *bo;
+        struct vc4_validated_shader_info *validated_shader;
+        struct list_head unref_head;
+};
+
+static inline struct drm_vc4_bo *to_vc4_bo(struct drm_gem_object *obj)
+{
+        return (struct drm_vc4_bo *)obj;
+}
 
 struct drm_gem_cma_object *
 drm_gem_cma_create(struct drm_device *dev, size_t size);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -107,7 +107,7 @@
         /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
          * BCM21553).
          */
-        so->point_size = MAX2(cso->point_size, .125);
+        so->point_size = MAX2(cso->point_size, .125f);
 
         if (cso->front_ccw)
                 so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES;
@@ -304,24 +304,8 @@
 
         if (ib) {
                 assert(!ib->user_buffer);
-
-                if (ib->index_size == 4) {
-                        struct pipe_resource tmpl = *ib->buffer;
-                        assert(tmpl.format == PIPE_FORMAT_R8_UNORM);
-                        assert(tmpl.height0 == 1);
-                        tmpl.width0 = (tmpl.width0 - ib->offset) / 2;
-                        struct pipe_resource *pshadow =
-                                vc4_resource_create(&vc4->screen->base, &tmpl);
-                        struct vc4_resource *shadow = vc4_resource(pshadow);
-                        pipe_resource_reference(&shadow->shadow_parent, ib->buffer);
-
-                        pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
-                        vc4->indexbuf.buffer = pshadow;
-                        vc4->indexbuf.index_size = 2;
-                } else {
-                        pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
-                        vc4->indexbuf.index_size = ib->index_size;
-                }
+                pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
+                vc4->indexbuf.index_size = ib->index_size;
                 vc4->indexbuf.offset = ib->offset;
         } else {
                 pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
@@ -477,11 +461,64 @@
         }
 }
 
+static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
+{
+        switch (p_wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        case PIPE_TEX_WRAP_CLAMP:
+                return (using_nearest ? 1 : 3); /* edge : border */
+        default:
+                fprintf(stderr, "Unknown wrap mode %u\n", p_wrap);
+                assert(!"not reached");
+                return 0;
+        }
+}
+
 static void *
 vc4_create_sampler_state(struct pipe_context *pctx,
                          const struct pipe_sampler_state *cso)
 {
-        return vc4_generic_cso_state_create(cso, sizeof(*cso));
+        static const uint8_t minfilter_map[6] = { /* [mip * 2 + img] */
+                VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
+                VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
+                VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
+                VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
+                VC4_TEX_P1_MINFILT_NEAREST,
+                VC4_TEX_P1_MINFILT_LINEAR,
+        };
+        static const uint32_t magfilter_map[] = {
+                [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
+                [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
+        };
+        bool either_nearest =
+                (cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST ||
+                 cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+        struct vc4_sampler_state *so = CALLOC_STRUCT(vc4_sampler_state);
+
+        if (!so)
+                return NULL;
+
+        memcpy(so, cso, sizeof(*cso));
+
+        so->texture_p1 =
+                (VC4_SET_FIELD(magfilter_map[cso->mag_img_filter],
+                               VC4_TEX_P1_MAGFILT) |
+                 VC4_SET_FIELD(minfilter_map[cso->min_mip_filter * 2 +
+                                             cso->min_img_filter],
+                               VC4_TEX_P1_MINFILT) |
+                 VC4_SET_FIELD(translate_wrap(cso->wrap_s, either_nearest),
+                               VC4_TEX_P1_WRAP_S) |
+                 VC4_SET_FIELD(translate_wrap(cso->wrap_t, either_nearest),
+                               VC4_TEX_P1_WRAP_T));
+
+        return so;
 }
 
 static void
@@ -515,13 +552,13 @@
 vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                         const struct pipe_sampler_view *cso)
 {
-        struct pipe_sampler_view *so = malloc(sizeof(*so));
+        struct vc4_sampler_view *so = malloc(sizeof(*so));
         struct vc4_resource *rsc = vc4_resource(prsc);
 
         if (!so)
                 return NULL;
 
-        *so = *cso;
+        so->base = *cso;
 
         pipe_reference(NULL, &prsc->reference);
 
@@ -532,27 +569,43 @@
          * Also, Raspberry Pi doesn't support sampling from raster textures,
          * so we also have to copy to a temporary then.
          */
-        if (so->u.tex.first_level ||
+        if (cso->u.tex.first_level ||
             rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
                 struct vc4_resource *shadow_parent = vc4_resource(prsc);
                 struct pipe_resource tmpl = shadow_parent->base.b;
                 struct vc4_resource *clone;
 
-                tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level);
-                tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level);
-                tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level;
+                tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+                tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level);
+                tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level);
+                tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
 
                 prsc = vc4_resource_create(pctx->screen, &tmpl);
+                rsc = vc4_resource(prsc);
                 clone = vc4_resource(prsc);
                 clone->shadow_parent = &shadow_parent->base.b;
                 /* Flag it as needing update of the contents from the parent. */
                 clone->writes = shadow_parent->writes - 1;
+
+                assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
         }
-        so->texture = prsc;
-        so->reference.count = 1;
-        so->context = pctx;
+        so->base.texture = prsc;
+        so->base.reference.count = 1;
+        so->base.context = pctx;
+
+        so->texture_p0 =
+                (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
+                 VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
+                 VC4_SET_FIELD(cso->u.tex.last_level -
+                               cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
+                 VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
+                               VC4_TEX_P0_CMMODE));
+        so->texture_p1 =
+                (VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
+                 VC4_SET_FIELD(prsc->height0 & 2047, VC4_TEX_P1_HEIGHT) |
+                 VC4_SET_FIELD(prsc->width0 & 2047, VC4_TEX_P1_WIDTH));
 
-        return so;
+        return &so->base;
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_tiling.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_tiling.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_tiling.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_tiling.c	2015-09-16 14:36:09.000000000 +0000
@@ -127,13 +127,10 @@
 static void
 check_box_utile_alignment(const struct pipe_box *box, int cpp)
 {
-        uint32_t utile_w = vc4_utile_width(cpp);
-        uint32_t utile_h = vc4_utile_height(cpp);
-
-        assert(!(box->x & (utile_w - 1)));
-        assert(!(box->y & (utile_h - 1)));
-        assert(!(box->width & (utile_w - 1)));
-        assert(!(box->height & (utile_h - 1)));
+        assert(!(box->x & (vc4_utile_width(cpp) - 1)));
+        assert(!(box->y & (vc4_utile_height(cpp) - 1)));
+        assert(!(box->width & (vc4_utile_width(cpp) - 1)));
+        assert(!(box->height & (vc4_utile_height(cpp) - 1)));
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_tiling.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_tiling.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_tiling.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_tiling.h	2015-09-16 14:36:09.000000000 +0000
@@ -24,9 +24,9 @@
 #ifndef VC4_TILING_H
 #define VC4_TILING_H
 
-uint32_t vc4_utile_width(int cpp);
-uint32_t vc4_utile_height(int cpp);
-bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp);
+uint32_t vc4_utile_width(int cpp) ATTRIBUTE_CONST;
+uint32_t vc4_utile_height(int cpp) ATTRIBUTE_CONST;
+bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
 void vc4_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
 void vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
 void vc4_load_tiled_image(void *dst, uint32_t dst_stride,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_uniforms.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_uniforms.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/drivers/vc4/vc4_uniforms.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/drivers/vc4/vc4_uniforms.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,344 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "vc4_context.h"
+#include "vc4_qir.h"
+
+static void
+write_texture_p0(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t unit)
+{
+        struct vc4_sampler_view *view;
+        /* P0 is written with cl_reloc() against the sampled resource's BO. */
+        view = vc4_sampler_view(texstate->textures[unit]);
+        cl_reloc(vc4, &vc4->uniforms, uniforms,
+                 vc4_resource(view->base.texture)->bo, view->texture_p0);
+}
+
+static void
+write_texture_p1(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t unit)
+{
+        /* P1 is the OR of the view's and the sampler's texture_p1 words. */
+        uint32_t p1 =
+                vc4_sampler_view(texstate->textures[unit])->texture_p1;
+
+        p1 |= vc4_sampler_state(texstate->samplers[unit])->texture_p1;
+        cl_aligned_u32(uniforms, p1);
+}
+
+static void
+write_texture_p2(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t data)
+{
+        /* Low 16 bits of data select the unit; bit 16 feeds BSLOD. */
+        struct pipe_sampler_view *view = texstate->textures[data & 0xffff];
+        uint32_t cmst = vc4_resource(view->texture)->cube_map_stride >> 12;
+        uint32_t p2 = VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
+                                    VC4_TEX_P2_PTYPE);
+
+        p2 |= VC4_SET_FIELD(cmst, VC4_TEX_P2_CMST);
+        p2 |= VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD);
+        cl_aligned_u32(uniforms, p2);
+}
+
+/* Brace list of four UTIL_FORMAT_SWIZZLE_* values; unused in this file. */
+#define SWIZ(x,y,z,w) {          \
+        UTIL_FORMAT_SWIZZLE_##x, \
+        UTIL_FORMAT_SWIZZLE_##y, \
+        UTIL_FORMAT_SWIZZLE_##z, \
+        UTIL_FORMAT_SWIZZLE_##w  \
+}
+
+static void
+write_texture_border_color(struct vc4_context *vc4,
+                           struct vc4_cl_out **uniforms,
+                           struct vc4_texture_stateobj *texstate,
+                           uint32_t unit)
+{
+        struct pipe_sampler_state *sampler = texstate->samplers[unit];
+        struct pipe_sampler_view *texture = texstate->textures[unit];
+        struct vc4_resource *rsc = vc4_resource(texture->texture);
+        union util_color uc;
+        /* Emits one 32-bit word holding the sampler's packed border color. */
+        const struct util_format_description *tex_format_desc =
+                util_format_description(texture->format);
+        /* Work on a copy; sRGB formats get R, G and B converted, not A. */
+        float border_color[4];
+        for (int i = 0; i < 4; i++)
+                border_color[i] = sampler->border_color.f[i];
+        if (util_format_is_srgb(texture->format)) {
+                for (int i = 0; i < 3; i++)
+                        border_color[i] =
+                                util_format_linear_to_srgb_float(border_color[i]);
+        }
+
+        /* Turn the border color into the layout of channels that it would
+         * have when stored as texture contents.
+         */
+        float storage_color[4];
+        util_format_unswizzle_4f(storage_color,
+                                 border_color,
+                                 tex_format_desc->swizzle);
+
+        /* Now, pack so that when the vc4_format-sampled texture contents are
+         * replaced with our border color, the vc4_get_format_swizzle()
+         * swizzling will get the right channels.
+         */
+        if (util_format_is_depth_or_stencil(texture->format)) {
+                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
+                                       sampler->border_color.f[0]) << 8;
+        } else {
+                switch (rsc->vc4_format) {
+                default:
+                case VC4_TEXTURE_TYPE_RGBA8888:
+                        util_pack_color(storage_color,
+                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+                        break;
+                case VC4_TEXTURE_TYPE_RGBA4444:
+                        util_pack_color(storage_color,
+                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
+                        break;
+                case VC4_TEXTURE_TYPE_RGB565:
+                        util_pack_color(storage_color,
+                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+                        break;
+                case VC4_TEXTURE_TYPE_ALPHA:
+                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
+                        break;
+                case VC4_TEXTURE_TYPE_LUMALPHA:
+                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
+                                    (float_to_ubyte(storage_color[0]) << 0));
+                        break;
+                }
+        }
+        /* Only the first word of the packed color is written out. */
+        cl_aligned_u32(uniforms, uc.ui[0]);
+}
+
+static uint32_t
+get_texrect_scale(struct vc4_texture_stateobj *texstate,
+                  enum quniform_contents contents,
+                  uint32_t data)
+{
+        /* SCALE_X yields the reciprocal of the width; every other
+         * contents value yields the reciprocal of the height.
+         */
+        struct pipe_sampler_view *view = texstate->textures[data];
+        uint32_t dim = (contents == QUNIFORM_TEXRECT_SCALE_X ?
+                        view->texture->width0 :
+                        view->texture->height0);
+
+        return fui(1.0f / dim);
+}
+
+static struct vc4_bo *
+vc4_upload_ubo(struct vc4_context *vc4,
+               struct vc4_compiled_shader *shader,
+               const uint32_t *gallium_uniforms)
+{
+        struct vc4_bo *ubo;
+        uint32_t *dst;
+        /* Nothing to upload when the shader declares no UBO contents. */
+        if (shader->ubo_size == 0)
+                return NULL;
+        ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
+        dst = vc4_bo_map(ubo);
+        for (uint32_t r = 0; r < shader->num_ubo_ranges; r++)
+                memcpy(dst + shader->ubo_ranges[r].dst_offset,
+                       gallium_uniforms + shader->ubo_ranges[r].src_offset,
+                       shader->ubo_ranges[r].size);
+        return ubo;
+}
+
+void
+vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+                   struct vc4_constbuf_stateobj *cb,
+                   struct vc4_texture_stateobj *texstate)
+{
+        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
+        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+        struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
+        /* Ask for room for 4 bytes per uniform and per texture sample. */
+        cl_ensure_space(&vc4->uniforms, (uinfo->count +
+                                         uinfo->num_texture_samples) * 4);
+
+        struct vc4_cl_out *uniforms =
+                cl_start_shader_reloc(&vc4->uniforms,
+                                      uinfo->num_texture_samples);
+
+        for (int i = 0; i < uinfo->count; i++) {
+                /* Every case below writes exactly one entry for uniform i. */
+                switch (uinfo->contents[i]) {
+                case QUNIFORM_CONSTANT:
+                        cl_aligned_u32(&uniforms, uinfo->data[i]);
+                        break;
+                case QUNIFORM_UNIFORM:
+                        cl_aligned_u32(&uniforms,
+                                       gallium_uniforms[uinfo->data[i]]);
+                        break;
+                case QUNIFORM_VIEWPORT_X_SCALE:
+                        cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
+                        break;
+                case QUNIFORM_VIEWPORT_Y_SCALE:
+                        cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);
+                        break;
+
+                case QUNIFORM_VIEWPORT_Z_OFFSET:
+                        cl_aligned_f(&uniforms, vc4->viewport.translate[2]);
+                        break;
+                case QUNIFORM_VIEWPORT_Z_SCALE:
+                        cl_aligned_f(&uniforms, vc4->viewport.scale[2]);
+                        break;
+
+                case QUNIFORM_USER_CLIP_PLANE:
+                        cl_aligned_f(&uniforms,
+                                     vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P0:
+                        write_texture_p0(vc4, &uniforms, texstate,
+                                         uinfo->data[i]);
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                        write_texture_p1(vc4, &uniforms, texstate,
+                                         uinfo->data[i]);
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P2:
+                        write_texture_p2(vc4, &uniforms, texstate,
+                                         uinfo->data[i]);
+                        break;
+
+                case QUNIFORM_UBO_ADDR:
+                        cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
+                        break;
+
+                case QUNIFORM_TEXTURE_BORDER_COLOR:
+                        write_texture_border_color(vc4, &uniforms,
+                                                   texstate, uinfo->data[i]);
+                        break;
+
+                case QUNIFORM_TEXRECT_SCALE_X:
+                case QUNIFORM_TEXRECT_SCALE_Y:
+                        cl_aligned_u32(&uniforms,
+                                       get_texrect_scale(texstate,
+                                                         uinfo->contents[i],
+                                                         uinfo->data[i]));
+                        break;
+
+                case QUNIFORM_BLEND_CONST_COLOR_X:
+                case QUNIFORM_BLEND_CONST_COLOR_Y:
+                case QUNIFORM_BLEND_CONST_COLOR_Z:
+                case QUNIFORM_BLEND_CONST_COLOR_W:
+                        cl_aligned_f(&uniforms,
+                                     CLAMP(vc4->blend_color.color[uinfo->contents[i] -
+                                                                  QUNIFORM_BLEND_CONST_COLOR_X],
+                                           0, 1));
+                        break;
+                /* Indices 0 and 1 also OR in the stencil ref value << 8. */
+                case QUNIFORM_STENCIL:
+                        cl_aligned_u32(&uniforms,
+                                       vc4->zsa->stencil_uniforms[uinfo->data[i]] |
+                                       (uinfo->data[i] <= 1 ?
+                                        (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+                                        0));
+                        break;
+
+                case QUNIFORM_ALPHA_REF:
+                        cl_aligned_f(&uniforms,
+                                     vc4->zsa->base.alpha.ref_value);
+                        break;
+                }
+#if 0
+                uint32_t written_val = *((uint32_t *)uniforms - 1);
+                fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
+                        shader, i, written_val, uif(written_val));
+#endif
+        }
+
+        cl_end(&vc4->uniforms, uniforms);
+        /* ubo is NULL here when the shader has no UBO data (ubo_size == 0). */
+        vc4_bo_unreference(&ubo);
+}
+
+void
+vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
+{
+        uint32_t dirty = 0;
+        /* OR together the VC4_DIRTY_* bit for each kind of uniform in use. */
+        for (int i = 0; i < shader->uniforms.count; i++) {
+                switch (shader->uniforms.contents[i]) {
+                case QUNIFORM_CONSTANT:
+                        break;
+                case QUNIFORM_UNIFORM:
+                case QUNIFORM_UBO_ADDR:
+                        dirty |= VC4_DIRTY_CONSTBUF;
+                        break;
+
+                case QUNIFORM_VIEWPORT_X_SCALE:
+                case QUNIFORM_VIEWPORT_Y_SCALE:
+                case QUNIFORM_VIEWPORT_Z_OFFSET:
+                case QUNIFORM_VIEWPORT_Z_SCALE:
+                        dirty |= VC4_DIRTY_VIEWPORT;
+                        break;
+
+                case QUNIFORM_USER_CLIP_PLANE:
+                        dirty |= VC4_DIRTY_CLIP;
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P0:
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                case QUNIFORM_TEXTURE_CONFIG_P2:
+                case QUNIFORM_TEXTURE_BORDER_COLOR:
+                case QUNIFORM_TEXRECT_SCALE_X:
+                case QUNIFORM_TEXRECT_SCALE_Y:
+                        dirty |= VC4_DIRTY_TEXSTATE;
+                        break;
+
+                case QUNIFORM_BLEND_CONST_COLOR_X:
+                case QUNIFORM_BLEND_CONST_COLOR_Y:
+                case QUNIFORM_BLEND_CONST_COLOR_Z:
+                case QUNIFORM_BLEND_CONST_COLOR_W:
+                        dirty |= VC4_DIRTY_BLEND_COLOR;
+                        break;
+
+                case QUNIFORM_STENCIL:
+                case QUNIFORM_ALPHA_REF:
+                        dirty |= VC4_DIRTY_ZSA;
+                        break;
+                }
+        }
+        /* Store the mask on the shader; constants contribute no bit. */
+        shader->uniform_dirty_bits = dirty;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_compiler.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_compiler.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_compiler.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_compiler.h	2015-09-16 14:36:09.000000000 +0000
@@ -94,11 +94,6 @@
 #endif
 #endif
 
-/* XXX: Use standard `inline` keyword instead */
-#ifndef INLINE
-#  define INLINE inline
-#endif
-
 /* Forced function inlining */
 #ifndef ALWAYS_INLINE
 #  ifdef __GNUC__
@@ -106,7 +101,7 @@
 #  elif defined(_MSC_VER)
 #    define ALWAYS_INLINE __forceinline
 #  else
-#    define ALWAYS_INLINE INLINE
+#    define ALWAYS_INLINE inline
 #  endif
 #endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_config.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_config.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_config.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_config.h	2015-09-16 14:37:00.000000000 +0000
@@ -100,8 +100,8 @@
 #else
 #define PIPE_ARCH_SSE
 #endif
-#if defined(PIPE_CC_GCC) && !defined(__SSSE3__)
-/* #warning SSE3 support requires -msse3 compiler options */
+#if defined(PIPE_CC_GCC) && (__GNUC__ * 100 + __GNUC_MINOR__) < 409 && !defined(__SSSE3__)
+/* #warning SSSE3 support requires -mssse3 compiler options before GCC 4.9 */
 #else
 #define PIPE_ARCH_SSSE3
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -48,6 +48,7 @@
 struct pipe_draw_info;
 struct pipe_fence_handle;
 struct pipe_framebuffer_state;
+struct pipe_image_view;
 struct pipe_index_buffer;
 struct pipe_query;
 struct pipe_poly_stipple;
@@ -57,6 +58,7 @@
 struct pipe_sampler_state;
 struct pipe_sampler_view;
 struct pipe_scissor_state;
+struct pipe_shader_buffer;
 struct pipe_shader_state;
 struct pipe_stencil_ref;
 struct pipe_stream_output_target;
@@ -232,24 +234,42 @@
                              struct pipe_sampler_view **);
 
    void (*set_tess_state)(struct pipe_context *,
-                          float default_outer_level[4],
-                          float default_inner_level[2]);
+                          const float default_outer_level[4],
+                          const float default_inner_level[2]);
 
    /**
-    * Bind an array of shader resources that will be used by the
-    * graphics pipeline.  Any resources that were previously bound to
-    * the specified range will be unbound after this call.
+    * Bind an array of shader buffers that will be used by a shader.
+    * Any buffers that were previously bound to the specified range
+    * will be unbound.
     *
-    * \param start      first resource to bind.
-    * \param count      number of consecutive resources to bind.
-    * \param resources  array of pointers to the resources to bind, it
+    * \param shader     selects shader stage
+    * \param start_slot first buffer slot to bind.
+    * \param count      number of consecutive buffers to bind.
+    * \param buffers    array of buffer descriptions to bind, it
     *                   should contain at least \a count elements
-    *                   unless it's NULL, in which case no new
-    *                   resources will be bound.
+    *                   unless it's NULL, in which case no buffers will
+    *                   be bound.
     */
-   void (*set_shader_resources)(struct pipe_context *,
-                                unsigned start, unsigned count,
-                                struct pipe_surface **resources);
+   void (*set_shader_buffers)(struct pipe_context *, unsigned shader,
+                              unsigned start_slot, unsigned count,
+                              struct pipe_shader_buffer *buffers);
+
+   /**
+    * Bind an array of images that will be used by a shader.
+    * Any images that were previously bound to the specified range
+    * will be unbound.
+    *
+    * \param shader     selects shader stage
+    * \param start_slot first image slot to bind.
+    * \param count      number of consecutive images to bind.
+    * \param images     array of pointers to the images to bind, it
+    *                   should contain at least \a count elements
+    *                   unless it's NULL, in which case no images will
+    *                   be bound.
+    */
+   void (*set_shader_images)(struct pipe_context *, unsigned shader,
+                             unsigned start_slot, unsigned count,
+                             struct pipe_image_view **images);
 
    void (*set_vertex_buffers)( struct pipe_context *,
                                unsigned start_slot,
@@ -361,8 +381,14 @@
                         const void *clear_value,
                         int clear_value_size);
 
-   /** Flush draw commands
+   /**
+    * Flush draw commands
     *
+    * NOTE: use screen->fence_reference() (or equivalent) to transfer
+    * new fence ref to **fence, to ensure that previous fence is unref'd
+    *
+    * \param fence  if not NULL, an old fence to unref and transfer a
+    *    new fence reference to
     * \param flags  bitfield of enum pipe_flush_flags values.
     */
    void (*flush)(struct pipe_context *pipe,
@@ -392,6 +418,17 @@
                            struct pipe_surface *);
 
    /**
+    * Create an image view into a buffer or texture to be used with load,
+    * store, and atomic instructions by a shader stage.
+    */
+   struct pipe_image_view * (*create_image_view)(struct pipe_context *ctx,
+                                                 struct pipe_resource *texture,
+                                                 const struct pipe_image_view *templat);
+
+   void (*image_view_destroy)(struct pipe_context *ctx,
+                              struct pipe_image_view *view);
+
+   /**
     * Map a resource.
     *
     * Transfers are (by default) context-private and allow uploads to be
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_defines.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_defines.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_defines.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_defines.h	2015-09-16 14:36:09.000000000 +0000
@@ -351,9 +351,10 @@
 #define PIPE_BIND_CURSOR               (1 << 11) /* mouse cursor */
 #define PIPE_BIND_CUSTOM               (1 << 12) /* state-tracker/winsys usages */
 #define PIPE_BIND_GLOBAL               (1 << 13) /* set_global_binding */
-#define PIPE_BIND_SHADER_RESOURCE      (1 << 14) /* set_shader_resources */
-#define PIPE_BIND_COMPUTE_RESOURCE     (1 << 15) /* set_compute_resources */
-#define PIPE_BIND_COMMAND_ARGS_BUFFER  (1 << 16) /* pipe_draw_info.indirect */
+#define PIPE_BIND_SHADER_BUFFER        (1 << 14) /* set_shader_buffers */
+#define PIPE_BIND_SHADER_IMAGE         (1 << 15) /* set_shader_images */
+#define PIPE_BIND_COMPUTE_RESOURCE     (1 << 16) /* set_compute_resources */
+#define PIPE_BIND_COMMAND_ARGS_BUFFER  (1 << 17) /* pipe_draw_info.indirect */
 
 /**
  * The first two flags above were previously part of the amorphous
@@ -374,9 +375,9 @@
  * The third flag has been added to be able to force textures to be created
  * in linear mode (no tiling).
  */
-#define PIPE_BIND_SCANOUT     (1 << 17) /*  */
-#define PIPE_BIND_SHARED      (1 << 18) /* get_texture_handle ??? */
-#define PIPE_BIND_LINEAR      (1 << 19)
+#define PIPE_BIND_SCANOUT     (1 << 18) /*  */
+#define PIPE_BIND_SHARED      (1 << 19) /* get_texture_handle ??? */
+#define PIPE_BIND_LINEAR      (1 << 20)
 
 
 /**
@@ -505,106 +506,110 @@
  */
 enum pipe_cap
 {
-   PIPE_CAP_NPOT_TEXTURES = 1,
-   PIPE_CAP_TWO_SIDED_STENCIL = 2,
-   PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS = 4,
-   PIPE_CAP_ANISOTROPIC_FILTER = 5,
-   PIPE_CAP_POINT_SPRITE = 6,
-   PIPE_CAP_MAX_RENDER_TARGETS = 7,
-   PIPE_CAP_OCCLUSION_QUERY = 8,
-   PIPE_CAP_QUERY_TIME_ELAPSED = 9,
-   PIPE_CAP_TEXTURE_SHADOW_MAP = 10,
-   PIPE_CAP_TEXTURE_SWIZZLE = 11,
-   PIPE_CAP_MAX_TEXTURE_2D_LEVELS = 12,
-   PIPE_CAP_MAX_TEXTURE_3D_LEVELS = 13,
-   PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS = 14,
-   PIPE_CAP_TEXTURE_MIRROR_CLAMP = 25,
-   PIPE_CAP_BLEND_EQUATION_SEPARATE = 28,
-   PIPE_CAP_SM3 = 29,  /*< Shader Model, supported */
-   PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS = 30,
-   PIPE_CAP_PRIMITIVE_RESTART = 31,
+   PIPE_CAP_NPOT_TEXTURES,
+   PIPE_CAP_TWO_SIDED_STENCIL,
+   PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS,
+   PIPE_CAP_ANISOTROPIC_FILTER,
+   PIPE_CAP_POINT_SPRITE,
+   PIPE_CAP_MAX_RENDER_TARGETS,
+   PIPE_CAP_OCCLUSION_QUERY,
+   PIPE_CAP_QUERY_TIME_ELAPSED,
+   PIPE_CAP_TEXTURE_SHADOW_MAP,
+   PIPE_CAP_TEXTURE_SWIZZLE,
+   PIPE_CAP_MAX_TEXTURE_2D_LEVELS,
+   PIPE_CAP_MAX_TEXTURE_3D_LEVELS,
+   PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS,
+   PIPE_CAP_TEXTURE_MIRROR_CLAMP,
+   PIPE_CAP_BLEND_EQUATION_SEPARATE,
+   PIPE_CAP_SM3,
+   PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS,
+   PIPE_CAP_PRIMITIVE_RESTART,
    /** blend enables and write masks per rendertarget */
-   PIPE_CAP_INDEP_BLEND_ENABLE = 33,
+   PIPE_CAP_INDEP_BLEND_ENABLE,
    /** different blend funcs per rendertarget */
-   PIPE_CAP_INDEP_BLEND_FUNC = 34,
-   PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS = 36,
-   PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT = 37,
-   PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT = 38,
-   PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER = 39,
-   PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER = 40,
-   PIPE_CAP_DEPTH_CLIP_DISABLE = 41,
-   PIPE_CAP_SHADER_STENCIL_EXPORT = 42,
-   PIPE_CAP_TGSI_INSTANCEID = 43,
-   PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR = 44,
-   PIPE_CAP_FRAGMENT_COLOR_CLAMPED = 45,
-   PIPE_CAP_MIXED_COLORBUFFER_FORMATS = 46,
-   PIPE_CAP_SEAMLESS_CUBE_MAP = 47,
-   PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE = 48,
-   PIPE_CAP_MIN_TEXEL_OFFSET = 50,
-   PIPE_CAP_MAX_TEXEL_OFFSET = 51,
-   PIPE_CAP_CONDITIONAL_RENDER = 52,
-   PIPE_CAP_TEXTURE_BARRIER = 53,
-   PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS = 55,
-   PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS = 56,
-   PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME = 57,
-   PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS = 59, /* temporary */
-   PIPE_CAP_VERTEX_COLOR_UNCLAMPED = 60,
-   PIPE_CAP_VERTEX_COLOR_CLAMPED = 61,
-   PIPE_CAP_GLSL_FEATURE_LEVEL = 62,
-   PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION = 63,
-   PIPE_CAP_USER_VERTEX_BUFFERS = 64,
-   PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65,
-   PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66,
-   PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67,
-   PIPE_CAP_COMPUTE = 68,
-   PIPE_CAP_USER_INDEX_BUFFERS = 69,
-   PIPE_CAP_USER_CONSTANT_BUFFERS = 70,
-   PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT = 71,
-   PIPE_CAP_START_INSTANCE = 72,
-   PIPE_CAP_QUERY_TIMESTAMP = 73,
-   PIPE_CAP_TEXTURE_MULTISAMPLE = 74,
-   PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT = 75,
-   PIPE_CAP_CUBE_MAP_ARRAY = 76,
-   PIPE_CAP_TEXTURE_BUFFER_OBJECTS = 77,
-   PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT = 78,
-   PIPE_CAP_TGSI_TEXCOORD = 79,
-   PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER = 80,
-   PIPE_CAP_QUERY_PIPELINE_STATISTICS = 81,
-   PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK = 82,
-   PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE = 83,
-   PIPE_CAP_MAX_VIEWPORTS = 84,
-   PIPE_CAP_ENDIANNESS = 85,
-   PIPE_CAP_MIXED_FRAMEBUFFER_SIZES = 86,
-   PIPE_CAP_TGSI_VS_LAYER_VIEWPORT = 87,
-   PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES = 88,
-   PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS = 89,
-   PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 90,
-   PIPE_CAP_TEXTURE_GATHER_SM5 = 91,
-   PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT = 92,
-   PIPE_CAP_FAKE_SW_MSAA = 93,
-   PIPE_CAP_TEXTURE_QUERY_LOD = 94,
-   PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET = 95,
-   PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET = 96,
-   PIPE_CAP_SAMPLE_SHADING = 97,
-   PIPE_CAP_TEXTURE_GATHER_OFFSETS = 98,
-   PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION = 99,
-   PIPE_CAP_MAX_VERTEX_STREAMS = 100,
-   PIPE_CAP_DRAW_INDIRECT = 101,
-   PIPE_CAP_TGSI_FS_FINE_DERIVATIVE = 102,
-   PIPE_CAP_VENDOR_ID = 103,
-   PIPE_CAP_DEVICE_ID = 104,
-   PIPE_CAP_ACCELERATED = 105,
-   PIPE_CAP_VIDEO_MEMORY = 106,
-   PIPE_CAP_UMA = 107,
-   PIPE_CAP_CONDITIONAL_RENDER_INVERTED = 108,
-   PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE = 109,
-   PIPE_CAP_SAMPLER_VIEW_TARGET = 110,
-   PIPE_CAP_CLIP_HALFZ = 111,
-   PIPE_CAP_VERTEXID_NOBASE = 112,
-   PIPE_CAP_POLYGON_OFFSET_CLAMP = 113,
-   PIPE_CAP_MULTISAMPLE_Z_RESOLVE = 114,
-   PIPE_CAP_RESOURCE_FROM_USER_MEMORY = 115,
-   PIPE_CAP_DEVICE_RESET_STATUS_QUERY = 116,
+   PIPE_CAP_INDEP_BLEND_FUNC,
+   PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS,
+   PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT,
+   PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT,
+   PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER,
+   PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER,
+   PIPE_CAP_DEPTH_CLIP_DISABLE,
+   PIPE_CAP_SHADER_STENCIL_EXPORT,
+   PIPE_CAP_TGSI_INSTANCEID,
+   PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR,
+   PIPE_CAP_FRAGMENT_COLOR_CLAMPED,
+   PIPE_CAP_MIXED_COLORBUFFER_FORMATS,
+   PIPE_CAP_SEAMLESS_CUBE_MAP,
+   PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE,
+   PIPE_CAP_MIN_TEXEL_OFFSET,
+   PIPE_CAP_MAX_TEXEL_OFFSET,
+   PIPE_CAP_CONDITIONAL_RENDER,
+   PIPE_CAP_TEXTURE_BARRIER,
+   PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS,
+   PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS,
+   PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME,
+   PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS,
+   PIPE_CAP_VERTEX_COLOR_UNCLAMPED,
+   PIPE_CAP_VERTEX_COLOR_CLAMPED,
+   PIPE_CAP_GLSL_FEATURE_LEVEL,
+   PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION,
+   PIPE_CAP_USER_VERTEX_BUFFERS,
+   PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY,
+   PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY,
+   PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY,
+   PIPE_CAP_COMPUTE,
+   PIPE_CAP_USER_INDEX_BUFFERS,
+   PIPE_CAP_USER_CONSTANT_BUFFERS,
+   PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT,
+   PIPE_CAP_START_INSTANCE,
+   PIPE_CAP_QUERY_TIMESTAMP,
+   PIPE_CAP_TEXTURE_MULTISAMPLE,
+   PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT,
+   PIPE_CAP_CUBE_MAP_ARRAY,
+   PIPE_CAP_TEXTURE_BUFFER_OBJECTS,
+   PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT,
+   PIPE_CAP_TGSI_TEXCOORD,
+   PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER,
+   PIPE_CAP_QUERY_PIPELINE_STATISTICS,
+   PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK,
+   PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE,
+   PIPE_CAP_MAX_VIEWPORTS,
+   PIPE_CAP_ENDIANNESS,
+   PIPE_CAP_MIXED_FRAMEBUFFER_SIZES,
+   PIPE_CAP_TGSI_VS_LAYER_VIEWPORT,
+   PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES,
+   PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS,
+   PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS,
+   PIPE_CAP_TEXTURE_GATHER_SM5,
+   PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT,
+   PIPE_CAP_FAKE_SW_MSAA,
+   PIPE_CAP_TEXTURE_QUERY_LOD,
+   PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET,
+   PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET,
+   PIPE_CAP_SAMPLE_SHADING,
+   PIPE_CAP_TEXTURE_GATHER_OFFSETS,
+   PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION,
+   PIPE_CAP_MAX_VERTEX_STREAMS,
+   PIPE_CAP_DRAW_INDIRECT,
+   PIPE_CAP_TGSI_FS_FINE_DERIVATIVE,
+   PIPE_CAP_VENDOR_ID,
+   PIPE_CAP_DEVICE_ID,
+   PIPE_CAP_ACCELERATED,
+   PIPE_CAP_VIDEO_MEMORY,
+   PIPE_CAP_UMA,
+   PIPE_CAP_CONDITIONAL_RENDER_INVERTED,
+   PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE,
+   PIPE_CAP_SAMPLER_VIEW_TARGET,
+   PIPE_CAP_CLIP_HALFZ,
+   PIPE_CAP_VERTEXID_NOBASE,
+   PIPE_CAP_POLYGON_OFFSET_CLAMP,
+   PIPE_CAP_MULTISAMPLE_Z_RESOLVE,
+   PIPE_CAP_RESOURCE_FROM_USER_MEMORY,
+   PIPE_CAP_DEVICE_RESET_STATUS_QUERY,
+   PIPE_CAP_MAX_SHADER_PATCH_VARYINGS,
+   PIPE_CAP_TEXTURE_FLOAT_LINEAR,
+   PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
+   PIPE_CAP_DEPTH_BOUNDS_TEST,
 };
 
 #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
@@ -669,6 +674,7 @@
    PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
    PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
    PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
+   PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE
 };
 
 /**
@@ -699,7 +705,8 @@
    PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
    PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
    PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
-   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
+   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
+   PIPE_COMPUTE_CAP_SUBGROUP_SIZE
 };
 
 /**
@@ -758,6 +765,7 @@
    /* PIPE_QUERY_PRIMITIVES_GENERATED */
    /* PIPE_QUERY_PRIMITIVES_EMITTED */
    /* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_HZ */
    uint64_t u64;
 
    /* PIPE_DRIVER_QUERY_TYPE_UINT */
@@ -786,11 +794,13 @@
 
 enum pipe_driver_query_type
 {
-   PIPE_DRIVER_QUERY_TYPE_UINT64     = 0,
-   PIPE_DRIVER_QUERY_TYPE_UINT       = 1,
-   PIPE_DRIVER_QUERY_TYPE_FLOAT      = 2,
-   PIPE_DRIVER_QUERY_TYPE_PERCENTAGE = 3,
-   PIPE_DRIVER_QUERY_TYPE_BYTES      = 4,
+   PIPE_DRIVER_QUERY_TYPE_UINT64       = 0,
+   PIPE_DRIVER_QUERY_TYPE_UINT         = 1,
+   PIPE_DRIVER_QUERY_TYPE_FLOAT        = 2,
+   PIPE_DRIVER_QUERY_TYPE_PERCENTAGE   = 3,
+   PIPE_DRIVER_QUERY_TYPE_BYTES        = 4,
+   PIPE_DRIVER_QUERY_TYPE_MICROSECONDS = 5,
+   PIPE_DRIVER_QUERY_TYPE_HZ           = 6,
 };
 
 enum pipe_driver_query_group_type
@@ -799,6 +809,15 @@
    PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
 };
 
+/* Whether an average value per frame or a cumulative value should be
+ * displayed.
+ */
+enum pipe_driver_query_result_type
+{
+   PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE = 0,
+   PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
+};
+
 union pipe_numeric_type_union
 {
    uint64_t u64;
@@ -812,6 +831,7 @@
    unsigned query_type; /* PIPE_QUERY_DRIVER_SPECIFIC + i */
    union pipe_numeric_type_union max_value; /* max value that can be returned */
    enum pipe_driver_query_type type;
+   enum pipe_driver_query_result_type result_type;
    unsigned group_id;
 };
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -212,12 +212,6 @@
                             struct pipe_fence_handle *fence );
 
    /**
-    * Checks whether the fence has been signalled.
-    */
-   boolean (*fence_signalled)( struct pipe_screen *screen,
-                               struct pipe_fence_handle *fence );
-
-   /**
     * Wait for the fence to finish.
     * \param timeout  in nanoseconds (may be PIPE_TIMEOUT_INFINITE).
     */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_shader_tokens.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_shader_tokens.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_shader_tokens.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_shader_tokens.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,8 +43,8 @@
 #define TGSI_PROCESSOR_FRAGMENT  0
 #define TGSI_PROCESSOR_VERTEX    1
 #define TGSI_PROCESSOR_GEOMETRY  2
-#define TGSI_PROCESSOR_TESSCTRL  3
-#define TGSI_PROCESSOR_TESSEVAL  4
+#define TGSI_PROCESSOR_TESS_CTRL 3
+#define TGSI_PROCESSOR_TESS_EVAL 4
 #define TGSI_PROCESSOR_COMPUTE   5
 
 struct tgsi_processor
@@ -538,10 +538,6 @@
 #define TGSI_OPCODE_DSSG                222
 #define TGSI_OPCODE_LAST                223
 
-#define TGSI_SAT_NONE            0  /* do not saturate */
-#define TGSI_SAT_ZERO_ONE        1  /* clamp to [0,1] */
-#define TGSI_SAT_MINUS_PLUS_ONE  2  /* clamp to [-1,1] */
-
 /**
  * Opcode is the operation code to execute. A given operation defines the
  * semantics how the source registers (if any) are interpreted and what is
@@ -561,13 +557,13 @@
    unsigned Type       : 4;  /* TGSI_TOKEN_TYPE_INSTRUCTION */
    unsigned NrTokens   : 8;  /* UINT */
    unsigned Opcode     : 8;  /* TGSI_OPCODE_ */
-   unsigned Saturate   : 2;  /* TGSI_SAT_ */
+   unsigned Saturate   : 1;  /* BOOL */
    unsigned NumDstRegs : 2;  /* UINT */
    unsigned NumSrcRegs : 4;  /* UINT */
    unsigned Predicate  : 1;  /* BOOL */
    unsigned Label      : 1;
    unsigned Texture    : 1;
-   unsigned Padding    : 1;
+   unsigned Padding    : 2;
 };
 
 /*
@@ -689,7 +685,7 @@
  *
  * File, Index and Swizzle are handled the same as in tgsi_src_register.
  *
- * If ArrayID is zero the whole register file might be is indirectly addressed,
+ * If ArrayID is zero the whole register file might be indirectly addressed,
  * if not only the Declaration with this ArrayID is accessed by this operand.
  *
  */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_state.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_state.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_state.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_state.h	2015-09-16 14:36:09.000000000 +0000
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2007 VMware, Inc.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,13 +22,13 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 
 /**
  * @file
- * 
+ *
  * Abstract graphics pipe state objects.
  *
  * Basic notes:
@@ -61,7 +61,8 @@
 #define PIPE_MAX_SHADER_INPUTS    80 /* 32 GENERIC + 32 PATCH + 16 others */
 #define PIPE_MAX_SHADER_OUTPUTS   80 /* 32 GENERIC + 32 PATCH + 16 others */
 #define PIPE_MAX_SHADER_SAMPLER_VIEWS 32
-#define PIPE_MAX_SHADER_RESOURCES 32
+#define PIPE_MAX_SHADER_BUFFERS   32
+#define PIPE_MAX_SHADER_IMAGES    32
 #define PIPE_MAX_TEXTURE_LEVELS   16
 #define PIPE_MAX_SO_BUFFERS        4
 #define PIPE_MAX_SO_OUTPUTS       64
@@ -217,11 +218,14 @@
 };
 
 
-struct pipe_depth_state 
+struct pipe_depth_state
 {
    unsigned enabled:1;         /**< depth test enabled? */
    unsigned writemask:1;       /**< allow depth buffer writes? */
    unsigned func:3;            /**< depth test func (PIPE_FUNC_x) */
+   unsigned bounds_test:1;     /**< depth bounds test enabled? */
+   float bounds_min;           /**< minimum depth bound */
+   float bounds_max;           /**< maximum depth bound */
 };
 
 
@@ -268,6 +272,7 @@
    unsigned colormask:4;         /**< bitmask of PIPE_MASK_R/G/B/A */
 };
 
+
 struct pipe_blend_state
 {
    unsigned independent_blend_enable:1;
@@ -285,11 +290,13 @@
    float color[4];
 };
 
+
 struct pipe_stencil_ref
 {
    ubyte ref_value[2];
 };
 
+
 struct pipe_framebuffer_state
 {
    unsigned width, height;
@@ -367,10 +374,10 @@
    struct pipe_context *context; /**< context this view belongs to */
    union {
       struct {
-         unsigned first_layer:16;     /**< first layer to use for array textures */
-         unsigned last_layer:16;      /**< last layer to use for array textures */
-         unsigned first_level:8;      /**< first mipmap level to use */
-         unsigned last_level:8;       /**< last mipmap level to use */
+         unsigned first_layer:16;  /**< first layer to use for array textures */
+         unsigned last_layer:16;   /**< last layer to use for array textures */
+         unsigned first_level:8;   /**< first mipmap level to use */
+         unsigned last_level:8;    /**< last mipmap level to use */
       } tex;
       struct {
          unsigned first_element;
@@ -385,6 +392,31 @@
 
 
 /**
+ * A view into a writable buffer or texture that can be bound to a shader
+ * stage.
+ */
+struct pipe_image_view
+{
+   struct pipe_reference reference;
+   struct pipe_resource *resource; /**< resource into which this is a view  */
+   struct pipe_context *context; /**< context this view belongs to */
+   enum pipe_format format;      /**< typed PIPE_FORMAT_x */
+
+   union {
+      struct {
+         unsigned first_layer:16;     /**< first layer to use for array textures */
+         unsigned last_layer:16;      /**< last layer to use for array textures */
+         unsigned level:8;            /**< mipmap level to use */
+      } tex;
+      struct {
+         unsigned first_element;
+         unsigned last_element;
+      } buf;
+   } u;
+};
+
+
+/**
  * Subregion of 1D/2D/3D image resource.
  */
 struct pipe_box
@@ -455,7 +487,8 @@
  * A constant buffer.  A subrange of an existing buffer can be set
  * as a constant buffer.
  */
-struct pipe_constant_buffer {
+struct pipe_constant_buffer
+{
    struct pipe_resource *buffer; /**< the actual buffer */
    unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
    unsigned buffer_size;   /**< how much data can be read in shader */
@@ -464,6 +497,16 @@
 
 
 /**
+ * An untyped shader buffer supporting loads, stores, and atomics.
+ */
+struct pipe_shader_buffer {
+   struct pipe_resource *buffer; /**< the actual buffer */
+   unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
+   unsigned buffer_size;   /**< how much data can be accessed in shader */
+};
+
+
+/**
  * A stream output target. The structure specifies the range vertices can
  * be written to.
  *
@@ -474,8 +517,8 @@
  * and the CPU actually doesn't have to query it.
  *
  * Note that the buffer_size variable is actually specifying the available
- * space in the buffer, not the size of the attached buffer. 
- * In other words in majority of cases buffer_size would simply be 
+ * space in the buffer, not the size of the attached buffer.
+ * In other words in majority of cases buffer_size would simply be
  * 'buffer->width0 - buffer_offset', so buffer_size refers to the size
  * of the buffer left, after accounting for buffer offset, for stream output
  * to write to.
@@ -511,7 +554,7 @@
     * this attribute live in?
     */
    unsigned vertex_buffer_index;
- 
+
    enum pipe_format src_format;
 };
 
@@ -620,6 +663,7 @@
 
    boolean render_condition_enable; /**< whether the blit should honor the
                                     current render condition */
+   boolean alpha_blend; /* dst.rgb = src.rgb * src.a + dst.rgb * (1 - src.a) */
 };
 
 
@@ -642,5 +686,5 @@
 #ifdef __cplusplus
 }
 #endif
-   
+
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_video_enums.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_video_enums.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_video_enums.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_video_enums.h	2015-09-16 14:36:09.000000000 +0000
@@ -34,7 +34,8 @@
    PIPE_VIDEO_FORMAT_MPEG12,   /**< MPEG1, MPEG2 */
    PIPE_VIDEO_FORMAT_MPEG4,    /**< DIVX, XVID */
    PIPE_VIDEO_FORMAT_VC1,      /**< WMV */
-   PIPE_VIDEO_FORMAT_MPEG4_AVC /**< H.264 */
+   PIPE_VIDEO_FORMAT_MPEG4_AVC,/**< H.264 */
+   PIPE_VIDEO_FORMAT_HEVC      /**< H.265 */
 };
 
 enum pipe_video_profile
@@ -54,7 +55,12 @@
    PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
    PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10,
    PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH422,
-   PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444
+   PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444,
+   PIPE_VIDEO_PROFILE_HEVC_MAIN,
+   PIPE_VIDEO_PROFILE_HEVC_MAIN_10,
+   PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL,
+   PIPE_VIDEO_PROFILE_HEVC_MAIN_12,
+   PIPE_VIDEO_PROFILE_HEVC_MAIN_444
 };
 
 /* Video caps, can be different for each codec/profile */
@@ -68,7 +74,8 @@
    PIPE_VIDEO_CAP_PREFERS_INTERLACED = 5,
    PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE = 6,
    PIPE_VIDEO_CAP_SUPPORTS_INTERLACED = 7,
-   PIPE_VIDEO_CAP_MAX_LEVEL = 8
+   PIPE_VIDEO_CAP_MAX_LEVEL = 8,
+   PIPE_VIDEO_CAP_STACKED_FRAMES = 9
 };
 
 enum pipe_video_entrypoint
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_video_state.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_video_state.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/pipe/p_video_state.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/pipe/p_video_state.h	2015-09-16 14:36:09.000000000 +0000
@@ -376,6 +376,111 @@
    bool not_referenced;
 };
 
+struct pipe_h265_sps
+{
+   uint8_t chroma_format_idc;
+   uint8_t separate_colour_plane_flag;
+   uint32_t pic_width_in_luma_samples;
+   uint32_t pic_height_in_luma_samples;
+   uint8_t bit_depth_luma_minus8;
+   uint8_t bit_depth_chroma_minus8;
+   uint8_t log2_max_pic_order_cnt_lsb_minus4;
+   uint8_t sps_max_dec_pic_buffering_minus1;
+   uint8_t log2_min_luma_coding_block_size_minus3;
+   uint8_t log2_diff_max_min_luma_coding_block_size;
+   uint8_t log2_min_transform_block_size_minus2;
+   uint8_t log2_diff_max_min_transform_block_size;
+   uint8_t max_transform_hierarchy_depth_inter;
+   uint8_t max_transform_hierarchy_depth_intra;
+   uint8_t scaling_list_enabled_flag;
+   uint8_t ScalingList4x4[6][16];
+   uint8_t ScalingList8x8[6][64];
+   uint8_t ScalingList16x16[6][64];
+   uint8_t ScalingList32x32[2][64];
+   uint8_t ScalingListDCCoeff16x16[6];
+   uint8_t ScalingListDCCoeff32x32[2];
+   uint8_t amp_enabled_flag;
+   uint8_t sample_adaptive_offset_enabled_flag;
+   uint8_t pcm_enabled_flag;
+   uint8_t pcm_sample_bit_depth_luma_minus1;
+   uint8_t pcm_sample_bit_depth_chroma_minus1;
+   uint8_t log2_min_pcm_luma_coding_block_size_minus3;
+   uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
+   uint8_t pcm_loop_filter_disabled_flag;
+   uint8_t num_short_term_ref_pic_sets;
+   uint8_t long_term_ref_pics_present_flag;
+   uint8_t num_long_term_ref_pics_sps;
+   uint8_t sps_temporal_mvp_enabled_flag;
+   uint8_t strong_intra_smoothing_enabled_flag;
+};
+
+struct pipe_h265_pps
+{
+   struct pipe_h265_sps *sps;
+
+   uint8_t dependent_slice_segments_enabled_flag;
+   uint8_t output_flag_present_flag;
+   uint8_t num_extra_slice_header_bits;
+   uint8_t sign_data_hiding_enabled_flag;
+   uint8_t cabac_init_present_flag;
+   uint8_t num_ref_idx_l0_default_active_minus1;
+   uint8_t num_ref_idx_l1_default_active_minus1;
+   int8_t init_qp_minus26;
+   uint8_t constrained_intra_pred_flag;
+   uint8_t transform_skip_enabled_flag;
+   uint8_t cu_qp_delta_enabled_flag;
+   uint8_t diff_cu_qp_delta_depth;
+   int8_t pps_cb_qp_offset;
+   int8_t pps_cr_qp_offset;
+   uint8_t pps_slice_chroma_qp_offsets_present_flag;
+   uint8_t weighted_pred_flag;
+   uint8_t weighted_bipred_flag;
+   uint8_t transquant_bypass_enabled_flag;
+   uint8_t tiles_enabled_flag;
+   uint8_t entropy_coding_sync_enabled_flag;
+   uint8_t num_tile_columns_minus1;
+   uint8_t num_tile_rows_minus1;
+   uint8_t uniform_spacing_flag;
+   uint16_t column_width_minus1[20];
+   uint16_t row_height_minus1[22];
+   uint8_t loop_filter_across_tiles_enabled_flag;
+   uint8_t pps_loop_filter_across_slices_enabled_flag;
+   uint8_t deblocking_filter_control_present_flag;
+   uint8_t deblocking_filter_override_enabled_flag;
+   uint8_t pps_deblocking_filter_disabled_flag;
+   int8_t pps_beta_offset_div2;
+   int8_t pps_tc_offset_div2;
+   uint8_t lists_modification_present_flag;
+   uint8_t log2_parallel_merge_level_minus2;
+   uint8_t slice_segment_header_extension_present_flag;
+};
+
+struct pipe_h265_picture_desc
+{
+   struct pipe_picture_desc base;
+
+   struct pipe_h265_pps *pps;
+
+   uint8_t IDRPicFlag;
+   uint8_t RAPPicFlag;
+   uint8_t CurrRpsIdx;
+   uint32_t NumPocTotalCurr;
+   uint32_t NumDeltaPocsOfRefRpsIdx;
+   uint32_t NumShortTermPictureSliceHeaderBits;
+   uint32_t NumLongTermPictureSliceHeaderBits;
+
+   int32_t CurrPicOrderCntVal;
+   struct pipe_video_buffer *ref[16];
+   int32_t PicOrderCntVal[16];
+   uint8_t IsLongTerm[16];
+   uint8_t NumPocStCurrBefore;
+   uint8_t NumPocStCurrAfter;
+   uint8_t NumPocLtCurr;
+   uint8_t RefPicSetStCurrBefore[8];
+   uint8_t RefPicSetStCurrAfter[8];
+   uint8_t RefPicSetLtCurr[8];
+};
+
 #ifdef __cplusplus
 }
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/state_tracker/st_api.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/state_tracker/st_api.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/include/state_tracker/st_api.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/include/state_tracker/st_api.h	2015-09-16 14:36:09.000000000 +0000
@@ -533,7 +533,7 @@
 /**
  * Return true if the visual has the specified buffers.
  */
-static INLINE boolean
+static inline boolean
 st_visual_have_buffers(const struct st_visual *visual, unsigned mask)
 {
    return ((visual->buffer_mask & mask) == mask);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/Makefile.am	2015-09-16 14:36:08.000000000 +0000
@@ -58,6 +58,7 @@
 ## radeonsi
 if HAVE_GALLIUM_RADEONSI
 SUBDIRS += drivers/radeonsi
+SUBDIRS += winsys/amdgpu/drm
 endif
 
 ## the radeon winsys - linked in by r300, r600 and radeonsi
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/README.portability mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/README.portability
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/README.portability	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/README.portability	2015-09-16 14:36:08.000000000 +0000
@@ -13,8 +13,6 @@
 
 * Include the p_compiler.h.
 
-* Don't use the 'inline' keyword, use the INLINE macro in p_compiler.h instead.
-
 * Cast explicitly when converting to integer types of smaller sizes.
 
 * Cast explicitly when converting between float, double and integral types.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/SConscript	2015-09-16 14:36:08.000000000 +0000
@@ -46,7 +46,6 @@
 if env['dri']:
     SConscript([
         'winsys/sw/dri/SConscript',
-        'winsys/sw/kms-dri/SConscript',
         'winsys/svga/drm/SConscript',
     ])
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/dispatch.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/dispatch.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/dispatch.cpp	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/dispatch.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -123,12 +123,12 @@
       clCreateImage,
       clCreateProgramWithBuiltInKernels,
       clCompileProgram,
-      NULL, // clLinkProgram
+      clLinkProgram,
       clUnloadPlatformCompiler,
-      NULL, // clGetKernelArgInfo
-      NULL, // clEnqueueFillBuffer
-      NULL, // clEnqueueFillImage
-      NULL, // clEnqueueMigrateMemObjects
+      clGetKernelArgInfo,
+      clEnqueueFillBuffer,
+      clEnqueueFillImage,
+      clEnqueueMigrateMemObjects,
       clEnqueueMarkerWithWaitList,
       clEnqueueBarrierWithWaitList,
       NULL, // clGetExtensionFunctionAddressForPlatform
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/dispatch.hpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/dispatch.hpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/dispatch.hpp	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/dispatch.hpp	2015-09-16 14:36:09.000000000 +0000
@@ -693,7 +693,13 @@
    CL_API_ENTRY cl_int (CL_API_CALL *clUnloadPlatformCompiler)(
       cl_platform_id platform);
 
-   void *clGetKernelArgInfo;
+   CL_API_ENTRY cl_int (CL_API_CALL *clGetKernelArgInfo)(
+      cl_kernel kernel,
+      cl_uint arg_indx,
+      cl_kernel_arg_info  param_name,
+      size_t param_value_size,
+      void *param_value,
+      size_t *param_value_size_ret);
 
    CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueFillBuffer)(
       cl_command_queue command_queue,
@@ -701,7 +707,7 @@
       const void *pattern,
       size_t pattern_size,
       size_t offset,
-      size_t cb,
+      size_t size,
       cl_uint num_events_in_wait_list,
       const cl_event *event_wait_list,
       cl_event *event);
@@ -710,13 +716,20 @@
       cl_command_queue command_queue,
       cl_mem image,
       const void *fill_color,
-      const size_t origin[3],
-      const size_t region[3],
+      const size_t *origin,
+      const size_t *region,
       cl_uint num_events_in_wait_list,
       const cl_event *event_wait_list,
       cl_event *event);
 
-   void *clEnqueueMigrateMemObjects;
+   CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueMigrateMemObjects)(
+      cl_command_queue command_queue,
+      cl_uint num_mem_objects,
+      const cl_mem *mem_objects,
+      cl_mem_migration_flags flags,
+      cl_uint num_events_in_wait_list,
+      const cl_event *event_wait_list,
+      cl_event *event);
 
    CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueMarkerWithWaitList)(
       cl_command_queue command_queue,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/kernel.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/kernel.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/kernel.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/kernel.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -169,7 +169,7 @@
       break;
 
    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
-      buf.as_scalar<size_t>() = 1;
+      buf.as_scalar<size_t>() = dev.subgroup_size();
       break;
 
    case CL_KERNEL_PRIVATE_MEM_SIZE:
@@ -189,6 +189,14 @@
    return CL_INVALID_DEVICE;
 }
 
+CLOVER_API cl_int
+clGetKernelArgInfo(cl_kernel d_kern,
+                   cl_uint idx, cl_kernel_arg_info param,
+                   size_t size, void *r_buf, size_t *r_size) {
+   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+   return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
+}
+
 namespace {
    ///
    /// Common argument checking shared by kernel invocation commands.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/memory.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/memory.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/memory.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/memory.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -357,9 +357,29 @@
               const cl_image_format *format,
               const cl_image_desc *image_desc,
               void *host_ptr, cl_int *r_errcode) {
-   // This function was added in OpenCL 1.2
-   std::cerr << "CL user error: clCreateImage() not supported by OpenCL 1.1." <<
-                std::endl;
+   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
    ret_error(r_errcode, CL_INVALID_OPERATION);
    return NULL;
 }
+
+CLOVER_API cl_int
+clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
+                    const void *pattern, size_t pattern_size,
+                    size_t offset, size_t size,
+                    cl_uint num_events_in_wait_list,
+                    const cl_event *event_wait_list,
+                    cl_event *event) {
+   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+   return CL_INVALID_VALUE;
+}
+
+CLOVER_API cl_int
+clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
+                   const void *fill_color,
+                   const size_t *origin, const size_t *region,
+                   cl_uint num_events_in_wait_list,
+                   const cl_event *event_wait_list,
+                   cl_event *event) {
+   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+   return CL_INVALID_VALUE;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/program.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/program.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/program.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/program.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -231,6 +231,16 @@
    return e.get();
 }
 
+CLOVER_API cl_program
+clLinkProgram(cl_context d_ctx, cl_uint num_devs, const cl_device_id *d_devs,
+              const char *p_opts, cl_uint num_progs, const cl_program *d_progs,
+              void (*pfn_notify)(cl_program, void *), void *user_data,
+              cl_int *r_errcode) {
+   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+   ret_error(r_errcode, CL_LINKER_NOT_AVAILABLE);
+   return NULL;
+}
+
 CLOVER_API cl_int
 clUnloadCompiler() {
    return CL_SUCCESS;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/transfer.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/transfer.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/transfer.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/transfer.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -726,3 +726,15 @@
 } catch (error &e) {
    return e.get();
 }
+
+CLOVER_API cl_int
+clEnqueueMigrateMemObjects(cl_command_queue command_queue,
+                           cl_uint num_mem_objects,
+                           const cl_mem *mem_objects,
+                           cl_mem_migration_flags flags,
+                           cl_uint num_events_in_wait_list,
+                           const cl_event *event_wait_list,
+                           cl_event *event) {
+   CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+   return CL_INVALID_VALUE;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/util.hpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/util.hpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/api/util.hpp	2014-07-15 16:33:01.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/api/util.hpp	2015-09-16 14:36:09.000000000 +0000
@@ -38,6 +38,13 @@
 #define CLOVER_ICD_API PUBLIC
 #endif
 
+#define CLOVER_NOT_SUPPORTED_UNTIL(version)                    \
+   do {                                                        \
+      std::cerr << "CL user error: " << __func__               \
+                << "() requires OpenCL version " << (version)  \
+                << " or greater." << std::endl;                \
+   } while (0)
+
 namespace clover {
    ///
    /// Return an error code in \a p if non-zero.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/compiler.hpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/compiler.hpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/compiler.hpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/compiler.hpp	2015-09-16 14:36:09.000000000 +0000
@@ -37,7 +37,8 @@
                                const std::string &opts,
                                std::string &r_log);
 
-   module compile_program_tgsi(const std::string &source);
+   module compile_program_tgsi(const std::string &source,
+                               std::string &r_log);
 }
 
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/device.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/device.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/device.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/device.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -89,12 +89,12 @@
 
 size_t
 device::max_images_read() const {
-   return PIPE_MAX_SHADER_RESOURCES;
+   return PIPE_MAX_SHADER_IMAGES;
 }
 
 size_t
 device::max_images_write() const {
-   return PIPE_MAX_SHADER_RESOURCES;
+   return PIPE_MAX_SHADER_IMAGES;
 }
 
 cl_uint
@@ -185,6 +185,11 @@
    return { v.begin(), v.end() };
 }
 
+cl_uint
+device::subgroup_size() const {
+   return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
+}
+
 std::string
 device::device_name() const {
    return pipe->get_name(pipe);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/device.hpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/device.hpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/device.hpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/device.hpp	2015-09-16 14:36:09.000000000 +0000
@@ -67,6 +67,7 @@
       bool has_doubles() const;
 
       std::vector<size_t> max_block_size() const;
+      cl_uint subgroup_size() const;
       std::string device_name() const;
       std::string vendor_name() const;
       enum pipe_shader_ir ir_format() const;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/error.hpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/error.hpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/error.hpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/error.hpp	2015-09-16 14:36:09.000000000 +0000
@@ -65,9 +65,9 @@
       cl_int code;
    };
 
-   class build_error : public error {
+   class compile_error : public error {
    public:
-      build_error(const std::string &what = "") :
+      compile_error(const std::string &what = "") :
          error(CL_COMPILE_PROGRAM_FAILURE, what) {
       }
    };
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/event.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/event.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/event.cpp	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/event.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -141,7 +141,7 @@
    else if (!_fence)
       return CL_QUEUED;
 
-   else if (!screen->fence_signalled(screen, _fence))
+   else if (!screen->fence_finish(screen, _fence, 0))
       return CL_SUBMITTED;
 
    else
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/kernel.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/kernel.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/kernel.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/kernel.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -182,6 +182,34 @@
          }
          break;
       }
+      case module::argument::image_size: {
+         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
+         std::vector<cl_uint> image_size{
+               static_cast<cl_uint>(img->width()),
+               static_cast<cl_uint>(img->height()),
+               static_cast<cl_uint>(img->depth())};
+         for (auto x : image_size) {
+            auto arg = argument::create(marg);
+
+            arg->set(sizeof(x), &x);
+            arg->bind(*this, marg);
+         }
+         break;
+      }
+      case module::argument::image_format: {
+         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
+         cl_image_format fmt = img->format();
+         std::vector<cl_uint> image_format{
+               static_cast<cl_uint>(fmt.image_channel_data_type),
+               static_cast<cl_uint>(fmt.image_channel_order)};
+         for (auto x : image_format) {
+            auto arg = argument::create(marg);
+
+            arg->set(sizeof(x), &x);
+            arg->bind(*this, marg);
+         }
+         break;
+      }
       }
    }
 
@@ -339,6 +367,9 @@
 
 void
 kernel::scalar_argument::set(size_t size, const void *value) {
+   if (!value)
+      throw error(CL_INVALID_ARG_VALUE);
+
    if (size != this->size)
       throw error(CL_INVALID_ARG_SIZE);
 
@@ -407,6 +438,9 @@
    if (value)
       throw error(CL_INVALID_ARG_VALUE);
 
+   if (!size)
+      throw error(CL_INVALID_ARG_SIZE);
+
    _storage = size;
    _set = true;
 }
@@ -466,6 +500,9 @@
 
 void
 kernel::image_rd_argument::set(size_t size, const void *value) {
+   if (!value)
+      throw error(CL_INVALID_ARG_VALUE);
+
    if (size != sizeof(cl_mem))
       throw error(CL_INVALID_ARG_SIZE);
 
@@ -494,6 +531,9 @@
 
 void
 kernel::image_wr_argument::set(size_t size, const void *value) {
+   if (!value)
+      throw error(CL_INVALID_ARG_VALUE);
+
    if (size != sizeof(cl_mem))
       throw error(CL_INVALID_ARG_SIZE);
 
@@ -522,6 +562,9 @@
 
 void
 kernel::sampler_argument::set(size_t size, const void *value) {
+   if (!value)
+      throw error(CL_INVALID_SAMPLER);
+
    if (size != sizeof(cl_sampler))
       throw error(CL_INVALID_ARG_SIZE);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/kernel.hpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/kernel.hpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/kernel.hpp	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/kernel.hpp	2015-09-16 14:36:09.000000000 +0000
@@ -190,7 +190,16 @@
          pipe_surface *st;
       };
 
-      class image_rd_argument : public argument {
+      class image_argument : public argument { // common base of image args
+      public:
+         const image *get() const { // read-only access to the img pointer
+            return img;
+         }
+      protected:
+         image *img; // inherited by image_rd/_wr_argument; no initializer here
+      };
+
+      class image_rd_argument : public image_argument {
       public:
          virtual void set(size_t size, const void *value);
          virtual void bind(exec_context &ctx,
@@ -198,11 +207,10 @@
          virtual void unbind(exec_context &ctx);
 
       private:
-         image *img;
          pipe_sampler_view *st;
       };
 
-      class image_wr_argument : public argument {
+      class image_wr_argument : public image_argument {
       public:
          virtual void set(size_t size, const void *value);
          virtual void bind(exec_context &ctx,
@@ -210,7 +218,6 @@
          virtual void unbind(exec_context &ctx);
 
       private:
-         image *img;
          pipe_surface *st;
       };
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/memory.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/memory.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/memory.cpp	2014-11-01 15:59:41.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/memory.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -30,7 +30,7 @@
                        size_t size, void *host_ptr) :
    context(ctx), _flags(flags),
    _size(size), _host_ptr(host_ptr) {
-   if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR))
+   if (flags & CL_MEM_COPY_HOST_PTR)
       data.append((char *)host_ptr, size);
 }
 
@@ -189,7 +189,7 @@
                  const cl_image_format *format, size_t width,
                  size_t height, size_t row_pitch,
                  void *host_ptr) :
-   image(ctx, flags, format, width, height, 0,
+   image(ctx, flags, format, width, height, 1,
          row_pitch, 0, height * row_pitch, host_ptr) {
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/module.hpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/module.hpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/module.hpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/module.hpp	2015-09-16 14:36:09.000000000 +0000
@@ -72,7 +72,9 @@
          enum semantic {
             general,
             grid_dimension,
-            grid_offset
+            grid_offset,
+            image_size,
+            image_format
          };
 
          argument(enum type type, size_t size,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/program.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/program.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/program.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/program.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -56,14 +56,14 @@
 
          try {
             auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
-                           compile_program_tgsi(_source) :
+                           compile_program_tgsi(_source, log) :
                            compile_program_llvm(_source, headers,
                                                 dev.ir_format(),
                                                 dev.ir_target(), build_opts(dev),
                                                 log));
             _binaries.insert({ &dev, module });
             _logs.insert({ &dev, log });
-         } catch (const build_error &) {
+         } catch (const error &) {
             _logs.insert({ &dev, log });
             throw;
          }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/resource.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/resource.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/core/resource.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/core/resource.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -118,6 +118,8 @@
                              command_queue &q, const std::string &data) :
    resource(dev, obj) {
    pipe_resource info {};
+   const bool user_ptr_support = dev.pipe->get_param(dev.pipe,
+         PIPE_CAP_RESOURCE_FROM_USER_MEMORY);
 
    if (image *img = dynamic_cast<image *>(&obj)) {
       info.format = translate_format(img->format());
@@ -130,6 +132,7 @@
       info.depth0 = 1;
    }
 
+   info.array_size = 1;
    info.target = translate_target(obj.type());
    info.bind = (PIPE_BIND_SAMPLER_VIEW |
                 PIPE_BIND_COMPUTE_RESOURCE |
@@ -137,16 +140,29 @@
                 PIPE_BIND_TRANSFER_READ |
                 PIPE_BIND_TRANSFER_WRITE);
 
+   if (obj.flags() & CL_MEM_USE_HOST_PTR && user_ptr_support) {
+      // Page alignment is normally required for this, just try, hope for the
+      // best and fall back if it fails.
+      pipe = dev.pipe->resource_from_user_memory(dev.pipe, &info, obj.host_ptr());
+      if (pipe)
+         return;
+   }
+
+   if (obj.flags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR)) {
+      info.usage = PIPE_USAGE_STAGING;
+   }
+
    pipe = dev.pipe->resource_create(dev.pipe, &info);
    if (!pipe)
       throw error(CL_OUT_OF_RESOURCES);
 
-   if (!data.empty()) {
+   if (obj.flags() & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) {
+      const void *data_ptr = !data.empty() ? data.data() : obj.host_ptr();
       box rect { {{ 0, 0, 0 }}, {{ info.width0, info.height0, info.depth0 }} };
       unsigned cpp = util_format_get_blocksize(info.format);
 
       q.pipe->transfer_inline_write(q.pipe, pipe, 0, PIPE_TRANSFER_WRITE,
-                                    rect, data.data(), cpp * info.width0,
+                                    rect, data_ptr, cpp * info.width0,
                                     cpp * info.width0 * info.height0);
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/llvm/invocation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/llvm/invocation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/llvm/invocation.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/llvm/invocation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -108,7 +108,7 @@
          name, llvm::MemoryBuffer::getMemBuffer(source));
 
       if (!c.ExecuteAction(act))
-         throw build_error(log);
+         throw compile_error(log);
    }
 
    module
@@ -256,7 +256,7 @@
       r_log = log;
 
       if (!ExecSuccess)
-         throw build_error();
+         throw compile_error();
 
       // Get address spaces map to be able to find kernel argument address space
       memcpy(address_spaces, c.getTarget().getAddressSpaceMap(),
@@ -269,17 +269,19 @@
 #endif
    }
 
-   void
-   find_kernels(llvm::Module *mod, std::vector<llvm::Function *> &kernels) {
+   std::vector<llvm::Function *>
+   find_kernels(const llvm::Module *mod) {
       const llvm::NamedMDNode *kernel_node =
                                  mod->getNamedMetadata("opencl.kernels");
       // This means there are no kernels in the program.  The spec does not
       // require that we return an error here, but there will be an error if
       // the user tries to pass this program to a clCreateKernel() call.
       if (!kernel_node) {
-         return;
+         return std::vector<llvm::Function *>();
       }
 
+      std::vector<llvm::Function *> kernels;
+      kernels.reserve(kernel_node->getNumOperands());
       for (unsigned i = 0; i < kernel_node->getNumOperands(); ++i) {
 #if HAVE_LLVM >= 0x0306
          kernels.push_back(llvm::mdconst::dyn_extract<llvm::Function>(
@@ -288,11 +290,11 @@
 #endif
                                     kernel_node->getOperand(i)->getOperand(0)));
       }
+      return kernels;
    }
 
    void
-   optimize(llvm::Module *mod, unsigned optimization_level,
-            const std::vector<llvm::Function *> &kernels) {
+   optimize(llvm::Module *mod, unsigned optimization_level) {
 
 #if HAVE_LLVM >= 0x0307
       llvm::legacy::PassManager PM;
@@ -300,6 +302,8 @@
       llvm::PassManager PM;
 #endif
 
+      const std::vector<llvm::Function *> kernels = find_kernels(mod);
+
       // Add a function internalizer pass.
       //
       // By default, the function internalizer pass will look for a function
@@ -340,18 +344,100 @@
       PM.run(*mod);
    }
 
+   // Kernel metadata: the "kernel_arg_type" and "kernel_arg_access_qual"
+   // strings of one kernel argument (both empty when HAVE_LLVM < 0x0306).
+   struct kernel_arg_md {
+      llvm::StringRef type_name;
+      llvm::StringRef access_qual;
+      kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_):
+         type_name(type_name_), access_qual(access_qual_) {}
+   };
+
+#if HAVE_LLVM >= 0x0306
+
+   // Find the "opencl.kernels" entry whose operand 0 is kernel_func.
+   // Returns nullptr if the module has no such list or no entry matches.
+   const llvm::MDNode *
+   get_kernel_metadata(const llvm::Function *kernel_func) {
+      const auto kernels_md =
+         kernel_func->getParent()->getNamedMetadata("opencl.kernels");
+      if (!kernels_md)
+         return nullptr;
+
+      const unsigned count = kernels_md->getNumOperands();
+      for (unsigned idx = 0; idx != count; ++idx) {
+         const llvm::MDNode *entry = kernels_md->getOperand(idx);
+         // Operand 0 of each entry is compared against the wanted function.
+         if (llvm::mdconst::dyn_extract<llvm::Function>(
+                entry->getOperand(0)) == kernel_func)
+            return entry;
+      }
+
+      return nullptr;
+   }
+   // Cast md_operand to an MDNode; asserts its operand count and name string.
+   llvm::MDNode*
+   node_from_op_checked(const llvm::MDOperand &md_operand,
+                        llvm::StringRef expect_name,
+                        unsigned expect_num_args)
+   {
+      auto node = llvm::cast<llvm::MDNode>(md_operand);
+      assert(node->getNumOperands() == expect_num_args &&
+             "Wrong number of operands.");
+
+      auto str_node = llvm::cast<llvm::MDString>(node->getOperand(0));
+      assert(str_node->getString() == expect_name &&
+             "Wrong metadata node name.");
+
+      return node;
+   }
+   // One (type name, access qualifier) pair per argument of kernel_func.
+   std::vector<kernel_arg_md>
+   get_kernel_arg_md(const llvm::Function *kernel_func) {
+      auto num_args = kernel_func->getArgumentList().size();
+      auto kernel_node = get_kernel_metadata(kernel_func);
+      // Missing or truncated metadata: use empty strings, do not crash.
+      if (!kernel_node || kernel_node->getNumOperands() < 4)
+         return std::vector<kernel_arg_md>(num_args, kernel_arg_md("", ""));
+      auto aq = node_from_op_checked(kernel_node->getOperand(2),
+                                     "kernel_arg_access_qual", num_args + 1);
+      auto ty = node_from_op_checked(kernel_node->getOperand(3),
+                                     "kernel_arg_type", num_args + 1);
+      std::vector<kernel_arg_md> res;
+      res.reserve(num_args);
+      for (unsigned i = 0; i < num_args; ++i) { // operand 0 is the list name
+         res.push_back(kernel_arg_md(
+            llvm::cast<llvm::MDString>(ty->getOperand(i+1))->getString(),
+            llvm::cast<llvm::MDString>(aq->getOperand(i+1))->getString()));
+      }
+      return res;
+   }
+
+#else
+   // Variant for HAVE_LLVM < 0x0306: every argument gets empty strings.
+   std::vector<kernel_arg_md>
+   get_kernel_arg_md(const llvm::Function *kernel_func) {
+      const kernel_arg_md blank("", "");
+      const auto num_args = kernel_func->getArgumentList().size();
+      return std::vector<kernel_arg_md>(num_args, blank);
+   }
+
+#endif // HAVE_LLVM >= 0x0306
+
    std::vector<module::argument>
    get_kernel_args(const llvm::Module *mod, const std::string &kernel_name,
                    const clang::LangAS::Map &address_spaces) {
 
       std::vector<module::argument> args;
       llvm::Function *kernel_func = mod->getFunction(kernel_name);
+      assert(kernel_func && "Kernel name not found in module.");
+      auto arg_md = get_kernel_arg_md(kernel_func);
 
       llvm::DataLayout TD(mod);
+      llvm::Type *size_type =
+         TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_uint) * 8);
 
-      for (llvm::Function::const_arg_iterator I = kernel_func->arg_begin(),
-                                      E = kernel_func->arg_end(); I != E; ++I) {
-         const llvm::Argument &arg = *I;
+      for (const auto &arg: kernel_func->args()) {
 
          llvm::Type *arg_type = arg.getType();
          const unsigned arg_store_size = TD.getTypeStoreSize(arg_type);
@@ -369,6 +455,59 @@
          unsigned target_size = TD.getTypeStoreSize(target_type);
          unsigned target_align = TD.getABITypeAlignment(target_type);
 
+         llvm::StringRef type_name = arg_md[arg.getArgNo()].type_name;
+         llvm::StringRef access_qual = arg_md[arg.getArgNo()].access_qual;
+
+         // Image: map (type name, access qualifier) to a module argument type.
+         const bool is_image2d = type_name == "image2d_t";
+         const bool is_image3d = type_name == "image3d_t";
+         if (is_image2d || is_image3d) {
+            const bool is_write_only = access_qual == "write_only";
+            const bool is_read_only = access_qual == "read_only";
+            enum module::argument::type marg_type;
+            if (is_image2d && is_read_only) {
+               marg_type = module::argument::image2d_rd;
+            } else if (is_image2d && is_write_only) {
+               marg_type = module::argument::image2d_wr;
+            } else if (is_image3d && is_read_only) {
+               marg_type = module::argument::image3d_rd;
+            } else if (is_image3d && is_write_only) {
+               marg_type = module::argument::image3d_wr;
+            } else {
+               // Fail in release builds too: a bare assert would leave
+               // marg_type uninitialized when NDEBUG is defined.
+               throw compile_error("Wrong image access qualifier");
+            }
+            args.push_back(module::argument(marg_type,
+                                            arg_store_size, target_size,
+                                            target_align,
+                                            module::argument::zero_ext));
+            continue;
+         }
+
+         // Image size implicit argument
+         if (type_name == "__llvm_image_size") {
+            args.push_back(module::argument(module::argument::scalar,
+                                            sizeof(cl_uint),
+                                            TD.getTypeStoreSize(size_type),
+                                            TD.getABITypeAlignment(size_type),
+                                            module::argument::zero_ext,
+                                            module::argument::image_size));
+            continue;
+         }
+
+         // Image format implicit argument
+         if (type_name == "__llvm_image_format") {
+            args.push_back(module::argument(module::argument::scalar,
+                                            sizeof(cl_uint),
+                                            TD.getTypeStoreSize(size_type),
+                                            TD.getABITypeAlignment(size_type),
+                                            module::argument::zero_ext,
+                                            module::argument::image_format));
+            continue;
+         }
+
+         // Other types
          if (llvm::isa<llvm::PointerType>(arg_type) && arg.hasByValAttr()) {
             arg_type =
                   llvm::dyn_cast<llvm::PointerType>(arg_type)->getElementType();
@@ -413,9 +552,6 @@
       // Append implicit arguments.  XXX - The types, ordering and
       // vector size of the implicit arguments should depend on the
       // target according to the selected calling convention.
-      llvm::Type *size_type =
-         TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_uint) * 8);
-
       args.push_back(
          module::argument(module::argument::scalar, sizeof(cl_uint),
                           TD.getTypeStoreSize(size_type),
@@ -435,7 +571,6 @@
 
    module
    build_module_llvm(llvm::Module *mod,
-                     const std::vector<llvm::Function *> &kernels,
                      clang::LangAS::Map& address_spaces) {
 
       module m;
@@ -445,8 +580,11 @@
       llvm::raw_svector_ostream bitcode_ostream(llvm_bitcode);
       llvm::BitstreamWriter writer(llvm_bitcode);
       llvm::WriteBitcodeToFile(mod, bitcode_ostream);
+#if HAVE_LLVM < 0x0308
       bitcode_ostream.flush();
+#endif
 
+      const std::vector<llvm::Function *> kernels = find_kernels(mod);
       for (unsigned i = 0; i < kernels.size(); ++i) {
          std::string kernel_name = kernels[i]->getName();
          std::vector<module::argument> args =
@@ -485,7 +623,7 @@
       LLVMDisposeMessage(err_message);
 
       if (err) {
-         throw build_error();
+         throw compile_error();
       }
    }
 
@@ -505,7 +643,7 @@
       if (LLVMGetTargetFromTriple(triple.c_str(), &target, &error_message)) {
          r_log = std::string(error_message);
          LLVMDisposeMessage(error_message);
-         throw build_error();
+         throw compile_error();
       }
 
       LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
@@ -514,7 +652,7 @@
 
       if (!tm) {
          r_log = "Could not create TargetMachine: " + triple;
-         throw build_error();
+         throw compile_error();
       }
 
       if (dump_asm) {
@@ -567,7 +705,7 @@
             const char *name;
             if (gelf_getshdr(section, &symtab_header) != &symtab_header) {
                r_log = "Failed to read ELF section header.";
-               throw build_error();
+               throw compile_error();
             }
             name = elf_strptr(elf, section_str_index, symtab_header.sh_name);
            if (!strcmp(name, ".symtab")) {
@@ -577,9 +715,9 @@
          }
          if (!symtab) {
             r_log = "Unable to find symbol table.";
-            throw build_error();
+            throw compile_error();
          }
-      } catch (build_error &e) {
+      } catch (compile_error &e) {
          elf_end(elf);
          throw e;
       }
@@ -610,10 +748,11 @@
    module
    build_module_native(std::vector<char> &code,
                        const llvm::Module *mod,
-                       const std::vector<llvm::Function *> &kernels,
                        const clang::LangAS::Map &address_spaces,
                        std::string &r_log) {
 
+      const std::vector<llvm::Function *> kernels = find_kernels(mod);
+
       std::map<std::string, unsigned> kernel_offsets =
             get_kernel_offsets(code, kernels, r_log);
 
@@ -650,7 +789,7 @@
          stream.flush();
          *(std::string*)data = message;
 
-         throw build_error();
+         throw compile_error();
       }
    }
 
@@ -697,7 +836,6 @@
 
    init_targets();
 
-   std::vector<llvm::Function *> kernels;
    size_t processor_str_len = std::string(target).find_first_of("-");
    std::string processor(target, 0, processor_str_len);
    std::string triple(target, processor_str_len + 1,
@@ -709,7 +847,7 @@
    llvm_ctx.setDiagnosticHandler(diagnostic_handler, &r_log);
 
    if (get_debug_flags() & DBG_CLC)
-      debug_log(source, ".cl");
+      debug_log("// Build options: " + opts + '\n' + source, ".cl");
 
    // The input file name must have the .cl extension in order for the
    // CompilerInvocation class to recognize it as an OpenCL source file.
@@ -717,9 +855,7 @@
                                     triple, processor, opts, address_spaces,
                                     optimization_level, r_log);
 
-   find_kernels(mod, kernels);
-
-   optimize(mod, optimization_level, kernels);
+   optimize(mod, optimization_level);
 
    if (get_debug_flags() & DBG_LLVM) {
       std::string log;
@@ -738,13 +874,13 @@
          m = module();
          break;
       case PIPE_SHADER_IR_LLVM:
-         m = build_module_llvm(mod, kernels, address_spaces);
+         m = build_module_llvm(mod, address_spaces);
          break;
       case PIPE_SHADER_IR_NATIVE: {
          std::vector<char> code = compile_native(mod, triple, processor,
                                                  get_debug_flags() & DBG_ASM,
                                                  r_log);
-         m = build_module_native(code, mod, kernels, address_spaces, r_log);
+         m = build_module_native(code, mod, address_spaces, r_log);
          break;
       }
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 include Makefile.sources
 
 AM_CPPFLAGS = \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/tgsi/compiler.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/tgsi/compiler.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/clover/tgsi/compiler.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/clover/tgsi/compiler.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -32,7 +32,7 @@
 
 namespace {
    void
-   read_header(const std::string &header, module &m) {
+   read_header(const std::string &header, module &m, std::string &r_log) {
       std::istringstream ls(header);
       std::string line;
 
@@ -45,8 +45,10 @@
          if (!(ts >> name))
             continue;
 
-         if (!(ts >> offset))
-            throw build_error("invalid kernel start address");
+         if (!(ts >> offset)) {
+            r_log = "invalid kernel start address";
+            throw compile_error();
+         }
 
          while (ts >> tok) {
             if (tok == "scalar")
@@ -67,8 +69,10 @@
                args.push_back({ module::argument::image3d_wr, 4 });
             else if (tok == "sampler")
                args.push_back({ module::argument::sampler, 0 });
-            else
-               throw build_error("invalid kernel argument");
+            else {
+               r_log = "invalid kernel argument";
+               throw compile_error();
+            }
          }
 
          m.syms.push_back({ name, 0, offset, args });
@@ -76,11 +80,13 @@
    }
 
    void
-   read_body(const char *source, module &m) {
+   read_body(const char *source, module &m, std::string &r_log) {
       tgsi_token prog[1024];
 
-      if (!tgsi_text_translate(source, prog, Elements(prog)))
-         throw build_error("translate failed");
+      if (!tgsi_text_translate(source, prog, Elements(prog))) {
+         r_log = "translate failed";
+         throw compile_error();
+      }
 
       unsigned sz = tgsi_num_tokens(prog) * sizeof(tgsi_token);
       std::vector<char> data( (char *)prog, (char *)prog + sz );
@@ -89,13 +95,13 @@
 }
 
 module
-clover::compile_program_tgsi(const std::string &source) {
+clover::compile_program_tgsi(const std::string &source, std::string &r_log) {
    const size_t body_pos = source.find("COMP\n");
    const char *body = &source[body_pos];
    module m;
 
-   read_header({ source.begin(), source.begin() + body_pos }, m);
-   read_body(body, m);
+   read_header({ source.begin(), source.begin() + body_pos }, m, r_log);
+   read_body(body, m, r_log);
 
    return m;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/Android.mk	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,60 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2015 Chih-Wei Huang <cwhuang@linux.org.tw>
+# Copyright (C) 2015 Android-x86 Open Source Project
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+# Makefile.sources presumably provides the *_SOURCES lists used below.
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+# Module: libmesa_st_dri, built as a static library (see the last lines).
+LOCAL_SRC_FILES := $(common_SOURCES)
+
+LOCAL_CFLAGS := \
+	-DGALLIUM_STATIC_TARGETS=1 \
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa \
+
+LOCAL_EXPORT_C_INCLUDE_DIRS := \
+	$(LOCAL_PATH) \
+	$(LOCAL_C_INCLUDES) \
+
+LOCAL_STATIC_LIBRARIES := \
+	libmesa_dri_common \
+
+ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_SRC_FILES += $(drisw_SOURCES)
+endif
+# Any driver other than swrast adds the DRI2 sources and links libdrm.
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_SRC_FILES += $(dri2_SOURCES)
+LOCAL_SHARED_LIBRARIES := libdrm
+endif
+
+LOCAL_MODULE := libmesa_st_dri
+
+LOCAL_GENERATED_SOURCES := $(MESA_DRI_OPTIONS_H)
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri2_buffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri2_buffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri2_buffer.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri2_buffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -11,7 +11,7 @@
    struct pipe_resource *resource;
 };
 
-static INLINE struct dri2_buffer *
+static inline struct dri2_buffer *
 dri2_buffer(__DRIbuffer * driBufferPriv)
 {
    return (struct dri2_buffer *) driBufferPriv;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri2.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri2.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri2.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri2.c	2015-09-16 14:37:00.000000000 +0000
@@ -1460,7 +1460,7 @@
    throttle_ret = dd_configuration(DRM_CONF_THROTTLE);
    dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD);
 #else
-   if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd, false)) {
+   if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) {
       pscreen = pipe_loader_create_screen(screen->dev, PIPE_SEARCH_DIR);
 
       throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_context.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -59,7 +59,7 @@
    struct hud_context *hud;
 };
 
-static INLINE struct dri_context *
+static inline struct dri_context *
 dri_context(__DRIcontext * driContextPriv)
 {
    if (!driContextPriv)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_drawable.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_drawable.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_drawable.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_drawable.c	2015-09-16 14:36:09.000000000 +0000
@@ -279,7 +279,12 @@
    case ST_ATTACHMENT_BACK_LEFT:
    case ST_ATTACHMENT_FRONT_RIGHT:
    case ST_ATTACHMENT_BACK_RIGHT:
-      *format = drawable->stvis.color_format;
+      /* Other pieces of the driver stack get confused and behave incorrectly
+       * when they get an sRGB drawable. st/mesa receives "drawable->stvis"
+       * through other means and handles it correctly, so we don't really need
+       * to use an sRGB format here.
+       */
+      *format = util_format_linear(drawable->stvis.color_format);
       *bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
       break;
    case ST_ATTACHMENT_DEPTH_STENCIL:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_drawable.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_drawable.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_drawable.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_drawable.h	2015-09-16 14:36:09.000000000 +0000
@@ -87,7 +87,7 @@
                              struct pipe_resource *res);
 };
 
-static INLINE struct dri_drawable *
+static inline struct dri_drawable *
 dri_drawable(__DRIdrawable * driDrawPriv)
 {
    return (struct dri_drawable *) (driDrawPriv)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_query_renderer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_query_renderer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_query_renderer.c	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_query_renderer.c	2015-09-16 14:36:09.000000000 +0000
@@ -42,6 +42,20 @@
                                                       PIPE_CAP_UMA);
       return 0;
 
+   case __DRI2_RENDERER_HAS_TEXTURE_3D:
+      value[0] =
+         screen->base.screen->get_param(screen->base.screen,
+                                        PIPE_CAP_MAX_TEXTURE_3D_LEVELS) != 0;
+      return 0;
+
+   case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
+      value[0] =
+         screen->base.screen->is_format_supported(screen->base.screen,
+                                                  PIPE_FORMAT_B8G8R8A8_SRGB,
+                                                  PIPE_TEXTURE_2D, 0,
+                                                  PIPE_BIND_RENDER_TARGET);
+      return 0;
+
    default:
       return driQueryRendererIntegerCommon(_screen, param, value);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -103,14 +103,18 @@
 static const __DRIconfig **
 dri_fill_in_modes(struct dri_screen *screen)
 {
-   static const mesa_format mesa_formats[3] = {
+   static const mesa_format mesa_formats[] = {
       MESA_FORMAT_B8G8R8A8_UNORM,
       MESA_FORMAT_B8G8R8X8_UNORM,
+      MESA_FORMAT_B8G8R8A8_SRGB,
+      MESA_FORMAT_B8G8R8X8_SRGB,
       MESA_FORMAT_B5G6R5_UNORM,
    };
-   static const enum pipe_format pipe_formats[3] = {
+   static const enum pipe_format pipe_formats[] = {
       PIPE_FORMAT_BGRA8888_UNORM,
       PIPE_FORMAT_BGRX8888_UNORM,
+      PIPE_FORMAT_BGRA8888_SRGB,
+      PIPE_FORMAT_BGRX8888_SRGB,
       PIPE_FORMAT_B5G6R5_UNORM,
    };
    mesa_format format;
@@ -186,6 +190,11 @@
       unsigned num_msaa_modes = 0; /* includes a single-sample mode */
       uint8_t msaa_modes[MSAA_VISUAL_MAX_SAMPLES];
 
+      if (!p_screen->is_format_supported(p_screen, pipe_formats[format],
+                                         PIPE_TEXTURE_2D, 0,
+                                         PIPE_BIND_RENDER_TARGET))
+         continue;
+
       for (i = 1; i <= msaa_samples_max; i++) {
          int samples = i > 1 ? i : 0;
 
@@ -241,9 +250,15 @@
 
    if (mode->redBits == 8) {
       if (mode->alphaBits == 8)
-         stvis->color_format = PIPE_FORMAT_BGRA8888_UNORM;
+         if (mode->sRGBCapable)
+            stvis->color_format = PIPE_FORMAT_BGRA8888_SRGB;
+         else
+            stvis->color_format = PIPE_FORMAT_BGRA8888_UNORM;
       else
-         stvis->color_format = PIPE_FORMAT_BGRX8888_UNORM;
+         if (mode->sRGBCapable)
+            stvis->color_format = PIPE_FORMAT_BGRX8888_SRGB;
+         else
+            stvis->color_format = PIPE_FORMAT_BGRX8888_UNORM;
    } else {
       stvis->color_format = PIPE_FORMAT_B5G6R5_UNORM;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/dri_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/dri_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -97,7 +97,7 @@
 };
 
 /** cast wrapper */
-static INLINE struct dri_screen *
+static inline struct dri_screen *
 dri_screen(__DRIscreen * sPriv)
 {
    return (struct dri_screen *)sPriv->driverPrivate;
@@ -122,9 +122,7 @@
 
 };
 
-#ifndef __NOT_HAVE_DRM_H
-
-static INLINE boolean
+static inline boolean
 dri_with_format(__DRIscreen * sPriv)
 {
    const __DRIdri2LoaderExtension *loader = sPriv->dri2.loader;
@@ -134,16 +132,6 @@
        && (loader->getBuffersWithFormat != NULL);
 }
 
-#else
-
-static INLINE boolean
-dri_with_format(__DRIscreen * sPriv)
-{
-   return TRUE;
-}
-
-#endif
-
 void
 dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen,
                    const struct gl_config *mode);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/drisw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/drisw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/drisw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/drisw.c	2015-09-16 14:36:09.000000000 +0000
@@ -50,7 +50,7 @@
 DEBUG_GET_ONCE_BOOL_OPTION(swrast_no_present, "SWRAST_NO_PRESENT", FALSE);
 static boolean swrast_no_present = FALSE;
 
-static INLINE void
+static inline void
 get_drawable_info(__DRIdrawable *dPriv, int *x, int *y, int *w, int *h)
 {
    __DRIscreen *sPriv = dPriv->driScreenPriv;
@@ -61,7 +61,7 @@
                            dPriv->loaderPrivate);
 }
 
-static INLINE void
+static inline void
 put_image(__DRIdrawable *dPriv, void *data, unsigned width, unsigned height)
 {
    __DRIscreen *sPriv = dPriv->driScreenPriv;
@@ -72,7 +72,7 @@
                     data, dPriv->loaderPrivate);
 }
 
-static INLINE void
+static inline void
 put_image2(__DRIdrawable *dPriv, void *data, int x, int y,
            unsigned width, unsigned height, unsigned stride)
 {
@@ -84,7 +84,7 @@
                      data, dPriv->loaderPrivate);
 }
 
-static INLINE void
+static inline void
 get_image(__DRIdrawable *dPriv, int x, int y, int width, int height, void *data)
 {
    __DRIscreen *sPriv = dPriv->driScreenPriv;
@@ -123,7 +123,7 @@
    put_image2(dPriv, data, x, y, width, height, stride);
 }
 
-static INLINE void
+static inline void
 drisw_present_texture(__DRIdrawable *dPriv,
                       struct pipe_resource *ptex, struct pipe_box *sub_box)
 {
@@ -136,7 +136,7 @@
    screen->base.screen->flush_frontbuffer(screen->base.screen, ptex, 0, 0, drawable, sub_box);
 }
 
-static INLINE void
+static inline void
 drisw_invalidate_drawable(__DRIdrawable *dPriv)
 {
    struct dri_drawable *drawable = dri_drawable(dPriv);
@@ -146,7 +146,7 @@
    p_atomic_inc(&drawable->base.stamp);
 }
 
-static INLINE void
+static inline void
 drisw_copy_to_front(__DRIdrawable * dPriv,
                     struct pipe_resource *ptex)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -50,10 +50,6 @@
 libdri_la_SOURCES = $(common_SOURCES)
 
 if HAVE_DRISW
-if !HAVE_DRI2
-AM_CPPFLAGS += \
-	-D__NOT_HAVE_DRM_H
-endif
 libdri_la_SOURCES += $(drisw_SOURCES)
 endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/dri/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/dri/SConscript	2015-09-16 14:36:09.000000000 +0000
@@ -5,10 +5,7 @@
 
 env = env.Clone()
 
-# XXX: If HAVE_DRI2
 env.PkgUseModules(['DRM'])
-# else
-#env.Append(CPPDEFINES = [('__NOT_HAVE_DRM_H', '1')])
 
 env.Append(CPPPATH = [
     '#/src',
@@ -20,7 +17,6 @@
 
 env.Append(CPPDEFINES = [
     ('GALLIUM_STATIC_TARGETS', '1'),
-    'GALLIUM_SOFTPIPE',
 ])
 
 sources = env.ParseSourceList('Makefile.sources', 'common_SOURCES')
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/glx/xlib/glx_api.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/glx/xlib/glx_api.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/glx/xlib/glx_api.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/glx/xlib/glx_api.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,6 +40,13 @@
 
 #include "xm_api.h"
 
+/* An "Atrribs/Attribs" typo was fixed in glxproto.h in Nov 2014.
+ * This is in case we don't have the updated header.
+ */
+#if !defined(X_GLXCreateContextAttribsARB) && \
+     defined(X_GLXCreateContextAtrribsARB)
+#define X_GLXCreateContextAttribsARB X_GLXCreateContextAtrribsARB
+#endif 
 
 /* This indicates the client-side GLX API and GLX encoder version. */
 #define CLIENT_MAJOR_VERSION 1
@@ -2168,7 +2175,7 @@
 #endif
 
       default:
-         generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, true);
+         generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, true);
          return;
    }
 }
@@ -2762,14 +2769,14 @@
          break;
       default:
          /* bad attribute */
-         generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, True);
+         generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, True);
          return NULL;
       }
    }
 
    /* check contextFlags */
    if (contextFlags & ~contextFlagsAll) {
-      generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, True);
+      generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, True);
       return NULL;
    }
 
@@ -2777,14 +2784,14 @@
    if (profileMask != GLX_CONTEXT_CORE_PROFILE_BIT_ARB &&
        profileMask != GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB &&
        profileMask != GLX_CONTEXT_ES_PROFILE_BIT_EXT) {
-      generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAtrribsARB, False);
+      generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAttribsARB, False);
       return NULL;
    }
 
    /* check renderType */
    if (renderType != GLX_RGBA_TYPE &&
        renderType != GLX_COLOR_INDEX_TYPE) {
-      generate_error(dpy, BadValue, 0, X_GLXCreateContextAtrribsARB, True);
+      generate_error(dpy, BadValue, 0, X_GLXCreateContextAttribsARB, True);
       return NULL;
    }
 
@@ -2797,7 +2804,7 @@
          (majorVersion == 3 && minorVersion > 3) ||
          (majorVersion == 4 && minorVersion > 5) ||
          majorVersion > 4))) {
-      generate_error(dpy, BadMatch, 0, X_GLXCreateContextAtrribsARB, True);
+      generate_error(dpy, BadMatch, 0, X_GLXCreateContextAttribsARB, True);
       return NULL;
    }
    if (profileMask == GLX_CONTEXT_ES_PROFILE_BIT_EXT &&
@@ -2809,18 +2816,18 @@
        * different error code for invalid ES versions, but this is what NVIDIA
        * does and piglit expects.
        */
-      generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAtrribsARB, False);
+      generate_error(dpy, GLXBadProfileARB, 0, X_GLXCreateContextAttribsARB, False);
       return NULL;
    }
 
    if ((contextFlags & GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB) &&
        majorVersion < 3) {
-      generate_error(dpy, BadMatch, 0, X_GLXCreateContextAtrribsARB, True);
+      generate_error(dpy, BadMatch, 0, X_GLXCreateContextAttribsARB, True);
       return NULL;
    }
 
    if (renderType == GLX_COLOR_INDEX_TYPE && majorVersion >= 3) {
-      generate_error(dpy, BadMatch, 0, X_GLXCreateContextAtrribsARB, True);
+      generate_error(dpy, BadMatch, 0, X_GLXCreateContextAttribsARB, True);
       return NULL;
    }
 
@@ -2830,7 +2837,7 @@
                         majorVersion, minorVersion,
                         profileMask, contextFlags);
    if (!ctx) {
-      generate_error(dpy, GLXBadFBConfig, 0, X_GLXCreateContextAtrribsARB, False);
+      generate_error(dpy, GLXBadFBConfig, 0, X_GLXCreateContextAttribsARB, False);
    }
 
    return ctx;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/glx/xlib/xm_api.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/glx/xlib/xm_api.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/glx/xlib/xm_api.h	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/glx/xlib/xm_api.h	2015-09-16 14:36:09.000000000 +0000
@@ -378,13 +378,13 @@
 extern void
 xmesa_destroy_buffers_on_display(Display *dpy);
 
-static INLINE GLuint
+static inline GLuint
 xmesa_buffer_width(XMesaBuffer b)
 {
    return b->width;
 }
 
-static INLINE GLuint
+static inline GLuint
 xmesa_buffer_height(XMesaBuffer b)
 {
    return b->height;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/glx/xlib/xm_st.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/glx/xlib/xm_st.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/glx/xlib/xm_st.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/glx/xlib/xm_st.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
 };
 
 
-static INLINE struct xmesa_st_framebuffer *
+static inline struct xmesa_st_framebuffer *
 xmesa_st_framebuffer(struct st_framebuffer_iface *stfbi)
 {
    return (struct xmesa_st_framebuffer *) stfbi->st_manager_private;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/hgl/hgl.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/hgl/hgl.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/hgl/hgl.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/hgl/hgl.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,7 +32,7 @@
 
 
 // Perform a safe void to hgl_context cast
-static INLINE struct hgl_context*
+static inline struct hgl_context*
 hgl_st_context(struct st_context_iface *stctxi)
 {
 	struct hgl_context* context;
@@ -44,7 +44,7 @@
 
 
 // Perform a safe void to hgl_buffer cast
-static INLINE struct hgl_buffer*
+static inline struct hgl_buffer*
 hgl_st_framebuffer(struct st_framebuffer_iface *stfbi)
 {
 	struct hgl_buffer* buffer;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/adapter9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/adapter9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/adapter9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/adapter9.c	2015-09-16 14:36:09.000000000 +0000
@@ -163,7 +163,7 @@
     return D3D_OK;
 }
 
-static INLINE boolean
+static inline boolean
 backbuffer_format( D3DFORMAT dfmt,
                    D3DFORMAT bfmt,
                    boolean win )
@@ -220,7 +220,7 @@
     return D3D_OK;
 }
 
-static INLINE boolean
+static inline boolean
 display_format( D3DFORMAT fmt,
                 boolean win )
 {
@@ -545,7 +545,7 @@
                      /*D3DDEVCAPS_RTPATCHES |*/
                      /*D3DDEVCAPS_RTPATCHHANDLEZERO |*/
                      /*D3DDEVCAPS_SEPARATETEXTUREMEMORIES |*/
-                     /*D3DDEVCAPS_TEXTURENONLOCALVIDMEM |*/
+                     D3DDEVCAPS_TEXTURENONLOCALVIDMEM |
                      /* D3DDEVCAPS_TEXTURESYSTEMMEMORY |*/
                      D3DDEVCAPS_TEXTUREVIDEOMEMORY |
                      D3DDEVCAPS_TLVERTEXSYSTEMMEMORY |
@@ -561,32 +561,32 @@
                                D3DPMISCCAPS_TSSARGTEMP |
                                D3DPMISCCAPS_BLENDOP |
                                D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) |
-                               /*D3DPMISCCAPS_PERSTAGECONSTANT |*/
+                               /*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */
                                /*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */
                                D3DPMISCCAPS_FOGANDSPECULARALPHA |
                                D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) |
                                D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) |
                                D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING |
-                               /*D3DPMISCCAPS_FOGVERTEXCLAMPED*/0;
+                               D3DPMISCCAPS_FOGVERTEXCLAMPED;
     if (!screen->get_param(screen, PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION))
         pCaps->PrimitiveMiscCaps |= D3DPMISCCAPS_CLIPTLVERTS;
 
     pCaps->RasterCaps =
         D3DPIPECAP(ANISOTROPIC_FILTER, D3DPRASTERCAPS_ANISOTROPY) |
-        /*D3DPRASTERCAPS_COLORPERSPECTIVE |*/
+        D3DPRASTERCAPS_COLORPERSPECTIVE |
         D3DPRASTERCAPS_DITHER |
         D3DPRASTERCAPS_DEPTHBIAS |
-        /*D3DPRASTERCAPS_FOGRANGE |*/
-        /*D3DPRASTERCAPS_FOGTABLE |*/
-        /*D3DPRASTERCAPS_FOGVERTEX |*/
+        D3DPRASTERCAPS_FOGRANGE |
+        D3DPRASTERCAPS_FOGTABLE |
+        D3DPRASTERCAPS_FOGVERTEX |
         D3DPRASTERCAPS_MIPMAPLODBIAS |
         D3DPRASTERCAPS_MULTISAMPLE_TOGGLE |
         D3DPRASTERCAPS_SCISSORTEST |
         D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS |
         /*D3DPRASTERCAPS_WBUFFER |*/
-        /*D3DPRASTERCAPS_WFOG |*/
+        D3DPRASTERCAPS_WFOG |
         /*D3DPRASTERCAPS_ZBUFFERLESSHSR |*/
-        /*D3DPRASTERCAPS_ZFOG |*/
+        D3DPRASTERCAPS_ZFOG |
         D3DPRASTERCAPS_ZTEST;
 
     pCaps->ZCmpCaps = D3DPCMPCAPS_NEVER |
@@ -697,15 +697,12 @@
     pCaps->MaxAnisotropy =
         (DWORD)screen->get_paramf(screen, PIPE_CAPF_MAX_TEXTURE_ANISOTROPY);
 
-    pCaps->MaxVertexW = 1.0f; /* XXX */
-    pCaps->GuardBandLeft = screen->get_paramf(screen,
-                                              PIPE_CAPF_GUARD_BAND_LEFT);
-    pCaps->GuardBandTop = screen->get_paramf(screen,
-                                             PIPE_CAPF_GUARD_BAND_TOP);
-    pCaps->GuardBandRight = screen->get_paramf(screen,
-                                               PIPE_CAPF_GUARD_BAND_RIGHT);
-    pCaps->GuardBandBottom = screen->get_paramf(screen,
-                                                PIPE_CAPF_GUARD_BAND_BOTTOM);
+    /* Values for GeForce 9600 GT */
+    pCaps->MaxVertexW = 1e10f;
+    pCaps->GuardBandLeft = -1e9f;
+    pCaps->GuardBandTop = -1e9f;
+    pCaps->GuardBandRight = 1e9f;
+    pCaps->GuardBandBottom = 1e9f;
     pCaps->ExtentsAdjust = 0.0f;
 
     pCaps->StencilCaps =
@@ -724,8 +721,6 @@
         /*D3DFVFCAPS_DONOTSTRIPELEMENTS |*/
         D3DFVFCAPS_PSIZE;
 
-    /* XXX: Some of these are probably not in SM2.0 so cap them when I figure
-     * them out. For now leave them all enabled. */
     pCaps->TextureOpCaps = D3DTEXOPCAPS_DISABLE |
                            D3DTEXOPCAPS_SELECTARG1 |
                            D3DTEXOPCAPS_SELECTARG2 |
@@ -796,7 +791,8 @@
     pCaps->MaxVertexShaderConst = NINE_MAX_CONST_F;
 
     pCaps->PixelShaderVersion = D3DPS_VERSION(3,0);
-    pCaps->PixelShader1xMaxValue = 8.0f; /* XXX: wine */
+    /* Value for GeForce 9600 GT */
+    pCaps->PixelShader1xMaxValue = 65504.f;
 
     pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET |
                       D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/adapter9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/adapter9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/adapter9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/adapter9.h	2015-09-16 14:36:09.000000000 +0000
@@ -49,7 +49,7 @@
     
     struct d3dadapter9_context *ctx;
 };
-static INLINE struct NineAdapter9 *
+static inline struct NineAdapter9 *
 NineAdapter9( void *data )
 {
     return (struct NineAdapter9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/authenticatedchannel9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/authenticatedchannel9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/authenticatedchannel9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/authenticatedchannel9.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,7 +29,7 @@
 {
     struct NineUnknown base;
 };
-static INLINE struct NineAuthenticatedChannel9 *
+static inline struct NineAuthenticatedChannel9 *
 NineAuthenticatedChannel9( void *data )
 {
     return (struct NineAuthenticatedChannel9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/basetexture9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/basetexture9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/basetexture9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/basetexture9.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,8 @@
     user_assert(!(Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)) ||
                 Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
     user_assert(!(Usage & D3DUSAGE_DYNAMIC) ||
-                Pool != D3DPOOL_MANAGED, D3DERR_INVALIDCALL);
+                !(Pool == D3DPOOL_MANAGED ||
+                  Pool == D3DPOOL_SCRATCH), D3DERR_INVALIDCALL);
 
     hr = NineResource9_ctor(&This->base, pParams, initResource, alloc, Type, Pool, Usage);
     if (FAILED(hr))
@@ -85,6 +86,9 @@
                    util_format_has_depth(util_format_description(This->base.info.format));
 
     list_inithead(&This->list);
+    list_inithead(&This->list2);
+    if (Pool == D3DPOOL_MANAGED)
+        list_add(&This->list2, &This->base.base.device->managed_textures);
 
     return D3D_OK;
 }
@@ -98,7 +102,9 @@
     pipe_sampler_view_reference(&This->view[1], NULL);
 
     if (This->list.prev != NULL && This->list.next != NULL)
-        list_del(&This->list),
+        list_del(&This->list);
+    if (This->list2.prev != NULL && This->list2.next != NULL)
+        list_del(&This->list2);
 
     NineResource9_dtor(&This->base);
 }
@@ -153,6 +159,8 @@
     user_assert(FilterType != D3DTEXF_NONE, D3DERR_INVALIDCALL);
 
     This->mipfilter = FilterType;
+    This->dirty_mip = TRUE;
+    NineBaseTexture9_GenerateMipSubLevels(This);
 
     return D3D_OK;
 }
@@ -310,14 +318,12 @@
                 tex->dirty_box.width, tex->dirty_box.height, tex->dirty_box.depth);
 
             if (tex->dirty_box.width) {
-                for (l = 0; l <= last_level; ++l) {
+                for (l = min_level_dirty; l <= last_level; ++l) {
                     u_box_minify_2d(&box, &tex->dirty_box, l);
-                    NineVolume9_AddDirtyRegion(tex->volumes[l], &tex->dirty_box);
+                    NineVolume9_UploadSelf(tex->volumes[l], &box);
                 }
                 memset(&tex->dirty_box, 0, sizeof(tex->dirty_box));
             }
-            for (l = min_level_dirty; l <= last_level; ++l)
-                NineVolume9_UploadSelf(tex->volumes[l]);
         } else {
             assert(!"invalid texture type");
         }
@@ -361,8 +367,7 @@
                 box.width = u_minify(This->base.info.width0, l);
                 box.height = u_minify(This->base.info.height0, l);
                 box.depth = u_minify(This->base.info.depth0, l);
-                NineVolume9_AddDirtyRegion(tex->volumes[l], &box);
-                NineVolume9_UploadSelf(tex->volumes[l]);
+                NineVolume9_UploadSelf(tex->volumes[l], &box);
             }
         } else {
             assert(!"invalid texture type");
@@ -381,8 +386,7 @@
 void WINAPI
 NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This )
 {
-    struct pipe_resource *resource = This->base.resource;
-
+    struct pipe_resource *resource;
     unsigned base_level = 0;
     unsigned last_level = This->base.info.last_level - This->managed.lod;
     unsigned first_layer = 0;
@@ -405,6 +409,8 @@
 
     last_layer = util_max_layer(This->view[0]->texture, base_level);
 
+    resource = This->base.resource;
+
     util_gen_mipmap(This->pipe, resource,
                     resource->format, base_level, last_level,
                     first_layer, last_layer, filter);
@@ -530,6 +536,11 @@
             swizzle[2] = PIPE_SWIZZLE_RED;
             swizzle[3] = PIPE_SWIZZLE_RED;
         }
+    } else if (resource->format == PIPE_FORMAT_RGTC2_UNORM) {
+        swizzle[0] = PIPE_SWIZZLE_GREEN;
+        swizzle[1] = PIPE_SWIZZLE_RED;
+        swizzle[2] = PIPE_SWIZZLE_ONE;
+        swizzle[3] = PIPE_SWIZZLE_ONE;
     } else if (resource->format != PIPE_FORMAT_A8_UNORM &&
                resource->format != PIPE_FORMAT_RGTC1_UNORM) {
         /* exceptions:
@@ -578,6 +589,21 @@
         NineBaseTexture9_UploadSelf(This);
 }
 
+void
+NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This )
+{
+    if (This->base.pool != D3DPOOL_MANAGED ||
+        This->managed.lod_resident == -1)
+        return;
+
+    pipe_resource_reference(&This->base.resource, NULL);
+    This->managed.lod_resident = -1;
+    This->managed.dirty = TRUE;
+
+    /* If the texture is bound, we have to re-upload it */
+    BASETEX_REGISTER_UPDATE(This);
+}
+
 #ifdef DEBUG
 void
 NineBaseTexture9_Dump( struct NineBaseTexture9 *This )
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/basetexture9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/basetexture9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/basetexture9.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/basetexture9.h	2015-09-16 14:36:09.000000000 +0000
@@ -30,7 +30,8 @@
 struct NineBaseTexture9
 {
     struct NineResource9 base;
-    struct list_head list;
+    struct list_head list; /* for update_textures */
+    struct list_head list2; /* for managed_textures */
 
     /* g3d */
     struct pipe_context *pipe;
@@ -53,7 +54,7 @@
         DWORD lod_resident;
     } managed;
 };
-static INLINE struct NineBaseTexture9 *
+static inline struct NineBaseTexture9 *
 NineBaseTexture9( void *data )
 {
     return (struct NineBaseTexture9 *)data;
@@ -94,6 +95,9 @@
 void WINAPI
 NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This );
 
+void
+NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This );
+
 /* For D3DPOOL_MANAGED only (after SetLOD change): */
 HRESULT
 NineBaseTexture9_CreatePipeResource( struct NineBaseTexture9 *This,
@@ -107,7 +111,7 @@
 NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This,
                                     const int sRGB );
 
-static INLINE void
+static inline void
 NineBaseTexture9_Validate( struct NineBaseTexture9 *This )
 {
     DBG_FLAG(DBG_BASETEXTURE, "This=%p dirty=%i dirty_mip=%i lod=%u/%u\n",
@@ -119,7 +123,7 @@
         NineBaseTexture9_GenerateMipSubLevels(This);
 }
 
-static INLINE struct pipe_sampler_view *
+static inline struct pipe_sampler_view *
 NineBaseTexture9_GetSamplerView( struct NineBaseTexture9 *This, const int sRGB )
 {
     if (!This->view[sRGB])
@@ -131,7 +135,7 @@
 void
 NineBaseTexture9_Dump( struct NineBaseTexture9 *This );
 #else
-static INLINE void
+static inline void
 NineBaseTexture9_Dump( struct NineBaseTexture9 *This ) { }
 #endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/cryptosession9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/cryptosession9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/cryptosession9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/cryptosession9.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,7 +29,7 @@
 {
     struct NineUnknown base;
 };
-static INLINE struct NineCryptoSession9 *
+static inline struct NineCryptoSession9 *
 NineCryptoSession9( void *data )
 {
     return (struct NineCryptoSession9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/cubetexture9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/cubetexture9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/cubetexture9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/cubetexture9.c	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
     struct pipe_screen *screen = pParams->device->screen;
     enum pipe_format pf;
     unsigned i, l, f, offset, face_size = 0;
-    unsigned *level_offsets;
+    unsigned *level_offsets = NULL;
     D3DSURFACE_DESC sfdesc;
     void *p;
     HRESULT hr;
@@ -70,6 +70,13 @@
     if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2)
         return D3DERR_INVALIDCALL;
 
+    if (compressed_format(Format)) {
+        const unsigned w = util_format_get_blockwidth(pf);
+        const unsigned h = util_format_get_blockheight(pf);
+
+        user_assert(!(EdgeLength % w) && !(EdgeLength % h), D3DERR_INVALIDCALL);
+    }
+
     info->screen = pParams->device->screen;
     info->target = PIPE_TEXTURE_CUBE;
     info->format = pf;
@@ -106,7 +113,7 @@
         face_size = nine_format_get_size_and_offsets(pf, level_offsets,
                                                      EdgeLength, EdgeLength,
                                                      info->last_level);
-        This->managed_buffer = MALLOC(6 * face_size);
+        This->managed_buffer = align_malloc(6 * face_size, 32);
         if (!This->managed_buffer)
             return E_OUTOFMEMORY;
     }
@@ -150,8 +157,12 @@
         }
     }
 
-    for (i = 0; i < 6; ++i) /* width = 0 means empty, depth stays 1 */
+    for (i = 0; i < 6; ++i) {
+        /* Textures start initially dirty */
+        This->dirty_rect[i].width = EdgeLength;
+        This->dirty_rect[i].height = EdgeLength;
         This->dirty_rect[i].depth = 1;
+    }
 
     return D3D_OK;
 }
@@ -259,13 +270,17 @@
     user_assert(FaceType < 6, D3DERR_INVALIDCALL);
 
     if (This->base.base.pool != D3DPOOL_MANAGED) {
-        if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP)
+        if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) {
             This->base.dirty_mip = TRUE;
+            BASETEX_REGISTER_UPDATE(&This->base);
+        }
         return D3D_OK;
     }
-    This->base.managed.dirty = TRUE;
 
-    BASETEX_REGISTER_UPDATE(&This->base);
+    if (This->base.base.pool == D3DPOOL_MANAGED) {
+        This->base.managed.dirty = TRUE;
+        BASETEX_REGISTER_UPDATE(&This->base);
+    }
 
     if (!pDirtyRect) {
         u_box_origin_2d(This->base.base.info.width0,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/cubetexture9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/cubetexture9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/cubetexture9.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/cubetexture9.h	2015-09-16 14:36:09.000000000 +0000
@@ -33,7 +33,7 @@
     struct pipe_box dirty_rect[6]; /* covers all mip levels */
     uint8_t *managed_buffer;
 };
-static INLINE struct NineCubeTexture9 *
+static inline struct NineCubeTexture9 *
 NineCubeTexture9( void *data )
 {
     return (struct NineCubeTexture9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9.c	2015-09-16 14:36:09.000000000 +0000
@@ -119,48 +119,6 @@
             This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
 }
 
-void
-NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask )
-{
-    struct pipe_context *pipe = This->pipe;
-
-    DBG("This=%p mask=%u\n", This, mask);
-
-    if (mask & 0x1) {
-        struct pipe_constant_buffer cb;
-        cb.buffer_offset = 0;
-
-        if (This->prefer_user_constbuf) {
-            cb.buffer = NULL;
-            cb.user_buffer = This->state.vs_const_f;
-        } else {
-            cb.buffer = This->constbuf_vs;
-            cb.user_buffer = NULL;
-        }
-        cb.buffer_size = This->vs_const_size;
-        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
-
-        if (This->prefer_user_constbuf) {
-            cb.user_buffer = This->state.ps_const_f;
-        } else {
-            cb.buffer = This->constbuf_ps;
-        }
-        cb.buffer_size = This->ps_const_size;
-        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
-    }
-
-    if (mask & 0x2) {
-        struct pipe_poly_stipple stipple;
-        memset(&stipple, ~0, sizeof(stipple));
-        pipe->set_polygon_stipple(pipe, &stipple);
-    }
-
-    This->state.changed.group = NINE_STATE_ALL;
-    This->state.changed.vtxbuf = (1ULL << This->caps.MaxStreams) - 1;
-    This->state.changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
-    This->state.changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
-}
-
 #define GET_PCAP(n) pScreen->get_param(pScreen, PIPE_CAP_##n)
 HRESULT
 NineDevice9_ctor( struct NineDevice9 *This,
@@ -186,6 +144,7 @@
     if (FAILED(hr)) { return hr; }
 
     list_inithead(&This->update_textures);
+    list_inithead(&This->managed_textures);
 
     This->screen = pScreen;
     This->caps = *pCaps;
@@ -341,16 +300,19 @@
         This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
         This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
         This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+        This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
         if (!This->state.vs_const_f || !This->state.ps_const_f ||
-            !This->state.vs_lconstf_temp)
+            !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp)
             return E_OUTOFMEMORY;
 
         if (strstr(pScreen->get_name(pScreen), "AMD") ||
             strstr(pScreen->get_name(pScreen), "ATI")) {
-            This->prefer_user_constbuf = TRUE;
             This->driver_bugs.buggy_barycentrics = TRUE;
         }
 
+        /* Disable NV path for now, needs some fixes */
+        This->prefer_user_constbuf = TRUE;
+
         tmpl.target = PIPE_BUFFER;
         tmpl.format = PIPE_FORMAT_R8_UNORM;
         tmpl.height0 = 1;
@@ -376,6 +338,8 @@
     {
         struct pipe_resource tmplt;
         struct pipe_sampler_view templ;
+        struct pipe_sampler_state samp;
+        memset(&samp, 0, sizeof(samp));
 
         tmplt.target = PIPE_TEXTURE_2D;
         tmplt.width0 = 1;
@@ -404,22 +368,39 @@
         templ.swizzle_a = PIPE_SWIZZLE_ONE;
         templ.target = This->dummy_texture->target;
 
-        This->dummy_sampler = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ);
-        if (!This->dummy_sampler)
+        This->dummy_sampler_view = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ);
+        if (!This->dummy_sampler_view)
             return D3DERR_DRIVERINTERNALERROR;
+
+        samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+        samp.max_lod = 15.0f;
+        samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+        samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+        samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+        samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+        samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+        samp.compare_mode = PIPE_TEX_COMPARE_NONE;
+        samp.compare_func = PIPE_FUNC_LEQUAL;
+        samp.normalized_coords = 1;
+        samp.seamless_cube_map = 1;
+        This->dummy_sampler_state = samp;
     }
 
     /* Allocate upload helper for drivers that suck (from st pov ;). */
-    {
-        unsigned bind = 0;
 
-        This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
-        This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
+    This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+    This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
+
+    if (!This->driver_caps.user_vbufs)
+        This->vertex_uploader = u_upload_create(This->pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+    if (!This->driver_caps.user_ibufs)
+        This->index_uploader = u_upload_create(This->pipe, 128 * 1024, 4, PIPE_BIND_INDEX_BUFFER);
+    if (!This->driver_caps.user_cbufs) {
+        unsigned alignment = GET_PCAP(CONSTANT_BUFFER_OFFSET_ALIGNMENT);
 
-        if (!This->driver_caps.user_vbufs) bind |= PIPE_BIND_VERTEX_BUFFER;
-        if (!This->driver_caps.user_ibufs) bind |= PIPE_BIND_INDEX_BUFFER;
-        if (bind)
-            This->upload = u_upload_create(This->pipe, 1 << 20, 4, bind);
+        This->constbuf_uploader = u_upload_create(This->pipe, This->vs_const_size,
+                                                  alignment, PIPE_BIND_CONSTANT_BUFFER);
     }
 
     This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION);
@@ -429,10 +410,15 @@
     nine_ff_init(This); /* initialize fixed function code */
 
     NineDevice9_SetDefaultState(This, FALSE);
-    NineDevice9_RestoreNonCSOState(This, ~0);
+
+    {
+        struct pipe_poly_stipple stipple;
+        memset(&stipple, ~0, sizeof(stipple));
+        This->pipe->set_polygon_stipple(This->pipe, &stipple);
+    }
 
     This->update = &This->state;
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     ID3DPresentGroup_Release(This->present);
 
@@ -452,12 +438,16 @@
     nine_ff_fini(This);
     nine_state_clear(&This->state, TRUE);
 
-    if (This->upload)
-        u_upload_destroy(This->upload);
+    if (This->vertex_uploader)
+        u_upload_destroy(This->vertex_uploader);
+    if (This->index_uploader)
+        u_upload_destroy(This->index_uploader);
+    if (This->constbuf_uploader)
+        u_upload_destroy(This->constbuf_uploader);
 
     nine_bind(&This->record, NULL);
 
-    pipe_sampler_view_reference(&This->dummy_sampler, NULL);
+    pipe_sampler_view_reference(&This->dummy_sampler_view, NULL);
     pipe_resource_reference(&This->dummy_texture, NULL);
     pipe_resource_reference(&This->constbuf_vs, NULL);
     pipe_resource_reference(&This->constbuf_ps, NULL);
@@ -465,6 +455,7 @@
     FREE(This->state.vs_const_f);
     FREE(This->state.ps_const_f);
     FREE(This->state.vs_lconstf_temp);
+    FREE(This->state.ps_lconstf_temp);
 
     if (This->swapchains) {
         for (i = 0; i < This->nswapchains; ++i)
@@ -510,7 +501,7 @@
     return &This->caps;
 }
 
-static INLINE void
+static inline void
 NineDevice9_PauseRecording( struct NineDevice9 *This )
 {
     if (This->record) {
@@ -519,7 +510,7 @@
     }
 }
 
-static INLINE void
+static inline void
 NineDevice9_ResumeRecording( struct NineDevice9 *This )
 {
     if (This->record) {
@@ -547,10 +538,14 @@
 HRESULT WINAPI
 NineDevice9_EvictManagedResources( struct NineDevice9 *This )
 {
-    /* We don't really need to do anything here, but might want to free up
-     * the GPU virtual address space by killing pipe_resources.
-     */
-    STUB(D3D_OK);
+    struct NineBaseTexture9 *tex;
+
+    DBG("This=%p\n", This);
+    LIST_FOR_EACH_ENTRY(tex, &This->managed_textures, list2) {
+        NineBaseTexture9_UnLoad(tex);
+    }
+
+    return D3D_OK;
 }
 
 HRESULT WINAPI
@@ -599,11 +594,11 @@
                                  UINT YHotSpot,
                                  IDirect3DSurface9 *pCursorBitmap )
 {
-    /* TODO: hardware cursor */
     struct NineSurface9 *surf = NineSurface9(pCursorBitmap);
     struct pipe_context *pipe = This->pipe;
     struct pipe_box box;
     struct pipe_transfer *transfer;
+    BOOL hw_cursor;
     void *ptr;
 
     DBG_FLAG(DBG_SWAPCHAIN, "This=%p XHotSpot=%u YHotSpot=%u "
@@ -611,8 +606,15 @@
 
     user_assert(pCursorBitmap, D3DERR_INVALIDCALL);
 
-    This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0);
-    This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0);
+    if (This->swapchains[0]->params.Windowed) {
+        This->cursor.w = MIN2(surf->desc.Width, 32);
+        This->cursor.h = MIN2(surf->desc.Height, 32);
+        hw_cursor = 1; /* always use hw cursor for windowed mode */
+    } else {
+        This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0);
+        This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0);
+        hw_cursor = This->cursor.w == 32 && This->cursor.h == 32;
+    }
 
     u_box_origin_2d(This->cursor.w, This->cursor.h, &box);
 
@@ -643,16 +645,21 @@
                                  lock.pBits, lock.Pitch,
                                  This->cursor.w, This->cursor.h);
 
-        if (!This->cursor.software &&
-            This->cursor.w == 32 && This->cursor.h == 32)
-            ID3DPresent_SetCursor(This->swapchains[0]->present,
-                                  lock.pBits, &This->cursor.hotspot,
-                                  This->cursor.visible);
+        if (hw_cursor)
+            hw_cursor = ID3DPresent_SetCursor(This->swapchains[0]->present,
+                                              lock.pBits,
+                                              &This->cursor.hotspot,
+                                              This->cursor.visible) == D3D_OK;
 
         NineSurface9_UnlockRect(surf);
     }
     pipe->transfer_unmap(pipe, transfer);
 
+    /* hide cursor if we emulate it */
+    if (!hw_cursor)
+        ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, FALSE);
+    This->cursor.software = !hw_cursor;
+
     return D3D_OK;
 }
 
@@ -670,7 +677,7 @@
     This->cursor.pos.y = Y;
 
     if (!This->cursor.software)
-        ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos);
+        This->cursor.software = ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos) != D3D_OK;
 }
 
 BOOL WINAPI
@@ -683,7 +690,7 @@
 
     This->cursor.visible = bShow && (This->cursor.hotspot.x != -1);
     if (!This->cursor.software)
-        ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow);
+        This->cursor.software = ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow) != D3D_OK;
 
     return old;
 }
@@ -752,8 +759,8 @@
     for (i = 0; i < This->nswapchains; ++i) {
         D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
         hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL);
-        if (FAILED(hr))
-            return (hr == D3DERR_OUTOFVIDEOMEMORY) ? hr : D3DERR_DEVICELOST;
+        if (hr != D3D_OK)
+            return hr;
     }
 
     nine_pipe_context_clear(This);
@@ -1108,6 +1115,13 @@
     default: break;
     }
 
+    if (compressed_format(Format)) {
+        const unsigned w = util_format_get_blockwidth(templ.format);
+        const unsigned h = util_format_get_blockheight(templ.format);
+
+        user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
+    }
+
     if (Pool == D3DPOOL_DEFAULT && Format != D3DFMT_NULL) {
         /* resource_create doesn't return an error code, so check format here */
         user_assert(templ.format != PIPE_FORMAT_NONE, D3DERR_INVALIDCALL);
@@ -1173,6 +1187,8 @@
 {
     struct NineSurface9 *dst = NineSurface9(pDestinationSurface);
     struct NineSurface9 *src = NineSurface9(pSourceSurface);
+    int copy_width, copy_height;
+    RECT destRect;
 
     DBG("This=%p pSourceSurface=%p pDestinationSurface=%p "
         "pSourceRect=%p pDestPoint=%p\n", This,
@@ -1184,13 +1200,75 @@
     if (pDestPoint)
         DBG("pDestPoint = (%u,%u)\n", pDestPoint->x, pDestPoint->y);
 
+    user_assert(dst && src, D3DERR_INVALIDCALL);
+
     user_assert(dst->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
     user_assert(src->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
 
     user_assert(dst->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL);
     user_assert(src->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL);
 
-    return NineSurface9_CopySurface(dst, src, pDestPoint, pSourceRect);
+    user_assert(!src->lock_count, D3DERR_INVALIDCALL);
+    user_assert(!dst->lock_count, D3DERR_INVALIDCALL);
+
+    user_assert(dst->desc.Format == src->desc.Format, D3DERR_INVALIDCALL);
+    user_assert(!depth_stencil_format(dst->desc.Format), D3DERR_INVALIDCALL);
+
+    if (pSourceRect) {
+        copy_width = pSourceRect->right - pSourceRect->left;
+        copy_height = pSourceRect->bottom - pSourceRect->top;
+
+        user_assert(pSourceRect->left >= 0 &&
+                    copy_width > 0 &&
+                    pSourceRect->right <= src->desc.Width &&
+                    pSourceRect->top >= 0 &&
+                    copy_height > 0 &&
+                    pSourceRect->bottom <= src->desc.Height,
+                    D3DERR_INVALIDCALL);
+    } else {
+        copy_width = src->desc.Width;
+        copy_height = src->desc.Height;
+    }
+
+    destRect.right = copy_width;
+    destRect.bottom = copy_height;
+
+    if (pDestPoint) {
+        user_assert(pDestPoint->x >= 0 && pDestPoint->y >= 0,
+                    D3DERR_INVALIDCALL);
+        destRect.right += pDestPoint->x;
+        destRect.bottom += pDestPoint->y;
+    }
+
+    user_assert(destRect.right <= dst->desc.Width &&
+                destRect.bottom <= dst->desc.Height,
+                D3DERR_INVALIDCALL);
+
+    if (compressed_format(dst->desc.Format)) {
+        const unsigned w = util_format_get_blockwidth(dst->base.info.format);
+        const unsigned h = util_format_get_blockheight(dst->base.info.format);
+
+        if (pDestPoint) {
+            user_assert(!(pDestPoint->x % w) && !(pDestPoint->y % h),
+                        D3DERR_INVALIDCALL);
+        }
+
+        if (pSourceRect) {
+            user_assert(!(pSourceRect->left % w) && !(pSourceRect->top % h),
+                        D3DERR_INVALIDCALL);
+        }
+        if (!(copy_width == src->desc.Width &&
+              copy_width == dst->desc.Width &&
+              copy_height == src->desc.Height &&
+              copy_height == dst->desc.Height)) {
+            user_assert(!(copy_width  % w) && !(copy_height % h),
+                        D3DERR_INVALIDCALL);
+        }
+    }
+
+    NineSurface9_CopyMemToDefault(dst, src, pDestPoint, pSourceRect);
+
+    return D3D_OK;
 }
 
 HRESULT WINAPI
@@ -1202,6 +1280,7 @@
     struct NineBaseTexture9 *srcb = NineBaseTexture9(pSourceTexture);
     unsigned l, m;
     unsigned last_level = dstb->base.info.last_level;
+    RECT rect;
 
     DBG("This=%p pSourceTexture=%p pDestinationTexture=%p\n", This,
         pSourceTexture, pDestinationTexture);
@@ -1227,10 +1306,6 @@
 
     user_assert(dstb->base.type == srcb->base.type, D3DERR_INVALIDCALL);
 
-    /* TODO: We can restrict the update to the dirty portions of the source.
-     * Yes, this seems silly, but it's what MSDN says ...
-     */
-
     /* Find src level that matches dst level 0: */
     user_assert(srcb->base.info.width0 >= dstb->base.info.width0 &&
                 srcb->base.info.height0 >= dstb->base.info.height0 &&
@@ -1254,9 +1329,25 @@
         struct NineTexture9 *dst = NineTexture9(dstb);
         struct NineTexture9 *src = NineTexture9(srcb);
 
-        for (l = 0; l <= last_level; ++l, ++m)
-            NineSurface9_CopySurface(dst->surfaces[l],
-                                     src->surfaces[m], NULL, NULL);
+        if (src->dirty_rect.width == 0)
+            return D3D_OK;
+
+        pipe_box_to_rect(&rect, &src->dirty_rect);
+        for (l = 0; l < m; ++l)
+            rect_minify_inclusive(&rect);
+
+        for (l = 0; l <= last_level; ++l, ++m) {
+            fit_rect_format_inclusive(dst->base.base.info.format,
+                                      &rect,
+                                      dst->surfaces[l]->desc.Width,
+                                      dst->surfaces[l]->desc.Height);
+            NineSurface9_CopyMemToDefault(dst->surfaces[l],
+                                          src->surfaces[m],
+                                          (POINT *)&rect,
+                                          &rect);
+            rect_minify_inclusive(&rect);
+        }
+        u_box_origin_2d(0, 0, &src->dirty_rect);
     } else
     if (dstb->base.type == D3DRTYPE_CUBETEXTURE) {
         struct NineCubeTexture9 *dst = NineCubeTexture9(dstb);
@@ -1265,10 +1356,25 @@
 
         /* GPUs usually have them stored as arrays of mip-mapped 2D textures. */
         for (z = 0; z < 6; ++z) {
+            if (src->dirty_rect[z].width == 0)
+                continue;
+
+            pipe_box_to_rect(&rect, &src->dirty_rect[z]);
+            for (l = 0; l < m; ++l)
+                rect_minify_inclusive(&rect);
+
             for (l = 0; l <= last_level; ++l, ++m) {
-                NineSurface9_CopySurface(dst->surfaces[l * 6 + z],
-                                         src->surfaces[m * 6 + z], NULL, NULL);
+                fit_rect_format_inclusive(dst->base.base.info.format,
+                                          &rect,
+                                          dst->surfaces[l * 6 + z]->desc.Width,
+                                          dst->surfaces[l * 6 + z]->desc.Height);
+                NineSurface9_CopyMemToDefault(dst->surfaces[l * 6 + z],
+                                              src->surfaces[m * 6 + z],
+                                              (POINT *)&rect,
+                                              &rect);
+                rect_minify_inclusive(&rect);
             }
+            u_box_origin_2d(0, 0, &src->dirty_rect[z]);
             m -= l;
         }
     } else
@@ -1276,9 +1382,12 @@
         struct NineVolumeTexture9 *dst = NineVolumeTexture9(dstb);
         struct NineVolumeTexture9 *src = NineVolumeTexture9(srcb);
 
+        if (src->dirty_box.width == 0)
+            return D3D_OK;
         for (l = 0; l <= last_level; ++l, ++m)
-            NineVolume9_CopyVolume(dst->volumes[l],
-                                   src->volumes[m], 0, 0, 0, NULL);
+            NineVolume9_CopyMemToDefault(dst->volumes[l],
+                                         src->volumes[m], 0, 0, 0, NULL);
+        u_box_3d(0, 0, 0, 0, 0, 0, &src->dirty_box);
     } else{
         assert(!"invalid texture type");
     }
@@ -1308,7 +1417,12 @@
     user_assert(dst->desc.MultiSampleType < 2, D3DERR_INVALIDCALL);
     user_assert(src->desc.MultiSampleType < 2, D3DERR_INVALIDCALL);
 
-    return NineSurface9_CopySurface(dst, src, NULL, NULL);
+    user_assert(src->desc.Width == dst->desc.Width, D3DERR_INVALIDCALL);
+    user_assert(src->desc.Height == dst->desc.Height, D3DERR_INVALIDCALL);
+
+    NineSurface9_CopyDefaultToMem(dst, src);
+
+    return D3D_OK;
 }
 
 HRESULT WINAPI
@@ -1448,6 +1562,7 @@
     blit.filter = Filter == D3DTEXF_LINEAR ?
        PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST;
     blit.scissor_enable = FALSE;
+    blit.alpha_blend = FALSE;
 
     /* If both of a src and dst dimension are negative, flip them. */
     if (blit.dst.box.width < 0 && blit.src.box.width < 0) {
@@ -1464,8 +1579,12 @@
 
     user_assert(!scaled || dst != src, D3DERR_INVALIDCALL);
     user_assert(!scaled ||
-                !NineSurface9_IsOffscreenPlain(dst) ||
+                !NineSurface9_IsOffscreenPlain(dst), D3DERR_INVALIDCALL);
+    user_assert(!NineSurface9_IsOffscreenPlain(dst) ||
                 NineSurface9_IsOffscreenPlain(src), D3DERR_INVALIDCALL);
+    user_assert(NineSurface9_IsOffscreenPlain(dst) ||
+                dst->desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL),
+                D3DERR_INVALIDCALL);
     user_assert(!scaled ||
                 (!util_format_is_compressed(dst->base.info.format) &&
                  !util_format_is_compressed(src->base.info.format)),
@@ -1561,11 +1680,8 @@
     }
     d3dcolor_to_pipe_color_union(&rgba, color);
 
-    fallback =
-        !This->screen->is_format_supported(This->screen, surf->base.info.format,
-                                           surf->base.info.target,
-                                           surf->base.info.nr_samples,
-                                           PIPE_BIND_RENDER_TARGET);
+    fallback = !(surf->base.info.bind & PIPE_BIND_RENDER_TARGET);
+
     if (!fallback) {
         psurf = NineSurface9_GetSurface(surf, 0);
         if (!psurf)
@@ -1774,7 +1890,7 @@
         return D3D_OK;
     d3dcolor_to_pipe_color_union(&rgba, Color);
 
-    nine_update_state(This, NINE_STATE_FB);
+    nine_update_state_framebuffer(This);
 
     rect.x1 = This->state.viewport.X;
     rect.y1 = This->state.viewport.Y;
@@ -2012,8 +2128,10 @@
             return E_OUTOFMEMORY;
         state->ff.num_lights = N;
 
-        for (; n < Index; ++n)
+        for (; n < Index; ++n) {
+            memset(&state->ff.light[n], 0, sizeof(D3DLIGHT9));
             state->ff.light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+        }
     }
     state->ff.light[Index] = *pLight;
 
@@ -2508,6 +2626,7 @@
                                   DWORD Value )
 {
     struct nine_state *state = This->update;
+    int bumpmap_index = -1;
 
     DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value);
     nine_dump_D3DTSS_value(DBG_FF, Type, Value);
@@ -2516,6 +2635,36 @@
     user_assert(Type < Elements(state->ff.tex_stage[0]), D3DERR_INVALIDCALL);
 
     state->ff.tex_stage[Stage][Type] = Value;
+    switch (Type) {
+    case D3DTSS_BUMPENVMAT00:
+        bumpmap_index = 4 * Stage;
+        break;
+    case D3DTSS_BUMPENVMAT10:
+        bumpmap_index = 4 * Stage + 1;
+        break;
+    case D3DTSS_BUMPENVMAT01:
+        bumpmap_index = 4 * Stage + 2;
+        break;
+    case D3DTSS_BUMPENVMAT11:
+        bumpmap_index = 4 * Stage + 3;
+        break;
+    case D3DTSS_BUMPENVLSCALE:
+        bumpmap_index = 4 * 8 + 2 * Stage;
+        break;
+    case D3DTSS_BUMPENVLOFFSET:
+        bumpmap_index = 4 * 8 + 2 * Stage + 1;
+        break;
+    case D3DTSS_TEXTURETRANSFORMFLAGS:
+        state->changed.group |= NINE_STATE_PS1X_SHADER;
+        break;
+    default:
+        break;
+    }
+
+    if (bumpmap_index >= 0) {
+        state->bumpmap_vars[bumpmap_index] = Value;
+        state->changed.group |= NINE_STATE_PS_CONST;
+    }
 
     state->changed.group |= NINE_STATE_FF_PSSTAGES;
     state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
@@ -2560,12 +2709,11 @@
     if (Sampler >= D3DDMAPSAMPLER)
         Sampler = Sampler - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS;
 
-    state->samp[Sampler][Type] = Value;
-    state->changed.group |= NINE_STATE_SAMPLER;
-    state->changed.sampler[Sampler] |= 1 << Type;
-
-    if (Type == D3DSAMP_SRGBTEXTURE)
-        state->changed.srgb = TRUE;
+    if (state->samp[Sampler][Type] != Value || unlikely(This->is_recording)) {
+        state->samp[Sampler][Type] = Value;
+        state->changed.group |= NINE_STATE_SAMPLER;
+        state->changed.sampler[Sampler] |= 1 << Type;
+    }
 
     return D3D_OK;
 }
@@ -2697,7 +2845,7 @@
     STUB(0);
 }
 
-static INLINE void
+static inline void
 init_draw_info(struct pipe_draw_info *info,
                struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
 {
@@ -2724,7 +2872,7 @@
     DBG("iface %p, PrimitiveType %u, StartVertex %u, PrimitiveCount %u\n",
         This, PrimitiveType, StartVertex, PrimitiveCount);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = FALSE;
@@ -2757,7 +2905,7 @@
     user_assert(This->state.idxbuf, D3DERR_INVALIDCALL);
     user_assert(This->state.vdecl, D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = TRUE;
@@ -2789,7 +2937,7 @@
     user_assert(pVertexStreamZeroData && VertexStreamZeroStride,
                 D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = FALSE;
@@ -2803,13 +2951,16 @@
     vtxbuf.buffer = NULL;
     vtxbuf.user_buffer = pVertexStreamZeroData;
 
-    if (!This->driver_caps.user_vbufs)
-        u_upload_data(This->upload,
+    if (!This->driver_caps.user_vbufs) {
+        u_upload_data(This->vertex_uploader,
                       0,
                       (info.max_index + 1) * VertexStreamZeroStride, /* XXX */
                       vtxbuf.user_buffer,
                       &vtxbuf.buffer_offset,
                       &vtxbuf.buffer);
+        u_upload_unmap(This->vertex_uploader);
+        vtxbuf.user_buffer = NULL;
+    }
 
     This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vtxbuf);
 
@@ -2851,7 +3002,7 @@
     user_assert(IndexDataFormat == D3DFMT_INDEX16 ||
                 IndexDataFormat == D3DFMT_INDEX32, D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     init_draw_info(&info, This, PrimitiveType, PrimitiveCount);
     info.indexed = TRUE;
@@ -2872,23 +3023,28 @@
 
     if (!This->driver_caps.user_vbufs) {
         const unsigned base = info.min_index * VertexStreamZeroStride;
-        u_upload_data(This->upload,
+        u_upload_data(This->vertex_uploader,
                       base,
                       (info.max_index -
                        info.min_index + 1) * VertexStreamZeroStride, /* XXX */
                       (const uint8_t *)vbuf.user_buffer + base,
                       &vbuf.buffer_offset,
                       &vbuf.buffer);
+        u_upload_unmap(This->vertex_uploader);
         /* Won't be used: */
         vbuf.buffer_offset -= base;
+        vbuf.user_buffer = NULL;
     }
-    if (!This->driver_caps.user_ibufs)
-        u_upload_data(This->upload,
+    if (!This->driver_caps.user_ibufs) {
+        u_upload_data(This->index_uploader,
                       0,
                       info.count * ibuf.index_size,
                       ibuf.user_buffer,
                       &ibuf.offset,
                       &ibuf.buffer);
+        u_upload_unmap(This->index_uploader);
+        ibuf.user_buffer = NULL;
+    }
 
     This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vbuf);
     This->pipe->set_index_buffer(This->pipe, &ibuf);
@@ -2935,7 +3091,7 @@
     if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS))
         STUB(D3DERR_INVALIDCALL);
 
-    nine_update_state(This, ~0);
+    nine_update_state(This);
 
     /* TODO: Create shader with stream output. */
     STUB(D3DERR_INVALIDCALL);
@@ -3105,6 +3261,13 @@
 
     DBG("This=%p pShader=%p\n", This, pShader);
 
+    if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader)
+      return D3D_OK;
+
+    /* ff -> non-ff: commit back non-ff constants */
+    if (!state->vs && pShader)
+        state->commit |= NINE_STATE_COMMIT_CONST_VS;
+
     nine_bind(&state->vs, pShader);
 
     state->changed.group |= NINE_STATE_VS;
@@ -3139,6 +3302,12 @@
        return D3D_OK;
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
+    if (!This->is_recording) {
+        if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData,
+                    Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
+            return D3D_OK;
+    }
+
     memcpy(&state->vs_const_f[StartRegister * 4],
            pConstantData,
            Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
@@ -3188,6 +3357,11 @@
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (This->driver_caps.vs_integer) {
+        if (!This->is_recording) {
+            if (!memcmp(&state->vs_const_i[StartRegister][0], pConstantData,
+                        Vector4iCount * sizeof(state->vs_const_i[0])))
+                return D3D_OK;
+        }
         memcpy(&state->vs_const_i[StartRegister][0],
                pConstantData,
                Vector4iCount * sizeof(state->vs_const_i[0]));
@@ -3252,6 +3426,16 @@
     user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
+    if (!This->is_recording) {
+        bool noChange = true;
+        for (i = 0; i < BoolCount; i++) {
+            if (!!state->vs_const_b[StartRegister + i] != !!pConstantData[i])
+              noChange = false;
+        }
+        if (noChange)
+            return D3D_OK;
+    }
+
     for (i = 0; i < BoolCount; i++)
         state->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
 
@@ -3433,6 +3617,13 @@
 
     DBG("This=%p pShader=%p\n", This, pShader);
 
+    if (!This->is_recording && state->ps == (struct NinePixelShader9*)pShader)
+      return D3D_OK;
+
+    /* ff -> non-ff: commit back non-ff constants */
+    if (!state->ps && pShader)
+        state->commit |= NINE_STATE_COMMIT_CONST_PS;
+
     nine_bind(&state->ps, pShader);
 
     state->changed.group |= NINE_STATE_PS;
@@ -3473,6 +3664,12 @@
        return D3D_OK;
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
+    if (!This->is_recording) {
+        if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData,
+                    Vector4fCount * 4 * sizeof(state->ps_const_f[0])))
+            return D3D_OK;
+    }
+
     memcpy(&state->ps_const_f[StartRegister * 4],
            pConstantData,
            Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
@@ -3522,6 +3719,11 @@
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (This->driver_caps.ps_integer) {
+        if (!This->is_recording) {
+            if (!memcmp(&state->ps_const_i[StartRegister][0], pConstantData,
+                        Vector4iCount * sizeof(state->ps_const_i[0])))
+                return D3D_OK;
+        }
         memcpy(&state->ps_const_i[StartRegister][0],
                pConstantData,
                Vector4iCount * sizeof(state->ps_const_i[0]));
@@ -3585,6 +3787,16 @@
     user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
+    if (!This->is_recording) {
+        bool noChange = true;
+        for (i = 0; i < BoolCount; i++) {
+            if (!!state->ps_const_b[StartRegister + i] != !!pConstantData[i])
+              noChange = false;
+        }
+        if (noChange)
+            return D3D_OK;
+    }
+
     for (i = 0; i < BoolCount; i++)
         state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9ex.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9ex.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9ex.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9ex.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,7 +29,7 @@
 {
     struct NineDevice9 base;
 };
-static INLINE struct NineDevice9Ex *
+static inline struct NineDevice9Ex *
 NineDevice9Ex( void *data )
 {
     return (struct NineDevice9Ex *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9.h	2015-09-16 14:36:09.000000000 +0000
@@ -69,6 +69,7 @@
     struct nine_state state;   /* device state */
 
     struct list_head update_textures;
+    struct list_head managed_textures;
 
     boolean is_recording;
     boolean in_scene;
@@ -83,7 +84,8 @@
     uint16_t max_ps_const_f;
 
     struct pipe_resource *dummy_texture;
-    struct pipe_sampler_view *dummy_sampler;
+    struct pipe_sampler_view *dummy_sampler_view;
+    struct pipe_sampler_state dummy_sampler_state;
 
     struct gen_mipmap_state *gen_mipmap;
 
@@ -113,6 +115,7 @@
     struct {
         boolean user_vbufs;
         boolean user_ibufs;
+        boolean user_cbufs;
         boolean window_space_position_support;
         boolean vs_integer;
         boolean ps_integer;
@@ -122,7 +125,9 @@
         boolean buggy_barycentrics;
     } driver_bugs;
 
-    struct u_upload_mgr *upload;
+    struct u_upload_mgr *vertex_uploader;
+    struct u_upload_mgr *index_uploader;
+    struct u_upload_mgr *constbuf_uploader;
 
     struct nine_range_pool range_pool;
 
@@ -132,7 +137,7 @@
      * is not bound to anything by the vertex declaration */
     struct pipe_resource *dummy_vbo;
 };
-static INLINE struct NineDevice9 *
+static inline struct NineDevice9 *
 NineDevice9( void *data )
 {
     return (struct NineDevice9 *)data;
@@ -180,10 +185,6 @@
 const D3DCAPS9 *
 NineDevice9_GetCaps( struct NineDevice9 *This );
 
-/* Mask: 0x1 = constant buffers, 0x2 = stipple */
-void
-NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask );
-
 /*** Direct3D public ***/
 
 HRESULT WINAPI
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9video.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9video.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/device9video.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/device9video.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,7 +29,7 @@
 {
     struct NineUnknown base;
 };
-static INLINE struct NineDevice9Video *
+static inline struct NineDevice9Video *
 NineDevice9Video( void *data )
 {
     return (struct NineDevice9Video *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/indexbuffer9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/indexbuffer9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/indexbuffer9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/indexbuffer9.h	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
 
     D3DINDEXBUFFER_DESC desc;
 };
-static INLINE struct NineIndexBuffer9 *
+static inline struct NineIndexBuffer9 *
 NineIndexBuffer9( void *data )
 {
     return (struct NineIndexBuffer9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/iunknown.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/iunknown.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/iunknown.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/iunknown.h	2015-09-16 14:36:09.000000000 +0000
@@ -52,7 +52,7 @@
 
     void (*dtor)(void *data); /* top-level dtor */
 };
-static INLINE struct NineUnknown *
+static inline struct NineUnknown *
 NineUnknown( void *data )
 {
     return (struct NineUnknown *)data;
@@ -94,14 +94,14 @@
 
 /*** Nine private methods ***/
 
-static INLINE void
+static inline void
 NineUnknown_Destroy( struct NineUnknown *This )
 {
     assert(!(This->refs | This->bind));
     This->dtor(This);
 }
 
-static INLINE UINT
+static inline UINT
 NineUnknown_Bind( struct NineUnknown *This )
 {
     UINT b = ++This->bind;
@@ -113,7 +113,7 @@
     return b;
 }
 
-static INLINE UINT
+static inline UINT
 NineUnknown_Unbind( struct NineUnknown *This )
 {
     UINT b = --This->bind;
@@ -129,7 +129,7 @@
     return b;
 }
 
-static INLINE void
+static inline void
 NineUnknown_ConvertRefToBind( struct NineUnknown *This )
 {
     NineUnknown_Bind(This);
@@ -137,7 +137,7 @@
 }
 
 /* Detach from container. */
-static INLINE void
+static inline void
 NineUnknown_Detach( struct NineUnknown *This )
 {
     assert(This->container && !This->forward);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_dump.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_dump.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_dump.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_dump.h	2015-09-16 14:36:09.000000000 +0000
@@ -31,19 +31,19 @@
 
 #else /* !DEBUG */
 
-static INLINE void
+static inline void
 nine_dump_D3DADAPTER_IDENTIFIER9(unsigned ch, const D3DADAPTER_IDENTIFIER9 *id)
 { }
-static INLINE void
+static inline void
 nine_dump_D3DCAPS9(unsigned ch, const D3DCAPS9 *caps)
 { }
-static INLINE void
+static inline void
 nine_dump_D3DLIGHT9(unsigned ch, const D3DLIGHT9 *light)
 { }
-static INLINE void
+static inline void
 nine_dump_D3DMATERIAL9(unsigned ch, const D3DMATERIAL9 *mat)
 { }
-static INLINE void
+static inline void
 nine_dump_D3DTSS_value(unsigned ch, D3DTEXTURESTAGESTATETYPE tss, DWORD value)
 { }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nineexoverlayextension.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nineexoverlayextension.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nineexoverlayextension.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nineexoverlayextension.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,7 +29,7 @@
 {
     struct NineUnknown base;
 };
-static INLINE struct Nine9ExOverlayExtension *
+static inline struct Nine9ExOverlayExtension *
 Nine9ExOverlayExtension( void *data )
 {
     return (struct Nine9ExOverlayExtension *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_ff.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_ff.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_ff.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_ff.c	2015-09-16 14:36:09.000000000 +0000
@@ -22,6 +22,7 @@
 #include "tgsi/tgsi_dump.h"
 #include "util/u_box.h"
 #include "util/u_hash_table.h"
+#include "util/u_upload_mgr.h"
 
 #define NINE_TGSI_LAZY_DEVS 1
 
@@ -30,13 +31,6 @@
 #define NINE_FF_NUM_VS_CONST 256
 #define NINE_FF_NUM_PS_CONST 24
 
-#define NINED3DTSS_TCI_DISABLE                       0
-#define NINED3DTSS_TCI_PASSTHRU                      1
-#define NINED3DTSS_TCI_CAMERASPACENORMAL             2
-#define NINED3DTSS_TCI_CAMERASPACEPOSITION           3
-#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR   4
-#define NINED3DTSS_TCI_SPHEREMAP                     5
-
 struct fvec4
 {
     float x, y, z, w;
@@ -63,16 +57,20 @@
             uint32_t fog_range : 1;
             uint32_t color0in_one : 1;
             uint32_t color1in_one : 1;
-            uint32_t pad1 : 8;
-            uint32_t tc_gen : 24; /* 8 * 3 bits */
-            uint32_t pad2 : 8;
-            uint32_t tc_idx : 24;
+            uint32_t fog : 1;
+            uint32_t pad1 : 7;
+            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
+            uint32_t pad2 : 16;
+            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
             uint32_t pad3 : 8;
-            uint32_t tc_dim : 24; /* 8 * 3 bits */
+            uint32_t tc_gen : 24; /* 8 * 3 bits */
             uint32_t pad4 : 8;
+            uint32_t tc_idx : 24;
+            uint32_t pad5 : 8;
+            uint32_t passthrough;
         };
-        uint64_t value64[2]; /* don't forget to resize VertexShader9.ff_key */
-        uint32_t value32[4];
+        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
+        uint32_t value32[6];
     };
 };
 
@@ -106,15 +104,18 @@
                 uint32_t alphaarg2 : 3;
                 uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                 uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
-                uint32_t projected : 1;
+                uint32_t pad       : 1;
                 /* that's 32 bit exactly */
             } ts[8];
-            uint32_t fog : 1; /* for vFog with programmable VS */
+            uint32_t projected : 16;
+            uint32_t fog : 1; /* for vFog coming from VS */
             uint32_t fog_mode : 2;
-            uint32_t specular : 1; /* 9 32-bit words with this */
+            uint32_t specular : 1;
+            uint32_t pad1 : 12; /* 9 32-bit words with this */
             uint8_t colorarg_b4[3];
             uint8_t colorarg_b5[3];
             uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
+            uint8_t pad2[3];
         };
         uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
         uint32_t value32[12];
@@ -222,7 +223,6 @@
  * CONST[28].x___ RS.FogEnd
  * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
  * CONST[28].__z_ RS.FogDensity
- * CONST[29]      RS.FogColor
 
  * CONST[30].x___ TWEENFACTOR
  *
@@ -295,7 +295,7 @@
     struct ureg_src mtlE;
 };
 
-static INLINE unsigned
+static inline unsigned
 get_texcoord_sn(struct pipe_screen *screen)
 {
     if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
@@ -303,7 +303,7 @@
     return TGSI_SEMANTIC_GENERIC;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
 {
     const unsigned i = vs->num_inputs++;
@@ -313,7 +313,7 @@
 }
 
 /* NOTE: dst may alias src */
-static INLINE void
+static inline void
 ureg_normalize3(struct ureg_program *ureg,
                 struct ureg_dst dst, struct ureg_src src,
                 struct ureg_dst tmp)
@@ -334,16 +334,15 @@
 {
     const struct nine_ff_vs_key *key = vs->key;
     struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
-    struct ureg_dst oPos, oCol[2], oTex[8], oPsz, oFog;
-    struct ureg_dst rCol[2]; /* oCol if no fog, TEMP otherwise */
+    struct ureg_dst oPos, oCol[2], oPsz, oFog;
     struct ureg_dst rVtx, rNrm;
     struct ureg_dst r[8];
     struct ureg_dst AR;
-    struct ureg_dst tmp, tmp_x, tmp_z;
+    struct ureg_dst tmp, tmp_x, tmp_y, tmp_z;
     unsigned i, c;
     unsigned label[32], l = 0;
     unsigned num_r = 8;
-    boolean need_rNrm = key->lighting || key->pointscale;
+    boolean need_rNrm = key->lighting || key->pointscale || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
     boolean need_rVtx = key->lighting || key->fog_mode;
     const unsigned texcoord_sn = get_texcoord_sn(device->screen);
 
@@ -406,9 +405,9 @@
     if (key->vertexpointsize)
         vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
 
-    if (key->vertexblend_indexed)
+    if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
         vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
-    if (key->vertexblend)
+    if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
         vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
     if (key->vertextween) {
         vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
@@ -420,17 +419,16 @@
     oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
     oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
     oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
+    if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
+        oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
+        oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
+    }
 
     if (key->vertexpointsize || key->pointscale) {
-        oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, TGSI_WRITEMASK_X);
+        oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
+                                       TGSI_WRITEMASK_X, 0, 1);
         oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
     }
-    if (key->fog_mode) {
-        /* We apply fog to the vertex colors, oFog is for programmable shaders only ?
-         */
-        oFog = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_WRITEMASK_X);
-        oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
-    }
 
     /* Declare TEMPs:
      */
@@ -438,18 +436,11 @@
         r[i] = ureg_DECL_local_temporary(ureg);
     tmp = r[0];
     tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
+    tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
     tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
     if (key->lighting || key->vertexblend)
         AR = ureg_DECL_address(ureg);
 
-    if (key->fog_mode) {
-        rCol[0] = r[2];
-        rCol[1] = r[3];
-    } else {
-        rCol[0] = oCol[0];
-        rCol[1] = oCol[1];
-    }
-
     rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ);
     rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ);
 
@@ -558,8 +549,6 @@
         ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1));
 #endif
     } else if (key->pointscale) {
-        struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
-        struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
         struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
         struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
 
@@ -580,72 +569,85 @@
 #endif
     }
 
-    /* Texture coordinate generation:
-     * XXX: D3DTTFF_PROJECTED, transform matrix
-     */
     for (i = 0; i < 8; ++i) {
-        struct ureg_dst dst[5];
-        struct ureg_src src;
-        unsigned c;
+        struct ureg_dst oTex, input_coord, transformed, t;
+        unsigned c, writemask;
         const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
         const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
-        const unsigned dim = (key->tc_dim >> (i * 3)) & 0x7;
+        unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
+        const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
 
+        /* No texture coordinate output for index i */
         if (tci == NINED3DTSS_TCI_DISABLE)
             continue;
-        oTex[i] = ureg_DECL_output(ureg, texcoord_sn, i);
-
-        if (tci == NINED3DTSS_TCI_PASSTHRU)
-            vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
-
-        if (!dim) {
-            dst[c = 4] = oTex[i];
-        } else {
-            dst[4] = r[5];
-            src = ureg_src(dst[4]);
-            for (c = 0; c < (dim - 1); ++c)
-                dst[c] = ureg_writemask(tmp, (1 << dim) - 1);
-            dst[c] = ureg_writemask(oTex[i], (1 << dim) - 1);
-        }
+        oTex = ureg_DECL_output(ureg, texcoord_sn, i);
+        input_coord = r[5];
+        transformed = r[6];
 
+        /* Get the coordinate */
         switch (tci) {
         case NINED3DTSS_TCI_PASSTHRU:
-            ureg_MOV(ureg, dst[4], vs->aTex[idx]);
+            /* NINED3DTSS_TCI_PASSTHRU => use the texcoord coming from index idx.
+             * Otherwise idx is used only to determine the wrapping mode. */
+            vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
+            ureg_MOV(ureg, input_coord, vs->aTex[idx]);
             break;
         case NINED3DTSS_TCI_CAMERASPACENORMAL:
-            assert(dim <= 3);
-            ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
-            ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
+            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+            dim_input = 4;
             break;
         case NINED3DTSS_TCI_CAMERASPACEPOSITION:
-            ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
-            ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
+            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+            dim_input = 4;
             break;
         case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
             tmp.WriteMask = TGSI_WRITEMASK_XYZ;
             ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm));
             ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp));
             ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
-            ureg_SUB(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
-            ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+            ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
+            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
+            dim_input = 4;
             tmp.WriteMask = TGSI_WRITEMASK_XYZW;
             break;
         case NINED3DTSS_TCI_SPHEREMAP:
             assert(!"TODO");
             break;
         default:
+            assert(0);
             break;
         }
-        if (!dim)
-            continue;
-        dst[c].WriteMask = ~dst[c].WriteMask;
-        if (dst[c].WriteMask)
-            ureg_MOV(ureg, dst[c], src); /* store untransformed components */
-        dst[c].WriteMask = ~dst[c].WriteMask;
-        if (dim > 0) ureg_MUL(ureg, dst[0], _XXXX(src), _CONST(128 + i * 4));
-        if (dim > 1) ureg_MAD(ureg, dst[1], _YYYY(src), _CONST(129 + i * 4), ureg_src(tmp));
-        if (dim > 2) ureg_MAD(ureg, dst[2], _ZZZZ(src), _CONST(130 + i * 4), ureg_src(tmp));
-        if (dim > 3) ureg_MAD(ureg, dst[3], _WWWW(src), _CONST(131 + i * 4), ureg_src(tmp));
+
+        /* Apply the transformation */
+        /* dim_output == 0 => do not transform the components.
+         * XYZRHW also disables transformation */
+        if (!dim_output || key->position_t) {
+            transformed = input_coord;
+            writemask = TGSI_WRITEMASK_XYZW;
+        } else {
+            for (c = 0; c < dim_output; c++) {
+                t = ureg_writemask(transformed, 1 << c);
+                switch (dim_input) {
+                /* dim_input = 1, 2 or 3: a trailing 1 is appended to the input */
+                case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
+                        break;
+                case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
+                        ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
+                        break;
+                case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
+                        ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
+                        break;
+                case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
+                default:
+                    assert(0);
+                }
+            }
+            writemask = (1 << dim_output) - 1;
+        }
+
+        ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
     }
 
     /* === Lighting:
@@ -690,8 +692,6 @@
      * specular += light.specular * atten * powFact;
      */
     if (key->lighting) {
-        struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
-
         struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W);
         struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ);
         struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ);
@@ -849,22 +849,22 @@
             ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
             ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W  ), vs->mtlA, vs->mtlE);
         }
-        ureg_MAD(ureg, rCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
-        ureg_MUL(ureg, rCol[1], ureg_src(rS), vs->mtlS);
+        ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+        ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
     } else
     /* COLOR */
     if (key->darkness) {
         if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
-            ureg_MAD(ureg, rCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
+            ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
         } else {
-            ureg_MAD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
+            ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
             ureg_ADD(ureg, ureg_writemask(tmp,     TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
-            ureg_ADD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
+            ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
         }
-        ureg_MUL(ureg, rCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
+        ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
     } else {
-        ureg_MOV(ureg, rCol[0], vs->aCol[0]);
-        ureg_MOV(ureg, rCol[1], vs->aCol[1]);
+        ureg_MOV(ureg, oCol[0], vs->aCol[0]);
+        ureg_MOV(ureg, oCol[1], vs->aCol[1]);
     }
 
     /* === Process fog.
@@ -872,10 +872,6 @@
      * exp(x) = ex2(log2(e) * x)
      */
     if (key->fog_mode) {
-        /* Fog doesn't affect alpha, TODO: combine with light code output */
-        ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), _W(rCol[0]));
-        ureg_MOV(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_W), _W(rCol[1]));
-
         if (key->position_t) {
             ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
         } else
@@ -903,10 +899,58 @@
             ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
         }
         ureg_MOV(ureg, oFog, _X(tmp));
-        ureg_LRP(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[0]), _CONST(29));
-        ureg_LRP(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[1]), _CONST(29));
+    } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
+        ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
+    }
+
+    if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
+        struct ureg_src input;
+        struct ureg_dst output;
+        input = vs->aWgt;
+        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
+        ureg_MOV(ureg, output, input);
+    }
+    if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
+        struct ureg_src input;
+        struct ureg_dst output;
+        input = vs->aInd;
+        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
+        ureg_MOV(ureg, output, input);
+    }
+    if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
+        struct ureg_src input;
+        struct ureg_dst output;
+        input = vs->aNrm;
+        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
+        ureg_MOV(ureg, output, input);
+    }
+    if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
+        struct ureg_src input;
+        struct ureg_dst output;
+        input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
+        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
+        ureg_MOV(ureg, output, input);
+    }
+    if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
+        struct ureg_src input;
+        struct ureg_dst output;
+        input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
+        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
+        ureg_MOV(ureg, output, input);
+    }
+    if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
+        struct ureg_src input;
+        struct ureg_dst output;
+        input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
+        input = ureg_scalar(input, TGSI_SWIZZLE_X);
+        output = oFog;
+        ureg_MOV(ureg, output, input);
+    }
+    if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
+        (void) 0; /* TODO: replace z of position output ? */
     }
 
+
     if (key->position_t && device->driver_caps.window_space_position_support)
         ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
 
@@ -1031,7 +1075,7 @@
     }
 }
 
-static INLINE boolean
+static inline boolean
 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
 {
     return !dst.WriteMask ||
@@ -1268,10 +1312,18 @@
             if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
                 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
             }
-            if (key->ts[s].projected)
-                ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
-            else
+            if (key->projected & (3 << (s *2))) {
+                unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
+                if (dim == 4)
+                    ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
+                else {
+                    ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(ps.vT[s], dim-1));
+                    ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), ps.vT[s]);
+                    ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
+                }
+            } else {
                 ureg_TEX(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
+            }
         }
 
         if (s == 0 &&
@@ -1314,6 +1366,10 @@
             colorarg[2] != alphaarg[2])
             dst.WriteMask = TGSI_WRITEMASK_XYZ;
 
+        /* Special DOTPRODUCT behaviour (see wine tests) */
+        if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
+            dst.WriteMask = TGSI_WRITEMASK_XYZW;
+
         if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
         if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
         if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
@@ -1404,12 +1460,18 @@
             else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
                 s = usage / NINE_DECLUSAGE_COUNT;
                 if (s < 8)
-                    input_texture_coord[s] = 1;
+                    input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
                 else
                     DBG("FF given texture coordinate >= 8. Ignoring\n");
-            }
+            } else if (usage < NINE_DECLUSAGE_NONE)
+                key.passthrough |= 1 << usage;
         }
     }
+    /* ff vs + ps 3.0: some elements are passed through to the ps (wine test).
+     * We restrict this to usage index 0. */
+    key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
+                         (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
+                         (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
     if (!key.vertexpointsize)
         key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
 
@@ -1425,6 +1487,7 @@
         key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE];
         key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE];
     }
+    key.fog = !!state->rs[D3DRS_FOGENABLE];
     key.fog_mode = state->rs[D3DRS_FOGENABLE] ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
     if (key.fog_mode)
         key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
@@ -1446,7 +1509,7 @@
 
     for (s = 0; s < 8; ++s) {
         unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
-        unsigned dim = MIN2(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7, 4);
+        unsigned dim;
 
         if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
             gen = NINED3DTSS_TCI_PASSTHRU;
@@ -1456,7 +1519,14 @@
 
         key.tc_gen |= gen << (s * 3);
         key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
-        key.tc_dim |= dim << (s * 3);
+        key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
+
+        dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
+        if (dim > 4)
+            dim = input_texture_coord[s];
+        if (dim == 1) /* NV behaviour */
+            dim = 0;
+        key.tc_dim_output |= dim << (s * 3);
     }
 
     vs = util_hash_table_get(device->ff.ht_vs, &key);
@@ -1471,6 +1541,7 @@
         memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
 
         err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
+        (void)err;
         assert(err == PIPE_OK);
         device->ff.num_vs++;
         NineUnknown_ConvertRefToBind(NineUnknown(vs));
@@ -1541,8 +1612,6 @@
         }
         key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
 
-        key.ts[s].projected = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED);
-
         if (state->texture[s]) {
             switch (state->texture[s]->base.type) {
             case D3DRTYPE_TEXTURE:       key.ts[s].textarget = 1; break;
@@ -1556,10 +1625,14 @@
             key.ts[s].textarget = 1;
         }
     }
+
+    key.projected = nine_ff_get_projected_key(state);
+
     for (; s < 8; ++s)
         key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
     if (state->rs[D3DRS_FOGENABLE])
         key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
+    key.fog = !!state->rs[D3DRS_FOGENABLE];
 
     ps = util_hash_table_get(device->ff.ht_ps, &key);
     if (ps)
@@ -1571,6 +1644,7 @@
         memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
 
         err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
+        (void)err;
         assert(err == PIPE_OK);
         device->ff.num_ps++;
         NineUnknown_ConvertRefToBind(NineUnknown(ps));
@@ -1687,7 +1761,6 @@
     if (isinf(dst[28].y))
         dst[28].y = 0.0f;
     dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
-    d3dcolor_to_rgba(&dst[29].x, state->rs[D3DRS_FOGCOLOR]);
 }
 
 static void
@@ -1701,7 +1774,7 @@
         return;
     for (s = 0; s < 8; ++s) {
         if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
-            M[32 + s] = *nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE);
+            nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
     }
 }
 
@@ -1760,28 +1833,22 @@
 void
 nine_ff_update(struct NineDevice9 *device)
 {
-    struct pipe_context *pipe = device->pipe;
     struct nine_state *state = &device->state;
+    struct pipe_constant_buffer cb;
 
     DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
 
     /* NOTE: the only reference belongs to the hash table */
-    if (!device->state.vs)
+    if (!device->state.vs) {
         device->ff.vs = nine_ff_get_vs(device);
-    if (!device->state.ps)
+        device->state.changed.group |= NINE_STATE_VS;
+    }
+    if (!device->state.ps) {
         device->ff.ps = nine_ff_get_ps(device);
+        device->state.changed.group |= NINE_STATE_PS;
+    }
 
     if (!device->state.vs) {
-        if (device->state.ff.clobber.vs_const) {
-            device->state.ff.clobber.vs_const = FALSE;
-            device->state.changed.group |=
-                NINE_STATE_FF_VSTRANSF |
-                NINE_STATE_FF_MATERIAL |
-                NINE_STATE_FF_LIGHTING |
-                NINE_STATE_FF_OTHER;
-            device->state.ff.changed.transform[0] |= 0xff000c;
-            device->state.ff.changed.transform[8] |= 0xff;
-        }
         nine_ff_load_vs_transforms(device);
         nine_ff_load_tex_matrices(device);
         nine_ff_load_lights(device);
@@ -1790,57 +1857,45 @@
 
         memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
 
-        device->state.changed.group |= NINE_STATE_VS;
-        device->state.changed.group |= NINE_STATE_VS_CONST;
-
-        if (device->prefer_user_constbuf) {
-            struct pipe_context *pipe = device->pipe;
-            struct pipe_constant_buffer cb;
-            cb.buffer_offset = 0;
-            cb.buffer = NULL;
-            cb.user_buffer = device->ff.vs_const;
-            cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
-            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
-        } else {
-            struct pipe_box box;
-            u_box_1d(0, NINE_FF_NUM_VS_CONST * 4 * sizeof(float), &box);
-            pipe->transfer_inline_write(pipe, device->constbuf_vs, 0,
-                                        0, &box,
-                                        device->ff.vs_const, 0, 0);
-            nine_ranges_insert(&device->state.changed.vs_const_f, 0, NINE_FF_NUM_VS_CONST,
-                               &device->range_pool);
+        cb.buffer_offset = 0;
+        cb.buffer = NULL;
+        cb.user_buffer = device->ff.vs_const;
+        cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
+
+        if (!device->driver_caps.user_cbufs) {
+            u_upload_data(device->constbuf_uploader,
+                          0,
+                          cb.buffer_size,
+                          cb.user_buffer,
+                          &cb.buffer_offset,
+                          &cb.buffer);
+            u_upload_unmap(device->constbuf_uploader);
+            cb.user_buffer = NULL;
         }
+        state->pipe.cb_vs_ff = cb;
+        state->commit |= NINE_STATE_COMMIT_CONST_VS;
     }
 
     if (!device->state.ps) {
-        if (device->state.ff.clobber.ps_const) {
-            device->state.ff.clobber.ps_const = FALSE;
-            device->state.changed.group |=
-                NINE_STATE_FF_PSSTAGES |
-                NINE_STATE_FF_OTHER;
-        }
         nine_ff_load_ps_params(device);
 
-        device->state.changed.group |= NINE_STATE_PS;
-        device->state.changed.group |= NINE_STATE_PS_CONST;
-
-        if (device->prefer_user_constbuf) {
-            struct pipe_context *pipe = device->pipe;
-            struct pipe_constant_buffer cb;
-            cb.buffer_offset = 0;
-            cb.buffer = NULL;
-            cb.user_buffer = device->ff.ps_const;
-            cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
-            pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
-        } else {
-            struct pipe_box box;
-            u_box_1d(0, NINE_FF_NUM_PS_CONST * 4 * sizeof(float), &box);
-            pipe->transfer_inline_write(pipe, device->constbuf_ps, 0,
-                                        0, &box,
-                                        device->ff.ps_const, 0, 0);
-            nine_ranges_insert(&device->state.changed.ps_const_f, 0, NINE_FF_NUM_PS_CONST,
-                               &device->range_pool);
+        cb.buffer_offset = 0;
+        cb.buffer = NULL;
+        cb.user_buffer = device->ff.ps_const;
+        cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
+
+        if (!device->driver_caps.user_cbufs) {
+            u_upload_data(device->constbuf_uploader,
+                          0,
+                          cb.buffer_size,
+                          cb.user_buffer,
+                          &cb.buffer_offset,
+                          &cb.buffer);
+            u_upload_unmap(device->constbuf_uploader);
+            cb.user_buffer = NULL;
         }
+        state->pipe.cb_ps_ff = cb;
+        state->commit |= NINE_STATE_COMMIT_CONST_PS;
     }
 
     device->state.changed.group &= ~NINE_STATE_FF;
@@ -1971,7 +2026,7 @@
 }
 */
 
-static INLINE float
+static inline float
 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
 {
     return A->m[r][0] * B->m[0][c] +
@@ -1980,7 +2035,7 @@
            A->m[r][3] * B->m[3][c];
 }
 
-static INLINE float
+static inline float
 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
 {
     return v->x * M->m[0][c] +
@@ -1989,7 +2044,7 @@
            1.0f * M->m[3][c];
 }
 
-static INLINE float
+static inline float
 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
 {
     return v->x * M->m[0][c] +
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_ff.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_ff.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_ff.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_ff.h	2015-09-16 14:36:09.000000000 +0000
@@ -3,6 +3,7 @@
 #define _NINE_FF_H_
 
 #include "device9.h"
+#include "vertexdeclaration9.h"
 
 boolean nine_ff_init(struct NineDevice9 *);
 void    nine_ff_fini(struct NineDevice9 *);
@@ -29,4 +30,84 @@
 void
 nine_d3d_matrix_transpose(D3DMATRIX *, const D3DMATRIX *);
 
+#define NINED3DTSS_TCI_DISABLE                       0
+#define NINED3DTSS_TCI_PASSTHRU                      1
+#define NINED3DTSS_TCI_CAMERASPACENORMAL             2
+#define NINED3DTSS_TCI_CAMERASPACEPOSITION           3
+#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR   4
+#define NINED3DTSS_TCI_SPHEREMAP                     5
+
+static inline unsigned /* dimension count for a D3DDECLTYPE_* vertex element type */
+nine_decltype_get_dim(BYTE type) /* used by nine_ff_get_projected_key for TEXCOORD inputs */
+{
+    switch (type) {
+    case D3DDECLTYPE_FLOAT1: return 1;
+    case D3DDECLTYPE_FLOAT2: return 2;
+    case D3DDECLTYPE_FLOAT3: return 3;
+    case D3DDECLTYPE_FLOAT4: return 4;
+    case D3DDECLTYPE_D3DCOLOR: return 1; /* NOTE(review): 1 here, while UBYTE4/UBYTE4N give 4 - confirm intended */
+    case D3DDECLTYPE_UBYTE4: return 4;
+    case D3DDECLTYPE_SHORT2: return 2;
+    case D3DDECLTYPE_SHORT4: return 4;
+    case D3DDECLTYPE_UBYTE4N: return 4;
+    case D3DDECLTYPE_SHORT2N: return 2;
+    case D3DDECLTYPE_SHORT4N: return 4;
+    case D3DDECLTYPE_USHORT2N: return 2;
+    case D3DDECLTYPE_USHORT4N: return 4;
+    case D3DDECLTYPE_UDEC3: return 3;
+    case D3DDECLTYPE_DEC3N: return 3;
+    case D3DDECLTYPE_FLOAT16_2: return 2;
+    case D3DDECLTYPE_FLOAT16_4: return 4;
+    default: /* any type not listed above */
+        assert(!"Implementation error !");
+    }
+    return 0; /* only reached for an unlisted type when assert() is compiled out */
+}
+
+static inline uint16_t /* key: 2 bits per stage s at bit 2*s, holding (dim - 1) if projected, else 0 */
+nine_ff_get_projected_key(struct nine_state *state)
+{
+    unsigned s, i;
+    uint16_t projected = 0;
+    char input_texture_coord[8]; /* indexed by TEXCOORD usage index; 0 means no such input */
+    memset(&input_texture_coord, 0, sizeof(input_texture_coord));
+    /* Record the dimension of every TEXCOORD element of the vertex declaration. */
+    if (state->vdecl) {
+        for (i = 0; i < state->vdecl->nelems; i++) {
+            uint16_t usage = state->vdecl->usage_map[i];
+            if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
+                s = usage / NINE_DECLUSAGE_COUNT;
+                if (s < 8)
+                    input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
+            }
+        }
+    }
+    /* Build the key one texture stage at a time. */
+    for (s = 0; s < 8; ++s) {
+        unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
+        unsigned dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
+        unsigned proj = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED);
+
+        if (!state->vs) {
+            if (dim > 4)
+                dim = input_texture_coord[s];
+
+            if (!dim && gen == NINED3DTSS_TCI_PASSTHRU)
+                dim = input_texture_coord[s]; /* may still be 0 when there is no TEXCOORD input */
+            else if (!dim)
+                dim = 4;
+
+            if (dim <= 1) /* 1: NV behaviour; 0: (dim-1) below would wrap and set all higher key bits */
+                proj = 0;
+            if (dim > input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
+                proj = 0;
+        } else {
+            dim = 4; /* with a vertex shader bound the flags' count is ignored */
+        }
+        if (proj)
+            projected |= (dim-1) << (2 * s); /* dim is 2..4 here, so the value fits in 2 bits */
+    }
+    return projected;
+}
+
 #endif /* _NINE_FF_H_ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_helpers.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_helpers.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_helpers.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_helpers.c	2015-09-16 14:36:09.000000000 +0000
@@ -49,7 +49,7 @@
     return pool->free;
 }
 
-static INLINE struct nine_range *
+static inline struct nine_range *
 nine_range_pool_get(struct nine_range_pool *pool, int16_t bgn, int16_t end)
 {
     struct nine_range *r = pool->free;
@@ -62,7 +62,7 @@
     return r;
 }
 
-static INLINE void
+static inline void
 nine_ranges_coalesce(struct nine_range *r, struct nine_range_pool *pool)
 {
     struct nine_range *n;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_helpers.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_helpers.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_helpers.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_helpers.h	2015-09-16 14:36:09.000000000 +0000
@@ -123,7 +123,7 @@
     } \
     return D3D_OK
 
-static INLINE float asfloat(DWORD value)
+static inline float asfloat(DWORD value)
 {
     union {
         float f;
@@ -149,14 +149,14 @@
     unsigned num_slabs_max;
 };
 
-static INLINE void
+static inline void
 nine_range_pool_put(struct nine_range_pool *pool, struct nine_range *r)
 {
     r->next = pool->free;
     pool->free = r;
 }
 
-static INLINE void
+static inline void
 nine_range_pool_put_chain(struct nine_range_pool *pool,
                           struct nine_range *head,
                           struct nine_range *tail)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_pipe.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_pipe.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_pipe.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_pipe.c	2015-09-16 14:36:09.000000000 +0000
@@ -27,7 +27,8 @@
 #include "cso_cache/cso_context.h"
 
 void
-nine_convert_dsa_state(struct cso_context *ctx, const DWORD *rs)
+nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *dsa_state,
+                       const DWORD *rs)
 {
     struct pipe_depth_stencil_alpha_state dsa;
 
@@ -65,16 +66,15 @@
         dsa.alpha.ref_value = (float)rs[D3DRS_ALPHAREF] / 255.0f;
     }
 
-    cso_set_depth_stencil_alpha(ctx, &dsa);
+    *dsa_state = dsa;
 }
 
-/* TODO: Keep a static copy in device so we don't have to memset every time ? */
 void
-nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs)
+nine_convert_rasterizer_state(struct pipe_rasterizer_state *rast_state, const DWORD *rs)
 {
     struct pipe_rasterizer_state rast;
 
-    memset(&rast, 0, sizeof(rast)); /* memcmp safety */
+    memset(&rast, 0, sizeof(rast));
 
     rast.flatshade = rs[D3DRS_SHADEMODE] == D3DSHADE_FLAT;
  /* rast.light_twoside = 0; */
@@ -92,7 +92,7 @@
  /* rast.poly_stipple_enable = 0; */
  /* rast.point_smooth = 0; */
     rast.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
-    rast.point_quad_rasterization = !!rs[D3DRS_POINTSPRITEENABLE];
+    rast.point_quad_rasterization = 1;
     rast.point_size_per_vertex = rs[NINED3DRS_VSPOINTSIZE];
     rast.multisample = !!rs[D3DRS_MULTISAMPLEANTIALIAS];
     rast.line_smooth = !!rs[D3DRS_ANTIALIASEDLINEENABLE];
@@ -110,15 +110,31 @@
  /* rast.line_stipple_pattern = 0; */
     rast.sprite_coord_enable = rs[D3DRS_POINTSPRITEENABLE] ? 0xff : 0x00;
     rast.line_width = 1.0f;
-    rast.point_size = rs[NINED3DRS_VSPOINTSIZE] ? 1.0f : asfloat(rs[D3DRS_POINTSIZE]); /* XXX: D3DRS_POINTSIZE_MIN/MAX */
-    rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * asfloat(rs[NINED3DRS_ZBIASSCALE]);
+    if (rs[NINED3DRS_VSPOINTSIZE]) {
+        rast.point_size = 1.0f;
+    } else {
+        rast.point_size = CLAMP(asfloat(rs[D3DRS_POINTSIZE]),
+                asfloat(rs[D3DRS_POINTSIZE_MIN]),
+                asfloat(rs[D3DRS_POINTSIZE_MAX]));
+    }
+    /* offset_units has the ogl/d3d11 meaning.
+     * d3d9: offset = scale * dz + bias
+     * ogl/d3d11: offset = scale * dz + r * bias
+     * with r implementation dependent and is supposed to be
+     * the smallest value the depth buffer format can hold.
+     * In practice on current and past hw it seems to be 2^-23
+     * for all formats except float formats where it varies depending
+     * on the content.
+     * For now use 1 << 23, but in the future perhaps add a way in gallium
+     * to get r for the format or get the gallium behaviour */
+    rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * (float)(1 << 23);
     rast.offset_scale = asfloat(rs[D3DRS_SLOPESCALEDEPTHBIAS]);
  /* rast.offset_clamp = 0.0f; */
 
-    cso_set_rasterizer(ctx, &rast);
+    *rast_state = rast;
 }
 
-static INLINE void
+static inline void
 nine_convert_blend_state_fixup(struct pipe_blend_state *blend, const DWORD *rs)
 {
     if (unlikely(rs[D3DRS_SRCBLEND] == D3DBLEND_BOTHSRCALPHA ||
@@ -137,7 +153,7 @@
 }
 
 void
-nine_convert_blend_state(struct cso_context *ctx, const DWORD *rs)
+nine_convert_blend_state(struct pipe_blend_state *blend_state, const DWORD *rs)
 {
     struct pipe_blend_state blend;
 
@@ -181,7 +197,7 @@
 
     /* blend.force_srgb = !!rs[D3DRS_SRGBWRITEENABLE]; */
 
-    cso_set_blend(ctx, &blend);
+    *blend_state = blend;
 }
 
 void
@@ -239,8 +255,8 @@
     cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL);
     cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
 
-    pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
-    pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, 0, NULL);
+    cso_set_sampler_views(cso, PIPE_SHADER_VERTEX, 0, NULL);
+    cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 0, NULL);
 
     pipe->set_vertex_buffers(pipe, 0, This->caps.MaxStreams, NULL);
     pipe->set_index_buffer(pipe, NULL);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_pipe.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_pipe.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_pipe.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_pipe.h	2015-09-16 14:36:09.000000000 +0000
@@ -27,6 +27,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h" /* pipe_box */
+#include "util/macros.h"
 #include "util/u_rect.h"
 #include "util/u_format.h"
 #include "nine_helpers.h"
@@ -36,14 +37,14 @@
 extern const enum pipe_format nine_d3d9_to_pipe_format_map[120];
 extern const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT];
 
-void nine_convert_dsa_state(struct cso_context *, const DWORD *);
-void nine_convert_rasterizer_state(struct cso_context *, const DWORD *);
-void nine_convert_blend_state(struct cso_context *, const DWORD *);
+void nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *, const DWORD *);
+void nine_convert_rasterizer_state(struct pipe_rasterizer_state *, const DWORD *);
+void nine_convert_blend_state(struct pipe_blend_state *, const DWORD *);
 void nine_convert_sampler_state(struct cso_context *, int idx, const DWORD *);
 
 void nine_pipe_context_clear(struct NineDevice9 *);
 
-static INLINE unsigned d3dlock_buffer_to_pipe_transfer_usage(DWORD Flags)
+static inline unsigned d3dlock_buffer_to_pipe_transfer_usage(DWORD Flags)
 {
     unsigned usage;
 
@@ -70,7 +71,7 @@
     return usage;
 }
 
-static INLINE void
+static inline void
 rect_to_pipe_box(struct pipe_box *dst, const RECT *src)
 {
     dst->x = src->left;
@@ -81,7 +82,50 @@
     dst->depth = 1;
 }
 
-static INLINE boolean
+static inline void /* converts the x/y extent of a pipe_box into a RECT */
+pipe_box_to_rect(RECT *dst, const struct pipe_box *src) /* src->z and src->depth are not read */
+{
+    dst->left = src->x;
+    dst->right = src->x + src->width; /* right = x + width */
+    dst->top = src->y;
+    dst->bottom = src->y + src->height; /* bottom = y + height */
+}
+
+static inline void /* shrinks rect in place */
+rect_minify_inclusive(RECT *rect)
+{
+    rect->left = rect->left >> 2; /* NOTE(review): left/top shift by 2 (i.e. /4) while right/bottom use /2 - confirm intended */
+    rect->top = rect->top >> 2;
+    rect->right = DIV_ROUND_UP(rect->right, 2); /* halve, rounding up */
+    rect->bottom = DIV_ROUND_UP(rect->bottom, 2);
+}
+
+/* For compressed formats, grows rect outwards to block boundaries; then,
+ * for every format, clamps right/bottom to width/height. We suppose:
+ * 0 <= rect->left < rect->right and 0 <= rect->top < rect->bottom
+ */
+static inline void
+fit_rect_format_inclusive(enum pipe_format format, RECT *rect, int width, int height)
+{
+    const unsigned w = util_format_get_blockwidth(format);
+    const unsigned h = util_format_get_blockheight(format);
+
+    if (util_format_is_compressed(format)) {
+        rect->left = rect->left - rect->left % w; /* round down to a multiple of w */
+        rect->top = rect->top - rect->top % h; /* round down to a multiple of h */
+        rect->right = (rect->right % w) == 0 ? /* round up to a multiple of w */
+            rect->right :
+            rect->right - (rect->right % w) + w;
+        rect->bottom = (rect->bottom % h) == 0 ? /* round up to a multiple of h */
+            rect->bottom :
+            rect->bottom - (rect->bottom % h) + h;
+    }
+
+    rect->right = MIN2(rect->right, width); /* rounding up may have gone past width/height */
+    rect->bottom = MIN2(rect->bottom, height);
+}
+
+static inline boolean
 rect_to_pipe_box_clamp(struct pipe_box *dst, const RECT *src)
 {
     rect_to_pipe_box(dst, src);
@@ -95,7 +139,7 @@
     return FALSE;
 }
 
-static INLINE boolean
+static inline boolean
 rect_to_pipe_box_flip(struct pipe_box *dst, const RECT *src)
 {
     rect_to_pipe_box(dst, src);
@@ -107,7 +151,7 @@
     return TRUE;
 }
 
-static INLINE void
+static inline void
 rect_to_pipe_box_xy_only(struct pipe_box *dst, const RECT *src)
 {
     user_warn(src->left > src->right || src->top > src->bottom);
@@ -118,7 +162,7 @@
     dst->height = src->bottom - src->top;
 }
 
-static INLINE boolean
+static inline boolean
 rect_to_pipe_box_xy_only_clamp(struct pipe_box *dst, const RECT *src)
 {
     rect_to_pipe_box_xy_only(dst, src);
@@ -132,7 +176,7 @@
     return FALSE;
 }
 
-static INLINE void
+static inline void
 rect_to_g3d_u_rect(struct u_rect *dst, const RECT *src)
 {
     user_warn(src->left > src->right || src->top > src->bottom);
@@ -143,7 +187,7 @@
     dst->y1 = src->bottom;
 }
 
-static INLINE void
+static inline void
 d3dbox_to_pipe_box(struct pipe_box *dst, const D3DBOX *src)
 {
     user_warn(src->Left > src->Right);
@@ -158,13 +202,30 @@
     dst->depth = src->Back - src->Front;
 }
 
-static INLINE D3DFORMAT
+static inline D3DFORMAT
 pipe_to_d3d9_format(enum pipe_format format)
 {
     return nine_pipe_to_d3d9_format_map[format];
 }
 
-static INLINE boolean
+/* TRUE for D3DFMT_DXT1..DXT5 only. ATI1 and ATI2 are not officially compressed in d3d9 */
+static inline boolean
+compressed_format( D3DFORMAT fmt )
+{
+    switch (fmt) {
+    case D3DFMT_DXT1:
+    case D3DFMT_DXT2:
+    case D3DFMT_DXT3:
+    case D3DFMT_DXT4:
+    case D3DFMT_DXT5:
+        return TRUE;
+    default: /* every other format, ATI1/ATI2 included, falls through to FALSE */
+        break;
+    }
+    return FALSE;
+}
+
+static inline boolean
 depth_stencil_format( D3DFORMAT fmt )
 {
     static D3DFORMAT allowed[] = {
@@ -190,7 +251,7 @@
     return FALSE;
 }
 
-static INLINE unsigned
+static inline unsigned
 d3d9_get_pipe_depth_format_bindings(D3DFORMAT format)
 {
     switch (format) {
@@ -215,7 +276,7 @@
     }
 }
 
-static INLINE enum pipe_format
+static inline enum pipe_format
 d3d9_to_pipe_format_internal(D3DFORMAT format)
 {
     if (format <= D3DFMT_A2B10G10R10_XR_BIAS)
@@ -257,7 +318,7 @@
     screen->is_format_supported(screen, pipe_format, target, \
                                 sample_count, bindings)
 
-static INLINE enum pipe_format
+static inline enum pipe_format
 d3d9_to_pipe_format_checked(struct pipe_screen *screen,
                             D3DFORMAT format,
                             enum pipe_texture_target target,
@@ -298,7 +359,7 @@
     return PIPE_FORMAT_NONE;
 }
 
-static INLINE const char *
+static inline const char *
 d3dformat_to_string(D3DFORMAT fmt)
 {
     switch (fmt) {
@@ -381,7 +442,7 @@
     return "Unknown";
 }
 
-static INLINE unsigned
+static inline unsigned
 nine_fvf_stride( DWORD fvf )
 {
     unsigned texcount, i, size = 0;
@@ -428,7 +489,7 @@
     return size;
 }
 
-static INLINE void
+static inline void
 d3dcolor_to_rgba(float *rgba, D3DCOLOR color)
 {
     rgba[0] = (float)((color >> 16) & 0xFF) / 0xFF;
@@ -437,13 +498,13 @@
     rgba[3] = (float)((color >> 24) & 0xFF) / 0xFF;
 }
 
-static INLINE void
+static inline void
 d3dcolor_to_pipe_color_union(union pipe_color_union *rgba, D3DCOLOR color)
 {
     d3dcolor_to_rgba(&rgba->f[0], color);
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dprimitivetype_to_pipe_prim(D3DPRIMITIVETYPE prim)
 {
     switch (prim) {
@@ -459,7 +520,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 prim_count_to_vertex_count(D3DPRIMITIVETYPE prim, UINT count)
 {
     switch (prim) {
@@ -475,7 +536,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dcmpfunc_to_pipe_func(D3DCMPFUNC func)
 {
     switch (func) {
@@ -494,7 +555,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dstencilop_to_pipe_stencil_op(D3DSTENCILOP op)
 {
     switch (op) {
@@ -511,7 +572,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dcull_to_pipe_face(D3DCULL cull)
 {
     switch (cull) {
@@ -524,7 +585,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dfillmode_to_pipe_polygon_mode(D3DFILLMODE mode)
 {
     switch (mode) {
@@ -538,7 +599,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dblendop_to_pipe_blend(D3DBLENDOP op)
 {
     switch (op) {
@@ -557,7 +618,7 @@
  * Drivers may check RGB and ALPHA factors for equality so we should not
  * simply substitute the ALPHA variants.
  */
-static INLINE unsigned
+static inline unsigned
 d3dblend_alpha_to_pipe_blendfactor(D3DBLEND b)
 {
     switch (b) {
@@ -584,7 +645,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dblend_color_to_pipe_blendfactor(D3DBLEND b)
 {
     switch (b) {
@@ -611,7 +672,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dtextureaddress_to_pipe_tex_wrap(D3DTEXTUREADDRESS addr)
 {
     switch (addr) {
@@ -626,7 +687,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dtexturefiltertype_to_pipe_tex_filter(D3DTEXTUREFILTERTYPE filter)
 {
     switch (filter) {
@@ -644,7 +705,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 d3dtexturefiltertype_to_pipe_tex_mipfilter(D3DTEXTUREFILTERTYPE filter)
 {
     switch (filter) {
@@ -662,7 +723,7 @@
     }
 }
 
-static INLINE unsigned nine_format_get_stride(enum pipe_format format,
+static inline unsigned nine_format_get_stride(enum pipe_format format,
                                               unsigned width)
 {
     unsigned stride = util_format_get_stride(format, width);
@@ -670,7 +731,7 @@
     return align(stride, 4);
 }
 
-static INLINE unsigned nine_format_get_level_alloc_size(enum pipe_format format,
+static inline unsigned nine_format_get_level_alloc_size(enum pipe_format format,
                                                         unsigned width,
                                                         unsigned height,
                                                         unsigned level)
@@ -684,7 +745,7 @@
     return size;
 }
 
-static INLINE unsigned nine_format_get_size_and_offsets(enum pipe_format format,
+static inline unsigned nine_format_get_size_and_offsets(enum pipe_format format,
                                                         unsigned *offsets,
                                                         unsigned width,
                                                         unsigned height,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_shader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_shader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_shader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_shader.c	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
 
 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
 
-static INLINE const char *d3dsio_to_string(unsigned opcode);
+static inline const char *d3dsio_to_string(unsigned opcode);
 
 
 #define NINED3D_SM1_VS 0xfffe
@@ -89,6 +89,15 @@
 #define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
 
+#define NINE_CONSTANT_SRC(index) \
+   ureg_src_register(TGSI_FILE_CONSTANT, index)
+
+#define NINE_APPLY_SWIZZLE(src, s) \
+   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
+
+#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
+   NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
+
 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
@@ -239,7 +248,7 @@
     BYTE type;
 };
 
-static INLINE void
+static inline void
 assert_replicate_swizzle(const struct ureg_src *reg)
 {
     assert(reg->SwizzleY == reg->SwizzleX &&
@@ -444,6 +453,9 @@
         BYTE minor;
     } version;
     unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */
+    unsigned num_constf_allowed;
+    unsigned num_consti_allowed;
+    unsigned num_constb_allowed;
 
     boolean native_integers;
     boolean inline_subroutines;
@@ -505,7 +517,6 @@
 
 #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX)
 #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT)
-#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3)
 
 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
 
@@ -528,7 +539,7 @@
 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
 {
    INT i;
-   if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) {
+   if (index < 0 || index >= tx->num_constf_allowed) {
        tx->failure = TRUE;
        return FALSE;
    }
@@ -543,7 +554,7 @@
 static boolean
 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
 {
-   if (index < 0 || index >= NINE_MAX_CONST_I) {
+   if (index < 0 || index >= tx->num_consti_allowed) {
        tx->failure = TRUE;
        return FALSE;
    }
@@ -554,7 +565,7 @@
 static boolean
 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
 {
-   if (index < 0 || index >= NINE_MAX_CONST_B) {
+   if (index < 0 || index >= tx->num_constb_allowed) {
        tx->failure = TRUE;
        return FALSE;
    }
@@ -568,9 +579,7 @@
 {
     unsigned n;
 
-    FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER)
-    if (IS_VS && index >= NINE_MAX_CONST_F_SHADER)
-        WARN("lconstf index %i too high, indirect access won't work\n", index);
+    FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
 
     for (n = 0; n < tx->num_lconstf; ++n)
         if (tx->lconstf[n].idx == index)
@@ -592,7 +601,7 @@
 static void
 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
 {
-    FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I)
+    FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
     tx->lconsti[index].idx = index;
     tx->lconsti[index].reg = tx->native_integers ?
        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
@@ -601,14 +610,14 @@
 static void
 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
 {
-    FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B)
+    FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
     tx->lconstb[index].idx = index;
     tx->lconstb[index].reg = tx->native_integers ?
        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 tx_scratch(struct shader_translator *tx)
 {
     if (tx->num_scratch >= Elements(tx->regs.t)) {
@@ -620,13 +629,13 @@
     return tx->regs.t[tx->num_scratch++];
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 tx_scratch_scalar(struct shader_translator *tx)
 {
     return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 tx_src_scalar(struct ureg_dst dst)
 {
     struct ureg_src src = ureg_src(dst);
@@ -636,7 +645,7 @@
     return src;
 }
 
-static INLINE void
+static inline void
 tx_temp_alloc(struct shader_translator *tx, INT idx)
 {
     assert(idx >= 0);
@@ -654,7 +663,7 @@
         tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
 }
 
-static INLINE void
+static inline void
 tx_addr_alloc(struct shader_translator *tx, INT idx)
 {
     assert(idx == 0);
@@ -664,7 +673,7 @@
         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
 }
 
-static INLINE void
+static inline void
 tx_pred_alloc(struct shader_translator *tx, INT idx)
 {
     assert(idx == 0);
@@ -672,7 +681,55 @@
         tx->regs.p = ureg_DECL_predicate(tx->ureg);
 }
 
-static INLINE void
+/* Writes src to dst, multiplied by 1 / src[dim - 1] when the 2-bit field for
+ * stage idx in tx->info->projected is non-zero (dim = field + 1).
+ * NOTE: It is not very clear to which ps1.1-ps1.3 instructions the
+ * projection should be applied. It doesn't apply to texkill.
+ * The doc is very imprecise here (it says the projection is done
+ * before rasterization, thus in vs, which seems wrong since ps instructions
+ * are affected differently). For now we only apply it to the ps TEX
+ * instruction and TEXBEM. Perhaps some other instructions would need it. */
+static inline void
+apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
+                      struct ureg_src src, INT idx)
+{
+    struct ureg_dst tmp;
+    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); /* 1..4 */
+
+    /* no projection */
+    if (dim == 1) {
+        ureg_MOV(tx->ureg, dst, src);
+    } else {
+        tmp = tx_scratch_scalar(tx); /* scratch register, written through .x only */
+        ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); /* tmp = RCP of component (dim - 1) of src */
+        ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); /* dst = tmp * src */
+    }
+}
+
+static inline void /* emits a texture fetch into dst, projected or not depending on tx->info->projected */
+TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
+                         unsigned target, struct ureg_src src0,
+                         struct ureg_src src1, INT idx)
+{
+    unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); /* 2-bit field of stage idx, plus 1 */
+    struct ureg_dst tmp;
+
+    /* dim == 1: no projection.
+     * Projection apparently must also be disabled when it makes no
+     * sense for the texture dimensions (tested as dim <= target).
+     */
+    if (dim == 1 || dim <= target) { /* NOTE(review): compares dim with the numeric value of target - confirm enum layout */
+        ureg_TEX(tx->ureg, dst, target, src0, src1);
+    } else if (dim == 4) {
+        ureg_TXP(tx->ureg, dst, target, src0, src1); /* dim == 4 uses TXP instead of a manual divide */
+    } else {
+        tmp = tx_scratch(tx);
+        apply_ps1x_projection(tx, tmp, src0, idx); /* tmp = src0 scaled by 1 / src0[dim - 1] */
+        ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
+    }
+}
+
+static inline void
 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
 {
     assert(IS_PS);
@@ -682,7 +739,7 @@
                                              TGSI_INTERPOLATE_PERSPECTIVE);
 }
 
-static INLINE unsigned *
+static inline unsigned *
 tx_bgnloop(struct shader_translator *tx)
 {
     tx->loop_depth++;
@@ -692,7 +749,7 @@
     return &tx->loop_labels[tx->loop_depth - 1];
 }
 
-static INLINE unsigned *
+static inline unsigned *
 tx_endloop(struct shader_translator *tx)
 {
     assert(tx->loop_depth);
@@ -741,7 +798,7 @@
     return ureg_src_undef();
 }
 
-static INLINE unsigned *
+static inline unsigned *
 tx_cond(struct shader_translator *tx)
 {
    assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
@@ -749,14 +806,14 @@
    return &tx->cond_labels[tx->cond_depth - 1];
 }
 
-static INLINE unsigned *
+static inline unsigned *
 tx_elsecond(struct shader_translator *tx)
 {
    assert(tx->cond_depth);
    return &tx->cond_labels[tx->cond_depth - 1];
 }
 
-static INLINE void
+static inline void
 tx_endcond(struct shader_translator *tx)
 {
    assert(tx->cond_depth);
@@ -765,7 +822,7 @@
                     ureg_get_instruction_number(tx->ureg));
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 nine_ureg_dst_register(unsigned file, int index)
 {
     return ureg_dst(ureg_src_register(file, index));
@@ -1086,9 +1143,18 @@
         assert(param->idx >= 0 && param->idx < 4);
         assert(!param->rel);
         tx->info->rt_mask |= 1 << param->idx;
-        if (ureg_dst_is_undef(tx->regs.oCol[param->idx]))
-            tx->regs.oCol[param->idx] =
-               ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
+        if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
+            /* ps < 3: oCol[0] goes to a temporary; fog is blended into it afterward.
+             * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) is forced to 0 even if written */
+            if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
+                tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
+            } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
+                tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
+            } else {
+                tx->regs.oCol[param->idx] =
+                    ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
+            }
+        }
         dst = tx->regs.oCol[param->idx];
         if (IS_VS && tx->version.major < 3)
             dst = ureg_saturate(dst);
@@ -1098,7 +1164,7 @@
         if (ureg_dst_is_undef(tx->regs.oDepth))
            tx->regs.oDepth =
               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
-                                      TGSI_WRITEMASK_Z);
+                                      TGSI_WRITEMASK_Z, 0, 1);
         dst = tx->regs.oDepth; /* XXX: must write .z component */
         break;
     case D3DSPR_PREDICATE:
@@ -1240,7 +1306,7 @@
 #define VNOTSUPPORTED   0, 0
 #define V(maj, min)     (((maj) << 8) | (min))
 
-static INLINE const char *
+static inline const char *
 d3dsio_to_string( unsigned opcode )
 {
     static const char *names[] = {
@@ -1657,7 +1723,7 @@
     return D3D_OK;
 }
 
-static INLINE unsigned
+static inline unsigned
 sm1_insn_flags_to_tgsi_setop(BYTE flags)
 {
     switch (flags) {
@@ -1724,7 +1790,7 @@
     [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
 };
 
-static INLINE unsigned
+static inline unsigned
 sm1_to_nine_declusage(struct sm1_semantic *dcl)
 {
     return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
@@ -1824,7 +1890,7 @@
         sem->Index = 0;
         break;
     default:
-        assert(!"Invalid DECLUSAGE.");
+        unreachable("Invalid DECLUSAGE."); /* the macro negates the string itself */
         break;
     }
 }
@@ -1833,7 +1899,7 @@
 #define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
 #define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
-static INLINE unsigned
+static inline unsigned
 d3dstt_to_tgsi_tex(BYTE sampler_type)
 {
     switch (sampler_type) {
@@ -1846,7 +1912,7 @@
         return TGSI_TEXTURE_UNKNOWN;
     }
 }
-static INLINE unsigned
+static inline unsigned
 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
 {
     switch (sampler_type) {
@@ -1859,7 +1925,7 @@
         return TGSI_TEXTURE_UNKNOWN;
     }
 }
-static INLINE unsigned
+static inline unsigned
 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
 {
     switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
@@ -1884,7 +1950,7 @@
     }
 }
 
-static INLINE unsigned
+static inline unsigned
 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
 {
     switch (sem->Name) {
@@ -1966,7 +2032,7 @@
                 tx->info->position_t = TRUE;
             assert(sem.reg.idx < Elements(tx->regs.o));
             tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
-                ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
+                ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
 
             if (tgsi.Name == TGSI_SEMANTIC_PSIZE)
                 tx->regs.oPts = tx->regs.o[sem.reg.idx];
@@ -1979,12 +2045,13 @@
                 ureg, tgsi.Name, tgsi.Index,
                 nine_tgsi_to_interp_mode(&tgsi),
                 0, /* cylwrap */
-                sem.reg.mod & NINED3DSPDM_CENTROID);
+                sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
         } else
         if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
             /* FragColor or FragDepth */
             assert(sem.reg.mask != 0);
-            ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask);
+            ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
+                                    0, 1);
         }
     }
     return D3D_OK;
@@ -2134,12 +2201,79 @@
 
 DECL_SPECIAL(TEXBEM)
 {
-    STUB(D3DERR_INVALIDCALL);
-}
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_dst tmp, tmp2, texcoord;
+    struct ureg_src sample, m00, m01, m10, m11;
+    struct ureg_src bumpenvlscale, bumpenvloffset;
+    const int m = tx->insn.dst[0].idx;
+    const int n = tx->insn.src[0].idx;
 
-DECL_SPECIAL(TEXBEML)
-{
-    STUB(D3DERR_INVALIDCALL);
+    assert(tx->version.major == 1);
+
+    sample = ureg_DECL_sampler(ureg, m);
+    tx->info->sampler_mask |= 1 << m;
+
+    tx_texcoord_alloc(tx, m);
+
+    tmp = tx_scratch(tx);
+    tmp2 = tx_scratch(tx);
+    texcoord = tx_scratch(tx);
+    /*
+     * Handles both TEXBEM and TEXBEML (see the opcode check below).
+     * The bump-env matrix of stage m is read from float constant
+     * 8 + m, packed as:
+     * 00 is X, 01 is Y,
+     * 10 is Z, 11 is W
+     */
+    nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2); /* c[16 + m/2]: luminance scale/offset */
+    m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
+    m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
+    m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
+    m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
+
+    /* Scale/offset of two stages share one constant: X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
+    if (m % 2 == 0) {
+        bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
+        bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
+    } else {
+        bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
+        bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
+    }
+
+    apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
+
+    /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
+             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
+    /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
+             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
+             NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
+
+    /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
+             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
+    /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
+             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
+             NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
+
+    /* Now the perturbed texture coordinates (u', v') are in tmp.xy */
+
+    if (tx->insn.opcode == D3DSIO_TEXBEM) {
+        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+    } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
+        /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
+        ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+        ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
+                 bumpenvlscale, bumpenvloffset);
+        ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
+    }
+
+    tx->info->bumpenvmat_needed = 1;
+
+    return D3D_OK;
 }
 
 DECL_SPECIAL(TEXREG2AR)
@@ -2312,7 +2446,8 @@
     ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
              ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
     /* replace the depth for depth testing with the result */
-    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
+    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
+                                              TGSI_WRITEMASK_Z, 0, 1);
     ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
     /* note that we write nothing to the destination, since it's disallowed to use it afterward */
     return D3D_OK;
@@ -2410,7 +2545,8 @@
     ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
              r5r, ureg_imm1f(ureg, 1.0f));
     /* replace the depth for depth testing with the result */
-    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z);
+    tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
+                                              TGSI_WRITEMASK_Z, 0, 1);
     ureg_MOV(ureg, tx->regs.oDepth, r5r);
 
     return D3D_OK;
@@ -2418,7 +2554,43 @@
 
 DECL_SPECIAL(BEM)
 {
-    STUB(D3DERR_INVALIDCALL);
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+    struct ureg_src m00, m01, m10, m11;
+    const int m = tx->insn.dst[0].idx;
+    struct ureg_dst tmp = tx_scratch(tx); /* accumulates dest.rg */
+    /*
+     * Computes dst.rg = src0.rg + bump-env matrix * src1.rg.
+     * The matrix is packed in float constant 8 + m:
+     * 00 is X, 01 is Y,
+     * 10 is Z, 11 is W.
+     * The sum is built in tmp, then copied to dst.xy.
+     */
+    nine_info_mark_const_f_used(tx->info, 8 + m);
+    m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
+    m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
+    m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
+    m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
+    /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
+             NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
+    /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
+             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
+
+    /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
+             NINE_APPLY_SWIZZLE(src1, X), src0);
+    /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
+             NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
+    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
+
+    tx->info->bumpenvmat_needed = 1;
+
+    return D3D_OK;
 }
 
 DECL_SPECIAL(TEXLD)
@@ -2479,7 +2651,7 @@
     src[1] = ureg_DECL_sampler(ureg, s);
     tx->info->sampler_mask |= 1 << s;
 
-    ureg_TEX(ureg, dst, t, src[0], src[1]);
+    TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
 
     return D3D_OK;
 }
@@ -2613,7 +2785,7 @@
     _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
     _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
     _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
-    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
+    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
     _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
     _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
     _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
@@ -2682,7 +2854,7 @@
     }
 }
 
-static INLINE HRESULT
+static inline HRESULT
 NineTranslateInstruction_Generic(struct shader_translator *tx)
 {
     struct ureg_dst dst[1];
@@ -2700,19 +2872,19 @@
     return D3D_OK;
 }
 
-static INLINE DWORD
+static inline DWORD
 TOKEN_PEEK(struct shader_translator *tx)
 {
     return *(tx->parse);
 }
 
-static INLINE DWORD
+static inline DWORD
 TOKEN_NEXT(struct shader_translator *tx)
 {
     return *(tx->parse)++;
 }
 
-static INLINE void
+static inline void
 TOKEN_JUMP(struct shader_translator *tx)
 {
     if (tx->parse_next && tx->parse != tx->parse_next) {
@@ -2721,7 +2893,7 @@
     }
 }
 
-static INLINE boolean
+static inline boolean
 sm1_parse_eof(struct shader_translator *tx)
 {
     return TOKEN_PEEK(tx) == NINED3DSP_END;
@@ -3020,6 +3192,8 @@
     info->lconstf.data = NULL;
     info->lconstf.ranges = NULL;
 
+    info->bumpenvmat_needed = 0;
+
     for (i = 0; i < Elements(tx->regs.rL); ++i) {
         tx->regs.rL[i] = ureg_dst_undef();
     }
@@ -3060,7 +3234,7 @@
     FREE(tx);
 }
 
-static INLINE unsigned
+static inline unsigned
 tgsi_processor_from_type(unsigned shader_type)
 {
     switch (shader_type) {
@@ -3071,6 +3245,57 @@
     }
 }
 
+static void
+shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
+{ /* Writes src_col to the COLOR0 output, blending in the fog color when fog is enabled. */
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+    struct ureg_src fog_end, fog_coeff, fog_density;
+    struct ureg_src fog_vs, depth, fog_color;
+    struct ureg_dst fog_factor;
+
+    if (!tx->info->fog_enable) { /* fog off: plain copy, nothing else emitted */
+        ureg_MOV(ureg, oCol0, src_col);
+        return;
+    }
+
+    if (tx->info->fog_mode != D3DFOG_NONE) /* computed fog modes read POSITION.z */
+        depth = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+                                              TGSI_INTERPOLATE_LINEAR),
+                                              TGSI_SWIZZLE_Z);
+
+    nine_info_mark_const_f_used(tx->info, 33); /* c32 = fog color, c33 = fog parameters */
+    fog_color = NINE_CONSTANT_SRC(32);
+    fog_factor = tx_scratch_scalar(tx);
+
+    if (tx->info->fog_mode == D3DFOG_LINEAR) { /* factor = saturate((end - depth) * coeff) */
+        fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+        fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
+        ureg_SUB(ureg, fog_factor, fog_end, depth);
+        ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
+    } else if (tx->info->fog_mode == D3DFOG_EXP) { /* factor = 2^(-1.442695 * depth * density) */
+        fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+        ureg_MUL(ureg, fog_factor, depth, fog_density);
+        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
+        ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
+    } else if (tx->info->fog_mode == D3DFOG_EXP2) { /* factor = 2^(-1.442695 * (depth * density)^2) */
+        fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
+        ureg_MUL(ureg, fog_factor, depth, fog_density);
+        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
+        ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
+        ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
+    } else { /* otherwise: take the factor from the interpolated FOG input */
+        fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
+                                            TGSI_INTERPOLATE_PERSPECTIVE),
+                                            TGSI_SWIZZLE_X);
+        ureg_MOV(ureg, fog_factor, fog_vs);
+    }
+
+    ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
+             tx_src_scalar(fog_factor), src_col, fog_color); /* rgb = f*src_col + (1-f)*fog_color */
+    ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); /* alpha is copied unfogged */
+}
+
 #define GET_CAP(n) device->screen->get_param( \
       device->screen, PIPE_CAP_##n)
 #define GET_SHADER_CAP(n) device->screen->get_shader_param( \
@@ -3120,6 +3345,24 @@
     tx->texcoord_sn = tx->want_texcoord ?
         TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
 
+    if (IS_VS) {
+        tx->num_constf_allowed = NINE_MAX_CONST_F;
+    } else if (tx->version.major < 2) {/* IS_PS v1 */
+        tx->num_constf_allowed = 8;
+    } else if (tx->version.major == 2) {/* IS_PS v2 */
+        tx->num_constf_allowed = 32;
+    } else {/* IS_PS v3 */
+        tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
+    }
+
+    if (tx->version.major < 2) {
+        tx->num_consti_allowed = 0;
+        tx->num_constb_allowed = 0;
+    } else {
+        tx->num_consti_allowed = NINE_MAX_CONST_I;
+        tx->num_constb_allowed = NINE_MAX_CONST_B;
+    }
+
     /* VS must always write position. Declare it here to make it the 1st output.
      * (Some drivers like nv50 are buggy and rely on that.)
      */
@@ -3142,10 +3385,26 @@
         goto out;
     }
 
-    if (IS_PS && (tx->version.major < 2) && tx->num_temp) {
-        ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0),
-                 ureg_src(tx->regs.r[0]));
-        info->rt_mask |= 0x1;
+    if (IS_PS && tx->version.major < 3) {
+        if (tx->version.major < 2) {
+            assert(tx->num_temp); /* there must be color output */
+            info->rt_mask |= 0x1;
+            shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
+        } else {
+            shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
+        }
+    }
+
+    if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
+        tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
+        ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
+    }
+
+    /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) is forced to 0 even if the shader wrote it */
+    if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
+        struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
+        ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
+        ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
     }
 
     if (info->position_t)
@@ -3230,6 +3489,7 @@
                        info->const_int_slots > 0 ?
                            max_const_f + info->const_int_slots :
                                info->const_float_slots;
+
     info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
 
     for (s = 0; s < slot_max; s++)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_shader.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -59,6 +59,10 @@
     uint16_t sampler_mask_shadow; /* in, which samplers use depth compare */
     uint8_t rt_mask; /* out, which render targets are being written */
 
+    uint8_t fog_enable;
+    uint8_t fog_mode;
+    uint16_t projected; /* ps 1.1 to 1.3 */
+
     unsigned const_i_base; /* in vec4 (16 byte) units */
     unsigned const_b_base; /* in vec4 (16 byte) units */
     unsigned const_used_size;
@@ -68,21 +72,22 @@
     unsigned const_bool_slots;
 
     struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
+    uint8_t bumpenvmat_needed;
 };
 
-static INLINE void
+static inline void
 nine_info_mark_const_f_used(struct nine_shader_info *info, int idx)
 {
     if (info->const_float_slots < (idx + 1))
         info->const_float_slots = idx + 1;
 }
-static INLINE void
+static inline void
 nine_info_mark_const_i_used(struct nine_shader_info *info, int idx)
 {
     if (info->const_int_slots < (idx + 1))
         info->const_int_slots = idx + 1;
 }
-static INLINE void
+static inline void
 nine_info_mark_const_b_used(struct nine_shader_info *info, int idx)
 {
     if (info->const_bool_slots < (idx + 1))
@@ -100,7 +105,7 @@
     uint32_t key;
 };
 
-static INLINE void *
+static inline void *
 nine_shader_variant_get(struct nine_shader_variant *list, uint32_t key)
 {
     while (list->key != key && list->next)
@@ -110,7 +115,7 @@
     return NULL;
 }
 
-static INLINE boolean
+static inline boolean
 nine_shader_variant_add(struct nine_shader_variant *list,
                         uint32_t key, void *cso)
 {
@@ -127,7 +132,7 @@
     return TRUE;
 }
 
-static INLINE void
+static inline void
 nine_shader_variants_free(struct nine_shader_variant *list)
 {
     while (list->next) {
@@ -135,6 +140,50 @@
         list->next = ptr->next;
         FREE(ptr);
     }
+}
+
+struct nine_shader_variant64
+{
+    struct nine_shader_variant64 *next; /* next node of the singly linked list, or NULL */
+    void *cso; /* opaque object stored under key */
+    uint64_t key; /* 64-bit lookup key */
+};
+
+static inline void *
+nine_shader_variant_get64(struct nine_shader_variant64 *list, uint64_t key)
+{ /* Returns the cso stored under key (head node included), or NULL if absent. */
+    while (list->key != key && list->next)
+        list = list->next;
+    if (list->key == key) /* the loop may also stop at the tail without a match */
+        return list->cso;
+    return NULL;
+}
+
+static inline boolean
+nine_shader_variant_add64(struct nine_shader_variant64 *list,
+                          uint64_t key, void *cso)
+{ /* Appends a new (key, cso) node at the tail; returns FALSE if allocation fails. */
+    while (list->next) {
+        assert(list->key != key); /* key is not expected to be present already */
+        list = list->next;
+    }
+    list->next = MALLOC_STRUCT(nine_shader_variant64);
+    if (!list->next)
+        return FALSE;
+    list->next->next = NULL;
+    list->next->key = key;
+    list->next->cso = cso;
+    return TRUE;
+}
+
+static inline void
+nine_shader_variants_free64(struct nine_shader_variant64 *list)
+{ /* Frees every node after the head; the head node and the cso pointers are not freed. */
+    while (list->next) {
+        struct nine_shader_variant64 *ptr = list->next;
+        list->next = ptr->next; /* unlink before freeing */
+        FREE(ptr);
+    }
 }
 
 #endif /* _NINE_SHADER_H_ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -33,352 +33,36 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "cso_cache/cso_context.h"
+#include "util/u_upload_mgr.h"
 #include "util/u_math.h"
 
 #define DBG_CHANNEL DBG_DEVICE
 
-static uint32_t
-update_framebuffer(struct NineDevice9 *device)
-{
-    struct pipe_context *pipe = device->pipe;
-    struct nine_state *state = &device->state;
-    struct pipe_framebuffer_state *fb = &device->state.fb;
-    unsigned i;
-    struct NineSurface9 *rt0 = state->rt[0];
-    unsigned w = rt0->desc.Width;
-    unsigned h = rt0->desc.Height;
-    D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
-    unsigned mask = state->ps ? state->ps->rt_mask : 1;
-    const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
-
-    DBG("\n");
+/* State preparation only */
 
-    state->rt_mask = 0x0;
-    fb->nr_cbufs = 0;
-
-    /* all render targets must have the same size and the depth buffer must be
-     * bigger. Multisample has to match, according to spec. But some apps do
-     * things wrong there, and no error is returned. The behaviour they get
-     * apparently is that depth buffer is disabled if it doesn't match.
-     * Surely the same for render targets. */
-
-    /* Special case: D3DFMT_NULL is used to bound no real render target,
-     * but render to depth buffer. We have to not take into account the render
-     * target info. TODO: know what should happen when there are several render targers
-     * and the first one is D3DFMT_NULL */
-    if (rt0->desc.Format == D3DFMT_NULL && state->ds) {
-        w = state->ds->desc.Width;
-        h = state->ds->desc.Height;
-        nr_samples = state->ds->desc.MultiSampleType;
-    }
-
-    for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
-        struct NineSurface9 *rt = state->rt[i];
-
-        if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
-            rt->desc.Width == w && rt->desc.Height == h &&
-            rt->desc.MultiSampleType == nr_samples) {
-            fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
-            state->rt_mask |= 1 << i;
-            fb->nr_cbufs = i + 1;
-
-            if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) {
-                assert(rt->texture == D3DRTYPE_TEXTURE ||
-                       rt->texture == D3DRTYPE_CUBETEXTURE);
-                NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE;
-            }
-        } else {
-            /* Color outputs must match RT slot,
-             * drivers will have to handle NULL entries for GL, too.
-             */
-            fb->cbufs[i] = NULL;
-        }
-    }
-
-    if (state->ds && state->ds->desc.Width >= w &&
-        state->ds->desc.Height >= h &&
-        state->ds->desc.MultiSampleType == nr_samples) {
-        fb->zsbuf = NineSurface9_GetSurface(state->ds, 0);
-    } else {
-        fb->zsbuf = NULL;
-    }
-
-    fb->width = w;
-    fb->height = h;
-
-    pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
-
-    if (fb->zsbuf) {
-        DWORD scale;
-        switch (fb->zsbuf->format) {
-        case PIPE_FORMAT_Z32_FLOAT:
-        case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-            scale = fui(1.0f);
-            break;
-        case PIPE_FORMAT_Z16_UNORM:
-            scale = fui((float)(1 << 16));
-            break;
-        default:
-            scale = fui((float)(1 << 24));
-            break;
-        }
-        if (state->rs[NINED3DRS_ZBIASSCALE] != scale) {
-            state->rs[NINED3DRS_ZBIASSCALE] = scale;
-            state->changed.group |= NINE_STATE_RASTERIZER;
-        }
-    }
-
-    return state->changed.group;
-}
-
-static void
-update_viewport(struct NineDevice9 *device)
+static inline void
+prepare_blend(struct NineDevice9 *device)
 {
-    struct pipe_context *pipe = device->pipe;
-    const D3DVIEWPORT9 *vport = &device->state.viewport;
-    struct pipe_viewport_state pvport;
-
-    /* D3D coordinates are:
-     * -1 .. +1 for X,Y and
-     *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
-     */
-    pvport.scale[0] = (float)vport->Width * 0.5f;
-    pvport.scale[1] = (float)vport->Height * -0.5f;
-    pvport.scale[2] = vport->MaxZ - vport->MinZ;
-    pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
-    pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
-    pvport.translate[2] = vport->MinZ;
-
-    /* We found R600 and SI cards have some imprecision
-     * on the barycentric coordinates used for interpolation.
-     * Some shaders rely on having something precise.
-     * We found that the proprietary driver has the imprecision issue,
-     * except when the render target width and height are powers of two.
-     * It is using some sort of workaround for these cases
-     * which covers likely all the cases the applications rely
-     * on something precise.
-     * We haven't found the workaround, but it seems like it's better
-     * for applications if the imprecision is biased towards infinity
-     * instead of -infinity (which is what measured). So shift slightly
-     * the viewport: not enough to change rasterization result (in particular
-     * for multisampling), but enough to make the imprecision biased
-     * towards infinity. We do this shift only if render target width and
-     * height are powers of two.
-     * Solves 'red shadows' bug on UE3 games.
-     */
-    if (device->driver_bugs.buggy_barycentrics &&
-        ((vport->Width & (vport->Width-1)) == 0) &&
-        ((vport->Height & (vport->Height-1)) == 0)) {
-        pvport.translate[0] -= 1.0f / 128.0f;
-        pvport.translate[1] -= 1.0f / 128.0f;
-    }
-
-    pipe->set_viewport_states(pipe, 0, 1, &pvport);
+    nine_convert_blend_state(&device->state.pipe.blend, device->state.rs);
+    device->state.commit |= NINE_STATE_COMMIT_BLEND;
 }
 
-static INLINE void
-update_scissor(struct NineDevice9 *device)
+static inline void
+prepare_dsa(struct NineDevice9 *device)
 {
-    struct pipe_context *pipe = device->pipe;
-
-    pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor);
+    nine_convert_dsa_state(&device->state.pipe.dsa, device->state.rs);
+    device->state.commit |= NINE_STATE_COMMIT_DSA;
 }
 
-static INLINE void
-update_blend(struct NineDevice9 *device)
+static inline void
+prepare_rasterizer(struct NineDevice9 *device)
 {
-    nine_convert_blend_state(device->cso, device->state.rs);
+    nine_convert_rasterizer_state(&device->state.pipe.rast, device->state.rs);
+    device->state.commit |= NINE_STATE_COMMIT_RASTERIZER;
 }
 
-static INLINE void
-update_dsa(struct NineDevice9 *device)
-{
-    nine_convert_dsa_state(device->cso, device->state.rs);
-}
-
-static INLINE void
-update_rasterizer(struct NineDevice9 *device)
-{
-    nine_convert_rasterizer_state(device->cso, device->state.rs);
-}
-
-/* Loop through VS inputs and pick the vertex elements with the declared
- * usage from the vertex declaration, then insert the instance divisor from
- * the stream source frequency setting.
- */
 static void
-update_vertex_elements(struct NineDevice9 *device)
-{
-    struct nine_state *state = &device->state;
-    const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
-    const struct NineVertexShader9 *vs;
-    unsigned n, b, i;
-    int index;
-    char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
-    char used_streams[device->caps.MaxStreams];
-    int dummy_vbo_stream = -1;
-    BOOL need_dummy_vbo = FALSE;
-    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
-
-    state->stream_usage_mask = 0;
-    memset(vdecl_index_map, -1, 16);
-    memset(used_streams, 0, device->caps.MaxStreams);
-    vs = device->state.vs ? device->state.vs : device->ff.vs;
-
-    if (vdecl) {
-        for (n = 0; n < vs->num_inputs; ++n) {
-            DBG("looking up input %u (usage %u) from vdecl(%p)\n",
-                n, vs->input_map[n].ndecl, vdecl);
-
-            for (i = 0; i < vdecl->nelems; i++) {
-                if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
-                    vdecl_index_map[n] = i;
-                    used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
-                    break;
-                }
-            }
-            if (vdecl_index_map[n] < 0)
-                need_dummy_vbo = TRUE;
-        }
-    } else {
-        /* No vertex declaration. Likely will never happen in practice,
-         * but we need not crash on this */
-        need_dummy_vbo = TRUE;
-    }
-
-    if (need_dummy_vbo) {
-        for (i = 0; i < device->caps.MaxStreams; i++ ) {
-            if (!used_streams[i]) {
-                dummy_vbo_stream = i;
-                break;
-            }
-        }
-    }
-    /* there are less vertex shader inputs than stream slots,
-     * so if we need a slot for the dummy vbo, we should have found one */
-    assert (!need_dummy_vbo || dummy_vbo_stream != -1);
-
-    for (n = 0; n < vs->num_inputs; ++n) {
-        index = vdecl_index_map[n];
-        if (index >= 0) {
-            ve[n] = vdecl->elems[index];
-            b = ve[n].vertex_buffer_index;
-            state->stream_usage_mask |= 1 << b;
-            /* XXX wine just uses 1 here: */
-            if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
-                ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
-        } else {
-            /* if the vertex declaration is incomplete compared to what the
-             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
-             * This is not precised by the spec, but is the behaviour
-             * tested on win */
-            ve[n].vertex_buffer_index = dummy_vbo_stream;
-            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-            ve[n].src_offset = 0;
-            ve[n].instance_divisor = 0;
-        }
-    }
-
-    if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
-        if (state->dummy_vbo_bound_at >= 0)
-            state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
-        if (dummy_vbo_stream >= 0) {
-            state->changed.vtxbuf |= 1 << dummy_vbo_stream;
-            state->vbo_bound_done = FALSE;
-        }
-        state->dummy_vbo_bound_at = dummy_vbo_stream;
-    }
-
-    cso_set_vertex_elements(device->cso, vs->num_inputs, ve);
-
-    state->changed.stream_freq = 0;
-}
-
-static INLINE uint32_t
-update_shader_variant_keys(struct NineDevice9 *device)
-{
-    struct nine_state *state = &device->state;
-    uint32_t mask = 0;
-    uint32_t vs_key = state->samplers_shadow;
-    uint32_t ps_key = state->samplers_shadow;
-
-    vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0);
-    ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0);
-
-    if (state->vs) vs_key &= state->vs->sampler_mask;
-    if (state->ps) {
-        if (unlikely(state->ps->byte_code.version < 0x20)) {
-            /* no depth textures, but variable targets */
-            uint32_t m = state->ps->sampler_mask;
-            ps_key = 0;
-            while (m) {
-                int s = ffs(m) - 1;
-                m &= ~(1 << s);
-                ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2);
-            }
-        } else {
-            ps_key &= state->ps->sampler_mask;
-        }
-    }
-
-    if (state->vs && state->vs_key != vs_key) {
-        state->vs_key = vs_key;
-        mask |= NINE_STATE_VS;
-    }
-    if (state->ps && state->ps_key != ps_key) {
-        state->ps_key = ps_key;
-        mask |= NINE_STATE_PS;
-    }
-    return mask;
-}
-
-static INLINE uint32_t
-update_vs(struct NineDevice9 *device)
-{
-    struct nine_state *state = &device->state;
-    struct NineVertexShader9 *vs = state->vs;
-    uint32_t changed_group = 0;
-
-    /* likely because we dislike FF */
-    if (likely(vs)) {
-        state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key);
-    } else {
-        vs = device->ff.vs;
-        state->cso.vs = vs->variant.cso;
-    }
-    device->pipe->bind_vs_state(device->pipe, state->cso.vs);
-
-    if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
-        state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
-        changed_group |= NINE_STATE_RASTERIZER;
-    }
-
-    if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
-        /* Bound dummy sampler. */
-        changed_group |= NINE_STATE_SAMPLER;
-    return changed_group;
-}
-
-static INLINE uint32_t
-update_ps(struct NineDevice9 *device)
-{
-    struct nine_state *state = &device->state;
-    struct NinePixelShader9 *ps = state->ps;
-    uint32_t changed_group = 0;
-
-    if (likely(ps)) {
-        state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key);
-    } else {
-        ps = device->ff.ps;
-        state->cso.ps = ps->variant.cso;
-    }
-    device->pipe->bind_fs_state(device->pipe, state->cso.ps);
-
-    if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
-        /* Bound dummy sampler. */
-        changed_group |= NINE_STATE_SAMPLER;
-    return changed_group;
-}
+prepare_ps_constants_userbuf(struct NineDevice9 *device);
 
 #define DO_UPLOAD_CONST_F(buf,p,c,d) \
     do { \
@@ -391,7 +75,7 @@
 
 /* OK, this is a bit ugly ... */
 static void
-update_constants(struct NineDevice9 *device, unsigned shader_type)
+upload_constants(struct NineDevice9 *device, unsigned shader_type)
 {
     struct pipe_context *pipe = device->pipe;
     struct pipe_resource *buf;
@@ -438,10 +122,17 @@
         lconstf_ranges = device->state.vs->lconstf.ranges;
         lconstf_data = device->state.vs->lconstf.data;
 
-        device->state.ff.clobber.vs_const = TRUE;
         device->state.changed.group &= ~NINE_STATE_VS_CONST;
     } else {
         DBG("PS\n");
+        /* features only implemented on the userbuf path */
+        if (device->state.ps->bumpenvmat_needed || (
+            device->state.ps->byte_code.version < 0x30 &&
+            device->state.rs[D3DRS_FOGENABLE])) {
+            device->prefer_user_constbuf = TRUE;
+            prepare_ps_constants_userbuf(device);
+            return;
+        }
         buf = device->constbuf_ps;
 
         const_f = device->state.ps_const_f;
@@ -464,7 +155,6 @@
         lconstf_ranges = NULL;
         lconstf_data = NULL;
 
-        device->state.ff.clobber.ps_const = TRUE;
         device->state.changed.group &= ~NINE_STATE_PS_CONST;
     }
 
@@ -524,10 +214,9 @@
 }
 
 static void
-update_vs_constants_userbuf(struct NineDevice9 *device)
+prepare_vs_constants_userbuf(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
-    struct pipe_context *pipe = device->pipe;
     struct pipe_constant_buffer cb;
     cb.buffer = NULL;
     cb.buffer_offset = 0;
@@ -567,7 +256,18 @@
         cb.user_buffer = dst;
     }
 
-    pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
+    if (!device->driver_caps.user_cbufs) {
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb.buffer_size,
+                      cb.user_buffer,
+                      &cb.buffer_offset,
+                      &cb.buffer);
+        u_upload_unmap(device->constbuf_uploader);
+        cb.user_buffer = NULL;
+    }
+
+    state->pipe.cb_vs = cb;
 
     if (device->state.changed.vs_const_f) {
         struct nine_range *r = device->state.changed.vs_const_f;
@@ -578,45 +278,362 @@
         device->state.changed.vs_const_f = NULL;
     }
     state->changed.group &= ~NINE_STATE_VS_CONST;
+    state->commit |= NINE_STATE_COMMIT_CONST_VS;
 }
 
 static void
-update_ps_constants_userbuf(struct NineDevice9 *device)
+prepare_ps_constants_userbuf(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+    struct pipe_constant_buffer cb;
+    cb.buffer = NULL;
+    cb.buffer_offset = 0;
+    cb.buffer_size = device->state.ps->const_used_size;
+    cb.user_buffer = device->state.ps_const_f;
+
+    if (state->changed.ps_const_i) {
+        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
+        memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i));
+        state->changed.ps_const_i = 0;
+    }
+    if (state->changed.ps_const_b) {
+        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
+        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
+        memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b));
+        state->changed.ps_const_b = 0;
+    }
+
+    /* Upload special constants needed to implement PS1.x instructions like TEXBEM, TEXBEML and BEM */
+    if (device->state.ps->bumpenvmat_needed) {
+        memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+        memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars));
+
+        cb.user_buffer = device->state.ps_lconstf_temp;
+    }
+
+    if (state->ps->byte_code.version < 0x30 &&
+        state->rs[D3DRS_FOGENABLE]) {
+        float *dst = &state->ps_lconstf_temp[4 * 32];
+        if (cb.user_buffer != state->ps_lconstf_temp) {
+            memcpy(state->ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+            cb.user_buffer = state->ps_lconstf_temp;
+        }
+
+        d3dcolor_to_rgba(dst, state->rs[D3DRS_FOGCOLOR]);
+        if (state->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) {
+            dst[4] = asfloat(state->rs[D3DRS_FOGEND]);
+            dst[5] = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
+        } else if (state->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) {
+            dst[4] = asfloat(state->rs[D3DRS_FOGDENSITY]);
+        }
+        cb.buffer_size = 4 * 4 * 34;
+    }
+
+    if (!cb.buffer_size)
+        return;
+
+    if (!device->driver_caps.user_cbufs) {
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb.buffer_size,
+                      cb.user_buffer,
+                      &cb.buffer_offset,
+                      &cb.buffer);
+        u_upload_unmap(device->constbuf_uploader);
+        cb.user_buffer = NULL;
+    }
+
+    state->pipe.cb_ps = cb;
+
+    if (device->state.changed.ps_const_f) {
+        struct nine_range *r = device->state.changed.ps_const_f;
+        struct nine_range *p = r;
+        while (p->next)
+            p = p->next;
+        nine_range_pool_put_chain(&device->range_pool, r, p);
+        device->state.changed.ps_const_f = NULL;
+    }
+    state->changed.group &= ~NINE_STATE_PS_CONST;
+    state->commit |= NINE_STATE_COMMIT_CONST_PS;
+}
+
+static inline uint32_t
+prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
+{
+    struct nine_state *state = &device->state;
+    struct NineVertexShader9 *vs = state->vs;
+    uint32_t changed_group = 0;
+    int has_key_changed = 0;
+
+    if (likely(vs))
+        has_key_changed = NineVertexShader9_UpdateKey(vs, state);
+
+    if (!shader_changed && !has_key_changed)
+        return 0;
+
+    /* likely because we dislike FF */
+    if (likely(vs)) {
+        state->cso.vs = NineVertexShader9_GetVariant(vs);
+    } else {
+        vs = device->ff.vs;
+        state->cso.vs = vs->ff_cso;
+    }
+
+    if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
+        state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
+        changed_group |= NINE_STATE_RASTERIZER;
+    }
+
+    if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
+        /* Bound dummy sampler. */
+        changed_group |= NINE_STATE_SAMPLER;
+
+    state->commit |= NINE_STATE_COMMIT_VS;
+    return changed_group;
+}
+
+static inline uint32_t
+prepare_ps(struct NineDevice9 *device, uint8_t shader_changed)
+{
+    struct nine_state *state = &device->state;
+    struct NinePixelShader9 *ps = state->ps;
+    uint32_t changed_group = 0;
+    int has_key_changed = 0;
+
+    if (likely(ps))
+        has_key_changed = NinePixelShader9_UpdateKey(ps, state);
+
+    if (!shader_changed && !has_key_changed)
+        return 0;
+
+    if (likely(ps)) {
+        state->cso.ps = NinePixelShader9_GetVariant(ps);
+    } else {
+        ps = device->ff.ps;
+        state->cso.ps = ps->ff_cso;
+    }
+
+    if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
+        /* Bound dummy sampler. */
+        changed_group |= NINE_STATE_SAMPLER;
+
+    state->commit |= NINE_STATE_COMMIT_PS;
+    return changed_group;
+}
+
+/* State preparation incremental */
+
+/* State preparation + State commit */
+
+static uint32_t
+update_framebuffer(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+    struct nine_state *state = &device->state;
+    struct pipe_framebuffer_state *fb = &device->state.fb;
+    unsigned i;
+    struct NineSurface9 *rt0 = state->rt[0];
+    unsigned w = rt0->desc.Width;
+    unsigned h = rt0->desc.Height;
+    D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
+    unsigned mask = state->ps ? state->ps->rt_mask : 1;
+    const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
+
+    DBG("\n");
+
+    state->rt_mask = 0x0;
+    fb->nr_cbufs = 0;
+
+    /* All render targets must have the same size and the depth buffer must be
+     * at least as big. Multisample has to match, according to spec. But some
+     * apps do things wrong there, and no error is returned. The behaviour they
+     * get apparently is that the depth buffer is disabled if it doesn't match.
+     * Surely the same for render targets. */
+
+    /* Special case: D3DFMT_NULL is used to bind no real render target
+     * and render only to the depth buffer. In that case we must ignore the
+     * render target info. TODO: find out what should happen when there are
+     * several render targets and the first one is D3DFMT_NULL */
+    if (rt0->desc.Format == D3DFMT_NULL && state->ds) {
+        w = state->ds->desc.Width;
+        h = state->ds->desc.Height;
+        nr_samples = state->ds->desc.MultiSampleType;
+    }
+
+    for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) {
+        struct NineSurface9 *rt = state->rt[i];
+
+        if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) &&
+            rt->desc.Width == w && rt->desc.Height == h &&
+            rt->desc.MultiSampleType == nr_samples) {
+            fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB);
+            state->rt_mask |= 1 << i;
+            fb->nr_cbufs = i + 1;
+
+            if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) {
+                assert(rt->texture == D3DRTYPE_TEXTURE ||
+                       rt->texture == D3DRTYPE_CUBETEXTURE);
+                NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE;
+            }
+        } else {
+            /* Color outputs must match RT slot,
+             * drivers will have to handle NULL entries for GL, too.
+             */
+            fb->cbufs[i] = NULL;
+        }
+    }
+
+    if (state->ds && state->ds->desc.Width >= w &&
+        state->ds->desc.Height >= h &&
+        state->ds->desc.MultiSampleType == nr_samples) {
+        fb->zsbuf = NineSurface9_GetSurface(state->ds, 0);
+    } else {
+        fb->zsbuf = NULL;
+    }
+
+    fb->width = w;
+    fb->height = h;
+
+    pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
+
+    return state->changed.group;
+}
+
+static void
+update_viewport(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+    const D3DVIEWPORT9 *vport = &device->state.viewport;
+    struct pipe_viewport_state pvport;
+
+    /* D3D coordinates are:
+     * -1 .. +1 for X,Y and
+     *  0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz)
+     */
+    pvport.scale[0] = (float)vport->Width * 0.5f;
+    pvport.scale[1] = (float)vport->Height * -0.5f;
+    pvport.scale[2] = vport->MaxZ - vport->MinZ;
+    pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X;
+    pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y;
+    pvport.translate[2] = vport->MinZ;
+
+    /* We found R600 and SI cards have some imprecision
+     * on the barycentric coordinates used for interpolation.
+     * Some shaders rely on having something precise.
+     * We found that the proprietary driver has the imprecision issue,
+     * except when the render target width and height are powers of two.
+     * It is using some sort of workaround for these cases
+     * which covers likely all the cases the applications rely
+     * on something precise.
+     * We haven't found the workaround, but it seems like it's better
+     * for applications if the imprecision is biased towards infinity
+     * instead of -infinity (which is what we measured). So shift slightly
+     * the viewport: not enough to change rasterization result (in particular
+     * for multisampling), but enough to make the imprecision biased
+     * towards infinity. We do this shift only if render target width and
+     * height are powers of two.
+     * Solves 'red shadows' bug on UE3 games.
+     */
+    if (device->driver_bugs.buggy_barycentrics &&
+        ((vport->Width & (vport->Width-1)) == 0) &&
+        ((vport->Height & (vport->Height-1)) == 0)) {
+        pvport.translate[0] -= 1.0f / 128.0f;
+        pvport.translate[1] -= 1.0f / 128.0f;
+    }
+
+    pipe->set_viewport_states(pipe, 0, 1, &pvport);
+}
+
+/* Loop through VS inputs and pick the vertex elements with the declared
+ * usage from the vertex declaration, then insert the instance divisor from
+ * the stream source frequency setting.
+ */
+static void
+update_vertex_elements(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
-    struct pipe_context *pipe = device->pipe;
-    struct pipe_constant_buffer cb;
-    cb.buffer = NULL;
-    cb.buffer_offset = 0;
-    cb.buffer_size = device->state.ps->const_used_size;
-    cb.user_buffer = device->state.ps_const_f;
+    const struct NineVertexDeclaration9 *vdecl = device->state.vdecl;
+    const struct NineVertexShader9 *vs;
+    unsigned n, b, i;
+    int index;
+    char vdecl_index_map[16]; /* vs->num_inputs <= 16 */
+    char used_streams[device->caps.MaxStreams];
+    int dummy_vbo_stream = -1;
+    BOOL need_dummy_vbo = FALSE;
+    struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
 
-    if (!cb.buffer_size)
-        return;
+    state->stream_usage_mask = 0;
+    memset(vdecl_index_map, -1, 16);
+    memset(used_streams, 0, device->caps.MaxStreams);
+    vs = device->state.vs ? device->state.vs : device->ff.vs;
 
-    if (state->changed.ps_const_i) {
-        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
-        memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i));
-        state->changed.ps_const_i = 0;
+    if (vdecl) {
+        for (n = 0; n < vs->num_inputs; ++n) {
+            DBG("looking up input %u (usage %u) from vdecl(%p)\n",
+                n, vs->input_map[n].ndecl, vdecl);
+
+            for (i = 0; i < vdecl->nelems; i++) {
+                if (vdecl->usage_map[i] == vs->input_map[n].ndecl) {
+                    vdecl_index_map[n] = i;
+                    used_streams[vdecl->elems[i].vertex_buffer_index] = 1;
+                    break;
+                }
+            }
+            if (vdecl_index_map[n] < 0)
+                need_dummy_vbo = TRUE;
+        }
+    } else {
+        /* No vertex declaration. Likely will never happen in practice,
+         * but we must not crash on this */
+        need_dummy_vbo = TRUE;
     }
-    if (state->changed.ps_const_b) {
-        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
-        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
-        memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b));
-        state->changed.ps_const_b = 0;
+
+    if (need_dummy_vbo) {
+        for (i = 0; i < device->caps.MaxStreams; i++ ) {
+            if (!used_streams[i]) {
+                dummy_vbo_stream = i;
+                break;
+            }
+        }
     }
+    /* there are fewer vertex shader inputs than stream slots,
+     * so if we need a slot for the dummy vbo, we should have found one */
+    assert (!need_dummy_vbo || dummy_vbo_stream != -1);
 
-    pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
+    for (n = 0; n < vs->num_inputs; ++n) {
+        index = vdecl_index_map[n];
+        if (index >= 0) {
+            ve[n] = vdecl->elems[index];
+            b = ve[n].vertex_buffer_index;
+            state->stream_usage_mask |= 1 << b;
+            /* XXX wine just uses 1 here: */
+            if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
+                ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
+        } else {
+            /* if the vertex declaration is incomplete compared to what the
+             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
+             * This is not specified by the spec, but is the behaviour
+             * observed when testing on Windows */
+            ve[n].vertex_buffer_index = dummy_vbo_stream;
+            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+            ve[n].src_offset = 0;
+            ve[n].instance_divisor = 0;
+        }
+    }
 
-    if (device->state.changed.ps_const_f) {
-        struct nine_range *r = device->state.changed.ps_const_f;
-        struct nine_range *p = r;
-        while (p->next)
-            p = p->next;
-        nine_range_pool_put_chain(&device->range_pool, r, p);
-        device->state.changed.ps_const_f = NULL;
+    if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
+        if (state->dummy_vbo_bound_at >= 0)
+            state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
+        if (dummy_vbo_stream >= 0) {
+            state->changed.vtxbuf |= 1 << dummy_vbo_stream;
+            state->vbo_bound_done = FALSE;
+        }
+        state->dummy_vbo_bound_at = dummy_vbo_stream;
     }
-    state->changed.group &= ~NINE_STATE_PS_CONST;
+
+    cso_set_vertex_elements(device->cso, vs->num_inputs, ve);
+
+    state->changed.stream_freq = 0;
 }
 
 static void
@@ -627,7 +644,6 @@
     struct pipe_vertex_buffer dummy_vtxbuf;
     uint32_t mask = state->changed.vtxbuf;
     unsigned i;
-    unsigned start;
 
     DBG("mask=%x\n", mask);
 
@@ -656,28 +672,7 @@
     state->changed.vtxbuf = 0;
 }
 
-static INLINE void
-update_index_buffer(struct NineDevice9 *device)
-{
-    struct pipe_context *pipe = device->pipe;
-    if (device->state.idxbuf)
-        pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer);
-    else
-        pipe->set_index_buffer(pipe, NULL);
-}
-
-/* TODO: only go through dirty textures */
-static void
-validate_textures(struct NineDevice9 *device)
-{
-    struct NineBaseTexture9 *tex, *ptr;
-    LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) {
-        list_delinit(&tex->list);
-        NineBaseTexture9_Validate(tex);
-    }
-}
-
-static INLINE boolean
+static inline boolean
 update_sampler_derived(struct nine_state *state, unsigned s)
 {
     boolean changed = FALSE;
@@ -706,20 +701,16 @@
 static void
 update_textures_and_samplers(struct NineDevice9 *device)
 {
-    struct pipe_context *pipe = device->pipe;
     struct nine_state *state = &device->state;
     struct pipe_sampler_view *view[NINE_MAX_SAMPLERS];
-    struct pipe_sampler_state samp;
     unsigned num_textures;
     unsigned i;
-    boolean commit_views;
     boolean commit_samplers;
     uint16_t sampler_mask = state->ps ? state->ps->sampler_mask :
                             device->ff.ps->sampler_mask;
 
     /* TODO: Can we reduce iterations here ? */
 
-    commit_views = FALSE;
     commit_samplers = FALSE;
     state->bound_samplers_mask_ps = 0;
     for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) {
@@ -749,26 +740,12 @@
              * unbind dummy sampler directly when they are not needed
              * anymore, but they're going to be removed as long as texture
              * or sampler states are changed. */
-            view[i] = device->dummy_sampler;
+            view[i] = device->dummy_sampler_view;
             num_textures = i + 1;
 
-            memset(&samp, 0, sizeof(samp));
-            samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-            samp.max_lod = 15.0f;
-            samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-            samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-            samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-            samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-            samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-            samp.compare_mode = PIPE_TEX_COMPARE_NONE;
-            samp.compare_func = PIPE_FUNC_LEQUAL;
-            samp.normalized_coords = 1;
-            samp.seamless_cube_map = 1;
-
             cso_single_sampler(device->cso, PIPE_SHADER_FRAGMENT,
-                               s - NINE_SAMPLER_PS(0), &samp);
+                               s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state);
 
-            commit_views = TRUE;
             commit_samplers = TRUE;
             state->changed.sampler[s] = ~0;
         }
@@ -776,16 +753,11 @@
         state->bound_samplers_mask_ps |= (1 << s);
     }
 
-    commit_views |= (state->changed.texture & NINE_PS_SAMPLERS_MASK) != 0;
-    commit_views |= state->changed.srgb;
-    if (commit_views)
-        pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
-                                num_textures, view);
+    cso_set_sampler_views(device->cso, PIPE_SHADER_FRAGMENT, num_textures, view);
 
     if (commit_samplers)
         cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT);
 
-    commit_views = FALSE;
     commit_samplers = FALSE;
     sampler_mask = state->vs ? state->vs->sampler_mask : 0;
     state->bound_samplers_mask_vs = 0;
@@ -816,76 +788,170 @@
              * unbind dummy sampler directly when they are not needed
              * anymore, but they're going to be removed as long as texture
              * or sampler states are changed. */
-            view[i] = device->dummy_sampler;
+            view[i] = device->dummy_sampler_view;
             num_textures = i + 1;
 
-            memset(&samp, 0, sizeof(samp));
-            samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-            samp.max_lod = 15.0f;
-            samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-            samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-            samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-            samp.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-            samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-            samp.compare_mode = PIPE_TEX_COMPARE_NONE;
-            samp.compare_func = PIPE_FUNC_LEQUAL;
-            samp.normalized_coords = 1;
-            samp.seamless_cube_map = 1;
-
             cso_single_sampler(device->cso, PIPE_SHADER_VERTEX,
-                               s - NINE_SAMPLER_VS(0), &samp);
+                               s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state);
 
-            commit_views = TRUE;
             commit_samplers = TRUE;
             state->changed.sampler[s] = ~0;
         }
 
         state->bound_samplers_mask_vs |= (1 << s);
     }
-    commit_views |= (state->changed.texture & NINE_VS_SAMPLERS_MASK) != 0;
-    commit_views |= state->changed.srgb;
-    if (commit_views)
-        pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0,
-                                num_textures, view);
+
+    cso_set_sampler_views(device->cso, PIPE_SHADER_VERTEX, num_textures, view);
 
     if (commit_samplers)
         cso_single_sampler_done(device->cso, PIPE_SHADER_VERTEX);
 
-    state->changed.srgb = FALSE;
     state->changed.texture = 0;
 }
 
+/* State commit only */
+
+static inline void
+commit_blend(struct NineDevice9 *device)
+{
+    cso_set_blend(device->cso, &device->state.pipe.blend);
+}
+
+static inline void
+commit_dsa(struct NineDevice9 *device)
+{
+    cso_set_depth_stencil_alpha(device->cso, &device->state.pipe.dsa);
+}
+
+static inline void
+commit_scissor(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+
+    pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor);
+}
+
+static inline void
+commit_rasterizer(struct NineDevice9 *device)
+{
+    cso_set_rasterizer(device->cso, &device->state.pipe.rast);
+}
+
+static inline void
+commit_index_buffer(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+    if (device->state.idxbuf)
+        pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer);
+    else
+        pipe->set_index_buffer(pipe, NULL);
+}
+
+static inline void
+commit_vs_constants(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+
+    if (unlikely(!device->state.vs))
+        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
+    else
+        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+}
+
+static inline void
+commit_ps_constants(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+
+    if (unlikely(!device->state.ps))
+        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps_ff);
+    else
+        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps);
+}
+
+static inline void
+commit_vs(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+
+    device->pipe->bind_vs_state(device->pipe, state->cso.vs);
+}
+
+
+static inline void
+commit_ps(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+
+    device->pipe->bind_fs_state(device->pipe, state->cso.ps);
+}
+/* State Update */
 
-#define NINE_STATE_FREQ_GROUP_0 \
-   (NINE_STATE_FB |             \
-    NINE_STATE_VIEWPORT |       \
-    NINE_STATE_SCISSOR |        \
-    NINE_STATE_BLEND |          \
-    NINE_STATE_DSA |            \
-    NINE_STATE_RASTERIZER |     \
-    NINE_STATE_VS |             \
-    NINE_STATE_PS |             \
-    NINE_STATE_BLEND_COLOR |    \
-    NINE_STATE_STENCIL_REF |    \
+#define NINE_STATE_SHADER_CHANGE_VS \
+   (NINE_STATE_VS |         \
+    NINE_STATE_TEXTURE |    \
+    NINE_STATE_FOG_SHADER)
+
+#define NINE_STATE_SHADER_CHANGE_PS \
+   (NINE_STATE_PS |         \
+    NINE_STATE_TEXTURE |    \
+    NINE_STATE_FOG_SHADER | \
+    NINE_STATE_PS1X_SHADER)
+
+#define NINE_STATE_FREQUENT \
+   (NINE_STATE_RASTERIZER | \
+    NINE_STATE_TEXTURE |    \
+    NINE_STATE_SAMPLER |    \
+    NINE_STATE_VS_CONST |   \
+    NINE_STATE_PS_CONST)
+
+#define NINE_STATE_COMMON \
+   (NINE_STATE_FB |       \
+    NINE_STATE_BLEND |    \
+    NINE_STATE_DSA |      \
+    NINE_STATE_VIEWPORT | \
+    NINE_STATE_VDECL |    \
+    NINE_STATE_IDXBUF)
+
+#define NINE_STATE_RARE      \
+   (NINE_STATE_SCISSOR |     \
+    NINE_STATE_BLEND_COLOR | \
+    NINE_STATE_STENCIL_REF | \
     NINE_STATE_SAMPLE_MASK)
 
-#define NINE_STATE_FREQ_GROUP_1 ~NINE_STATE_FREQ_GROUP_0
 
-#define NINE_STATE_SHADER_VARIANT_GROUP \
-    (NINE_STATE_TEXTURE | \
-     NINE_STATE_VS | \
-     NINE_STATE_PS)
+/* TODO: only go through dirty textures */
+static void
+validate_textures(struct NineDevice9 *device)
+{
+    struct NineBaseTexture9 *tex, *ptr;
+    LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) {
+        list_delinit(&tex->list);
+        NineBaseTexture9_Validate(tex);
+    }
+}
+
+void
+nine_update_state_framebuffer(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+
+    validate_textures(device);
+
+    if (state->changed.group & NINE_STATE_FB)
+        update_framebuffer(device);
+
+    state->changed.group &= ~NINE_STATE_FB;
+}
 
 boolean
-nine_update_state(struct NineDevice9 *device, uint32_t mask)
+nine_update_state(struct NineDevice9 *device)
 {
     struct pipe_context *pipe = device->pipe;
     struct nine_state *state = &device->state;
     uint32_t group;
 
-    DBG("changed state groups: %x | %x\n",
-        state->changed.group & NINE_STATE_FREQ_GROUP_0,
-        state->changed.group & NINE_STATE_FREQ_GROUP_1);
+    DBG("changed state groups: %x\n", state->changed.group);
 
     /* NOTE: We may want to use the cso cache for everything, or let
      * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't
@@ -896,35 +962,79 @@
     validate_textures(device); /* may clobber state */
 
     /* ff_update may change VS/PS dirty bits */
-    if ((mask & NINE_STATE_FF) && unlikely(!state->vs || !state->ps))
+    if (unlikely(!state->vs || !state->ps))
         nine_ff_update(device);
-    group = state->changed.group & mask;
+    group = state->changed.group;
 
-    if (group & NINE_STATE_SHADER_VARIANT_GROUP)
-        group |= update_shader_variant_keys(device);
+    if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) {
+        if (group & NINE_STATE_SHADER_CHANGE_VS)
+            group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/
+        if (group & NINE_STATE_SHADER_CHANGE_PS)
+            group |= prepare_ps(device, (group & NINE_STATE_PS) != 0);
+    }
 
-    if (group & NINE_STATE_FREQ_GROUP_0) {
+    if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
         if (group & NINE_STATE_FB)
-            group = update_framebuffer(device) & mask;
+            group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */
+        if (group & NINE_STATE_BLEND)
+            prepare_blend(device);
+        if (group & NINE_STATE_DSA)
+            prepare_dsa(device);
         if (group & NINE_STATE_VIEWPORT)
             update_viewport(device);
-        if (group & NINE_STATE_SCISSOR)
-            update_scissor(device);
+        if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) ||
+            state->changed.stream_freq & ~1)
+            update_vertex_elements(device);
+        if (group & NINE_STATE_IDXBUF)
+            commit_index_buffer(device);
+    }
 
-        if (group & NINE_STATE_DSA)
-            update_dsa(device);
-        if (group & NINE_STATE_BLEND)
-            update_blend(device);
+    if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) {
+        if (group & NINE_STATE_RASTERIZER)
+            prepare_rasterizer(device);
+        if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
+            update_textures_and_samplers(device);
+        if (device->prefer_user_constbuf) {
+            if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
+                prepare_vs_constants_userbuf(device);
+            if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
+                prepare_ps_constants_userbuf(device);
+        } else {
+            if ((group & NINE_STATE_VS_CONST) && state->vs)
+                upload_constants(device, PIPE_SHADER_VERTEX);
+            if ((group & NINE_STATE_PS_CONST) && state->ps)
+                upload_constants(device, PIPE_SHADER_FRAGMENT);
+        }
+    }
 
-        if (group & NINE_STATE_VS)
-            group |= update_vs(device);
+    if (state->changed.vtxbuf)
+        update_vertex_buffers(device);
 
-        if (group & NINE_STATE_RASTERIZER)
-            update_rasterizer(device);
+    if (state->commit & NINE_STATE_COMMIT_BLEND)
+        commit_blend(device);
+    if (state->commit & NINE_STATE_COMMIT_DSA)
+        commit_dsa(device);
+    if (state->commit & NINE_STATE_COMMIT_RASTERIZER)
+        commit_rasterizer(device);
+    if (state->commit & NINE_STATE_COMMIT_CONST_VS)
+        commit_vs_constants(device);
+    if (state->commit & NINE_STATE_COMMIT_CONST_PS)
+        commit_ps_constants(device);
+    if (state->commit & NINE_STATE_COMMIT_VS)
+        commit_vs(device);
+    if (state->commit & NINE_STATE_COMMIT_PS)
+        commit_ps(device);
+
+    state->commit = 0;
 
-        if (group & NINE_STATE_PS)
-            group |= update_ps(device);
+    if (unlikely(state->changed.ucp)) {
+        pipe->set_clip_state(pipe, &state->clip);
+        state->changed.ucp = 0;
+    }
 
+    if (unlikely(group & NINE_STATE_RARE)) {
+        if (group & NINE_STATE_SCISSOR)
+            commit_scissor(device);
         if (group & NINE_STATE_BLEND_COLOR) {
             struct pipe_blend_color color;
             d3dcolor_to_rgba(&color.color[0], state->rs[D3DRS_BLENDFACTOR]);
@@ -941,38 +1051,7 @@
         }
     }
 
-    if (state->changed.ucp) {
-        pipe->set_clip_state(pipe, &state->clip);
-        state->changed.ucp = 0;
-    }
-
-    if (group & (NINE_STATE_FREQ_GROUP_1 | NINE_STATE_VS)) {
-        if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
-            update_textures_and_samplers(device);
-
-        if (group & NINE_STATE_IDXBUF)
-            update_index_buffer(device);
-
-        if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) ||
-            state->changed.stream_freq & ~1)
-            update_vertex_elements(device);
-
-        if (device->prefer_user_constbuf) {
-            if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
-                update_vs_constants_userbuf(device);
-            if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
-                update_ps_constants_userbuf(device);
-        } else {
-            if ((group & NINE_STATE_VS_CONST) && state->vs)
-                update_constants(device, PIPE_SHADER_VERTEX);
-            if ((group & NINE_STATE_PS_CONST) && state->ps)
-                update_constants(device, PIPE_SHADER_FRAGMENT);
-        }
-    }
-    if (state->changed.vtxbuf)
-        update_vertex_buffers(device);
-
-    device->state.changed.group &= ~mask |
+    device->state.changed.group &=
         (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST);
 
     DBG("finished\n");
@@ -980,6 +1059,7 @@
     return TRUE;
 }
 
+/* State defaults */
 
 static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] =
 {
@@ -1134,6 +1214,18 @@
     [NINED3DSAMP_MINLOD] = 0,
     [NINED3DSAMP_SHADOW] = 0
 };
+
+void nine_state_restore_non_cso(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+    /* Flag every state group, stream, clip plane and sampler as changed; set the VS/PS const commit bits. */
+    state->changed.group = NINE_STATE_ALL;
+    state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; /* low MaxStreams bits */
+    state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; /* low PIPE_MAX_CLIP_PLANES bits */
+    state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
+    state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
+}
+
 void
 nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
                         boolean is_reset)
@@ -1152,6 +1244,7 @@
     }
     state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE;
     state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1;
+    memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars));
 
     for (s = 0; s < Elements(state->samp); ++s) {
         memcpy(&state->samp[s], nine_samp_state_defaults,
@@ -1170,6 +1263,9 @@
     /* Set changed flags to initialize driver.
      */
     state->changed.group = NINE_STATE_ALL;
+    state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
+    state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
+    state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
 
     state->ff.changed.transform[0] = ~0;
     state->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32);
@@ -1186,6 +1282,23 @@
         state->dummy_vbo_bound_at = -1;
         state->vbo_bound_done = FALSE;
     }
+
+    if (!device->prefer_user_constbuf) {
+        /* fill cb_vs and cb_ps for the non user constbuf path */
+        struct pipe_constant_buffer cb;
+
+        cb.buffer_offset = 0;
+        cb.buffer_size = device->vs_const_size;
+        cb.buffer = device->constbuf_vs;
+        cb.user_buffer = NULL;
+        state->pipe.cb_vs = cb;
+
+        cb.buffer_size = device->ps_const_size;
+        cb.buffer = device->constbuf_ps;
+        state->pipe.cb_ps = cb;
+
+        state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
+    }
 }
 
 void
@@ -1353,15 +1466,15 @@
     [D3DRS_ZFUNC] = NINE_STATE_DSA,
     [D3DRS_ALPHAREF] = NINE_STATE_DSA,
     [D3DRS_ALPHAFUNC] = NINE_STATE_DSA,
-    [D3DRS_DITHERENABLE] = NINE_STATE_RASTERIZER,
+    [D3DRS_DITHERENABLE] = NINE_STATE_BLEND,
     [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND,
-    [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER,
+    [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST,
     [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING,
-    [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER,
-    [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER,
-    [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER,
-    [D3DRS_FOGEND] = NINE_STATE_FF_OTHER,
-    [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER,
+    [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
+    [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST,
+    [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
+    [D3DRS_FOGEND] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
+    [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST,
     [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_OTHER,
     [D3DRS_STENCILENABLE] = NINE_STATE_DSA,
     [D3DRS_STENCILFAIL] = NINE_STATE_DSA,
@@ -1394,7 +1507,7 @@
     [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER,
     [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER,
-    [D3DRS_POINTSIZE_MIN] = NINE_STATE_MISC_CONST,
+    [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER,
     [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER,
     [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER,
@@ -1404,7 +1517,7 @@
     [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK,
     [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED,
     [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED,
-    [D3DRS_POINTSIZE_MAX] = NINE_STATE_MISC_CONST,
+    [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER,
     [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER,
     [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND,
     [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER,
@@ -1446,6 +1559,8 @@
     [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND
 };
 
+/* Misc */
+
 D3DMATRIX *
 nine_state_access_transform(struct nine_state *state, D3DTRANSFORMSTATETYPE t,
                             boolean alloc)
@@ -1601,4 +1716,3 @@
         return "(invalid)";
     }
 }
-
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_state.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_state.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/nine_state.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/nine_state.h	2015-09-16 14:36:09.000000000 +0000
@@ -33,8 +33,7 @@
 
 #define NINED3DRS_VSPOINTSIZE (D3DRS_BLENDOPALPHA + 1)
 #define NINED3DRS_RTMASK      (D3DRS_BLENDOPALPHA + 2)
-#define NINED3DRS_ZBIASSCALE  (D3DRS_BLENDOPALPHA + 3)
-#define NINED3DRS_ALPHACOVERAGE  (D3DRS_BLENDOPALPHA + 4)
+#define NINED3DRS_ALPHACOVERAGE  (D3DRS_BLENDOPALPHA + 3)
 
 #define D3DRS_LAST       D3DRS_BLENDOPALPHA
 #define NINED3DRS_LAST   NINED3DRS_ALPHACOVERAGE /* 213 */
@@ -67,17 +66,26 @@
 #define NINE_STATE_BLEND_COLOR (1 << 16)
 #define NINE_STATE_STENCIL_REF (1 << 17)
 #define NINE_STATE_SAMPLE_MASK (1 << 18)
-#define NINE_STATE_MISC_CONST  (1 << 19)
-#define NINE_STATE_FF          (0x1f << 20)
-#define NINE_STATE_FF_VS       (0x17 << 20)
-#define NINE_STATE_FF_PS       (0x18 << 20)
-#define NINE_STATE_FF_LIGHTING (1 << 20)
-#define NINE_STATE_FF_MATERIAL (1 << 21)
-#define NINE_STATE_FF_VSTRANSF (1 << 22)
-#define NINE_STATE_FF_PSSTAGES (1 << 23)
-#define NINE_STATE_FF_OTHER    (1 << 24)
-#define NINE_STATE_ALL          0x1ffffff
-#define NINE_STATE_UNHANDLED   (1 << 25)
+#define NINE_STATE_FF          (0x1f << 19)
+#define NINE_STATE_FF_VS       (0x17 << 19)
+#define NINE_STATE_FF_PS       (0x18 << 19)
+#define NINE_STATE_FF_LIGHTING (1 << 19)
+#define NINE_STATE_FF_MATERIAL (1 << 20)
+#define NINE_STATE_FF_VSTRANSF (1 << 21)
+#define NINE_STATE_FF_PSSTAGES (1 << 22)
+#define NINE_STATE_FF_OTHER    (1 << 23)
+#define NINE_STATE_FOG_SHADER  (1 << 24)
+#define NINE_STATE_PS1X_SHADER (1 << 25)
+#define NINE_STATE_ALL          0x3ffffff
+#define NINE_STATE_UNHANDLED   (1 << 26)
+
+#define NINE_STATE_COMMIT_DSA  (1 << 0) /* NINE_STATE_COMMIT_*: bits of nine_state.commit */
+#define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
+#define NINE_STATE_COMMIT_BLEND (1 << 2)
+#define NINE_STATE_COMMIT_CONST_VS (1 << 3)
+#define NINE_STATE_COMMIT_CONST_PS (1 << 4)
+#define NINE_STATE_COMMIT_VS (1 << 5)
+#define NINE_STATE_COMMIT_PS (1 << 6) /* tested before commit_ps() is called */
 
 
 #define NINE_MAX_SIMULTANEOUS_RENDERTARGETS 4
@@ -94,6 +102,8 @@
      NINE_MAX_CONST_I * 4 * sizeof(int))
 
 
+#define NINE_MAX_TEXTURE_STAGES 8
+
 #define NINE_MAX_LIGHTS        65536
 #define NINE_MAX_LIGHTS_ACTIVE 8
 
@@ -124,7 +134,6 @@
         uint16_t vs_const_b; /* NINE_MAX_CONST_B == 16 */
         uint16_t ps_const_b;
         uint8_t ucp;
-        boolean srgb;
     } changed;
 
     struct NineSurface9 *rt[NINE_MAX_SIMULTANEOUS_RENDERTARGETS];
@@ -143,13 +152,13 @@
     int    vs_const_i[NINE_MAX_CONST_I][4];
     BOOL   vs_const_b[NINE_MAX_CONST_B];
     float *vs_lconstf_temp;
-    uint32_t vs_key;
 
     struct NinePixelShader9 *ps;
     float *ps_const_f;
     int    ps_const_i[NINE_MAX_CONST_I][4];
     BOOL   ps_const_b[NINE_MAX_CONST_B];
-    uint32_t ps_key;
+    float *ps_lconstf_temp;
+    uint32_t bumpmap_vars[6 * NINE_MAX_TEXTURE_STAGES];
 
     struct {
         void *vs;
@@ -184,13 +193,9 @@
     struct {
         struct {
             uint32_t group;
-            uint32_t tex_stage[NINE_MAX_SAMPLERS][(NINED3DTSS_COUNT + 31) / 32];
+            uint32_t tex_stage[NINE_MAX_TEXTURE_STAGES][(NINED3DTSS_COUNT + 31) / 32];
             uint32_t transform[(NINED3DTS_COUNT + 31) / 32];
         } changed;
-        struct {
-            boolean vs_const;
-            boolean ps_const;
-        } clobber;
 
         D3DMATRIX *transform; /* access only via nine_state_access_transform */
         unsigned num_transforms;
@@ -205,8 +210,19 @@
 
         D3DMATERIAL9 material;
 
-        DWORD tex_stage[NINE_MAX_SAMPLERS][NINED3DTSS_COUNT];
+        DWORD tex_stage[NINE_MAX_TEXTURE_STAGES][NINED3DTSS_COUNT];
     } ff;
+
+    uint32_t commit;
+    struct {
+        struct pipe_depth_stencil_alpha_state dsa;
+        struct pipe_rasterizer_state rast;
+        struct pipe_blend_state blend;
+        struct pipe_constant_buffer cb_vs;
+        struct pipe_constant_buffer cb_ps;
+        struct pipe_constant_buffer cb_vs_ff;
+        struct pipe_constant_buffer cb_ps_ff;
+    } pipe;
 };
 
 /* map D3DRS -> NINE_STATE_x
@@ -220,8 +236,10 @@
 
 struct NineDevice9;
 
-boolean nine_update_state(struct NineDevice9 *, uint32_t group_mask);
+void nine_update_state_framebuffer(struct NineDevice9 *);
+boolean nine_update_state(struct NineDevice9 *);
 
+void nine_state_restore_non_cso(struct NineDevice9 *device);
 void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *,
                              boolean is_reset);
 void nine_state_clear(struct nine_state *, const boolean device);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/pixelshader9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/pixelshader9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/pixelshader9.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/pixelshader9.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
         return hr;
 
     if (cso) {
-        This->variant.cso = cso;
+        This->ff_cso = cso;
         return D3D_OK;
     }
     device = This->base.device;
@@ -57,6 +57,8 @@
     info.const_b_base = NINE_CONST_B_BASE(device->max_ps_const_f) / 16;
     info.sampler_mask_shadow = 0x0;
     info.sampler_ps1xtypes = 0x0;
+    info.fog_enable = 0;
+    info.projected = 0;
 
     hr = nine_translate_shader(device, &info);
     if (FAILED(hr))
@@ -69,9 +71,13 @@
     This->byte_code.size = info.byte_size;
 
     This->variant.cso = info.cso;
+    This->last_cso = info.cso;
+    This->last_key = 0;
+
     This->sampler_mask = info.sampler_mask;
     This->rt_mask = info.rt_mask;
     This->const_used_size = info.const_used_size;
+    This->bumpenvmat_needed = info.bumpenvmat_needed;
     /* no constant relative addressing for ps */
     assert(info.lconstf.data == NULL);
     assert(info.lconstf.ranges == NULL);
@@ -82,11 +88,12 @@
 void
 NinePixelShader9_dtor( struct NinePixelShader9 *This )
 {
-    DBG("This=%p cso=%p\n", This, This->variant.cso);
+    DBG("This=%p\n", This);
 
     if (This->base.device) {
         struct pipe_context *pipe = This->base.device->pipe;
-        struct nine_shader_variant *var = &This->variant;
+        struct nine_shader_variant64 *var = &This->variant;
+
         do {
             if (var->cso) {
                 if (This->base.device->state.cso.ps == var->cso)
@@ -95,8 +102,14 @@
             }
             var = var->next;
         } while (var);
+
+        if (This->ff_cso) {
+            if (This->ff_cso == This->base.device->state.cso.ps)
+                pipe->bind_fs_state(pipe, NULL);
+            pipe->delete_fs_state(pipe, This->ff_cso);
+        }
     }
-    nine_shader_variants_free(&This->variant);
+    nine_shader_variants_free64(&This->variant);
 
     FREE((void *)This->byte_code.tokens); /* const_cast */
 
@@ -124,10 +137,16 @@
 }
 
 void *
-NinePixelShader9_GetVariant( struct NinePixelShader9 *This,
-                             uint32_t key )
+NinePixelShader9_GetVariant( struct NinePixelShader9 *This )
 {
-    void *cso = nine_shader_variant_get(&This->variant, key);
+    void *cso;
+    uint64_t key;
+
+    key = This->next_key;
+    if (key == This->last_key)
+        return This->last_cso;
+
+    cso = nine_shader_variant_get64(&This->variant, key);
     if (!cso) {
         struct NineDevice9 *device = This->base.device;
         struct nine_shader_info info;
@@ -139,13 +158,20 @@
         info.byte_code = This->byte_code.tokens;
         info.sampler_mask_shadow = key & 0xffff;
         info.sampler_ps1xtypes = key;
+        info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
+        info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE];
+        info.projected = (key >> 48) & 0xffff;
 
         hr = nine_translate_shader(This->base.device, &info);
         if (FAILED(hr))
             return NULL;
-        nine_shader_variant_add(&This->variant, key, info.cso);
+        nine_shader_variant_add64(&This->variant, key, info.cso);
         cso = info.cso;
     }
+
+    This->last_key = key;
+    This->last_cso = cso;
+
     return cso;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/pixelshader9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/pixelshader9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/pixelshader9.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/pixelshader9.h	2015-09-16 14:36:09.000000000 +0000
@@ -25,13 +25,16 @@
 
 #include "iunknown.h"
 #include "nine_shader.h"
+#include "nine_state.h"
+#include "basetexture9.h"
+#include "nine_ff.h"
 
 struct nine_lconstf;
 
 struct NinePixelShader9
 {
     struct NineUnknown base;
-    struct nine_shader_variant variant;
+    struct nine_shader_variant64 variant;
 
     struct {
         const DWORD *tokens;
@@ -41,21 +44,67 @@
 
     unsigned const_used_size; /* in bytes */
 
+    uint8_t bumpenvmat_needed;
     uint16_t sampler_mask;
-    uint16_t sampler_mask_shadow;
     uint8_t rt_mask;
 
     uint64_t ff_key[6];
+    void *ff_cso;
+
+    uint64_t last_key;
+    void *last_cso;
+
+    uint64_t next_key;
 };
-static INLINE struct NinePixelShader9 *
+static inline struct NinePixelShader9 *
 NinePixelShader9( void *data )
 {
     return (struct NinePixelShader9 *)data;
 }
 
+static inline BOOL
+NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps,
+                            struct nine_state *state )
+{
+    uint16_t samplers_shadow;
+    uint32_t samplers_ps1_types;
+    uint16_t projected;
+    uint64_t key;
+    BOOL res;
+    /* Build the variant key from state; returns whether it differs from ps->last_key. */
+    if (unlikely(ps->byte_code.version < 0x20)) {
+        /* no depth textures, but variable targets: pstype at bit 2*s per sampler in sampler_mask (1 if no texture) */
+        uint32_t m = ps->sampler_mask;
+        samplers_ps1_types = 0;
+        while (m) {
+            int s = ffs(m) - 1; /* index of the lowest set bit still in m */
+            m &= ~(1 << s);
+            samplers_ps1_types |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2);
+        }
+        key = samplers_ps1_types;
+    } else {
+        samplers_shadow = (uint16_t)((state->samplers_shadow & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0));
+        key = samplers_shadow & ps->sampler_mask; /* shadow bits masked by ps->sampler_mask */
+    }
+    /* version < 0x30: fog enable at bit 32, fog table mode from bit 33 (assumes enable is 0/1 - TODO confirm) */
+    if (ps->byte_code.version < 0x30) {
+        key |= ((uint64_t)state->rs[D3DRS_FOGENABLE]) << 32;
+        key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33;
+    }
+    /* version < 0x14: value of nine_ff_get_projected_key() goes in the top 16 bits */
+    if (unlikely(ps->byte_code.version < 0x14)) {
+        projected = nine_ff_get_projected_key(state);
+        key |= ((uint64_t) projected) << 48;
+    }
+    /* next_key is written only when the key changed; TRUE is returned in that case */
+    res = ps->last_key != key;
+    if (res)
+        ps->next_key = key;
+    return res;
+}
+
 void *
-NinePixelShader9_GetVariant( struct NinePixelShader9 *vs,
-                             uint32_t key );
+NinePixelShader9_GetVariant( struct NinePixelShader9 *ps );
 
 /*** public ***/
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/query9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/query9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/query9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/query9.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,7 @@
 
 #define GET_DATA_SIZE_CASE2(a, b) case D3DQUERYTYPE_##a: return sizeof(D3DDEVINFO_##b)
 #define GET_DATA_SIZE_CASET(a, b) case D3DQUERYTYPE_##a: return sizeof(b)
-static INLINE DWORD
+static inline DWORD
 nine_query_result_size(D3DQUERYTYPE type)
 {
     switch (type) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/query9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/query9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/query9.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/query9.h	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,7 @@
     enum nine_query_state state;
     boolean instant; /* true if D3DISSUE_BEGIN is not needed / invalid */
 };
-static INLINE struct NineQuery9 *
+static inline struct NineQuery9 *
 NineQuery9( void *data )
 {
     return (struct NineQuery9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/resource9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/resource9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/resource9.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/resource9.c	2015-09-16 14:36:09.000000000 +0000
@@ -161,20 +161,22 @@
                               DWORD *pSizeOfData )
 {
     struct pheader *header;
+    DWORD sizeofdata;
 
     DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n",
         This, refguid, pData, pSizeOfData);
 
-    user_assert(pSizeOfData, E_POINTER);
-
     header = util_hash_table_get(This->pdata, refguid);
     if (!header) { return D3DERR_NOTFOUND; }
 
+    user_assert(pSizeOfData, E_POINTER);
+    sizeofdata = *pSizeOfData;
+    *pSizeOfData = header->size;
+
     if (!pData) {
-        *pSizeOfData = header->size;
         return D3D_OK;
     }
-    if (*pSizeOfData < header->size) {
+    if (sizeofdata < header->size) {
         return D3DERR_MOREDATA;
     }
 
@@ -206,10 +208,13 @@
 NineResource9_SetPriority( struct NineResource9 *This,
                            DWORD PriorityNew )
 {
-    DWORD prev = This->priority;
-
+    DWORD prev;
     DBG("This=%p, PriorityNew=%d\n", This, PriorityNew);
 
+    if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE)
+        return 0;
+
+    prev = This->priority;
     This->priority = PriorityNew;
     return prev;
 }
@@ -217,6 +222,9 @@
 DWORD WINAPI
 NineResource9_GetPriority( struct NineResource9 *This )
 {
+    if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE)
+        return 0; /* priority is reported only for managed, non-surface resources */
+
     return This->priority;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/resource9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/resource9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/resource9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/resource9.h	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
     /* for [GS]etPrivateData/FreePrivateData */
     struct util_hash_table *pdata;
 };
-static INLINE struct NineResource9 *
+static inline struct NineResource9 *
 NineResource9( void *data )
 {
     return (struct NineResource9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/stateblock9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/stateblock9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/stateblock9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/stateblock9.c	2015-09-16 14:36:09.000000000 +0000
@@ -251,7 +251,7 @@
         dst->ff.material = src->ff.material;
 
     if (mask->changed.group & NINE_STATE_FF_PSSTAGES) {
-        for (s = 0; s < NINE_MAX_SAMPLERS; ++s) {
+        for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) {
             for (i = 0; i < NINED3DTSS_COUNT; ++i)
                 if (mask->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32)))
                     dst->ff.tex_stage[s][i] = src->ff.tex_stage[s][i];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/stateblock9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/stateblock9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/stateblock9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/stateblock9.h	2015-09-16 14:36:09.000000000 +0000
@@ -43,7 +43,7 @@
 
     enum nine_stateblock_type type;
 };
-static INLINE struct NineStateBlock9 *
+static inline struct NineStateBlock9 *
 NineStateBlock9( void *data )
 {
     return (struct NineStateBlock9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/surface9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/surface9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/surface9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/surface9.c	2015-09-16 14:36:09.000000000 +0000
@@ -104,11 +104,11 @@
     /* Ram buffer with no parent. Has to allocate the resource itself */
     if (!pResource && !pContainer) {
         assert(!user_buffer);
-        This->data = MALLOC(
+        This->data = align_malloc(
             nine_format_get_level_alloc_size(This->base.info.format,
                                              pDesc->Width,
                                              pDesc->Height,
-                                             0));
+                                             0), 32);
         if (!This->data)
             return E_OUTOFMEMORY;
     }
@@ -261,7 +261,7 @@
 }
 
 /* Add the dirty rects to the source texture */
-INLINE void
+inline void
 NineSurface9_AddDirtyRect( struct NineSurface9 *This,
                            const struct pipe_box *box )
 {
@@ -273,7 +273,7 @@
             This->texture == D3DRTYPE_CUBETEXTURE ||
             This->texture == D3DRTYPE_TEXTURE);
 
-    if (This->base.pool != D3DPOOL_MANAGED)
+    if (This->base.pool == D3DPOOL_DEFAULT)
         return;
 
     /* Add a dirty rect to level 0 of the parent texture */
@@ -287,7 +287,7 @@
             NineTexture9(This->base.base.container);
 
         NineTexture9_AddDirtyRect(tex, &dirty_rect);
-    } else { /* This->texture == D3DRTYPE_CUBETEXTURE */
+    } else if (This->texture == D3DRTYPE_CUBETEXTURE) {
         struct NineCubeTexture9 *ctex =
             NineCubeTexture9(This->base.base.container);
 
@@ -295,7 +295,7 @@
     }
 }
 
-static INLINE uint8_t *
+static inline uint8_t *
 NineSurface9_GetSystemMemPointer(struct NineSurface9 *This, int x, int y)
 {
     unsigned x_offset = util_format_get_stride(This->base.info.format, x);
@@ -323,6 +323,13 @@
         nine_D3DLOCK_to_str(Flags));
     NineSurface9_Dump(This);
 
+    /* check if it's already locked */
+    user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
+
+    /* set pBits to NULL after lock_count check */
+    user_assert(pLockedRect, E_POINTER);
+    pLockedRect->pBits = NULL;
+
 #ifdef NINE_STRICT
     user_assert(This->base.pool != D3DPOOL_DEFAULT ||
                 (resource && (resource->flags & NINE_RESOURCE_FLAG_LOCKABLE)),
@@ -337,19 +344,17 @@
     user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)),
                 D3DERR_INVALIDCALL);
 
-    /* check if it's already locked */
-    user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
-    user_assert(pLockedRect, E_POINTER);
-
     user_assert(This->desc.MultiSampleType == D3DMULTISAMPLE_NONE,
                 D3DERR_INVALIDCALL);
 
-    if (pRect && This->base.pool == D3DPOOL_DEFAULT &&
-        util_format_is_compressed(This->base.info.format)) {
+    if (pRect && This->desc.Pool == D3DPOOL_DEFAULT &&
+        compressed_format (This->desc.Format)) {
         const unsigned w = util_format_get_blockwidth(This->base.info.format);
         const unsigned h = util_format_get_blockheight(This->base.info.format);
-        user_assert(!(pRect->left % w) && !(pRect->right % w) &&
-                    !(pRect->top % h) && !(pRect->bottom % h),
+        user_assert((pRect->left == 0 && pRect->right == This->desc.Width &&
+                     pRect->top == 0 && pRect->bottom == This->desc.Height) ||
+                    (!(pRect->left % w) && !(pRect->right % w) &&
+                    !(pRect->top % h) && !(pRect->bottom % h)),
                     D3DERR_INVALIDCALL);
     }
 
@@ -363,13 +368,9 @@
         usage |= PIPE_TRANSFER_DONTBLOCK;
 
     if (pRect) {
+        /* Windows XP accepts invalid locking rectangles, Windows 7 rejects
+         * them. Use Windows XP behaviour for now. */
         rect_to_pipe_box(&box, pRect);
-        if (u_box_clip_2d(&box, &box, This->desc.Width,
-                          This->desc.Height) < 0) {
-            DBG("pRect clipped by Width=%u Height=%u\n",
-                This->desc.Width, This->desc.Height);
-            return D3DERR_INVALIDCALL;
-        }
     } else {
         u_box_origin_2d(This->desc.Width, This->desc.Height, &box);
     }
@@ -463,140 +464,92 @@
     (void *)NineSurface9_ReleaseDC
 };
 
-HRESULT
-NineSurface9_CopySurface( struct NineSurface9 *This,
-                          struct NineSurface9 *From,
-                          const POINT *pDestPoint,
-                          const RECT *pSourceRect )
+/* When this function is called, we have already checked
+ * that the copy regions fit the surfaces. */
+void
+NineSurface9_CopyMemToDefault( struct NineSurface9 *This,
+                               struct NineSurface9 *From,
+                               const POINT *pDestPoint,
+                               const RECT *pSourceRect )
 {
     struct pipe_context *pipe = This->pipe;
     struct pipe_resource *r_dst = This->base.resource;
-    struct pipe_resource *r_src = From->base.resource;
-    struct pipe_transfer *transfer;
-    struct pipe_box src_box;
     struct pipe_box dst_box;
-    uint8_t *p_dst;
     const uint8_t *p_src;
+    int src_x, src_y, dst_x, dst_y, copy_width, copy_height;
 
-    DBG("This=%p From=%p pDestPoint=%p pSourceRect=%p\n",
-        This, From, pDestPoint, pSourceRect);
-
-    assert(This->base.pool != D3DPOOL_MANAGED &&
-           From->base.pool != D3DPOOL_MANAGED);
+    assert(This->base.pool == D3DPOOL_DEFAULT &&
+           From->base.pool == D3DPOOL_SYSTEMMEM);
 
-    user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL);
+    if (pDestPoint) {
+        dst_x = pDestPoint->x;
+        dst_y = pDestPoint->y;
+    } else {
+        dst_x = 0;
+        dst_y = 0;
+    }
 
-    dst_box.x = pDestPoint ? pDestPoint->x : 0;
-    dst_box.y = pDestPoint ? pDestPoint->y : 0;
+    if (pSourceRect) {
+        src_x = pSourceRect->left;
+        src_y = pSourceRect->top;
+        copy_width = pSourceRect->right - pSourceRect->left;
+        copy_height = pSourceRect->bottom - pSourceRect->top;
+    } else {
+        src_x = 0;
+        src_y = 0;
+        copy_width = From->desc.Width;
+        copy_height = From->desc.Height;
+    }
 
-    user_assert(dst_box.x >= 0 &&
-                dst_box.y >= 0, D3DERR_INVALIDCALL);
+    u_box_2d_zslice(dst_x, dst_y, This->layer,
+                    copy_width, copy_height, &dst_box);
 
-    dst_box.z = This->layer;
-    src_box.z = From->layer;
+    p_src = NineSurface9_GetSystemMemPointer(From, src_x, src_y);
 
-    dst_box.depth = 1;
-    src_box.depth = 1;
+    pipe->transfer_inline_write(pipe, r_dst, This->level,
+                                0, /* WRITE|DISCARD are implicit */
+                                &dst_box, p_src, From->stride, 0);
 
-    if (pSourceRect) {
-        /* make sure it doesn't range outside the source surface */
-        user_assert(pSourceRect->left >= 0 &&
-                    pSourceRect->right <= From->desc.Width &&
-                    pSourceRect->top >= 0 &&
-                    pSourceRect->bottom <= From->desc.Height,
-                    D3DERR_INVALIDCALL);
-        if (rect_to_pipe_box_xy_only_clamp(&src_box, pSourceRect))
-            return D3D_OK;
-    } else {
-        src_box.x = 0;
-        src_box.y = 0;
-        src_box.width = From->desc.Width;
-        src_box.height = From->desc.Height;
-    }
-
-    /* limits */
-    dst_box.width = This->desc.Width - dst_box.x;
-    dst_box.height = This->desc.Height - dst_box.y;
-
-    user_assert(src_box.width <= dst_box.width &&
-                src_box.height <= dst_box.height, D3DERR_INVALIDCALL);
-
-    dst_box.width = src_box.width;
-    dst_box.height = src_box.height;
-
-    /* check source block align for compressed textures */
-    if (util_format_is_compressed(From->base.info.format) &&
-        ((src_box.width != From->desc.Width) ||
-         (src_box.height != From->desc.Height))) {
-        const unsigned w = util_format_get_blockwidth(From->base.info.format);
-        const unsigned h = util_format_get_blockheight(From->base.info.format);
-        user_assert(!(src_box.width % w) &&
-                    !(src_box.height % h),
-                    D3DERR_INVALIDCALL);
-    }
+    NineSurface9_MarkContainerDirty(This);
+}
 
-    /* check destination block align for compressed textures */
-    if (util_format_is_compressed(This->base.info.format) &&
-        ((dst_box.width != This->desc.Width) ||
-         (dst_box.height != This->desc.Height) ||
-         dst_box.x != 0 ||
-         dst_box.y != 0)) {
-        const unsigned w = util_format_get_blockwidth(This->base.info.format);
-        const unsigned h = util_format_get_blockheight(This->base.info.format);
-        user_assert(!(dst_box.x % w) && !(dst_box.width % w) &&
-                    !(dst_box.y % h) && !(dst_box.height % h),
-                    D3DERR_INVALIDCALL);
-    }
+void
+NineSurface9_CopyDefaultToMem( struct NineSurface9 *This,
+                               struct NineSurface9 *From )
+{
+    struct pipe_context *pipe = This->pipe;
+    struct pipe_resource *r_src = From->base.resource;
+    struct pipe_transfer *transfer;
+    struct pipe_box src_box;
+    uint8_t *p_dst;
+    const uint8_t *p_src;
 
-    if (r_dst && r_src) {
-        pipe->resource_copy_region(pipe,
-                                   r_dst, This->level,
-                                   dst_box.x, dst_box.y, dst_box.z,
-                                   r_src, From->level,
-                                   &src_box);
-    } else
-    if (r_dst) {
-        p_src = NineSurface9_GetSystemMemPointer(From, src_box.x, src_box.y);
-
-        pipe->transfer_inline_write(pipe, r_dst, This->level,
-                                    0, /* WRITE|DISCARD are implicit */
-                                    &dst_box, p_src, From->stride, 0);
-    } else
-    if (r_src) {
-        p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0);
-
-        p_src = pipe->transfer_map(pipe, r_src, From->level,
-                                   PIPE_TRANSFER_READ,
-                                   &src_box, &transfer);
-        if (!p_src)
-            return D3DERR_DRIVERINTERNALERROR;
-
-        util_copy_rect(p_dst, This->base.info.format,
-                       This->stride, dst_box.x, dst_box.y,
-                       dst_box.width, dst_box.height,
-                       p_src,
-                       transfer->stride, src_box.x, src_box.y);
+    assert(This->base.pool == D3DPOOL_SYSTEMMEM &&
+           From->base.pool == D3DPOOL_DEFAULT);
 
-        pipe->transfer_unmap(pipe, transfer);
-    } else {
-        p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0);
-        p_src = NineSurface9_GetSystemMemPointer(From, 0, 0);
+    assert(This->desc.Width == From->desc.Width);
+    assert(This->desc.Height == From->desc.Height);
 
-        util_copy_rect(p_dst, This->base.info.format,
-                       This->stride, dst_box.x, dst_box.y,
-                       dst_box.width, dst_box.height,
-                       p_src,
-                       From->stride, src_box.x, src_box.y);
-    }
+    u_box_origin_2d(This->desc.Width, This->desc.Height, &src_box);
+    src_box.z = From->layer;
 
-    if (This->base.pool == D3DPOOL_DEFAULT)
-        NineSurface9_MarkContainerDirty(This);
-    if (!r_dst && This->base.resource)
-        NineSurface9_AddDirtyRect(This, &dst_box);
+    p_src = pipe->transfer_map(pipe, r_src, From->level,
+                               PIPE_TRANSFER_READ,
+                               &src_box, &transfer);
+    p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0);
+
+    assert (p_src && p_dst);
+
+    util_copy_rect(p_dst, This->base.info.format,
+                   This->stride, 0, 0,
+                   This->desc.Width, This->desc.Height,
+                   p_src,
+                   transfer->stride, 0, 0);
 
-    return D3D_OK;
+    pipe->transfer_unmap(pipe, transfer);
 }
 
+
 /* Gladly, rendering to a MANAGED surface is not permitted, so we will
  * never have to do the reverse, i.e. download the surface.
  */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/surface9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/surface9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/surface9.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/surface9.h	2015-09-16 14:36:09.000000000 +0000
@@ -50,7 +50,7 @@
     uint8_t *data; /* system memory backing */
     unsigned stride; /* for system memory backing */
 };
-static INLINE struct NineSurface9 *
+static inline struct NineSurface9 *
 NineSurface9( void *data )
 {
     return (struct NineSurface9 *)data;
@@ -89,7 +89,7 @@
 struct pipe_surface *
 NineSurface9_CreatePipeSurface( struct NineSurface9 *This, const int sRGB );
 
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
 NineSurface9_GetSurface( struct NineSurface9 *This, int sRGB )
 {
     if (This->surface[sRGB])
@@ -97,13 +97,13 @@
     return NineSurface9_CreatePipeSurface(This, sRGB);
 }
 
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 NineSurface9_GetResource( struct NineSurface9 *This )
 {
     return This->base.resource;
 }
 
-static INLINE void
+static inline void
 NineSurface9_SetResource( struct NineSurface9 *This,
                           struct pipe_resource *resource, unsigned level )
 {
@@ -125,13 +125,17 @@
 NineSurface9_UploadSelf( struct NineSurface9 *This,
                          const struct pipe_box *damaged );
 
-HRESULT
-NineSurface9_CopySurface( struct NineSurface9 *This,
-                          struct NineSurface9 *From,
-                          const POINT *pDestPoint,
-                          const RECT *pSourceRect );
+void
+NineSurface9_CopyMemToDefault( struct NineSurface9 *This,
+                               struct NineSurface9 *From,
+                               const POINT *pDestPoint,
+                               const RECT *pSourceRect );
+
+void
+NineSurface9_CopyDefaultToMem( struct NineSurface9 *This,
+                               struct NineSurface9 *From );
 
-static INLINE boolean
+static inline boolean
 NineSurface9_IsOffscreenPlain (struct NineSurface9 *This )
 {
     return This->base.usage == 0 && !This->texture;
@@ -141,7 +145,7 @@
 void
 NineSurface9_Dump( struct NineSurface9 *This );
 #else
-static INLINE void
+static inline void
 NineSurface9_Dump( struct NineSurface9 *This ) { }
 #endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/swapchain9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/swapchain9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/swapchain9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/swapchain9.c	2015-09-16 14:36:09.000000000 +0000
@@ -184,7 +184,9 @@
 
     /* Note: It is the role of the backend to fill if necessary
      * BackBufferWidth and BackBufferHeight */
-    ID3DPresent_SetPresentParameters(This->present, pParams, This->mode);
+    hr = ID3DPresent_SetPresentParameters(This->present, pParams, This->mode);
+    if (hr != D3D_OK)
+        return hr;
 
     /* When we have flip behaviour, d3d9 expects we get back the screen buffer when we flip.
      * Here we don't get back the initial content of the screen. To emulate the behaviour
@@ -575,9 +577,10 @@
         blit.filter = PIPE_TEX_FILTER_NEAREST;
         blit.scissor_enable = FALSE;
 
-        ID3DPresent_GetCursorPos(This->present, &device->cursor.pos);
-
-        /* NOTE: blit messes up when box.x + box.width < 0, fix driver */
+        /* NOTE: blit messes up when box.x + box.width < 0, fix driver
+         * NOTE2: device->cursor.pos contains coordinates relative to the screen.
+         * This also happens to be the position of the cursor when we are fullscreen.
+         * We don't use the sw cursor in windowed mode. */
         blit.dst.box.x = MAX2(device->cursor.pos.x, 0) - device->cursor.hotspot.x;
         blit.dst.box.y = MAX2(device->cursor.pos.y, 0) - device->cursor.hotspot.y;
         blit.dst.box.width = blit.src.box.width;
@@ -587,13 +590,14 @@
             blit.src.box.width, blit.src.box.height,
             blit.dst.box.x, blit.dst.box.y);
 
+        blit.alpha_blend = TRUE;
         This->pipe->blit(This->pipe, &blit);
     }
 
     if (device->hud && resource) {
         hud_draw(device->hud, resource); /* XXX: no offset */
         /* HUD doesn't clobber stipple */
-        NineDevice9_RestoreNonCSOState(device, ~0x2);
+        nine_state_restore_non_cso(device);
     }
 }
 
@@ -631,7 +635,7 @@
     return;
 }
 
-static INLINE HRESULT
+static inline HRESULT
 present( struct NineSwapChain9 *This,
          const RECT *pSourceRect,
          const RECT *pDestRect,
@@ -704,6 +708,7 @@
         blit.mask = PIPE_MASK_RGBA;
         blit.filter = PIPE_TEX_FILTER_NEAREST;
         blit.scissor_enable = FALSE;
+        blit.alpha_blend = FALSE;
 
         This->pipe->blit(This->pipe, &blit);
     }
@@ -726,7 +731,7 @@
         BOOL still_draw = FALSE;
         fence = swap_fences_see_front(This);
         if (fence) {
-            still_draw = !This->screen->fence_signalled(This->screen, fence);
+            still_draw = !This->screen->fence_finish(This->screen, fence, 0);
             This->screen->fence_reference(This->screen, &fence, NULL);
         }
         if (still_draw)
@@ -835,7 +840,7 @@
     ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]);
 
     This->base.device->state.changed.group |= NINE_STATE_FB;
-    nine_update_state(This->base.device, NINE_STATE_FB);
+    nine_update_state_framebuffer(This->base.device);
 
     return hr;
 }
@@ -856,6 +861,8 @@
     DBG("GetFrontBufferData: This=%p pDestSurface=%p\n",
         This, pDestSurface);
 
+    user_assert(dest_surface->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL);
+
     width = dest_surface->desc.Width;
     height = dest_surface->desc.Height;
 
@@ -870,7 +877,7 @@
     desc.MultiSampleQuality = 0;
     desc.Width = width;
     desc.Height = height;
-    /* NineSurface9_CopySurface needs same format. */
+    /* NineSurface9_CopyDefaultToMem needs same format. */
     desc.Format = dest_surface->desc.Format;
     desc.Usage = D3DUSAGE_RENDERTARGET;
     hr = NineSurface9_new(pDevice, NineUnknown(This), temp_resource, NULL, 0,
@@ -883,7 +890,7 @@
 
     ID3DPresent_FrontBufferCopy(This->present, temp_handle);
 
-    NineSurface9_CopySurface(dest_surface, temp_surface, NULL, NULL);
+    NineSurface9_CopyDefaultToMem(dest_surface, temp_surface);
 
     ID3DPresent_DestroyD3DWindowBuffer(This->present, temp_handle);
     NineUnknown_Destroy(NineUnknown(temp_surface));
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/swapchain9ex.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/swapchain9ex.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/swapchain9ex.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/swapchain9ex.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,7 +29,7 @@
 {
     struct NineSwapChain9 base;
 };
-static INLINE struct NineSwapChain9Ex *
+static inline struct NineSwapChain9Ex *
 NineSwapChain9Ex( void *data )
 {
     return (struct NineSwapChain9Ex *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/swapchain9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/swapchain9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/swapchain9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/swapchain9.h	2015-09-16 14:36:09.000000000 +0000
@@ -76,7 +76,7 @@
     BOOL enable_threadpool;
 };
 
-static INLINE struct NineSwapChain9 *
+static inline struct NineSwapChain9 *
 NineSwapChain9( void *data )
 {
     return (struct NineSwapChain9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/texture9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/texture9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/texture9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/texture9.c	2015-09-16 14:36:09.000000000 +0000
@@ -101,6 +101,13 @@
     if (Format != D3DFMT_NULL && pf == PIPE_FORMAT_NONE)
         return D3DERR_INVALIDCALL;
 
+    if (compressed_format(Format)) {
+        const unsigned w = util_format_get_blockwidth(pf);
+        const unsigned h = util_format_get_blockheight(pf);
+
+        user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
+    }
+
     info->screen = screen;
     info->target = PIPE_TEXTURE_2D;
     info->format = pf;
@@ -152,10 +159,10 @@
          * apps access sublevels of texture even if they locked only first
          * level) */
         level_offsets = alloca(sizeof(unsigned) * (info->last_level + 1));
-        user_buffer = MALLOC(
+        user_buffer = align_malloc(
             nine_format_get_size_and_offsets(pf, level_offsets,
                                              Width, Height,
-                                             info->last_level));
+                                             info->last_level), 32);
         This->managed_buffer = user_buffer;
         if (!This->managed_buffer)
             return E_OUTOFMEMORY;
@@ -202,6 +209,9 @@
             return hr;
     }
 
+    /* Textures start initially dirty */
+    This->dirty_rect.width = Width;
+    This->dirty_rect.height = Height;
     This->dirty_rect.depth = 1; /* widht == 0 means empty, depth stays 1 */
 
     if (pSharedHandle && !*pSharedHandle) {/* Pool == D3DPOOL_SYSTEMMEM */
@@ -219,7 +229,8 @@
     if (This->surfaces) {
         /* The surfaces should have 0 references and be unbound now. */
         for (l = 0; l <= This->base.base.info.last_level; ++l)
-            NineUnknown_Destroy(&This->surfaces[l]->base.base);
+            if (This->surfaces[l])
+                NineUnknown_Destroy(&This->surfaces[l]->base.base);
         FREE(This->surfaces);
     }
 
@@ -295,18 +306,22 @@
         pDirtyRect ? pDirtyRect->left : 0, pDirtyRect ? pDirtyRect->top : 0,
         pDirtyRect ? pDirtyRect->right : 0, pDirtyRect ? pDirtyRect->bottom : 0);
 
-    /* Tracking dirty regions on DEFAULT or SYSTEMMEM resources is pointless,
+    /* Tracking dirty regions on DEFAULT resources is pointless,
      * because we always write to the final storage. Just marked it dirty in
      * case we need to generate mip maps.
      */
-    if (This->base.base.pool != D3DPOOL_MANAGED) {
-        if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP)
+    if (This->base.base.pool == D3DPOOL_DEFAULT) {
+        if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) {
             This->base.dirty_mip = TRUE;
+            BASETEX_REGISTER_UPDATE(&This->base);
+        }
         return D3D_OK;
     }
-    This->base.managed.dirty = TRUE;
 
-    BASETEX_REGISTER_UPDATE(&This->base);
+    if (This->base.base.pool == D3DPOOL_MANAGED) {
+        This->base.managed.dirty = TRUE;
+        BASETEX_REGISTER_UPDATE(&This->base);
+    }
 
     if (!pDirtyRect) {
         u_box_origin_2d(This->base.base.info.width0,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/texture9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/texture9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/texture9.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/texture9.h	2015-09-16 14:36:09.000000000 +0000
@@ -33,7 +33,7 @@
     struct pipe_box dirty_rect; /* covers all mip levels */
     uint8_t *managed_buffer;
 };
-static INLINE struct NineTexture9 *
+static inline struct NineTexture9 *
 NineTexture9( void *data )
 {
     return (struct NineTexture9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexbuffer9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexbuffer9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexbuffer9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexbuffer9.h	2015-09-16 14:36:09.000000000 +0000
@@ -40,7 +40,7 @@
 
     D3DVERTEXBUFFER_DESC desc;
 };
-static INLINE struct NineVertexBuffer9 *
+static inline struct NineVertexBuffer9 *
 NineVertexBuffer9( void *data )
 {
     return (struct NineVertexBuffer9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexdeclaration9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexdeclaration9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexdeclaration9.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexdeclaration9.c	2015-09-16 14:36:09.000000000 +0000
@@ -34,7 +34,7 @@
 
 #define DBG_CHANNEL DBG_VERTEXDECLARATION
 
-static INLINE enum pipe_format decltype_format(BYTE type)
+static inline enum pipe_format decltype_format(BYTE type)
 {
     switch (type) {
     case D3DDECLTYPE_FLOAT1:    return PIPE_FORMAT_R32_FLOAT;
@@ -60,7 +60,7 @@
     return PIPE_FORMAT_NONE;
 }
 
-static INLINE unsigned decltype_size(BYTE type)
+static inline unsigned decltype_size(BYTE type)
 {
     switch (type) {
     case D3DDECLTYPE_FLOAT1: return 1 * sizeof(float);
@@ -90,7 +90,7 @@
  * simple lookup table won't work in that case. Let's just wait
  * with making this more generic until we need it.
  */
-static INLINE boolean
+static inline boolean
 nine_d3ddeclusage_check(unsigned usage, unsigned usage_idx)
 {
     switch (usage) {
@@ -162,7 +162,7 @@
     [NINE_DECLUSAGE_FOG]             = "FOG",
     [NINE_DECLUSAGE_NONE]            = "(NONE)",
 };
-static INLINE const char *
+static inline const char *
 nine_declusage_name(unsigned ndcl)
 {
     return nine_declusage_names[ndcl % NINE_DECLUSAGE_COUNT];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexdeclaration9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexdeclaration9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexdeclaration9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexdeclaration9.h	2015-09-16 14:36:09.000000000 +0000
@@ -47,7 +47,7 @@
     D3DVERTEXELEMENT9 *decls;
     DWORD fvf;
 };
-static INLINE struct NineVertexDeclaration9 *
+static inline struct NineVertexDeclaration9 *
 NineVertexDeclaration9( void *data )
 {
     return (struct NineVertexDeclaration9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexshader9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexshader9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexshader9.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexshader9.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,9 +48,10 @@
         return hr;
 
     if (cso) {
-        This->variant.cso = cso;
+        This->ff_cso = cso;
         return D3D_OK;
     }
+
     device = This->base.device;
 
     info.type = PIPE_SHADER_VERTEX;
@@ -59,6 +60,7 @@
     info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16;
     info.sampler_mask_shadow = 0x0;
     info.sampler_ps1xtypes = 0x0;
+    info.fog_enable = 0;
 
     hr = nine_translate_shader(device, &info);
     if (FAILED(hr))
@@ -71,6 +73,9 @@
     This->byte_code.size = info.byte_size;
 
     This->variant.cso = info.cso;
+    This->last_cso = info.cso;
+    This->last_key = 0;
+
     This->const_used_size = info.const_used_size;
     This->lconstf = info.lconstf;
     This->sampler_mask = info.sampler_mask;
@@ -87,11 +92,12 @@
 void
 NineVertexShader9_dtor( struct NineVertexShader9 *This )
 {
-    DBG("This=%p cso=%p\n", This, This->variant.cso);
+    DBG("This=%p\n", This);
 
     if (This->base.device) {
         struct pipe_context *pipe = This->base.device->pipe;
         struct nine_shader_variant *var = &This->variant;
+
         do {
             if (var->cso) {
                 if (This->base.device->state.cso.vs == var->cso)
@@ -100,6 +106,12 @@
             }
             var = var->next;
         } while (var);
+
+        if (This->ff_cso) {
+            if (This->ff_cso == This->base.device->state.cso.vs)
+                pipe->bind_vs_state(pipe, NULL);
+            pipe->delete_vs_state(pipe, This->ff_cso);
+        }
     }
     nine_shader_variants_free(&This->variant);
 
@@ -130,10 +142,16 @@
 }
 
 void *
-NineVertexShader9_GetVariant( struct NineVertexShader9 *This,
-                              uint32_t key )
+NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
 {
-    void *cso = nine_shader_variant_get(&This->variant, key);
+    void *cso;
+    uint32_t key;
+
+    key = This->next_key;
+    if (key == This->last_key)
+        return This->last_cso;
+
+    cso = nine_shader_variant_get(&This->variant, key);
     if (!cso) {
         struct NineDevice9 *device = This->base.device;
         struct nine_shader_info info;
@@ -144,6 +162,7 @@
         info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16;
         info.byte_code = This->byte_code.tokens;
         info.sampler_mask_shadow = key & 0xf;
+        info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
 
         hr = nine_translate_shader(This->base.device, &info);
         if (FAILED(hr))
@@ -151,6 +170,10 @@
         nine_shader_variant_add(&This->variant, key, info.cso);
         cso = info.cso;
     }
+
+    This->last_key = key;
+    This->last_cso = cso;
+
     return cso;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexshader9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexshader9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/vertexshader9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/vertexshader9.h	2015-09-16 14:36:09.000000000 +0000
@@ -25,6 +25,7 @@
 
 #include "iunknown.h"
 #include "nine_shader.h"
+#include "nine_state.h"
 
 struct NineVertexShader9
 {
@@ -43,7 +44,6 @@
     } byte_code;
 
     uint8_t sampler_mask;
-    uint8_t sampler_mask_shadow;
 
     boolean position_t; /* if true, disable vport transform */
     boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */
@@ -54,17 +54,43 @@
 
     const struct pipe_stream_output_info *so;
 
-    uint64_t ff_key[2];
+    uint64_t ff_key[3];
+    void *ff_cso;
+
+    uint32_t last_key;
+    void *last_cso;
+
+    uint32_t next_key;
 };
-static INLINE struct NineVertexShader9 *
+static inline struct NineVertexShader9 *
 NineVertexShader9( void *data )
 {
     return (struct NineVertexShader9 *)data;
 }
 
+static inline BOOL
+NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
+                             struct nine_state *state )
+{
+    uint8_t samplers_shadow;
+    uint32_t key;
+    BOOL res;
+
+    samplers_shadow = (uint8_t)((state->samplers_shadow & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0));
+    samplers_shadow &= vs->sampler_mask;
+    key = samplers_shadow;
+
+    if (vs->byte_code.version < 0x30)
+        key |= state->rs[D3DRS_FOGENABLE] << 8;
+
+    res = vs->last_key != key;
+    if (res)
+        vs->next_key = key;
+    return res;
+}
+
 void *
-NineVertexShader9_GetVariant( struct NineVertexShader9 *vs,
-                              uint32_t key );
+NineVertexShader9_GetVariant( struct NineVertexShader9 *vs );
 
 /*** public ***/
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volume9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volume9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volume9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volume9.c	2015-09-16 14:36:09.000000000 +0000
@@ -23,6 +23,7 @@
 #include "device9.h"
 #include "volume9.h"
 #include "basetexture9.h" /* for marking dirty */
+#include "volumetexture9.h"
 #include "nine_helpers.h"
 #include "nine_pipe.h"
 #include "nine_dump.h"
@@ -43,7 +44,7 @@
     DBG("(%p(This=%p),level=%u) Allocating 0x%x bytes of system memory.\n",
         This->base.container, This, This->level, size);
 
-    This->data = (uint8_t *)MALLOC(size);
+    This->data = (uint8_t *)align_malloc(size, 32);
     if (!This->data)
         return E_OUTOFMEMORY;
     return D3D_OK;
@@ -152,7 +153,7 @@
     return NineUnknown_QueryInterface(NineUnknown(This)->container, riid, ppContainer);
 }
 
-static INLINE void
+static inline void
 NineVolume9_MarkContainerDirty( struct NineVolume9 *This )
 {
     struct NineBaseTexture9 *tex;
@@ -182,51 +183,27 @@
     return D3D_OK;
 }
 
-static INLINE boolean
-NineVolume9_IsDirty(struct NineVolume9 *This)
-{
-    return This->dirty_box[0].width != 0;
-}
-
-INLINE void
+inline void
 NineVolume9_AddDirtyRegion( struct NineVolume9 *This,
                             const struct pipe_box *box )
 {
-    struct pipe_box cover_a, cover_b;
-    float vol[2];
+    D3DBOX dirty_region;
+    struct NineVolumeTexture9 *tex = NineVolumeTexture9(This->base.container);
 
     if (!box) {
-        u_box_3d(0, 0, 0, This->desc.Width, This->desc.Height,
-                 This->desc.Depth, &This->dirty_box[0]);
-        memset(&This->dirty_box[1], 0, sizeof(This->dirty_box[1]));
-        return;
-    }
-    if (!This->dirty_box[0].width) {
-        This->dirty_box[0] = *box;
-        return;
-    }
-
-    u_box_union_3d(&cover_a, &This->dirty_box[0], box);
-    vol[0] = u_box_volume_3d(&cover_a);
-
-    if (This->dirty_box[1].width == 0) {
-        vol[1] = u_box_volume_3d(&This->dirty_box[0]);
-        if (vol[0] > (vol[1] * 1.5f))
-            This->dirty_box[1] = *box;
-        else
-            This->dirty_box[0] = cover_a;
+        NineVolumeTexture9_AddDirtyBox(tex, NULL);
     } else {
-        u_box_union_3d(&cover_b, &This->dirty_box[1], box);
-        vol[1] = u_box_volume_3d(&cover_b);
-
-        if (vol[0] > vol[1])
-            This->dirty_box[1] = cover_b;
-        else
-            This->dirty_box[0] = cover_a;
+        dirty_region.Left = box->x << This->level_actual;
+        dirty_region.Top = box->y << This->level_actual;
+        dirty_region.Front = box->z << This->level_actual;
+        dirty_region.Right = dirty_region.Left + (box->width << This->level_actual);
+        dirty_region.Bottom = dirty_region.Top + (box->height << This->level_actual);
+        dirty_region.Back = dirty_region.Front + (box->depth << This->level_actual);
+        NineVolumeTexture9_AddDirtyBox(tex, &dirty_region);
     }
 }
 
-static INLINE uint8_t *
+static inline uint8_t *
 NineVolume9_GetSystemMemPointer(struct NineVolume9 *This, int x, int y, int z)
 {
     unsigned x_offset = util_format_get_stride(This->info.format, x);
@@ -254,21 +231,26 @@
         pBox ? pBox->Front : 0, pBox ? pBox->Back : 0,
         nine_D3DLOCK_to_str(Flags));
 
+    /* check if it's already locked */
+    user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
+
+    /* set pBits to NULL after lock_count check */
+    user_assert(pLockedVolume, E_POINTER);
+    pLockedVolume->pBits = NULL;
+
     user_assert(This->desc.Pool != D3DPOOL_DEFAULT ||
                 (This->desc.Usage & D3DUSAGE_DYNAMIC), D3DERR_INVALIDCALL);
 
     user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)),
                 D3DERR_INVALIDCALL);
 
-    user_assert(This->lock_count == 0, D3DERR_INVALIDCALL);
-    user_assert(pLockedVolume, E_POINTER);
-
-    if (pBox && This->desc.Pool == D3DPOOL_DEFAULT &&
-        util_format_is_compressed(This->info.format)) {
+    if (pBox && compressed_format (This->desc.Format)) { /* For volume all pools are checked */
         const unsigned w = util_format_get_blockwidth(This->info.format);
         const unsigned h = util_format_get_blockheight(This->info.format);
-        user_assert(!(pBox->Left % w) && !(pBox->Right % w) &&
-                    !(pBox->Top % h) && !(pBox->Bottom % h),
+        user_assert((pBox->Left == 0 && pBox->Right == This->desc.Width &&
+                     pBox->Top == 0 && pBox->Bottom == This->desc.Height) ||
+                    (!(pBox->Left % w) && !(pBox->Right % w) &&
+                     !(pBox->Top % h) && !(pBox->Bottom % h)),
                     D3DERR_INVALIDCALL);
     }
 
@@ -312,8 +294,7 @@
 
     if (!(Flags & (D3DLOCK_NO_DIRTY_UPDATE | D3DLOCK_READONLY))) {
         NineVolume9_MarkContainerDirty(This);
-        if (This->desc.Pool == D3DPOOL_MANAGED)
-            NineVolume9_AddDirtyRegion(This, &box);
+        NineVolume9_AddDirtyRegion(This, &box);
     }
 
     ++This->lock_count;
@@ -333,42 +314,31 @@
     return D3D_OK;
 }
 
-
+/* When this function is called, we have already checked
+ * that the copy regions fit the volumes. */
 HRESULT
-NineVolume9_CopyVolume( struct NineVolume9 *This,
-                        struct NineVolume9 *From,
-                        unsigned dstx, unsigned dsty, unsigned dstz,
-                        struct pipe_box *pSrcBox )
+NineVolume9_CopyMemToDefault( struct NineVolume9 *This,
+                              struct NineVolume9 *From,
+                              unsigned dstx, unsigned dsty, unsigned dstz,
+                              struct pipe_box *pSrcBox )
 {
     struct pipe_context *pipe = This->pipe;
     struct pipe_resource *r_dst = This->resource;
-    struct pipe_resource *r_src = From->resource;
-    struct pipe_transfer *transfer;
     struct pipe_box src_box;
     struct pipe_box dst_box;
-    uint8_t *p_dst;
     const uint8_t *p_src;
 
     DBG("This=%p From=%p dstx=%u dsty=%u dstz=%u pSrcBox=%p\n",
         This, From, dstx, dsty, dstz, pSrcBox);
 
-    assert(This->desc.Pool != D3DPOOL_MANAGED &&
-           From->desc.Pool != D3DPOOL_MANAGED);
-    user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL);
+    assert(This->desc.Pool == D3DPOOL_DEFAULT &&
+           From->desc.Pool == D3DPOOL_SYSTEMMEM);
 
     dst_box.x = dstx;
     dst_box.y = dsty;
     dst_box.z = dstz;
 
     if (pSrcBox) {
-        /* make sure it doesn't range outside the source volume */
-        user_assert(pSrcBox->x >= 0 &&
-                    (pSrcBox->width - pSrcBox->x) <= From->desc.Width &&
-                    pSrcBox->y >= 0 &&
-                    (pSrcBox->height - pSrcBox->y) <= From->desc.Height &&
-                    pSrcBox->z >= 0 &&
-                    (pSrcBox->depth - pSrcBox->z) <= From->desc.Depth,
-                    D3DERR_INVALIDCALL);
         src_box = *pSrcBox;
     } else {
         src_box.x = 0;
@@ -378,101 +348,54 @@
         src_box.height = From->desc.Height;
         src_box.depth = From->desc.Depth;
     }
-    /* limits */
-    dst_box.width = This->desc.Width - dst_box.x;
-    dst_box.height = This->desc.Height - dst_box.y;
-    dst_box.depth = This->desc.Depth - dst_box.z;
-
-    user_assert(src_box.width <= dst_box.width &&
-                src_box.height <= dst_box.height &&
-                src_box.depth <= dst_box.depth, D3DERR_INVALIDCALL);
 
     dst_box.width = src_box.width;
     dst_box.height = src_box.height;
     dst_box.depth = src_box.depth;
 
-    if (r_dst && r_src) {
-        pipe->resource_copy_region(pipe,
-                                   r_dst, This->level,
-                                   dst_box.x, dst_box.y, dst_box.z,
-                                   r_src, From->level,
-                                   &src_box);
-    } else
-    if (r_dst) {
-        p_src = NineVolume9_GetSystemMemPointer(From,
-            src_box.x, src_box.y, src_box.z);
-
-        pipe->transfer_inline_write(pipe, r_dst, This->level,
-                                    0, /* WRITE|DISCARD are implicit */
-                                    &dst_box, p_src,
-                                    From->stride, From->layer_stride);
-    } else
-    if (r_src) {
-        p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0);
-        p_src = pipe->transfer_map(pipe, r_src, From->level,
-                                   PIPE_TRANSFER_READ,
-                                   &src_box, &transfer);
-        if (!p_src)
-            return D3DERR_DRIVERINTERNALERROR;
-
-        util_copy_box(p_dst, This->info.format,
-                      This->stride, This->layer_stride,
-                      dst_box.x, dst_box.y, dst_box.z,
-                      dst_box.width, dst_box.height, dst_box.depth,
-                      p_src,
-                      transfer->stride, transfer->layer_stride,
-                      src_box.x, src_box.y, src_box.z);
-
-        pipe->transfer_unmap(pipe, transfer);
-    } else {
-        p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0);
-        p_src = NineVolume9_GetSystemMemPointer(From, 0, 0, 0);
+    p_src = NineVolume9_GetSystemMemPointer(From,
+         src_box.x, src_box.y, src_box.z);
 
-        util_copy_box(p_dst, This->info.format,
-                      This->stride, This->layer_stride,
-                      dst_box.x, dst_box.y, dst_box.z,
-                      dst_box.width, dst_box.height, dst_box.depth,
-                      p_src,
-                      From->stride, From->layer_stride,
-                      src_box.x, src_box.y, src_box.z);
-    }
+    pipe->transfer_inline_write(pipe, r_dst, This->level,
+                                0, /* WRITE|DISCARD are implicit */
+                                &dst_box, p_src,
+                                From->stride, From->layer_stride);
 
-    if (This->desc.Pool == D3DPOOL_DEFAULT)
-        NineVolume9_MarkContainerDirty(This);
-    if (!r_dst && This->resource)
-        NineVolume9_AddDirtyRegion(This, &dst_box);
+    NineVolume9_MarkContainerDirty(This);
 
     return D3D_OK;
 }
 
 HRESULT
-NineVolume9_UploadSelf( struct NineVolume9 *This )
+NineVolume9_UploadSelf( struct NineVolume9 *This,
+                        const struct pipe_box *damaged )
 {
     struct pipe_context *pipe = This->pipe;
     struct pipe_resource *res = This->resource;
+    struct pipe_box box;
     uint8_t *ptr;
-    unsigned i;
 
-    DBG("This=%p dirty=%i data=%p res=%p\n", This, NineVolume9_IsDirty(This),
+    DBG("This=%p damaged=%p data=%p res=%p\n", This, damaged,
         This->data, res);
 
     assert(This->desc.Pool == D3DPOOL_MANAGED);
-
-    if (!NineVolume9_IsDirty(This))
-        return D3D_OK;
     assert(res);
 
-    for (i = 0; i < Elements(This->dirty_box); ++i) {
-        const struct pipe_box *box = &This->dirty_box[i];
-        if (box->width == 0)
-            break;
-        ptr = NineVolume9_GetSystemMemPointer(This, box->x, box->y, box->z);
-
-        pipe->transfer_inline_write(pipe, res, This->level,
-                                    0,
-                                    box, ptr, This->stride, This->layer_stride);
+    if (damaged) {
+        box = *damaged;
+    } else {
+        box.x = 0;
+        box.y = 0;
+        box.z = 0;
+        box.width = This->desc.Width;
+        box.height = This->desc.Height;
+        box.depth = This->desc.Depth;
     }
-    NineVolume9_ClearDirtyRegion(This);
+
+    ptr = NineVolume9_GetSystemMemPointer(This, box.x, box.y, box.z);
+
+    pipe->transfer_inline_write(pipe, res, This->level, 0, &box,
+                                ptr, This->stride, This->layer_stride);
 
     return D3D_OK;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volume9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volume9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volume9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volume9.h	2015-09-16 14:36:09.000000000 +0000
@@ -50,14 +50,12 @@
     struct pipe_transfer *transfer;
     unsigned lock_count;
 
-    struct pipe_box dirty_box[2];
-
     struct pipe_context *pipe;
 
     /* for [GS]etPrivateData/FreePrivateData */
     struct util_hash_table *pdata;
 };
-static INLINE struct NineVolume9 *
+static inline struct NineVolume9 *
 NineVolume9( void *data )
 {
     return (struct NineVolume9 *)data;
@@ -73,7 +71,7 @@
 
 /*** Nine private ***/
 
-static INLINE void
+static inline void
 NineVolume9_SetResource( struct NineVolume9 *This,
                          struct pipe_resource *resource, unsigned level )
 {
@@ -85,20 +83,15 @@
 NineVolume9_AddDirtyRegion( struct NineVolume9 *This,
                             const struct pipe_box *box );
 
-static INLINE void
-NineVolume9_ClearDirtyRegion( struct NineVolume9 *This )
-{
-    memset(&This->dirty_box, 0, sizeof(This->dirty_box));
-}
-
 HRESULT
-NineVolume9_CopyVolume( struct NineVolume9 *This,
-                        struct NineVolume9 *From,
-                        unsigned dstx, unsigned dsty, unsigned dstz,
-                        struct pipe_box *pSrcBox );
+NineVolume9_CopyMemToDefault( struct NineVolume9 *This,
+                              struct NineVolume9 *From,
+                              unsigned dstx, unsigned dsty, unsigned dstz,
+                              struct pipe_box *pSrcBox );
 
 HRESULT
-NineVolume9_UploadSelf( struct NineVolume9 *This );
+NineVolume9_UploadSelf( struct NineVolume9 *This,
+                        const struct pipe_box *damaged );
 
 
 /*** Direct3D public ***/
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volumetexture9.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volumetexture9.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volumetexture9.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volumetexture9.c	2015-09-16 14:36:09.000000000 +0000
@@ -64,6 +64,13 @@
     if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2)
         return D3DERR_INVALIDCALL;
 
+    if (compressed_format(Format)) {
+        const unsigned w = util_format_get_blockwidth(pf);
+        const unsigned h = util_format_get_blockheight(pf);
+        /* Compressed formats are not compressed on depth component */
+        user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL);
+    }
+
     info->screen = pParams->device->screen;
     info->target = PIPE_TEXTURE_3D;
     info->format = pf;
@@ -116,6 +123,9 @@
             return hr;
     }
 
+    /* Textures start initially dirty */
+    NineVolumeTexture9_AddDirtyBox(This, NULL);
+
     return D3D_OK;
 }
 
@@ -193,12 +203,14 @@
 {
     DBG("This=%p pDirtybox=%p\n", This, pDirtyBox);
 
-    if (This->base.base.pool != D3DPOOL_MANAGED) {
+    if (This->base.base.pool == D3DPOOL_DEFAULT) {
         return D3D_OK;
     }
-    This->base.managed.dirty = TRUE;
 
-    BASETEX_REGISTER_UPDATE(&This->base);
+    if (This->base.base.pool == D3DPOOL_MANAGED) {
+        This->base.managed.dirty = TRUE;
+        BASETEX_REGISTER_UPDATE(&This->base);
+    }
 
     if (!pDirtyBox) {
         This->dirty_box.x = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volumetexture9.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volumetexture9.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/nine/volumetexture9.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/nine/volumetexture9.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,7 +32,7 @@
     struct NineVolume9 **volumes;
     struct pipe_box dirty_box;
 };
-static INLINE struct NineVolumeTexture9 *
+static inline struct NineVolumeTexture9 *
 NineVolumeTexture9( void *data )
 {
     return (struct NineVolumeTexture9 *)data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/omx/vid_enc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/omx/vid_enc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/omx/vid_enc.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/omx/vid_enc.c	2015-09-16 14:36:09.000000000 +0000
@@ -180,6 +180,11 @@
                                 PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED))
       return OMX_ErrorBadParameter;
  
+   priv->stacked_frames_num = screen->get_video_param(screen,
+                                PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
+                                PIPE_VIDEO_ENTRYPOINT_ENCODE,
+                                PIPE_VIDEO_CAP_STACKED_FRAMES);
+
    priv->s_pipe = screen->context_create(screen, priv->screen);
    if (!priv->s_pipe)
       return OMX_ErrorInsufficientResources;
@@ -259,6 +264,7 @@
    LIST_INITHEAD(&priv->free_tasks);
    LIST_INITHEAD(&priv->used_tasks);
    LIST_INITHEAD(&priv->b_frames);
+   LIST_INITHEAD(&priv->stacked_tasks);
 
    return OMX_ErrorNone;
 }
@@ -271,6 +277,7 @@
    enc_ReleaseTasks(&priv->free_tasks);
    enc_ReleaseTasks(&priv->used_tasks);
    enc_ReleaseTasks(&priv->b_frames);
+   enc_ReleaseTasks(&priv->stacked_tasks);
 
    if (priv->ports) {
       for (i = 0; i < priv->sPortTypesParam[OMX_PortDomainVideo].nPorts; ++i) {
@@ -1116,6 +1123,7 @@
    struct input_buf_private *inp = buf->pInputPortPrivate;
    enum pipe_h264_enc_picture_type picture_type;
    struct encode_task *task;
+   unsigned stacked_num = 0;
    OMX_ERRORTYPE err;
 
    enc_MoveTasks(&inp->tasks, &priv->free_tasks);
@@ -1127,6 +1135,8 @@
       if (buf->nFlags & OMX_BUFFERFLAG_EOS) {
          buf->nFilledLen = buf->nAllocLen;
          enc_ClearBframes(port, inp);
+         enc_MoveTasks(&priv->stacked_tasks, &inp->tasks);
+         priv->codec->flush(priv->codec);
       }
       return base_port_SendBufferFunction(port, buf);
    }
@@ -1166,7 +1176,16 @@
       /* handle I or P frame */
       priv->ref_idx_l0 = priv->ref_idx_l1;
       enc_HandleTask(port, task, picture_type);
-      LIST_ADDTAIL(&task->list, &inp->tasks);
+      LIST_ADDTAIL(&task->list, &priv->stacked_tasks);
+      LIST_FOR_EACH_ENTRY(task, &priv->stacked_tasks, list) {
+         ++stacked_num;
+      }
+      if (stacked_num == priv->stacked_frames_num) {
+         struct encode_task *t;
+         t = LIST_ENTRY(struct encode_task, priv->stacked_tasks.next, list);
+         LIST_DEL(&t->list);
+         LIST_ADDTAIL(&t->list, &inp->tasks);
+      }
       priv->ref_idx_l1 = priv->frame_num++;
 
       /* handle B frames */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/omx/vid_enc.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/omx/vid_enc.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/omx/vid_enc.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/omx/vid_enc.h	2015-09-16 14:36:09.000000000 +0000
@@ -73,6 +73,7 @@
 	struct list_head free_tasks; \
 	struct list_head used_tasks; \
 	struct list_head b_frames; \
+	struct list_head stacked_tasks; \
 	OMX_U32 frame_rate; \
 	OMX_U32 frame_num; \
 	OMX_U32 pic_order_cnt; \
@@ -86,7 +87,8 @@
 	struct vl_compositor_state cstate; \
 	struct pipe_video_buffer *scale_buffer[OMX_VID_ENC_NUM_SCALING_BUFFERS]; \
 	OMX_CONFIG_SCALEFACTORTYPE scale; \
-	OMX_U32 current_scale_buffer;
+	OMX_U32 current_scale_buffer; \
+	OMX_U32 stacked_frames_num;
 ENDCLASS(vid_enc_PrivateType)
 
 OMX_ERRORTYPE vid_enc_LoaderComponent(stLoaderComponentType *comp);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/osmesa/osmesa.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/osmesa/osmesa.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/osmesa/osmesa.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/osmesa/osmesa.c	2015-09-16 14:36:09.000000000 +0000
@@ -168,7 +168,7 @@
 }
 
 
-static INLINE boolean
+static inline boolean
 little_endian(void)
 {
    const unsigned ui = 1;
@@ -292,7 +292,7 @@
 /**
  * Return the osmesa_buffer that corresponds to an st_framebuffer_iface.
  */
-static INLINE struct osmesa_buffer *
+static inline struct osmesa_buffer *
 stfbi_to_osbuffer(struct st_framebuffer_iface *stfbi)
 {
    return (struct osmesa_buffer *) stfbi->st_manager_private;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/vdpau/decode.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/vdpau/decode.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/vdpau/decode.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/vdpau/decode.c	2015-09-16 14:36:09.000000000 +0000
@@ -413,6 +413,115 @@
    return VDP_STATUS_OK;
 }
 
+static VdpStatus
+vlVdpDecoderRenderH265(struct pipe_h265_picture_desc *picture,
+                       VdpPictureInfoHEVC *picture_info)
+{
+   unsigned i;
+   /* Copy picture_info into picture, SPS fields first; picture->pps and picture->pps->sps must already be set by the caller. */
+   picture->pps->sps->chroma_format_idc = picture_info->chroma_format_idc;
+   picture->pps->sps->separate_colour_plane_flag = picture_info->separate_colour_plane_flag;
+   picture->pps->sps->pic_width_in_luma_samples = picture_info->pic_width_in_luma_samples;
+   picture->pps->sps->pic_height_in_luma_samples = picture_info->pic_height_in_luma_samples;
+   picture->pps->sps->bit_depth_luma_minus8 = picture_info->bit_depth_luma_minus8;
+   picture->pps->sps->bit_depth_chroma_minus8 = picture_info->bit_depth_chroma_minus8;
+   picture->pps->sps->log2_max_pic_order_cnt_lsb_minus4 = picture_info->log2_max_pic_order_cnt_lsb_minus4;
+   picture->pps->sps->sps_max_dec_pic_buffering_minus1 = picture_info->sps_max_dec_pic_buffering_minus1;
+   picture->pps->sps->log2_min_luma_coding_block_size_minus3 = picture_info->log2_min_luma_coding_block_size_minus3;
+   picture->pps->sps->log2_diff_max_min_luma_coding_block_size = picture_info->log2_diff_max_min_luma_coding_block_size;
+   picture->pps->sps->log2_min_transform_block_size_minus2 = picture_info->log2_min_transform_block_size_minus2;
+   picture->pps->sps->log2_diff_max_min_transform_block_size = picture_info->log2_diff_max_min_transform_block_size;
+   picture->pps->sps->max_transform_hierarchy_depth_inter = picture_info->max_transform_hierarchy_depth_inter;
+   picture->pps->sps->max_transform_hierarchy_depth_intra = picture_info->max_transform_hierarchy_depth_intra;
+   picture->pps->sps->scaling_list_enabled_flag = picture_info->scaling_list_enabled_flag;
+   memcpy(picture->pps->sps->ScalingList4x4, picture_info->ScalingList4x4, 6*16); /* NOTE(review): byte counts here and in later memcpy calls are hard-coded; confirm against both structs' array sizes */
+   memcpy(picture->pps->sps->ScalingList8x8, picture_info->ScalingList8x8, 6*64);
+   memcpy(picture->pps->sps->ScalingList16x16, picture_info->ScalingList16x16, 6*64);
+   memcpy(picture->pps->sps->ScalingList32x32, picture_info->ScalingList32x32, 2*64);
+   memcpy(picture->pps->sps->ScalingListDCCoeff16x16, picture_info->ScalingListDCCoeff16x16, 6);
+   memcpy(picture->pps->sps->ScalingListDCCoeff32x32, picture_info->ScalingListDCCoeff32x32, 2);
+   picture->pps->sps->amp_enabled_flag = picture_info->amp_enabled_flag;
+   picture->pps->sps->sample_adaptive_offset_enabled_flag = picture_info->sample_adaptive_offset_enabled_flag;
+   picture->pps->sps->pcm_enabled_flag = picture_info->pcm_enabled_flag;
+   picture->pps->sps->pcm_sample_bit_depth_luma_minus1 = picture_info->pcm_sample_bit_depth_luma_minus1;
+   picture->pps->sps->pcm_sample_bit_depth_chroma_minus1 = picture_info->pcm_sample_bit_depth_chroma_minus1;
+   picture->pps->sps->log2_min_pcm_luma_coding_block_size_minus3 = picture_info->log2_min_pcm_luma_coding_block_size_minus3;
+   picture->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size = picture_info->log2_diff_max_min_pcm_luma_coding_block_size;
+   picture->pps->sps->pcm_loop_filter_disabled_flag = picture_info->pcm_loop_filter_disabled_flag;
+   picture->pps->sps->num_short_term_ref_pic_sets = picture_info->num_short_term_ref_pic_sets;
+   picture->pps->sps->long_term_ref_pics_present_flag = picture_info->long_term_ref_pics_present_flag;
+   picture->pps->sps->num_long_term_ref_pics_sps = picture_info->num_long_term_ref_pics_sps;
+   picture->pps->sps->sps_temporal_mvp_enabled_flag = picture_info->sps_temporal_mvp_enabled_flag;
+   picture->pps->sps->strong_intra_smoothing_enabled_flag = picture_info->strong_intra_smoothing_enabled_flag;
+   /* Fields stored directly in picture->pps. */
+   picture->pps->dependent_slice_segments_enabled_flag = picture_info->dependent_slice_segments_enabled_flag;
+   picture->pps->output_flag_present_flag = picture_info->output_flag_present_flag;
+   picture->pps->num_extra_slice_header_bits = picture_info->num_extra_slice_header_bits;
+   picture->pps->sign_data_hiding_enabled_flag = picture_info->sign_data_hiding_enabled_flag;
+   picture->pps->cabac_init_present_flag = picture_info->cabac_init_present_flag;
+   picture->pps->num_ref_idx_l0_default_active_minus1 = picture_info->num_ref_idx_l0_default_active_minus1;
+   picture->pps->num_ref_idx_l1_default_active_minus1 = picture_info->num_ref_idx_l1_default_active_minus1;
+   picture->pps->init_qp_minus26 = picture_info->init_qp_minus26;
+   picture->pps->constrained_intra_pred_flag = picture_info->constrained_intra_pred_flag;
+   picture->pps->transform_skip_enabled_flag = picture_info->transform_skip_enabled_flag;
+   picture->pps->cu_qp_delta_enabled_flag = picture_info->cu_qp_delta_enabled_flag;
+   picture->pps->diff_cu_qp_delta_depth = picture_info->diff_cu_qp_delta_depth;
+   picture->pps->pps_cb_qp_offset = picture_info->pps_cb_qp_offset;
+   picture->pps->pps_cr_qp_offset = picture_info->pps_cr_qp_offset;
+   picture->pps->pps_slice_chroma_qp_offsets_present_flag = picture_info->pps_slice_chroma_qp_offsets_present_flag;
+   picture->pps->weighted_pred_flag = picture_info->weighted_pred_flag;
+   picture->pps->weighted_bipred_flag = picture_info->weighted_bipred_flag;
+   picture->pps->transquant_bypass_enabled_flag = picture_info->transquant_bypass_enabled_flag;
+   picture->pps->tiles_enabled_flag = picture_info->tiles_enabled_flag;
+   picture->pps->entropy_coding_sync_enabled_flag = picture_info->entropy_coding_sync_enabled_flag;
+   picture->pps->num_tile_columns_minus1 = picture_info->num_tile_columns_minus1;
+   picture->pps->num_tile_rows_minus1 = picture_info->num_tile_rows_minus1;
+   picture->pps->uniform_spacing_flag = picture_info->uniform_spacing_flag;
+   memcpy(picture->pps->column_width_minus1, picture_info->column_width_minus1, 20 * 2);
+   memcpy(picture->pps->row_height_minus1, picture_info->row_height_minus1, 22 * 2);
+   picture->pps->loop_filter_across_tiles_enabled_flag = picture_info->loop_filter_across_tiles_enabled_flag;
+   picture->pps->pps_loop_filter_across_slices_enabled_flag = picture_info->pps_loop_filter_across_slices_enabled_flag;
+   picture->pps->deblocking_filter_control_present_flag = picture_info->deblocking_filter_control_present_flag;
+   picture->pps->deblocking_filter_override_enabled_flag = picture_info->deblocking_filter_override_enabled_flag;
+   picture->pps->pps_deblocking_filter_disabled_flag = picture_info->pps_deblocking_filter_disabled_flag;
+   picture->pps->pps_beta_offset_div2 = picture_info->pps_beta_offset_div2;
+   picture->pps->pps_tc_offset_div2 = picture_info->pps_tc_offset_div2;
+   picture->pps->lists_modification_present_flag = picture_info->lists_modification_present_flag;
+   picture->pps->log2_parallel_merge_level_minus2 = picture_info->log2_parallel_merge_level_minus2;
+   picture->pps->slice_segment_header_extension_present_flag = picture_info->slice_segment_header_extension_present_flag;
+   /* Fields stored directly in the picture descriptor. */
+   picture->IDRPicFlag = picture_info->IDRPicFlag;
+   picture->RAPPicFlag = picture_info->RAPPicFlag;
+   picture->CurrRpsIdx = picture_info->CurrRpsIdx;
+   picture->NumPocTotalCurr = picture_info->NumPocTotalCurr;
+   picture->NumDeltaPocsOfRefRpsIdx = picture_info->NumDeltaPocsOfRefRpsIdx;
+   picture->NumShortTermPictureSliceHeaderBits = picture_info->NumShortTermPictureSliceHeaderBits;
+   picture->NumLongTermPictureSliceHeaderBits = picture_info->NumLongTermPictureSliceHeaderBits;
+   picture->CurrPicOrderCntVal = picture_info->CurrPicOrderCntVal;
+   /* Fill all 16 reference slots; the first vlVdpGetReferenceFrame failure is returned immediately. */
+   for (i = 0; i < 16; ++i) {
+      VdpStatus ret = vlVdpGetReferenceFrame
+      (
+         picture_info->RefPics[i],
+         &picture->ref[i]
+      );
+      if (ret != VDP_STATUS_OK)
+         return ret;
+
+      picture->PicOrderCntVal[i] = picture_info->PicOrderCntVal[i];
+      picture->IsLongTerm[i] = picture_info->IsLongTerm[i];
+   }
+   /* Reference picture set counts, then 8 bytes of each RefPicSet* array. */
+   picture->NumPocStCurrBefore = picture_info->NumPocStCurrBefore;
+   picture->NumPocStCurrAfter = picture_info->NumPocStCurrAfter;
+   picture->NumPocLtCurr = picture_info->NumPocLtCurr;
+   memcpy(picture->RefPicSetStCurrBefore, picture_info->RefPicSetStCurrBefore, 8);
+   memcpy(picture->RefPicSetStCurrAfter, picture_info->RefPicSetStCurrAfter, 8);
+   memcpy(picture->RefPicSetLtCurr, picture_info->RefPicSetLtCurr, 8);
+
+   return VDP_STATUS_OK;
+}
+
 static void
 vlVdpDecoderFixVC1Startcode(uint32_t *num_buffers, const void *buffers[], unsigned sizes[])
 {
@@ -461,14 +570,17 @@
    struct pipe_video_codec *dec;
    bool buffer_support[2];
    unsigned i;
-   struct pipe_h264_sps sps = {};
-   struct pipe_h264_pps pps = { &sps };
+   struct pipe_h264_sps sps_h264 = {};
+   struct pipe_h264_pps pps_h264 = { &sps_h264 };
+   struct pipe_h265_sps sps_h265 = {};
+   struct pipe_h265_pps pps_h265 = { &sps_h265 };
    union {
       struct pipe_picture_desc base;
       struct pipe_mpeg12_picture_desc mpeg12;
       struct pipe_mpeg4_picture_desc mpeg4;
       struct pipe_vc1_picture_desc vc1;
       struct pipe_h264_picture_desc h264;
+      struct pipe_h265_picture_desc h265;
    } desc;
 
    if (!(picture_info && bitstream_buffers))
@@ -547,9 +659,13 @@
       ret = vlVdpDecoderRenderVC1(&desc.vc1, (VdpPictureInfoVC1 *)picture_info);
       break;
    case PIPE_VIDEO_FORMAT_MPEG4_AVC:
-      desc.h264.pps = &pps;
+      desc.h264.pps = &pps_h264;
       ret = vlVdpDecoderRenderH264(&desc.h264, (VdpPictureInfoH264 *)picture_info);
       break;
+   case PIPE_VIDEO_FORMAT_HEVC:
+      desc.h265.pps = &pps_h265;
+      ret = vlVdpDecoderRenderH265(&desc.h265, (VdpPictureInfoHEVC *)picture_info);
+      break;
    default:
       return VDP_STATUS_INVALID_DECODER_PROFILE;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/vdpau/presentation.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/vdpau/presentation.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/vdpau/presentation.c	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/vdpau/presentation.c	2015-09-16 14:36:09.000000000 +0000
@@ -369,7 +369,7 @@
    } else {
       pipe_mutex_lock(pq->device->mutex);
       screen = pq->device->vscreen->pscreen;
-      if (screen->fence_signalled(screen, surf->fence)) {
+      if (screen->fence_finish(screen, surf->fence, 0)) {
          screen->fence_reference(screen, &surf->fence, NULL);
          *status = VDP_PRESENTATION_QUEUE_STATUS_VISIBLE;
          pipe_mutex_unlock(pq->device->mutex);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/vdpau/vdpau_private.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/vdpau/vdpau_private.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/vdpau/vdpau_private.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/vdpau/vdpau_private.h	2015-09-16 14:36:09.000000000 +0000
@@ -261,6 +261,16 @@
          return PIPE_VIDEO_PROFILE_VC1_MAIN;
       case VDP_DECODER_PROFILE_VC1_ADVANCED:
          return PIPE_VIDEO_PROFILE_VC1_ADVANCED;
+      case VDP_DECODER_PROFILE_HEVC_MAIN:
+         return PIPE_VIDEO_PROFILE_HEVC_MAIN;
+      case VDP_DECODER_PROFILE_HEVC_MAIN_10:
+         return PIPE_VIDEO_PROFILE_HEVC_MAIN_10;
+      case VDP_DECODER_PROFILE_HEVC_MAIN_STILL:
+         return PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL;
+      case VDP_DECODER_PROFILE_HEVC_MAIN_12:
+         return PIPE_VIDEO_PROFILE_HEVC_MAIN_12;
+      case VDP_DECODER_PROFILE_HEVC_MAIN_444:
+         return PIPE_VIDEO_PROFILE_HEVC_MAIN_444;
       default:
          return PIPE_VIDEO_PROFILE_UNKNOWN;
    }
@@ -292,6 +302,16 @@
          return VDP_DECODER_PROFILE_VC1_MAIN;
       case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
          return VDP_DECODER_PROFILE_VC1_ADVANCED;
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+         return VDP_DECODER_PROFILE_HEVC_MAIN;
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+         return VDP_DECODER_PROFILE_HEVC_MAIN_10;
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL:
+         return VDP_DECODER_PROFILE_HEVC_MAIN_STILL;
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_12:
+         return VDP_DECODER_PROFILE_HEVC_MAIN_12;
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_444:
+         return VDP_DECODER_PROFILE_HEVC_MAIN_444;
       default:
          assert(0);
          return -1;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/Makefile.sources	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -8,6 +8,8 @@
 	stw_ext_swapinterval.c \
 	stw_framebuffer.c \
 	stw_getprocaddress.c \
+	stw_nopfuncs.c \
+	stw_nopfuncs.h \
 	stw_pixelformat.c \
 	stw_st.c \
 	stw_tls.c \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_context.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -226,14 +226,13 @@
        *         be implemented, as determined by the implementation.
        *       * The core profile of version 3.2 or greater."
        *
-       * and because Mesa doesn't support GL_ARB_compatibility, the only chance to
-       * honour a 3.1 context is through core profile.
+       * But Mesa doesn't support GL_ARB_compatibility, while most prevalent
+       * Windows OpenGL implementations do, and unfortunately many Windows
+       * applications don't check whether or not they received a context with
+       * GL_ARB_compatibility, so returning a core profile here does more harm
+       * than good.
        */
-      if (majorVersion == 3 && minorVersion == 1) {
-         attribs.profile = ST_PROFILE_OPENGL_CORE;
-      } else {
-         attribs.profile = ST_PROFILE_DEFAULT;
-      }
+      attribs.profile = ST_PROFILE_DEFAULT;
       break;
    case WGL_CONTEXT_ES_PROFILE_BIT_EXT:
       if (majorVersion >= 2) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_device.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_device.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_device.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_device.h	2015-09-16 14:36:09.000000000 +0000
@@ -80,7 +80,7 @@
 extern struct stw_device *stw_dev;
 
 
-static INLINE struct stw_context *
+static inline struct stw_context *
 stw_lookup_context_locked( DHGLRC dhglrc )
 {
    if (dhglrc == 0 || stw_dev == NULL)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_ext_pixelformat.c	2015-09-16 14:36:09.000000000 +0000
@@ -88,7 +88,12 @@
       return TRUE;
 
    case WGL_SWAP_METHOD_ARB:
-      *pvalue = pfi->pfd.dwFlags & PFD_SWAP_COPY ? WGL_SWAP_COPY_ARB : WGL_SWAP_UNDEFINED_ARB;
+      if (pfi->pfd.dwFlags & PFD_SWAP_COPY)
+         *pvalue = WGL_SWAP_COPY_ARB;
+      else if (pfi->pfd.dwFlags & PFD_SWAP_EXCHANGE)
+         *pvalue = WGL_SWAP_EXCHANGE_EXT;
+      else
+         *pvalue = WGL_SWAP_UNDEFINED_ARB;
       return TRUE;
 
    case WGL_SWAP_LAYER_BUFFERS_ARB:
@@ -232,7 +237,7 @@
       break;
 
    case WGL_SAMPLE_BUFFERS_ARB:
-      *pvalue = 1;
+      *pvalue = (pfi->stvis.samples > 1);
       break;
 
    case WGL_SAMPLES_ARB:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_framebuffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_framebuffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_framebuffer.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_framebuffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
  * Search the framebuffer with the matching HWND while holding the
  * stw_dev::fb_mutex global lock.
  */
-static INLINE struct stw_framebuffer *
+static inline struct stw_framebuffer *
 stw_framebuffer_from_hwnd_locked(
    HWND hwnd )
 {
@@ -376,7 +376,7 @@
 /**
  * Given an hdc, return the corresponding stw_framebuffer.
  */
-static INLINE struct stw_framebuffer *
+static inline struct stw_framebuffer *
 stw_framebuffer_from_hdc_locked(
    HDC hdc )
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_getprocaddress.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_getprocaddress.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_getprocaddress.c	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_getprocaddress.c	2015-09-16 14:36:09.000000000 +0000
@@ -35,6 +35,7 @@
 #include "glapi/glapi.h"
 #include "stw_device.h"
 #include "stw_icd.h"
+#include "stw_nopfuncs.h"
 
 struct stw_extension_entry
 {
@@ -79,6 +80,7 @@
    LPCSTR lpszProc )
 {
    const struct stw_extension_entry *entry;
+   PROC p;
 
    if (!stw_dev)
       return NULL;
@@ -88,8 +90,23 @@
          if (strcmp( lpszProc, entry->name ) == 0)
             return entry->proc;
 
-   if (lpszProc[0] == 'g' && lpszProc[1] == 'l')
-      return (PROC) _glapi_get_proc_address( lpszProc );
+   if (lpszProc[0] == 'g' && lpszProc[1] == 'l') {
+      p = (PROC) _glapi_get_proc_address(lpszProc);
+      if (p)
+         return p;
+   }
+
+   /* If we get here, we'd normally just return NULL, but since some apps
+    * (like Viewperf12) crash when they try to use the null pointer, try
+    * returning a pointer to a no-op function instead.
+    */
+   p = stw_get_nop_function(lpszProc);
+   if (p) {
+      debug_printf("wglGetProcAddress(\"%s\") returning no-op function\n",
+                   lpszProc);
+      return p;
+   }
 
+   debug_printf("wglGetProcAddress(\"%s\") returning NULL\n", lpszProc);
    return NULL;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_nopfuncs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_nopfuncs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_nopfuncs.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_nopfuncs.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,464 @@
+/**************************************************************************
+ *
+ * Copyright 2015 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * No-op GL API functions.
+ *
+ * Some OpenGL apps (like Viewperf12) call wglGetProcAddress() to get
+ * a pointer to an extension function, get a NULL pointer, but don't bother
+ * to check for NULL before jumping through the pointer.  This causes a
+ * crash.
+ *
+ * As a work-around we provide some no-op functions here to avoid those
+ * crashes.
+ */
+
+#include <GL/gl.h>
+#include "stw_nopfuncs.h"
+#include "util/u_debug.h"
+
+/* Log that the app called a stubbed-out GL function; 'name' is the stub's __func__. */
+static void
+warning(const char *name)
+{
+   const char *gl_name = name + 4;   /* step over the leading "nop_" */
+   _debug_printf("Application calling unsupported %s function\n", gl_name);
+}
+
+static void APIENTRY
+nop_glBindMultiTextureEXT(GLenum texunit, GLenum target, GLuint texture)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glColor3hNV(GLhalfNV red, GLhalfNV green, GLhalfNV blue)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glColor3hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glColor4hNV(GLhalfNV red, GLhalfNV green, GLhalfNV blue, GLhalfNV alpha)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glColor4hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glDisableClientStateIndexedEXT(GLenum array, GLuint index)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glEnableClientStateIndexedEXT(GLenum array, GLuint index)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glFogCoordhNV(GLhalfNV fog)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glFogCoordhvNV(const GLhalfNV *fog)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glGetNamedBufferParameterivEXT(GLuint buffer, GLenum pname, GLint *params)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glGetNamedBufferSubDataEXT(GLuint buffer, GLintptr offset, GLsizeiptr size, void *data)
+{
+   warning(__func__);
+}
+
+static void *APIENTRY   /* no-op stand-in for glMapNamedBufferEXT */
+nop_glMapNamedBufferEXT(GLuint buffer, GLenum access)
+{
+   warning(__func__);   /* log the unsupported call; both arguments are ignored */
+   return NULL;         /* nothing is mapped, so the caller gets a null pointer */
+}
+
+static void APIENTRY
+nop_glMatrixLoadfEXT(GLenum mode, const GLfloat *m)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMatrixLoadIdentityEXT(GLenum mode)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord1hNV(GLenum target, GLhalfNV s)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord1hvNV(GLenum target, const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord2hNV(GLenum target, GLhalfNV s, GLhalfNV t)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord2hvNV(GLenum target, const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord3hNV(GLenum target, GLhalfNV s, GLhalfNV t, GLhalfNV r)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord3hvNV(GLenum target, const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord4hNV(GLenum target, GLhalfNV s, GLhalfNV t, GLhalfNV r, GLhalfNV q)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoord4hvNV(GLenum target, const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexCoordPointerEXT(GLenum texunit, GLint size, GLenum type, GLsizei stride, const void *pointer)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexEnvfEXT(GLenum texunit, GLenum target, GLenum pname, GLfloat param)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexEnvfvEXT(GLenum texunit, GLenum target, GLenum pname, const GLfloat *params)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexEnviEXT(GLenum texunit, GLenum target, GLenum pname, GLint param)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexGenfvEXT(GLenum texunit, GLenum coord, GLenum pname, const GLfloat *params)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glMultiTexGeniEXT(GLenum texunit, GLenum coord, GLenum pname, GLint param)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glNamedBufferDataEXT(GLuint buffer, GLsizeiptr size, const void *data, GLenum usage)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glNamedBufferSubDataEXT(GLuint buffer, GLintptr offset, GLsizeiptr size, const void *data)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glNamedProgramLocalParameter4fvEXT(GLuint program, GLenum target, GLuint index, const GLfloat *params)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glNamedProgramLocalParameters4fvEXT(GLuint program, GLenum target, GLuint index, GLsizei count, const GLfloat *params)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glNormal3hNV(GLhalfNV nx, GLhalfNV ny, GLhalfNV nz)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glNormal3hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glPatchParameterfv(GLenum pname, const GLfloat *values)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glPatchParameteri(GLenum pname, GLint value)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glSecondaryColor3hNV(GLhalfNV red, GLhalfNV green, GLhalfNV blue)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glSecondaryColor3hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord1hNV(GLhalfNV s)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord1hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord2hNV(GLhalfNV s, GLhalfNV t)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord2hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord3hNV(GLhalfNV s, GLhalfNV t, GLhalfNV r)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord3hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord4hNV(GLhalfNV s, GLhalfNV t, GLhalfNV r, GLhalfNV q)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTexCoord4hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTextureParameterfEXT(GLuint texture, GLenum target, GLenum pname, GLfloat param)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTextureParameterfvEXT(GLuint texture, GLenum target, GLenum pname, const GLfloat *params)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glTextureParameteriEXT(GLuint texture, GLenum target, GLenum pname, GLint param)
+{
+   warning(__func__);
+}
+
+static GLboolean APIENTRY   /* no-op stand-in for glUnmapNamedBufferEXT */
+nop_glUnmapNamedBufferEXT(GLuint buffer)
+{
+   warning(__func__);   /* log the unsupported call; 'buffer' is ignored */
+   return GL_FALSE;     /* always reports GL_FALSE since nothing was unmapped */
+}
+
+static void APIENTRY
+nop_glVertex2hNV(GLhalfNV x, GLhalfNV y)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glVertex2hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glVertex3hNV(GLhalfNV x, GLhalfNV y, GLhalfNV z)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glVertex3hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glVertex4hNV(GLhalfNV x, GLhalfNV y, GLhalfNV z, GLhalfNV w)
+{
+   warning(__func__);
+}
+
+static void APIENTRY
+nop_glVertex4hvNV(const GLhalfNV *v)
+{
+   warning(__func__);
+}
+
+/* Look up a no-op stand-in for the GL entry point called 'name'.
+ * Returns the stub cast to PROC, or NULL when the table has no such entry. */
+PROC
+stw_get_nop_function(const char *name)
+{
+   static const struct {   /* static: built once, not re-initialised on the stack per call */
+      const char *name;
+      PROC p;
+   } table[] = {
+      { "glBindMultiTextureEXT", (PROC) nop_glBindMultiTextureEXT },
+      { "glColor3hNV", (PROC) nop_glColor3hNV },
+      { "glColor3hvNV", (PROC) nop_glColor3hvNV },
+      { "glColor4hNV", (PROC) nop_glColor4hNV },
+      { "glColor4hvNV", (PROC) nop_glColor4hvNV },
+      { "glDisableClientStateIndexedEXT", (PROC) nop_glDisableClientStateIndexedEXT },
+      { "glEnableClientStateIndexedEXT", (PROC) nop_glEnableClientStateIndexedEXT },
+      { "glFogCoordhNV", (PROC) nop_glFogCoordhNV },
+      { "glFogCoordhvNV", (PROC) nop_glFogCoordhvNV },
+      { "glGetNamedBufferParameterivEXT", (PROC) nop_glGetNamedBufferParameterivEXT },
+      { "glGetNamedBufferSubDataEXT", (PROC) nop_glGetNamedBufferSubDataEXT },
+      { "glMapNamedBufferEXT", (PROC) nop_glMapNamedBufferEXT },
+      { "glMatrixLoadfEXT", (PROC) nop_glMatrixLoadfEXT },
+      { "glMatrixLoadIdentityEXT", (PROC) nop_glMatrixLoadIdentityEXT },
+      { "glMultiTexCoord1hNV", (PROC) nop_glMultiTexCoord1hNV },
+      { "glMultiTexCoord1hvNV", (PROC) nop_glMultiTexCoord1hvNV },
+      { "glMultiTexCoord2hNV", (PROC) nop_glMultiTexCoord2hNV },
+      { "glMultiTexCoord2hvNV", (PROC) nop_glMultiTexCoord2hvNV },
+      { "glMultiTexCoord3hNV", (PROC) nop_glMultiTexCoord3hNV },
+      { "glMultiTexCoord3hvNV", (PROC) nop_glMultiTexCoord3hvNV },
+      { "glMultiTexCoord4hNV", (PROC) nop_glMultiTexCoord4hNV },
+      { "glMultiTexCoord4hvNV", (PROC) nop_glMultiTexCoord4hvNV },
+      { "glMultiTexCoordPointerEXT", (PROC) nop_glMultiTexCoordPointerEXT },
+      { "glMultiTexEnvfEXT", (PROC) nop_glMultiTexEnvfEXT },
+      { "glMultiTexEnvfvEXT", (PROC) nop_glMultiTexEnvfvEXT },
+      { "glMultiTexEnviEXT", (PROC) nop_glMultiTexEnviEXT },
+      { "glMultiTexGenfvEXT", (PROC) nop_glMultiTexGenfvEXT },
+      { "glMultiTexGeniEXT", (PROC) nop_glMultiTexGeniEXT },
+      { "glNamedBufferDataEXT", (PROC) nop_glNamedBufferDataEXT },
+      { "glNamedBufferSubDataEXT", (PROC) nop_glNamedBufferSubDataEXT },
+      { "glNamedProgramLocalParameter4fvEXT", (PROC) nop_glNamedProgramLocalParameter4fvEXT },
+      { "glNamedProgramLocalParameters4fvEXT", (PROC) nop_glNamedProgramLocalParameters4fvEXT },
+      { "glNormal3hNV", (PROC) nop_glNormal3hNV },
+      { "glNormal3hvNV", (PROC) nop_glNormal3hvNV },
+      { "glPatchParameterfv", (PROC) nop_glPatchParameterfv },
+      { "glPatchParameteri", (PROC) nop_glPatchParameteri },
+      { "glSecondaryColor3hNV", (PROC) nop_glSecondaryColor3hNV },
+      { "glSecondaryColor3hvNV", (PROC) nop_glSecondaryColor3hvNV },
+      { "glTexCoord1hNV", (PROC) nop_glTexCoord1hNV },
+      { "glTexCoord1hvNV", (PROC) nop_glTexCoord1hvNV },
+      { "glTexCoord2hNV", (PROC) nop_glTexCoord2hNV },
+      { "glTexCoord2hvNV", (PROC) nop_glTexCoord2hvNV },
+      { "glTexCoord3hNV", (PROC) nop_glTexCoord3hNV },
+      { "glTexCoord3hvNV", (PROC) nop_glTexCoord3hvNV },
+      { "glTexCoord4hNV", (PROC) nop_glTexCoord4hNV },
+      { "glTexCoord4hvNV", (PROC) nop_glTexCoord4hvNV },
+      { "glTextureParameterfEXT", (PROC) nop_glTextureParameterfEXT },
+      { "glTextureParameterfvEXT", (PROC) nop_glTextureParameterfvEXT },
+      { "glTextureParameteriEXT", (PROC) nop_glTextureParameteriEXT },
+      { "glUnmapNamedBufferEXT", (PROC) nop_glUnmapNamedBufferEXT },
+      { "glVertex2hNV", (PROC) nop_glVertex2hNV },
+      { "glVertex2hvNV", (PROC) nop_glVertex2hvNV },
+      { "glVertex3hNV", (PROC) nop_glVertex3hNV },
+      { "glVertex3hvNV", (PROC) nop_glVertex3hvNV },
+      { "glVertex4hNV", (PROC) nop_glVertex4hNV },
+      { "glVertex4hvNV", (PROC) nop_glVertex4hvNV },
+      { NULL, NULL }
+   };
+   int i;
+
+   for (i = 0; table[i].name; i++) {
+      if (strcmp(table[i].name, name) == 0)
+         return table[i].p;
+   }
+   return NULL;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_nopfuncs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_nopfuncs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_nopfuncs.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_nopfuncs.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,11 @@
+/* Lookup of no-op GL stubs for entry points the driver does not provide. */
+
+#ifndef STW_NOPFUNCS_H
+#define STW_NOPFUNCS_H
+
+#include <windows.h>   /* PROC; keeps this header usable on its own */
+
+PROC
+stw_get_nop_function(const char *name);
+
+#endif /* STW_NOPFUNCS_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_pixelformat.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_pixelformat.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_pixelformat.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_pixelformat.c	2015-09-16 14:36:09.000000000 +0000
@@ -113,7 +113,9 @@
 const unsigned 
 stw_pf_multisample[] = {
    0,
-   4
+   4,
+   8,
+   16
 };
 
 
@@ -222,22 +224,31 @@
    unsigned ms, db, ds, acc;
    unsigned bind_flags = PIPE_BIND_RENDER_TARGET;
    unsigned num_added = 0;
+   int force_samples = 0;
 
-   if (!extended) {
-      bind_flags |= PIPE_BIND_DISPLAY_TARGET;
+   /* Since GLUT for Windows doesn't support MSAA we have an env var
+    * to force all pixel formats to have a particular number of samples.
+    */
+   {
+      const char *samples= getenv("SVGA_FORCE_MSAA");
+      if (samples)
+         force_samples = atoi(samples);
    }
 
-   if (!screen->is_format_supported(screen, color->format,
-                                    PIPE_TEXTURE_2D, 0, bind_flags)) {
-      return 0;
+   if (!extended) {
+      bind_flags |= PIPE_BIND_DISPLAY_TARGET;
    }
 
    for (ms = 0; ms < Elements(stw_pf_multisample); ms++) {
       unsigned samples = stw_pf_multisample[ms];
 
-      /* FIXME: re-enabled MSAA when we can query it */
-      if (samples)
+      if (force_samples && samples != force_samples)
+         continue;
+
+      if (!screen->is_format_supported(screen, color->format,
+                                       PIPE_TEXTURE_2D, samples, bind_flags)) {
          continue;
+      }
 
       for (db = 0; db < Elements(stw_pf_doublebuffer); db++) {
          unsigned doublebuffer = stw_pf_doublebuffer[db];
@@ -246,7 +257,7 @@
             const struct stw_pf_depth_info *depth = &stw_pf_depth_stencil[ds];
 
             if (!screen->is_format_supported(screen, depth->format,
-                                             PIPE_TEXTURE_2D, 0,
+                                             PIPE_TEXTURE_2D, samples,
                                              PIPE_BIND_DEPTH_STENCIL)) {
                continue;
             }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_st.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_st.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_st.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_st.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,7 @@
    unsigned texture_mask;
 };
 
-static INLINE struct stw_st_framebuffer *
+static inline struct stw_st_framebuffer *
 stw_st_framebuffer(struct st_framebuffer_iface *stfb)
 {
    return (struct stw_st_framebuffer *) stfb;
@@ -77,6 +77,7 @@
    templ.depth0 = 1;
    templ.array_size = 1;
    templ.last_level = 0;
+   templ.nr_samples = stwfb->stvis.samples;
 
    for (i = 0; i < ST_ATTACHMENT_COUNT; i++) {
       enum pipe_format format;
@@ -95,6 +96,7 @@
       case ST_ATTACHMENT_BACK_LEFT:
          format = stwfb->stvis.color_format;
          bind = PIPE_BIND_DISPLAY_TARGET |
+                PIPE_BIND_SAMPLER_VIEW |
                 PIPE_BIND_RENDER_TARGET;
          break;
       case ST_ATTACHMENT_DEPTH_STENCIL:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_tls.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_tls.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/wgl/stw_tls.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/wgl/stw_tls.c	2015-09-16 14:36:09.000000000 +0000
@@ -50,7 +50,7 @@
 static struct stw_tls_data *g_pendingTlsData = NULL;
 
 
-static INLINE struct stw_tls_data *
+static inline struct stw_tls_data *
 stw_tls_data_create(DWORD dwThreadId);
 
 static struct stw_tls_data *
@@ -111,7 +111,7 @@
 /**
  * Install windows hook for a given thread (not necessarily the current one).
  */
-static INLINE struct stw_tls_data *
+static inline struct stw_tls_data *
 stw_tls_data_create(DWORD dwThreadId)
 {
    struct stw_tls_data *data;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_composite.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_composite.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_composite.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_composite.c	2015-09-16 14:36:09.000000000 +0000
@@ -167,7 +167,7 @@
 }
 
 
-static INLINE int
+static inline int
 xa_repeat_to_gallium(int mode)
 {
     switch(mode) {
@@ -185,7 +185,7 @@
     return PIPE_TEX_WRAP_REPEAT;
 }
 
-static INLINE boolean
+static inline boolean
 xa_filter_to_gallium(int xrender_filter, int *out_filter)
 {
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_priv.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_priv.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_priv.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_priv.h	2015-09-16 14:36:09.000000000 +0000
@@ -123,7 +123,7 @@
     const struct xa_composite *comp;
 };
 
-static INLINE void
+static inline void
 xa_scissor_reset(struct xa_context *ctx)
 {
     ctx->scissor.maxx = 0;
@@ -133,7 +133,7 @@
     ctx->scissor_valid = FALSE;
 }
 
-static INLINE void
+static inline void
 xa_scissor_update(struct xa_context *ctx, unsigned minx, unsigned miny,
 		unsigned maxx, unsigned maxy)
 {
@@ -189,13 +189,13 @@
  * Inline utilities
  */
 
-static INLINE int
+static inline int
 xa_min(int a, int b)
 {
     return ((a <= b) ? a : b);
 }
 
-static INLINE void
+static inline void
 xa_pixel_to_float4(uint32_t pixel, float *color)
 {
     uint32_t	    r, g, b, a;
@@ -210,7 +210,7 @@
     color[3] = ((float)a) / 255.;
 }
 
-static INLINE void
+static inline void
 xa_pixel_to_float4_a8(uint32_t pixel, float *color)
 {
     uint32_t a;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_renderer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_renderer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_renderer.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_renderer.c	2015-09-16 14:36:09.000000000 +0000
@@ -45,14 +45,14 @@
 renderer_set_constants(struct xa_context *r,
 		       int shader_type, const float *params, int param_bytes);
 
-static INLINE boolean
+static inline boolean
 is_affine(float *matrix)
 {
     return floatIsZero(matrix[2]) && floatIsZero(matrix[5])
 	&& floatsEqual(matrix[8], 1);
 }
 
-static INLINE void
+static inline void
 map_point(float *mat, float x, float y, float *out_x, float *out_y)
 {
     if (!mat) {
@@ -71,7 +71,7 @@
     }
 }
 
-static INLINE void
+static inline void
 renderer_draw(struct xa_context *r)
 {
     int num_verts = r->buffer_size / (r->attrs_per_vertex * NUM_COMPONENTS);
@@ -97,7 +97,7 @@
     xa_scissor_reset(r);
 }
 
-static INLINE void
+static inline void
 renderer_draw_conditional(struct xa_context *r, int next_batch)
 {
     if (r->buffer_size + next_batch >= XA_VB_SIZE ||
@@ -135,7 +135,7 @@
     }
 }
 
-static INLINE void
+static inline void
 add_vertex_color(struct xa_context *r, float x, float y, float color[4])
 {
     float *vertex = r->buffer + r->buffer_size;
@@ -153,7 +153,7 @@
     r->buffer_size += 8;
 }
 
-static INLINE void
+static inline void
 add_vertex_1tex(struct xa_context *r, float x, float y, float s, float t)
 {
     float *vertex = r->buffer + r->buffer_size;
@@ -171,7 +171,7 @@
     r->buffer_size += 8;
 }
 
-static INLINE void
+static inline void
 add_vertex_2tex(struct xa_context *r,
 		float x, float y, float s0, float t0, float s1, float t1)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_tgsi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_tgsi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_tgsi.c	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_tgsi.c	2015-09-16 14:36:09.000000000 +0000
@@ -106,7 +106,7 @@
     struct cso_hash *fs_hash;
 };
 
-static INLINE void
+static inline void
 src_in_mask(struct ureg_program *ureg,
 	    struct ureg_dst dst,
 	    struct ureg_src src,
@@ -368,7 +368,7 @@
     return ureg_create_shader_and_destroy(ureg, pipe);
 }
 
-static INLINE void
+static inline void
 xrender_tex(struct ureg_program *ureg,
 	    struct ureg_dst dst,
 	    struct ureg_src coords,
@@ -617,7 +617,7 @@
     FREE(sc);
 }
 
-static INLINE void *
+static inline void *
 shader_from_cache(struct pipe_context *pipe,
 		  unsigned type, struct cso_hash *hash, unsigned key)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_tracker.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_tracker.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xa/xa_tracker.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xa/xa_tracker.c	2015-09-16 14:36:09.000000000 +0000
@@ -153,7 +153,7 @@
     loader_fd = dup(drm_fd);
     if (loader_fd == -1)
         return NULL;
-    if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd, false))
+    if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd))
 	xa->screen = pipe_loader_create_screen(xa->dev, PIPE_SEARCH_DIR);
 #endif
     if (!xa->screen)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xvmc/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xvmc/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xvmc/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xvmc/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -20,7 +20,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xvmc/surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xvmc/surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xvmc/surface.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xvmc/surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -489,7 +489,7 @@
    *status = 0;
 
    if (surface_priv->fence)
-      if (!pipe->screen->fence_signalled(pipe->screen, surface_priv->fence))
+      if (!pipe->screen->fence_finish(pipe->screen, surface_priv->fence, 0))
          *status |= XVMC_RENDERING;
 
    return Success;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xvmc/xvmc_private.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xvmc/xvmc_private.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/state_trackers/xvmc/xvmc_private.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/state_trackers/xvmc/xvmc_private.h	2015-09-16 14:36:09.000000000 +0000
@@ -106,7 +106,7 @@
 #define XVMC_WARN  2
 #define XVMC_TRACE 3
 
-static INLINE void XVMC_MSG(int level, const char *fmt, ...)
+static inline void XVMC_MSG(int level, const char *fmt, ...)
 {
    static int debug_level = -1;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/d3dadapter9/description.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/d3dadapter9/description.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/d3dadapter9/description.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/d3dadapter9/description.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2015 Patrick Rudolph <siro@das-labor.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include <string.h>
+#include "adapter9.h"
+
+#define DBG_CHANNEL DBG_ADAPTER
+
+/* prototypes for the helpers defined below; drm.c declares the same three by hand, keep both in sync */
+void
+d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
+        unsigned fallback_ven,   /* vendor id stored when the current one is not recognised */
+        unsigned fallback_dev,   /* device id stored together with fallback_ven */
+        const char* fallback_name ); /* copied into drvid->Description in that case */
+void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid); /* sets Driver and DriverVersion{Low,High}Part by VendorId */
+void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid); /* rewrites Description from the per-vendor lookup tables */
+
+enum d3d_vendor_id /* VendorId values this file has dedicated handling for */
+{
+    HW_VENDOR_SOFTWARE              = 0x0000, /* declared but not referenced in this file */
+    HW_VENDOR_AMD                   = 0x1002,
+    HW_VENDOR_NVIDIA                = 0x10de, /* same value as FALLBACK_VENID in drm.c */
+    HW_VENDOR_VMWARE                = 0x15ad,
+    HW_VENDOR_INTEL                 = 0x8086,
+};
+
+static const struct card_lookup_table { /* tables below are read-only and file-local */
+    const char *mesaname;   /* substring searched for in drvid->Description */
+    const char *d3d9name;   /* name copied into drvid->Description on a match */
+}
+cards_amd[] = {
+    {"HAWAII",                      "AMD Radeon R9 290"},
+    {"KAVERI",                      "AMD Radeon(TM) R7 Graphics"},
+    {"KABINI",                      "AMD Radeon HD 8400 / R3 Series"},
+    {"BONAIRE",                     "AMD Radeon HD 8770"},
+    {"OLAND",                       "AMD Radeon HD 8670"},
+    {"HAINAN",                      "AMD Radeon HD 8600M Series"},
+    {"TAHITI",                      "AMD Radeon HD 7900 Series"},
+    {"PITCAIRN",                    "AMD Radeon HD 7800 Series"},
+    {"CAPE VERDE",                  "AMD Radeon HD 7700 Series"},
+    {"ARUBA",                       "AMD Radeon HD 7660D"},
+    {"CAYMAN",                      "AMD Radeon HD 6900 Series"},
+    {"BARTS",                       "AMD Radeon HD 6800 Series"},
+    {"TURKS",                       "AMD Radeon HD 6600 Series"},
+    {"SUMO2",                       "AMD Radeon HD 6410D"},
+    {"SUMO",                        "AMD Radeon HD 6550D"},
+    {"CAICOS",                      "AMD Radeon HD 6400 Series"},
+    {"PALM",                        "AMD Radeon HD 6300 series Graphics"},
+    {"HEMLOCK",                     "ATI Radeon HD 5900 Series"},
+    {"CYPRESS",                     "ATI Radeon HD 5800 Series"},
+    {"JUNIPER",                     "ATI Radeon HD 5700 Series"},
+    {"REDWOOD",                     "ATI Radeon HD 5600 Series"},
+    {"CEDAR",                       "ATI Radeon HD 5500 Series"},
+    {"R700",                        "ATI Radeon HD 4800 Series"},
+    {"RV790",                       "ATI Radeon HD 4800 Series"},
+    {"RV770",                       "ATI Radeon HD 4800 Series"},
+    {"RV740",                       "ATI Radeon HD 4700 Series"},
+    {"RV730",                       "ATI Radeon HD 4600 Series"},
+    {"RV710",                       "ATI Radeon HD 4350"},
+    {"RS880",                       "ATI Mobility Radeon HD 4200"},
+    {"RS780",                       "ATI Radeon HD 3200 Graphics"},
+    {"R680",                        "ATI Radeon HD 2900 XT"},
+    {"R600",                        "ATI Radeon HD 2900 XT"},
+    {"RV670",                       "ATI Radeon HD 2900 XT"},
+    {"RV635",                       "ATI Mobility Radeon HD 2600"},
+    {"RV630",                       "ATI Mobility Radeon HD 2600"},
+    {"RV620",                       "ATI Mobility Radeon HD 2350"},
+    {"RV610",                       "ATI Mobility Radeon HD 2350"},
+    {"R580",                        "ATI Radeon X1600 Series"},
+    {"R520",                        "ATI Radeon X1600 Series"},
+    {"RV570",                       "ATI Radeon X1600 Series"},
+    {"RV560",                       "ATI Radeon X1600 Series"},
+    {"RV535",                       "ATI Radeon X1600 Series"},
+    {"RV530",                       "ATI Radeon X1600 Series"},
+    {"RV516",                       "ATI Radeon X700 SE"},
+    {"RV515",                       "ATI Radeon X700 SE"},
+    {"R481",                        "ATI Radeon X700 SE"},
+    {"R480",                        "ATI Radeon X700 SE"},
+    {"R430",                        "ATI Radeon X700 SE"},
+    {"R423",                        "ATI Radeon X700 SE"},
+    {"R420",                        "ATI Radeon X700 SE"},
+    {"R410",                        "ATI Radeon X700 SE"},
+    {"RV410",                       "ATI Radeon X700 SE"},
+    {"RS740",                       "ATI RADEON XPRESS 200M Series"},
+    {"RS690",                       "ATI RADEON XPRESS 200M Series"},
+    {"RS600",                       "ATI RADEON XPRESS 200M Series"},
+    {"RS485",                       "ATI RADEON XPRESS 200M Series"},
+    {"RS482",                       "ATI RADEON XPRESS 200M Series"},
+    {"RS480",                       "ATI RADEON XPRESS 200M Series"},
+    {"RS400",                       "ATI RADEON XPRESS 200M Series"},
+    {"R360",                        "ATI Radeon 9500"},
+    {"R350",                        "ATI Radeon 9500"},
+    {"R300",                        "ATI Radeon 9500"},
+    {"RV370",                       "ATI Radeon 9500"},
+    {"RV360",                       "ATI Radeon 9500"},
+    {"RV351",                       "ATI Radeon 9500"},
+    {"RV350",                       "ATI Radeon 9500"},
+},
+cards_nvidia[] =
+{
+    {"NV124",                       "NVIDIA GeForce GTX 970"},
+    {"NV117",                       "NVIDIA GeForce GTX 750"},
+    {"NVF1",                        "NVIDIA GeForce GTX 780 Ti"},
+    {"NVF0",                        "NVIDIA GeForce GTX 780"},
+    {"NVE6",                        "NVIDIA GeForce GTX 770M"},
+    {"NVE4",                        "NVIDIA GeForce GTX 680"},
+    {"NVD9",                        "NVIDIA GeForce GT 520"},
+    {"NVCF",                        "NVIDIA GeForce GTX 550 Ti"},
+    {"NVCE",                        "NVIDIA GeForce GTX 560"},
+    {"NVC8",                        "NVIDIA GeForce GTX 570"},
+    {"NVC4",                        "NVIDIA GeForce GTX 460"},
+    {"NVC3",                        "NVIDIA GeForce GT 440"},
+    {"NVC1",                        "NVIDIA GeForce GT 420"},
+    {"NVC0",                        "NVIDIA GeForce GTX 480"},
+    {"NVAF",                        "NVIDIA GeForce GT 320M"},
+    {"NVAC",                        "NVIDIA GeForce 8200"},
+    {"NVAA",                        "NVIDIA GeForce 8200"},
+    {"NVA8",                        "NVIDIA GeForce 210"},
+    {"NVA5",                        "NVIDIA GeForce GT 220"},
+    {"NVA3",                        "NVIDIA GeForce GT 240"},
+    {"NVA0",                        "NVIDIA GeForce GTX 280"},
+    {"NV98",                        "NVIDIA GeForce 9200"},
+    {"NV96",                        "NVIDIA GeForce 9400 GT"},
+    {"NV94",                        "NVIDIA GeForce 9600 GT"},
+    {"NV92",                        "NVIDIA GeForce 9800 GT"},
+    {"NV86",                        "NVIDIA GeForce 8500 GT"},
+    {"NV84",                        "NVIDIA GeForce 8600 GT"},
+    {"NV50",                        "NVIDIA GeForce 8800 GTX"},
+    {"NV68",                        "NVIDIA GeForce 6200"},
+    {"NV67",                        "NVIDIA GeForce 6200"},
+    {"NV63",                        "NVIDIA GeForce 6200"},
+    {"NV4E",                        "NVIDIA GeForce 6200"},
+    {"NV4C",                        "NVIDIA GeForce 6200"},
+    {"NV4B",                        "NVIDIA GeForce 7600 GT"},
+    {"NV4A",                        "NVIDIA GeForce 6200"},
+    {"NV49",                        "NVIDIA GeForce 7800 GT"},
+    {"NV47",                        "NVIDIA GeForce 7800 GT"},
+    {"NV46",                        "NVIDIA GeForce Go 7400",},
+    {"NV45",                        "NVIDIA GeForce 6800"},
+    {"NV44",                        "NVIDIA GeForce 6200"},
+    {"NV43",                        "NVIDIA GeForce 6600 GT"},
+    {"NV42",                        "NVIDIA GeForce 6800"},
+    {"NV41",                        "NVIDIA GeForce 6800"},
+    {"NV40",                        "NVIDIA GeForce 6800"},
+    {"NV38",                        "NVIDIA GeForce FX 5800"},
+    {"NV36",                        "NVIDIA GeForce FX 5800"},
+    {"NV35",                        "NVIDIA GeForce FX 5800"},
+    {"NV34",                        "NVIDIA GeForce FX 5200"},
+    {"NV31",                        "NVIDIA GeForce FX 5600"},
+    {"NV30",                        "NVIDIA GeForce FX 5800"},
+    {"nv28",                        "NVIDIA GeForce4 Ti 4200"},
+    {"nv25",                        "NVIDIA GeForce4 Ti 4200"},
+    {"nv20",                        "NVIDIA GeForce3"},
+    {"nv1F",                        "NVIDIA GeForce4 MX 460"},
+    {"nv1A",                        "NVIDIA GeForce2 GTS/GeForce2 Pro"},
+    {"nv18",                        "NVIDIA GeForce4 MX 460"},
+    {"nv17",                        "NVIDIA GeForce4 MX 460"},
+    {"nv16",                        "NVIDIA GeForce2 GTS/GeForce2 Pro"},
+    {"nv15",                        "NVIDIA GeForce2 GTS/GeForce2 Pro"},
+    {"nv11",                        "NVIDIA GeForce2 MX/MX 400"},
+    {"nv10",                        "NVIDIA GeForce 256"},
+},
+cards_vmware[] =
+{
+    {"SVGA3D",                      "VMware SVGA 3D (Microsoft Corporation - WDDM)"},
+},
+cards_intel[] =
+{
+    {"Haswell Mobile",              "Intel(R) Haswell Mobile"},
+    {"Ivybridge Server",            "Intel(R) Ivybridge Server"},
+    {"Ivybridge Mobile",            "Intel(R) Ivybridge Mobile"},
+    {"Ivybridge Desktop",           "Intel(R) Ivybridge Desktop"},
+    {"Sandybridge Server",          "Intel(R) Sandybridge Server"},
+    {"Sandybridge Mobile",          "Intel(R) Sandybridge Mobile"},
+    {"Sandybridge Desktop",         "Intel(R) Sandybridge Desktop"},
+    {"Ironlake Mobile",             "Intel(R) Ironlake Mobile"},
+    {"Ironlake Desktop",            "Intel(R) Ironlake Desktop"},
+    {"B43",                         "Intel(R) B43"},
+    {"G41",                         "Intel(R) G41"},
+    {"G45",                         "Intel(R) G45/G43"},
+    {"Q45",                         "Intel(R) Q45/Q43"},
+    {"Integrated Graphics Device",  "Intel(R) Integrated Graphics Device"},
+    {"GM45",                        "Mobile Intel(R) GM45 Express Chipset Family"},
+    {"965GME",                      "Intel(R) 965GME"},
+    {"965GM",                       "Mobile Intel(R) 965 Express Chipset Family"},
+    {"946GZ",                       "Intel(R) 946GZ"},
+    {"965G",                        "Intel(R) 965G"},
+    {"965Q",                        "Intel(R) 965Q"},
+    {"Pineview M",                  "Intel(R) IGD"},
+    {"Pineview G",                  "Intel(R) IGD"},
+    {"IGD",                         "Intel(R) IGD"},
+    {"Q33",                         "Intel(R) Q33"},
+    {"G33",                         "Intel(R) G33"},
+    {"Q35",                         "Intel(R) Q35"},
+    {"945GME",                      "Intel(R) 945GME"},
+    {"945GM",                       "Mobile Intel(R) 945GM Express Chipset Family"},
+    {"945G",                        "Intel(R) 945G"},
+    {"915GM",                       "Mobile Intel(R) 915GM/GMS,910GML Express Chipset Family"},
+    {"E7221G",                      "Intel(R) E7221G"},
+    {"915G",                        "Intel(R) 82915G/GV/910GL Express Chipset Family"},
+    {"865G",                        "Intel(R) 82865G Graphics Controller"},
+    {"845G",                        "Intel(R) 845G"},
+    {"855GM",                       "Intel(R) 82852/82855 GM/GME Graphics Controller"},
+    {"830M",                        "Intel(R) 82830M Graphics Controller"},
+};
+
+/* Override VendorId, DeviceId and Description for unknown vendors: any id other
+ * than Intel, VMware, AMD or NVIDIA is replaced by the given fallback values. */
+void
+d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
+        unsigned fallback_ven, unsigned fallback_dev,
+        const char* fallback_name )
+{
+    if (drvid->VendorId == HW_VENDOR_INTEL ||
+        drvid->VendorId == HW_VENDOR_VMWARE ||
+        drvid->VendorId == HW_VENDOR_AMD ||
+        drvid->VendorId == HW_VENDOR_NVIDIA)
+        return;
+    DBG("unknown vendor 0x%04x, emulating 0x%04x\n", drvid->VendorId, fallback_ven);
+    drvid->VendorId = fallback_ven;
+    drvid->DeviceId = fallback_dev;
+    strncpy(drvid->Description, fallback_name, sizeof(drvid->Description) - 1);
+    drvid->Description[sizeof(drvid->Description) - 1] = '\0'; /* strncpy alone may not terminate */
+}
+
+/* fill in driver name and version: Driver, DriverVersionLowPart and
+ * DriverVersionHighPart are set per VendorId; other vendors are left as is */
+void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid) {
+    const char *dll_name;
+    unsigned version_low, version_high;
+
+    switch (drvid->VendorId) {
+    case HW_VENDOR_INTEL:
+        dll_name = "igdumd32.dll"; version_low = 0x000A0682; version_high = 0x0006000F;
+        break;
+    case HW_VENDOR_VMWARE:
+        dll_name = "vm3dum.dll"; version_low = 0x0001046E; version_high = 0x0006000E;
+        break;
+    case HW_VENDOR_AMD:
+        dll_name = "atiumdag.dll"; version_low = 0x000A0500; version_high = 0x00060011;
+        break;
+    case HW_VENDOR_NVIDIA:
+        dll_name = "nvd3dum.dll"; version_low = 0x000D0FD4; version_high = 0x00060012;
+        break;
+    default:
+        return;
+    }
+
+    drvid->DriverVersionLowPart = version_low;
+    drvid->DriverVersionHighPart = version_high;
+    strncpy(drvid->Driver, dll_name, sizeof(drvid->Driver));
+}
+
+/* Store src in drvid->Description, always leaving the field NUL-terminated. */
+static void
+d3d_set_description(D3DADAPTER_IDENTIFIER9* drvid, const char *src)
+{
+    strncpy(drvid->Description, src, sizeof(drvid->Description) - 1);
+    drvid->Description[sizeof(drvid->Description) - 1] = '\0';
+}
+
+/* Replace drvid->Description with the d3d9name of the first of the count entries
+ * in cards whose mesaname occurs in it (strstr, case-sensitive, table order, so
+ * SUMO2 has to precede SUMO). Without a match cards[0] is used as fall-back. */
+static void
+d3d_match_cardname(D3DADAPTER_IDENTIFIER9* drvid,
+        const struct card_lookup_table *cards, unsigned count)
+{
+    unsigned i;
+
+    for (i = 0; i < count; i++) {
+        if (strstr(drvid->Description, cards[i].mesaname)) {
+            d3d_set_description(drvid, cards[i].d3d9name);
+            return;
+        }
+    }
+    /* use a fall-back if nothing matches; log the string that was searched */
+    DBG("Unknown card name %s!\n", drvid->Description);
+    d3d_set_description(drvid, cards[0].d3d9name);
+}
+
+/* try to match the device name and override it with Windows-like device names;
+ * VendorId selects the table, any other vendor leaves Description unchanged */
+void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid) {
+    switch (drvid->VendorId) {
+    case HW_VENDOR_INTEL:
+        d3d_match_cardname(drvid, cards_intel,
+                sizeof(cards_intel) / sizeof(cards_intel[0]));
+        break;
+    case HW_VENDOR_VMWARE:
+        d3d_match_cardname(drvid, cards_vmware,
+                sizeof(cards_vmware) / sizeof(cards_vmware[0]));
+        break;
+    case HW_VENDOR_AMD:
+        d3d_match_cardname(drvid, cards_amd,
+                sizeof(cards_amd) / sizeof(cards_amd[0]));
+        break;
+    case HW_VENDOR_NVIDIA:
+        d3d_match_cardname(drvid, cards_nvidia,
+                sizeof(cards_nvidia) / sizeof(cards_nvidia[0]));
+        break;
+    default:
+        break;
+    }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/d3dadapter9/drm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/d3dadapter9/drm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/d3dadapter9/drm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/d3dadapter9/drm.c	2015-09-16 14:36:09.000000000 +0000
@@ -46,12 +46,6 @@
 
 #define DBG_CHANNEL DBG_ADAPTER
 
-#define VERSION_DWORD(hi, lo) \
-    ((DWORD)( \
-        ((DWORD)((hi) & 0xFFFF) << 16) | \
-         (DWORD)((lo) & 0xFFFF) \
-    ))
-
 const char __driConfigOptionsNine[] =
 DRI_CONF_BEGIN
     DRI_CONF_SECTION_PERFORMANCE
@@ -63,12 +57,21 @@
     DRI_CONF_SECTION_END
 DRI_CONF_END;
 
-/* Regarding os versions, we should not define our own as that would simply be
- * weird. Defaulting to Win2k/XP seems sane considering the origin of D3D9. The
- * driver also defaults to being a generic D3D9 driver, which of course only
- * matters if you're actually using the DDI. */
-#define VERSION_HIGH    VERSION_DWORD(0x0006, 0x000E) /* winxp, d3d9 */
-#define VERSION_LOW     VERSION_DWORD(0x0000, 0x0001) /* version, build */
+/* define fallback value here: NVIDIA GeForce GTX 970 */
+#define FALLBACK_NAME "NV124"
+#define FALLBACK_DEVID 0x13C2
+#define FALLBACK_VENID 0x10de
+
+/* prototypes */
+void
+d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
+		unsigned fallback_ven,
+		unsigned fallback_dev,
+		const char* fallback_name );
+
+void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid);
+
+void d3d_fill_cardname(D3DADAPTER_IDENTIFIER9* drvid);
 
 struct d3dadapter9drm_context
 {
@@ -101,7 +104,7 @@
 
 /* read a DWORD in the form 0xnnnnnnnn, which is how sysfs pci id stuff is
  * formatted. */
-static INLINE DWORD
+static inline DWORD
 read_file_dword( const char *name )
 {
     char buf[32];
@@ -123,7 +126,7 @@
  * dword at an offset in the raw PCI header. The reason this isn't used for all
  * data is that the kernel will make corrections but not expose them in the raw
  * header bytes. */
-static INLINE DWORD
+static inline DWORD
 read_config_dword( int fd,
                    unsigned offset )
 {
@@ -135,7 +138,7 @@
     return r;
 }
 
-static INLINE void
+static inline void
 get_bus_info( int fd,
               DWORD *vendorid,
               DWORD *deviceid,
@@ -152,15 +155,15 @@
         *subsysid = 0;
         *revision = 0;
     } else {
-        DBG("Unable to detect card. Fake GTX 680.\n");
-        *vendorid = 0x10de; /* NV GTX 680 */
-        *deviceid = 0x1180;
+        DBG("Unable to detect card. Faking %s\n", FALLBACK_NAME);
+        *vendorid = FALLBACK_VENID;
+        *deviceid = FALLBACK_DEVID;
         *subsysid = 0;
         *revision = 0;
     }
 }
 
-static INLINE void
+static inline void
 read_descriptor( struct d3dadapter9_context *ctx,
                  int fd )
 {
@@ -169,33 +172,23 @@
     memset(drvid, 0, sizeof(*drvid));
     get_bus_info(fd, &drvid->VendorId, &drvid->DeviceId,
                  &drvid->SubSysId, &drvid->Revision);
+    snprintf(drvid->DeviceName, sizeof(drvid->DeviceName),
+                 "Gallium 0.4 with %s", ctx->hal->get_vendor(ctx->hal));
+    strncpy(drvid->Description, ctx->hal->get_name(ctx->hal),
+                 sizeof(drvid->Description));
+
+    /* choose fall-back vendor if necessary to allow
+     * the following functions to return sane results */
+    d3d_match_vendor_id(drvid, FALLBACK_VENID, FALLBACK_DEVID, FALLBACK_NAME);
+    /* fill in driver name and version info */
+    d3d_fill_driver_version(drvid);
+    /* override Description field with Windows like names */
+    d3d_fill_cardname(drvid);
 
-    strncpy(drvid->Driver, "libd3dadapter9.so", sizeof(drvid->Driver));
-    strncpy(drvid->DeviceName, ctx->hal->get_name(ctx->hal), 32);
-    snprintf(drvid->Description, sizeof(drvid->Description),
-             "Gallium 0.4 with %s", ctx->hal->get_vendor(ctx->hal));
-
-    drvid->DriverVersionLowPart = VERSION_LOW;
-    drvid->DriverVersionHighPart = VERSION_HIGH;
-
-    /* To make a pseudo-real GUID we use the PCI bus data and some string */
-    drvid->DeviceIdentifier.Data1 = drvid->VendorId;
-    drvid->DeviceIdentifier.Data2 = drvid->DeviceId;
-    drvid->DeviceIdentifier.Data3 = drvid->SubSysId;
-    memcpy(drvid->DeviceIdentifier.Data4, "Gallium3D", 8);
-
-    drvid->WHQLLevel = 1; /* This fakes WHQL validaion */
-
-    /* XXX Fake NVIDIA binary driver on Windows.
-     *
-     * OS version: 4=95/98/NT4, 5=2000, 6=2000/XP, 7=Vista, 8=Win7
-     */
-    strncpy(drvid->Driver, "nvd3dum.dll", sizeof(drvid->Driver));
-    strncpy(drvid->Description, "NVIDIA GeForce GTX 680", sizeof(drvid->Description));
-    drvid->DriverVersionLowPart = VERSION_DWORD(12, 6658); /* minor, build */
-    drvid->DriverVersionHighPart = VERSION_DWORD(6, 15); /* OS, major */
-    drvid->SubSysId = 0;
-    drvid->Revision = 0;
+    /* this driver isn't WHQL certified */
+    drvid->WHQLLevel = 0;
+
+    /* this value is fixed */
     drvid->DeviceIdentifier.Data1 = 0xaeb2cdd4;
     drvid->DeviceIdentifier.Data2 = 0x6e41;
     drvid->DeviceIdentifier.Data3 = 0x43ea;
@@ -207,7 +200,6 @@
     drvid->DeviceIdentifier.Data4[5] = 0x76;
     drvid->DeviceIdentifier.Data4[6] = 0x07;
     drvid->DeviceIdentifier.Data4[7] = 0x81;
-    drvid->WHQLLevel = 0;
 }
 
 static HRESULT WINAPI
@@ -243,7 +235,7 @@
     ctx->base.hal = dd_create_screen(fd);
 #else
     /* use pipe-loader to dlopen appropriate drm driver */
-    if (!pipe_loader_drm_probe_fd(&ctx->dev, fd, FALSE)) {
+    if (!pipe_loader_drm_probe_fd(&ctx->dev, fd)) {
         ERR("Failed to probe drm fd %d.\n", fd);
         FREE(ctx);
         close(fd);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/d3dadapter9/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/d3dadapter9/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/d3dadapter9/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/d3dadapter9/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -54,6 +54,7 @@
 pkgconfig_DATA = d3d.pc
 
 d3dadapter9_la_SOURCES = \
+	description.c \
 	getproc.c \
 	drm.c
 
@@ -118,8 +119,7 @@
 
 d3dadapter9_la_LIBADD += \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/Android.mk	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,125 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2015 Chih-Wei Huang <cwhuang@linux.org.tw>
+# Copyright (C) 2015 Android-x86 Open Source Project
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := gallium_dri
+
+# Lollipop builds pass only the last component of MESA_DRI_MODULE_PATH; other builds pass the full path.
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+LOCAL_MODULE_RELATIVE_PATH := $(notdir $(MESA_DRI_MODULE_PATH))
+else
+LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH)
+endif
+
+LOCAL_SRC_FILES := target.c
+LOCAL_CFLAGS := -DDRI_TARGET
+LOCAL_SHARED_LIBRARIES := libdl libglapi libexpat
+
+# gallium_DRIVERS is not a LOCAL_* variable, so CLEAR_VARS does not reset it.
+# Start from empty before the per-driver sections below append to it.
+gallium_DRIVERS :=
+
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
+LOCAL_SHARED_LIBRARIES += libdrm
+endif
+# Per-driver sections: each driver found in MESA_GPU_DRIVERS appends its libraries to gallium_DRIVERS and adds the defines/shared libraries it needs.
+ifneq ($(filter freedreno,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DGALLIUM_FREEDRENO
+gallium_DRIVERS += libmesa_winsys_freedreno libmesa_pipe_freedreno
+LOCAL_SHARED_LIBRARIES += libdrm_freedreno
+endif # freedreno
+ifneq ($(filter i915g,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_winsys_i915 libmesa_pipe_i915
+LOCAL_SHARED_LIBRARIES += libdrm_intel
+LOCAL_CFLAGS += -DGALLIUM_I915
+endif # i915g
+ifneq ($(filter ilo,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_winsys_intel libmesa_pipe_ilo
+LOCAL_SHARED_LIBRARIES += libdrm_intel
+LOCAL_CFLAGS += -DGALLIUM_ILO
+endif # ilo
+ifneq ($(filter nouveau,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS +=  libmesa_winsys_nouveau libmesa_pipe_nouveau
+LOCAL_CFLAGS += -DGALLIUM_NOUVEAU
+LOCAL_SHARED_LIBRARIES += libdrm_nouveau
+endif # nouveau
+ifneq ($(filter r%,$(MESA_GPU_DRIVERS)),)
+ifneq ($(filter r300g,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_pipe_r300
+LOCAL_CFLAGS += -DGALLIUM_R300
+endif # r300g
+ifneq ($(filter r600g,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_pipe_r600
+LOCAL_CFLAGS += -DGALLIUM_R600
+endif # r600g
+ifneq ($(filter radeonsi,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_pipe_radeonsi libmesa_winsys_amdgpu
+LOCAL_SHARED_LIBRARIES += libLLVM libdrm_amdgpu
+LOCAL_CFLAGS += -DGALLIUM_RADEONSI
+endif # radeonsi
+gallium_DRIVERS += libmesa_winsys_radeon libmesa_pipe_radeon
+LOCAL_SHARED_LIBRARIES += libdrm_radeon
+endif # r%: the two lines above apply to every enabled driver whose name starts with "r"
+ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_dri
+LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE
+endif # swrast
+ifneq ($(filter vc4,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DGALLIUM_VC4
+gallium_DRIVERS += libmesa_winsys_vc4 libmesa_pipe_vc4
+endif # vc4
+ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),)
+gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga
+LOCAL_CFLAGS += -DGALLIUM_VMWGFX
+endif # vmwgfx
+ifneq ($(filter nouveau r600g,$(MESA_GPU_DRIVERS)),)
+LOCAL_SHARED_LIBRARIES += $(if $(filter true,$(MESA_LOLLIPOP_BUILD)),libc++,libstlport)
+endif # nouveau/r600g: libc++ on Lollipop builds, libstlport otherwise
+# Static libraries for the module: the selected driver libraries first, then the common Mesa libraries.
+LOCAL_STATIC_LIBRARIES := \
+	$(gallium_DRIVERS) \
+	libmesa_st_dri \
+	libmesa_st_mesa \
+	libmesa_glsl \
+	libmesa_dri_common \
+	libmesa_megadriver_stub \
+	libmesa_gallium \
+	libmesa_util \
+	libmesa_loader \
+
+ifeq ($(MESA_ENABLE_LLVM),true)
+LOCAL_STATIC_LIBRARIES += \
+	libLLVMR600CodeGen \
+	libLLVMR600Desc \
+	libLLVMR600Info \
+	libLLVMR600AsmPrinter \
+	libelf
+LOCAL_LDLIBS += $(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-lgcc)
+endif # MESA_ENABLE_LLVM
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_SHARED_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/dri.sym mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/dri.sym
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/dri.sym	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/dri.sym	2015-09-16 14:36:09.000000000 +0000
@@ -4,6 +4,7 @@
 		__driDriverGetExtensions*;
 		nouveau_drm_screen_create;
 		radeon_drm_winsys_create;
+		amdgpu_winsys_create;
 	local:
 		*;
 };
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -95,8 +95,7 @@
 
 gallium_dri_la_LIBADD += \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri/SConscript	2015-09-16 14:36:09.000000000 +0000
@@ -25,11 +25,12 @@
     env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
     env.Prepend(LIBS = [llvmpipe])
 
+env.PkgUseModules('DRM')
+
 env.Append(CPPDEFINES = [
     'GALLIUM_VMWGFX',
     'GALLIUM_SOFTPIPE',
     'DRI_TARGET',
-    'HAVE_LIBDRM',
 ])
 
 env.Prepend(LIBS = [
@@ -37,7 +38,6 @@
     svgadrm,
     svga,
     ws_dri,
-    ws_kms_dri,
     softpipe,
     libloader,
     mesautil,
@@ -58,9 +58,6 @@
 env.Command('vmwgfx_dri.so', 'gallium_dri.so', "ln -f ${SOURCE} ${TARGET}")
 # swrast_dri.so
 env.Command('swrast_dri.so', 'gallium_dri.so', "ln -f ${SOURCE} ${TARGET}")
-# kms_swrast_dri.so
-env.Command('kms_swrast_dri.so', 'gallium_dri.so', "ln -f ${SOURCE} ${TARGET}")
 
 env.Alias('dri-vmwgfx', module)
 env.Alias('dri-swrast', module)
-env.Alias('dri-kms-swrast', module)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri-vdpau.dyn mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri-vdpau.dyn
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/dri-vdpau.dyn	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/dri-vdpau.dyn	2015-09-16 14:36:09.000000000 +0000
@@ -1,4 +1,5 @@
 {
 	nouveau_drm_screen_create;
 	radeon_drm_winsys_create;
+	amdgpu_winsys_create;
 };
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/libgl-xlib/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/libgl-xlib/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/libgl-xlib/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/libgl-xlib/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -24,6 +24,11 @@
 GL_MINOR = 5
 GL_TINY = $(MESA_MAJOR)$(MESA_MINOR)0$(MESA_TINY)
 
+if HAVE_SHARED_GLAPI
+SHARED_GLAPI_CFLAGS = -DGLX_SHARED_GLAPI
+SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+endif
+
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
@@ -35,6 +40,7 @@
 	-I$(top_srcdir)/src/gallium/state_trackers/glx/xlib \
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/gallium/winsys \
+	$(SHARED_GLAPI_CFLAGS) \
 	-DGALLIUM_SOFTPIPE \
 	-DGALLIUM_RBUG \
 	-DGALLIUM_TRACE
@@ -65,6 +71,7 @@
 	$(top_builddir)/src/mapi/glapi/libglapi.la \
 	$(top_builddir)/src/mesa/libmesagallium.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
+	$(SHARED_GLAPI_LIB) \
 	$(GL_LIB_DEPS) \
 	$(CLOCK_LIB)
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/omx/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/omx/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/omx/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/omx/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -57,8 +57,7 @@
 
 libomx_mesa_la_LIBADD += \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/opencl/.gitignore mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/opencl/.gitignore
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/opencl/.gitignore	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/opencl/.gitignore	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1 @@
+/mesa.icd
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/opencl/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/opencl/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/opencl/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/opencl/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 include $(top_srcdir)/src/gallium/Automake.inc
 
 lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
@@ -17,12 +15,11 @@
 endif
 
 lib@OPENCL_LIBNAME@_la_LIBADD = \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_client.la \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
 	$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_CLIENT_LIBS) \
 	$(ELF_LIB) \
 	-ldl \
 	-lclangCodeGen \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/pipe-loader/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/pipe-loader/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/pipe-loader/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/pipe-loader/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -155,10 +155,12 @@
 pipe_radeonsi_la_LIBADD = \
 	$(PIPE_LIBS) \
 	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
+	$(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la \
 	$(top_builddir)/src/gallium/drivers/radeon/libradeon.la \
 	$(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
 	$(LIBDRM_LIBS) \
-	$(RADEON_LIBS)
+	$(RADEON_LIBS) \
+	$(AMDGPU_LIBS)
 
 endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/pipe-loader/pipe_radeonsi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/pipe-loader/pipe_radeonsi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/pipe-loader/pipe_radeonsi.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/pipe-loader/pipe_radeonsi.c	2015-09-16 14:36:09.000000000 +0000
@@ -2,6 +2,7 @@
 #include "target-helpers/inline_debug_helper.h"
 #include "radeon/drm/radeon_drm_public.h"
 #include "radeon/radeon_winsys.h"
+#include "amdgpu/drm/amdgpu_public.h"
 #include "radeonsi/si_public.h"
 
 static struct pipe_screen *
@@ -9,7 +10,12 @@
 {
    struct radeon_winsys *rw;
 
-   rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+   /* First, try amdgpu. */
+   rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+   if (!rw)
+      rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
    return rw ? debug_screen_wrap(rw->screen) : NULL;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/va/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/va/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/va/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/va/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -54,8 +54,7 @@
 
 gallium_drv_video_la_LIBADD += \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/vdpau/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/vdpau/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/vdpau/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/vdpau/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -66,8 +66,7 @@
 
 libvdpau_gallium_la_LIBADD += \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/vdpau/vdpau.sym mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/vdpau/vdpau.sym
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/vdpau/vdpau.sym	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/vdpau/vdpau.sym	2015-09-16 14:36:09.000000000 +0000
@@ -3,6 +3,7 @@
                vdp_imp_device_create_x11;
                nouveau_drm_screen_create;
                radeon_drm_winsys_create;
+               amdgpu_winsys_create;
        local:
                *;
 };
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/xa/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/xa/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/xa/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/xa/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -81,8 +81,7 @@
 
 libxatracker_la_LIBADD += \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/xvmc/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/xvmc/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/targets/xvmc/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/targets/xvmc/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -52,11 +52,9 @@
 	$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
 
 else # HAVE_GALLIUM_STATIC_TARGETS
-# XXX: Use the pipe-loader-client over pipe-loader ?
 libXvMCgallium_la_LIBADD += \
 	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
-	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
+	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/tests/graw/graw_util.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/tests/graw/graw_util.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/tests/graw/graw_util.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/tests/graw/graw_util.h	2015-09-16 14:36:09.000000000 +0000
@@ -26,7 +26,7 @@
 
 
 
-static INLINE boolean
+static inline boolean
 graw_util_create_window(struct graw_info *info,
                         int width, int height,
                         int num_cbufs, bool zstencil_buf)
@@ -144,7 +144,7 @@
 }
 
 
-static INLINE void
+static inline void
 graw_util_default_state(struct graw_info *info, boolean depth_test)
 {
    {
@@ -181,7 +181,7 @@
 }
 
 
-static INLINE void
+static inline void
 graw_util_viewport(struct graw_info *info,
                    float x, float y,
                    float width, float height,
@@ -205,7 +205,7 @@
 }
 
 
-static INLINE void
+static inline void
 graw_util_flush_front(const struct graw_info *info)
 {
    info->screen->flush_frontbuffer(info->screen, info->color_buf[0],
@@ -213,7 +213,7 @@
 }
 
 
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 graw_util_create_tex2d(const struct graw_info *info,
                        int width, int height, enum pipe_format format,
                        const void *data)
@@ -278,7 +278,7 @@
 }
 
 
-static INLINE void *
+static inline void *
 graw_util_create_simple_sampler(const struct graw_info *info,
                                 unsigned wrap_mode,
                                 unsigned img_filter)
@@ -304,7 +304,7 @@
 }
 
 
-static INLINE struct pipe_sampler_view *
+static inline struct pipe_sampler_view *
 graw_util_create_simple_sampler_view(const struct graw_info *info,
                                      struct pipe_resource *texture)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/tests/trivial/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/tests/trivial/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/tests/trivial/Makefile.am	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/tests/trivial/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -12,11 +12,10 @@
 	$(GALLIUM_PIPE_LOADER_DEFINES)
 
 LDADD = \
-	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_client.la \
+	$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
-	$(GALLIUM_PIPE_LOADER_CLIENT_LIBS) \
 	$(GALLIUM_COMMON_LIB_DEPS)
 
 noinst_PROGRAMS = compute tri quad-tex
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/tests/trivial/quad-tex.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/tests/trivial/quad-tex.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/tests/trivial/quad-tex.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/tests/trivial/quad-tex.c	2015-09-16 14:36:09.000000000 +0000
@@ -270,7 +270,9 @@
 	}
 
 	/* fragment shader */
-	p->fs = util_make_fragment_tex_shader(p->pipe, TGSI_TEXTURE_2D, TGSI_INTERPOLATE_LINEAR);
+	p->fs = util_make_fragment_tex_shader(p->pipe, TGSI_TEXTURE_2D,
+	                                      TGSI_INTERPOLATE_LINEAR,
+	                                      TGSI_RETURN_TYPE_FLOAT);
 }
 
 static void close_prog(struct program *p)
@@ -295,6 +297,8 @@
 
 static void draw(struct program *p)
 {
+	const struct pipe_sampler_state *samplers[] = {&p->sampler};
+
 	/* set the render target */
 	cso_set_framebuffer(p->cso, &p->framebuffer);
 
@@ -308,8 +312,7 @@
 	cso_set_viewport(p->cso, &p->viewport);
 
 	/* sampler */
-	cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler);
-	cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+	cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
 
 	/* texture sampler view */
 	cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1008 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrinterface.cpp
+* @brief Contains the addrlib interface functions
+***************************************************************************************************
+*/
+#include "addrinterface.h"
+#include "addrlib.h"
+
+#include "addrcommon.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Create/Destroy/Config functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrCreate
+*
+*   @brief
+*       Create address lib object
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCreate(
+    const ADDR_CREATE_INPUT*    pAddrCreateIn,  ///< [in] information for creating address lib object
+    ADDR_CREATE_OUTPUT*         pAddrCreateOut) ///< [out] address lib handle
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Creation is delegated entirely to AddrLib::Create(); its status is returned unchanged.
+    returnCode = AddrLib::Create(pAddrCreateIn, pAddrCreateOut);
+
+    return returnCode;
+}
+
+
+
+/**
+***************************************************************************************************
+*   AddrDestroy
+*
+*   @brief
+*       Destroy address lib object
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrDestroy(
+    ADDR_HANDLE hLib) ///< [in] address lib handle
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Only the raw handle is tested; a null handle is reported as ADDR_ERROR.
+    if (hLib)
+    {
+        AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+        pLib->Destroy(); // NOTE(review): pLib is not NULL-checked here, unlike the other entry points
+    }
+    else
+    {
+        returnCode = ADDR_ERROR;
+    }
+
+    return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                    Surface functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrComputeSurfaceInfo
+*
+*   @brief
+*       Calculate surface width/height/depth/alignments and suitable tiling mode
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
+    ADDR_HANDLE                             hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,  ///< [in] surface information
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut) ///< [out] surface parameters and alignments
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // ADDR_OK is only a placeholder: one of the two branches below always overwrites it.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeSurfaceInfo(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+
+
+/**
+***************************************************************************************************
+*   AddrComputeSurfaceAddrFromCoord
+*
+*   @brief
+*       Compute surface address according to coordinates
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,  ///< [in] surface info and coordinates
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] surface address
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Pure forwarding wrapper: the AddrLib object does the work, this only maps NULL to an error.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeSurfaceAddrFromCoord(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeSurfaceCoordFromAddr
+*
+*   @brief
+*       Compute coordinates according to surface address
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,  ///< [in] surface info and address
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut) ///< [out] coordinates
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // pIn/pOut are passed through untouched; no validation happens at this layer.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                   HTile functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrComputeHtileInfo
+*
+*   @brief
+*       Compute Htile pitch, height, base alignment and size in bytes
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
+    ADDR_HANDLE                             hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,  ///< [in] Htile information
+    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut) ///< [out] Htile pitch, height and size in bytes
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // The result of the member call is returned as-is; ADDR_ERROR is used only for a NULL pLib.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeHtileInfo(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeHtileAddrFromCoord
+*
+*   @brief
+*       Compute Htile address according to coordinates (of depth buffer)
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Htile info and coordinates
+    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] Htile address
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Forward to the AddrLib object when one was obtained; otherwise report ADDR_ERROR.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeHtileCoordFromAddr
+*
+*   @brief
+*       Compute coordinates within depth buffer (1st pixel of a micro tile) according to
+*       Htile address
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,  ///< [in] Htile info and address
+    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut) ///< [out] Htile coordinates
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // The initial ADDR_OK never survives: both branches assign returnCode.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                     C-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrComputeCmaskInfo
+*
+*   @brief
+*       Compute Cmask pitch, height, base alignment and size in bytes from color buffer
+*       info
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
+    ADDR_HANDLE                             hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,  ///< [in] Cmask pitch and height
+    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut) ///< [out] Cmask pitch, height and size in bytes
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Thin C-style entry point: the computation itself is done by the AddrLib member call.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeCmaskInfo(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeCmaskAddrFromCoord
+*
+*   @brief
+*       Compute Cmask address according to coordinates (of MSAA color buffer)
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Cmask info and coordinates
+    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] Cmask address
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // pIn/pOut are handed to the member function unchanged; only pLib is checked here.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeCmaskCoordFromAddr
+*
+*   @brief
+*       Compute coordinates within color buffer (1st pixel of a micro tile) according to
+*       Cmask address
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,  ///< [in] Cmask info and address
+    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut) ///< [out] Cmask coordinates
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Status comes from the member call, or is ADDR_ERROR when no AddrLib object was obtained.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                     F-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskInfo
+*
+*   @brief
+*       Compute Fmask pitch/height/depth/alignments and size in bytes
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
+    ADDR_HANDLE                             hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,  ///< [in] Fmask information
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut) ///< [out] Fmask pitch and height
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Forwarding wrapper; the ADDR_OK initialiser is always replaced by one of the branches.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeFmaskInfo(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskAddrFromCoord
+*
+*   @brief
+*       Compute Fmask address according to coordinates (x,y,slice,sample,plane)
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,  ///< [in] Fmask info and coordinates
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut) ///< [out] Fmask address
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // No argument checking is done at this level; pIn/pOut go straight to the member call.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeFmaskAddrFromCoord(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskCoordFromAddr
+*
+*   @brief
+*       Compute coordinates (x,y,slice,sample,plane) according to Fmask address
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
+    ADDR_HANDLE                                     hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,  ///< [in] Fmask info and address
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut) ///< [out] Fmask coordinates
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Delegate to the AddrLib object if there is one, else fail with ADDR_ERROR.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeFmaskCoordFromAddr(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                     DCC key functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrComputeDccInfo
+*
+*   @brief
+*       Compute DCC key size, base alignment based on color surface size, tile info or tile index
+*   @return ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
+    ADDR_HANDLE                             hLib,   ///< [in] handle of addrlib
+    const ADDR_COMPUTE_DCCINFO_INPUT*       pIn,    ///< [in] input
+    ADDR_COMPUTE_DCCINFO_OUTPUT*            pOut)   ///< [out] output
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+    // Forward to the AddrLib object when one was obtained; otherwise report ADDR_ERROR.
+    if (pLib != NULL)
+    {
+        returnCode = pLib->ComputeDccInfo(pIn, pOut);
+    }
+    else
+    {
+        returnCode = ADDR_ERROR; // GetAddrLib() returned NULL for this handle
+    }
+
+    return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Below functions are element related or helper functions
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrGetVersion
+*
+*   @brief
+*       Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION
+*       defined in addrinterface.h to see if there is a mismatch.
+***************************************************************************************************
+*/
+UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib)
+{
+    UINT_32 version = 0;
+
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+    // NOTE(review): ADDR_ASSERT is defined outside this file; presumably a debug check - confirm.
+    ADDR_ASSERT(pLib != NULL);
+    // The explicit test leaves version at its initial 0 when pLib is NULL.
+    if (pLib)
+    {
+        version = pLib->GetVersion();
+    }
+
+    return version;
+}
+
+/**
+***************************************************************************************************
+*   AddrUseTileIndex
+*
+*   @brief
+*       Return TRUE if tileIndex is enabled in this address library
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib)
+{
+    BOOL_32 useTileIndex = FALSE;
+
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+    // NOTE(review): ADDR_ASSERT is defined outside this file; presumably a debug check - confirm.
+    ADDR_ASSERT(pLib != NULL);
+    // With a NULL pLib the result stays FALSE.
+    if (pLib)
+    {
+        useTileIndex = pLib->UseTileIndex(0); // NOTE(review): meaning of the literal 0 is not visible here
+    }
+
+    return useTileIndex;
+}
+
+/**
+***************************************************************************************************
+*   AddrUseCombinedSwizzle
+*
+*   @brief
+*       Return TRUE if combined swizzle is enabled in this address library
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib)
+{
+    BOOL_32 useCombinedSwizzle = FALSE;
+
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+    // NOTE(review): ADDR_ASSERT is defined outside this file; presumably a debug check - confirm.
+    ADDR_ASSERT(pLib != NULL);
+    // With a NULL pLib the result stays FALSE.
+    if (pLib)
+    {
+        useCombinedSwizzle = pLib->UseCombinedSwizzle();
+    }
+
+    return useCombinedSwizzle;
+}
+
+/**
+***************************************************************************************************
+*   AddrExtractBankPipeSwizzle
+*
+*   @brief
+*       Extract Bank and Pipe swizzle from base256b
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
+    ADDR_HANDLE                                 hLib,     ///< [in] addrlib handle
+    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,      ///< [in] input structure
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut)     ///< [out] output structure
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->ExtractBankPipeSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrCombineBankPipeSwizzle
+*
+*   @brief
+*       Combine Bank and Pipe swizzle
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
+    ADDR_HANDLE                                 hLib,
+    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
+    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut)
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->CombineBankPipeSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeSliceSwizzle
+*
+*   @brief
+*       Compute a swizzle for slice from a base swizzle
+*   @return
+*       ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
+    ADDR_HANDLE                                 hLib,
+    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*      pIn,
+    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*           pOut)
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // The library-side method is named ComputeSliceTileSwizzle; whatever
+    // code it produces is returned to the caller as-is.
+    return pAddrLib->ComputeSliceTileSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeBaseSwizzle
+*
+*   @brief
+*       Return a combined bank and pipe base swizzle based on surface type/index
+*   @return
+*       ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
+    ADDR_HANDLE                             hLib,
+    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
+    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut)
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->ComputeBaseSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   ElemFlt32ToDepthPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a depth/stencil pixel value
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+*
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
+    ADDR_HANDLE                         hLib,    ///< [in] addrlib handle
+    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,     ///< [in] per-component value
+    ELEM_FLT32TODEPTHPIXEL_OUTPUT*      pOut)    ///< [out] final pixel value
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR; // Nothing to dispatch to.
+    }
+
+    // NOTE(review): any result of Flt32ToDepthPixel() is not propagated, so
+    // once the handle resolves this wrapper always reports ADDR_OK. Confirm
+    // whether the callee returns a status that should be forwarded.
+    pAddrLib->Flt32ToDepthPixel(pIn, pOut);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   ElemFlt32ToColorPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+*
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
+    ADDR_HANDLE                         hLib,    ///< [in] addrlib handle
+    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,     ///< [in] format, surface number and swap value
+    ELEM_FLT32TOCOLORPIXEL_OUTPUT*      pOut)    ///< [out] final pixel value
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR; // Nothing to dispatch to.
+    }
+
+    // NOTE(review): any result of Flt32ToColorPixel() is not propagated, so
+    // once the handle resolves this wrapper always reports ADDR_OK. Confirm
+    // whether the callee returns a status that should be forwarded.
+    pAddrLib->Flt32ToColorPixel(pIn, pOut);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   ElemGetExportNorm
+*
+*   @brief
+*       Helper function to check whether one format can be EXPORT_NORM,
+*       which is a register CB_COLOR_INFO.SURFACE_FORMAT.
+*       FP16 can be reported as EXPORT_NORM for rv770 in r600 family
+*   @return
+*       Value from GetExportNorm(); FALSE when hLib does not resolve to a library
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API ElemGetExportNorm(
+    ADDR_HANDLE                     hLib, ///< [in] addrlib handle
+    const ELEM_GETEXPORTNORM_INPUT* pIn)  ///< [in] input structure
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+    BOOL_32 enabled = FALSE;
+
+    // Assert on the handle itself, the convention AddrUseTileIndex() and
+    // AddrUseCombinedSwizzle() follow. No status local is kept: this function
+    // returns BOOL_32, so a return code could only ever feed the assert and
+    // would be set-but-unused wherever ADDR_ASSERT expands to nothing.
+    ADDR_ASSERT(pLib != NULL);
+
+    // A NULL library is still tolerated at run time and yields FALSE.
+    if (pLib != NULL)
+    {
+        enabled = pLib->GetExportNorm(pIn);
+    }
+
+    return enabled;
+}
+
+/**
+***************************************************************************************************
+*   AddrConvertTileInfoToHW
+*
+*   @brief
+*       Convert tile info from real value to hardware register value
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
+    ADDR_HANDLE                             hLib, ///< [in] address lib handle
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT*  pIn,  ///< [in] tile info with real value
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT*       pOut) ///< [out] tile info with HW register value
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->ConvertTileInfoToHW(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrConvertTileIndex
+*
+*   @brief
+*       Convert tile index to tile mode/type/info
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
+    ADDR_HANDLE                          hLib, ///< [in] address lib handle
+    const ADDR_CONVERT_TILEINDEX_INPUT*  pIn,  ///< [in] input - tile index
+    ADDR_CONVERT_TILEINDEX_OUTPUT*       pOut) ///< [out] tile mode/type/info
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->ConvertTileIndex(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrConvertTileIndex1
+*
+*   @brief
+*       Convert tile index to tile mode/type/info
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
+    ADDR_HANDLE                          hLib, ///< [in] address lib handle
+    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,  ///< [in] input - tile index
+    ADDR_CONVERT_TILEINDEX_OUTPUT*       pOut) ///< [out] tile mode/type/info
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->ConvertTileIndex1(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrGetTileIndex
+*
+*   @brief
+*       Get tile index from tile mode/type/info
+*
+*   @return
+*       ADDR_OK if successful, otherwise an error code of ADDR_E_RETURNCODE
+*
+*   @note
+*       Only meaningful for SI (and above)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
+    ADDR_HANDLE                     hLib,
+    const ADDR_GET_TILEINDEX_INPUT* pIn,
+    ADDR_GET_TILEINDEX_OUTPUT*      pOut)
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->GetTileIndex(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputePrtInfo
+*
+*   @brief
+*       Interface function for ComputePrtInfo
+*
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
+    ADDR_HANDLE                 hLib,
+    const ADDR_PRT_INFO_INPUT*  pIn,
+    ADDR_PRT_INFO_OUTPUT*       pOut)
+{
+    // Translate the client handle into the library object it refers to.
+    AddrLib* pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Without a library object there is nothing to dispatch to, so the
+    // request is rejected with ADDR_ERROR and neither pIn nor pOut is
+    // accessed by this wrapper.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    // Hand the request to the library object; whatever code it produces
+    // is returned to the caller as-is.
+    return pAddrLib->ComputePrtInfo(pIn, pOut);
+}
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,2166 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrinterface.h
+* @brief Contains the addrlib interfaces declaration and parameter defines
+***************************************************************************************************
+*/
+#ifndef __ADDR_INTERFACE_H__
+#define __ADDR_INTERFACE_H__
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#include "addrtypes.h"
+
+#define ADDRLIB_VERSION_MAJOR 5   ///< Major version; placed at bit 16 and up of ADDRLIB_VERSION
+#define ADDRLIB_VERSION_MINOR 25  ///< Minor version; OR-ed into the low bits of ADDRLIB_VERSION
+#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR)
+
+/// Opaque library handle; virtually all interface functions take it as their first parameter
+typedef VOID*   ADDR_HANDLE;
+
+/// Opaque client handle carried in the input structures passed to client callbacks
+typedef VOID*   ADDR_CLIENT_HANDLE;
+
+/**
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                                  Callback functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*    typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
+*         const ADDR_ALLOCSYSMEM_INPUT* pInput);
+*    typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
+*         const ADDR_FREESYSMEM_INPUT* pInput);
+*    typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
+*         const ADDR_DEBUGPRINT_INPUT* pInput);
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                               Create/Destroy/Config functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*     AddrCreate()
+*     AddrDestroy()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                                  Surface functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*     AddrComputeSurfaceInfo()
+*     AddrComputeSurfaceAddrFromCoord()
+*     AddrComputeSurfaceCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                                   HTile functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*     AddrComputeHtileInfo()
+*     AddrComputeHtileAddrFromCoord()
+*     AddrComputeHtileCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                                   C-mask functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*     AddrComputeCmaskInfo()
+*     AddrComputeCmaskAddrFromCoord()
+*     AddrComputeCmaskCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                                   F-mask functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*     AddrComputeFmaskInfo()
+*     AddrComputeFmaskAddrFromCoord()
+*     AddrComputeFmaskCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                               Element/Utility functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*     ElemFlt32ToDepthPixel()
+*     ElemFlt32ToColorPixel()
+*     AddrExtractBankPipeSwizzle()
+*     AddrCombineBankPipeSwizzle()
+*     AddrComputeSliceSwizzle()
+*     AddrConvertTileInfoToHW()
+*     AddrConvertTileIndex()
+*     AddrConvertTileIndex1()
+*     AddrGetTileIndex()
+*     AddrComputeBaseSwizzle()
+*     AddrUseTileIndex()
+*     AddrUseCombinedSwizzle()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* //                                    Dump functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+*     AddrDumpSurfaceInfo()
+*     AddrDumpFmaskInfo()
+*     AddrDumpCmaskInfo()
+*     AddrDumpHtileInfo()
+*
+**/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                      Callback functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* @brief Alloc system memory flags.
+* @note All 32 bits are currently reserved. Reserving them now minimizes the impact on the
+*       client if flags are added later.
+***************************************************************************************************
+*/
+typedef union _ADDR_ALLOCSYSMEM_FLAGS
+{
+    struct
+    {
+        UINT_32 reserved    : 32;  ///< Reserved for future use.
+    } fields;
+    UINT_32 value;                 ///< All flag bits viewed as a single 32-bit value
+
+} ADDR_ALLOCSYSMEM_FLAGS;
+
+/**
+***************************************************************************************************
+* @brief Alloc system memory input structure, the argument of the ADDR_ALLOCSYSMEM callback
+***************************************************************************************************
+*/
+typedef struct _ADDR_ALLOCSYSMEM_INPUT
+{
+    UINT_32                 size;           ///< Size of this structure in bytes
+
+    ADDR_ALLOCSYSMEM_FLAGS  flags;          ///< System memory flags (all bits currently reserved).
+    UINT_32                 sizeInBytes;    ///< System memory allocation size in bytes.
+    ADDR_CLIENT_HANDLE      hClient;        ///< Client handle
+} ADDR_ALLOCSYSMEM_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_ALLOCSYSMEM
+*   @brief
+*       Allocate system memory callback function. Returns a valid pointer on success.
+***************************************************************************************************
+*/
+typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
+    const ADDR_ALLOCSYSMEM_INPUT* pInput); // [in] allocation size, flags and client handle
+
+/**
+***************************************************************************************************
+* @brief Free system memory input structure, the argument of the ADDR_FREESYSMEM callback
+***************************************************************************************************
+*/
+typedef struct _ADDR_FREESYSMEM_INPUT
+{
+    UINT_32                 size;           ///< Size of this structure in bytes
+
+    VOID*                   pVirtAddr;      ///< Virtual address of the system memory to free
+    ADDR_CLIENT_HANDLE      hClient;        ///< Client handle
+} ADDR_FREESYSMEM_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_FREESYSMEM
+*   @brief
+*       Free system memory callback function; takes a pointer to ADDR_FREESYSMEM_INPUT.
+*       Returns ADDR_OK on success.
+***************************************************************************************************
+*/
+typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
+    const ADDR_FREESYSMEM_INPUT* pInput);
+
+/**
+***************************************************************************************************
+* @brief Print debug message input structure, the argument of the ADDR_DEBUGPRINT callback
+***************************************************************************************************
+*/
+typedef struct _ADDR_DEBUGPRINT_INPUT
+{
+    UINT_32             size;           ///< Size of this structure in bytes
+
+    CHAR*               pDebugString;   ///< Debug string (presumably a format for ap; confirm)
+    va_list             ap;             ///< Variable argument list
+    ADDR_CLIENT_HANDLE  hClient;        ///< Client handle
+} ADDR_DEBUGPRINT_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_DEBUGPRINT
+*   @brief
+*       Print debug message callback function; takes a pointer to ADDR_DEBUGPRINT_INPUT.
+*       Returns ADDR_OK on success.
+***************************************************************************************************
+*/
+typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
+    const ADDR_DEBUGPRINT_INPUT* pInput);
+
+/**
+***************************************************************************************************
+* ADDR_CALLBACKS
+*
+*   @brief
+*       Client callbacks: required system memory alloc/free routines, plus a debug print routine.
+***************************************************************************************************
+*/
+typedef struct _ADDR_CALLBACKS
+{
+    ADDR_ALLOCSYSMEM allocSysMem;   ///< Routine to allocate system memory
+    ADDR_FREESYSMEM  freeSysMem;    ///< Routine to free system memory
+    ADDR_DEBUGPRINT  debugPrint;    ///< Routine to print debug message
+} ADDR_CALLBACKS;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Create/Destroy functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* ADDR_CREATE_FLAGS
+*
+*   @brief
+*       This structure is used to pass some setup in creation of AddrLib
+*   @note The anonymous bit-field struct (8 flags + 24 reserved bits) overlays the 32-bit value
+***************************************************************************************************
+*/
+typedef union _ADDR_CREATE_FLAGS
+{
+    struct
+    {
+        UINT_32 noCubeMipSlicesPad     : 1;    ///< Turn cubemap faces padding off
+        UINT_32 fillSizeFields         : 1;    ///< If clients fill size fields in all input and
+                                               ///  output structures
+        UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in input valid
+        UINT_32 useCombinedSwizzle     : 1;    ///< Use combined tile swizzle
+        UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub level
+        UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice alignment
+        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes automatically for base level
+        UINT_32 allowLargeThickTile    : 1;    ///< Allow 64*thickness*bytesPerPixel > rowSize
+        UINT_32 reserved               : 24;   ///< Reserved bits for future use
+    };
+
+    UINT_32 value;
+} ADDR_CREATE_FLAGS;
+
+/**
+***************************************************************************************************
+*   ADDR_REGISTER_VALUE
+*
+*   @brief
+*       Data from registers to setup AddrLib global data, used in AddrCreate
+***************************************************************************************************
+*/
+typedef struct _ADDR_REGISTER_VALUE
+{
+    UINT_32  gbAddrConfig;       ///< For R8xx, use GB_ADDR_CONFIG register value.
+                                 ///  For R6xx/R7xx, use GB_TILING_CONFIG.
+                                 ///  But they can be treated as the same.
+                                 ///  If this value is 0, use chip to set default value
+    UINT_32  backendDisables;    ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled.
+                                 ///  Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE
+
+                                 ///  R800 registers-----------------------------------------------
+    UINT_32  noOfBanks;          ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK
+                                 ///  No enums for this value in h/w header files
+                                 ///  0: 4
+                                 ///  1: 8
+                                 ///  2: 16
+    UINT_32  noOfRanks;          ///< MC_ARB_RAMCFG.NOOFRANK
+                                 ///  0: 1
+                                 ///  1: 2
+                                 ///  SI (R1000) registers-----------------------------------------
+    const UINT_32* pTileConfig;  ///< Global tile setting tables
+    UINT_32  noOfEntries;        ///< Number of entries in pTileConfig
+
+                                 ///  CI registers-------------------------------------------------
+    const UINT_32* pMacroTileConfig;    ///< Global macro tile mode table
+    UINT_32  noOfMacroEntries;   ///< Number of entries in pMacroTileConfig
+
+} ADDR_REGISTER_VALUE;
+
+/**
+***************************************************************************************************
+* ADDR_CREATE_INPUT
+*
+*   @brief
+*       Parameters used to create an AddrLib Object. Caller must provide all fields.
+*
+***************************************************************************************************
+*/
+typedef struct _ADDR_CREATE_INPUT
+{
+    UINT_32             size;                ///< Size of this structure in bytes
+
+    UINT_32             chipEngine;          ///< Chip Engine
+    UINT_32             chipFamily;          ///< Chip Family
+    UINT_32             chipRevision;        ///< Chip Revision
+    ADDR_CALLBACKS      callbacks;           ///< Callbacks for sysmem alloc/free/print
+    ADDR_CREATE_FLAGS   createFlags;         ///< Flags to setup AddrLib
+    ADDR_REGISTER_VALUE regValue;            ///< Data from registers to setup AddrLib global data
+    ADDR_CLIENT_HANDLE  hClient;             ///< Client handle
+    UINT_32             minPitchAlignPixels; ///< Minimum pitch alignment in pixels
+} ADDR_CREATE_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_CREATE_OUTPUT
+*
+*   @brief
+*       Return AddrLib handle to client driver
+*
+***************************************************************************************************
+*/
+typedef struct _ADDR_CREATE_OUTPUT
+{
+    UINT_32     size;    ///< Size of this structure in bytes
+
+    ADDR_HANDLE hLib;    ///< Address lib handle
+} ADDR_CREATE_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrCreate
+*
+*   @brief
+*       Create AddrLib object, must be called before any interface calls
+*
+*   @return
+*       ADDR_OK if successful
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCreate(
+    const ADDR_CREATE_INPUT*    pAddrCreateIn,   ///< [in] creation parameters
+    ADDR_CREATE_OUTPUT*         pAddrCreateOut); // [out] receives the AddrLib handle (hLib)
+
+
+
+/**
+***************************************************************************************************
+*   AddrDestroy
+*
+*   @brief
+*       Destroy AddrLib object, must be called to free internally allocated resources.
+*
+*   @return
+*       ADDR_OK if successful
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrDestroy(
+    ADDR_HANDLE hLib); // [in] address lib handle, as returned in ADDR_CREATE_OUTPUT::hLib
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                    Surface functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* @brief
+*       Bank/tiling parameters. On function input, these can be set as desired or
+*       left 0 for AddrLib to calculate/default. On function output, these are the actual
+*       parameters used.
+* @note
+*       Valid bankWidth/bankHeight values:
+*       1,2,4,8. They are factors instead of pixels or bytes.
+*
+*       The bank number remains constant across each row of the
+*       macro tile as each pipe is selected, so the number of
+*       tiles in the x direction with the same bank number will
+*       be bank_width * num_pipes.
+***************************************************************************************************
+*/
+typedef struct _ADDR_TILEINFO
+{
+    ///  Any of these parameters can be set to 0 to use the HW default.
+    UINT_32     banks;              ///< Number of banks, numerical value
+    UINT_32     bankWidth;          ///< Number of tiles in the X direction in the same bank
+    UINT_32     bankHeight;         ///< Number of tiles in the Y direction in the same bank
+    UINT_32     macroAspectRatio;   ///< Macro tile aspect ratio. 1-1:1, 2-4:1, 4-16:1, 8-64:1
+    UINT_32     tileSplitBytes;     ///< Tile split size, in bytes
+    AddrPipeCfg pipeConfig;         ///< Pipe Config = HW enum + 1
+} ADDR_TILEINFO;
+
+// Keep the old name as a typedef alias to avoid client changes. R800 was removed from the name
+// because we plan to implement SI within the 800 HWL; an AddrPipeCfg member was added above.
+typedef ADDR_TILEINFO ADDR_R800_TILEINFO;
+
+/**
+***************************************************************************************************
+* @brief
+*       Information needed by quad buffer stereo support
+***************************************************************************************************
+*/
+typedef struct _ADDR_QBSTEREOINFO
+{
+    UINT_32         eyeHeight;          ///< Height (in pixel rows) to right eye
+    UINT_32         rightOffset;        ///< Offset (in bytes) to right eye
+    UINT_32         rightSwizzle;       ///< Tile swizzle for the right eye
+} ADDR_QBSTEREOINFO;
+
+/**
+***************************************************************************************************
+*   ADDR_SURFACE_FLAGS
+*
+*   @brief
+*       Surface flags; the anonymous bit-field struct (22 flags + 10 reserved) overlays value
+***************************************************************************************************
+*/
+typedef union _ADDR_SURFACE_FLAGS
+{
+    struct
+    {
+        UINT_32 color         : 1; ///< Flag indicates this is a color buffer
+        UINT_32 depth         : 1; ///< Flag indicates this is a depth/stencil buffer
+        UINT_32 stencil       : 1; ///< Flag indicates this is a stencil buffer
+        UINT_32 texture       : 1; ///< Flag indicates this is a texture
+        UINT_32 cube          : 1; ///< Flag indicates this is a cubemap
+
+        UINT_32 volume        : 1; ///< Flag indicates this is a volume texture
+        UINT_32 fmask         : 1; ///< Flag indicates this is an fmask
+        UINT_32 cubeAsArray   : 1; ///< Flag indicates whether to treat a cubemap as an array
+        UINT_32 compressZ     : 1; ///< Flag indicates z buffer is compressed
+        UINT_32 overlay       : 1; ///< Flag indicates this is an overlay surface
+        UINT_32 noStencil     : 1; ///< Flag indicates this depth has no separate stencil
+        UINT_32 display       : 1; ///< Flag indicates this should match display controller req.
+        UINT_32 opt4Space     : 1; ///< Flag indicates this surface should be optimized for space
+                                   ///  i.e. save some memory but may lose performance
+        UINT_32 prt           : 1; ///< Flag for partially resident texture
+        UINT_32 qbStereo      : 1; ///< Quad buffer stereo surface
+        UINT_32 pow2Pad       : 1; ///< SI: Pad to pow2, must set for mipmap (include level0)
+        UINT_32 interleaved   : 1; ///< Special flag for interleaved YUV surface padding
+        UINT_32 degrade4Space : 1; ///< Degrade base level's tile mode to save memory
+        UINT_32 tcCompatible  : 1; ///< Flag indicates surface needs to be shader readable
+        UINT_32 dispTileType  : 1; ///< NI: force display Tiling for 128 bit shared resource
+        UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear
+        UINT_32 czDispCompatible: 1; ///< SI+: CZ family (Carrizo) HW bug requires special alignment.
+                                     ///       This flag indicates we need to follow the alignment with
+                                     ///       CZ families or other ASICs under PX configuration + CZ.
+        UINT_32 reserved      :10; ///< Reserved bits
+    };
+
+    UINT_32 value;
+} ADDR_SURFACE_FLAGS;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SURFACE_INFO_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeSurfaceInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT
+{
+    UINT_32             size;               ///< Size of this structure in bytes
+
+    AddrTileMode        tileMode;           ///< Tile mode
+    AddrFormat          format;             ///< If format is set to valid one, bpp/width/height
+                                            ///  might be overwritten
+    UINT_32             bpp;                ///< Bits per pixel
+    UINT_32             numSamples;         ///< Number of samples
+    UINT_32             width;              ///< Width, in pixels
+    UINT_32             height;             ///< Height, in pixels
+    UINT_32             numSlices;          ///< Number of surface slices/depth,
+                                            ///  Note:
+                                            ///  For cubemap, driver clients usually set numSlices
+                                            ///  to 1 in per-face calc.
+                                            ///  For 7xx and above, we need pad faces as slices.
+                                            ///  In this case, clients should set numSlices to 6 and
+                                            ///  this can also be turned off by createFlags when
+                                            ///  calling AddrCreate
+    UINT_32             slice;              ///< Slice index
+    UINT_32             mipLevel;           ///< Current mipmap level.
+                                            ///  Padding/tiling have different rules for level0 and
+                                            ///  sublevels
+    ADDR_SURFACE_FLAGS  flags;              ///< Surface type flags
+    UINT_32             numFrags;           ///< Number of fragments, leave it zero or the same as
+                                            ///  number of samples for normal AA; Set it to the
+                                            ///  number of fragments for EQAA
+    /// r800 and later HWL parameters
+    // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's
+    ADDR_TILEINFO*      pTileInfo;          ///< 2D tile parameters. Set to 0 to default/calculate
+    AddrTileType        tileType;           ///< Micro tiling type, not needed when tileIndex != -1
+    INT_32              tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
+                                            ///  while the global useTileIndex is set to 1
+    UINT_32             basePitch;          ///< Base level pitch in pixels, 0 means ignored, is a
+                                            ///  must for mip levels from SI+.
+                                            ///  Don't use pitch in blocks for compressed formats!
+} ADDR_COMPUTE_SURFACE_INFO_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SURFACE_INFO_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeSurfaceInfo
+*   @note
+        Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch
+        Pixel: Original pixel
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT
+{
+    UINT_32         size;           ///< Size of this structure in bytes
+
+    UINT_32         pitch;          ///< Pitch in elements (in blocks for compressed formats)
+    UINT_32         height;         ///< Height in elements (in blocks for compressed formats)
+    UINT_32         depth;          ///< Number of slice/depth
+    UINT_64         surfSize;       ///< Surface size in bytes
+    AddrTileMode    tileMode;       ///< Actual tile mode. May differ from that in input
+    UINT_32         baseAlign;      ///< Base address alignment
+    UINT_32         pitchAlign;     ///< Pitch alignment, in elements
+    UINT_32         heightAlign;    ///< Height alignment, in elements
+    UINT_32         depthAlign;     ///< Depth alignment, aligned to thickness, for 3d texture
+    UINT_32         bpp;            ///< Bits per element (e.g. blocks for BCn, 1/3 for 96bit)
+    UINT_32         pixelPitch;     ///< Pitch in original pixels
+    UINT_32         pixelHeight;    ///< Height in original pixels
+    UINT_32         pixelBits;      ///< Original bits per pixel, passed from input
+    UINT_64         sliceSize;      ///< Size of slice specified by input's slice
+                                    ///  The result is controlled by surface flags & createFlags
+                                    ///  By default this value equals to surfSize for volume
+    UINT_32         pitchTileMax;   ///< PITCH_TILE_MAX value for h/w register
+    UINT_32         heightTileMax;  ///< HEIGHT_TILE_MAX value for h/w register
+    UINT_32         sliceTileMax;   ///< SLICE_TILE_MAX value for h/w register
+
+    UINT_32         numSamples;     ///< Pass the effective numSamples processed in this call
+
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*  pTileInfo;      ///< Tile parameters used. Filled in if 0 on input
+    AddrTileType    tileType;       ///< Micro tiling type, only valid when tileIndex != -1
+    INT_32          tileIndex;      ///< Tile index, MAY be "downgraded"
+
+    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+    /// Special information to work around SI mipmap swizzle bug UBTS #317508
+    BOOL_32         last2DLevel;    ///< TRUE if this is the last 2D(3D) tiled level
+                                    ///< Only meaningful when create flag checkLast2DLevel is set
+    /// Stereo info
+    ADDR_QBSTEREOINFO*  pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE
+} ADDR_COMPUTE_SURFACE_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeSurfaceInfo
+*
+*   @brief
+*       Compute surface width/height/depth/alignments and suitable tiling mode
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
+    ADDR_HANDLE                             hLib,   ///< Library handle
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< Input structure (read-only)
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut);  ///< Output structure
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeSurfaceAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    UINT_32         x;                  ///< X coordinate
+    UINT_32         y;                  ///< Y coordinate
+    UINT_32         slice;              ///< Slice index
+    UINT_32         sample;             ///< Sample index, use fragment index for EQAA
+
+    UINT_32         bpp;                ///< Bits per pixel
+    UINT_32         pitch;              ///< Surface pitch, in pixels
+    UINT_32         height;             ///< Surface height, in pixels
+    UINT_32         numSlices;          ///< Surface depth
+    UINT_32         numSamples;         ///< Number of samples
+
+    AddrTileMode    tileMode;           ///< Tile mode
+    BOOL_32         isDepth;            ///< TRUE if the surface uses depth sample ordering within
+                                        ///  micro tile. Textures can also choose depth sample order
+    UINT_32         tileBase;           ///< Base offset (in bits) inside micro tile which handles
+                                        ///  the case that components are stored separately
+    UINT_32         compBits;           ///< The component bits actually needed(for planar surface)
+
+    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
+                                        ///  number of samples for normal AA; Set it to the
+                                        ///  number of fragments for EQAA
+    /// r800 and later HWL parameters
+    // Used for 1D tiling above
+    AddrTileType    tileType;           ///< See definition of AddrTileType
+    struct
+    {
+        UINT_32     ignoreSE : 1;       ///< TRUE if shader engines are ignored. This is texture
+                                        ///  only flag. Only non-RT texture can set this to TRUE
+        UINT_32     reserved :31;       ///< Reserved for future use.
+    };
+    // 2D tiling needs following structure
+    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
+    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    union
+    {
+        struct
+        {
+            UINT_32  bankSwizzle;       ///< Bank swizzle
+            UINT_32  pipeSwizzle;       ///< Pipe swizzle
+        };
+        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
+    };
+
+#if ADDR_AM_BUILD // These two fields are not valid in SW blt since no HTILE access
+    UINT_32         addr5Swizzle;       ///< ADDR5_SWIZZLE_MASK of DB_DEPTH_INFO
+    BOOL_32         is32ByteTile;       ///< Caller must have access to HTILE buffer and know if
+                                        ///  this tile is compressed to 32B
+#endif
+} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeSurfaceAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_64 addr;           ///< Byte address
+    UINT_32 bitPosition;    ///< Bit position within addr, 0-7.
+                            ///  For surface bpp < 8, e.g. FMT_1.
+    UINT_32 prtBlockIndex;  ///< Index of a PRT tile (64K block)
+} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeSurfaceAddrFromCoord
+*
+*   @brief
+*       Compute surface address from a given coordinate.
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
+    ADDR_HANDLE                                     hLib,   ///< Library handle
+    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< Input structure (read-only)
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut);  ///< Output structure
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeSurfaceCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    UINT_64         addr;               ///< Address in bytes
+    UINT_32         bitPosition;        ///< Bit position in addr. 0-7. for surface bpp < 8,
+                                        ///  e.g. FMT_1;
+    UINT_32         bpp;                ///< Bits per pixel
+    UINT_32         pitch;              ///< Pitch, in pixels
+    UINT_32         height;             ///< Height in pixels
+    UINT_32         numSlices;          ///< Surface depth
+    UINT_32         numSamples;         ///< Number of samples
+
+    AddrTileMode    tileMode;           ///< Tile mode
+    BOOL_32         isDepth;            ///< Surface uses depth sample ordering within micro tile.
+                                        ///  Note: Textures can choose depth sample order as well.
+    UINT_32         tileBase;           ///< Base offset (in bits) inside micro tile which handles
+                                        ///  the case that components are stored separately
+    UINT_32         compBits;           ///< The component bits actually needed(for planar surface)
+
+    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
+                                        ///  number of samples for normal AA; Set it to the
+                                        ///  number of fragments for EQAA
+    /// r800 and later HWL parameters
+    // Used for 1D tiling above
+    AddrTileType    tileType;           ///< See definition of AddrTileType
+    struct
+    {
+        UINT_32     ignoreSE : 1;       ///< TRUE if shader engines are ignored. This is texture
+                                        ///  only flag. Only non-RT texture can set this to TRUE
+        UINT_32     reserved :31;       ///< Reserved for future use.
+    };
+    // 2D tiling needs following structure
+    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
+    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    union
+    {
+        struct
+        {
+            UINT_32  bankSwizzle;       ///< Bank swizzle
+            UINT_32  pipeSwizzle;       ///< Pipe swizzle
+        };
+        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
+    };
+} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeSurfaceCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
+{
+    UINT_32 size;   ///< Size of this structure in bytes
+
+    UINT_32 x;      ///< X coordinate
+    UINT_32 y;      ///< Y coordinate
+    UINT_32 slice;  ///< Slice index
+    UINT_32 sample; ///< Sample index; this is the fragment index for EQAA
+} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeSurfaceCoordFromAddr
+*
+*   @brief
+*       Compute coordinate from a given surface address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
+    ADDR_HANDLE                                     hLib,   ///< Library handle
+    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< Input structure (read-only)
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut);  ///< Output structure
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                   HTile functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   ADDR_HTILE_FLAGS
+*
+*   @brief
+*       HTILE flags
+***************************************************************************************************
+*/
+typedef union _ADDR_HTILE_FLAGS
+{
+    struct
+    {
+        UINT_32 tcCompatible  : 1; ///< Flag indicates surface needs to be shader readable
+        UINT_32 reserved      :31; ///< Reserved bits
+    };
+
+    UINT_32 value; ///< All of the flag bits above viewed as a single UINT_32
+} ADDR_HTILE_FLAGS;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_HTILE_INFO_INPUT
+*
+*   @brief
+*       Input structure of AddrComputeHtileInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT
+{
+    UINT_32            size;            ///< Size of this structure in bytes
+
+    ADDR_HTILE_FLAGS   flags;           ///< HTILE flags
+    UINT_32            pitch;           ///< Surface pitch, in pixels
+    UINT_32            height;          ///< Surface height, in pixels
+    UINT_32            numSlices;       ///< Number of slices
+    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
+    AddrHtileBlockSize blockWidth;      ///< 4 or 8. EG and above only support 8
+    AddrHtileBlockSize blockHeight;     ///< 4 or 8. EG and above only support 8
+    ADDR_TILEINFO*     pTileInfo;       ///< Tile info
+
+    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
+                                        ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_HTILE_INFO_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_HTILE_INFO_OUTPUT
+*
+*   @brief
+*       Output structure of AddrComputeHtileInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_32 pitch;          ///< Pitch in pixels of depth buffer represented in this
+                            ///  HTile buffer. This might be larger than original depth
+                            ///  buffer pitch when called with an unaligned pitch.
+    UINT_32 height;         ///< Height in pixels, as above
+    UINT_64 htileBytes;     ///< Size of HTILE buffer, in bytes
+    UINT_32 baseAlign;      ///< Base alignment
+    UINT_32 bpp;            ///< For HTILE, "bits per pixel" is the number of bits per 8x8 block!
+    UINT_32 macroWidth;     ///< Macro width in pixels, actually squared cache shape
+    UINT_32 macroHeight;    ///< Macro height in pixels
+    UINT_64 sliceSize;      ///< Slice size, in bytes.
+} ADDR_COMPUTE_HTILE_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeHtileInfo
+*
+*   @brief
+*       Compute Htile pitch, height, base alignment and size in bytes
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
+    ADDR_HANDLE                             hLib,   ///< Library handle
+    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< Input structure (read-only)
+    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut);  ///< Output structure
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeHtileAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
+{
+    UINT_32            size;            ///< Size of this structure in bytes
+
+    UINT_32            pitch;           ///< Pitch, in pixels
+    UINT_32            height;          ///< Height in pixels
+    UINT_32            x;               ///< X coordinate
+    UINT_32            y;               ///< Y coordinate
+    UINT_32            slice;           ///< Index of slice
+    UINT_32            numSlices;       ///< Number of slices
+    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
+    AddrHtileBlockSize blockWidth;      ///< 4 or 8. 1 means 8, 0 means 4. EG and above only support 8
+    AddrHtileBlockSize blockHeight;     ///< 4 or 8. 1 means 8, 0 means 4. EG and above only support 8
+    ADDR_TILEINFO*     pTileInfo;       ///< Tile info
+
+    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
+                                        ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeHtileAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_64 addr;           ///< Address in bytes
+    UINT_32 bitPosition;    ///< Bit position, 0 or 4. CMASK and HTILE share some lib methods,
+                            ///  so we keep bitPosition for HTILE as well
+} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeHtileAddrFromCoord
+*
+*   @brief
+*       Compute Htile address according to coordinates (of depth buffer)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
+    ADDR_HANDLE                                     hLib,   ///< Library handle
+    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< Input structure (read-only)
+    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut);  ///< Output structure
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeHtileCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
+{
+    UINT_32            size;            ///< Size of this structure in bytes
+
+    UINT_64            addr;            ///< Address
+    UINT_32            bitPosition;     ///< Bit position, 0 or 4. CMASK and HTILE share some methods
+                                        ///  so we keep bitPosition for HTILE as well
+    UINT_32            pitch;           ///< Pitch, in pixels
+    UINT_32            height;          ///< Height, in pixels
+    UINT_32            numSlices;       ///< Number of slices
+    BOOL_32            isLinear;        ///< Linear or tiled HTILE layout
+    AddrHtileBlockSize blockWidth;      ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8
+    AddrHtileBlockSize blockHeight;     ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8
+    ADDR_TILEINFO*     pTileInfo;       ///< Tile info
+
+    INT_32             tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    INT_32             macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
+                                        ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeHtileCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
+{
+    UINT_32 size;   ///< Size of this structure in bytes
+
+    UINT_32 x;      ///< X coordinate (within the depth buffer)
+    UINT_32 y;      ///< Y coordinate (within the depth buffer)
+    UINT_32 slice;  ///< Slice index
+} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeHtileCoordFromAddr
+*
+*   @brief
+*       Compute coordinates within depth buffer (1st pixel of a micro tile) according to
+*       Htile address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
+    ADDR_HANDLE                                     hLib,   ///< Library handle
+    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< Input structure (read-only)
+    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut);  ///< Output structure
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                     C-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   ADDR_CMASK_FLAGS
+*
+*   @brief
+*       CMASK flags
+***************************************************************************************************
+*/
+typedef union _ADDR_CMASK_FLAGS
+{
+    struct
+    {
+        UINT_32 tcCompatible  : 1; ///< Flag indicates surface needs to be shader readable
+        UINT_32 reserved      :31; ///< Reserved bits
+    };
+
+    UINT_32 value; ///< All of the flag bits above viewed as a single UINT_32
+} ADDR_CMASK_FLAGS;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_CMASK_INFO_INPUT
+*
+*   @brief
+*       Input structure of AddrComputeCmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT // NOTE(review): tag omits the '_' found in typedef name ADDR_COMPUTE_CMASK_INFO_INPUT
+{
+    UINT_32             size;            ///< Size of this structure in bytes
+
+    ADDR_CMASK_FLAGS    flags;           ///< CMASK flags
+    UINT_32             pitch;           ///< Pitch, in pixels, of color buffer
+    UINT_32             height;          ///< Height, in pixels, of color buffer
+    UINT_32             numSlices;       ///< Number of slices, of color buffer
+    BOOL_32             isLinear;        ///< Linear or tiled layout, Only SI can be linear
+    ADDR_TILEINFO*      pTileInfo;       ///< Tile info
+
+    INT_32              tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
+                                         ///  while the global useTileIndex is set to 1
+    INT_32              macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
+                                         ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_CMASK_INFO_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_CMASK_INFO_OUTPUT
+*
+*   @brief
+*       Output structure of AddrComputeCmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_32 pitch;          ///< Pitch in pixels of color buffer which
+                            ///  this Cmask matches. The size might be larger than
+                            ///  original color buffer pitch when called with
+                            ///  an unaligned pitch.
+    UINT_32 height;         ///< Height in pixels, as above
+    UINT_64 cmaskBytes;     ///< Size in bytes of CMask buffer
+    UINT_32 baseAlign;      ///< Base alignment
+    UINT_32 blockMax;       ///< Cmask block size. Needed to set the CB_COLORn_MASK register
+    UINT_32 macroWidth;     ///< Macro width in pixels, actually squared cache shape
+    UINT_32 macroHeight;    ///< Macro height in pixels
+    UINT_64 sliceSize;      ///< Slice size, in bytes.
+} ADDR_COMPUTE_CMASK_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeCmaskInfo
+*
+*   @brief
+*       Compute Cmask pitch, height, base alignment and size in bytes from color buffer
+*       info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
+    ADDR_HANDLE                             hLib,   ///< Library handle
+    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< Input structure (read-only)
+    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut);  ///< Output structure
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeCmaskAddrFromCoord
+*
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
+{
+    UINT_32          size;           ///< Size of this structure in bytes
+    UINT_32          x;              ///< X coordinate
+    UINT_32          y;              ///< Y coordinate
+    UINT_64          fmaskAddr;      ///< Fmask addr for tc compatible Cmask
+    UINT_32          slice;          ///< Slice index
+    UINT_32          pitch;          ///< Pitch in pixels, of color buffer
+    UINT_32          height;         ///< Height in pixels, of color buffer
+    UINT_32          numSlices;      ///< Number of slices
+    UINT_32          bpp;            ///< Presumably bits per pixel, as in the sibling structs - TODO confirm
+    BOOL_32          isLinear;       ///< Linear or tiled layout, Only SI can be linear
+    ADDR_CMASK_FLAGS flags;          ///< CMASK flags
+    ADDR_TILEINFO*   pTileInfo;      ///< Tile info
+
+    INT_32           tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
+                                     ///< while the global useTileIndex is set to 1
+    INT_32           macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+                                     ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeCmaskAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_64 addr;           ///< CMASK address in bytes
+    UINT_32 bitPosition;    ///< Bit position within addr, 0-7. CMASK is 4 bpp,
+                            ///  so the position is either bit 0 (0) or bit 4 (4)
+} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeCmaskAddrFromCoord
+*
+*   @brief
+*       Compute Cmask address according to coordinates (of MSAA color buffer)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
+    ADDR_HANDLE                                     hLib,
+    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,
+    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeCmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
+{
+    UINT_32        size;            ///< Size of this structure in bytes
+
+    UINT_64        addr;            ///< CMASK address in bytes
+    UINT_32        bitPosition;     ///< Bit position within addr, 0-7. CMASK is 4 bpp,
+                                    ///  so the value starts at either bit 0 or bit 4
+    UINT_32        pitch;           ///< Pitch, in pixels
+    UINT_32        height;          ///< Height in pixels
+    UINT_32        numSlices;       ///< Number of slices
+    BOOL_32        isLinear;        ///< Linear or tiled layout, Only SI can be linear
+    ADDR_TILEINFO* pTileInfo;       ///< Tile info
+
+    INT_32         tileIndex;       ///< Tile index, MUST be -1 if you don't want to use it
+                                    ///  while the global useTileIndex is set to 1
+    INT_32         macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
+                                    ///  README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeCmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
+{
+    UINT_32 size;   ///< Size of this structure in bytes
+
+    UINT_32 x;      ///< X coordinate
+    UINT_32 y;      ///< Y coordinate
+    UINT_32 slice;  ///< Slice index
+} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeCmaskCoordFromAddr
+*
+*   @brief
+*       Compute coordinates within color buffer (1st pixel of a micro tile) according to
+*       Cmask address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
+    ADDR_HANDLE                                     hLib,
+    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,
+    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut);
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                     F-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_FMASK_INFO_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeFmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    AddrTileMode    tileMode;           ///< Tile mode
+    UINT_32         pitch;              ///< Surface pitch, in pixels
+    UINT_32         height;             ///< Surface height, in pixels
+    UINT_32         numSlices;          ///< Number of slice/depth
+    UINT_32         numSamples;         ///< Number of samples
+    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
+                                        ///  number of samples for normal AA; Set it to the
+                                        ///  number of fragments for EQAA
+    /// r800 and later HWL parameters
+    struct
+    {
+        UINT_32 resolved:   1;          ///< TRUE if the surface is for resolved fmask, only used
+                                        ///  by H/W clients. S/W should always set it to FALSE.
+        UINT_32 reserved:  31;          ///< Reserved for future use.
+    };
+    ADDR_TILEINFO*  pTileInfo;          ///< 2D tiling parameters. Clients must give valid data
+    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+} ADDR_COMPUTE_FMASK_INFO_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_FMASK_INFO_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeFmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT
+{
+    UINT_32         size;           ///< Size of this structure in bytes
+
+    UINT_32         pitch;          ///< Pitch of fmask in pixels
+    UINT_32         height;         ///< Height of fmask in pixels
+    UINT_32         numSlices;      ///< Slices of fmask
+    UINT_64         fmaskBytes;     ///< Size of fmask in bytes
+    UINT_32         baseAlign;      ///< Base address alignment
+    UINT_32         pitchAlign;     ///< Pitch alignment
+    UINT_32         heightAlign;    ///< Height alignment
+    UINT_32         bpp;            ///< Bits per pixel of FMASK, i.e. the number of bit planes
+    UINT_32         numSamples;     ///< Number of samples, used for dump; exported because the
+                                    ///  input may be changed in 9xx and above
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*  pTileInfo;      ///< Tile parameters used. Fmask can have a different
+                                    ///  bank_height from color buffer
+    INT_32          tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
+                                    ///  while the global useTileIndex is set to 1
+    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+    UINT_64         sliceSize;      ///< Size of slice in bytes
+} ADDR_COMPUTE_FMASK_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskInfo
+*
+*   @brief
+*       Compute Fmask pitch/height/depth/alignments and size in bytes
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
+    ADDR_HANDLE                             hLib,
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeFmaskAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    UINT_32         x;                  ///< X coordinate
+    UINT_32         y;                  ///< Y coordinate
+    UINT_32         slice;              ///< Slice index
+    UINT_32         plane;              ///< Plane number
+    UINT_32         sample;             ///< Sample index (fragment index for EQAA)
+
+    UINT_32         pitch;              ///< Surface pitch, in pixels
+    UINT_32         height;             ///< Surface height, in pixels
+    UINT_32         numSamples;         ///< Number of samples
+    UINT_32         numFrags;           ///< Number of fragments, leave it zero or the same as
+                                        ///  number of samples for normal AA; Set it to the
+                                        ///  number of fragments for EQAA
+
+    AddrTileMode    tileMode;           ///< Tile mode
+    union
+    {
+        struct
+        {
+            UINT_32  bankSwizzle;       ///< Bank swizzle
+            UINT_32  pipeSwizzle;       ///< Pipe swizzle
+        };
+        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
+    };
+
+    /// r800 and later HWL parameters
+    struct
+    {
+        UINT_32 resolved:   1;          ///< TRUE if this is a resolved fmask, used by H/W clients
+        UINT_32 ignoreSE:   1;          ///< TRUE if shader engines are ignored.
+        UINT_32 reserved:  30;          ///< Reserved for future use.
+    };
+    ADDR_TILEINFO*  pTileInfo;          ///< 2D tiling parameters. Client must provide all data
+
+} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeFmaskAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_64 addr;           ///< Fmask address
+    UINT_32 bitPosition;    ///< Bit position within addr, 0-7.
+} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskAddrFromCoord
+*
+*   @brief
+*       Compute Fmask address according to coordinates (x,y,slice,sample,plane)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
+    ADDR_HANDLE                                     hLib,
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
+*
+*   @brief
+*       Input structure for AddrComputeFmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    UINT_64         addr;               ///< Address
+    UINT_32         bitPosition;        ///< Bit position within addr, 0-7.
+
+    UINT_32         pitch;              ///< Pitch, in pixels
+    UINT_32         height;             ///< Height in pixels
+    UINT_32         numSamples;         ///< Number of samples
+    UINT_32         numFrags;           ///< Number of fragments
+    AddrTileMode    tileMode;           ///< Tile mode
+    union
+    {
+        struct
+        {
+            UINT_32  bankSwizzle;       ///< Bank swizzle
+            UINT_32  pipeSwizzle;       ///< Pipe swizzle
+        };
+        UINT_32     tileSwizzle;        ///< Combined swizzle, if useCombinedSwizzle is TRUE
+    };
+
+    /// r800 and later HWL parameters
+    struct
+    {
+        UINT_32 resolved:   1;          ///< TRUE if this is a resolved fmask, used by HW components
+        UINT_32 ignoreSE:   1;          ///< TRUE if shader engines are ignored.
+        UINT_32 reserved:  30;          ///< Reserved for future use.
+    };
+    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Client must provide all data
+
+} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputeFmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
+{
+    UINT_32 size;       ///< Size of this structure in bytes
+
+    UINT_32 x;          ///< X coordinate
+    UINT_32 y;          ///< Y coordinate
+    UINT_32 slice;      ///< Slice index
+    UINT_32 plane;      ///< Plane number
+    UINT_32 sample;     ///< Sample index (fragment index for EQAA)
+} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskCoordFromAddr
+*
+*   @brief
+*       Compute FMASK coordinate from a given address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
+    ADDR_HANDLE                                     hLib,
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut);
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                          Element/utility functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrGetVersion
+*
+*   @brief
+*       Get AddrLib version number
+***************************************************************************************************
+*/
+UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib);
+
+/**
+***************************************************************************************************
+*   AddrUseTileIndex
+*
+*   @brief
+*       Return TRUE if tileIndex is enabled in this address library
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib);
+
+/**
+***************************************************************************************************
+*   AddrUseCombinedSwizzle
+*
+*   @brief
+*       Return TRUE if combined swizzle is enabled in this address library
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib);
+
+/**
+***************************************************************************************************
+*   ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
+*
+*   @brief
+*       Input structure of AddrExtractBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
+{
+    UINT_32         size;           ///< Size of this structure in bytes
+
+    UINT_32         base256b;       ///< Base256b value
+
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*  pTileInfo;      ///< 2D tile parameters. Client must provide all data
+
+    INT_32          tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
+                                    ///  while the global useTileIndex is set to 1
+    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+                                    ///< README: When tileIndex is not -1, this must be valid
+} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
+*
+*   @brief
+*       Output structure of AddrExtractBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_32 bankSwizzle;    ///< Bank swizzle
+    UINT_32 pipeSwizzle;    ///< Pipe swizzle
+} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrExtractBankPipeSwizzle
+*
+*   @brief
+*       Extract Bank and Pipe swizzle from base256b
+*   @return
+*       ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
+    ADDR_HANDLE                                 hLib,
+    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut);
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
+*
+*   @brief
+*       Input structure of AddrCombineBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
+{
+    UINT_32         size;           ///< Size of this structure in bytes
+
+    UINT_32         bankSwizzle;    ///< Bank swizzle
+    UINT_32         pipeSwizzle;    ///< Pipe swizzle
+    UINT_64         baseAddr;       ///< Base address (leave it zero for driver clients)
+
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*  pTileInfo;      ///< 2D tile parameters. Client must provide all data
+
+    INT_32          tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
+                                    ///  while the global useTileIndex is set to 1
+    INT_32          macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+                                    ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
+*
+*   @brief
+*       Output structure of AddrCombineBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_32 tileSwizzle;    ///< Combined swizzle
+} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrCombineBankPipeSwizzle
+*
+*   @brief
+*       Combine Bank and Pipe swizzle
+*   @return
+*       ADDR_OK if no error
+*   @note
+*       baseAddr here is full MCAddress instead of base256b
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
+    ADDR_HANDLE                                 hLib,
+    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
+    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SLICESWIZZLE_INPUT
+*
+*   @brief
+*       Input structure of AddrComputeSliceSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    AddrTileMode    tileMode;           ///< Tile Mode
+    UINT_32         baseSwizzle;        ///< Base tile swizzle
+    UINT_32         slice;              ///< Slice index
+    UINT_64         baseAddr;           ///< Base address, driver should leave it 0 in most cases
+
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*  pTileInfo;          ///< 2D tile parameters. Actually banks needed here!
+
+    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI)
+                                        ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_SLICESWIZZLE_INPUT;
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
+*
+*   @brief
+*       Output structure of AddrComputeSliceSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
+{
+    UINT_32  size;           ///< Size of this structure in bytes
+
+    UINT_32  tileSwizzle;    ///< Recalculated tileSwizzle value
+} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeSliceSwizzle
+*
+*   @brief
+*       Recalculate the tile swizzle of a slice from the base tile swizzle
+*   @return
+*       ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
+    ADDR_HANDLE                             hLib,
+    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,
+    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut);
+
+
+/**
+***************************************************************************************************
+*   AddrSwizzleGenOption
+*
+*   @brief
+*       Which swizzle generating options: legacy or linear
+***************************************************************************************************
+*/
+typedef enum _AddrSwizzleGenOption
+{
+    ADDR_SWIZZLE_GEN_DEFAULT    = 0,    ///< As is in client driver implementation for swizzle
+    ADDR_SWIZZLE_GEN_LINEAR     = 1,    ///< Using a linear increment of swizzle
+} AddrSwizzleGenOption;
+
+/**
+***************************************************************************************************
+*   ADDR_SWIZZLE_OPTION
+*
+*   @brief
+*       Controls how swizzle is generated
+***************************************************************************************************
+*/
+typedef union _ADDR_SWIZZLE_OPTION
+{
+    struct
+    {
+        UINT_32 genOption       : 1;    ///< The way swizzle is generated, see AddrSwizzleGenOption
+        UINT_32 reduceBankBit   : 1;    ///< TRUE if we need to reduce swizzle bits
+        UINT_32 reserved        :30;    ///< Reserved bits
+    };
+
+    UINT_32 value;                      ///< All option bits above viewed as one 32-bit value
+
+} ADDR_SWIZZLE_OPTION;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_BASE_SWIZZLE_INPUT
+*
+*   @brief
+*       Input structure of AddrComputeBaseSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT
+{
+    UINT_32             size;           ///< Size of this structure in bytes
+
+    ADDR_SWIZZLE_OPTION option;         ///< Swizzle option
+    UINT_32             surfIndex;      ///< Index of this surface type
+    AddrTileMode        tileMode;       ///< Tile Mode
+
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*      pTileInfo;      ///< 2D tile parameters. Actually banks needed here!
+
+    INT_32              tileIndex;      ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    INT_32              macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+                                        ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_BASE_SWIZZLE_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
+*
+*   @brief
+*       Output structure of AddrComputeBaseSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_32 tileSwizzle;    ///< Combined swizzle
+} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeBaseSwizzle
+*
+*   @brief
+*       Return a combined bank and pipe swizzle based on surface type/index
+*   @return
+*       ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
+    ADDR_HANDLE                             hLib,
+    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
+    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ELEM_GETEXPORTNORM_INPUT
+*
+*   @brief
+*       Input structure for ElemGetExportNorm
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_GETEXPORTNORM_INPUT
+{
+    UINT_32             size;       ///< Size of this structure in bytes
+
+    AddrColorFormat     format;     ///< Color buffer format; Client should use ColorFormat
+    AddrSurfaceNumber   num;        ///< Surface number type; Client should use NumberType
+    AddrSurfaceSwap     swap;       ///< Surface byte swap; Client should use SurfaceSwap
+    UINT_32             numSamples; ///< Number of samples
+} ELEM_GETEXPORTNORM_INPUT;
+
+/**
+***************************************************************************************************
+*  ElemGetExportNorm
+*
+*   @brief
+*       Helper function to check whether a format can be EXPORT_NORM, which is a register
+*       CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600
+*       family
+*   @note
+*       The implementation is only for r600.
+*       00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two
+*       clocks per export)
+*       01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one
+*       clock per export)
+*
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API ElemGetExportNorm(
+    ADDR_HANDLE                     hLib,
+    const ELEM_GETEXPORTNORM_INPUT* pIn);
+
+
+
+/**
+***************************************************************************************************
+*   ELEM_FLT32TODEPTHPIXEL_INPUT
+*
+*   @brief
+*       Input structure for ElemFlt32ToDepthPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT
+{
+    UINT_32         size;           ///< Size of this structure in bytes
+
+    AddrDepthFormat format;         ///< Depth buffer format
+    ADDR_FLT_32     comps[2];       ///< Component values (Z/stencil)
+} ELEM_FLT32TODEPTHPIXEL_INPUT;
+
+/**
+***************************************************************************************************
+*   ELEM_FLT32TODEPTHPIXEL_OUTPUT
+*
+*   @brief
+*       Output structure for ElemFlt32ToDepthPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT
+{
+    UINT_32 size;           ///< Size of this structure in bytes
+
+    UINT_8* pPixel;         ///< Real depth value. Same data type as depth buffer.
+                            ///  Client must provide enough storage for this type.
+    UINT_32 depthBase;      ///< Tile base in bits for depth bits
+    UINT_32 stencilBase;    ///< Tile base in bits for stencil bits
+    UINT_32 depthBits;      ///< Bits for depth
+    UINT_32 stencilBits;    ///< Bits for stencil
+} ELEM_FLT32TODEPTHPIXEL_OUTPUT;
+
+/**
+***************************************************************************************************
+*   ElemFlt32ToDepthPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a depth/stencil pixel value
+*
+*   @return
+*       Return code
+*
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
+    ADDR_HANDLE                         hLib,
+    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
+    ELEM_FLT32TODEPTHPIXEL_OUTPUT*      pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ELEM_FLT32TOCOLORPIXEL_INPUT
+*
+*   @brief
+*       Input structure for ElemFlt32ToColorPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT
+{
+    UINT_32            size;           ///< Size of this structure in bytes
+
+    AddrColorFormat    format;         ///< Color buffer format
+    AddrSurfaceNumber  surfNum;        ///< Surface number
+    AddrSurfaceSwap    surfSwap;       ///< Surface swap
+    ADDR_FLT_32        comps[4];       ///< Component values (r/g/b/a)
+} ELEM_FLT32TOCOLORPIXEL_INPUT;
+
+/**
+***************************************************************************************************
+*   ELEM_FLT32TOCOLORPIXEL_OUTPUT
+*
+*   @brief
+*       Output structure for ElemFlt32ToColorPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT
+{
+    UINT_32 size;       ///< Size of this structure in bytes
+
+    UINT_8* pPixel;     ///< Real color value. Same data type as color buffer.
+                        ///  Client must provide enough storage for this type.
+} ELEM_FLT32TOCOLORPIXEL_OUTPUT;
+
+/**
+***************************************************************************************************
+*   ElemFlt32ToColorPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a red/green/blue/alpha pixel value
+*
+*   @return
+*       Return code
+*
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
+    ADDR_HANDLE                         hLib,
+    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
+    ELEM_FLT32TOCOLORPIXEL_OUTPUT*      pOut);
+
+
+/**
+***************************************************************************************************
+*   ADDR_CONVERT_TILEINFOTOHW_INPUT
+*
+*   @brief
+*       Input structure for AddrConvertTileInfoToHW
+*   @note
+*       When reverse is TRUE, indices are ignored
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+    BOOL_32         reverse;            ///< Convert control flag.
+                                        ///  FALSE: convert from real value to HW value;
+                                        ///  TRUE: convert from HW value to real value.
+
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*  pTileInfo;          ///< Tile parameters with real value
+
+    INT_32          tileIndex;          ///< Tile index, MUST be -1 if you don't want to use it
+                                        ///  while the global useTileIndex is set to 1
+    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI)
+                                        ///< README: When tileIndex is not -1, this must be valid
+} ADDR_CONVERT_TILEINFOTOHW_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_CONVERT_TILEINFOTOHW_OUTPUT
+*
+*   @brief
+*       Output structure for AddrConvertTileInfoToHW
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT
+{
+    UINT_32             size;               ///< Size of this structure in bytes
+
+    /// r800 and later HWL parameters
+    ADDR_TILEINFO*      pTileInfo;          ///< Tile parameters with hardware register value
+
+} ADDR_CONVERT_TILEINFOTOHW_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrConvertTileInfoToHW
+*
+*   @brief
+*       Convert tile info from real value to hardware register value
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
+    ADDR_HANDLE                             hLib,
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT*  pIn,
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT*       pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_CONVERT_TILEINDEX_INPUT
+*
+*   @brief
+*       Input structure for AddrConvertTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINDEX_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    INT_32          tileIndex;          ///< Tile index
+    INT_32          macroModeIndex;     ///< Index in macro tile mode table if there is one (CI)
+    BOOL_32         tileInfoHw;         ///< Set to TRUE if client wants HW enum, otherwise actual
+} ADDR_CONVERT_TILEINDEX_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_CONVERT_TILEINDEX_OUTPUT
+*
+*   @brief
+*       Output structure for AddrConvertTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT
+{
+    UINT_32             size;           ///< Size of this structure in bytes
+
+    AddrTileMode        tileMode;       ///< Tile mode
+    AddrTileType        tileType;       ///< Tile type
+    ADDR_TILEINFO*      pTileInfo;      ///< Tile info
+
+} ADDR_CONVERT_TILEINDEX_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrConvertTileIndex
+*
+*   @brief
+*       Convert tile index to tile mode/type/info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
+    ADDR_HANDLE                         hLib,
+    const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
+    ADDR_CONVERT_TILEINDEX_OUTPUT*      pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_CONVERT_TILEINDEX1_INPUT
+*
+*   @brief
+*       Input structure for AddrConvertTileIndex1 (without macro mode index)
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT
+{
+    UINT_32         size;               ///< Size of this structure in bytes
+
+    INT_32          tileIndex;          ///< Tile index
+    UINT_32         bpp;                ///< Bits per pixel
+    UINT_32         numSamples;         ///< Number of samples
+    BOOL_32         tileInfoHw;         ///< Set to TRUE if client wants HW enum, otherwise actual
+} ADDR_CONVERT_TILEINDEX1_INPUT;
+
+/**
+***************************************************************************************************
+*   AddrConvertTileIndex1
+*
+*   @brief
+*       Convert tile index to tile mode/type/info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
+    ADDR_HANDLE                             hLib,
+    const ADDR_CONVERT_TILEINDEX1_INPUT*    pIn,
+    ADDR_CONVERT_TILEINDEX_OUTPUT*          pOut);
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_GET_TILEINDEX_INPUT
+*
+*   @brief
+*       Input structure for AddrGetTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_GET_TILEINDEX_INPUT
+{
+    UINT_32         size;           ///< Size of this structure in bytes
+
+    AddrTileMode    tileMode;       ///< Tile mode
+    AddrTileType    tileType;       ///< Tile-type: disp/non-disp/...
+    ADDR_TILEINFO*  pTileInfo;      ///< Pointer to tile-info structure, can be NULL for linear/1D
+} ADDR_GET_TILEINDEX_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_GET_TILEINDEX_OUTPUT
+*
+*   @brief
+*       Output structure for AddrGetTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_GET_TILEINDEX_OUTPUT
+{
+    UINT_32         size;           ///< Size of this structure in bytes
+
+    INT_32          index;          ///< index in table
+} ADDR_GET_TILEINDEX_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrGetTileIndex
+*
+*   @brief
+*       Get the tiling mode index in table
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
+    ADDR_HANDLE                     hLib,
+    const ADDR_GET_TILEINDEX_INPUT* pIn,
+    ADDR_GET_TILEINDEX_OUTPUT*      pOut);
+
+
+
+
+/**
+***************************************************************************************************
+*   ADDR_PRT_INFO_INPUT
+*
+*   @brief
+*       Input structure for AddrComputePrtInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_PRT_INFO_INPUT
+{
+    AddrFormat          format;        ///< Surface format
+    UINT_32             baseMipWidth;  ///< Base mipmap width
+    UINT_32             baseMipHeight; ///< Base mipmap height
+    UINT_32             baseMipDepth;  ///< Base mipmap depth
+    UINT_32             numFrags;      ///< Number of fragments,
+} ADDR_PRT_INFO_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_PRT_INFO_OUTPUT
+*
+*   @brief
+*       Output structure for AddrComputePrtInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_PRT_INFO_OUTPUT
+{
+    UINT_32             prtTileWidth;
+    UINT_32             prtTileHeight;
+} ADDR_PRT_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputePrtInfo
+*
+*   @brief
+*       Compute prt surface related information
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
+    ADDR_HANDLE                 hLib,
+    const ADDR_PRT_INFO_INPUT*  pIn,
+    ADDR_PRT_INFO_OUTPUT*       pOut);
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                                     DCC key functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_DCCINFO_INPUT
+*
+*   @brief
+*       Input structure of AddrComputeDccInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_DCCINFO_INPUT
+{
+    UINT_32             size;            ///< Size of this structure in bytes
+    UINT_32             bpp;             ///< Bits per pixel of color surface
+    UINT_32             numSamples;      ///< Sample number of color surface
+    UINT_64             colorSurfSize;   ///< Size of color surface to which dcc key is bound
+    AddrTileMode        tileMode;        ///< Tile mode of color surface
+    ADDR_TILEINFO       tileInfo;        ///< Tile info of color surface
+    UINT_32             tileSwizzle;     ///< Tile swizzle
+    INT_32              tileIndex;       ///< Tile index of color surface,
+                                         ///< MUST be -1 if you don't want to use it
+                                         ///< while the global useTileIndex is set to 1
+    INT_32              macroModeIndex;  ///< Index in macro tile mode table if there is one (CI)
+                                         ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_DCCINFO_INPUT;
+
+/**
+***************************************************************************************************
+*   ADDR_COMPUTE_DCCINFO_OUTPUT
+*
+*   @brief
+*       Output structure of AddrComputeDccInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT
+{
+    UINT_32 size;                 ///< Size of this structure in bytes
+    UINT_64 dccRamBaseAlign;      ///< Base alignment of dcc key
+    UINT_64 dccRamSize;           ///< Size of dcc key
+    UINT_64 dccFastClearSize;     ///< Size of dcc key portion that can be fast cleared
+    BOOL_32 subLvlCompressible;   ///< Whether sub resource is compressible
+} ADDR_COMPUTE_DCCINFO_OUTPUT;
+
+/**
+***************************************************************************************************
+*   AddrComputeDccInfo
+*
+*   @brief
+*       Compute DCC key size, base alignment
+*       info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
+    ADDR_HANDLE                             hLib,
+    const ADDR_COMPUTE_DCCINFO_INPUT*       pIn,
+    ADDR_COMPUTE_DCCINFO_OUTPUT*            pOut);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // __ADDR_INTERFACE_H__
+
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/addrtypes.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/addrtypes.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/addrtypes.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/addrtypes.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,590 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrtypes.h
+* @brief Contains the type definitions and constants
+***************************************************************************************************
+*/
+#ifndef __ADDR_TYPES_H__
+#define __ADDR_TYPES_H__
+
+#if defined(__APPLE__) || defined(TCORE_BUILD)
+// External definitions header maintained by Mac driver team (and TCORE team)
+// Helps address compilation issues & reduces code covered by NDA
+#include "addrExtDef.h"
+
+#else
+
+// Windows and/or Linux
+#if !defined(VOID)
+typedef void           VOID;
+#endif
+
+#if !defined(FLOAT)
+typedef float          FLOAT;
+#endif
+
+#if !defined(CHAR)
+typedef char           CHAR;
+#endif
+
+#if !defined(INT)
+typedef int            INT;
+#endif
+
+#include <stdarg.h> // va_list...etc need this header
+
+#endif // defined(__APPLE__) || defined(TCORE_BUILD)
+
+/**
+***************************************************************************************************
+*   Calling conventions
+***************************************************************************************************
+*/
+#ifndef ADDR_CDECL
+    #if defined(__GNUC__)
+        #define ADDR_CDECL __attribute__((cdecl))
+    #else
+        #define ADDR_CDECL __cdecl
+    #endif
+#endif
+
+#ifndef ADDR_STDCALL
+    #if defined(__GNUC__)
+        #if defined(__AMD64__)
+            #define ADDR_STDCALL
+        #else
+            #define ADDR_STDCALL __attribute__((stdcall))
+        #endif
+    #else
+        #define ADDR_STDCALL __stdcall
+    #endif
+#endif
+
+#ifndef ADDR_FASTCALL
+    #if defined(__GNUC__)
+        #define ADDR_FASTCALL __attribute__((regparm(0)))
+    #else
+        #define ADDR_FASTCALL __fastcall
+    #endif
+#endif
+
+#ifndef GC_CDECL
+    #define GC_CDECL  ADDR_CDECL
+#endif
+
+#ifndef GC_STDCALL
+    #define GC_STDCALL  ADDR_STDCALL
+#endif
+
+#ifndef GC_FASTCALL
+    #define GC_FASTCALL  ADDR_FASTCALL
+#endif
+
+
+#if defined(__GNUC__)
+    #define ADDR_INLINE static inline   // inline needs to be static to link
+#else
+    // win32, win64, other platforms
+    #define ADDR_INLINE   __inline
+#endif // #if defined(__GNUC__)
+
+#define ADDR_API ADDR_FASTCALL //default call convention is fast call
+
+/**
+***************************************************************************************************
+* Global defines used by other modules
+***************************************************************************************************
+*/
+#if !defined(TILEINDEX_INVALID)
+#define TILEINDEX_INVALID                -1
+#endif
+
+#if !defined(TILEINDEX_LINEAR_GENERAL)
+#define TILEINDEX_LINEAR_GENERAL         -2
+#endif
+
+#if !defined(TILEINDEX_LINEAR_ALIGNED)
+#define TILEINDEX_LINEAR_ALIGNED          8
+#endif
+
+/**
+***************************************************************************************************
+* Return codes
+***************************************************************************************************
+*/
+typedef enum _ADDR_E_RETURNCODE
+{
+    // General Return
+    ADDR_OK    = 0,
+    ADDR_ERROR = 1,
+
+    // Specific Errors
+    ADDR_OUTOFMEMORY,
+    ADDR_INVALIDPARAMS,
+    ADDR_NOTSUPPORTED,
+    ADDR_NOTIMPLEMENTED,
+    ADDR_PARAMSIZEMISMATCH,
+    ADDR_INVALIDGBREGVALUES,
+
+} ADDR_E_RETURNCODE;
+
+/**
+***************************************************************************************************
+* @brief
+*   Neutral enums that define tile modes for all H/W
+* @note
+*   R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from
+*   ADDR_TM_2D_TILED_XTHICK
+*
+***************************************************************************************************
+*/
+typedef enum _AddrTileMode
+{
+    ADDR_TM_LINEAR_GENERAL      = 0,    ///< Least restrictions, pitch: multiple of 8 if not buffer
+    ADDR_TM_LINEAR_ALIGNED      = 1,    ///< Requests pitch or slice to be multiple of 64 pixels
+    ADDR_TM_1D_TILED_THIN1      = 2,    ///< Linear array of 8x8 tiles
+    ADDR_TM_1D_TILED_THICK      = 3,    ///< Linear array of 8x8x4 tiles
+    ADDR_TM_2D_TILED_THIN1      = 4,    ///< A set of macro tiles consist of 8x8 tiles
+    ADDR_TM_2D_TILED_THIN2      = 5,    ///< 600 HWL only, macro tile ratio is 1:4
+    ADDR_TM_2D_TILED_THIN4      = 6,    ///< 600 HWL only, macro tile ratio is 1:16
+    ADDR_TM_2D_TILED_THICK      = 7,    ///< A set of macro tiles consist of 8x8x4 tiles
+    ADDR_TM_2B_TILED_THIN1      = 8,    ///< 600 HWL only, with bank swap
+    ADDR_TM_2B_TILED_THIN2      = 9,    ///< 600 HWL only, with bank swap and ratio is 1:4
+    ADDR_TM_2B_TILED_THIN4      = 10,   ///< 600 HWL only, with bank swap and ratio is 1:16
+    ADDR_TM_2B_TILED_THICK      = 11,   ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles
+    ADDR_TM_3D_TILED_THIN1      = 12,   ///< Macro tiling w/ pipe rotation between slices
+    ADDR_TM_3D_TILED_THICK      = 13,   ///< Macro tiling w/ pipe rotation between slices, thick
+    ADDR_TM_3B_TILED_THIN1      = 14,   ///< 600 HWL only, with bank swap
+    ADDR_TM_3B_TILED_THICK      = 15,   ///< 600 HWL only, with bank swap, thick
+    ADDR_TM_2D_TILED_XTHICK     = 16,   ///< Tile is 8x8x8, valid from NI
+    ADDR_TM_3D_TILED_XTHICK     = 17,   ///< Tile is 8x8x8, valid from NI
+    ADDR_TM_POWER_SAVE          = 18,   ///< Power save mode, only used by KMD on NI
+    ADDR_TM_PRT_TILED_THIN1     = 19,   ///< No bank/pipe rotation or hashing beyond macrotile size
+    ADDR_TM_PRT_2D_TILED_THIN1  = 20,   ///< Same as 2D_TILED_THIN1, PRT only
+    ADDR_TM_PRT_3D_TILED_THIN1  = 21,   ///< Same as 3D_TILED_THIN1, PRT only
+    ADDR_TM_PRT_TILED_THICK     = 22,   ///< No bank/pipe rotation or hashing beyond macrotile size
+    ADDR_TM_PRT_2D_TILED_THICK  = 23,   ///< Same as 2D_TILED_THICK, PRT only
+    ADDR_TM_PRT_3D_TILED_THICK  = 24,   ///< Same as 3D_TILED_THICK, PRT only
+    ADDR_TM_COUNT               = 25,   ///< Must be the value of the last tile mode
+} AddrTileMode;
+
+/**
+***************************************************************************************************
+*   AddrFormat
+*
+*   @brief
+*       Neutral enum for SurfaceFormat
+*
+***************************************************************************************************
+*/
+typedef enum _AddrFormat {
+    ADDR_FMT_INVALID                              = 0x00000000,
+    ADDR_FMT_8                                    = 0x00000001,
+    ADDR_FMT_4_4                                  = 0x00000002,
+    ADDR_FMT_3_3_2                                = 0x00000003,
+    ADDR_FMT_RESERVED_4                           = 0x00000004,
+    ADDR_FMT_16                                   = 0x00000005,
+    ADDR_FMT_16_FLOAT                             = 0x00000006,
+    ADDR_FMT_8_8                                  = 0x00000007,
+    ADDR_FMT_5_6_5                                = 0x00000008,
+    ADDR_FMT_6_5_5                                = 0x00000009,
+    ADDR_FMT_1_5_5_5                              = 0x0000000a,
+    ADDR_FMT_4_4_4_4                              = 0x0000000b,
+    ADDR_FMT_5_5_5_1                              = 0x0000000c,
+    ADDR_FMT_32                                   = 0x0000000d,
+    ADDR_FMT_32_FLOAT                             = 0x0000000e,
+    ADDR_FMT_16_16                                = 0x0000000f,
+    ADDR_FMT_16_16_FLOAT                          = 0x00000010,
+    ADDR_FMT_8_24                                 = 0x00000011,
+    ADDR_FMT_8_24_FLOAT                           = 0x00000012,
+    ADDR_FMT_24_8                                 = 0x00000013,
+    ADDR_FMT_24_8_FLOAT                           = 0x00000014,
+    ADDR_FMT_10_11_11                             = 0x00000015,
+    ADDR_FMT_10_11_11_FLOAT                       = 0x00000016,
+    ADDR_FMT_11_11_10                             = 0x00000017,
+    ADDR_FMT_11_11_10_FLOAT                       = 0x00000018,
+    ADDR_FMT_2_10_10_10                           = 0x00000019,
+    ADDR_FMT_8_8_8_8                              = 0x0000001a,
+    ADDR_FMT_10_10_10_2                           = 0x0000001b,
+    ADDR_FMT_X24_8_32_FLOAT                       = 0x0000001c,
+    ADDR_FMT_32_32                                = 0x0000001d,
+    ADDR_FMT_32_32_FLOAT                          = 0x0000001e,
+    ADDR_FMT_16_16_16_16                          = 0x0000001f,
+    ADDR_FMT_16_16_16_16_FLOAT                    = 0x00000020,
+    ADDR_FMT_RESERVED_33                          = 0x00000021,
+    ADDR_FMT_32_32_32_32                          = 0x00000022,
+    ADDR_FMT_32_32_32_32_FLOAT                    = 0x00000023,
+    ADDR_FMT_RESERVED_36                          = 0x00000024,
+    ADDR_FMT_1                                    = 0x00000025,
+    ADDR_FMT_1_REVERSED                           = 0x00000026,
+    ADDR_FMT_GB_GR                                = 0x00000027,
+    ADDR_FMT_BG_RG                                = 0x00000028,
+    ADDR_FMT_32_AS_8                              = 0x00000029,
+    ADDR_FMT_32_AS_8_8                            = 0x0000002a,
+    ADDR_FMT_5_9_9_9_SHAREDEXP                    = 0x0000002b,
+    ADDR_FMT_8_8_8                                = 0x0000002c,
+    ADDR_FMT_16_16_16                             = 0x0000002d,
+    ADDR_FMT_16_16_16_FLOAT                       = 0x0000002e,
+    ADDR_FMT_32_32_32                             = 0x0000002f,
+    ADDR_FMT_32_32_32_FLOAT                       = 0x00000030,
+    ADDR_FMT_BC1                                  = 0x00000031,
+    ADDR_FMT_BC2                                  = 0x00000032,
+    ADDR_FMT_BC3                                  = 0x00000033,
+    ADDR_FMT_BC4                                  = 0x00000034,
+    ADDR_FMT_BC5                                  = 0x00000035,
+    ADDR_FMT_BC6                                  = 0x00000036,
+    ADDR_FMT_BC7                                  = 0x00000037,
+    ADDR_FMT_32_AS_32_32_32_32                    = 0x00000038,
+    ADDR_FMT_APC3                                 = 0x00000039,
+    ADDR_FMT_APC4                                 = 0x0000003a,
+    ADDR_FMT_APC5                                 = 0x0000003b,
+    ADDR_FMT_APC6                                 = 0x0000003c,
+    ADDR_FMT_APC7                                 = 0x0000003d,
+    ADDR_FMT_CTX1                                 = 0x0000003e,
+    ADDR_FMT_RESERVED_63                          = 0x0000003f,
+} AddrFormat;
+
+/**
+***************************************************************************************************
+*   AddrDepthFormat
+*
+*   @brief
+*       Neutral enum for addrFlt32ToDepthPixel
+*
+***************************************************************************************************
+*/
+typedef enum _AddrDepthFormat
+{
+    ADDR_DEPTH_INVALID                            = 0x00000000,
+    ADDR_DEPTH_16                                 = 0x00000001,
+    ADDR_DEPTH_X8_24                              = 0x00000002,
+    ADDR_DEPTH_8_24                               = 0x00000003,
+    ADDR_DEPTH_X8_24_FLOAT                        = 0x00000004,
+    ADDR_DEPTH_8_24_FLOAT                         = 0x00000005,
+    ADDR_DEPTH_32_FLOAT                           = 0x00000006,
+    ADDR_DEPTH_X24_8_32_FLOAT                     = 0x00000007,
+
+} AddrDepthFormat;
+
+/**
+***************************************************************************************************
+*   AddrColorFormat
+*
+*   @brief
+*       Neutral enum for ColorFormat
+*
+***************************************************************************************************
+*/
+typedef enum _AddrColorFormat
+{
+    ADDR_COLOR_INVALID                            = 0x00000000,
+    ADDR_COLOR_8                                  = 0x00000001,
+    ADDR_COLOR_4_4                                = 0x00000002,
+    ADDR_COLOR_3_3_2                              = 0x00000003,
+    ADDR_COLOR_RESERVED_4                         = 0x00000004,
+    ADDR_COLOR_16                                 = 0x00000005,
+    ADDR_COLOR_16_FLOAT                           = 0x00000006,
+    ADDR_COLOR_8_8                                = 0x00000007,
+    ADDR_COLOR_5_6_5                              = 0x00000008,
+    ADDR_COLOR_6_5_5                              = 0x00000009,
+    ADDR_COLOR_1_5_5_5                            = 0x0000000a,
+    ADDR_COLOR_4_4_4_4                            = 0x0000000b,
+    ADDR_COLOR_5_5_5_1                            = 0x0000000c,
+    ADDR_COLOR_32                                 = 0x0000000d,
+    ADDR_COLOR_32_FLOAT                           = 0x0000000e,
+    ADDR_COLOR_16_16                              = 0x0000000f,
+    ADDR_COLOR_16_16_FLOAT                        = 0x00000010,
+    ADDR_COLOR_8_24                               = 0x00000011,
+    ADDR_COLOR_8_24_FLOAT                         = 0x00000012,
+    ADDR_COLOR_24_8                               = 0x00000013,
+    ADDR_COLOR_24_8_FLOAT                         = 0x00000014,
+    ADDR_COLOR_10_11_11                           = 0x00000015,
+    ADDR_COLOR_10_11_11_FLOAT                     = 0x00000016,
+    ADDR_COLOR_11_11_10                           = 0x00000017,
+    ADDR_COLOR_11_11_10_FLOAT                     = 0x00000018,
+    ADDR_COLOR_2_10_10_10                         = 0x00000019,
+    ADDR_COLOR_8_8_8_8                            = 0x0000001a,
+    ADDR_COLOR_10_10_10_2                         = 0x0000001b,
+    ADDR_COLOR_X24_8_32_FLOAT                     = 0x0000001c,
+    ADDR_COLOR_32_32                              = 0x0000001d,
+    ADDR_COLOR_32_32_FLOAT                        = 0x0000001e,
+    ADDR_COLOR_16_16_16_16                        = 0x0000001f,
+    ADDR_COLOR_16_16_16_16_FLOAT                  = 0x00000020,
+    ADDR_COLOR_RESERVED_33                        = 0x00000021,
+    ADDR_COLOR_32_32_32_32                        = 0x00000022,
+    ADDR_COLOR_32_32_32_32_FLOAT                  = 0x00000023,
+} AddrColorFormat;
+
+/**
+***************************************************************************************************
+*   AddrSurfaceNumber
+*
+*   @brief
+*       Neutral enum for SurfaceNumber
+*
+***************************************************************************************************
+*/
+typedef enum _AddrSurfaceNumber {
+    ADDR_NUMBER_UNORM                             = 0x00000000,
+    ADDR_NUMBER_SNORM                             = 0x00000001,
+    ADDR_NUMBER_USCALED                           = 0x00000002,
+    ADDR_NUMBER_SSCALED                           = 0x00000003,
+    ADDR_NUMBER_UINT                              = 0x00000004,
+    ADDR_NUMBER_SINT                              = 0x00000005,
+    ADDR_NUMBER_SRGB                              = 0x00000006,
+    ADDR_NUMBER_FLOAT                             = 0x00000007,
+} AddrSurfaceNumber;
+
+/**
+***************************************************************************************************
+*   AddrSurfaceSwap
+*
+*   @brief
+*       Neutral enum for SurfaceSwap
+*
+***************************************************************************************************
+*/
+typedef enum _AddrSurfaceSwap {
+    ADDR_SWAP_STD                                 = 0x00000000,
+    ADDR_SWAP_ALT                                 = 0x00000001,
+    ADDR_SWAP_STD_REV                             = 0x00000002,
+    ADDR_SWAP_ALT_REV                             = 0x00000003,
+} AddrSurfaceSwap;
+
+/**
+***************************************************************************************************
+*   AddrHtileBlockSize
+*
+*   @brief
+*       Size of HTILE blocks, valid values are 4 or 8 for now
+***************************************************************************************************
+*/
+typedef enum _AddrHtileBlockSize
+{
+    ADDR_HTILE_BLOCKSIZE_4 = 4,
+    ADDR_HTILE_BLOCKSIZE_8 = 8,
+} AddrHtileBlockSize;
+
+
+/**
+***************************************************************************************************
+*   AddrPipeCfg
+*
+*   @brief
+*       The pipe configuration field specifies both the number of pipes and
+*       how pipes are interleaved on the surface.
+*       The expression of number of pipes, the shader engine tile size, and packer tile size
+*       is encoded in a PIPE_CONFIG register field.
+*       In general the number of pipes usually matches the number of memory channels of the
+*       hardware configuration.
+*       For hw configurations w/ non-pow2 number of memory channels, it usually matches
+*       the number of ROP units(? TODO: which registers??)
+*       The enum value = hw enum + 1 which is to reserve 0 for requesting default.
+***************************************************************************************************
+*/
+typedef enum _AddrPipeCfg
+{
+    ADDR_PIPECFG_INVALID         = 0,
+    ADDR_PIPECFG_P2              = 1, /// 2 pipes,
+    ADDR_PIPECFG_P4_8x16         = 5, /// 4 pipes,
+    ADDR_PIPECFG_P4_16x16        = 6,
+    ADDR_PIPECFG_P4_16x32        = 7,
+    ADDR_PIPECFG_P4_32x32        = 8,
+    ADDR_PIPECFG_P8_16x16_8x16   = 9, /// 8 pipes
+    ADDR_PIPECFG_P8_16x32_8x16   = 10,
+    ADDR_PIPECFG_P8_32x32_8x16   = 11,
+    ADDR_PIPECFG_P8_16x32_16x16  = 12,
+    ADDR_PIPECFG_P8_32x32_16x16  = 13,
+    ADDR_PIPECFG_P8_32x32_16x32  = 14,
+    ADDR_PIPECFG_P8_32x64_32x32  = 15,
+    ADDR_PIPECFG_P16_32x32_8x16  = 17, /// 16 pipes
+    ADDR_PIPECFG_P16_32x32_16x16 = 18,
+    ADDR_PIPECFG_MAX             = 19,
+} AddrPipeCfg;
+
+/**
+***************************************************************************************************
+* AddrTileType
+*
+*   @brief
+*       Neutral enum that specifies micro tile type (MICRO_TILE_MODE)
+***************************************************************************************************
+*/
+typedef enum _AddrTileType
+{
+    ADDR_DISPLAYABLE        = 0,    ///< Displayable tiling
+    ADDR_NON_DISPLAYABLE    = 1,    ///< Non-displayable tiling, a.k.a thin micro tiling
+    ADDR_DEPTH_SAMPLE_ORDER = 2,    ///< Same as non-displayable plus depth-sample-order
+    ADDR_ROTATED            = 3,    ///< Rotated displayable tiling
+    ADDR_THICK              = 4,    ///< Thick micro-tiling, only valid for THICK and XTHICK
+} AddrTileType;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//  Type definitions: short system-independent names for address library types
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if !defined(__APPLE__)
+
+#ifndef BOOL_32        // no bool type in C
+/// @brief Boolean type, since none is defined in C
+/// @ingroup type
+#define BOOL_32 int
+#endif
+
+#ifndef INT_32
+#define INT_32  int
+#endif
+
+#ifndef UINT_32
+#define UINT_32 unsigned int
+#endif
+
+#ifndef INT_16
+#define INT_16  short
+#endif
+
+#ifndef UINT_16
+#define UINT_16 unsigned short
+#endif
+
+#ifndef INT_8
+#define INT_8   char
+#endif
+
+#ifndef UINT_8
+#define UINT_8  unsigned char
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+//
+//  64-bit integer types depend on the compiler
+//
+#if defined( __GNUC__ ) || defined( __WATCOMC__ )
+#define INT_64   long long
+#define UINT_64  unsigned long long
+
+#elif defined( _WIN32 )
+#define INT_64   __int64
+#define UINT_64  unsigned __int64
+
+#else
+#error Unsupported compiler and/or operating system for 64-bit integers
+
+/// @brief 64-bit signed integer type (compiler dependent)
+/// @ingroup type
+///
+/// The addrlib defines a 64-bit signed integer type for either
+/// Gnu/Watcom compilers (which use the first syntax) or for
+/// the Windows VCC compiler (which uses the second syntax).
+#define INT_64  long long OR __int64
+
+/// @brief 64-bit unsigned integer type (compiler dependent)
+/// @ingroup type
+///
+/// The addrlib defines a 64-bit unsigned integer type for either
+/// Gnu/Watcom compilers (which use the first syntax) or for
+/// the Windows VCC compiler (which uses the second syntax).
+///
+#define UINT_64  unsigned long long OR unsigned __int64
+#endif
+
+#endif // #if !defined(__APPLE__)
+
+//  ADDR64X is used to print addresses in hex form on both Windows and Linux
+//
+#if defined( __GNUC__ ) || defined( __WATCOMC__ )
+#define ADDR64X "llx"
+#define ADDR64D "lld"
+
+#elif defined( _WIN32 )
+#define ADDR64X "I64x"
+#define ADDR64D "I64d"
+
+#else
+#error Unsupported compiler and/or operating system for 64-bit integers
+
+/// @brief Addrlib device address 64-bit printf tag  (compiler dependent)
+/// @ingroup type
+///
+/// This allows printf to display an ADDR_64 for either the Windows VCC compiler
+/// (which uses "I64x") or the Gnu/Watcom compilers (which use "llx").
+/// An example of use is printf("addr 0x%"ADDR64X"\n", address);
+///
+#define ADDR64X "llx" OR "I64x"
+#define ADDR64D "lld" OR "I64d"
+#endif
+
+
+/// @brief Union for storing a 32-bit float or 32-bit integer
+/// @ingroup type
+///
+/// This union provides a simple way to convert between a 32-bit float
+/// and a 32-bit integer. It also prevents the compiler from producing
+/// code that alters NaN values when assigning or copying floats.
+/// Therefore, all address library routines that pass or return 32-bit
+/// floating point data do so by passing or returning an ADDR_FLT_32.
+///
+typedef union {
+    INT_32   i;
+    UINT_32  u;
+    float    f;
+} ADDR_FLT_32;
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+//  Macros for controlling linking and building on multiple systems
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(_MSC_VER)
+#if defined(va_copy)
+#undef va_copy  //redefine va_copy to support VC2013
+#endif
+#endif
+
+#if !defined(va_copy)
+#define va_copy(dst, src) \
+    ((void) memcpy(&(dst), &(src), sizeof(va_list)))
+#endif
+
+#endif // __ADDR_TYPES_H__
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrcommon.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrcommon.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrcommon.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrcommon.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,558 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrcommon.h
+* @brief Contains the helper function and constants
+***************************************************************************************************
+*/
+
+#ifndef __ADDR_COMMON_H__
+#define __ADDR_COMMON_H__
+
+#include "addrinterface.h"
+
+
+// ADDR_LNX_KERNEL_BUILD is for internal build
+// Moved from addrinterface.h so __KERNEL__ is not needed any more
+#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
+    #include "lnx_common_defs.h" // ported from cmmqs
+#elif !defined(__APPLE__)
+    #include <stdlib.h>
+    #include <string.h>
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Common constants
+///////////////////////////////////////////////////////////////////////////////////////////////////
+static const UINT_32 MicroTileWidth      = 8;       ///< Micro tile width, for 1D and 2D tiling
+static const UINT_32 MicroTileHeight     = 8;       ///< Micro tile height, for 1D and 2D tiling
+static const UINT_32 ThickTileThickness  = 4;       ///< Micro tile thickness, for THICK modes
+static const UINT_32 XThickTileThickness = 8;       ///< Extra thick tiling thickness
+static const UINT_32 PowerSaveTileBytes  = 64;      ///< Number of bytes per tile for power save 64
+static const UINT_32 CmaskCacheBits      = 1024;    ///< Number of bits for CMASK cache
+static const UINT_32 CmaskElemBits       = 4;       ///< Number of bits for CMASK element
+static const UINT_32 HtileCacheBits      = 16384;   ///< Number of bits for HTILE cache 512*32
+
+static const UINT_32 MicroTilePixels     = MicroTileWidth * MicroTileHeight;
+
+static const INT_32 TileIndexInvalid        = TILEINDEX_INVALID;
+static const INT_32 TileIndexLinearGeneral  = TILEINDEX_LINEAR_GENERAL;
+static const INT_32 TileIndexNoMacroIndex   = -3;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Common macros
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#define BITS_PER_BYTE 8
+#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE )
+#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE )
+
+/// Helper macros to select a single bit from an int (undefined later in section)
+#define _BIT(v,b)      (((v) >> (b) ) & 1)
+
+/**
+***************************************************************************************************
+* @brief Enums to identify AddrLib type
+***************************************************************************************************
+*/
+enum AddrLibClass
+{
+    BASE_ADDRLIB = 0x0,
+    R600_ADDRLIB = 0x6,
+    R800_ADDRLIB = 0x8,
+    SI_ADDRLIB   = 0xa,
+    CI_ADDRLIB   = 0xb,
+};
+
+/**
+***************************************************************************************************
+* AddrChipFamily
+*
+*   @brief
+*       Neutral enums that specifies chip family.
+*
+***************************************************************************************************
+*/
+enum AddrChipFamily
+{
+    ADDR_CHIP_FAMILY_IVLD,    ///< Invalid family
+    ADDR_CHIP_FAMILY_R6XX,
+    ADDR_CHIP_FAMILY_R7XX,
+    ADDR_CHIP_FAMILY_R8XX,
+    ADDR_CHIP_FAMILY_NI,
+    ADDR_CHIP_FAMILY_SI,
+    ADDR_CHIP_FAMILY_CI,
+    ADDR_CHIP_FAMILY_VI,
+};
+
+/**
+***************************************************************************************************
+* ADDR_CONFIG_FLAGS
+*
+*   @brief
+*       This structure is used to set addr configuration flags.
+***************************************************************************************************
+*/
+union ADDR_CONFIG_FLAGS
+{
+    struct
+    {
+        /// Clients do not need to set these flags except forceLinearAligned.
+        /// These flags are set up inside AddrLib through AddrInitGlobalParamsFromRegister
+        UINT_32 optimalBankSwap        : 1;    ///< New bank tiling for RV770 only
+        UINT_32 noCubeMipSlicesPad     : 1;    ///< Disables faces padding for cubemap mipmaps
+        UINT_32 fillSizeFields         : 1;    ///< If clients fill size fields in all input and
+                                               ///  output structure
+        UINT_32 ignoreTileInfo         : 1;    ///< Don't use tile info structure
+        UINT_32 useTileIndex           : 1;    ///< Make tileIndex field in input valid
+        UINT_32 useCombinedSwizzle     : 1;    ///< Use combined swizzle
+        UINT_32 checkLast2DLevel       : 1;    ///< Check the last 2D mip sub level
+        UINT_32 useHtileSliceAlign     : 1;    ///< Do htile single slice alignment
+        UINT_32 degradeBaseLevel       : 1;    ///< Degrade to 1D modes automatically for base level
+        UINT_32 allowLargeThickTile    : 1;    ///< Allow 64*thickness*bytesPerPixel > rowSize
+        UINT_32 reserved               : 22;   ///< Reserved bits for future use
+    };
+
+    UINT_32 value;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Platform specific debug break defines
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if DEBUG
+    #if defined(__GNUC__)
+        #define ADDR_DBG_BREAK()
+    #elif defined(__APPLE__)
+        #define ADDR_DBG_BREAK()    { IOPanic("");}
+    #else
+        #define ADDR_DBG_BREAK()    { __debugbreak(); }
+    #endif
+#else
+    #define ADDR_DBG_BREAK()
+#endif
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Debug assertions used in AddrLib
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if DEBUG
+#define ADDR_ASSERT(__e) if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); }
+#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK()
+#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case")
+#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented") // no ';' here: the caller adds it
+#else //DEBUG
+#define ADDR_ASSERT(__e)
+#define ADDR_ASSERT_ALWAYS()
+#define ADDR_UNHANDLED_CASE()
+#define ADDR_NOT_IMPLEMENTED()
+#endif //DEBUG
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Debug print macro from legacy address library
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if DEBUG
+
+#define ADDR_PRNT(a)    AddrObject::DebugPrint a
+
+/// @brief Macro for reporting informational messages
+/// @ingroup util
+///
+/// This macro optionally prints an informational message to stdout.
+/// The first parameter is a condition -- if it is true, nothing is done.
+/// The second pararmeter MUST be a parenthesis-enclosed list of arguments,
+/// starting with a string. This is passed to printf() or an equivalent
+/// in order to format the informational message. For example,
+/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3".
+///
+#define ADDR_INFO(cond, a)         \
+{ if (!(cond)) { ADDR_PRNT(a); } }
+
+
+/// @brief Macro for reporting error warning messages
+/// @ingroup util
+///
+/// This macro optionally prints an error warning message to stdout,
+/// followed by the file name and line number where the macro was called.
+/// The first parameter is a condition -- if it is true, nothing is done.
+/// The second pararmeter MUST be a parenthesis-enclosed list of arguments,
+/// starting with a string. This is passed to printf() or an equivalent
+/// in order to format the informational message. For example,
+/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by
+/// a second line with the file name and line number.
+///
+#define ADDR_WARN(cond, a)         \
+{ if (!(cond))                     \
+  { ADDR_PRNT(a);                  \
+    ADDR_PRNT(("  WARNING in file %s, line %d\n", __FILE__, __LINE__)); \
+} }
+
+
+/// @brief Macro for reporting fatal error conditions
+/// @ingroup util
+///
+/// This macro optionally stops execution of the current routine
+/// after printing an error warning message to stdout,
+/// followed by the file name and line number where the macro was called.
+/// The first parameter is a condition -- if it is true, nothing is done.
+/// The second pararmeter MUST be a parenthesis-enclosed list of arguments,
+/// starting with a string. This is passed to printf() or an equivalent
+/// in order to format the informational message. For example,
+/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" followed by
+/// a second line with the file name and line number, then stops execution.
+///
+#define ADDR_EXIT(cond, a)         \
+{ if (!(cond))                     \
+  { ADDR_PRNT(a); ADDR_DBG_BREAK();\
+} }
+
+#else // DEBUG
+
+#define ADDRDPF 1 ? (void)0 : (void)
+
+#define ADDR_PRNT(a)
+
+#define ADDR_DBG_BREAK()
+
+#define ADDR_INFO(cond, a)
+
+#define ADDR_WARN(cond, a)
+
+#define ADDR_EXIT(cond, a)
+
+#endif // DEBUG
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Misc helper functions
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   XorReduce
+*
+*   @brief
+*       Xor the right-side numberOfBits bits of x.
+***************************************************************************************************
+*/
+static inline UINT_32 XorReduce(
+    UINT_32 x,
+    UINT_32 numberOfBits)
+{
+    UINT_32 i;
+    UINT_32 result = x & 1;
+
+    for (i=1; i<numberOfBits; i++)
+    {
+        result ^= ((x>>i) & 1);
+    }
+
+    return result;
+}
+
+/**
+***************************************************************************************************
+*   IsPow2
+*
+*   @brief
+*       Check if the size (UINT_32) is pow 2
+***************************************************************************************************
+*/
+static inline UINT_32 IsPow2(
+    UINT_32 dim)        ///< [in] dimension of miplevel
+{
+    ADDR_ASSERT(dim > 0);
+    return !(dim & (dim - 1));
+}
+
+/**
+***************************************************************************************************
+*   IsPow2
+*
+*   @brief
+*       Check if the size (UINT_64) is pow 2
+***************************************************************************************************
+*/
+static inline UINT_64 IsPow2(
+    UINT_64 dim)        ///< [in] dimension of miplevel
+{
+    ADDR_ASSERT(dim > 0);
+    return !(dim & (dim - 1));
+}
+
+/**
+***************************************************************************************************
+*   PowTwoAlign
+*
+*   @brief
+*       Align UINT_32 "x" to "align" alignment, "align" should be power of 2
+***************************************************************************************************
+*/
+static inline UINT_32 PowTwoAlign(
+    UINT_32 x,
+    UINT_32 align)
+{
+    //
+    // Assert that align is a power of two.
+    //
+    ADDR_ASSERT(IsPow2(align));
+    return (x + (align - 1)) & (~(align - 1));
+}
+
+/**
+***************************************************************************************************
+*   PowTwoAlign
+*
+*   @brief
+*       Align UINT_64 "x" to "align" alignment, "align" should be power of 2
+***************************************************************************************************
+*/
+static inline UINT_64 PowTwoAlign(
+    UINT_64 x,
+    UINT_64 align)
+{
+    //
+    // Assert that align is a power of two.
+    //
+    ADDR_ASSERT(IsPow2(align));
+    return (x + (align - 1)) & (~(align - 1));
+}
+
+/**
+***************************************************************************************************
+*   Min
+*
+*   @brief
+*       Get the min value between two unsigned values
+***************************************************************************************************
+*/
+static inline UINT_32 Min(
+    UINT_32 value1,
+    UINT_32 value2)
+{
+    return ((value1 < (value2)) ? (value1) : value2);
+}
+
+/**
+***************************************************************************************************
+*   Min
+*
+*   @brief
+*       Get the min value between two signed values
+***************************************************************************************************
+*/
+static inline INT_32 Min(
+    INT_32 value1,
+    INT_32 value2)
+{
+    return ((value1 < (value2)) ? (value1) : value2);
+}
+
+/**
+***************************************************************************************************
+*   Max
+*
+*   @brief
+*       Get the max value between two unsigned values
+***************************************************************************************************
+*/
+static inline UINT_32 Max(
+    UINT_32 value1,
+    UINT_32 value2)
+{
+    return ((value1 > (value2)) ? (value1) : value2);
+}
+
+/**
+***************************************************************************************************
+*   Max
+*
+*   @brief
+*       Get the max value between two signed values
+***************************************************************************************************
+*/
+static inline INT_32 Max(
+    INT_32 value1,
+    INT_32 value2)
+{
+    return ((value1 > (value2)) ? (value1) : value2);
+}
+
+/**
+***************************************************************************************************
+*   NextPow2
+*
+*   @brief
+*       Round dim up to a power of two (returns 0x80000000 if dim exceeds 0x7fffffff)
+***************************************************************************************************
+*/
+static inline UINT_32 NextPow2(
+    UINT_32 dim)        ///< [in] dimension of miplevel
+{
+    UINT_32 newDim;
+
+    newDim = 1;
+
+    if (dim > 0x7fffffff)
+    {
+        ADDR_ASSERT_ALWAYS();
+        newDim = 0x80000000;
+    }
+    else
+    {
+        while (newDim < dim)
+        {
+            newDim <<= 1;
+        }
+    }
+
+    return newDim;
+}
+
+/**
+***************************************************************************************************
+*   Log2
+*
+*   @brief
+*       Compute log of base 2
+***************************************************************************************************
+*/
+static inline UINT_32 Log2(
+    UINT_32 x)      ///< [in] the value should calculate log based 2
+{
+    UINT_32 y;
+
+    //
+    // Assert that x is a power of two.
+    //
+    ADDR_ASSERT(IsPow2(x));
+
+    y = 0;
+    while (x > 1)
+    {
+        x >>= 1;
+        y++;
+    }
+
+    return y;
+}
+
+/**
+***************************************************************************************************
+*   QLog2
+*
+*   @brief
+*       Compute log of base 2 quickly (<= 16)
+***************************************************************************************************
+*/
+static inline UINT_32 QLog2(
+    UINT_32 x)      ///< [in] the value should calculate log based 2
+{
+    ADDR_ASSERT(x <= 16);
+
+    UINT_32 y = 0;
+
+    switch (x)
+    {
+        case 1:
+            y = 0;
+            break;
+        case 2:
+            y = 1;
+            break;
+        case 4:
+            y = 2;
+            break;
+        case 8:
+            y = 3;
+            break;
+        case 16:
+            y = 4;
+            break;
+        default:
+            ADDR_ASSERT_ALWAYS();
+    }
+
+    return y;
+}
+
+/**
+***************************************************************************************************
+*   SafeAssign
+*
+*   @brief
+*       NULL pointer safe assignment
+***************************************************************************************************
+*/
+static inline VOID SafeAssign(
+    UINT_32*    pLVal,  ///< [out] Pointer to left val, may be NULL
+    UINT_32     rVal)   ///< [in] Right value
+{
+    if (pLVal)
+    {
+        *pLVal = rVal;
+    }
+}
+
+/**
+***************************************************************************************************
+*   SafeAssign
+*
+*   @brief
+*       NULL pointer safe assignment for 64bit values
+***************************************************************************************************
+*/
+static inline VOID SafeAssign(
+    UINT_64*    pLVal,  ///< [out] Pointer to left val, may be NULL
+    UINT_64     rVal)   ///< [in] Right value
+{
+    if (pLVal)
+    {
+        *pLVal = rVal;
+    }
+}
+
+/**
+***************************************************************************************************
+*   SafeAssign
+*
+*   @brief
+*       NULL pointer safe assignment for AddrTileMode
+***************************************************************************************************
+*/
+static inline VOID SafeAssign(
+    AddrTileMode*    pLVal, ///< [out] Pointer to left val, may be NULL
+    AddrTileMode     rVal)  ///< [in] Right value
+{
+    if (pLVal)
+    {
+        *pLVal = rVal;
+    }
+}
+
+#endif // __ADDR_COMMON_H__
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1674 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrelemlib.cpp
+* @brief Contains the class implementation for element/pixel related functions
+***************************************************************************************************
+*/
+
+#include "addrelemlib.h"
+#include "addrlib.h"
+
+
+/**
+***************************************************************************************************
+*   AddrElemLib::AddrElemLib
+*
+*   @brief
+*       constructor
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+AddrElemLib::AddrElemLib(
+    AddrLib* const pAddrLib) :  ///< [in] Parent addrlib instance pointer
+    AddrObject(pAddrLib->GetClient()),
+    m_pAddrLib(pAddrLib)
+{
+    switch (m_pAddrLib->GetAddrChipFamily())
+    {
+        case ADDR_CHIP_FAMILY_R6XX:
+            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
+            m_fp16ExportNorm = 0;
+            break;
+        case ADDR_CHIP_FAMILY_R7XX:
+            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
+            m_fp16ExportNorm = 1;
+            break;
+        case ADDR_CHIP_FAMILY_R8XX:
+        case ADDR_CHIP_FAMILY_NI: // Same as 8xx
+            m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
+            m_fp16ExportNorm = 1;
+            break;
+        default:
+            m_fp16ExportNorm = 1;
+            m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
+    }
+
+    m_configFlags.value = 0;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::~AddrElemLib
+*
+*   @brief
+*       destructor
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+AddrElemLib::~AddrElemLib()
+{
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::Create
+*
+*   @brief
+*       Creates and initializes AddrElemLib object.
+*
+*   @return
+*       Returns pointer to the new AddrElemLib, or NULL if pAddrLib is NULL.
+***************************************************************************************************
+*/
+AddrElemLib* AddrElemLib::Create(
+    const AddrLib* const        pAddrLib)   ///< [in] Pointer of parent AddrLib instance
+{
+    AddrElemLib* pElemLib = NULL;
+
+    if (pAddrLib)
+    {
+        pElemLib = new(pAddrLib->GetClient()) AddrElemLib(const_cast<AddrLib* const>(pAddrLib));
+    }
+
+    return pElemLib;
+}
+
+/**************************************************************************************************
+*   AddrElemLib::Flt32sToInt32s
+*
+*   @brief
+*       Convert an ADDR_FLT_32 value to a "bits"-wide integer according to numberType
+*
+*   @return
+*       N/A (result is written to *pResult; zero-bit and invalid types leave it untouched)
+***************************************************************************************************
+*/
+VOID AddrElemLib::Flt32sToInt32s(
+    ADDR_FLT_32     value,      ///< [in] ADDR_FLT_32 value
+    UINT_32         bits,       ///< [in] number of bits in value
+    AddrNumberType  numberType, ///< [in] the type of number
+    UINT_32*        pResult)    ///< [out] Int32 value
+{
+    UINT_8 round = 128;    //ADDR_ROUND_BY_HALF
+    UINT_32 uscale;
+    UINT_32 sign;
+
+    //convert each component to an INT_32
+    switch ( numberType )
+    {
+        case ADDR_NO_NUMBER:    //fall through
+        case ADDR_ZERO:         //fall through
+        case ADDR_ONE:          //fall through
+        case ADDR_EPSILON:      //fall through
+            return;        // these are zero-bit components, so don't set result
+
+        case ADDR_UINT_BITS:            // unsigned integer bit field, clamped to range
+            if (bits == 32)               // special case unsigned 32-bit int
+            {
+                *pResult = value.i;
+            }
+            else
+            {
+                uscale = (1<<bits) - 1;   // only computed for bits < 32 (no 32-bit shift)
+                if ((value.i < 0) || (value.u > uscale))
+                {
+                    *pResult = uscale;
+                }
+                else
+                {
+                    *pResult = value.i;
+                }
+            }
+            return;                       // never fall through into the UNORM cases
+
+        // The algorithm used in the DB and TX differs at one value for 24-bit unorms
+        case ADDR_UNORM_R6XXDB:        // unsigned repeating fraction
+            if ((bits==24) && (value.i == 0x33000000))
+            {
+                *pResult = 1;
+                return;
+            }              // Else treat like ADDR_UNORM_R6XX
+
+        case ADDR_UNORM_R6XX:            // unsigned repeating fraction
+            if (value.f <= 0)
+            {
+                *pResult = 0;            // first clamp to [0..1]
+            }
+            else
+            {
+                if (value.f >= 1)
+                {
+                     *pResult = (1<<bits) - 1;
+                }
+                else
+                {
+                    if ((value.i | 0x87FFFFFF) == 0xFFFFFFFF)
+                    {
+                        *pResult = 0;                        // NaN, so force to 0
+                    }
+
+                    #if 0 // floating point version for documentation
+                    else
+                    {
+                        FLOAT f = value.f * ((1<<bits) - 1);
+                        *pResult = static_cast<INT_32>(f + (round/256.0f));
+                    }
+                    #endif
+                    else
+                    {
+                        ADDR_FLT_32 scaled;
+                        ADDR_FLT_32 shifted;
+                        UINT_64 truncated, rounded;
+                        UINT_32 altShift;
+                        UINT_32 mask = (1 << bits) - 1;
+                        UINT_32 half = 1 << (bits - 1);
+                        UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000;
+                        UINT_64 temp = mant24 - (mant24>>bits) -
+                            static_cast<INT_32>((mant24 & mask) > half);
+                        UINT_32 exp8 = value.i >> 23;
+                        UINT_32 shift = 126 - exp8 + 24 - bits;
+                        UINT_64 final;
+
+                        if (shift >= 32) // This is zero, even with maximum dither add
+                        {
+                            final = 0;
+                        }
+                        else
+                        {
+                            final = ((temp<<8) + (static_cast<UINT_64>(round)<<shift)) >> (shift+8);
+                        }
+                        //ADDR_EXIT( *pResult == final,
+                        //    ("Float %x converted to %d-bit Unorm %x != bitwise %x",
+                        //     value.u, bits, (UINT_32)*pResult, (UINT_32)final) );
+                        if (final > mask)
+                        {
+                            final = mask;
+                        }
+
+                        scaled.f  = value.f * ((1<<bits) - 1);
+                        shifted.f = (scaled.f * 256);
+                        truncated = ((shifted.i&0x7FFFFF) + (INT_64)0x800000) << 8;
+                        altShift  = 126 + 24 + 8 - ((shifted.i>>23)&0xFF);
+                        truncated = (altShift > 60) ? 0 : truncated >> altShift;
+                        rounded   = static_cast<INT_32>((round + truncated) >> 8);
+                        //if (rounded > ((1<<bits) - 1))
+                        //    rounded = ((1<<bits) - 1);
+                        *pResult = static_cast<INT_32>(rounded); //(INT_32)final;
+                    }
+                }
+            }
+
+            return;
+
+        case ADDR_S8FLOAT32:    // 32-bit IEEE float, passes through NaN values
+            *pResult = value.i;
+            return;
+
+        // @@ FIX ROUNDING in this code, fix the denorm case
+        case ADDR_U4FLOATC:         // Unsigned float, 4-bit exponent. bias 15, clamped [0..1]
+            sign = (value.i >> 31) & 1;
+            if ((value.i&0x7F800000) == 0x7F800000)    // If NaN or INF:
+            {
+                if ((value.i&0x007FFFFF) != 0)             // then if NaN
+                {
+                    *pResult = 0;                       // return 0
+                }
+                else
+                {
+                    *pResult = (sign)?0:0xF00000;           // else +INF->+1, -INF->0
+                }
+                return;
+            }
+            if (value.f <= 0)
+            {
+                *pResult = 0;
+            }
+            else
+            {
+                if (value.f>=1)
+                {
+                    *pResult = 0xF << (bits-4);
+                }
+                else
+                {
+                    if ((value.i>>23) > 112 )
+                    {
+                        // 24-bit float: normalized
+                        // value.i += 1 << (22-bits+4);
+                        // round the IEEE mantissa to mantissa size
+                        // @@ NOTE: add code to support rounding
+                        value.u &= 0x7FFFFFF;             // mask off high 4 exponent bits
+                        *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits
+                    }
+                    else
+                    {
+                        // 24-bit float: denormalized
+                        value.f = value.f / (1<<28) / (1<<28);
+                        value.f = value.f / (1<<28) / (1<<28);    // convert to IEEE denorm
+                        // value.i += 1 << (22-bits+4);
+                        // round the IEEE mantissa to mantissa size
+                        // @@ NOTE: add code to support rounding
+                        *pResult = value.i >> (23-bits+4);    // shift off unused mantissa bits
+                    }
+                }
+            }
+
+            return;
+
+        default:                    // invalid number mode
+            //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) );
+            break;
+
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::Int32sToPixel
+*
+*   @brief
+*       Pack 32-bit integer values into an uncompressed pixel,
+*       in the proper order
+*
+*   @return
+*       N/A
+*
+*   @note
+*       This entry point packs four 32-bit integer values into
+*       an uncompressed pixel. The pixel values are specified in
+*       standard order, e.g. depth/stencil. This routine asserts
+*       if called on compressed pixel.
+***************************************************************************************************
+*/
+VOID AddrElemLib::Int32sToPixel(
+    UINT_32              numComps,      ///< [in] number of components
+    UINT_32*             pComps,        ///< [in] components
+    UINT_32*             pCompBits,     ///< [in] total bits in each component
+    UINT_32*             pCompStart,    ///< [in] the first bit position of each component
+    ADDR_COMPONENT_FLAGS properties,    ///< [in] properties about byteAligned, exportNorm
+    UINT_32              resultBits,    ///< [in] result bits: total bpp after decompression
+    UINT_8*              pPixel)        ///< [out] a depth/stencil pixel value
+{
+    UINT_32 i;
+    UINT_32 j;
+    UINT_32 start;
+    UINT_32 size;
+    UINT_32 byte;
+    UINT_32 value = 0;
+    UINT_32 compMask;
+    UINT_32 elemMask=0;
+    UINT_32 elementXor = 0;  // address xor when reading bytes from elements
+
+
+    // @@ NOTE: assert if called on a compressed format!
+
+    if (properties.byteAligned)    // Components are all byte-sized
+    {
+        for (i = 0; i < numComps; i++)        // Then for each component
+        {
+            // Copy the bytes of the component into the element
+            start = pCompStart[i] / 8;
+            size  = pCompBits[i]  / 8;
+            for (j = 0; j < size; j++)
+            {
+                pPixel[(j+start)^elementXor] = static_cast<UINT_8>(pComps[i] >> (8*j));
+            }
+        }
+    }
+    else                        // Element is 32-bits or less, components are bit fields
+    {
+        // First, extract each component in turn and combine it into a 32-bit value
+        for (i = 0; i < numComps; i++)
+        {
+            compMask = (1 << pCompBits[i]) - 1;
+            elemMask |= compMask << pCompStart[i];
+            value |= (pComps[i] & compMask) << pCompStart[i];
+        }
+
+        // Next, copy the masked value into the element, keeping pixel bits outside elemMask
+        size = (resultBits + 7) / 8;
+        for (i = 0; i < size; i++)
+        {
+            byte = pPixel[i^elementXor] & ~(elemMask >> (8*i));
+            pPixel[i^elementXor] = static_cast<UINT_8>(byte | ((elemMask & value) >> (8*i)));
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::Flt32ToDepthPixel
+*
+*   @brief
+*       Convert two FLT_32 depth components to a packed depth/stencil pixel value
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::Flt32ToDepthPixel(
+    AddrDepthFormat     format,     ///< [in] Depth format
+    const ADDR_FLT_32   comps[2],   ///< [in] two components of depth
+    UINT_8*             pPixel      ///< [out] depth pixel value
+    ) const
+{
+    UINT_32 i;
+    UINT_32 values[2];                  // integer encodings of comps[0..1]
+    ADDR_COMPONENT_FLAGS properties;    // byteAligned, exportNorm, floatComp
+    UINT_32 resultBits = 0;             // result bits: total bits per pixel after decompression
+
+    ADDR_PIXEL_FORMATINFO fmt;
+
+    // get bit layout and number type for each component
+    PixGetDepthCompInfo(format, &fmt);
+
+    //initialize properties
+    properties.byteAligned = TRUE;
+    properties.exportNorm  = TRUE;
+    properties.floatComp   = FALSE;
+
+    //set properties and result bits from the first two components only
+    for (i = 0; i < 2; i++)
+    {
+        // Any component whose size or start is not a multiple of 8 forces the bit-field path
+        if ((fmt.compBit[i] & 7) || (fmt.compStart[i] & 7))
+        {
+            properties.byteAligned = FALSE;
+        }
+
+        // resultBits ends up as the highest bit position covered by any component
+        if (resultBits < fmt.compStart[i] + fmt.compBit[i])
+        {
+            resultBits = fmt.compStart[i] + fmt.compBit[i];
+        }
+
+        // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
+        if (fmt.compBit[i] > 11 || fmt.numType[i] >= ADDR_USCALED)
+        {
+            properties.exportNorm = FALSE;
+        }
+
+        // Mark if there are any floating point components
+        if ((fmt.numType[i] == ADDR_U4FLOATC) || (fmt.numType[i] >= ADDR_S8FLOAT) )
+        {
+            properties.floatComp = TRUE;
+        }
+    }
+
+    // Convert the two input floats to integer values
+    for (i = 0; i < 2; i++)
+    {
+        Flt32sToInt32s(comps[i], fmt.compBit[i], fmt.numType[i], &values[i]);
+    }
+
+    // Then pack the two integer components, in the proper order
+    Int32sToPixel(2, values, fmt.compBit, fmt.compStart, properties, resultBits, pPixel );
+
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::Flt32ToColorPixel
+*
+*   @brief
+*       Convert four FLT_32 color components to a packed red/green/blue/alpha pixel value
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::Flt32ToColorPixel(
+    AddrColorFormat     format,     ///< [in] Color format
+    AddrSurfaceNumber   surfNum,    ///< [in] Surface number
+    AddrSurfaceSwap     surfSwap,   ///< [in] Surface swap
+    const ADDR_FLT_32   comps[4],   ///< [in] four components of color
+    UINT_8*             pPixel      ///< [out] a red/green/blue/alpha pixel value
+    ) const
+{
+    ADDR_PIXEL_FORMATINFO pixelInfo;
+
+    UINT_32 i;
+    UINT_32 values[4];                  // integer encodings of comps[0..3]
+    ADDR_COMPONENT_FLAGS properties;    // byteAligned, exportNorm, floatComp
+    UINT_32 resultBits = 0;             // result bits: total bits per pixel after decompression
+
+    memset(&pixelInfo, 0, sizeof(ADDR_PIXEL_FORMATINFO));
+
+    PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo);
+
+    //initialize properties
+    properties.byteAligned = TRUE;
+    properties.exportNorm  = TRUE;
+    properties.floatComp   = FALSE;
+
+    //set properties and result bits
+    for (i = 0; i < 4; i++)
+    {
+        // Any component whose size or start is not a multiple of 8 forces the bit-field path
+        if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) )
+        {
+            properties.byteAligned = FALSE;
+        }
+
+        // resultBits ends up as the highest bit position covered by any component
+        if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i])
+        {
+            resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i];
+        }
+
+        if (m_fp16ExportNorm)
+        {
+            // Same test as the else branch below, except that ADDR_U4FLOATC components
+            // never clear ADDR_EXPORT_NORM
+            if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED))
+                && (pixelInfo.numType[i] !=ADDR_U4FLOATC))
+            {
+                properties.exportNorm = FALSE;
+            }
+        }
+        else
+        {
+            // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
+            if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED)
+            {
+                properties.exportNorm = FALSE;
+            }
+        }
+
+        // Mark if there are any floating point components
+        if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) ||
+             (pixelInfo.numType[i] >= ADDR_S8FLOAT) )
+        {
+            properties.floatComp = TRUE;
+        }
+    }
+
+    // Convert the four input floats to integer values
+    for (i = 0; i < 4; i++)
+    {
+        Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]);
+    }
+
+    // Then pack the four integer components, in the proper order
+    Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0],
+                  properties, resultBits, pPixel);
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::GetCompType
+*
+*   @brief
+*       Fill the per-component number type (pInfo->numType) from format and component sizes
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::GetCompType(
+    AddrColorFormat         format,     ///< [in] surface format
+    AddrSurfaceNumber       numType,  ///< [in] number type
+    ADDR_PIXEL_FORMATINFO*  pInfo)       ///< [in/out] compBit[] is read, numType[] is written
+{
+    BOOL_32 handled = FALSE;    // TRUE once a depth-style format has filled numType[] itself
+
+    // Floating point formats override the number format
+    switch (format)
+    {
+        case ADDR_COLOR_16_FLOAT:            // fall through for all pure floating point format
+        case ADDR_COLOR_16_16_FLOAT:
+        case ADDR_COLOR_16_16_16_16_FLOAT:
+        case ADDR_COLOR_32_FLOAT:
+        case ADDR_COLOR_32_32_FLOAT:
+        case ADDR_COLOR_32_32_32_32_FLOAT:
+        case ADDR_COLOR_10_11_11_FLOAT:
+        case ADDR_COLOR_11_11_10_FLOAT:
+            numType = ADDR_NUMBER_FLOAT;
+            break;
+            // Special handling for the depth formats
+        case ADDR_COLOR_8_24:                // fall through for these 2 similar format
+        case ADDR_COLOR_24_8:
+            for (UINT_32 c = 0; c < 4; c++)
+            {
+                if (pInfo->compBit[c] == 8)
+                {
+                    pInfo->numType[c] = ADDR_UINT_BITS;
+                }
+                else if (pInfo->compBit[c]  == 24)
+                {
+                    pInfo->numType[c] = ADDR_UNORM_R6XX;
+                }
+                else
+                {
+                    pInfo->numType[c] = ADDR_NO_NUMBER;
+                }
+            }
+            handled = TRUE;
+            break;
+        case ADDR_COLOR_8_24_FLOAT:          // fall through for these 3 similar format
+        case ADDR_COLOR_24_8_FLOAT:
+        case ADDR_COLOR_X24_8_32_FLOAT:
+            for (UINT_32 c = 0; c < 4; c++)
+            {
+                if (pInfo->compBit[c] == 8)
+                {
+                    pInfo->numType[c] = ADDR_UINT_BITS;
+                }
+                else if (pInfo->compBit[c] == 24)
+                {
+                    pInfo->numType[c] = ADDR_U4FLOATC;
+                }
+                else if (pInfo->compBit[c] == 32)
+                {
+                    pInfo->numType[c] = ADDR_S8FLOAT32;
+                }
+                else
+                {
+                    pInfo->numType[c] = ADDR_NO_NUMBER;
+                }
+            }
+            handled = TRUE;
+            break;
+        default:
+            break;
+    }
+
+    if (!handled)
+    {
+        for (UINT_32 c = 0; c < 4; c++)
+        {
+            // Assign a number type for each component
+            AddrSurfaceNumber cnum;
+
+            // First handle default component values (components with no bits)
+            if (pInfo->compBit[c] == 0)
+            {
+                if (c < 3)
+                {
+                    pInfo->numType[c] = ADDR_ZERO;      // Default is zero for RGB
+                }
+                else if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
+                {
+                    pInfo->numType[c] = ADDR_EPSILON;   // Alpha INT_32 bits default is 0x01
+                }
+                else
+                {
+                    pInfo->numType[c] = ADDR_ONE;       // Alpha normal default is float 1.0
+                }
+                continue;
+            }
+            // Now handle small components: 1-bit components are only ever UINT or UNORM
+            else if (pInfo->compBit[c] == 1)
+            {
+                if (numType == ADDR_NUMBER_UINT || numType == ADDR_NUMBER_SINT)
+                {
+                    cnum = ADDR_NUMBER_UINT;
+                }
+                else
+                {
+                    cnum = ADDR_NUMBER_UNORM;
+                }
+            }
+            else
+            {
+                cnum = numType;
+            }
+
+            // If no default, set the number type from num, compbits, and architecture
+            switch (cnum)
+            {
+                case ADDR_NUMBER_SRGB:
+                    pInfo->numType[c] = (c < 3) ? ADDR_GAMMA8_R6XX : ADDR_UNORM_R6XX;
+                    break;
+                case ADDR_NUMBER_UNORM:
+                    pInfo->numType[c] = ADDR_UNORM_R6XX;
+                    break;
+                case ADDR_NUMBER_SNORM:
+                    pInfo->numType[c] = ADDR_SNORM_R6XX;
+                    break;
+                case ADDR_NUMBER_USCALED:
+                    pInfo->numType[c] = ADDR_USCALED;  // @@ Do we need separate Pele routine?
+                    break;
+                case ADDR_NUMBER_SSCALED:
+                    pInfo->numType[c] = ADDR_SSCALED;  // @@ Do we need separate Pele routine?
+                    break;
+                case ADDR_NUMBER_FLOAT:
+                    if (pInfo->compBit[c] == 32)
+                    {
+                        pInfo->numType[c] = ADDR_S8FLOAT32;
+                    }
+                    else if (pInfo->compBit[c] == 16)
+                    {
+                        pInfo->numType[c] = ADDR_S5FLOAT;
+                    }
+                    else if (pInfo->compBit[c] >= 10)
+                    {
+                        pInfo->numType[c] = ADDR_U5FLOAT;
+                    }
+                    else
+                    {
+                        ADDR_ASSERT_ALWAYS();   // float narrower than 10 bits: numType[c] not written
+                    }
+                    break;
+                case ADDR_NUMBER_SINT:
+                    pInfo->numType[c] = ADDR_SINT_BITS;
+                    break;
+                case ADDR_NUMBER_UINT:
+                    pInfo->numType[c] = ADDR_UINT_BITS;
+                    break;
+
+                default:
+                    ADDR_ASSERT(!"Invalid number type");
+                    pInfo->numType[c] = ADDR_NO_NUMBER;
+                    break;
+             }
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::GetCompSwap
+*
+*   @brief
+*       Reorder components for a color surface according to its component count and swap mode
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::GetCompSwap(
+    AddrSurfaceSwap         swap,   ///< [in] swap mode
+    ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
+{
+    // pInfo->comps selects the table; values other than 1..4 leave pInfo untouched
+    switch (pInfo->comps)
+    {
+        case 4:
+            switch (swap)
+            {
+                case ADDR_SWAP_ALT:
+                    SwapComps( 0, 2, pInfo );
+                    break;    // BGRA
+                case ADDR_SWAP_STD_REV:
+                    SwapComps( 0, 3, pInfo );
+                    SwapComps( 1, 2, pInfo );
+                    break;    // ABGR
+                case ADDR_SWAP_ALT_REV:
+                    SwapComps( 0, 3, pInfo );
+                    SwapComps( 0, 2, pInfo );
+                    SwapComps( 0, 1, pInfo );
+                    break;    // ARGB
+                default:
+                    break;    // no swap for any other mode
+            }
+            break;
+        case 3:
+            switch (swap)
+            {
+                case ADDR_SWAP_ALT_REV:
+                    SwapComps( 0, 3, pInfo );
+                    SwapComps( 0, 2, pInfo );
+                    break;    // AGR
+                case ADDR_SWAP_STD_REV:
+                    SwapComps( 0, 2, pInfo );
+                    break;    // BGR
+                case ADDR_SWAP_ALT:
+                    SwapComps( 2, 3, pInfo );
+                    break;    // RGA
+                default:
+                    break;    // RGB
+            }
+            break;
+        case 2:
+            switch (swap)
+            {
+                case ADDR_SWAP_ALT_REV:
+                    SwapComps( 0, 1, pInfo );
+                    SwapComps( 1, 3, pInfo );
+                    break;    // AR
+                case ADDR_SWAP_STD_REV:
+                    SwapComps( 0, 1, pInfo );
+                    break;    // GR
+                case ADDR_SWAP_ALT:
+                    SwapComps( 1, 3, pInfo );
+                    break;    // RA
+                default:
+                    break;    // RG
+            }
+            break;
+        case 1:
+            switch (swap)
+            {
+                case ADDR_SWAP_ALT_REV:
+                    SwapComps( 0, 3, pInfo );
+                    break;    // A
+                case ADDR_SWAP_STD_REV:
+                    SwapComps( 0, 2, pInfo );
+                    break;    // B
+                case ADDR_SWAP_ALT:
+                    SwapComps( 0, 1, pInfo );
+                    break;    // G
+                default:
+                    break;    // R
+            }
+            break;
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::SwapComps
+*
+*   @brief
+*       Exchange the start bit and bit count of two components (numType is not exchanged)
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::SwapComps(
+    UINT_32                 c0,     ///< [in] component index 0
+    UINT_32                 c1,     ///< [in] component index 1
+    ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
+{
+    UINT_32 start;  // temporary for the compStart exchange
+    UINT_32 bits;   // temporary for the compBit exchange
+
+    start = pInfo->compStart[c0];
+    pInfo->compStart[c0] = pInfo->compStart[c1];
+    pInfo->compStart[c1] = start;
+
+    bits  = pInfo->compBit[c0];
+    pInfo->compBit[c0] = pInfo->compBit[c1];
+    pInfo->compBit[c1] = bits;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::PixGetColorCompInfo
+*
+*   @brief
+*       Get per component info (bit sizes, number types, swap order) for a color surface
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::PixGetColorCompInfo(
+    AddrColorFormat         format, ///< [in] surface format, read from register
+    AddrSurfaceNumber       number, ///< [in] pixel number type
+    AddrSurfaceSwap         swap,   ///< [in] component swap mode
+    ADDR_PIXEL_FORMATINFO*  pInfo   ///< [out] output per component info
+    ) const
+{
+    // 1. Get component bits; the sizes passed for a format add up to that format's width
+    switch (format)
+    {
+        case ADDR_COLOR_8:
+            GetCompBits(8, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_1_5_5_5:
+            GetCompBits(5, 5, 5, 1, pInfo);
+            break;
+        case ADDR_COLOR_5_6_5:
+            GetCompBits(5, 6, 5, 0, pInfo);
+            break;
+        case ADDR_COLOR_6_5_5:
+            GetCompBits(5, 5, 6, 0, pInfo);
+            break;
+        case ADDR_COLOR_8_8:
+            GetCompBits(8, 8, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_4_4_4_4:
+            GetCompBits(4, 4, 4, 4, pInfo);
+            break;
+        case ADDR_COLOR_16:
+            GetCompBits(16, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_8_8_8_8:
+            GetCompBits(8, 8, 8, 8, pInfo);
+            break;
+        case ADDR_COLOR_2_10_10_10:
+            GetCompBits(10, 10, 10, 2, pInfo);
+            break;
+        case ADDR_COLOR_10_11_11:
+            GetCompBits(11, 11, 10, 0, pInfo);
+            break;
+        case ADDR_COLOR_11_11_10:
+            GetCompBits(10, 11, 11, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16:
+            GetCompBits(16, 16, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16_16_16:
+            GetCompBits(16, 16, 16, 16, pInfo);
+            break;
+        case ADDR_COLOR_16_FLOAT:
+            GetCompBits(16, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16_FLOAT:
+            GetCompBits(16, 16, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_FLOAT:
+            GetCompBits(32, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_32_FLOAT:
+            GetCompBits(32, 32, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16_16_16_FLOAT:
+            GetCompBits(16, 16, 16, 16, pInfo);
+            break;
+        case ADDR_COLOR_32_32_32_32_FLOAT:
+            GetCompBits(32, 32, 32, 32, pInfo);
+            break;
+
+        case ADDR_COLOR_32:
+            GetCompBits(32, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_32:
+            GetCompBits(32, 32, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_32_32_32:
+            GetCompBits(32, 32, 32, 32, pInfo);
+            break;
+        case ADDR_COLOR_10_10_10_2:
+            GetCompBits(2, 10, 10, 10, pInfo);
+            break;
+        case ADDR_COLOR_10_11_11_FLOAT:
+            GetCompBits(11, 11, 10, 0, pInfo);
+            break;
+        case ADDR_COLOR_11_11_10_FLOAT:
+            GetCompBits(10, 11, 11, 0, pInfo);
+            break;
+        case ADDR_COLOR_5_5_5_1:
+            GetCompBits(1, 5, 5, 5, pInfo);
+            break;
+        case ADDR_COLOR_3_3_2:
+            GetCompBits(2, 3, 3, 0, pInfo);
+            break;
+        case ADDR_COLOR_4_4:
+            GetCompBits(4, 4, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_8_24:
+        case ADDR_COLOR_8_24_FLOAT:  // same bit count, fall through
+            GetCompBits(24, 8, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_24_8:
+        case ADDR_COLOR_24_8_FLOAT:  // same bit count, fall through
+            GetCompBits(8, 24, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_X24_8_32_FLOAT:
+            GetCompBits(32, 8, 0, 0, pInfo);
+            break;
+
+        case ADDR_COLOR_INVALID:
+            GetCompBits(0, 0, 0, 0, pInfo);
+            break;
+        default:
+            ADDR_ASSERT(0);                     // unknown format: treated like ADDR_COLOR_INVALID
+            GetCompBits(0, 0, 0, 0, pInfo);
+            break;
+    }
+
+    // 2. Get component number type
+
+    GetCompType(format, number, pInfo);
+
+    // 3. Swap components if needed
+
+    GetCompSwap(swap, pInfo);
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::PixGetDepthCompInfo
+*
+*   @brief
+*       Get per component info (bit sizes and number types) for a depth surface
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::PixGetDepthCompInfo(
+    AddrDepthFormat         format,     ///< [in] surface format, read from register
+    ADDR_PIXEL_FORMATINFO*  pInfo       ///< [out] output per component bits and type
+    ) const
+{
+    // With R800 planar depth, the 24-bit float formats are remapped to 32-bit float ones
+    if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800)
+    {
+        if (format == ADDR_DEPTH_8_24_FLOAT)
+        {
+            format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8
+        }
+
+        if (format == ADDR_DEPTH_X8_24_FLOAT)
+        {
+            format = ADDR_DEPTH_32_FLOAT;
+        }
+    }
+
+    // 1. Component bit sizes for the (possibly remapped) format
+    switch (format)
+    {
+        case ADDR_DEPTH_16:
+            GetCompBits(16, 0, 0, 0, pInfo);
+            break;
+        case ADDR_DEPTH_8_24:
+        case ADDR_DEPTH_8_24_FLOAT:      // similar format, fall through
+            GetCompBits(24, 8, 0, 0, pInfo);
+            break;
+        case ADDR_DEPTH_X8_24:
+        case ADDR_DEPTH_X8_24_FLOAT:     // similar format, fall through
+            GetCompBits(24, 0, 0, 0, pInfo);
+            break;
+        case ADDR_DEPTH_32_FLOAT:
+            GetCompBits(32, 0, 0, 0, pInfo);
+            break;
+        case ADDR_DEPTH_X24_8_32_FLOAT:
+            GetCompBits(32, 8, 0, 0, pInfo);
+            break;
+        case ADDR_DEPTH_INVALID:
+            GetCompBits(0, 0, 0, 0, pInfo);
+            break;
+        default:
+            ADDR_ASSERT(0);                     // unknown format: treated like ADDR_DEPTH_INVALID
+            GetCompBits(0, 0, 0, 0, pInfo);
+            break;
+    }
+
+    // 2. Number types of components 0 and 1
+    switch (format)
+    {
+        case ADDR_DEPTH_16:
+            pInfo->numType [0] = ADDR_UNORM_R6XX;
+            pInfo->numType [1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_8_24:
+            pInfo->numType [0] = ADDR_UNORM_R6XXDB;
+            pInfo->numType [1] = ADDR_UINT_BITS;
+            break;
+        case ADDR_DEPTH_8_24_FLOAT:
+            pInfo->numType [0] = ADDR_U4FLOATC;
+            pInfo->numType [1] = ADDR_UINT_BITS;
+            break;
+        case ADDR_DEPTH_X8_24:
+            pInfo->numType [0] = ADDR_UNORM_R6XXDB;
+            pInfo->numType [1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_X8_24_FLOAT:
+            pInfo->numType [0] = ADDR_U4FLOATC;
+            pInfo->numType [1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_32_FLOAT:
+            pInfo->numType [0] = ADDR_S8FLOAT32;
+            pInfo->numType [1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_X24_8_32_FLOAT:
+            pInfo->numType [0] = ADDR_S8FLOAT32;
+            pInfo->numType [1] = ADDR_UINT_BITS;
+            break;
+        default:
+            pInfo->numType [0] = ADDR_NO_NUMBER;
+            pInfo->numType [1] = ADDR_NO_NUMBER;
+            break;
+    }
+
+    pInfo->numType [2] = ADDR_NO_NUMBER;    // components 2 and 3 never carry a number type here
+    pInfo->numType [3] = ADDR_NO_NUMBER;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::PixGetExportNorm
+*
+*   @brief
+*       Check if fp16 export norm can be enabled.
+*
+*   @return
+*       FALSE if any component has more than 11 bits or a numType above ADDR_USCALED (the
+*       float types listed below are exempt when m_fp16ExportNorm is set); TRUE otherwise.
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::PixGetExportNorm(
+    AddrColorFormat     colorFmt,       ///< [in] surface format, read from register
+    AddrSurfaceNumber   numberFmt,      ///< [in] pixel number type
+    AddrSurfaceSwap     swap            ///< [in] components swap type
+    ) const
+{
+    BOOL_32 enabled = TRUE;
+
+    ADDR_PIXEL_FORMATINFO formatInfo;
+
+    PixGetColorCompInfo(colorFmt, numberFmt, swap, &formatInfo);
+
+    // NOTE(review): Flt32ToColorPixel tests ">= ADDR_USCALED" where this uses ">" - confirm
+    for (UINT_32 c = 0; c < 4; c++)
+    {
+        if (m_fp16ExportNorm)
+        {
+            if (((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED)) &&
+                (formatInfo.numType[c] != ADDR_U4FLOATC)    &&
+                (formatInfo.numType[c] != ADDR_S5FLOAT)     &&
+                (formatInfo.numType[c] != ADDR_S5FLOATM)    &&
+                (formatInfo.numType[c] != ADDR_U5FLOAT)     &&
+                (formatInfo.numType[c] != ADDR_U3FLOATM))
+            {
+                enabled = FALSE;
+                break;
+            }
+        }
+        else
+        {
+            if ((formatInfo.compBit[c] > 11) || (formatInfo.numType[c] > ADDR_USCALED))
+            {
+                enabled = FALSE;
+                break;
+            }
+        }
+    }
+
+    return enabled;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::AdjustSurfaceInfo
+*
+*   @brief
+*       Adjust bpp/base pitch/width/height according to elemMode and expandX/Y
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::AdjustSurfaceInfo(
+    AddrElemMode    elemMode,       ///< [in] element mode
+    UINT_32         expandX,        ///< [in] decompression expansion factor in X
+    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
+    UINT_32*        pBpp,           ///< [in/out] bpp
+    UINT_32*        pBasePitch,     ///< [in/out] base pitch
+    UINT_32*        pWidth,         ///< [in/out] width
+    UINT_32*        pHeight)        ///< [in/out] height
+{
+    UINT_32 packedBits;
+    UINT_32 basePitch;
+    UINT_32 width;
+    UINT_32 height;
+    UINT_32 bpp;
+    BOOL_32 bBCnFormat = FALSE;     // set for the ADDR_PACKED_BC* modes, used by the R8XX path
+
+    ADDR_ASSERT(pBpp != NULL);
+    ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL);
+
+    if (pBpp)
+    {
+        bpp = *pBpp;
+
+        switch (elemMode)
+        {
+            case ADDR_EXPANDED:
+                packedBits = bpp / expandX / expandY;
+                break;
+            case ADDR_PACKED_STD: // Different bit order
+            case ADDR_PACKED_REV:
+                packedBits = bpp * expandX * expandY;
+                break;
+            case ADDR_PACKED_GBGR:
+            case ADDR_PACKED_BGRG:
+                packedBits = bpp; // 32-bit packed ==> 2 32-bit result
+                break;
+            case ADDR_PACKED_BC1: // Fall through
+            case ADDR_PACKED_BC4:
+                packedBits = 64;
+                bBCnFormat = TRUE;
+                break;
+            case ADDR_PACKED_BC2: // Fall through
+            case ADDR_PACKED_BC3: // Fall through
+            case ADDR_PACKED_BC5: // BC2/BC3/BC5 all use 128 bits
+                bBCnFormat = TRUE;
+                packedBits = 128;
+                break;
+            case ADDR_ROUND_BY_HALF:  // Fall through
+            case ADDR_ROUND_TRUNCATE: // Fall through
+            case ADDR_ROUND_DITHER:   // Fall through
+            case ADDR_UNCOMPRESSED:
+                packedBits = bpp;
+                break;
+            default:
+                packedBits = bpp;
+                ADDR_ASSERT_ALWAYS();
+                break;
+        }
+
+        *pBpp = packedBits;
+    }
+
+    if (pWidth && pHeight && pBasePitch)
+    {
+        basePitch = *pBasePitch;
+        width     = *pWidth;
+        height    = *pHeight;
+
+        // The outputs are only rewritten when at least one expansion factor exceeds 1
+        if ((expandX > 1) || (expandY > 1))
+        {
+            if (elemMode == ADDR_EXPANDED)
+            {
+                basePitch *= expandX;
+                width     *= expandX;
+                height    *= expandY;
+            }
+            else
+            {
+                // Evergreen family workaround
+                if (bBCnFormat && (m_pAddrLib->GetAddrChipFamily() == ADDR_CHIP_FAMILY_R8XX))
+                {
+                    // For BCn we now pad it to POW2 at the beginning so it is safe to
+                    // divide by 4 directly
+                    basePitch = basePitch / expandX;
+                    width     = width  / expandX;
+                    height    = height / expandY;
+#if DEBUG
+                    width     = (width == 0) ? 1 : width;
+                    height    = (height == 0) ? 1 : height;
+
+                    if ((*pWidth > PowTwoAlign(width, 8) * expandX) ||
+                        (*pHeight > PowTwoAlign(height, 8) * expandY)) // 8 is 1D tiling alignment
+                    {
+                        // if this assertion is hit we may have issues if app samples
+                        // rightmost/bottommost pixels
+                        ADDR_ASSERT_ALWAYS();
+                    }
+#endif
+                }
+                else // Not BCn format we still keep old way (FMT_1? No real test yet)
+                {
+                    // Round up so that a partial element still counts as one
+                    basePitch = (basePitch + expandX - 1) / expandX;
+                    width     = (width + expandX - 1) / expandX;
+                    height    = (height + expandY - 1) / expandY;
+                }
+            }
+
+            *pBasePitch = basePitch; // 0 is legal value for base pitch.
+            *pWidth     = (width == 0) ? 1 : width;
+            *pHeight    = (height == 0) ? 1 : height;
+        } // if ((expandX > 1) || (expandY > 1))
+    } // if (pWidth && pHeight && pBasePitch)
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::RestoreSurfaceInfo
+*
+*   @brief
+*       Reverse operation of AdjustSurfaceInfo
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::RestoreSurfaceInfo(
+    AddrElemMode    elemMode,       ///< [in] element mode
+    UINT_32         expandX,        ///< [in] decompression expansion factor in X
+    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
+    UINT_32*        pBpp,           ///< [in/out] bpp
+    UINT_32*        pWidth,         ///< [in/out] width
+    UINT_32*        pHeight)        ///< [in/out] height
+{
+    UINT_32 originalBits;
+    UINT_32 width;
+    UINT_32 height;
+    UINT_32 bpp;
+
+    ADDR_ASSERT(pBpp != NULL);
+    ADDR_ASSERT(pWidth != NULL && pHeight != NULL);
+
+    if (pBpp)
+    {
+        bpp = *pBpp;
+
+        switch (elemMode)
+        {
+        case ADDR_EXPANDED:
+            originalBits = bpp * expandX * expandY;
+            break;
+        case ADDR_PACKED_STD: // Different bit order
+        case ADDR_PACKED_REV:
+            originalBits = bpp / expandX / expandY;
+            break;
+        case ADDR_PACKED_GBGR:
+        case ADDR_PACKED_BGRG:
+            originalBits = bpp; // 32-bit packed ==> 2 32-bit result
+            break;
+        case ADDR_PACKED_BC1: // Fall through
+        case ADDR_PACKED_BC4:
+            originalBits = 64;
+            break;
+        case ADDR_PACKED_BC2: // Fall through
+        case ADDR_PACKED_BC3: // Fall through
+            case ADDR_PACKED_BC5:
+            originalBits = 128;
+            break;
+        case ADDR_ROUND_BY_HALF:  // Fall through
+        case ADDR_ROUND_TRUNCATE: // Fall through
+        case ADDR_ROUND_DITHER:   // Fall through
+        case ADDR_UNCOMPRESSED:
+            originalBits = bpp;
+            break;
+        default:
+            originalBits = bpp;
+            ADDR_ASSERT_ALWAYS();
+            break;
+        }
+
+        *pBpp = originalBits;
+    }
+
+    if (pWidth && pHeight)
+    {
+        width    = *pWidth;
+        height   = *pHeight;
+
+        if ((expandX > 1) || (expandY > 1))
+        {
+            if (elemMode == ADDR_EXPANDED)
+            {
+                width /= expandX;
+                height /= expandY;
+            }
+            else
+            {
+                width *= expandX;
+                height *= expandY;
+            }
+        }
+
+        *pWidth  = (width == 0) ? 1 : width;    // written back unconditionally, clamped to >= 1
+        *pHeight = (height == 0) ? 1 : height;
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::GetBitsPerPixel
+*
+*   @brief
+*       Compute the total bits per element according to a format
+*       code. For compressed formats, this is not the same as
+*       the number of bits per decompressed element.
+*
+*   @return
+*       Bits per pixel
+***************************************************************************************************
+*/
+UINT_32 AddrElemLib::GetBitsPerPixel(
+    AddrFormat          format,         ///< [in] surface format code
+    AddrElemMode*       pElemMode,      ///< [out] element mode
+    UINT_32*            pExpandX,       ///< [out] decompression expansion factor in X
+    UINT_32*            pExpandY,       ///< [out] decompression expansion factor in Y
+    UINT_32*            pUnusedBits)    ///< [out] bits unused
+{
+    UINT_32 bpp;
+    UINT_32 expandX = 1;   // formats that do not override these report a 1x1 expansion
+    UINT_32 expandY = 1;
+    UINT_32 bitUnused = 0;
+    AddrElemMode elemMode = ADDR_UNCOMPRESSED; // default value
+
+    switch (format)
+    {
+        case ADDR_FMT_8:
+            bpp = 8;
+            break;
+        case ADDR_FMT_1_5_5_5:
+        case ADDR_FMT_5_6_5:
+        case ADDR_FMT_6_5_5:
+        case ADDR_FMT_8_8:
+        case ADDR_FMT_4_4_4_4:
+        case ADDR_FMT_16:
+        case ADDR_FMT_16_FLOAT:
+            bpp = 16;
+            break;
+        case ADDR_FMT_GB_GR: // treat as FMT_8_8
+            elemMode = ADDR_PACKED_GBGR;
+            bpp = 16;
+            break;
+        case ADDR_FMT_BG_RG: // treat as FMT_8_8
+            elemMode = ADDR_PACKED_BGRG;
+            bpp = 16;
+            break;
+        case ADDR_FMT_8_8_8_8:
+        case ADDR_FMT_2_10_10_10:
+        case ADDR_FMT_10_11_11:
+        case ADDR_FMT_11_11_10:
+        case ADDR_FMT_16_16:
+        case ADDR_FMT_16_16_FLOAT:
+        case ADDR_FMT_32:
+        case ADDR_FMT_32_FLOAT:
+        case ADDR_FMT_24_8:
+        case ADDR_FMT_24_8_FLOAT:
+            bpp = 32;
+            break;
+        case ADDR_FMT_16_16_16_16:
+        case ADDR_FMT_16_16_16_16_FLOAT:
+        case ADDR_FMT_32_32:
+        case ADDR_FMT_32_32_FLOAT:
+        case ADDR_FMT_CTX1:
+            bpp = 64;
+            break;
+        case ADDR_FMT_32_32_32_32:
+        case ADDR_FMT_32_32_32_32_FLOAT:
+            bpp = 128;
+            break;
+        case ADDR_FMT_INVALID:
+            bpp = 0; // reported without asserting, unlike the default case below
+            break;
+        case ADDR_FMT_1_REVERSED:
+            elemMode = ADDR_PACKED_REV;
+            expandX = 8;
+            bpp = 1;
+            break;
+        case ADDR_FMT_1:
+            elemMode = ADDR_PACKED_STD;
+            expandX = 8;
+            bpp = 1;
+            break;
+        case ADDR_FMT_4_4:
+        case ADDR_FMT_3_3_2:
+            bpp = 8;
+            break;
+        case ADDR_FMT_5_5_5_1:
+            bpp = 16;
+            break;
+        case ADDR_FMT_32_AS_8:
+        case ADDR_FMT_32_AS_8_8:
+        case ADDR_FMT_8_24:
+        case ADDR_FMT_8_24_FLOAT:
+        case ADDR_FMT_10_10_10_2:
+        case ADDR_FMT_10_11_11_FLOAT:
+        case ADDR_FMT_11_11_10_FLOAT:
+        case ADDR_FMT_5_9_9_9_SHAREDEXP:
+            bpp = 32;
+            break;
+        case ADDR_FMT_X24_8_32_FLOAT:
+            bpp = 64;
+            bitUnused = 24; // the only format here that reports unused bits
+            break;
+        case ADDR_FMT_8_8_8:
+            elemMode = ADDR_EXPANDED;
+            bpp = 24;//@@ 8;      // read 3 elements per pixel
+            expandX = 3;
+            break;
+        case ADDR_FMT_16_16_16:
+        case ADDR_FMT_16_16_16_FLOAT:
+            elemMode = ADDR_EXPANDED;
+            bpp = 48;//@@ 16;      // read 3 elements per pixel
+            expandX = 3;
+            break;
+        case ADDR_FMT_32_32_32_FLOAT:
+        case ADDR_FMT_32_32_32:
+            elemMode = ADDR_EXPANDED;
+            expandX = 3;
+            bpp = 96;//@@ 32;      // read 3 elements per pixel
+            break;
+        case ADDR_FMT_BC1:
+            elemMode = ADDR_PACKED_BC1;
+            expandX = 4;
+            expandY = 4;
+            bpp = 64;
+            break;
+        case ADDR_FMT_BC4:
+            elemMode = ADDR_PACKED_BC4;
+            expandX = 4;
+            expandY = 4;
+            bpp = 64;
+            break;
+        case ADDR_FMT_BC2:
+            elemMode = ADDR_PACKED_BC2;
+            expandX = 4;
+            expandY = 4;
+            bpp = 128;
+            break;
+        case ADDR_FMT_BC3:
+            elemMode = ADDR_PACKED_BC3;
+            expandX = 4;
+            expandY = 4;
+            bpp = 128;
+            break;
+        case ADDR_FMT_BC5:
+        case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5
+        case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5
+            elemMode = ADDR_PACKED_BC5;
+            expandX = 4;
+            expandY = 4;
+            bpp = 128;
+            break;
+        default:
+            bpp = 0; // unrecognised format: report 0 bpp and assert
+            ADDR_ASSERT_ALWAYS();
+            break;
+            // @@ or should this be an error?
+    }
+
+    SafeAssign(pExpandX, expandX); // the class declaration defaults these three pointers to NULL
+    SafeAssign(pExpandY, expandY);
+    SafeAssign(pUnusedBits, bitUnused);
+    SafeAssign(reinterpret_cast<UINT_32*>(pElemMode), elemMode); // enum stored via a UINT_32* view
+
+    return bpp;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::GetCompBits
+*
+*   @brief
+*       Set each component's bit size and bit start. And set element mode and number type
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::GetCompBits(
+    UINT_32 c0,                     ///< [in] bits of component 0
+    UINT_32 c1,                     ///< [in] bits of component 1
+    UINT_32 c2,                     ///< [in] bits of component 2
+    UINT_32 c3,                     ///< [in] bits of component 3
+    ADDR_PIXEL_FORMATINFO* pInfo,   ///< [out] per component info out
+    AddrElemMode elemMode)          ///< [in] element mode
+{
+    pInfo->comps = 0; // incremented below for every component with a non-zero width
+
+    pInfo->compBit[0] = c0;
+    pInfo->compBit[1] = c1;
+    pInfo->compBit[2] = c2;
+    pInfo->compBit[3] = c3;
+
+    pInfo->compStart[0] = 0;          // components are laid out back to back from bit 0
+    pInfo->compStart[1] = c0;
+    pInfo->compStart[2] = c0+c1;
+    pInfo->compStart[3] = c0+c1+c2;
+
+    pInfo->elemMode = elemMode;
+    // still needed since component swap may depend on number of components
+    for (INT i=0; i<4; i++)
+    {
+        if (pInfo->compBit[i] == 0)
+        {
+            pInfo->compStart[i]  = 0;       // all null components start at bit 0
+            pInfo->numType[i] = ADDR_NO_NUMBER; // and have no number type
+        }
+        else
+        {
+            pInfo->comps++; // numType of present components is left untouched here
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::SetClearComps
+*
+*   @brief
+*       Set the clear color (or clear depth/stencil) for a surface
+*
+*   @note
+*       If clearColor is zero, a default clear value is used in place of comps[4].
+*       If float32 is set, full precision is used, else the mantissa is reduced to 12-bits
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::SetClearComps(
+    ADDR_FLT_32 comps[4],   ///< [in/out] components
+    BOOL_32 clearColor,     ///< [in] TRUE if clear color is set (CLEAR_COLOR)
+    BOOL_32 float32)        ///< [in] TRUE if float32 component (BLEND_FLOAT32)
+{
+    INT_32 i;
+
+    // Use default clearvalues if clearColor is disabled
+    if (clearColor == FALSE)
+    {
+        for (i=0; i<3; i++)
+        {
+            comps[i].f = 0.0; // first three components default to 0.0
+        }
+        comps[3].f = 1.0;     // fourth component defaults to 1.0
+    }
+
+    // Otherwise use the (modified) clear value
+    else
+    {
+        for (i=0; i<4; i++)
+        {   // If full precision, use clear value unchanged
+            if (float32)
+            {
+                // Do nothing
+                //comps[i] = comps[i];
+            }
+            // Else if it is a NaN, use the standard NaN value
+            else if ((comps[i].u & 0x7FFFFFFF) > 0x7F800000) // sign bit masked off before the compare
+            {
+                comps[i].u = 0xFFC00000;
+            }
+            // Else reduce the mantissa precision
+            else
+            {
+                comps[i].u = comps[i].u & 0xFFFFF000; // clears the low 12 bits
+            }
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::IsBlockCompressed
+*
+*   @brief
+*       TRUE if this is block compressed format
+*
+*   @note
+*
+*   @return
+*       BOOL_32
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::IsBlockCompressed(
+    AddrFormat format)  ///< [in] Format
+{
+    return (ADDR_FMT_BC1 <= format) && (format <= ADDR_FMT_BC7); // inclusive BC1..BC7 range test
+}
+
+
+/**
+***************************************************************************************************
+*   AddrElemLib::IsCompressed
+*
+*   @brief
+*       TRUE if this is block compressed format or 1 bit format
+*
+*   @note
+*
+*   @return
+*       BOOL_32
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::IsCompressed(
+    AddrFormat format)  ///< [in] Format
+{   // BC1..BC7 are covered by IsBlockCompressed(); the two 1-bit packed formats are tested explicitly
+    return IsBlockCompressed(format) || format == ADDR_FMT_1 || format == ADDR_FMT_1_REVERSED;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::IsExpand3x
+*
+*   @brief
+*       TRUE if this is 3x expand format
+*
+*   @note
+*
+*   @return
+*       BOOL_32
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::IsExpand3x(
+    AddrFormat format)  ///< [in] Format
+{
+    BOOL_32 is3x = FALSE;
+
+    switch (format)
+    {
+        case ADDR_FMT_8_8_8:
+        case ADDR_FMT_16_16_16:
+        case ADDR_FMT_16_16_16_FLOAT:
+        case ADDR_FMT_32_32_32:
+        case ADDR_FMT_32_32_32_FLOAT:
+            is3x = TRUE; // the same five formats get expandX = 3 in GetBitsPerPixel
+            break;
+        default:
+            break;      // every other format keeps the FALSE default
+    }
+
+    return is3x;
+}
+
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,270 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrelemlib.h
+* @brief Contains the class for element/pixel related functions
+***************************************************************************************************
+*/
+
+#ifndef __ELEM_LIB_H__
+#define __ELEM_LIB_H__
+
+#include "addrinterface.h"
+#include "addrobject.h"
+#include "addrcommon.h"
+
+class AddrLib;
+
+// The masks for property bits within the Properties INT_32
+union ADDR_COMPONENT_FLAGS
+{
+    struct
+    {
+        UINT_32 byteAligned    : 1;    ///< all components are byte aligned
+        UINT_32 exportNorm     : 1;    ///< components support R6xx NORM compression
+        UINT_32 floatComp      : 1;    ///< there is at least one floating point component
+    };
+
+    UINT_32 value;                     ///< all of the flag bits above viewed as one 32-bit word
+};
+
+// Copy from legacy lib's AddrNumberType
+enum AddrNumberType
+{
+    // The following number types have the range [-1..1]
+    ADDR_NO_NUMBER,         // This component doesn't exist and has no default value
+    ADDR_EPSILON,           // Force component value to integer 0x00000001
+    ADDR_ZERO,              // Force component value to integer 0x00000000
+    ADDR_ONE,               // Force component value to floating point 1.0
+    // Above values don't have any bits per component (keep ADDR_ONE the last of these)
+
+    ADDR_UNORM,             // Unsigned normalized (repeating fraction) full precision
+    ADDR_SNORM,             // Signed normalized (repeating fraction) full precision
+    ADDR_GAMMA,             // Gamma-corrected, full precision
+
+    ADDR_UNORM_R5XXRB,      // Unsigned normalized (repeating fraction) for r5xx RB
+    ADDR_SNORM_R5XXRB,      // Signed normalized (repeating fraction) for r5xx RB
+    ADDR_GAMMA_R5XXRB,      // Gamma-corrected for r5xx RB (note: unnormalized value)
+    ADDR_UNORM_R5XXBC,      // Unsigned normalized (repeating fraction) for r5xx BC
+    ADDR_SNORM_R5XXBC,      // Signed normalized (repeating fraction) for r5xx BC
+    ADDR_GAMMA_R5XXBC,      // Gamma-corrected for r5xx BC (note: unnormalized value)
+
+    ADDR_UNORM_R6XX,        // Unsigned normalized (repeating fraction) for R6xx
+    ADDR_UNORM_R6XXDB,      // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX
+    ADDR_SNORM_R6XX,        // Signed normalized (repeating fraction) for R6xx
+    ADDR_GAMMA8_R6XX,       // Gamma-corrected for r6xx
+    ADDR_GAMMA8_R7XX_TP,    // Gamma-corrected for r7xx TP 12bit unorm 8.4.
+
+    ADDR_U4FLOATC,          // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1]
+    ADDR_GAMMA_4SEG,        // Gamma-corrected, four segment approximation
+    ADDR_U0FIXED,           // Unsigned 0.N-bit fixed point
+
+    // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine)
+    ADDR_USCALED,           // Unsigned integer converted to/from floating point
+    ADDR_SSCALED,           // Signed integer converted to/from floating point
+    ADDR_USCALED_R5XXRB,    // Unsigned integer to/from floating point for r5xx RB
+    ADDR_SSCALED_R5XXRB,    // Signed integer to/from floating point for r5xx RB
+    ADDR_UINT_BITS,         // Keep in unsigned integer form, clamped to specified range
+    ADDR_SINT_BITS,         // Keep in signed integer form, clamped to specified range
+    ADDR_UINTBITS,          // @@ remove Keep in unsigned integer form, use modulus to reduce bits
+    ADDR_SINTBITS,          // @@ remove Keep in signed integer form, use modulus to reduce bits
+
+    // The following number types and ADDR_U4FLOATC have exponents
+    // (LEAVE ADDR_S8FLOAT first or fix Finish routine)
+    ADDR_S8FLOAT,           // Signed floating point with 8-bit exponent, bias=127
+    ADDR_S8FLOAT32,         // 32-bit IEEE float, passes through NaN values
+    ADDR_S5FLOAT,           // Signed floating point with 5-bit exponent, bias=15
+    ADDR_S5FLOATM,          // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf
+    ADDR_U5FLOAT,           // Unsigned floating point with 5-bit exponent, bias=15
+    ADDR_U3FLOATM,          // Unsigned floating point with 3-bit exponent, bias=3
+
+    ADDR_S5FIXED,           // Signed 5.N-bit fixed point, with rounding
+
+    ADDR_END_NUMBER         // Used for range comparisons
+};
+
+// Copy from legacy lib's AddrElement
+enum AddrElemMode
+{
+    // These formats allow both packing and unpacking
+    ADDR_ROUND_BY_HALF,     // add 1/2 and truncate when packing this element
+    ADDR_ROUND_TRUNCATE,    // truncate toward 0 for sign/mag, else toward neg
+    ADDR_ROUND_DITHER,      // Pack by dithering -- requires (x,y) position
+
+    // These formats only allow unpacking, no packing
+    ADDR_UNCOMPRESSED,      // Elements are not compressed: one data element per pixel/texel
+    ADDR_EXPANDED,          // Elements are split up and stored in multiple data elements
+    ADDR_PACKED_STD,        // Elements are compressed into ExpandX by ExpandY data elements
+    ADDR_PACKED_REV,        // Like ADDR_PACKED_STD, but X order of pixels is reversed
+    ADDR_PACKED_GBGR,       // Elements are compressed 4:2:2 in G1B_G0R order (high to low)
+    ADDR_PACKED_BGRG,       // Elements are compressed 4:2:2 in BG1_RG0 order (high to low)
+    ADDR_PACKED_BC1,        // Each data element is uncompressed to a 4x4 pixel/texel array
+    ADDR_PACKED_BC2,        // Each data element is uncompressed to a 4x4 pixel/texel array
+    ADDR_PACKED_BC3,        // Each data element is uncompressed to a 4x4 pixel/texel array
+    ADDR_PACKED_BC4,        // Each data element is uncompressed to a 4x4 pixel/texel array
+    ADDR_PACKED_BC5,        // Each data element is uncompressed to a 4x4 pixel/texel array
+
+    // These formats provide various kinds of compression
+    ADDR_ZPLANE_R5XX,       // Compressed Zplane using r5xx architecture format
+    ADDR_ZPLANE_R6XX,       // Compressed Zplane using r6xx architecture format
+    //@@ Fill in the compression modes
+
+    ADDR_END_ELEMENT        // Used for range comparisons
+};
+
+enum AddrDepthPlanarType
+{
+    ADDR_DEPTH_PLANAR_NONE = 0, // No planar z/stencil
+    ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are stored within a tile
+    ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes
+};
+
+/**
+***************************************************************************************************
+*   ADDR_PIXEL_FORMATINFO
+*
+*   @brief
+*       Per component info
+*
+***************************************************************************************************
+*/
+struct ADDR_PIXEL_FORMATINFO
+{
+    UINT_32             compBit[4];     ///< Bit width of each component (0 means the component is absent)
+    AddrNumberType      numType[4];     ///< Number type of each component
+    UINT_32             compStart[4];   ///< Start bit of each component
+    AddrElemMode        elemMode;       ///< Element mode
+    UINT_32             comps;          ///< Number of components
+};
+
+/**
+***************************************************************************************************
+* @brief This class contains asic independent element related attributes and operations
+***************************************************************************************************
+*/
+class AddrElemLib : public AddrObject
+{
+protected:
+    AddrElemLib(AddrLib* const pAddrLib);
+
+public:
+
+    /// Makes this class virtual
+    virtual ~AddrElemLib();
+
+    static AddrElemLib *Create(
+        const AddrLib* const pAddrLib);
+
+    /// The implementation is only for R6xx/R7xx; make it virtual if R8xx needs its own version
+    BOOL_32 PixGetExportNorm(
+        AddrColorFormat colorFmt,
+        AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const;
+
+    /// The methods below are asic independent, so several of them are static.
+    /// Remove static if we need different operation in hwl.
+
+    VOID    Flt32ToDepthPixel(
+        AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const;
+
+    VOID    Flt32ToColorPixel(
+        AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap,
+        const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const;
+
+    static VOID    Flt32sToInt32s(
+        ADDR_FLT_32 value, UINT_32 bits, AddrNumberType numberType, UINT_32* pResult);
+
+    static VOID    Int32sToPixel(
+        UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart,
+        ADDR_COMPONENT_FLAGS properties, UINT_32 resultBits, UINT_8* pPixel);
+
+    VOID    PixGetColorCompInfo(
+        AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap,
+        ADDR_PIXEL_FORMATINFO* pInfo) const;
+
+    VOID    PixGetDepthCompInfo(
+        AddrDepthFormat format, ADDR_PIXEL_FORMATINFO* pInfo) const;
+
+    UINT_32 GetBitsPerPixel(
+        AddrFormat format, AddrElemMode* pElemMode,
+        UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL);
+
+    static VOID    SetClearComps(
+        ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32);
+
+    VOID    AdjustSurfaceInfo(
+        AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
+        UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight);
+
+    VOID    RestoreSurfaceInfo(
+        AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
+        UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight);
+
+    /// Checks if depth and stencil are planar inside a tile
+    BOOL_32 IsDepthStencilTilePlanar()
+    {
+        return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? TRUE : FALSE;
+    }
+
+    /// Sets m_configFlags, copied from AddrLib
+    VOID    SetConfigFlags(ADDR_CONFIG_FLAGS flags)
+    {
+        m_configFlags = flags;
+    }
+
+    static BOOL_32 IsCompressed(AddrFormat format);
+    static BOOL_32 IsBlockCompressed(AddrFormat format);
+    static BOOL_32 IsExpand3x(AddrFormat format);
+
+protected:
+
+    static VOID    GetCompBits(
+        UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3,
+        ADDR_PIXEL_FORMATINFO* pInfo,
+        AddrElemMode elemMode = ADDR_ROUND_BY_HALF);
+
+    static VOID    GetCompType(
+        AddrColorFormat format, AddrSurfaceNumber numType,
+        ADDR_PIXEL_FORMATINFO* pInfo);
+
+    static VOID    GetCompSwap(
+        AddrSurfaceSwap swap, ADDR_PIXEL_FORMATINFO* pInfo);
+
+    static VOID    SwapComps(
+        UINT_32 c0, UINT_32 c1, ADDR_PIXEL_FORMATINFO* pInfo);
+
+private:
+
+    UINT_32             m_fp16ExportNorm;   ///< If allow FP16 to be reported as EXPORT_NORM
+    AddrDepthPlanarType m_depthPlanarType;  ///< Read by IsDepthStencilTilePlanar()
+
+    ADDR_CONFIG_FLAGS   m_configFlags;      ///< Copy of AddrLib's configFlags
+    AddrLib* const      m_pAddrLib;         ///< Pointer to parent addrlib instance
+};
+
+#endif
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,4023 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrlib.cpp
+* @brief Contains the implementation for the AddrLib base class.
+***************************************************************************************************
+*/
+
+#include "addrinterface.h"
+#include "addrlib.h"
+#include "addrcommon.h"
+
+#if defined(__APPLE__)
+
+UINT_32 div64_32(UINT_64 n, UINT_32 base) // returns the remainder n % base, not the quotient
+{
+    UINT_64 rem = n;
+    UINT_64 b = base;
+    UINT_64 res, d = 1;
+    UINT_32 high = rem >> 32;
+
+    res = 0;
+    if (high >= base) // reduce the upper 32 bits of the dividend first
+    {
+        high /= base;
+        res = (UINT_64) high << 32;
+        rem -= (UINT_64) (high*base) << 32;
+    }
+
+    while ((INT_64)b > 0 && b < rem) // double the divisor; stop once its top bit would be set
+    {
+        b = b+b;
+        d = d+d;
+    }
+
+    do // shift-and-subtract: res accumulates the quotient, rem shrinks to the remainder
+    {
+        if (rem >= b)
+        {
+            rem -= b;
+            res += d;
+        }
+        b >>= 1;
+        d >>= 1;
+    } while (d);
+
+    n = res; // n is a by-value parameter, so this store never reaches the caller
+    return rem;
+}
+
+extern "C"
+UINT_32 __umoddi3(UINT_64 n, UINT_32 base) // NOTE(review): libgcc's __umoddi3 takes and returns 64-bit values -- confirm this narrower signature is ABI-safe
+{
+    return div64_32(n, base); // div64_32 returns the remainder
+}
+
+#endif // __APPLE__
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Static Const Member
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+const AddrTileModeFlags AddrLib::m_modeFlags[ADDR_TM_COUNT] =
+{// T   L  1  2  3  P  Pr B
+    {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL
+    {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED
+    {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1
+    {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK
+    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1
+    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2
+    {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4
+    {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK
+    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1
+    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2
+    {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4
+    {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK
+    {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1
+    {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK
+    {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1
+    {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK
+    {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK
+    {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK
+    {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE
+    {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1
+    {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1
+    {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1
+    {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK
+    {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK
+    {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK
+}; // one row per tile mode named in the row comments; presumably in AddrTileMode enum order -- TODO confirm
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Constructor/Destructor
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrLib::AddrLib
+*
+*   @brief
+*       Constructor for the AddrLib class
+*
+***************************************************************************************************
+*/
+AddrLib::AddrLib() :
+    m_class(BASE_ADDRLIB),
+    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),
+    m_chipRevision(0),
+    m_version(ADDRLIB_VERSION),
+    m_pipes(0),
+    m_banks(0),
+    m_pipeInterleaveBytes(0),
+    m_rowSize(0),
+    m_minPitchAlignPixels(1),
+    m_maxSamples(8),
+    m_pElemLib(NULL)                 // assigned later by AddrLib::Create()
+{
+    m_configFlags.value = 0;         // start with the raw config-flags word zeroed
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::AddrLib
+*
+*   @brief
+*       Constructor for the AddrLib class with pClient as parameter
+*
+***************************************************************************************************
+*/
+AddrLib::AddrLib(const AddrClient* pClient) :
+    AddrObject(pClient),             // only difference from the default constructor's initializers
+    m_class(BASE_ADDRLIB),
+    m_chipFamily(ADDR_CHIP_FAMILY_IVLD),
+    m_chipRevision(0),
+    m_version(ADDRLIB_VERSION),
+    m_pipes(0),
+    m_banks(0),
+    m_pipeInterleaveBytes(0),
+    m_rowSize(0),
+    m_minPitchAlignPixels(1),
+    m_maxSamples(8),
+    m_pElemLib(NULL)                 // assigned later by AddrLib::Create()
+{
+    m_configFlags.value = 0;         // start with the raw config-flags word zeroed
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::~AddrLib
+*
+*   @brief
+*       Destructor for the AddrLib class
+*
+***************************************************************************************************
+*/
+AddrLib::~AddrLib()
+{
+    if (m_pElemLib) // NOTE(review): keep this guard unless AddrObject's operator delete is confirmed NULL-safe
+    {
+        delete m_pElemLib; // releases the element library assigned in AddrLib::Create()
+    }
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Initialization/Helper
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrLib::Create
+*
+*   @brief
+*       Creates and initializes an AddrLib object and stores its handle in pCreateOut->hLib.
+*
+*   @return
+*       ADDR_OK, or ADDR_PARAMSIZEMISMATCH / ADDR_INVALIDGBREGVALUES / ADDR_ERROR on failure
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::Create(
+    const ADDR_CREATE_INPUT* pCreateIn,     ///< [in] pointer to ADDR_CREATE_INPUT
+    ADDR_CREATE_OUTPUT*      pCreateOut)    ///< [out] pointer to ADDR_CREATE_OUTPUT
+{
+    AddrLib* pLib = NULL;
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (pCreateIn->createFlags.fillSizeFields == TRUE) // Sizes are checked only on request
+    {
+        if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) ||
+            (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if ((returnCode == ADDR_OK)                    &&
+        (pCreateIn->callbacks.allocSysMem != NULL) &&
+        (pCreateIn->callbacks.freeSysMem != NULL)) // Both memory callbacks are mandatory
+    {
+        AddrClient client = { // Client handle plus its callbacks, handed to the HWL init
+            pCreateIn->hClient,
+            pCreateIn->callbacks
+        };
+
+        switch (pCreateIn->chipEngine) // Pick the hardware layer by engine, then by family
+        {
+            case CIASICIDGFXENGINE_SOUTHERNISLAND:
+                switch (pCreateIn->chipFamily)
+                {
+                    case FAMILY_SI:
+                        pLib = AddrSIHwlInit(&client);
+                        break;
+                    case FAMILY_VI:
+                    case FAMILY_CZ: // VI based fusion(carrizo)
+                    case FAMILY_CI:
+                    case FAMILY_KV: // CI based fusion
+                        pLib = AddrCIHwlInit(&client);
+                        break;
+                    default:
+                        ADDR_ASSERT_ALWAYS(); // Unhandled family: pLib stays NULL
+                        break;
+                }
+                break;
+            default:
+                ADDR_ASSERT_ALWAYS(); // Unhandled engine: pLib stays NULL
+                break;
+        }
+    }
+
+    if ((pLib != NULL))
+    {
+        BOOL_32 initValid;
+
+        // Pass createFlags to configFlags first since these flags may be overwritten
+        pLib->m_configFlags.noCubeMipSlicesPad  = pCreateIn->createFlags.noCubeMipSlicesPad;
+        pLib->m_configFlags.fillSizeFields      = pCreateIn->createFlags.fillSizeFields;
+        pLib->m_configFlags.useTileIndex        = pCreateIn->createFlags.useTileIndex;
+        pLib->m_configFlags.useCombinedSwizzle  = pCreateIn->createFlags.useCombinedSwizzle;
+        pLib->m_configFlags.checkLast2DLevel    = pCreateIn->createFlags.checkLast2DLevel;
+        pLib->m_configFlags.useHtileSliceAlign  = pCreateIn->createFlags.useHtileSliceAlign;
+        pLib->m_configFlags.degradeBaseLevel    = pCreateIn->createFlags.degradeBaseLevel;
+        pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
+
+        pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);
+
+        pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);
+
+        // Global parameters initialized and remaining configFlags bits are set as well
+        initValid = pLib->HwlInitGlobalParams(pCreateIn);
+
+        if (initValid)
+        {
+            pLib->m_pElemLib = AddrElemLib::Create(pLib);
+        }
+        else
+        {
+            pLib->m_pElemLib = NULL; // Don't go on allocating element lib
+            returnCode = ADDR_INVALIDGBREGVALUES;
+        }
+
+        if (pLib->m_pElemLib == NULL) // Global init failed or no element lib was created
+        {
+            delete pLib;
+            pLib = NULL;
+            ADDR_ASSERT_ALWAYS();
+        }
+        else
+        {
+            pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags);
+        }
+    }
+
+    pCreateOut->hLib = pLib; // NULL when creation failed
+
+    if ((pLib == NULL) &&
+        (returnCode == ADDR_OK))
+    {
+        // Unknown failures, we return the general error code
+        returnCode = ADDR_ERROR;
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::SetAddrChipFamily
+*
+*   @brief
+*       Translates the atiid.h family ID via the HWL and records m_chipFamily/m_chipRevision
+*   @return
+*      N/A
+***************************************************************************************************
+*/
+VOID AddrLib::SetAddrChipFamily(
+    UINT_32 uChipFamily,        ///< [in] chip family ID defined in atiid.h
+    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
+{
+    // The hardware layer maps the (family, revision) pair onto an AddrChipFamily value.
+    const AddrChipFamily family = HwlConvertChipFamily(uChipFamily, uChipRevision);
+
+    // Getting the invalid marker back is only asserted on, not reported to the caller.
+    ADDR_ASSERT(family != ADDR_CHIP_FAMILY_IVLD);
+
+    m_chipRevision  = uChipRevision;
+    m_chipFamily    = family;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::SetMinPitchAlignPixels
+*
+*   @brief
+*       Stores the minimum pitch alignment in m_minPitchAlignPixels; an input of 0 is stored as 1
+*
+*   @return
+*      N/A
+***************************************************************************************************
+*/
+VOID AddrLib::SetMinPitchAlignPixels(
+    UINT_32 minPitchAlignPixels)    ///< [in] minimum pitch alignment in pixels
+{
+    m_minPitchAlignPixels = (minPitchAlignPixels != 0) ? minPitchAlignPixels : 1;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::GetAddrLib
+*
+*   @brief
+*       Casts an ADDR_HANDLE to an AddrLib pointer
+*   @return
+*      An AddrLib class pointer
+***************************************************************************************************
+*/
+AddrLib * AddrLib::GetAddrLib(
+    ADDR_HANDLE hLib)   ///< [in] library handle to convert
+{
+    AddrLib* const pLib = static_cast<AddrLib *>(hLib);
+    return pLib;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Surface Methods
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceInfo
+*
+*   @brief
+*       Interface function stub of AddrComputeSurfaceInfo; forwards to HwlComputeSurfaceInfo.
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH / ADDR_INVALIDPARAMS for rejected input)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo(
+     const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
+     ADDR_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
+     ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // Structure sizes are checked only when enabled
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    // We suggest client do sanity check but a check here is also good
+    if (pIn->bpp > 128) // Runs regardless of the size check and overrides its result
+    {
+        returnCode = ADDR_INVALIDPARAMS;
+    }
+
+    // Thick modes don't support multisample
+    if (ComputeSurfaceThickness(pIn->tileMode) > 1 && pIn->numSamples > 1)
+    {
+        returnCode = ADDR_INVALIDPARAMS;
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        // Get a local copy of input structure and only reference pIn for unadjusted values
+        ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
+        ADDR_TILEINFO tileInfoNull = {0}; // Zeroed backing store for localIn.pTileInfo
+
+        if (UseTileInfo())
+        {
+            // If the original input has a valid ADDR_TILEINFO pointer then copy its contents.
+            // Otherwise the default 0's in tileInfoNull are used.
+            if (pIn->pTileInfo)
+            {
+                tileInfoNull = *pIn->pTileInfo;
+            }
+            localIn.pTileInfo  = &tileInfoNull;
+        }
+
+        localIn.numSamples = pIn->numSamples == 0 ? 1 : pIn->numSamples; // 0 is treated as 1
+
+        // Do mipmap check first
+        // If format is BCn, pre-pad dimension to power-of-two according to HWL
+        ComputeMipLevel(&localIn);
+
+        if (m_configFlags.checkLast2DLevel)
+        {
+            // Save this level's original height in pixels
+            pOut->height = pIn->height;
+        }
+
+        UINT_32 expandX = 1;
+        UINT_32 expandY = 1;
+        AddrElemMode elemMode; // Only assigned (and only read) when the format is valid
+
+        // Save outputs that may not go through HWL
+        pOut->pixelBits = localIn.bpp;
+        pOut->numSamples = localIn.numSamples;
+        pOut->last2DLevel = FALSE;
+
+#if !ALT_TEST
+        if (localIn.numSamples > 1)
+        {
+            ADDR_ASSERT(localIn.mipLevel == 0);
+        }
+#endif
+
+        if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion
+        {
+            // Get compression/expansion factors and element mode
+            // (which indicates compression/expansion)
+            localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format,
+                                                        &elemMode,
+                                                        &expandX,
+                                                        &expandY);
+
+            // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is
+            // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear-
+            // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw
+            // restrictions are different.
+            // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround
+            // but we use this flag to skip RestoreSurfaceInfo below
+
+            if ((elemMode == ADDR_EXPANDED) &&
+                (expandX > 1))
+            {
+                ADDR_ASSERT(localIn.tileMode == ADDR_TM_LINEAR_ALIGNED || localIn.height == 1);
+            }
+
+            GetElemLib()->AdjustSurfaceInfo(elemMode,
+                                            expandX,
+                                            expandY,
+                                            &localIn.bpp,
+                                            &localIn.basePitch,
+                                            &localIn.width,
+                                            &localIn.height);
+
+            // bpp, basePitch, width and height of localIn may have been overwritten above
+        }
+        else if (localIn.bpp != 0)
+        {
+            localIn.width  = (localIn.width != 0) ? localIn.width : 1;   // Clamp 0 to 1
+            localIn.height = (localIn.height != 0) ? localIn.height : 1; // Clamp 0 to 1
+        }
+        else // Rule out some invalid parameters
+        {
+            ADDR_ASSERT_ALWAYS();
+
+            returnCode = ADDR_INVALIDPARAMS;
+        }
+
+        // Check mipmap after surface expansion
+        if (returnCode == ADDR_OK)
+        {
+            returnCode = PostComputeMipLevel(&localIn, pOut);
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            if (UseTileIndex(localIn.tileIndex))
+            {
+                // Make sure pTileInfo is not NULL
+                ADDR_ASSERT(localIn.pTileInfo);
+
+                UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags);
+
+                INT_32 macroModeIndex = TileIndexNoMacroIndex;
+
+                if (localIn.tileIndex != TileIndexLinearGeneral)
+                {
+                    // Try finding a macroModeIndex
+                    macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex,
+                                                              localIn.flags,
+                                                              localIn.bpp,
+                                                              numSamples,
+                                                              localIn.pTileInfo,
+                                                              &localIn.tileMode,
+                                                              &localIn.tileType);
+                }
+
+                // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
+                if (macroModeIndex == TileIndexNoMacroIndex)
+                {
+                    returnCode = HwlSetupTileCfg(localIn.tileIndex, macroModeIndex,
+                                                 localIn.pTileInfo,
+                                                 &localIn.tileMode, &localIn.tileType);
+                }
+                // If macroModeIndex is invalid, then assert this is not macro tiled
+                else if (macroModeIndex == TileIndexInvalid)
+                {
+                    ADDR_ASSERT(!IsMacroTiled(localIn.tileMode));
+                }
+            }
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            AddrTileMode tileMode = localIn.tileMode;
+            AddrTileType tileType = localIn.tileType;
+
+            // HWL layer may override tile mode if necessary
+            if (HwlOverrideTileMode(&localIn, &tileMode, &tileType))
+            {
+                localIn.tileMode = tileMode;
+                localIn.tileType = tileType;
+            }
+            // Degrade base level if applicable
+            if (DegradeBaseLevel(&localIn, &tileMode))
+            {
+                localIn.tileMode = tileMode;
+            }
+        }
+
+        // Call main function to compute surface info
+        if (returnCode == ADDR_OK)
+        {
+            returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            // Since bpp might be changed we just pass it through
+            pOut->bpp  = localIn.bpp;
+
+            // Start from the computed pitch/height; RestoreSurfaceInfo below may adjust them
+            pOut->pixelPitch    = pOut->pitch;
+            pOut->pixelHeight   = pOut->height;
+
+#if DEBUG
+            if (localIn.flags.display)
+            {
+                ADDR_ASSERT((pOut->pitchAlign % 32) == 0);
+            }
+#endif //DEBUG
+
+            if (localIn.format != ADDR_FMT_INVALID)
+            {
+                //
+                // 96 bits surface of level 1+ requires element pitch of 32 bits instead
+                // In hwl function we skip multiplication of 3 then we should skip division of 3
+                // We keep pitch that represents 32 bit element instead of 96 bits since we
+                // will get an odd number if divided by 3.
+                //
+                if (!((expandX == 3) && (localIn.mipLevel > 0)))
+                {
+
+                    GetElemLib()->RestoreSurfaceInfo(elemMode,
+                                                     expandX,
+                                                     expandY,
+                                                     &localIn.bpp,
+                                                     &pOut->pixelPitch,
+                                                     &pOut->pixelHeight);
+                }
+            }
+
+            if (localIn.flags.qbStereo)
+            {
+                if (pOut->pStereoInfo) // Stereo info is computed only if the caller asked for it
+                {
+                    ComputeQbStereoInfo(pOut);
+                }
+            }
+
+            if (localIn.flags.volume) // For volume sliceSize equals to all z-slices
+            {
+                pOut->sliceSize = pOut->surfSize;
+            }
+            else // For array: sliceSize is likely to have slice-padding (the last one)
+            {
+                pOut->sliceSize = pOut->surfSize / pOut->depth;
+
+                // array or cubemap
+                if (pIn->numSlices > 1)
+                {
+                    // If this is the last slice then add the padding size to this slice
+                    if (pIn->slice == (pIn->numSlices - 1))
+                    {
+                        pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices);
+                    }
+                    else if (m_configFlags.checkLast2DLevel)
+                    {
+                        // Reset last2DLevel flag if this is not the last array slice
+                        pOut->last2DLevel = FALSE;
+                    }
+                }
+            }
+
+            pOut->pitchTileMax = pOut->pitch / 8 - 1; // NOTE(review): 8/64 presumably = 8x8 tile
+            pOut->heightTileMax = pOut->height / 8 - 1;
+            pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1;
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceAddrFromCoord
+*
+*   @brief
+*       Interface function stub of ComputeSurfaceAddrFromCoord.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSurfaceAddrFromCoord(
+    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
+    ) const
+{
+    // Structure sizes are validated only while GetFillSizeFieldsFlags() reports TRUE.
+    const BOOL_32 checkSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (checkSizes &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_E_RETURNCODE status = ADDR_OK;
+
+    // Scratch storage; both objects are touched only on the tile-index path below.
+    ADDR_TILEINFO tmpTileInfo;
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at the scratch structure.
+        input = *pIn;
+        input.pTileInfo = &tmpTileInfo;
+
+        const ADDR_SURFACE_FLAGS noFlags = {{0}};
+        const UINT_32 fragCount = GetNumFragments(pIn->numSamples, pIn->numFrags);
+
+        // Ask the HWL for a macro mode index first.
+        const INT_32 macroIdx = HwlComputeMacroModeIndex(input.tileIndex,
+                                                         noFlags,
+                                                         input.bpp,
+                                                         fragCount,
+                                                         input.pTileInfo,
+                                                         &input.tileMode,
+                                                         &input.tileType);
+
+        if (macroIdx == TileIndexNoMacroIndex)
+        {
+            // No macro mode index is needed: HwlSetupTileCfg supplies the tile info.
+            status = HwlSetupTileCfg(input.tileIndex, macroIdx,
+                                     input.pTileInfo,
+                                     &input.tileMode, &input.tileType);
+        }
+        else if (macroIdx == TileIndexInvalid)
+        {
+            // An invalid index is tolerated only when the mode is not macro tiled.
+            ADDR_ASSERT(!IsMacroTiled(input.tileMode));
+        }
+
+        // From here on the private copy replaces the caller's input.
+        pIn = &input;
+    }
+
+    if (status == ADDR_OK)
+    {
+        status = HwlComputeSurfaceAddrFromCoord(pIn, pOut);
+    }
+
+    if (status == ADDR_OK)
+    {
+        pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024));
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceCoordFromAddr
+*
+*   @brief
+*       Interface function stub of ComputeSurfaceCoordFromAddr.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSurfaceCoordFromAddr(
+    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
+    ) const
+{
+    // Structure sizes are validated only while GetFillSizeFieldsFlags() reports TRUE.
+    const BOOL_32 checkSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (checkSizes &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_E_RETURNCODE status = ADDR_OK;
+
+    // Scratch storage; both objects are touched only on the tile-index path below.
+    ADDR_TILEINFO tmpTileInfo;
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at the scratch structure.
+        input = *pIn;
+        input.pTileInfo = &tmpTileInfo;
+
+        const ADDR_SURFACE_FLAGS noFlags = {{0}};
+        const UINT_32 fragCount = GetNumFragments(pIn->numSamples, pIn->numFrags);
+
+        // Ask the HWL for a macro mode index first.
+        const INT_32 macroIdx = HwlComputeMacroModeIndex(input.tileIndex,
+                                                         noFlags,
+                                                         input.bpp,
+                                                         fragCount,
+                                                         input.pTileInfo,
+                                                         &input.tileMode,
+                                                         &input.tileType);
+
+        if (macroIdx == TileIndexNoMacroIndex)
+        {
+            // No macro mode index is needed: HwlSetupTileCfg supplies the tile info.
+            status = HwlSetupTileCfg(input.tileIndex, macroIdx,
+                                     input.pTileInfo,
+                                     &input.tileMode, &input.tileType);
+        }
+        else if (macroIdx == TileIndexInvalid)
+        {
+            // An invalid index is tolerated only when the mode is not macro tiled.
+            ADDR_ASSERT(!IsMacroTiled(input.tileMode));
+        }
+
+        // From here on the private copy replaces the caller's input.
+        pIn = &input;
+    }
+
+    if (status == ADDR_OK)
+    {
+        status = HwlComputeSurfaceCoordFromAddr(pIn, pOut);
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSliceTileSwizzle
+*
+*   @brief
+*       Interface function stub of ComputeSliceTileSwizzle.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSliceTileSwizzle(
+    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    // Structure sizes are validated only while GetFillSizeFieldsFlags() reports TRUE.
+    const BOOL_32 checkSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (checkSizes &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_E_RETURNCODE status = ADDR_OK;
+
+    // Scratch storage; both objects are touched only on the tile-index path below.
+    ADDR_TILEINFO tmpTileInfo;
+    ADDR_COMPUTE_SLICESWIZZLE_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at the scratch structure.
+        localIn = *pIn;
+        localIn.pTileInfo = &tmpTileInfo;
+
+        status = HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex,
+                                 localIn.pTileInfo, &localIn.tileMode);
+
+        // From here on the private copy replaces the caller's input.
+        pIn = &localIn;
+    }
+
+    if (status == ADDR_OK)
+    {
+        status = HwlComputeSliceTileSwizzle(pIn, pOut);
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ExtractBankPipeSwizzle
+*
+*   @brief
+*       Interface function stub of AddrExtractBankPipeSwizzle.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ExtractBankPipeSwizzle(
+    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    // Structure sizes are validated only while GetFillSizeFieldsFlags() reports TRUE.
+    const BOOL_32 checkSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (checkSizes &&
+        ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) ||
+         (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_E_RETURNCODE status = ADDR_OK;
+
+    // Scratch storage; both objects are touched only on the tile-index path below.
+    ADDR_TILEINFO tmpTileInfo;
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at the scratch structure.
+        localIn = *pIn;
+        localIn.pTileInfo = &tmpTileInfo;
+
+        status = HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);
+
+        // From here on the private copy replaces the caller's input.
+        pIn = &localIn;
+    }
+
+    if (status == ADDR_OK)
+    {
+        status = HwlExtractBankPipeSwizzle(pIn, pOut);
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::CombineBankPipeSwizzle
+*
+*   @brief
+*       Interface function stub of AddrCombineBankPipeSwizzle.
+*
+*   @return
+*       ADDR_PARAMSIZEMISMATCH on a failed size check; otherwise the code returned by
+*       HwlSetupTileCfg or HwlCombineBankPipeSwizzle
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::CombineBankPipeSwizzle(
+    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE)
+    {
+        if ((pIn->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;
+        ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input;
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            // Work on a copy whose tile info points at temporary storage
+            input = *pIn;
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+            pIn = &input; // From here on the copy replaces the caller's input
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle,
+                                                   pIn->pipeSwizzle,
+                                                   pIn->pTileInfo,
+                                                   pIn->baseAddr,
+                                                   &pOut->tileSwizzle);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeBaseSwizzle
+*
+*   @brief
+*       Interface function stub of AddrComputeBaseSwizzle.
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeBaseSwizzle(
+    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    // Structure sizes are validated only while GetFillSizeFieldsFlags() reports TRUE.
+    const BOOL_32 checkSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (checkSizes &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Scratch storage; both objects are touched only on the tile-index path below.
+    ADDR_TILEINFO tmpTileInfo;
+    ADDR_COMPUTE_BASE_SWIZZLE_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at the scratch structure.
+        localIn = *pIn;
+        localIn.pTileInfo = &tmpTileInfo;
+
+        const ADDR_E_RETURNCODE cfgStatus =
+            HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);
+
+        if (cfgStatus != ADDR_OK)
+        {
+            return cfgStatus;
+        }
+
+        // From here on the private copy replaces the caller's input.
+        pIn = &localIn;
+    }
+
+    // Only macro tiled modes get a swizzle from the HWL; every other mode reports 0.
+    if (!IsMacroTiled(pIn->tileMode))
+    {
+        pOut->tileSwizzle = 0;
+        return ADDR_OK;
+    }
+
+    return HwlComputeBaseSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeFmaskInfo
+*
+*   @brief
+*       Interface function stub of ComputeFmaskInfo; forwards to HwlComputeFmaskInfo.
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_INVALIDPARAMS for thick tile modes or numSamples <= 1)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeFmaskInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
+    )
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // Structure sizes are checked only when enabled
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    // No thick MSAA (this check runs regardless of, and overrides, the size check result)
+    if (ComputeSurfaceThickness(pIn->tileMode) > 1)
+    {
+        returnCode = ADDR_INVALIDPARAMS;
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull; // Temporary tile info, used if pOut provides none
+        ADDR_COMPUTE_FMASK_INFO_INPUT input;
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+
+            if (pOut->pTileInfo)
+            {
+                // The output structure provides tile info storage; compute into it directly
+                input.pTileInfo = pOut->pTileInfo;
+            }
+            else
+            {
+                input.pTileInfo = &tileInfoNull; // Use temp tile info for calculation
+            }
+
+            ADDR_SURFACE_FLAGS flags = {{0}};
+            flags.fmask = 1;
+
+            // Try finding a macroModeIndex
+            INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex,
+                                                             flags,
+                                                             HwlComputeFmaskBits(pIn, NULL),
+                                                             pIn->numSamples,
+                                                             input.pTileInfo,
+                                                             &input.tileMode);
+
+            // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
+            if (macroModeIndex == TileIndexNoMacroIndex)
+            {
+                returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex,
+                                             input.pTileInfo, &input.tileMode);
+            }
+
+            ADDR_ASSERT(macroModeIndex != TileIndexInvalid);
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            if (pIn->numSamples > 1)
+            {
+                returnCode = HwlComputeFmaskInfo(pIn, pOut);
+            }
+            else
+            {
+                memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)); // Also clears pOut->size
+
+                returnCode = ADDR_INVALIDPARAMS;
+            }
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeFmaskAddrFromCoord
+*
+*   @brief
+*       Interface function stub of ComputeFmaskAddrFromCoord.
+*
+*   @note
+*       pIn->numSamples must be greater than 1; the computation itself is delegated to
+*       HwlComputeFmaskAddrFromCoord.
+*
+*   @return
+*       ADDR_PARAMSIZEMISMATCH when size checking is enabled and a structure size is wrong,
+*       ADDR_INVALIDPARAMS when pIn->numSamples is not greater than 1,
+*       otherwise the code returned by HwlComputeFmaskAddrFromCoord
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeFmaskAddrFromCoord(
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    // Structure sizes are validated only while GetFillSizeFieldsFlags() reports TRUE.
+    const BOOL_32 checkSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (checkSizes &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // The assert flags a bad sample count; the check below also rejects it with an error.
+    ADDR_ASSERT(pIn->numSamples > 1);
+
+    if (pIn->numSamples <= 1)
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    // All checks passed: the hardware layer performs the actual computation.
+    return HwlComputeFmaskAddrFromCoord(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeFmaskCoordFromAddr
+*
+*   @brief
+*       Interface function stub of ComputeFmaskCoordFromAddr.
+*
+*   @note
+*       pIn->numSamples must be greater than 1; the computation itself is delegated to
+*       HwlComputeFmaskCoordFromAddr.
+*
+*   @return
+*       ADDR_PARAMSIZEMISMATCH when size checking is enabled and a structure size is wrong,
+*       ADDR_INVALIDPARAMS when pIn->numSamples is not greater than 1,
+*       otherwise the code returned by HwlComputeFmaskCoordFromAddr
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeFmaskCoordFromAddr(
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*  pIn,     ///< [in] input structure
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut           ///< [out] output structure
+    ) const
+{
+    // Structure sizes are validated only while GetFillSizeFieldsFlags() reports TRUE.
+    const BOOL_32 checkSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (checkSizes &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // The assert flags a bad sample count; the check below also rejects it with an error.
+    ADDR_ASSERT(pIn->numSamples > 1);
+
+    if (pIn->numSamples <= 1)
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    // All checks passed: the hardware layer performs the actual computation.
+    return HwlComputeFmaskCoordFromAddr(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ConvertTileInfoToHW
+*
+*   @brief
+*       Convert tile info from real value to HW register value in HW layer
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, or the result of the Hwl calls made below)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ConvertTileInfoToHW(
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) ||
+            (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;            // scratch storage handed to HwlSetupTileCfg
+        ADDR_CONVERT_TILEINFOTOHW_INPUT input; // local copy of *pIn, used on the tile-index path
+        // if pIn->reverse is TRUE, indices are ignored
+        if (pIn->reverse == FALSE && UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+
+            pIn = &input; // from here on the Hwl call sees the local copy, not the caller's input
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            returnCode = HwlConvertTileInfoToHW(pIn, pOut);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ConvertTileIndex
+*
+*   @brief
+*       Convert tile index to tile mode/type/info; optionally convert the tile info to HW values
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, or the result of the Hwl calls made below)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ConvertTileIndex(
+    const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input structure
+    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut      ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) ||
+            (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        // The Hwl call receives pOut's tile info, tile mode and tile type fields directly.
+        returnCode = HwlSetupTileCfg(pIn->tileIndex, pIn->macroModeIndex,
+                                     pOut->pTileInfo, &pOut->tileMode, &pOut->tileType);
+
+        if (returnCode == ADDR_OK && pIn->tileInfoHw) // HW conversion only when requested
+        {
+            ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0};
+            ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0};
+
+            hwInput.pTileInfo = pOut->pTileInfo;
+            hwInput.tileIndex = -1; // presumably means "no tile index, use pTileInfo" - TODO confirm
+            hwOutput.pTileInfo = pOut->pTileInfo; // input and output share one tile info buffer
+
+            returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ConvertTileIndex1
+*
+*   @brief
+*       Convert tile index to tile mode/type/info (input carries bpp/numSamples, no macro index)
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, ADDR_OK, or HwlConvertTileInfoToHW's result)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ConvertTileIndex1(
+    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,   ///< [in] input structure
+    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut         ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) ||
+            (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_SURFACE_FLAGS flags = {{0}}; // all-zero surface flags are passed to the Hwl call
+        // NOTE(review): whatever HwlComputeMacroModeIndex returns is not used or checked here.
+        HwlComputeMacroModeIndex(pIn->tileIndex, flags, pIn->bpp, pIn->numSamples,
+                                 pOut->pTileInfo, &pOut->tileMode, &pOut->tileType);
+
+        if (pIn->tileInfoHw) // HW conversion only when requested
+        {
+            ADDR_CONVERT_TILEINFOTOHW_INPUT hwInput = {0};
+            ADDR_CONVERT_TILEINFOTOHW_OUTPUT hwOutput = {0};
+
+            hwInput.pTileInfo = pOut->pTileInfo;
+            hwInput.tileIndex = -1; // presumably means "no tile index, use pTileInfo" - TODO confirm
+            hwOutput.pTileInfo = pOut->pTileInfo; // input and output share one tile info buffer
+
+            returnCode = HwlConvertTileInfoToHW(&hwInput, &hwOutput);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::GetTileIndex
+*
+*   @brief
+*       Get tile index from tile mode/type/info; the lookup itself is done by HwlGetTileIndex
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, or the result of HwlGetTileIndex)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::GetTileIndex(
+    const ADDR_GET_TILEINDEX_INPUT* pIn, ///< [in] input structure
+    ADDR_GET_TILEINDEX_OUTPUT* pOut      ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) ||
+            (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        returnCode = HwlGetTileIndex(pIn, pOut);
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceThickness
+*
+*   @brief
+*       Compute surface thickness by looking up the tile mode in the m_modeFlags table
+*
+*   @return
+*       Surface thickness (the thickness field of the m_modeFlags entry for tileMode)
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeSurfaceThickness(
+    AddrTileMode tileMode)    ///< [in] tile mode
+{
+    return m_modeFlags[tileMode].thickness; // NOTE(review): tileMode is used as an unchecked index
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               CMASK/HTILE
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeHtileInfo
+*
+*   @brief
+*       Interface function stub of AddrComputeHtileInfo
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, HwlSetupTileCfg's error, or ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeHtileInfo(
+    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] input structure
+    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // NOTE(review): pIn is read here, before the size validation below.
+    BOOL_32 isWidth8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
+    BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;          // scratch storage handed to HwlSetupTileCfg
+        ADDR_COMPUTE_HTILE_INFO_INPUT input; // local copy of *pIn, used on the tile-index path
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            pOut->bpp = ComputeHtileInfo(pIn->flags, // the internal overload returns the htile bpp
+                                         pIn->pitch,
+                                         pIn->height,
+                                         pIn->numSlices,
+                                         pIn->isLinear,
+                                         isWidth8,
+                                         isHeight8,
+                                         pIn->pTileInfo,
+                                         &pOut->pitch,
+                                         &pOut->height,
+                                         &pOut->htileBytes,
+                                         &pOut->macroWidth,
+                                         &pOut->macroHeight,
+                                         &pOut->sliceSize,
+                                         &pOut->baseAlign);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskInfo
+*
+*   @brief
+*       Interface function stub of AddrComputeCmaskInfo
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, HwlSetupTileCfg's error, or the overload's result)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo(
+    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
+    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;          // scratch storage handed to HwlSetupTileCfg
+        ADDR_COMPUTE_CMASK_INFO_INPUT input; // local copy of *pIn, used on the tile-index path
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            returnCode = ComputeCmaskInfo(pIn->flags, // internal overload; fills the pOut fields
+                                          pIn->pitch,
+                                          pIn->height,
+                                          pIn->numSlices,
+                                          pIn->isLinear,
+                                          pIn->pTileInfo,
+                                          &pOut->pitch,
+                                          &pOut->height,
+                                          &pOut->cmaskBytes,
+                                          &pOut->macroWidth,
+                                          &pOut->macroHeight,
+                                          &pOut->sliceSize,
+                                          &pOut->baseAlign,
+                                          &pOut->blockMax);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeDccInfo
+*
+*   @brief
+*       Interface function to compute DCC key info
+*
+*   @return
+*       ADDR_PARAMSIZEMISMATCH, HwlSetupTileCfg's error, or the return code of HwlComputeDccInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeDccInfo(
+    const ADDR_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
+    ADDR_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE ret = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT)))
+        {
+            ret = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (ret == ADDR_OK)
+    {
+        ADDR_COMPUTE_DCCINFO_INPUT input; // local copy of *pIn, used on the tile-index path
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Unlike the sibling stubs, the tile info lives inside the input copy itself.
+            ret = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex,
+                                  &input.tileInfo, &input.tileMode);
+
+            pIn = &input; // from here on the Hwl call sees the local copy, not the caller's input
+        }
+
+        if (ADDR_OK == ret)
+        {
+            ret = HwlComputeDccInfo(pIn, pOut);
+        }
+    }
+
+    return ret;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeHtileAddrFromCoord
+*
+*   @brief
+*       Interface function stub of AddrComputeHtileAddrFromCoord
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, HwlSetupTileCfg's error, or ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeHtileAddrFromCoord(
+    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // NOTE(review): pIn is read here, before the size validation below.
+    BOOL_32 isWidth8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
+    BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;                   // scratch storage handed to HwlSetupTileCfg
+        ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT input; // local copy of *pIn (tile-index path)
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
+                                                      pIn->height,
+                                                      pIn->x,
+                                                      pIn->y,
+                                                      pIn->slice,
+                                                      pIn->numSlices,
+                                                      1, // htile passes 1 here, the cmask stubs 2
+                                                      pIn->isLinear,
+                                                      isWidth8,
+                                                      isHeight8,
+                                                      pIn->pTileInfo,
+                                                      &pOut->bitPosition);
+        }
+    }
+
+    return returnCode;
+
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeHtileCoordFromAddr
+*
+*   @brief
+*       Interface function stub of AddrComputeHtileCoordFromAddr
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, HwlSetupTileCfg's error, or ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeHtileCoordFromAddr(
+    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // NOTE(review): pIn is read here, before the size validation below.
+    BOOL_32 isWidth8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
+    BOOL_32 isHeight8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;                   // scratch storage handed to HwlSetupTileCfg
+        ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT input; // local copy of *pIn (tile-index path)
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            HwlComputeXmaskCoordFromAddr(pIn->addr, // results are written to pOut->x/y/slice
+                                         pIn->bitPosition,
+                                         pIn->pitch,
+                                         pIn->height,
+                                         pIn->numSlices,
+                                         1, // htile passes 1 here, the cmask stubs pass 2
+                                         pIn->isLinear,
+                                         isWidth8,
+                                         isHeight8,
+                                         pIn->pTileInfo,
+                                         &pOut->x,
+                                         &pOut->y,
+                                         &pOut->slice);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskAddrFromCoord
+*
+*   @brief
+*       Interface function stub of AddrComputeCmaskAddrFromCoord
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, an Hwl call's error, or ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskAddrFromCoord(
+    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;                   // scratch storage handed to HwlSetupTileCfg
+        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT input; // local copy of *pIn (tile-index path)
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            if (pIn->flags.tcCompatible == TRUE) // tcCompatible cmask uses its own Hwl routine
+            {
+                returnCode = HwlComputeCmaskAddrFromCoord(pIn, pOut);
+            }
+            else
+            {
+                pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
+                                                          pIn->height,
+                                                          pIn->x,
+                                                          pIn->y,
+                                                          pIn->slice,
+                                                          pIn->numSlices,
+                                                          2, // cmask passes 2, the htile stubs 1
+                                                          pIn->isLinear,
+                                                          FALSE, //this is cmask, isWidth8 is not needed
+                                                          FALSE, //this is cmask, isHeight8 is not needed
+                                                          pIn->pTileInfo,
+                                                          &pOut->bitPosition);
+            }
+
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskCoordFromAddr
+*
+*   @brief
+*       Interface function stub of AddrComputeCmaskCoordFromAddr
+*
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH, HwlSetupTileCfg's error, or ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskCoordFromAddr(
+    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE) // size fields are checked only when this flag is set
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;                   // scratch storage handed to HwlSetupTileCfg
+        ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT input; // local copy of *pIn (tile-index path)
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            HwlComputeXmaskCoordFromAddr(pIn->addr, // results are written to pOut->x/y/slice
+                                         pIn->bitPosition,
+                                         pIn->pitch,
+                                         pIn->height,
+                                         pIn->numSlices,
+                                         2, // cmask passes 2 here, the htile stubs pass 1
+                                         pIn->isLinear,
+                                         FALSE, // same as ComputeCmaskAddrFromCoord: no isWidth8
+                                         FALSE, // same as ComputeCmaskAddrFromCoord: no isHeight8
+                                         pIn->pTileInfo,
+                                         &pOut->x,
+                                         &pOut->y,
+                                         &pOut->slice);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeTileDataWidthAndHeight
+*
+*   @brief
+*       Compute the squared cache shape for per-tile data (CMASK and HTILE)
+*
+*   @return
+*       N/A (results are written through pMacroWidth and pMacroHeight)
+*
+*   @note
+*       MacroWidth and macroHeight are measured in pixels
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeTileDataWidthAndHeight(
+    UINT_32         bpp,             ///< [in] bits per pixel
+    UINT_32         cacheBits,       ///< [in] bits of cache
+    ADDR_TILEINFO*  pTileInfo,       ///< [in] Tile info
+    UINT_32*        pMacroWidth,     ///< [out] macro tile width
+    UINT_32*        pMacroHeight     ///< [out] macro tile height
+    ) const
+{
+    UINT_32 height = 1;
+    UINT_32 width  = cacheBits / bpp;         // start with everything in a single row
+    UINT_32 pipes  = HwlGetPipes(pTileInfo);
+
+    // Double height until the macro-tile is close to square
+    // Height can only be doubled if width is even
+
+    while ((width > height * 2 * pipes) && !(width & 1))
+    {
+        width  /= 2;
+        height *= 2;
+    }
+
+    *pMacroWidth  = 8 * width;           // presumably 8 = pixels per tile edge - TODO confirm
+    *pMacroHeight = 8 * height * pipes;  // height is additionally scaled by the pipe count
+
+    // Note: The above iterative computation is equivalent to the following
+    //
+    //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2);
+    //int macroHeight = pow2( 3+log2(pipes)+log2_height );
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlComputeTileDataWidthAndHeightLinear
+*
+*   @brief
+*       Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
+*
+*   @return
+*       N/A (results are written through pMacroWidth and pMacroHeight)
+*
+*   @note
+*       MacroWidth and macroHeight are measured in pixels
+***************************************************************************************************
+*/
+VOID AddrLib::HwlComputeTileDataWidthAndHeightLinear(
+    UINT_32*        pMacroWidth,     ///< [out] macro tile width
+    UINT_32*        pMacroHeight,    ///< [out] macro tile height
+    UINT_32         bpp,             ///< [in] bits per pixel
+    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info (not referenced by this implementation)
+    ) const
+{
+    ADDR_ASSERT(bpp != 4);              // Cmask does not support linear layout prior to SI
+    *pMacroWidth  = 8 * 512 / bpp;      // Align width to 512-bit memory accesses
+    *pMacroHeight = 8 * m_pipes;        // Align height to number of pipes
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeHtileInfo
+*
+*   @brief
+*       Compute htile pitch, height, size in bytes, macro-tile dimensions and base alignment
+*
+*   @return
+*       Htile bpp i.e. How many bits for an 8x8 tile
+*       Also returns by output parameters:
+*       *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size*
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeHtileInfo(
+    ADDR_HTILE_FLAGS flags,             ///< [in] htile flags
+    UINT_32          pitchIn,           ///< [in] pitch input
+    UINT_32          heightIn,          ///< [in] height input
+    UINT_32          numSlices,         ///< [in] number of slices
+    BOOL_32          isLinear,          ///< [in] if it is linear mode
+    BOOL_32          isWidth8,          ///< [in] if htile block width is 8
+    BOOL_32          isHeight8,         ///< [in] if htile block height is 8
+    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
+    UINT_32*         pPitchOut,         ///< [out] pitch output
+    UINT_32*         pHeightOut,        ///< [out] height output
+    UINT_64*         pHtileBytes,       ///< [out] htile bytes as returned by HwlComputeHtileBytes
+    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels
+    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels
+    UINT_64*         pSliceSize,        ///< [out] slice size in bytes
+    UINT_32*         pBaseAlign         ///< [out] base alignment
+    ) const
+{
+
+    UINT_32 macroWidth;
+    UINT_32 macroHeight;
+    UINT_32 baseAlign;
+    UINT_64 surfBytes;
+    UINT_64 sliceBytes;
+
+    numSlices = Max(1u, numSlices);     // a slice count of 0 is treated as 1
+
+    const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8);
+    const UINT_32 cacheBits = HtileCacheBits;
+
+    if (isLinear)
+    {
+        HwlComputeTileDataWidthAndHeightLinear(&macroWidth,
+                                               &macroHeight,
+                                               bpp,
+                                               pTileInfo);
+    }
+    else
+    {
+        ComputeTileDataWidthAndHeight(bpp,
+                                      cacheBits,
+                                      pTileInfo,
+                                      &macroWidth,
+                                      &macroHeight);
+    }
+
+    *pPitchOut = PowTwoAlign(pitchIn,  macroWidth);     // pitch aligned to the macro-tile width
+    *pHeightOut = PowTwoAlign(heightIn,  macroHeight);  // height aligned to the macro-tile height
+
+    baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo);
+
+    surfBytes = HwlComputeHtileBytes(*pPitchOut,
+                                     *pHeightOut,
+                                     bpp,
+                                     isLinear,
+                                     numSlices,
+                                     &sliceBytes,
+                                     baseAlign);
+
+    *pHtileBytes = surfBytes;           // written unconditionally: pHtileBytes must not be NULL
+
+    //
+    // Use SafeAssign since they are optional
+    //
+    SafeAssign(pMacroWidth, macroWidth);
+
+    SafeAssign(pMacroHeight, macroHeight);
+
+    SafeAssign(pSliceSize,  sliceBytes);
+
+    SafeAssign(pBaseAlign, baseAlign);
+
+    return bpp;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskBaseAlign
+*
+*   @brief
+*       Compute cmask base alignment
+*
+*   @return
+*       Cmask base alignment: m_pipeInterleaveBytes * pipes, times banks when tcCompatible is set
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeCmaskBaseAlign(
+    ADDR_CMASK_FLAGS flags,           ///< [in] Cmask flags
+    ADDR_TILEINFO*   pTileInfo        ///< [in] Tile info
+    ) const
+{
+    UINT_32 baseAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo);
+
+    if (flags.tcCompatible)
+    {
+        ADDR_ASSERT(pTileInfo != NULL);
+        if (pTileInfo) // the bank factor is skipped when no tile info is supplied
+        {
+            baseAlign *= pTileInfo->banks;
+        }
+    }
+
+    return baseAlign;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskBytes
+*
+*   @brief
+*       Compute cmask size in bytes
+*
+*   @return
+*       Cmask size in bytes: BITS_TO_BYTES(pitch*height*numSlices*CmaskElemBits) / MicroTilePixels
+***************************************************************************************************
+*/
+UINT_64 AddrLib::ComputeCmaskBytes(
+    UINT_32 pitch,        ///< [in] pitch
+    UINT_32 height,       ///< [in] height
+    UINT_32 numSlices     ///< [in] number of slices
+    ) const
+{
+    return BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * numSlices * CmaskElemBits) /
+        MicroTilePixels; // pitch is widened to UINT_64 first, so the product is done in 64 bits
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskInfo
+*
+*   @brief
+*       Compute cmask pitch, height, size in bytes, macro-tile dimensions and block max
+*
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS when the computed block max exceeds the Hwl maximum.
+*       Also by output parameters: cmask pitch, height, total size in bytes, macro-tile dimensions
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo(
+    ADDR_CMASK_FLAGS flags,            ///< [in] cmask flags
+    UINT_32          pitchIn,           ///< [in] pitch input
+    UINT_32          heightIn,          ///< [in] height input
+    UINT_32          numSlices,         ///< [in] number of slices
+    BOOL_32          isLinear,          ///< [in] is linear mode
+    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
+    UINT_32*         pPitchOut,         ///< [out] pitch output
+    UINT_32*         pHeightOut,        ///< [out] height output
+    UINT_64*         pCmaskBytes,       ///< [out] total cmask bytes (slice bytes * numSlices)
+    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels
+    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels
+    UINT_64*         pSliceSize,        ///< [out] slice size in bytes
+    UINT_32*         pBaseAlign,        ///< [out] base alignment
+    UINT_32*         pBlockMax          ///< [out] block max == slice / 128 / 128 - 1
+    ) const
+{
+    UINT_32 macroWidth;
+    UINT_32 macroHeight;
+    UINT_32 baseAlign;
+    UINT_64 surfBytes;
+    UINT_64 sliceBytes;
+
+    numSlices = Max(1u, numSlices);     // a slice count of 0 is treated as 1
+
+    const UINT_32 bpp = CmaskElemBits;
+    const UINT_32 cacheBits = CmaskCacheBits;
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (isLinear)
+    {
+        HwlComputeTileDataWidthAndHeightLinear(&macroWidth,
+                                               &macroHeight,
+                                               bpp,
+                                               pTileInfo);
+    }
+    else
+    {
+        ComputeTileDataWidthAndHeight(bpp,
+                                      cacheBits,
+                                      pTileInfo,
+                                      &macroWidth,
+                                      &macroHeight);
+    }
+    // Round up to the macro-tile size; the mask form only works for power-of-two sizes.
+    *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1);
+    *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1);
+
+
+    sliceBytes = ComputeCmaskBytes(*pPitchOut,
+                                   *pHeightOut,
+                                   1);
+
+    baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo);
+
+    while (sliceBytes % baseAlign)      // pad the height until the slice size is baseAlign-aligned
+    {
+        *pHeightOut += macroHeight;
+
+        sliceBytes = ComputeCmaskBytes(*pPitchOut,
+                                       *pHeightOut,
+                                       1);
+    }
+
+    surfBytes = sliceBytes * numSlices;
+
+    *pCmaskBytes = surfBytes;           // written unconditionally: pCmaskBytes must not be NULL
+
+    //
+    // Use SafeAssign since they are optional
+    //
+    SafeAssign(pMacroWidth, macroWidth);
+
+    SafeAssign(pMacroHeight, macroHeight);
+
+    SafeAssign(pBaseAlign, baseAlign);
+
+    SafeAssign(pSliceSize, sliceBytes);
+
+    UINT_32 slice = (*pPitchOut) * (*pHeightOut);   // slice area from the aligned pitch and height
+    UINT_32 blockMax = slice / 128 / 128 - 1;       // count of 128x128 blocks, minus one
+
+#if DEBUG
+    if (slice % (64*256) != 0)
+    {
+        ADDR_ASSERT_ALWAYS();
+    }
+#endif //DEBUG
+
+    UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax();
+
+    if (blockMax > maxBlockMax)         // clamp, but still report the request as invalid
+    {
+        blockMax = maxBlockMax;
+        returnCode = ADDR_INVALIDPARAMS;
+    }
+
+    SafeAssign(pBlockMax, blockMax);
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeXmaskCoordYFromPipe
+*
+*   @brief
+*       Invert the pipe equations to recover the Y bits of a cmask/htile location from its
+*       pipe number and X coordinate
+*
+*   @return
+*       Y coordinate; 0 when the pipe count is not 1, 2, 4 or 8
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeXmaskCoordYFromPipe(
+    UINT_32         pipe,       ///< [in] pipe number
+    UINT_32         x           ///< [in] x coordinate
+    ) const
+{
+    // Pipe count is taken from the library state. SI has its implementation
+    const UINT_32 pipeCount = m_pipes;
+
+    //
+    // Only the two lowest bits of pipe and x appear in the 2- and 4-pipe equations,
+    // so peel them off once up front.
+    //
+    const UINT_32 p0 = pipe & 0x1;
+    const UINT_32 p1 = (pipe >> 1) & 0x1;
+    const UINT_32 x0 = x & 0x1;
+    const UINT_32 x1 = (x >> 1) & 0x1;
+
+    // Stays 0 unless one of the branches below overwrites it.
+    UINT_32 coordY = 0;
+
+    //
+    // Convert pipe + x to y coordinate.
+    //
+    if (pipeCount == 1)
+    {
+        //
+        // 1 pipe
+        //
+        // p0 = 0
+        //
+        coordY = 0;
+    }
+    else if (pipeCount == 2)
+    {
+        //
+        // 2 pipes
+        //
+        // p0 = x0 ^ y0
+        //
+        // y0 = p0 ^ x0
+        //
+        coordY = p0 ^ x0;
+    }
+    else if (pipeCount == 4)
+    {
+        //
+        // 4 pipes
+        //
+        // p0 = x1 ^ y0
+        // p1 = x0 ^ y1
+        //
+        // y0 = p0 ^ x1
+        // y1 = p1 ^ x0
+        //
+        const UINT_32 y0 = p0 ^ x1;
+        const UINT_32 y1 = p1 ^ x0;
+
+        // y0 lands in bit 0 of the result and y1 in bit 1.
+        coordY = y0 | (y1 << 1);
+    }
+    else if (pipeCount == 8)
+    {
+        //
+        // 8 pipes
+        //
+        // r600 and r800 have different method,
+        // so the hardware layer decides
+        //
+        coordY = HwlComputeXmaskCoordYFrom8Pipe(pipe, x);
+    }
+    else
+    {
+        //
+        // Any other pipe count is not handled here; the result stays 0
+        //
+    }
+
+    return coordY;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlComputeXmaskCoordFromAddr
+*
+*   @brief
+*       Compute the x/y/slice coordinates that correspond to an address inside a cmask/htile
+*
+*   @return
+*       N/A (results are written through pX, pY and pSlice)
+*
+*   @note
+*       This method is reused by htile, so rename to Xmask
+***************************************************************************************************
+*/
+VOID AddrLib::HwlComputeXmaskCoordFromAddr(
+    UINT_64         addr,           ///< [in] address
+    UINT_32         bitPosition,    ///< [in] bitPosition in a byte
+    UINT_32         pitch,          ///< [in] pitch
+    UINT_32         height,         ///< [in] height
+    UINT_32         numSlices,      ///< [in] number of slices
+    UINT_32         factor,         ///< [in] 2 selects cmask; any other value is handled as htile
+    BOOL_32         isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32         isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
+    BOOL_32         isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
+    UINT_32*        pX,             ///< [out] x coord
+    UINT_32*        pY,             ///< [out] y coord
+    UINT_32*        pSlice          ///< [out] slice index
+    ) const
+{
+    UINT_32 pipe;
+    UINT_32 numPipes;
+    UINT_32 numPipeBits;
+    UINT_32 macroTilePitch;
+    UINT_32 macroTileHeight;
+
+    UINT_64 bitAddr;
+
+    UINT_32 microTileCoordY;
+
+    UINT_32 elemBits;
+
+    UINT_32 pitchAligned = pitch;
+    UINT_32 heightAligned = height;
+    UINT_64 totalBytes; // filled in by the Compute*Info helpers; not read afterwards here
+
+    UINT_64 elemOffset;
+
+    UINT_64 macroIndex;
+    UINT_32 microIndex;
+
+    UINT_64 macroNumber;
+    UINT_32 microNumber;
+
+    UINT_32 macroX;
+    UINT_32 macroY;
+    UINT_32 macroZ;
+
+    UINT_32 microX;
+    UINT_32 microY;
+
+    UINT_32 tilesPerMacro;
+    UINT_32 macrosPerPitch;
+    UINT_32 macrosPerSlice;
+
+    //
+    // Extract pipe.
+    //
+    numPipes = HwlGetPipes(pTileInfo);
+    pipe = ComputePipeFromAddr(addr, numPipes);
+
+    //
+    // Compute the number of group and pipe bits.
+    //
+    numPipeBits  = Log2(numPipes);
+    // groupBits is the pipe interleave size expressed in bits; pipes is an alias of numPipes.
+    UINT_32 groupBits = 8 * m_pipeInterleaveBytes;
+    UINT_32 pipes = numPipes;
+
+
+    //
+    // Compute the micro tile size, in bits. And macro tile pitch and height.
+    //
+    if (factor == 2) //CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+
+        elemBits = CmaskElemBits;
+        // Only the aligned pitch/height and the macro-tile dimensions are used from this call.
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &pitchAligned,
+                         &heightAligned,
+                         &totalBytes,
+                         &macroTilePitch,
+                         &macroTileHeight);
+    }
+    else  //HTILE
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+        // Every factor other than 2 is normalized to 1 before it is used as a divisor below.
+        if (factor != 1)
+        {
+            factor = 1;
+        }
+
+        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         isWidth8,
+                         isHeight8,
+                         pTileInfo,
+                         &pitchAligned,
+                         &heightAligned,
+                         &totalBytes,
+                         &macroTilePitch,
+                         &macroTileHeight);
+    }
+
+    // Should use aligned dims
+    //
+    pitch = pitchAligned;
+    height = heightAligned;
+
+
+    //
+    // Convert byte address to bit address.
+    //
+    bitAddr = BYTES_TO_BITS(addr) + bitPosition;
+
+
+    //
+    // Remove pipe bits from address: keep the offset inside the interleave group and divide
+    // the group index by the number of pipes.
+    //
+    bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits);
+
+    // Index of the addressed element, counted in elements of elemBits bits.
+    elemOffset = bitAddr / elemBits;
+    // Micro tiles per macro tile seen by one pipe (hence the shift by numPipeBits).
+    tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits;
+
+    macrosPerPitch = pitch / (macroTilePitch/factor);
+    macrosPerSlice = macrosPerPitch * height / macroTileHeight;
+    // Split the element offset into a macro-tile part and a part inside that macro tile.
+    macroIndex = elemOffset / factor / tilesPerMacro;
+    microIndex = static_cast<UINT_32>(elemOffset % (tilesPerMacro * factor));
+    // With factor 2 the low part of microIndex selects one of the two halves of the macro tile.
+    macroNumber = macroIndex * factor + microIndex % factor;
+    microNumber = microIndex / factor;
+    // Decompose the linear macro number into column, row and slice.
+    macroX = static_cast<UINT_32>((macroNumber % macrosPerPitch));
+    macroY = static_cast<UINT_32>((macroNumber % macrosPerSlice) / macrosPerPitch);
+    macroZ = static_cast<UINT_32>((macroNumber / macrosPerSlice));
+
+    // NOTE(review): microY divides by MicroTileHeight where microX uses MicroTileWidth - confirm.
+    microX = microNumber % (macroTilePitch / factor / MicroTileWidth);
+    microY = (microNumber / (macroTilePitch / factor / MicroTileHeight));
+    // Coordinates before the pipe-derived Y bits are added back in below.
+    *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth;
+    *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits);
+    *pSlice = macroZ;
+    // Recover the Y bits that were folded into the pipe number (x is passed in micro tiles).
+    microTileCoordY = ComputeXmaskCoordYFromPipe(pipe,
+                                                 *pX/MicroTileWidth);
+
+
+    //
+    // Assemble final coordinates.
+    //
+    *pY += microTileCoordY * MicroTileHeight;
+
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlComputeXmaskAddrFromCoord
+*
+*   @brief
+*       Compute the address of a cmask/htile element from its coordinates (prior to si)
+*
+*   @return
+*       Address in bytes; the bit offset inside that byte is returned through pBitPosition
+*
+***************************************************************************************************
+*/
+UINT_64 AddrLib::HwlComputeXmaskAddrFromCoord(
+    UINT_32        pitch,          ///< [in] pitch
+    UINT_32        height,         ///< [in] height
+    UINT_32        x,              ///< [in] x coord
+    UINT_32        y,              ///< [in] y coord
+    UINT_32        slice,          ///< [in] slice/depth index
+    UINT_32        numSlices,      ///< [in] number of slices
+    UINT_32        factor,         ///< [in] factor that indicates cmask(2) or htile(1)
+    BOOL_32        isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32        isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
+    BOOL_32        isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
+    ADDR_TILEINFO* pTileInfo,      ///< [in] Tile info
+    UINT_32*       pBitPosition    ///< [out] bit position inside a byte
+    ) const
+{
+    UINT_64 addr;
+    UINT_32 numGroupBits;
+    UINT_32 numPipeBits;
+    UINT_32 newPitch = 0;
+    UINT_32 newHeight = 0;
+    UINT_64 sliceBytes = 0;
+    UINT_64 totalBytes = 0;
+    UINT_64 sliceOffset;
+    UINT_32 pipe;
+    UINT_32 macroTileWidth;
+    UINT_32 macroTileHeight;
+    UINT_32 macroTilesPerRow;
+    UINT_32 macroTileBytes;
+    UINT_32 macroTileIndexX;
+    UINT_32 macroTileIndexY;
+    UINT_64 macroTileOffset;
+    UINT_32 pixelBytesPerRow;
+    UINT_32 pixelOffsetX;
+    UINT_32 pixelOffsetY;
+    UINT_32 pixelOffset;
+    UINT_64 totalOffset;
+    UINT_64 offsetLo;
+    UINT_64 offsetHi;
+    UINT_64 groupMask;
+
+    UINT_32 elemBits = 0;
+
+    UINT_32 numPipes = m_pipes; // This function is accessed prior to si only
+
+    if (factor == 2) //CMASK
+    {
+        elemBits = CmaskElemBits;
+
+        // For asics before SI, cmask is always tiled
+        isLinear = FALSE;
+    }
+    else //HTILE
+    {
+        if (factor != 1) // Fix compile warning
+        {
+            factor = 1;
+        }
+
+        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);
+    }
+
+    //
+    // Compute the number of group bits and pipe bits.
+    //
+    numGroupBits = Log2(m_pipeInterleaveBytes);
+    numPipeBits  = Log2(numPipes);
+
+    //
+    // Compute macro tile dimensions.
+    //
+    if (factor == 2) // CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+        // Take the per-slice size straight from ComputeCmaskInfo, as the HTILE path does.
+        // Dividing totalBytes by the caller's numSlices would fault when numSlices is 0.
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroTileWidth,
+                         &macroTileHeight,
+                         &sliceBytes);
+    }
+    else // HTILE
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         isWidth8,
+                         isHeight8,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroTileWidth,
+                         &macroTileHeight,
+                         &sliceBytes);
+    }
+
+    sliceOffset = slice * sliceBytes;
+
+    //
+    // Get the pipe.  Note that neither slice rotation nor pipe swizzling apply for CMASK.
+    //
+    pipe = ComputePipeFromCoord(x,
+                                y,
+                                0,
+                                ADDR_TM_2D_TILED_THIN1,
+                                0,
+                                FALSE,
+                                pTileInfo);
+
+    //
+    // Compute the number of macro tiles per row.
+    //
+    macroTilesPerRow = newPitch / macroTileWidth;
+
+    //
+    // Compute the number of bytes per macro tile.
+    //
+    macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels);
+
+    //
+    // Compute the offset to the macro tile containing the specified coordinate.
+    // The product is widened to 64 bits first so it cannot wrap in 32-bit arithmetic.
+    //
+    macroTileIndexX = x / macroTileWidth;
+    macroTileIndexY = y / macroTileHeight;
+    macroTileOffset = (static_cast<UINT_64>(macroTileIndexY) * macroTilesPerRow + macroTileIndexX) *
+                      macroTileBytes;
+    //
+    // Compute the pixel offset within the macro tile.
+    //
+    pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth;
+
+    //
+    // The nibbles are interleaved (see below), so the part of the offset relative to the x
+    // coordinate repeats halfway across the row. (Not for HTILE)
+    //
+    if (factor == 2)
+    {
+        pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth;
+    }
+    else
+    {
+        pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits);
+    }
+
+    //
+    // Compute the y offset within the macro tile.
+    //
+    pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow;
+
+    pixelOffset = pixelOffsetX + pixelOffsetY;
+
+    //
+    // Combine the slice offset and macro tile offset with the pixel offset, accounting for the
+    // pipe bits in the middle of the address.
+    //
+    totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset;
+
+    //
+    // Split the offset to put some bits below the pipe bits and some above.
+    //
+    groupMask = (1 << numGroupBits) - 1;
+    offsetLo  = totalOffset &  groupMask;
+    offsetHi  = (totalOffset & ~groupMask) << numPipeBits;
+
+    //
+    // Assemble the address from its components.
+    //
+    addr  = offsetLo;
+    addr |= offsetHi;
+    // This is to remove warning with /analyze option
+    UINT_32 pipeBits = pipe << numGroupBits;
+    addr |= pipeBits;
+
+    //
+    // Compute the bit position.  The lower nibble is used when the x coordinate within the macro
+    // tile is less than half of the macro tile width, and the upper nibble is used when the x
+    // coordinate within the macro tile is greater than or equal to half the macro tile width.
+    // For HTILE factor is 1, so the comparison always holds and the bit position is 0.
+    //
+    *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 0 : 4;
+
+    return addr;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Surface Addressing Shared
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceAddrFromCoordLinear
+*
+*   @brief
+*       Map (x, y, slice, sample) on a linear surface to a byte address plus bit remainder
+*
+*   @return
+*       Byte address of the element; the leftover bit offset goes to pBitPosition
+*
+***************************************************************************************************
+*/
+UINT_64 AddrLib::ComputeSurfaceAddrFromCoordLinear(
+    UINT_32  x,              ///< [in] x coord
+    UINT_32  y,              ///< [in] y coord
+    UINT_32  slice,          ///< [in] slice/depth index
+    UINT_32  sample,         ///< [in] sample index
+    UINT_32  bpp,            ///< [in] bits per pixel
+    UINT_32  pitch,          ///< [in] pitch
+    UINT_32  height,         ///< [in] height
+    UINT_32  numSlices,      ///< [in] number of slices
+    UINT_32* pBitPosition    ///< [out] bit position inside a byte
+    ) const
+{
+    // Each sample owns a run of numSlices slices; this index is formed in 32 bits.
+    const UINT_32 planeIndex     = slice + sample * numSlices;
+    const UINT_64 pixelsPerSlice = static_cast<UINT_64>(pitch) * height;
+
+    // Element index = whole slices + whole rows + position within the row.
+    UINT_64 elemIndex = planeIndex * pixelsPerSlice;
+    elemIndex += static_cast<UINT_64>(y) * pitch + x;
+
+    // Scale to bits, then split into a byte address and the bit left over inside that byte.
+    const UINT_64 bitOffset = elemIndex * bpp;
+    *pBitPosition = static_cast<UINT_32>(bitOffset % 8);
+    return bitOffset / 8;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceCoordFromAddrLinear
+*
+*   @brief
+*       Split a byte address + bit position on a linear surface into x, y, slice and sample
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeSurfaceCoordFromAddrLinear(
+    UINT_64  addr,           ///< [in] address
+    UINT_32  bitPosition,    ///< [in] bitPosition in a byte
+    UINT_32  bpp,            ///< [in] bits per pixel
+    UINT_32  pitch,          ///< [in] pitch
+    UINT_32  height,         ///< [in] height
+    UINT_32  numSlices,      ///< [in] number of slices
+    UINT_32* pX,             ///< [out] x coord
+    UINT_32* pY,             ///< [out] y coord
+    UINT_32* pSlice,         ///< [out] slice/depth index
+    UINT_32* pSample         ///< [out] sample index
+    ) const
+{
+    const UINT_64 pixelsPerSlice = static_cast<UINT_64>(pitch) * height;
+    const UINT_64 elemIndex      = (BYTES_TO_BITS(addr) + bitPosition) / bpp; // element number
+    const UINT_64 withinSlice    = elemIndex % pixelsPerSlice; // offset inside its own slice
+    *pX      = static_cast<UINT_32>(withinSlice % pitch);
+    *pY      = static_cast<UINT_32>(withinSlice / pitch % height);
+    *pSlice  = static_cast<UINT_32>((elemIndex / pixelsPerSlice) % numSlices);
+    *pSample = static_cast<UINT_32>((elemIndex / pixelsPerSlice) / numSlices);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceCoordFromAddrMicroTiled
+*
+*   @brief
+*       Compute the x/y/slice/sample coordinates from an address of a micro tiled surface
+*
+*   @return
+*       N/A (results are written through pX, pY, pSlice and pSample)
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeSurfaceCoordFromAddrMicroTiled(
+    UINT_64         addr,               ///< [in] address
+    UINT_32         bitPosition,        ///< [in] bitPosition in a byte
+    UINT_32         bpp,                ///< [in] bits per pixel
+    UINT_32         pitch,              ///< [in] pitch
+    UINT_32         height,             ///< [in] height
+    UINT_32         numSamples,         ///< [in] number of samples
+    AddrTileMode    tileMode,           ///< [in] tile mode
+    UINT_32         tileBase,           ///< [in] base offset within a tile
+    UINT_32         compBits,           ///< [in] component bits actually needed(for planar surface)
+    UINT_32*        pX,                 ///< [out] x coord
+    UINT_32*        pY,                 ///< [out] y coord
+    UINT_32*        pSlice,             ///< [out] slice/depth index
+    UINT_32*        pSample,            ///< [out] sample index (forced to 0 for thick tiles)
+    AddrTileType    microTileType,      ///< [in] micro tiling order
+    BOOL_32         isDepthSampleOrder  ///< [in] TRUE if in depth sample order
+    ) const
+{
+    UINT_64 bitAddr;
+    UINT_32 microTileThickness;
+    UINT_32 microTileBits;
+    UINT_64 sliceBits;
+    UINT_64 rowBits;
+    UINT_32 sliceIndex;
+    UINT_32 microTileCoordX;
+    UINT_32 microTileCoordY;
+    UINT_32 pixelOffset;
+    UINT_32 pixelCoordX = 0;
+    UINT_32 pixelCoordY = 0;
+    UINT_32 pixelCoordZ = 0;
+    UINT_32 pixelCoordS = 0;
+
+    //
+    // Convert byte address to bit address.
+    //
+    bitAddr = BYTES_TO_BITS(addr) + bitPosition;
+
+    //
+    // Pick the micro tile thickness: only ADDR_TM_1D_TILED_THICK is thick, all others use 1.
+    //
+    switch (tileMode)
+    {
+        case ADDR_TM_1D_TILED_THICK:
+            microTileThickness = ThickTileThickness;
+            break;
+        default:
+            microTileThickness = 1;
+            break;
+    }
+    // Size of one micro tile, in bits.
+    microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples;
+
+    //
+    // Compute number of bits per slice and number of bits per row of micro tiles.
+    //
+    sliceBits = static_cast<UINT_64>(pitch) * height * microTileThickness * bpp * numSamples;
+    // NOTE(review): this product is formed in 32 bits before being stored in rowBits.
+    rowBits   = (pitch / MicroTileWidth) * microTileBits;
+
+    //
+    // Extract the slice index, then strip the whole slices from the bit address.
+    //
+    sliceIndex = static_cast<UINT_32>(bitAddr / sliceBits);
+    bitAddr -= sliceIndex * sliceBits;
+
+    //
+    // Extract the y coordinate of the micro tile, then strip the whole rows.
+    //
+    microTileCoordY = static_cast<UINT_32>(bitAddr / rowBits) * MicroTileHeight;
+    bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits;
+
+    //
+    // Extract the x coordinate of the micro tile.
+    //
+    microTileCoordX = static_cast<UINT_32>(bitAddr / microTileBits) * MicroTileWidth;
+
+    //
+    // Compute the pixel offset within the micro tile.
+    //
+    pixelOffset = static_cast<UINT_32>(bitAddr % microTileBits);
+
+    //
+    // Extract pixel coordinates from the offset (delegated to the hardware layer).
+    //
+    HwlComputePixelCoordFromOffset(pixelOffset,
+                                   bpp,
+                                   numSamples,
+                                   tileMode,
+                                   tileBase,
+                                   compBits,
+                                   &pixelCoordX,
+                                   &pixelCoordY,
+                                   &pixelCoordZ,
+                                   &pixelCoordS,
+                                   microTileType,
+                                   isDepthSampleOrder);
+
+    //
+    // Assemble final coordinates.
+    //
+    *pX     = microTileCoordX + pixelCoordX;
+    *pY     = microTileCoordY + pixelCoordY;
+    *pSlice = (sliceIndex * microTileThickness) + pixelCoordZ;
+    *pSample = pixelCoordS;
+    // Thick micro tiles always report sample 0, overriding the value decoded above.
+    if (microTileThickness > 1)
+    {
+        *pSample = 0;
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputePipeFromAddr
+*
+*   @brief
+*       Extract the pipe number encoded in the low-order bits of an address
+*
+*   @return
+*       Pipe number
+*
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputePipeFromAddr(
+    UINT_64 addr,        ///< [in] address
+    UINT_32 numPipes     ///< [in] number of pipes
+    ) const
+{
+    //
+    // Address LSB layout, lowest bits on the right:
+    //
+    //   R600:  bank | pipe | group
+    //   R800:  bank | bankInterleave | pipe | pipeInterleave
+    //
+    // "group" and "pipeInterleave" are just different terms, so on both families the pipe
+    // number sits directly above the m_pipeInterleaveBytes-sized field.
+    //
+    const UINT_32 interleaveBytes = m_pipeInterleaveBytes;
+
+    //
+    // Shift off the group / pipe interleave bits ...
+    //
+    const UINT_64 aboveInterleave = addr >> Log2(interleaveBytes);
+
+    //
+    // ... and mask the pipe bits. The mask is simply numPipes - 1.
+    //
+    const UINT_32 pipeMask = numPipes - 1;
+    return static_cast<UINT_32>(aboveInterleave) & pipeMask;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputePixelIndexWithinMicroTile
+*
+*   @brief
+*       Compute the pixel index inside a micro tile by interleaving bits of x, y and z
+*
+*   @return
+*       Pixel index (up to 9 bits; bit N of the result is pixelBitN below)
+*
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputePixelIndexWithinMicroTile(
+    UINT_32         x,              ///< [in] x coord
+    UINT_32         y,              ///< [in] y coord
+    UINT_32         z,              ///< [in] slice/depth index
+    UINT_32         bpp,            ///< [in] bits per pixel
+    AddrTileMode    tileMode,       ///< [in] tile mode
+    AddrTileType    microTileType   ///< [in] pixel order in display/non-display mode
+    ) const
+{
+    UINT_32 pixelBit0 = 0;
+    UINT_32 pixelBit1 = 0;
+    UINT_32 pixelBit2 = 0;
+    UINT_32 pixelBit3 = 0;
+    UINT_32 pixelBit4 = 0;
+    UINT_32 pixelBit5 = 0;
+    UINT_32 pixelBit6 = 0;
+    UINT_32 pixelBit7 = 0;
+    UINT_32 pixelBit8 = 0;
+    UINT_32 pixelNumber;
+    // Low three bits of each coordinate (_BIT presumably extracts a single bit - confirm).
+    UINT_32 x0 = _BIT(x, 0);
+    UINT_32 x1 = _BIT(x, 1);
+    UINT_32 x2 = _BIT(x, 2);
+    UINT_32 y0 = _BIT(y, 0);
+    UINT_32 y1 = _BIT(y, 1);
+    UINT_32 y2 = _BIT(y, 2);
+    UINT_32 z0 = _BIT(z, 0);
+    UINT_32 z1 = _BIT(z, 1);
+    UINT_32 z2 = _BIT(z, 2);
+
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+    // Compute the pixel number within the micro tile.
+    // Each table below decides which coordinate bit feeds which bit of the pixel number.
+    if (microTileType != ADDR_THICK)
+    {
+        if (microTileType == ADDR_DISPLAYABLE)
+        {
+            switch (bpp) // displayable order depends on the element size
+            {
+                case 8:
+                    pixelBit0 = x0;
+                    pixelBit1 = x1;
+                    pixelBit2 = x2;
+                    pixelBit3 = y1;
+                    pixelBit4 = y0;
+                    pixelBit5 = y2;
+                    break;
+                case 16:
+                    pixelBit0 = x0;
+                    pixelBit1 = x1;
+                    pixelBit2 = x2;
+                    pixelBit3 = y0;
+                    pixelBit4 = y1;
+                    pixelBit5 = y2;
+                    break;
+                case 32:
+                    pixelBit0 = x0;
+                    pixelBit1 = x1;
+                    pixelBit2 = y0;
+                    pixelBit3 = x2;
+                    pixelBit4 = y1;
+                    pixelBit5 = y2;
+                    break;
+                case 64:
+                    pixelBit0 = x0;
+                    pixelBit1 = y0;
+                    pixelBit2 = x1;
+                    pixelBit3 = x2;
+                    pixelBit4 = y1;
+                    pixelBit5 = y2;
+                    break;
+                case 128:
+                    pixelBit0 = y0;
+                    pixelBit1 = x0;
+                    pixelBit2 = x1;
+                    pixelBit3 = x2;
+                    pixelBit4 = y1;
+                    pixelBit5 = y2;
+                    break;
+                default: // any other bpp asserts and leaves bits 0-5 at zero
+                    ADDR_ASSERT_ALWAYS();
+                    break;
+            }
+        }
+        else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER)
+        {
+            pixelBit0 = x0; // plain x/y bit interleave, independent of bpp
+            pixelBit1 = y0;
+            pixelBit2 = x1;
+            pixelBit3 = y1;
+            pixelBit4 = x2;
+            pixelBit5 = y2;
+        }
+        else if (microTileType == ADDR_ROTATED)
+        {
+            ADDR_ASSERT(thickness == 1);
+            // Same tables as ADDR_DISPLAYABLE with x and y swapped; there is no 128 bpp entry.
+            switch (bpp)
+            {
+                case 8:
+                    pixelBit0 = y0;
+                    pixelBit1 = y1;
+                    pixelBit2 = y2;
+                    pixelBit3 = x1;
+                    pixelBit4 = x0;
+                    pixelBit5 = x2;
+                    break;
+                case 16:
+                    pixelBit0 = y0;
+                    pixelBit1 = y1;
+                    pixelBit2 = y2;
+                    pixelBit3 = x0;
+                    pixelBit4 = x1;
+                    pixelBit5 = x2;
+                    break;
+                case 32:
+                    pixelBit0 = y0;
+                    pixelBit1 = y1;
+                    pixelBit2 = x0;
+                    pixelBit3 = y2;
+                    pixelBit4 = x1;
+                    pixelBit5 = x2;
+                    break;
+                case 64:
+                    pixelBit0 = y0;
+                    pixelBit1 = x0;
+                    pixelBit2 = y1;
+                    pixelBit3 = x1;
+                    pixelBit4 = x2;
+                    pixelBit5 = y2;
+                    break;
+                default: // includes 128 bpp, which has no rotated table here
+                    ADDR_ASSERT_ALWAYS();
+                    break;
+            }
+        }
+        // Thick tile modes with a non-thick micro tile type append the z bits on top.
+        if (thickness > 1)
+        {
+            pixelBit6 = z0;
+            pixelBit7 = z1;
+        }
+    }
+    else // ADDR_THICK
+    {
+        ADDR_ASSERT(thickness > 1);
+        // Thick order: z0/z1 are interleaved into bits 0-5, x2/y2 move up to bits 6-7.
+        switch (bpp)
+        {
+            case 8:
+            case 16:
+                pixelBit0 = x0;
+                pixelBit1 = y0;
+                pixelBit2 = x1;
+                pixelBit3 = y1;
+                pixelBit4 = z0;
+                pixelBit5 = z1;
+                break;
+            case 32:
+                pixelBit0 = x0;
+                pixelBit1 = y0;
+                pixelBit2 = x1;
+                pixelBit3 = z0;
+                pixelBit4 = y1;
+                pixelBit5 = z1;
+                break;
+            case 64:
+            case 128:
+                pixelBit0 = y0;
+                pixelBit1 = x0;
+                pixelBit2 = z0;
+                pixelBit3 = x1;
+                pixelBit4 = y1;
+                pixelBit5 = z1;
+                break;
+            default:
+                ADDR_ASSERT_ALWAYS();
+                break;
+        }
+
+        pixelBit6 = x2;
+        pixelBit7 = y2;
+    }
+    // A thickness of 8 needs a third z bit, for both thick and non-thick micro tile types.
+    if (thickness == 8)
+    {
+        pixelBit8 = z2;
+    }
+    // Pack the selected bits: pixelBitN becomes bit N of the result.
+    pixelNumber = ((pixelBit0     ) |
+                   (pixelBit1 << 1) |
+                   (pixelBit2 << 2) |
+                   (pixelBit3 << 3) |
+                   (pixelBit4 << 4) |
+                   (pixelBit5 << 5) |
+                   (pixelBit6 << 6) |
+                   (pixelBit7 << 7) |
+                   (pixelBit8 << 8));
+
+    return pixelNumber;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::AdjustPitchAlignment
+*
+*   @brief
+*       Raise the pitch alignment of display/overlay surfaces to what flipping requires
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrLib::AdjustPitchAlignment(
+    ADDR_SURFACE_FLAGS  flags,      ///< [in] Surface flags
+    UINT_32*            pPitchAlign ///< [in,out] Pointer to pitch alignment
+    ) const
+{
+    if (!flags.display && !flags.overlay)
+    {
+        return; // neither display nor overlay: the alignment is left untouched
+    }
+    // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment
+    UINT_32 pitchAlign = PowTwoAlign(*pPitchAlign, 32);
+    if (flags.display)
+    {
+        pitchAlign = Max(m_minPitchAlignPixels, pitchAlign);
+    }
+    *pPitchAlign = pitchAlign;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::PadDimensions
+*
+*   @brief
+*       Helper function to pad dimensions: aligns *pPitch, then *pHeight (padDims > 1) and
+*       *pSlices (padDims > 2 or a thick tile mode), and finally calls HwlPadDimensions
+*
+*   @return
+*       N/A (this function itself writes *pPitch, *pHeight and *pSlices)
+***************************************************************************************************
+*/
+VOID AddrLib::PadDimensions(
+    AddrTileMode        tileMode,    ///< [in] tile mode
+    UINT_32             bpp,         ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,       ///< [in] surface flags
+    UINT_32             numSamples,  ///< [in] number of samples
+    ADDR_TILEINFO*      pTileInfo,   ///< [in/out] bank structure.
+    UINT_32             padDims,     ///< [in] Dimensions to pad, valid values 1,2,3 (0 acts as 3)
+    UINT_32             mipLevel,    ///< [in] MipLevel
+    UINT_32*            pPitch,      ///< [in/out] pitch in pixels
+    UINT_32             pitchAlign,  ///< [in] pitch alignment
+    UINT_32*            pHeight,     ///< [in/out] height in pixels
+    UINT_32             heightAlign, ///< [in] height alignment
+    UINT_32*            pSlices,     ///< [in/out] number of slices
+    UINT_32             sliceAlign   ///< [in] slice alignment
+    ) const
+{
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+    ADDR_ASSERT(padDims <= 3);
+
+    //
+    // Override padding for mip levels (only cubemap sub levels are affected)
+    //
+    if (mipLevel > 0)
+    {
+        if (flags.cube)
+        {
+            // for cubemap, we only pad slices when the client passes more than one slice
+            if (*pSlices > 1)
+            {
+                padDims = 3; // we should pad cubemap sub levels when we treat it as 3d texture
+            }
+            else
+            {
+                padDims = 2;
+            }
+        }
+    }
+
+    // A padDims of 0 is treated as 3
+    if (padDims == 0)
+    {
+        padDims = 3;
+    }
+
+    if (IsPow2(pitchAlign))
+    {
+        *pPitch = PowTwoAlign((*pPitch), pitchAlign);
+    }
+    else // non-pow2 alignment (added to pass unit test, r600 linear mode): round up to a multiple
+    {
+        *pPitch += pitchAlign - 1;
+        *pPitch /= pitchAlign;
+        *pPitch *= pitchAlign;
+    }
+
+    if (padDims > 1)
+    {
+        *pHeight = PowTwoAlign((*pHeight), heightAlign);
+    }
+
+    if (padDims > 2 || thickness > 1)
+    {
+        // for cubemap single face, we do not pad slices.
+        // if we pad it, the slice number should be set to 6 and current mip level > 1
+        if (flags.cube && (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray))
+        {
+            *pSlices = NextPow2(*pSlices);
+        }
+
+        // normal 3D texture or arrays or cubemap has a thick mode? (Just pass unit test)
+        if (thickness > 1)
+        {
+            *pSlices = PowTwoAlign((*pSlices), sliceAlign);
+        }
+
+    }
+
+    HwlPadDimensions(tileMode,
+                     bpp,
+                     flags,
+                     numSamples,
+                     pTileInfo,
+                     padDims,
+                     mipLevel,
+                     pPitch,
+                     pitchAlign,
+                     pHeight,
+                     heightAlign,
+                     pSlices,
+                     sliceAlign);
+}
+
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlPreHandleBaseLvl3xPitch
+*
+*   @brief
+*       Pre-handler of 3x pitch (96 bit) adjustment: for the base level of a linear-aligned
+*       expand-3x format, recover the element pitch from the pre-multiplied pitch
+*   @return
+*       Expected pitch (NextPow2(expPitch / 3) in the case above, expPitch otherwise)
+***************************************************************************************************
+*/
+UINT_32 AddrLib::HwlPreHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
+    UINT_32                                 expPitch    ///< [in] pitch
+    ) const
+{
+    ADDR_ASSERT(pIn->width == expPitch);
+
+    const BOOL_32 isExpandedBaseLevel = AddrElemLib::IsExpand3x(pIn->format) &&
+                                        (pIn->mipLevel == 0) &&
+                                        (pIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
+
+    if (!isExpandedBaseLevel)
+    {
+        return expPitch;
+    }
+
+    // The pitch was pre-multiplied by 3: retrieve the original to get correct miplevel size.
+    return NextPow2(expPitch / 3);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlPostHandleBaseLvl3xPitch
+*
+*   @brief
+*       Post-handler of 3x pitch adjustment: multiplies the pitch by 3 again for the base
+*       level of a linear-aligned expand-3x format
+*   @return
+*       Expected pitch
+***************************************************************************************************
+*/
+UINT_32 AddrLib::HwlPostHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
+    UINT_32                                 expPitch    ///< [in] pitch
+    ) const
+{
+    const BOOL_32 isExpandedBaseLevel = AddrElemLib::IsExpand3x(pIn->format) &&
+                                        (pIn->mipLevel == 0) &&
+                                        (pIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
+
+    // 96 bit surfaces of sub levels require an element pitch of 32 bits instead, so for
+    // them the pitch is returned in 32 bit pixels without multiplying by 3.
+    if (!isExpandedBaseLevel)
+    {
+        return expPitch;
+    }
+
+    return expPitch * 3;
+}
+
+
+/**
+***************************************************************************************************
+*   AddrLib::IsMacroTiled
+*
+*   @brief
+*       Check if the tile mode is macro tiled, as recorded in its m_modeFlags entry
+*   @return
+*       TRUE if it is macro tiled (2D/2B/3D/3B)
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsMacroTiled(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    const BOOL_32 macroTiled = m_modeFlags[tileMode].isMacro;
+    return macroTiled;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::IsMacro3dTiled
+*
+*   @brief
+*       Check if the tile mode is 3D macro tiled, as recorded in its m_modeFlags entry
+*   @return
+*       TRUE if it is 3D macro tiled
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsMacro3dTiled(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    const BOOL_32 macro3dTiled = m_modeFlags[tileMode].isMacro3d;
+    return macro3dTiled;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::IsMicroTiled
+*
+*   @brief
+*       Check if the tile mode is micro tiled, as recorded in its m_modeFlags entry
+*   @return
+*       TRUE if micro tiled
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsMicroTiled(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    const BOOL_32 microTiled = m_modeFlags[tileMode].isMicro;
+    return microTiled;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::IsLinear
+*
+*   @brief
+*       Check if the tile mode is linear, as recorded in its m_modeFlags entry
+*   @return
+*       TRUE if linear
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsLinear(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    const BOOL_32 linear = m_modeFlags[tileMode].isLinear;
+    return linear;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::IsPrtNoRotationTileMode
+*   @brief
+*       Return TRUE if it is prt tile without rotation (isPrtNoRotation flag in m_modeFlags)
+*   @note
+*       This function is only used by CI
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsPrtNoRotationTileMode(
+    AddrTileMode tileMode)
+{
+    const BOOL_32 prtNoRotation = m_modeFlags[tileMode].isPrtNoRotation;
+    return prtNoRotation;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::IsPrtTileMode
+*   @brief
+*       Return TRUE if it is prt tile (isPrt flag of the mode's m_modeFlags entry)
+*   @note
+*       This function is only used by CI
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsPrtTileMode(
+    AddrTileMode tileMode)
+{
+    const BOOL_32 prtTiled = m_modeFlags[tileMode].isPrt;
+    return prtTiled;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::Bits2Number
+*
+*   @brief
+*       Concatenate an array of binary bits into a number
+*
+*   @return
+*       The number combined from the bits; the first variadic argument ends up as the MSB
+***************************************************************************************************
+*/
+UINT_32 AddrLib::Bits2Number(
+    UINT_32 bitNum,     ///< [in] how many bits
+    ...)                ///< [in] variable bits value starting from MSB
+{
+    UINT_32 number = 0;
+    UINT_32 i;
+    va_list bits_ptr;
+
+    va_start(bits_ptr, bitNum);
+
+    // Shift the accumulator before appending each bit so that no corrective shift is needed
+    // afterwards. Appending first and undoing one surplus shift at the end pushed the first
+    // bit out of the 32-bit accumulator when bitNum == 32.
+    for(i = 0; i < bitNum; i++)
+    {
+        number = (number << 1) | va_arg(bits_ptr, UINT_32);
+    }
+
+    va_end(bits_ptr);
+
+    return number;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeMipLevel
+*
+*   @brief
+*       Compute mipmap level width/height/slices (delegated to HwlComputeMipLevel)
+*   @return
+*      N/A
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeMipLevel(
+    ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in/out] Input structure
+    ) const
+{
+    const BOOL_32 isCompressedBaseLevel =
+        AddrElemLib::IsBlockCompressed(pIn->format) && (pIn->mipLevel == 0);
+
+    if (isCompressedBaseLevel)
+    {
+        // DXTn's level 0 must be a multiple of 4, yet unaligned sizes do reach this point:
+        // 1. Internal surface creation in hostblt/vsblt/etc...
+        // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4
+        pIn->width  = PowTwoAlign(pIn->width, 4);
+        pIn->height = PowTwoAlign(pIn->height, 4);
+    }
+
+    // Finally invoke HwlComputeMipLevel on the (possibly adjusted) input.
+    HwlComputeMipLevel(pIn);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::DegradeBaseLevel
+*
+*   @brief
+*       Check if base level's tile mode can be degraded to a 1D tiled mode
+*   @return
+*       TRUE if degraded, with the degraded mode stored in *pTileMode (not written otherwise)
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::DegradeBaseLevel(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input structure for surface info
+    AddrTileMode*                           pTileMode   ///< [out] Degraded tile mode
+    ) const
+{
+    BOOL_32 degraded = FALSE;
+    AddrTileMode tileMode = pIn->tileMode;
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+    if (m_configFlags.degradeBaseLevel) // This is a global setting
+    {
+        if (pIn->flags.degrade4Space        && // Degradation per surface
+            pIn->mipLevel == 0              &&
+            pIn->numSamples == 1            &&
+            IsMacroTiled(tileMode))
+        {
+            if (HwlDegradeBaseLevel(pIn))
+            {
+                *pTileMode = thickness == 1 ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
+                degraded = TRUE;
+            }
+            else if (thickness > 1)
+            {
+                // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to
+                // thinner modes, we should re-evaluate whether the corresponding thinner modes
+                // need to be degraded. If so, we choose 1D thick mode instead.
+                tileMode = DegradeLargeThickTile(pIn->tileMode, pIn->bpp);
+                if (tileMode != pIn->tileMode)
+                {
+                    ADDR_COMPUTE_SURFACE_INFO_INPUT input = *pIn;
+                    input.tileMode = tileMode;
+                    if (HwlDegradeBaseLevel(&input))
+                    {
+                        *pTileMode = ADDR_TM_1D_TILED_THICK;
+                        degraded = TRUE;
+                    }
+                }
+            }
+        }
+    }
+
+    return degraded;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::DegradeLargeThickTile
+*
+*   @brief
+*       Check if the thickness needs to be reduced if a tile is too large for m_rowSize
+*   @return
+*       The degraded tile mode (unchanged if not degraded)
+***************************************************************************************************
+*/
+AddrTileMode AddrLib::DegradeLargeThickTile(
+    AddrTileMode tileMode,
+    UINT_32 bpp) const
+{
+    // Override tilemode
+    // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size,
+    // it is better to use a thinner mode (XTHICK first tries THICK, at half the tile size)
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+    if (thickness > 1 && m_configFlags.allowLargeThickTile == 0)
+    {
+        UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3);
+
+        if (tileSize > m_rowSize)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_2D_TILED_XTHICK:
+                    if ((tileSize >> 1) <= m_rowSize)
+                    {
+                        tileMode = ADDR_TM_2D_TILED_THICK;
+                        break;
+                    }
+                    // else fall through: even the THICK tile is too large, go to THIN1
+                case ADDR_TM_2D_TILED_THICK:
+                    tileMode    = ADDR_TM_2D_TILED_THIN1;
+                    break;
+
+                case ADDR_TM_3D_TILED_XTHICK:
+                    if ((tileSize >> 1) <= m_rowSize)
+                    {
+                        tileMode = ADDR_TM_3D_TILED_THICK;
+                        break;
+                    }
+                    // else fall through: even the THICK tile is too large, go to THIN1
+                case ADDR_TM_3D_TILED_THICK:
+                    tileMode    = ADDR_TM_3D_TILED_THIN1;
+                    break;
+
+                case ADDR_TM_PRT_TILED_THICK:
+                    tileMode    = ADDR_TM_PRT_TILED_THIN1;
+                    break;
+
+                case ADDR_TM_PRT_2D_TILED_THICK:
+                    tileMode    = ADDR_TM_PRT_2D_TILED_THIN1;
+                    break;
+
+                case ADDR_TM_PRT_3D_TILED_THICK:
+                    tileMode    = ADDR_TM_PRT_3D_TILED_THIN1;
+                    break;
+
+                default:
+                    break;
+            }
+        }
+    }
+
+    return tileMode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::PostComputeMipLevel
+*
+*   @brief
+*       Compute MipLevel info (including level 0) after surface adjustment: width, height
+*       and numSlices in pIn are passed through NextPow2 when flags.pow2Pad is set or when
+*       mipLevel > 0; without pow2Pad, cubemaps keep their numSlices
+*
+*   @return
+*       ADDR_E_RETURNCODE (this implementation always returns ADDR_OK and never reads pOut)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::PostComputeMipLevel(
+    ADDR_COMPUTE_SURFACE_INFO_INPUT*    pIn,   ///< [in/out] Input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut   ///< [out] Output structure
+    ) const
+{
+    // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is
+    // required by CFX for Hw Compatibility between NI and SI. Otherwise it is only needed for
+    // mipLevel > 0. Hardware with a different requirement should provide its own virtual function
+
+    const BOOL_32 padAllLevels = pIn->flags.pow2Pad;
+
+    if (padAllLevels || (pIn->mipLevel > 0))
+    {
+        pIn->width  = NextPow2(pIn->width);
+        pIn->height = NextPow2(pIn->height);
+
+        // For a cubemap sub level (no pow2Pad) the slice count keeps its value at first;
+        // every other case pads it as well.
+        if (padAllLevels || !pIn->flags.cube)
+        {
+            pIn->numSlices = NextPow2(pIn->numSlices);
+        }
+    }
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlSetupTileCfg
+*   @brief
+*       Map tile index to tile setting; this implementation ignores all of its arguments
+*   @return
+*       ADDR_E_RETURNCODE (always ADDR_NOTSUPPORTED here)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::HwlSetupTileCfg(
+    INT_32          index,            ///< [in] Tile index
+    INT_32          macroModeIndex,   ///< [in] Index in macro tile mode table(CI)
+    ADDR_TILEINFO*  pInfo,            ///< [out] Tile Info
+    AddrTileMode*   pMode,            ///< [out] Tile mode
+    AddrTileType*   pType             ///< [out] Tile type
+    ) const
+{
+    const ADDR_E_RETURNCODE notSupported = ADDR_NOTSUPPORTED;
+    return notSupported;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlGetPipes
+*
+*   @brief
+*       Get number pipes; pTileInfo is not used here (it can be NULL when asic is 6xx and 8xx)
+*   @return
+*       num pipes (the m_pipes member)
+***************************************************************************************************
+*/
+UINT_32 AddrLib::HwlGetPipes(
+    const ADDR_TILEINFO* pTileInfo    ///< [in] Tile info
+    ) const
+{
+    const UINT_32 pipeCount = m_pipes;
+    return pipeCount;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeQbStereoInfo
+*
+*   @brief
+*       Get quad buffer stereo information; doubles height, pixelHeight and surfSize of pOut
+*   @return
+*       TRUE if no error, FALSE when pOut->pStereoInfo is NULL (pOut is then left unchanged)
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::ComputeQbStereoInfo(
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [in/out] updated pOut+pStereoInfo
+    ) const
+{
+    BOOL_32 success = FALSE;
+
+    if (pOut->pStereoInfo)
+    {
+        ADDR_ASSERT(pOut->bpp >= 8);
+        ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0);
+
+        // Save original height
+        pOut->pStereoInfo->eyeHeight = pOut->height;
+
+        // Right offset = surface size before it is doubled below
+        pOut->pStereoInfo->rightOffset = static_cast<UINT_32>(pOut->surfSize);
+
+        pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut);
+        // Double height (after the swizzle query above, which is given the undoubled pOut)
+        pOut->height <<= 1;
+        pOut->pixelHeight <<= 1;
+
+        // Double size
+        pOut->surfSize <<= 1;
+
+        // Right start address meets the base align since it is guaranteed by AddrLib
+
+        // 1D surface on SI may break this rule, but we can force it to meet by checking .qbStereo.
+        success = TRUE;
+    }
+
+    return success;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//                               Element lib
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+/**
+***************************************************************************************************
+*   AddrLib::Flt32ToDepthPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a depth/stencil pixel value and report the depth/stencil
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_PARAMSIZEMISMATCH on a size-field mismatch, else ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::Flt32ToDepthPixel(
+    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
+    ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (GetFillSizeFieldsFlags() == TRUE)
+    {
+        if ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) ||
+            (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        GetElemLib()->Flt32ToDepthPixel(pIn->format,
+                                        pIn->comps,
+                                        pOut->pPixel);
+        UINT_32 depthBase = 0;
+        UINT_32 stencilBase = 0;
+        UINT_32 depthBits = 0;
+        UINT_32 stencilBits = 0;
+
+        switch (pIn->format)
+        {
+            case ADDR_DEPTH_16:
+                depthBits = 16;
+                break;
+            case ADDR_DEPTH_X8_24:
+            case ADDR_DEPTH_8_24:
+            case ADDR_DEPTH_X8_24_FLOAT:
+            case ADDR_DEPTH_8_24_FLOAT:
+                depthBase = 8;
+                depthBits = 24;
+                stencilBits = 8;
+                break;
+            case ADDR_DEPTH_32_FLOAT:
+                depthBits = 32;
+                break;
+            case ADDR_DEPTH_X24_8_32_FLOAT:
+                depthBase = 8;
+                depthBits = 32;
+                stencilBits = 8;
+                break;
+            default:
+                break;
+        }
+
+        // Overwrite base since R800 has no "tileBase"
+        if (GetElemLib()->IsDepthStencilTilePlanar() == FALSE)
+        {
+            depthBase = 0;
+            stencilBase = 0;
+        }
+
+        depthBase *= 64;
+        stencilBase *= 64;
+
+        pOut->stencilBase = stencilBase;
+        pOut->depthBase = depthBase;
+        pOut->depthBits = depthBits;
+        pOut->stencilBits = stencilBits;
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::Flt32ToColorPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a red/green/blue/alpha pixel value; the conversion itself
+*       is performed by the element library (GetElemLib()->Flt32ToColorPixel)
+*
+*   @return
+*       ADDR_E_RETURNCODE: ADDR_PARAMSIZEMISMATCH when size-field checking is enabled and
+*       pIn->size or pOut->size differs from the size of its structure (nothing is
+*       converted in that case), ADDR_OK otherwise
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::Flt32ToColorPixel(
+    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
+    ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const
+{
+    // The size fields are only validated when GetFillSizeFieldsFlags() reports TRUE; on a
+    // mismatch bail out before the element library is called.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) ||
+         (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Sizes are acceptable (or unchecked): convert.
+    GetElemLib()->Flt32ToColorPixel(pIn->format,
+                                    pIn->surfNum,
+                                    pIn->surfSwap,
+                                    pIn->comps,
+                                    pOut->pPixel);
+
+    return ADDR_OK;
+}
+
+
+/**
+***************************************************************************************************
+*   AddrLib::GetExportNorm
+*
+*   @brief
+*       Check whether a format can use EXPORT_NORM; the decision is delegated to
+*       GetElemLib()->PixGetExportNorm
+*
+*   @return
+*       TRUE if EXPORT_NORM can be used. FALSE is also returned, without consulting the
+*       element library, when size-field checking is enabled and pIn->size does not match
+*       sizeof(ELEM_GETEXPORTNORM_INPUT)
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::GetExportNorm(
+    const ELEM_GETEXPORTNORM_INPUT* pIn) const
+{
+    const BOOL_32 sizeMismatch =
+        (GetFillSizeFieldsFlags() == TRUE) &&
+        (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT));
+
+    // A mismatching input size is reported as "cannot export norm" since this function
+    // has no error code to return.
+    if (sizeMismatch)
+    {
+        return FALSE;
+    }
+
+    const BOOL_32 exportNorm = GetElemLib()->PixGetExportNorm(pIn->format,
+                                                              pIn->num,
+                                                              pIn->swap);
+
+    return exportNorm;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputePrtInfo
+*
+*   @brief
+*       Compute prt surface related info: the prt tile width/height for the given format,
+*       fragment count and base mip dimensions (both are 0 if the format is rejected)
+*   @return
+*       ADDR_E_RETURNCODE (ADDR_INVALIDPARAMS if bpp is below 8 or is 24, 48 or 96)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputePrtInfo(
+    const ADDR_PRT_INFO_INPUT*  pIn,
+    ADDR_PRT_INFO_OUTPUT*       pOut) const
+{
+    ADDR_ASSERT(pOut != NULL);
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    UINT_32     expandX = 1;
+    UINT_32     expandY = 1;
+    AddrElemMode elemMode;
+
+    UINT_32     bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
+                                                &elemMode,
+                                                &expandX,
+                                                &expandY);
+
+    if (bpp <8 || bpp == 24 || bpp == 48 || bpp == 96 )
+    {
+        returnCode = ADDR_INVALIDPARAMS;
+    }
+
+    UINT_32     numFrags = pIn->numFrags;
+    ADDR_ASSERT(numFrags <= 8);
+
+    UINT_32     tileWidth = 0;
+    UINT_32     tileHeight = 0;
+    if (returnCode == ADDR_OK)
+    {
+        // 2D or 3D texture (base mip depth or height greater than 1)
+        if (pIn->baseMipDepth > 1 || pIn->baseMipHeight > 1)
+        {
+            if (bpp == 8)
+            {
+                tileWidth = 256;
+                tileHeight = 256;
+            }
+            else if (bpp == 16)
+            {
+                tileWidth = 256;
+                tileHeight = 128;
+            }
+            else if (bpp == 32)
+            {
+                tileWidth = 128;
+                tileHeight = 128;
+            }
+            else if (bpp == 64)
+            {
+                // assume it is BC1/4
+                tileWidth = 512;
+                tileHeight = 256;
+
+                if (elemMode == ADDR_UNCOMPRESSED)
+                {
+                    tileWidth = 128;
+                    tileHeight = 64;
+                }
+            }
+            else if (bpp == 128)
+            {
+                // assume it is BC2/3/5/6H/7
+                tileWidth = 256;
+                tileHeight = 256;
+
+                if (elemMode == ADDR_UNCOMPRESSED)
+                {
+                    tileWidth = 64;
+                    tileHeight = 64;
+                }
+            }
+
+            if (numFrags == 2)
+            {
+                tileWidth = tileWidth / 2;
+            }
+            else if (numFrags == 4)
+            {
+                tileWidth = tileWidth / 2;
+                tileHeight = tileHeight / 2;
+            }
+            else if (numFrags == 8)
+            {
+                tileWidth = tileWidth / 4;
+                tileHeight = tileHeight / 2;
+            }
+        }
+        else    // 1d
+        {
+            tileHeight = 1;
+            if (bpp == 8)
+            {
+                tileWidth = 65536;
+            }
+            else if (bpp == 16)
+            {
+                tileWidth = 32768;
+            }
+            else if (bpp == 32)
+            {
+                tileWidth = 16384;
+            }
+            else if (bpp == 64)
+            {
+                tileWidth = 8192;
+            }
+            else if (bpp == 128)
+            {
+                tileWidth = 4096;
+            }
+        }
+    }
+
+    pOut->prtTileWidth = tileWidth;
+    pOut->prtTileHeight = tileHeight;
+
+    return returnCode;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,695 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrlib.h
+* @brief Contains the AddrLib base class definition.
+***************************************************************************************************
+*/
+
+#ifndef __ADDR_LIB_H__
+#define __ADDR_LIB_H__
+
+
+#include "addrinterface.h"
+#include "addrobject.h"
+#include "addrelemlib.h"
+
+#if BRAHMA_BUILD
+#include "amdgpu_id.h"
+#else
+#include "atiid.h"
+#endif
+
+#ifndef CIASICIDGFXENGINE_R600
+#define CIASICIDGFXENGINE_R600 0x00000006
+#endif
+
+#ifndef CIASICIDGFXENGINE_R800
+#define CIASICIDGFXENGINE_R800 0x00000008
+#endif
+
+#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
+#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
+#endif
+
+#ifndef CIASICIDGFXENGINE_SEAISLAND
+#define CIASICIDGFXENGINE_SEAISLAND 0x0000000B
+#endif
+/**
+***************************************************************************************************
+* @brief Neutral enums that define pipe interleave size (each value is the size in bytes)
+***************************************************************************************************
+*/
+enum AddrPipeInterleave
+{
+    ADDR_PIPEINTERLEAVE_256B = 256,
+    ADDR_PIPEINTERLEAVE_512B = 512,
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define DRAM row size (each value is the size in bytes)
+***************************************************************************************************
+*/
+enum AddrRowSize
+{
+    ADDR_ROWSIZE_1KB = 1024,
+    ADDR_ROWSIZE_2KB = 2048,
+    ADDR_ROWSIZE_4KB = 4096,
+    ADDR_ROWSIZE_8KB = 8192,
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define bank interleave (each value equals the number in its name)
+***************************************************************************************************
+*/
+enum AddrBankInterleave
+{
+    ADDR_BANKINTERLEAVE_1 = 1,
+    ADDR_BANKINTERLEAVE_2 = 2,
+    ADDR_BANKINTERLEAVE_4 = 4,
+    ADDR_BANKINTERLEAVE_8 = 8,
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define MGPU chip tile size (each value equals the number in its name)
+***************************************************************************************************
+*/
+enum AddrChipTileSize
+{
+    ADDR_CHIPTILESIZE_16 = 16,
+    ADDR_CHIPTILESIZE_32 = 32,
+    ADDR_CHIPTILESIZE_64 = 64,
+    ADDR_CHIPTILESIZE_128 = 128,
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define shader engine tile size (value equals the number in the name)
+***************************************************************************************************
+*/
+enum AddrEngTileSize
+{
+    ADDR_SE_TILESIZE_16 = 16,
+    ADDR_SE_TILESIZE_32 = 32,
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define bank swap size (each value is the size in bytes)
+***************************************************************************************************
+*/
+enum AddrBankSwapSize
+{
+    ADDR_BANKSWAP_128B = 128,
+    ADDR_BANKSWAP_256B = 256,
+    ADDR_BANKSWAP_512B = 512,
+    ADDR_BANKSWAP_1KB = 1024,
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define sample split size (each value is the size in bytes)
+***************************************************************************************************
+*/
+enum AddrSampleSplitSize
+{
+    ADDR_SAMPLESPLIT_1KB = 1024,
+    ADDR_SAMPLESPLIT_2KB = 2048,
+    ADDR_SAMPLESPLIT_4KB = 4096,
+    ADDR_SAMPLESPLIT_8KB = 8192,
+};
+
+/**
+***************************************************************************************************
+* @brief Per-tile-mode attribute bitfield (element type of the AddrLib::m_modeFlags table)
+***************************************************************************************************
+*/
+struct AddrTileModeFlags
+{
+    UINT_32 thickness       : 4; ///< 4-bit field; every field below is a single-bit flag
+    UINT_32 isLinear        : 1;
+    UINT_32 isMicro         : 1;
+    UINT_32 isMacro         : 1;
+    UINT_32 isMacro3d       : 1;
+    UINT_32 isPrt           : 1;
+    UINT_32 isPrtNoRotation : 1;
+    UINT_32 isBankSwapped   : 1;
+};
+
+/**
+***************************************************************************************************
+* @brief This class contains ASIC-independent address library functionality
+***************************************************************************************************
+*/
+class AddrLib : public AddrObject
+{
+public:
+    virtual ~AddrLib();
+
+    static ADDR_E_RETURNCODE Create(
+        const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut);
+
+    /// Counterpart of Create(): destroys this object via `delete this`
+    VOID Destroy()
+    {
+        delete this;
+    }
+
+    static AddrLib* GetAddrLib(
+        ADDR_HANDLE hLib);
+
+    /// Returns AddrLib version (from the compiled binary instead of the include file)
+    UINT_32 GetVersion()
+    {
+        return m_version;
+    }
+
+    /// Returns asic chip family name defined by AddrLib
+    AddrChipFamily GetAddrChipFamily()
+    {
+        return m_chipFamily;
+    }
+
+    /// Returns TRUE if the useTileIndex config flag is set and index is not TileIndexInvalid
+    BOOL_32 UseTileIndex(INT_32 index) const
+    {
+        return m_configFlags.useTileIndex && (index != TileIndexInvalid);
+    }
+
+    /// Returns combined swizzle support
+    BOOL_32 UseCombinedSwizzle() const
+    {
+        return m_configFlags.useCombinedSwizzle;
+    }
+
+    //
+    // Interface stubs
+    //
+    ADDR_E_RETURNCODE ComputeSurfaceInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
+        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
+        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT*  pIn,
+        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeSliceTileSwizzle(
+        const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,
+        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ExtractBankPipeSwizzle(
+        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
+        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE CombineBankPipeSwizzle(
+        const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,
+        ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeBaseSwizzle(
+        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,
+        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeFmaskInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT*  pIn,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+    ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
+        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*  pIn,
+        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
+        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*  pIn,
+        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ConvertTileInfoToHW(
+        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ConvertTileIndex(
+        const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
+        ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ConvertTileIndex1(
+        const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,
+        ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE GetTileIndex(
+        const ADDR_GET_TILEINDEX_INPUT* pIn,
+        ADDR_GET_TILEINDEX_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeHtileInfo(
+        const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeCmaskInfo(
+        const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn,
+        ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeDccInfo(
+        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
+        const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*  pIn,
+        ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
+        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*  pIn,
+        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
+        const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*  pIn,
+        ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
+        const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*  pIn,
+        ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE ComputePrtInfo(
+        const ADDR_PRT_INFO_INPUT*  pIn,
+        ADDR_PRT_INFO_OUTPUT*       pOut) const;
+
+    ADDR_E_RETURNCODE Flt32ToDepthPixel(
+        const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
+        ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const;
+
+    ADDR_E_RETURNCODE Flt32ToColorPixel(
+        const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
+        ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const;
+
+    BOOL_32 GetExportNorm(
+        const ELEM_GETEXPORTNORM_INPUT* pIn) const;
+
+protected:
+    AddrLib();  // Constructor is protected
+    AddrLib(const AddrClient* pClient);
+
+    /// Pure Virtual function for Hwl computing surface info
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl computing surface address from coord
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
+        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl computing surface coord from address
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
+        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl computing surface tile swizzle
+    virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
+        const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
+        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b
+    virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
+        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
+        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl combining bank/pipe swizzle
+    virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
+        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO*  pTileInfo,
+        UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0;
+
+    /// Pure Virtual function for Hwl computing base swizzle
+    virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
+        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl computing HTILE base align
+    virtual UINT_32 HwlComputeHtileBaseAlign(
+        BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0;
+
+    /// Pure Virtual function for Hwl computing HTILE bpp
+    virtual UINT_32 HwlComputeHtileBpp(
+        BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0;
+
+    /// Pure Virtual function for Hwl computing HTILE bytes
+    virtual UINT_64 HwlComputeHtileBytes(
+        UINT_32 pitch, UINT_32 height, UINT_32 bpp,
+        BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0;
+
+    /// Pure Virtual function for Hwl computing FMASK info
+    virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0;
+
+    /// Pure Virtual function for Hwl FMASK address from coord
+    virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
+        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl FMASK coord from address
+    virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
+        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl convert tile info from real value to HW value
+    virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
+        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0;
+
+    /// Pure Virtual function for Hwl compute mipmap info
+    virtual BOOL_32 HwlComputeMipLevel(
+        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;
+
+    /// Pure Virtual function for Hwl max cmask blockMax (NOTE(review): BOOL_32 return - confirm)
+    virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0;
+
+    /// Pure Virtual function for Hwl compute fmask bits
+    virtual UINT_32 HwlComputeFmaskBits(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+        UINT_32* pNumSamples) const = 0;
+
+    /// Virtual function to get index (not pure then no need to implement this in all hwls)
+    virtual ADDR_E_RETURNCODE HwlGetTileIndex(
+        const ADDR_GET_TILEINDEX_INPUT* pIn,
+        ADDR_GET_TILEINDEX_OUTPUT*      pOut) const
+    {
+        return ADDR_NOTSUPPORTED;
+    }
+
+    /// Virtual function for Hwl to compute Dcc info
+    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
+        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const
+    {
+        return ADDR_NOTSUPPORTED;
+    }
+
+    /// Virtual function to get cmask address for tc compatible cmask
+    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
+        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const
+    {
+        return ADDR_NOTSUPPORTED;
+    }
+    // Compute attributes
+
+    // HTILE
+    UINT_32    ComputeHtileInfo(
+        ADDR_HTILE_FLAGS flags,
+        UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices,
+        BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+        ADDR_TILEINFO*  pTileInfo,
+        UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes,
+        UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL,
+        UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const;
+
+    // CMASK
+    ADDR_E_RETURNCODE ComputeCmaskInfo(
+        ADDR_CMASK_FLAGS flags,
+        UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear,
+        ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes,
+        UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL,
+        UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const;
+
+    virtual VOID HwlComputeTileDataWidthAndHeightLinear(
+        UINT_32* pMacroWidth, UINT_32* pMacroHeight,
+        UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
+
+    // CMASK & HTILE addressing
+    virtual UINT_64 HwlComputeXmaskAddrFromCoord(
+        UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice,
+        UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8,
+        BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo,
+        UINT_32* bitPosition) const;
+
+    virtual VOID HwlComputeXmaskCoordFromAddr(
+        UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+        UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+        ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
+
+    // Surface mipmap
+    VOID    ComputeMipLevel(
+        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
+    /// Pure Virtual function for Hwl checking degrade for base level
+    virtual BOOL_32 HwlDegradeBaseLevel(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;
+
+    virtual BOOL_32 HwlOverrideTileMode(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        AddrTileMode* pTileMode,
+        AddrTileType* pTileType) const
+    {
+        // not supported in hwl layer, FALSE for not overridden
+        return FALSE;
+    }
+
+    AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const;
+
+    VOID PadDimensions(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
+        UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
+        UINT_32* pSlices, UINT_32 sliceAlign) const;
+
+    virtual VOID HwlPadDimensions(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
+        UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
+        UINT_32* pSlices, UINT_32 sliceAlign) const
+    {
+    }
+
+    //
+    // Addressing shared for linear/1D tiling
+    //
+    UINT_64 ComputeSurfaceAddrFromCoordLinear(
+        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+        UINT_32* pBitPosition) const;
+
+    VOID    ComputeSurfaceCoordFromAddrLinear(
+        UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp,
+        UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
+
+    VOID    ComputeSurfaceCoordFromAddrMicroTiled(
+        UINT_64 addr, UINT_32 bitPosition,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
+        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
+
+    UINT_32 ComputePixelIndexWithinMicroTile(
+        UINT_32 x, UINT_32 y, UINT_32 z,
+        UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const;
+
+    /// Pure Virtual function for Hwl computing coord from offset inside micro tile
+    virtual VOID HwlComputePixelCoordFromOffset(
+        UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
+        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
+        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0;
+
+    //
+    // Addressing shared by all
+    //
+    virtual UINT_32 HwlGetPipes(
+        const ADDR_TILEINFO* pTileInfo) const;
+
+    UINT_32 ComputePipeFromAddr(
+        UINT_64 addr, UINT_32 numPipes) const;
+
+    /// Pure Virtual function for Hwl computing pipe from coord
+    virtual UINT_32 ComputePipeFromCoord(
+        UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode,
+        UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0;
+
+    /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile
+    virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
+        UINT_32 pipe, UINT_32 x) const = 0;
+
+    //
+    // Initialization
+    //
+    /// Pure Virtual function for Hwl computing internal global parameters from h/w registers
+    virtual BOOL_32 HwlInitGlobalParams(
+        const ADDR_CREATE_INPUT* pCreateIn) = 0;
+
+    /// Pure Virtual function for Hwl converting chip family
+    virtual AddrChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0;
+
+    //
+    // Misc helper
+    //
+    static const AddrTileModeFlags m_modeFlags[ADDR_TM_COUNT];
+
+    static UINT_32 ComputeSurfaceThickness(
+        AddrTileMode tileMode);
+
+    // Checking tile mode
+    static BOOL_32 IsMacroTiled(AddrTileMode tileMode);
+    static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode);
+    static BOOL_32 IsLinear(AddrTileMode tileMode);
+    static BOOL_32 IsMicroTiled(AddrTileMode tileMode);
+    static BOOL_32 IsPrtTileMode(AddrTileMode tileMode);
+    static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode);
+
+    static UINT_32 Bits2Number(UINT_32 bitNum,...);
+
+    static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags)
+    {
+        return numFrags != 0 ? numFrags : Max(1u, numSamples);
+    }
+
+    /// Returns pointer of AddrElemLib
+    AddrElemLib* GetElemLib() const
+    {
+        return m_pElemLib;
+    }
+
+    /// Return TRUE if tile info is needed
+    BOOL_32 UseTileInfo() const
+    {
+        return !m_configFlags.ignoreTileInfo;
+    }
+
+    /// Returns fillSizeFields flag
+    UINT_32 GetFillSizeFieldsFlags() const
+    {
+        return m_configFlags.fillSizeFields;
+    }
+
+    /// Adjusts pitch alignment for flipping surface
+    VOID    AdjustPitchAlignment(
+        ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const;
+
+    /// Overwrite tile config according to tile index
+    virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
+        INT_32 index, INT_32 macroModeIndex,
+        ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const;
+
+    /// Overwrite macro tile config according to tile index
+    virtual INT_32 HwlComputeMacroModeIndex(
+        INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
+        ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL
+        ) const
+    {
+        return TileIndexNoMacroIndex;
+    }
+
+    /// Pre-handler of 3x pitch (96 bit) adjustment
+    virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+    /// Post-handler of 3x pitch adjustment
+    virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+    /// Check miplevel after surface adjustment
+    ADDR_E_RETURNCODE PostComputeMipLevel(
+        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    /// Quad buffer stereo support, has its implementation in ind. layer
+    virtual BOOL_32 ComputeQbStereoInfo(
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    /// Pure virtual function to compute stereo bank swizzle for right eye
+    virtual UINT_32 HwlComputeQbStereoRightSwizzle(
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+private:
+    // Disallow the copy constructor
+    AddrLib(const AddrLib& a);
+
+    // Disallow the assignment operator
+    AddrLib& operator=(const AddrLib& a);
+
+    VOID SetAddrChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
+
+    UINT_32 ComputeCmaskBaseAlign(
+        ADDR_CMASK_FLAGS flags, ADDR_TILEINFO*  pTileInfo) const;
+
+    UINT_64 ComputeCmaskBytes(
+        UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const;
+
+    //
+    // CMASK/HTILE shared methods
+    //
+    VOID    ComputeTileDataWidthAndHeight(
+        UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
+        UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;
+
+    UINT_32 ComputeXmaskCoordYFromPipe(
+        UINT_32 pipe, UINT_32 x) const;
+
+    VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
+
+    BOOL_32 DegradeBaseLevel(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const;
+
+protected:
+    AddrLibClass        m_class;        ///< Store class type (HWL type)
+
+    AddrChipFamily      m_chipFamily;   ///< Chip family translated from the one in atiid.h
+
+    UINT_32             m_chipRevision; ///< Revision id from xxx_id.h
+
+    UINT_32             m_version;      ///< Current version
+
+    //
+    // Global parameters
+    //
+    ADDR_CONFIG_FLAGS   m_configFlags;  ///< Global configuration flags. Note this is setup by
+                                        ///  AddrLib instead of Client except forceLinearAligned
+
+    UINT_32             m_pipes;        ///< Number of pipes
+    UINT_32             m_banks;        ///< Number of banks
+                                        ///  For r800 this is MC_ARB_RAMCFG.NOOFBANK
+                                        ///  Keep it here to do default parameter calculation
+
+    UINT_32             m_pipeInterleaveBytes;
+                                        ///< Specifies the size of contiguous address space
+                                        ///  within each tiling pipe when making linear
+                                        ///  accesses. (Formerly Group Size)
+
+    UINT_32             m_rowSize;      ///< DRAM row size, in bytes
+
+    UINT_32             m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels
+    UINT_32             m_maxSamples;   ///< Max numSamples
+private:
+    AddrElemLib*        m_pElemLib;     ///< Element Lib pointer
+};
+
+AddrLib* AddrSIHwlInit  (const AddrClient* pClient);
+AddrLib* AddrCIHwlInit  (const AddrClient* pClient);
+
+#endif
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,246 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrobject.cpp
+* @brief Contains the AddrObject base class implementation.
+***************************************************************************************************
+*/
+
+#include "addrinterface.h"
+#include "addrobject.h"
+
+/**
+***************************************************************************************************
+*   AddrObject::AddrObject
+*
+*   @brief
+*       Default constructor: clears the client handle and the alloc/free/debugPrint callbacks.
+***************************************************************************************************
+*/
+AddrObject::AddrObject()
+{
+    m_client.handle = NULL;
+    m_client.callbacks.allocSysMem = NULL;
+    m_client.callbacks.freeSysMem = NULL;
+    m_client.callbacks.debugPrint = NULL;
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::AddrObject
+*
+*   @brief
+*       Constructor that copies *pClient into m_client (pClient is not checked for NULL).
+***************************************************************************************************
+*/
+AddrObject::AddrObject(const AddrClient* pClient)
+{
+    m_client = *pClient;
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::~AddrObject
+*
+*   @brief
+*       Destructor for the AddrObject class (empty body: nothing is released here).
+***************************************************************************************************
+*/
+AddrObject::~AddrObject()
+{
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::ClientAlloc
+*
+*   @brief
+*       Allocates objSize bytes via pClient's allocSysMem callback; returns NULL if none is set
+***************************************************************************************************
+*/
+VOID* AddrObject::ClientAlloc(
+    size_t             objSize,    ///< [in] Size to allocate
+    const AddrClient*  pClient)    ///< [in] Client pointer
+{
+    VOID* pObjMem = NULL;
+
+    if (pClient->callbacks.allocSysMem != NULL)
+    {
+        ADDR_ALLOCSYSMEM_INPUT allocInput = {0};
+
+        allocInput.size        = sizeof(ADDR_ALLOCSYSMEM_INPUT);
+        allocInput.flags.value = 0;
+        allocInput.sizeInBytes = static_cast<UINT_32>(objSize); // size_t narrowed to UINT_32
+        allocInput.hClient     = pClient->handle;
+
+        pObjMem = pClient->callbacks.allocSysMem(&allocInput);
+    }
+
+    return pObjMem;
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::AddrMalloc
+*
+*   @brief
+*       A wrapper of ClientAlloc that allocates through this object's own m_client
+***************************************************************************************************
+*/
+VOID* AddrObject::AddrMalloc(
+    size_t objSize) const   ///< [in] Size to allocate
+{
+    return ClientAlloc(objSize, &m_client);
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::ClientFree
+*
+*   @brief
+*       Frees pObjMem via pClient's freeSysMem callback; no-op if callback or pObjMem is NULL
+***************************************************************************************************
+*/
+VOID AddrObject::ClientFree(
+    VOID*              pObjMem,    ///< [in] User virtual address to free.
+    const AddrClient*  pClient)    ///< [in] Client pointer
+{
+    if (pClient->callbacks.freeSysMem != NULL)
+    {
+        if (pObjMem != NULL)
+        {
+            ADDR_FREESYSMEM_INPUT freeInput = {0};
+
+            freeInput.size      = sizeof(ADDR_FREESYSMEM_INPUT);
+            freeInput.hClient   = pClient->handle;
+            freeInput.pVirtAddr = pObjMem;
+
+            pClient->callbacks.freeSysMem(&freeInput);
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::AddrFree
+*
+*   @brief
+*       A wrapper of ClientFree that frees through this object's own m_client
+***************************************************************************************************
+*/
+VOID AddrObject::AddrFree(
+    VOID* pObjMem) const                 ///< [in] User virtual address to free.
+{
+    ClientFree(pObjMem, &m_client);
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::operator new
+*
+*   @brief
+*       Allocates memory needed for AddrObject object via ClientAlloc. (with AddrClient pointer)
+*
+*   @return
+*       Returns NULL if unsuccessful.
+***************************************************************************************************
+*/
+VOID* AddrObject::operator new(
+    size_t             objSize,    ///< [in] Size to allocate
+    const AddrClient*  pClient)    ///< [in] Client pointer
+{
+    return ClientAlloc(objSize, pClient);
+}
+
+
+/**
+***************************************************************************************************
+*   AddrObject::operator delete
+*
+*   @brief
+*       Frees AddrObject object memory through the explicitly supplied client (via ClientFree).
+***************************************************************************************************
+*/
+VOID AddrObject::operator delete(
+    VOID* pObjMem,              ///< [in] User virtual address to free.
+    const AddrClient* pClient)  ///< [in] Client pointer
+{
+    ClientFree(pObjMem, pClient);
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::operator delete
+*
+*   @brief
+*       Frees AddrObject object memory using the AddrClient stored inside the object itself.
+***************************************************************************************************
+*/
+VOID AddrObject::operator delete(
+    VOID* pObjMem)                  ///< [in] User virtual address to free.
+{
+    AddrObject* pObj = static_cast<AddrObject*>(pObjMem); // NOTE(review): no NULL check here
+    ClientFree(pObjMem, &pObj->m_client); // NOTE(review): m_client read from memory being freed
+}
+
+/**
+***************************************************************************************************
+*   AddrObject::DebugPrint
+*
+*   @brief
+*       Print debug message through the client's debugPrint callback (body compiled #if DEBUG)
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrObject::DebugPrint(
+    const CHAR* pDebugString,     ///< [in] Debug string
+    ...) const
+{
+#if DEBUG
+    if (m_client.callbacks.debugPrint != NULL)
+    {
+        va_list ap;
+
+        va_start(ap, pDebugString);
+
+        ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};
+
+        debugPrintInput.size         = sizeof(ADDR_DEBUGPRINT_INPUT);
+        debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
+        debugPrintInput.hClient      = m_client.handle;
+        va_copy(debugPrintInput.ap, ap);
+
+        m_client.callbacks.debugPrint(&debugPrintInput);
+        va_end(debugPrintInput.ap); // every va_copy must be paired with its own va_end
+        va_end(ap);
+    }
+#endif
+}
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  addrobject.h
+* @brief Contains the AddrObject base class definition.
+***************************************************************************************************
+*/
+
+#ifndef __ADDR_OBJECT_H__
+#define __ADDR_OBJECT_H__
+
+#include "addrtypes.h"
+#include "addrcommon.h"
+
+/**
+***************************************************************************************************
+* @brief This structure contains client specific data
+***************************************************************************************************
+*/
+struct AddrClient
+{
+    ADDR_CLIENT_HANDLE  handle;     ///< Client handle; forwarded as hClient to the debugPrint callback
+    ADDR_CALLBACKS      callbacks;  ///< Client-supplied callback table (debugPrint is one entry)
+};
+/**
+***************************************************************************************************
+* @brief This class is the base class for all ADDR class objects.
+***************************************************************************************************
+*/
+class AddrObject
+{
+public:
+    AddrObject();
+    AddrObject(const AddrClient* pClient);
+    virtual ~AddrObject();
+
+    VOID* operator new(size_t size, const AddrClient* pClient);   ///< Allocating form that takes the client
+    VOID  operator delete(VOID* pObj, const AddrClient* pClient); ///< Placement-delete counterpart of the above
+    VOID  operator delete(VOID* pObj);                            ///< Frees via ClientFree with pObj's m_client
+    VOID* AddrMalloc(size_t size) const;
+    VOID  AddrFree(VOID* pObj) const;
+
+    VOID DebugPrint(
+        const CHAR* pDebugString,
+        ...) const; ///< Forwards to the client's debugPrint callback; body is compiled out unless DEBUG
+
+    const AddrClient* GetClient() const {return &m_client;} ///< Read-only access to the stored client data
+
+protected:
+    AddrClient m_client; ///< Client handle and callbacks used by this object
+
+private:
+    static VOID* ClientAlloc(size_t size, const AddrClient* pClient);
+    static VOID  ClientFree(VOID* pObj, const AddrClient* pClient);
+
+    // Copy constructor is declared private so that objects cannot be copied
+    AddrObject(const AddrObject& a);
+
+    // Assignment operator is declared private so that objects cannot be assigned
+    AddrObject& operator=(const AddrObject& a);
+};
+
+#endif
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/inc/chip/r800/si_gb_reg.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/inc/chip/r800/si_gb_reg.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/inc/chip/r800/si_gb_reg.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/inc/chip/r800/si_gb_reg.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,155 @@
+#if !defined (__SI_GB_REG_H__)
+#define __SI_GB_REG_H__
+
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+//
+// Make sure the necessary endian defines are there.
+//
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
+#endif
+
+/*
+ * GB_ADDR_CONFIG struct
+ */
+
+#if     defined(LITTLEENDIAN_CPU)
+
+     typedef struct _GB_ADDR_CONFIG_T { // field widths below sum to 32 bits
+          unsigned int num_pipes                      : 3;
+          unsigned int                                : 1; // unnamed bit-fields are padding
+          unsigned int pipe_interleave_size           : 3;
+          unsigned int                                : 1;
+          unsigned int bank_interleave_size           : 3;
+          unsigned int                                : 1;
+          unsigned int num_shader_engines             : 2;
+          unsigned int                                : 2;
+          unsigned int shader_engine_tile_size        : 3;
+          unsigned int                                : 1;
+          unsigned int num_gpus                       : 3;
+          unsigned int                                : 1;
+          unsigned int multi_gpu_tile_size            : 2;
+          unsigned int                                : 2;
+          unsigned int row_size                       : 2;
+          unsigned int num_lower_pipes                : 1;
+          unsigned int                                : 1;
+     } GB_ADDR_CONFIG_T;
+
+#elif       defined(BIGENDIAN_CPU)
+
+     typedef struct _GB_ADDR_CONFIG_T { // same fields as the little-endian layout, declared in reverse order
+          unsigned int                                : 1; // unnamed bit-fields are padding
+          unsigned int num_lower_pipes                : 1;
+          unsigned int row_size                       : 2;
+          unsigned int                                : 2;
+          unsigned int multi_gpu_tile_size            : 2;
+          unsigned int                                : 1;
+          unsigned int num_gpus                       : 3;
+          unsigned int                                : 1;
+          unsigned int shader_engine_tile_size        : 3;
+          unsigned int                                : 2;
+          unsigned int num_shader_engines             : 2;
+          unsigned int                                : 1;
+          unsigned int bank_interleave_size           : 3;
+          unsigned int                                : 1;
+          unsigned int pipe_interleave_size           : 3;
+          unsigned int                                : 1;
+          unsigned int num_pipes                      : 3;
+     } GB_ADDR_CONFIG_T;
+
+#endif
+
+typedef union {
+     unsigned int val : 32; // the whole value as one 32-bit quantity
+     GB_ADDR_CONFIG_T f;    // the same storage viewed as individual fields
+} GB_ADDR_CONFIG;
+
+#if       defined(LITTLEENDIAN_CPU)
+
+     typedef struct _GB_TILE_MODE_T { // field widths below sum to 32 bits
+          unsigned int micro_tile_mode                : 2;
+          unsigned int array_mode                     : 4;
+          unsigned int pipe_config                    : 5;
+          unsigned int tile_split                     : 3;
+          unsigned int bank_width                     : 2;
+          unsigned int bank_height                    : 2;
+          unsigned int macro_tile_aspect              : 2;
+          unsigned int num_banks                      : 2;
+          unsigned int micro_tile_mode_new            : 3;
+          unsigned int sample_split                   : 2;
+          unsigned int                                : 5; // unnamed bit-field: padding
+     } GB_TILE_MODE_T;
+
+     typedef struct _GB_MACROTILE_MODE_T { // field widths below sum to 32 bits
+          unsigned int bank_width                     : 2;
+          unsigned int bank_height                    : 2;
+          unsigned int macro_tile_aspect              : 2;
+          unsigned int num_banks                      : 2;
+          unsigned int                                : 24; // unnamed bit-field: padding
+     } GB_MACROTILE_MODE_T;
+
+#elif          defined(BIGENDIAN_CPU)
+
+     typedef struct _GB_TILE_MODE_T { // same fields as the little-endian layout, declared in reverse order
+          unsigned int                                : 5; // unnamed bit-field: padding
+          unsigned int sample_split                   : 2;
+          unsigned int micro_tile_mode_new            : 3;
+          unsigned int num_banks                      : 2;
+          unsigned int macro_tile_aspect              : 2;
+          unsigned int bank_height                    : 2;
+          unsigned int bank_width                     : 2;
+          unsigned int tile_split                     : 3;
+          unsigned int pipe_config                    : 5;
+          unsigned int array_mode                     : 4;
+          unsigned int micro_tile_mode                : 2;
+     } GB_TILE_MODE_T;
+
+     typedef struct _GB_MACROTILE_MODE_T { // same fields as the little-endian layout, declared in reverse order
+          unsigned int                                : 24; // unnamed bit-field: padding
+          unsigned int num_banks                      : 2;
+          unsigned int macro_tile_aspect              : 2;
+          unsigned int bank_height                    : 2;
+          unsigned int bank_width                     : 2;
+     } GB_MACROTILE_MODE_T;
+
+#endif
+
+typedef union {
+     unsigned int val : 32; // the whole value as one 32-bit quantity
+     GB_TILE_MODE_T f;      // the same storage viewed as individual fields
+} GB_TILE_MODE;
+
+typedef union {
+     unsigned int val : 32; // the whole value as one 32-bit quantity
+     GB_MACROTILE_MODE_T f; // the same storage viewed as individual fields
+} GB_MACROTILE_MODE;
+
+#endif
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/inc/lnx_common_defs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/inc/lnx_common_defs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/inc/lnx_common_defs.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/inc/lnx_common_defs.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+#ifndef _lnx_common_defs_h_
+#define _lnx_common_defs_h_
+
+#if DBG
+#include <stdarg.h>                         // We do not have any choice: need variable
+                                            // number of parameters support for debug
+                                            // build.
+#endif                                      // #if DBG
+
+//
+// --------------  External functions from Linux kernel driver ----------------
+//
+// Note: The definitions/declarations below must match the original ones.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned long __ke_size_t;              // as it is defined in firegl_public.h
+typedef int           __kernel_ptrdiff_t;       // as it is defined in posix_types.h
+
+
+#if !defined(ATI_API_CALL)
+#define ATI_API_CALL __attribute__((regparm(0)))
+#endif
+
+extern void * ATI_API_CALL __ke_memset(void* s, int c, __ke_size_t count);
+extern void * ATI_API_CALL __ke_memcpy(void* d, const void* s, __ke_size_t count);
+extern ATI_API_CALL __ke_size_t __ke_strlen(const char *s);
+extern char* ATI_API_CALL __ke_strcpy(char* d, const char* s);
+extern char* ATI_API_CALL __ke_strncpy(char* d, const char* s, __ke_size_t count);
+extern void __ke_printk(const char* fmt, ...);
+
+extern int ATI_API_CALL __ke_snprintf(char* buf, __ke_size_t size, const char* fmt, ...);
+extern int ATI_API_CALL KCL_CopyFromUserSpace(void* to, const void* from, __ke_size_t size);
+extern int ATI_API_CALL KCL_CopyToUserSpace(void* to, const void* from, __ke_size_t size);
+#define __ke_copy_from_user  KCL_CopyFromUserSpace
+#define __ke_copy_to_user    KCL_CopyToUserSpace
+extern int ATI_API_CALL __ke_verify_area(int type, const void * addr, unsigned long size);
+
+extern unsigned long ATI_API_CALL KAS_GetTickCounter(void);
+extern unsigned long ATI_API_CALL KAS_GetTicksPerSecond(void);
+
+
+#if DBG
+extern int ATI_API_CALL __ke_vsnprintf(char *buf, __ke_size_t size, const char *fmt, va_list ap);
+#define vsnprintf(_dst, _size, _fmt, varg)  __ke_vsnprintf(_dst, _size, _fmt, varg) // va_list form: use v-variant
+#endif                                      // #if DBG
+
+
+// Note: This function is not defined in firegl_public.h.
+void    firegl_hardwareHangRecovery(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+// --------------------------  C/C++ standard typedefs ----------------------------
+//
+#ifdef __SIZE_TYPE__
+typedef __SIZE_TYPE__       size_t;
+#else                                       // #ifdef __SIZE_TYPE__
+typedef unsigned int        size_t;
+#endif                                      // #ifdef __SIZE_TYPE__
+
+#ifdef __PTRDIFF_TYPE__
+typedef __PTRDIFF_TYPE__    ptrdiff_t;
+#else                                       // #ifdef __PTRDIFF_TYPE__
+typedef int                 ptrdiff_t;
+#endif                                      // #ifdef __PTRDIFF_TYPE__
+
+#ifndef NULL
+#ifdef __cplusplus
+#define NULL    __null
+#else
+#define NULL    ((void *)0)
+#endif
+#endif
+
+
+//
+// -------------------------  C/C++ standard macros ---------------------------
+//
+
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)  // as it is defined in stddef.h
+#define CHAR_BIT            8                                   // as it is defined in limits.h
+
+//
+// ---------------------------------  C RTL -----------------------------------
+//
+
+#define memset(_p, _v, _n)                  __ke_memset(_p, _v, _n)
+#define memcpy(_d, _s, _n)                  __ke_memcpy(_d, _s, _n)
+#define strlen(_s)                          __ke_strlen(_s)
+#define strcpy(_d, _s)                      __ke_strcpy(_d, _s)
+#define strncpy(_d, _s, _n)                 __ke_strncpy(_d, _s, _n)
+// Note: C99 supports macros with variable number of arguments. GCC also supports this C99 feature as
+//       C++ extension.
+#define snprintf(_dst, _size, _fmt, arg...) __ke_snprintf(_dst, _size, _fmt, ##arg)
+
+
+#endif                                      // #ifdef _lnx_common_defs_h_
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/chip/si_ci_vi_merged_enum.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/chip/si_ci_vi_merged_enum.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/chip/si_ci_vi_merged_enum.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/chip/si_ci_vi_merged_enum.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+#if !defined (SI_CI_VI_MERGED_ENUM_HEADER)
+#define SI_CI_VI_MERGED_ENUM_HEADER
+
+typedef enum PipeInterleaveSize {
+ADDR_CONFIG_PIPE_INTERLEAVE_256B         = 0x00000000,
+ADDR_CONFIG_PIPE_INTERLEAVE_512B         = 0x00000001,
+} PipeInterleaveSize;
+
+typedef enum RowSize {
+ADDR_CONFIG_1KB_ROW                      = 0x00000000,
+ADDR_CONFIG_2KB_ROW                      = 0x00000001,
+ADDR_CONFIG_4KB_ROW                      = 0x00000002,
+} RowSize;
+
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1782 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  ciaddrlib.cpp
+* @brief Contains the implementation for the CIAddrLib class.
+***************************************************************************************************
+*/
+
+#include "ciaddrlib.h"
+
+#include "si_gb_reg.h"
+
+#include "si_ci_vi_merged_enum.h"
+
+#if BRAHMA_BUILD
+#include "amdgpu_id.h"
+#else
+#include "ci_id.h"
+#include "kv_id.h"
+#include "vi_id.h"
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrMask
+*
+*   @brief
+*       Gets a mask of "width"
+*   @return
+*       Bit mask
+***************************************************************************************************
+*/
+static UINT_64 AddrMask(
+    UINT_32 width)  ///< Number of low-order bits to set
+{
+    const UINT_32 maxWidth = sizeof(UINT_64) * 8;
+
+    // Shifting a 64-bit value by 64 or more is undefined, so a full-width
+    // (or wider) request is answered with the all-ones pattern directly.
+    if (width < maxWidth)
+    {
+        const UINT_64 oneBit = 1;
+        return (oneBit << width) - 1;
+    }
+
+    return ~static_cast<UINT_64>(0);
+}
+
+/**
+***************************************************************************************************
+*   AddrGetBits
+*
+*   @brief
+*       Gets bits within a range of [msb, lsb]
+*   @return
+*       Bits of this range
+***************************************************************************************************
+*/
+static UINT_64 AddrGetBits(
+    UINT_64 bits,   ///< Source bits
+    UINT_32 msb,    ///< Most significant bit of the range (inclusive)
+    UINT_32 lsb)    ///< Least significant bit of the range (inclusive)
+{
+    // An inverted range (msb < lsb) selects nothing.
+    if (msb < lsb)
+    {
+        return 0;
+    }
+    const UINT_32 fieldWidth = 1 + msb - lsb;
+    return (bits >> lsb) & AddrMask(fieldWidth);
+}
+
+/**
+***************************************************************************************************
+*   AddrRemoveBits
+*
+*   @brief
+*       Removes bits within the range of [msb, lsb]
+*   @return
+*       Modified bits
+***************************************************************************************************
+*/
+static UINT_64 AddrRemoveBits(
+    UINT_64 bits,   ///< Source bits
+    UINT_32 msb,    ///< Most significant bit
+    UINT_32 lsb)    ///< Least significant bit
+{
+    UINT_64 ret = bits; // returned unchanged when the range is inverted (msb < lsb)
+
+    if (msb >= lsb)
+    {
+        ret = AddrGetBits(bits, lsb - 1, 0) // low bits; lsb == 0 wraps to an empty range (assumes 32-bit unsigned UINT_32)
+            | (AddrGetBits(bits, 8 * sizeof(bits) - 1, msb + 1) << lsb); // high bits, moved down onto the removed range
+    }
+    return ret;
+}
+
+/**
+***************************************************************************************************
+*   AddrInsertBits
+*
+*   @brief
+*       Inserts new bits into the range of [msb, lsb]
+*   @return
+*       Modified bits
+***************************************************************************************************
+*/
+static UINT_64 AddrInsertBits(
+    UINT_64 bits,       ///< Source bits
+    UINT_64 newBits,    ///< New bits to be inserted
+    UINT_32 msb,        ///< Most significant bit
+    UINT_32 lsb)        ///< Least significant bit
+{
+    UINT_64 ret = bits; // returned unchanged when the range is inverted (msb < lsb)
+    if (msb >= lsb)
+    {
+        ret = AddrGetBits(bits, lsb - 1, 0)                // old low bits
+            | (AddrGetBits(newBits, msb - lsb, 0) << lsb); // new bits
+        if (msb + 1 < 8 * sizeof(bits)) // a shift by the full width is undefined; no old high bit survives then
+            ret |= AddrGetBits(bits, 8 * sizeof(bits) - 1, lsb) << (msb + 1); // old high bits
+    }
+    return ret;
+}
+
+
+/**
+***************************************************************************************************
+*   AddrCIHwlInit
+*
+*   @brief
+*       Creates an CIAddrLib object.
+*
+*   @return
+*       Returns an CIAddrLib object pointer.
+***************************************************************************************************
+*/
+AddrLib* AddrCIHwlInit(const AddrClient* pClient) ///< [in] Client handle and callbacks
+{
+    return CIAddrLib::CreateObj(pClient); // result is returned through the AddrLib base pointer
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::CIAddrLib
+*
+*   @brief
+*       Constructor
+*
+***************************************************************************************************
+*/
+CIAddrLib::CIAddrLib(const AddrClient* pClient) : ///< [in] Client data, passed on to the SIAddrLib base
+    SIAddrLib(pClient),
+    m_noOfMacroEntries(0),
+    m_allowNonDispThickModes(FALSE)
+{
+    m_class = CI_ADDRLIB;
+    memset(&m_settings, 0, sizeof(m_settings)); // all chip flags start cleared; HwlConvertChipFamily sets them
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::~CIAddrLib
+*
+*   @brief
+*       Destructor
+***************************************************************************************************
+*/
+CIAddrLib::~CIAddrLib()
+{
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeDccInfo
+*
+*   @brief
+*       Compute DCC key size, base alignment
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeDccInfo(
+    const ADDR_COMPUTE_DCCINFO_INPUT*  pIn,         ///< [in] color surface size, bpp, samples, tile mode/info
+    ADDR_COMPUTE_DCCINFO_OUTPUT*       pOut) const  ///< [out] DCC ram size/alignment, fast clear size
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    if (m_settings.isVolcanicIslands && IsMacroTiled(pIn->tileMode))
+    {
+        UINT_64 dccFastClearSize = pIn->colorSurfSize >> 8; // 1/256 of the color surface size
+
+        ADDR_ASSERT(0 == (pIn->colorSurfSize & 0xff)); // color size is expected to be a multiple of 256
+
+        if (pIn->numSamples > 1)
+        {
+            UINT_32 tileSizePerSample = BITS_TO_BYTES(pIn->bpp * MicroTileWidth * MicroTileHeight);
+            UINT_32 samplesPerSplit  = pIn->tileInfo.tileSplitBytes / tileSizePerSample; // NOTE(review): 0 if split < tile size
+
+            if (samplesPerSplit < pIn->numSamples)
+            {
+                UINT_32 numSplits = pIn->numSamples / samplesPerSplit; // NOTE(review): divides by zero if samplesPerSplit is 0 - confirm callers
+                UINT_32 fastClearBaseAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes;
+
+                ADDR_ASSERT(IsPow2(fastClearBaseAlign)); // the mask test below relies on a power of two
+
+                dccFastClearSize /= numSplits;
+
+                if (0 != (dccFastClearSize & (fastClearBaseAlign - 1)))
+                {
+                    // Disable dcc fast clear
+                    // if key size of first sample split is not pipe*interleave aligned
+                    dccFastClearSize = 0;
+                }
+            }
+        }
+
+        pOut->dccRamSize          = pIn->colorSurfSize >> 8;
+        pOut->dccRamBaseAlign     = pIn->tileInfo.banks *
+                                    HwlGetPipes(&pIn->tileInfo) *
+                                    m_pipeInterleaveBytes;
+        pOut->dccFastClearSize    = dccFastClearSize;
+
+        ADDR_ASSERT(IsPow2(pOut->dccRamBaseAlign)); // the mask test below relies on a power of two
+
+        if (0 == (pOut->dccRamSize & (pOut->dccRamBaseAlign - 1)))
+        {
+            pOut->subLvlCompressible = TRUE; // ram size is already a multiple of the base alignment
+        }
+        else
+        {
+            UINT_64 dccRamSizeAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes;
+
+            if (pOut->dccRamSize == pOut->dccFastClearSize) // keep the fast clear size equal to the padded ram size
+            {
+                pOut->dccFastClearSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign);
+            }
+            pOut->dccRamSize          = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign);
+            pOut->subLvlCompressible  = FALSE;
+        }
+    }
+    else
+    {
+        returnCode = ADDR_NOTSUPPORTED; // not Volcanic Islands, or the tile mode is not macro tiled
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeCmaskAddrFromCoord
+*
+*   @brief
+*       Compute tc compatible Cmask address from fmask ram address
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeCmaskAddrFromCoord(
+    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*  pIn,  ///< [in] fmask addr/bpp/tile input
+    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*       pOut  ///< [out] cmask address
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED; // stays so unless VI and tcCompatible are both set
+
+    if ((m_settings.isVolcanicIslands == TRUE) &&
+        (pIn->flags.tcCompatible == TRUE))
+    {
+        UINT_32 numOfPipes   = HwlGetPipes(pIn->pTileInfo);
+        UINT_32 numOfBanks   = pIn->pTileInfo->banks;
+        UINT_64 fmaskAddress = pIn->fmaskAddr;
+        UINT_32 elemBits     = pIn->bpp;
+        UINT_32 blockByte    = 64 * elemBits / 8; // bytes in 64 elements of elemBits bits each
+        UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(fmaskAddress,
+                                                                    0,
+                                                                    0,
+                                                                    4,
+                                                                    elemBits,
+                                                                    blockByte,
+                                                                    m_pipeInterleaveBytes,
+                                                                    numOfPipes,
+                                                                    numOfBanks,
+                                                                    1);
+        pOut->addr = (metaNibbleAddress >> 1);               // two nibbles per byte
+        pOut->bitPosition = (metaNibbleAddress % 2) ? 4 : 0; // odd nibble starts at bit 4 of that byte
+        returnCode = ADDR_OK;
+    }
+
+    return returnCode;
+}
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlConvertChipFamily
+*
+*   @brief
+*       Convert familyID defined in atiid.h to AddrChipFamily and set m_chipFamily/m_chipRevision
+*   @return
+*       AddrChipFamily
+***************************************************************************************************
+*/
+AddrChipFamily CIAddrLib::HwlConvertChipFamily(
+    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
+    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
+{
+    AddrChipFamily family = ADDR_CHIP_FAMILY_CI; // returned as-is for every input, including the default case
+
+    switch (uChipFamily) // each case only records chip flags in m_settings
+    {
+        case FAMILY_CI:
+            m_settings.isSeaIsland  = 1;
+            m_settings.isBonaire    = ASICREV_IS_BONAIRE_M(uChipRevision);
+            m_settings.isHawaii     = ASICREV_IS_HAWAII_P(uChipRevision);
+            break;
+        case FAMILY_KV:
+            m_settings.isKaveri     = 1;
+            m_settings.isSpectre    = ASICREV_IS_SPECTRE(uChipRevision);
+            m_settings.isSpooky     = ASICREV_IS_SPOOKY(uChipRevision);
+            m_settings.isKalindi    = ASICREV_IS_KALINDI(uChipRevision);
+            break;
+        case FAMILY_VI:
+            m_settings.isVolcanicIslands = 1;
+            m_settings.isIceland         = ASICREV_IS_ICELAND_M(uChipRevision);
+            m_settings.isTonga           = ASICREV_IS_TONGA_P(uChipRevision);
+            m_settings.isFiji            = ASICREV_IS_FIJI_P(uChipRevision);
+            break;
+        case FAMILY_CZ:
+            m_settings.isCarrizo         = 1;
+            m_settings.isVolcanicIslands = 1; // Carrizo is also flagged as Volcanic Islands
+            break;
+        default:
+            ADDR_ASSERT(!"This should be a unexpected Fusion"); // unknown family: no flag is set
+            break;
+    }
+
+    return family;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlInitGlobalParams
+*
+*   @brief
+*       Initializes global parameters
+*
+*   @return
+*       TRUE if all settings are valid
+*
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::HwlInitGlobalParams(
+    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
+{
+    BOOL_32  valid = TRUE; // overwritten right away by the DecodeGbRegs result below
+
+    const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue;
+
+    valid = DecodeGbRegs(pRegValue);
+
+    // The following assignments to m_pipes are only a fail-safe; InitTileSettingTable should
+    // read the correct pipes from the tile mode table. They run even when valid is FALSE.
+    if (m_settings.isHawaii)
+    {
+        // Hawaii has 16-pipe, see GFXIP_Config_Summary.xls
+        m_pipes = 16;
+    }
+    else if (m_settings.isBonaire || m_settings.isSpectre)
+    {
+        m_pipes = 4;
+    }
+    else // Treat other KV asics to be 2-pipe (also the value when none of the flags above is set)
+    {
+        m_pipes = 2;
+    }
+
+    // @todo: VI
+    // Move this to VI code path once created
+    if (m_settings.isTonga) // this chain overrides the value chosen above
+    {
+        m_pipes = 8;
+    }
+    else if (m_settings.isIceland)
+    {
+        m_pipes = 2;
+    }
+    else if (m_settings.isFiji)
+    {
+        m_pipes = 16;
+    }
+
+    if (valid) // the tables are only initialized while every earlier step succeeded
+    {
+        valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries);
+    }
+    if (valid)
+    {
+        valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries);
+    }
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlPostCheckTileIndex
+*
+*   @brief
+*       Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches
+*       tile mode/type/info and change the index if needed
+*   @return
+*       Tile index, or TileIndexInvalid if the resulting index is not below m_noOfEntries
+***************************************************************************************************
+*/
+INT_32 CIAddrLib::HwlPostCheckTileIndex(
+    const ADDR_TILEINFO* pInfo,     ///< [in] Tile Info
+    AddrTileMode         mode,      ///< [in] Tile mode
+    AddrTileType         type,      ///< [in] Tile type
+    INT                  curIndex   ///< [in] Current index assigned in HwlSetupTileInfo
+    ) const
+{
+    INT_32 index = curIndex;
+
+    if (mode == ADDR_TM_LINEAR_GENERAL)
+    {
+        index = TileIndexLinearGeneral;
+    }
+    else
+    {
+        BOOL_32 macroTiled = IsMacroTiled(mode);
+
+        // We need to find a new index if any of the following is true
+        // 1. curIndex is invalid
+        // 2. tile mode is changed
+        // 3. tile info (pipe config) does not match for macro tiled
+        if ((index == TileIndexInvalid)         ||
+            (mode != m_tileTable[index].mode)   ||
+            (macroTiled && pInfo->pipeConfig != m_tileTable[index].info.pipeConfig))
+        {
+            for (index = 0; index < static_cast<INT_32>(m_noOfEntries); index++) // first match wins
+            {
+                if (macroTiled)
+                {
+                    // macro tile modes need all to match
+                    if ((pInfo->pipeConfig == m_tileTable[index].info.pipeConfig) &&
+                        (mode == m_tileTable[index].mode) &&
+                        (type == m_tileTable[index].type))
+                    {
+                        // tileSplitBytes stored in m_tileTable is only valid for depth entries
+                        if (type == ADDR_DEPTH_SAMPLE_ORDER)
+                        {
+                            if (pInfo->tileSplitBytes == m_tileTable[index].info.tileSplitBytes)
+                            {
+                                break;
+                            }
+                        }
+                        else // other entries are determined by other 3 fields
+                        {
+                            break;
+                        }
+                    }
+                }
+                else if (mode == ADDR_TM_LINEAR_ALIGNED)
+                {
+                    // linear mode only needs tile mode to match
+                    if (mode == m_tileTable[index].mode)
+                    {
+                        break;
+                    }
+                }
+                else
+                {
+                    // micro tile modes only need tile mode and tile type to match
+                    if (mode == m_tileTable[index].mode &&
+                        type == m_tileTable[index].type)
+                    {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    ADDR_ASSERT(index < static_cast<INT_32>(m_noOfEntries));
+
+    if (index >= static_cast<INT_32>(m_noOfEntries)) // e.g. the search loop found no match
+    {
+        index = TileIndexInvalid;
+    }
+
+    return index;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlSetupTileCfg
+*
+*   @brief
+*       Map tile index to tile setting; macro tiled modes also need a valid macroModeIndex.
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS if index or a required macroModeIndex is out of range
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlSetupTileCfg(
+    INT_32          index,          ///< [in] Tile index
+    INT_32          macroModeIndex, ///< [in] Index in macro tile mode table(CI)
+    ADDR_TILEINFO*  pInfo,          ///< [out] Tile Info
+    AddrTileMode*   pMode,          ///< [out] Tile mode
+    AddrTileType*   pType           ///< [out] Tile type
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    // Global flag to control usage of tileIndex
+    if (UseTileIndex(index))
+    {
+        if (static_cast<UINT_32>(index) >= m_noOfEntries)
+        {
+            returnCode = ADDR_INVALIDPARAMS;
+        }
+        else
+        {
+            const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index);
+
+            if (pInfo != NULL)
+            {
+                if (!IsMacroTiled(pCfgTable->mode)) // 1D and linear modes: table defaults
+                {
+                    *pInfo = pCfgTable->info;
+                }
+                else if (static_cast<UINT_32>(macroModeIndex) >= m_noOfMacroEntries)
+                {
+                    // Negative indices wrap to huge unsigned values and are rejected here too
+                    ADDR_ASSERT_ALWAYS();
+                    returnCode = ADDR_INVALIDPARAMS;
+                }
+                else
+                {
+                    // Here tile_bytes is used in place of tile_split, per:
+                    //   tile_split_c = MIN(ROW_SIZE, tile_split)
+                    //   tile_bytes   = MIN(tile_split_c, num_samples * tile_bytes_1x)
+                    // Alignment and others (such as slicesPerTile) are unaffected: if tile_split_c
+                    // is larger, split won't happen; otherwise (num_samples * tile_bytes_1x is
+                    // larger) a correct tile_split is returned.
+                    *pInfo = m_macroTileTable[macroModeIndex];
+                    if (pCfgTable->type == ADDR_DEPTH_SAMPLE_ORDER)
+                    {
+                        pInfo->tileSplitBytes = pCfgTable->info.tileSplitBytes;
+                    }
+                    pInfo->pipeConfig = pCfgTable->info.pipeConfig;
+                }
+            }
+
+            if (pMode != NULL)
+            {
+                *pMode = pCfgTable->mode;
+            }
+
+            if (pType != NULL)
+            {
+                *pType = pCfgTable->type;
+            }
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeSurfaceInfo
+*
+*   @brief
+*       CI entry of ComputeSurfaceInfo: wraps the SI implementation and normalizes macroModeIndex
+*   @return
+*       ADDR_E_RETURNCODE returned by SIAddrLib::HwlComputeSurfaceInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeSurfaceInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    // Without a valid input tile index, the macro mode index in pOut is forced invalid as well
+    if (TileIndexInvalid == pIn->tileIndex)
+    {
+        pOut->macroModeIndex = TileIndexInvalid;
+    }
+
+    const ADDR_E_RETURNCODE status = SIAddrLib::HwlComputeSurfaceInfo(pIn, pOut);
+
+    // "No macro index" is reported to the caller as an invalid index
+    if (TileIndexNoMacroIndex == pOut->macroModeIndex)
+    {
+        pOut->macroModeIndex = TileIndexInvalid;
+    }
+    return status;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeFmaskInfo
+*   @brief
+*       Entry of ci's ComputeFmaskInfo
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeFmaskInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut   ///< [out] output structure
+    )
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+    ADDR_TILEINFO tileInfo = {0};
+    ADDR_COMPUTE_FMASK_INFO_INPUT fmaskIn;
+    fmaskIn = *pIn;
+
+    AddrTileMode tileMode = pIn->tileMode;
+
+    // Use internal tile info if pOut does not have a valid pTileInfo
+    if (pOut->pTileInfo == NULL)
+    {
+        pOut->pTileInfo = &tileInfo;
+    }
+
+    ADDR_ASSERT(tileMode == ADDR_TM_2D_TILED_THIN1     ||
+                tileMode == ADDR_TM_3D_TILED_THIN1     ||
+                tileMode == ADDR_TM_PRT_TILED_THIN1    ||
+                tileMode == ADDR_TM_PRT_2D_TILED_THIN1 ||
+                tileMode == ADDR_TM_PRT_3D_TILED_THIN1);
+
+    ADDR_ASSERT(m_tileTable[14].mode == ADDR_TM_2D_TILED_THIN1);
+    ADDR_ASSERT(m_tileTable[15].mode == ADDR_TM_3D_TILED_THIN1);
+
+    // The only valid tile modes for fmask are 2D_THIN1 and 3D_THIN1 plus non-displayable
+    INT_32 tileIndex = tileMode == ADDR_TM_2D_TILED_THIN1 ? 14 : 15; // any other mode picks 15
+    ADDR_SURFACE_FLAGS flags = {{0}};
+    flags.fmask = 1;
+
+    INT_32 macroModeIndex = TileIndexInvalid;
+
+    UINT_32 numSamples = pIn->numSamples;
+    UINT_32 numFrags = pIn->numFrags == 0 ? numSamples : pIn->numFrags; // 0 means "same as samples"
+
+    UINT_32 bpp = QLog2(numFrags);
+
+    // EQAA needs one more bit
+    if (numSamples > numFrags)
+    {
+        bpp++;
+    }
+
+    if (bpp == 3) // a 3-bit value is bumped to 4 bits
+    {
+        bpp = 4;
+    }
+
+    bpp = Max(8u, bpp * numSamples); // scaled by the sample count, never below 8
+
+    macroModeIndex = HwlComputeMacroModeIndex(tileIndex, flags, bpp, numSamples, pOut->pTileInfo);
+
+    fmaskIn.tileIndex = tileIndex;
+    fmaskIn.pTileInfo = pOut->pTileInfo;
+    pOut->macroModeIndex = macroModeIndex;
+    pOut->tileIndex = tileIndex;
+
+    retCode = DispatchComputeFmaskInfo(&fmaskIn, pOut);
+
+    if (retCode == ADDR_OK)
+    {
+        pOut->tileIndex =
+            HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE,
+                                  pOut->tileIndex);
+    }
+
+    // Resets pTileInfo to NULL if the internal tile info is used
+    if (pOut->pTileInfo == &tileInfo)
+    {
+        pOut->pTileInfo = NULL;
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlFmaskPreThunkSurfInfo
+*
+*   @brief
+*       Seeds the thunked ComputeSurfaceInfo call with the fmask tile index / macro mode index
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlFmaskPreThunkSurfInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pFmaskIn,   ///< [in] Input of fmask info
+    const ADDR_COMPUTE_FMASK_INFO_OUTPUT*   pFmaskOut,  ///< [in] Output of fmask info
+    ADDR_COMPUTE_SURFACE_INFO_INPUT*        pSurfIn,    ///< [out] Input of thunked surface info
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pSurfOut    ///< [out] Output of thunked surface info
+    ) const
+{
+    pSurfOut->macroModeIndex = pFmaskOut->macroModeIndex;
+    pSurfIn->tileIndex       = pFmaskIn->tileIndex;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlFmaskPostThunkSurfInfo
+*
+*   @brief
+*       Copies tile index / macro mode index from the thunked surface output to the fmask output
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlFmaskPostThunkSurfInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,   ///< [in] Output of surface info
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut           ///< [out] Output of fmask info
+    ) const
+{
+    pFmaskOut->macroModeIndex = pSurfOut->macroModeIndex;
+    pFmaskOut->tileIndex      = pSurfOut->tileIndex;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlDegradeThickTileMode
+*
+*   @brief
+*       Degrades valid tile mode for thick modes if needed; this implementation never degrades
+*
+*   @return
+*       Suitable tile mode (always baseTileMode here)
+***************************************************************************************************
+*/
+AddrTileMode CIAddrLib::HwlDegradeThickTileMode(
+    AddrTileMode        baseTileMode,   ///< [in] base tile mode
+    UINT_32             numSlices,      ///< [in] current number of slices (unused here)
+    UINT_32*            pBytesPerTile   ///< [in/out] pointer to bytes per slice (unused here)
+    ) const
+{
+    return baseTileMode;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlOverrideTileMode
+*
+*   @brief
+*       Override PRT 2D/3D modes to plain PRT modes, and THICK to THIN for specific formats on CI
+*
+*   @return
+*       TRUE if *pTileMode was changed, FALSE otherwise
+*
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::HwlOverrideTileMode(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,       ///< [in] input structure
+    AddrTileMode*                           pTileMode, ///< [in/out] pointer to the tile mode
+    AddrTileType*                           pTileType  ///< [in/out] pointer to the tile type
+    ) const
+{
+    BOOL_32 bOverrided = FALSE;
+    AddrTileMode tileMode = *pTileMode;
+
+    // currently, all CI/VI family do not
+    // support ADDR_TM_PRT_2D_TILED_THICK,ADDR_TM_PRT_3D_TILED_THICK and
+    // ADDR_TM_PRT_2D_TILED_THIN1, ADDR_TM_PRT_3D_TILED_THIN1; map them to the plain PRT modes
+    switch (tileMode)
+    {
+        case ADDR_TM_PRT_2D_TILED_THICK:
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            tileMode = ADDR_TM_PRT_TILED_THICK;
+            break;
+        case ADDR_TM_PRT_2D_TILED_THIN1:
+        case ADDR_TM_PRT_3D_TILED_THIN1:
+            tileMode = ADDR_TM_PRT_TILED_THIN1;
+            break;
+        default:
+            break;
+    }
+
+    // UBTS#404321, we do not need such overriding, as THICK+THICK entries removed from the tile-mode table
+    if (!m_settings.isBonaire)
+    {
+        UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+        // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1)
+        if (thickness > 1)
+        {
+            switch (pIn->format)
+            {
+                // see //gfxip/gcB/devel/cds/src/verif/tc/models/csim/tcp.cpp
+                // tcpError("Thick micro tiling is not supported for format...
+                case ADDR_FMT_X24_8_32_FLOAT:
+                case ADDR_FMT_32_AS_8:
+                case ADDR_FMT_32_AS_8_8:
+                case ADDR_FMT_32_AS_32_32_32_32:
+
+                // packed formats
+                case ADDR_FMT_GB_GR:
+                case ADDR_FMT_BG_RG:
+                case ADDR_FMT_1_REVERSED:
+                case ADDR_FMT_1:
+                case ADDR_FMT_BC1:
+                case ADDR_FMT_BC2:
+                case ADDR_FMT_BC3:
+                case ADDR_FMT_BC4:
+                case ADDR_FMT_BC5:
+                case ADDR_FMT_BC6:
+                case ADDR_FMT_BC7:
+                    switch (tileMode)
+                    {
+                        case ADDR_TM_1D_TILED_THICK:
+                            tileMode    = ADDR_TM_1D_TILED_THIN1;
+                            break;
+
+                        case ADDR_TM_2D_TILED_XTHICK:
+                        case ADDR_TM_2D_TILED_THICK:
+                            tileMode    = ADDR_TM_2D_TILED_THIN1;
+                            break;
+
+                        case ADDR_TM_3D_TILED_XTHICK:
+                        case ADDR_TM_3D_TILED_THICK:
+                            tileMode    = ADDR_TM_3D_TILED_THIN1;
+                            break;
+
+                        case ADDR_TM_PRT_TILED_THICK:
+                            tileMode    = ADDR_TM_PRT_TILED_THIN1;
+                            break;
+
+                        case ADDR_TM_PRT_2D_TILED_THICK: // not reachable: remapped by the switch above
+                            tileMode    = ADDR_TM_PRT_2D_TILED_THIN1;
+                            break;
+
+                        case ADDR_TM_PRT_3D_TILED_THICK: // not reachable: remapped by the switch above
+                            tileMode    = ADDR_TM_PRT_3D_TILED_THIN1;
+                            break;
+
+                        default:
+                            break;
+
+                    }
+
+                    // Switch tile type from thick to thin
+                    if (tileMode != *pTileMode)
+                    {
+                        // see tileIndex: 13-18
+                        *pTileType = ADDR_NON_DISPLAYABLE;
+                    }
+
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    if (tileMode != *pTileMode) // write back only when one of the overrides above took effect
+    {
+        *pTileMode = tileMode;
+        bOverrided = TRUE;
+    }
+
+    return bOverrided;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlSetupTileInfo
+*
+*   @brief
+*       Setup default value of tile info for CI and report tile/macro mode index through pOut
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlSetupTileInfo(
+    AddrTileMode                        tileMode,       ///< [in] Tile mode
+    ADDR_SURFACE_FLAGS                  flags,          ///< [in] Surface type flags
+    UINT_32                             bpp,            ///< [in] Bits per pixel
+    UINT_32                             pitch,          ///< [in] Pitch in pixels
+    UINT_32                             height,         ///< [in] Height in pixels
+    UINT_32                             numSamples,     ///< [in] Number of samples
+    ADDR_TILEINFO*                      pTileInfoIn,    ///< [in] Tile info input: NULL for default
+    ADDR_TILEINFO*                      pTileInfoOut,   ///< [out] Tile info output
+    AddrTileType                        inTileType,     ///< [in] Tile type
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut            ///< [out] Output
+    ) const
+{
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+    ADDR_TILEINFO* pTileInfo = pTileInfoOut;
+    INT index = TileIndexInvalid;
+    INT macroModeIndex = TileIndexInvalid;
+
+    // Fail-safe code
+    if (!IsLinear(tileMode))
+    {
+        // Thick tile modes must use thick micro tile mode but Bonaire does not support due to
+        // old derived netlists (UBTS 404321)
+        if (thickness > 1)
+        {
+            if (m_settings.isBonaire)
+            {
+                inTileType = ADDR_NON_DISPLAYABLE;
+            }
+            else if ((m_allowNonDispThickModes == FALSE) || (inTileType != ADDR_NON_DISPLAYABLE))
+            {
+                inTileType = ADDR_THICK;
+            }
+        }
+        // 128 bpp tiling must be non-displayable.
+        // Fmask reuse color buffer's entry but bank-height field can be from another entry
+        // To simplify the logic, fmask entry should be picked from non-displayable ones
+        else if (bpp == 128 || flags.fmask)
+        {
+            inTileType = ADDR_NON_DISPLAYABLE;
+        }
+        // These two modes only have non-disp entries though they can be other micro tile modes
+        else if (tileMode == ADDR_TM_3D_TILED_THIN1 || tileMode == ADDR_TM_PRT_3D_TILED_THIN1)
+        {
+            inTileType = ADDR_NON_DISPLAYABLE;
+        }
+
+        if (flags.depth || flags.stencil)
+        {
+            inTileType = ADDR_DEPTH_SAMPLE_ORDER;
+        }
+    }
+
+    if (IsTileInfoAllZero(pTileInfo))
+    {
+        // See table entries 0-4
+        if (flags.depth || flags.stencil)
+        {
+            if (flags.depth && flags.tcCompatible)
+            {
+                // tileSize = bpp * numSamples * 8 * 8 / 8
+                UINT_32 tileSize = bpp * numSamples * 8;
+
+                // Texture readable depth surface should not be split
+                switch (tileSize)
+                {
+                    case 128:
+                        index = 1;
+                        break;
+                    case 256:
+                        index = 2;
+                        break;
+                    case 512:
+                        index = 3;
+                        break;
+                    default:
+                        index = 4;
+                        break;
+                }
+            }
+            else
+            {
+                // Depth and stencil need to use the same index, thus the pre-defined tile_split
+                // can meet the requirement to choose the same macro mode index
+                // uncompressed depth/stencil are not supported for now
+                switch (numSamples)
+                {
+                    case 1:
+                        index = 0;
+                        break;
+                    case 2:
+                    case 4:
+                        index = 1;
+                        break;
+                    case 8:
+                        index = 2;
+                        break;
+                    default:
+                        break;
+                }
+            }
+        }
+
+        // See table entries 5-6
+        if (inTileType == ADDR_DEPTH_SAMPLE_ORDER)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 5;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 6;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 8-12
+        if (inTileType == ADDR_DISPLAYABLE)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 9;
+                    break;
+                case ADDR_TM_2D_TILED_THIN1:
+                    index = 10;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 11;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 13-18
+        if (inTileType == ADDR_NON_DISPLAYABLE)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 13;
+                    break;
+                case ADDR_TM_2D_TILED_THIN1:
+                    index = 14;
+                    break;
+                case ADDR_TM_3D_TILED_THIN1:
+                    index = 15;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 16;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 19-26
+        if (thickness > 1)
+        {
+            switch (tileMode)
+            {
+            case ADDR_TM_1D_TILED_THICK:
+                    // special check for bonaire, for the compatibility between old KMD and new UMD
+                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 19 : 18;
+                    break;
+            case ADDR_TM_2D_TILED_THICK:
+                    // special check for bonaire, for the compatibility between old KMD and new UMD
+                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 20 : 24;
+                    break;
+                case ADDR_TM_3D_TILED_THICK:
+                    index = 21;
+                    break;
+                case ADDR_TM_PRT_TILED_THICK:
+                    index = 22;
+                    break;
+                case ADDR_TM_2D_TILED_XTHICK:
+                    index = 25;
+                    break;
+                case ADDR_TM_3D_TILED_XTHICK:
+                    index = 26;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 27-30
+        if (inTileType == ADDR_ROTATED)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 27;
+                    break;
+                case ADDR_TM_2D_TILED_THIN1:
+                    index = 28;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 29;
+                    break;
+                case ADDR_TM_PRT_2D_TILED_THIN1:
+                    index = 30;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        if (m_pipes >= 8)
+        {
+            ADDR_ASSERT((index + 1) < static_cast<INT_32>(m_noOfEntries));
+            // Only do this when tile mode table is updated.
+            if (((tileMode == ADDR_TM_PRT_TILED_THIN1) || (tileMode == ADDR_TM_PRT_TILED_THICK)) &&
+                (m_tileTable[index+1].mode == tileMode))
+            {
+                UINT_32 bytesXSamples = bpp * numSamples / 8;
+                UINT_32 bytesXThickness = bpp * thickness / 8;
+                UINT_32 switchP4Threshold = (m_pipes == 16) ? 8 : 32;
+
+                if ((bytesXSamples > switchP4Threshold) || (bytesXThickness > switchP4Threshold))
+                {
+                    // Pick next 4 pipe entry
+                    index += 1;
+                }
+            }
+        }
+    }
+    else
+    {
+        // A pre-filled tile info is ready
+        index = pOut->tileIndex;
+        macroModeIndex = pOut->macroModeIndex;
+
+        // pass tile type back for post tile index compute
+        pOut->tileType = inTileType;
+    }
+
+    // We only need to set up tile info if there is a valid index but macroModeIndex is invalid
+    if (index != TileIndexInvalid && macroModeIndex == TileIndexInvalid)
+    {
+        macroModeIndex = HwlComputeMacroModeIndex(index, flags, bpp, numSamples, pTileInfo);
+
+        /// Copy to pOut->tileType/tileIndex/macroModeIndex
+        pOut->tileIndex = index;
+        pOut->tileType = m_tileTable[index].type; // Or inTileType, the same
+        pOut->macroModeIndex = macroModeIndex;
+    }
+    else if (tileMode == ADDR_TM_LINEAR_GENERAL)
+    {
+        pOut->tileIndex = TileIndexLinearGeneral;
+
+        // Copy linear-aligned entry??
+        *pTileInfo = m_tileTable[8].info;
+    }
+    else if (tileMode == ADDR_TM_LINEAR_ALIGNED)
+    {
+        pOut->tileIndex = 8; // NOTE(review): presumably TILEINDEX_LINEAR_ALIGNED - confirm
+        *pTileInfo = m_tileTable[8].info;
+    }
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::ReadGbTileMode
+*
+*   @brief
+*       Decode a GB_TILE_MODE HW register value into an ADDR_TILECONFIG entry.
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID CIAddrLib::ReadGbTileMode(
+    UINT_32             regValue,   ///< [in] GB_TILE_MODE register
+    ADDR_TILECONFIG*    pCfg        ///< [out] output structure
+    ) const
+{
+    GB_TILE_MODE reg;
+    reg.val = regValue;
+
+    const UINT_32 arrayMode = reg.f.array_mode;
+    AddrTileMode  mode;
+
+    // The array modes listed below are remapped explicitly; every other register value is
+    // converted to AddrTileMode by a plain cast
+    switch (arrayMode)
+    {
+        case 5:
+            mode = ADDR_TM_PRT_TILED_THIN1;
+            break;
+        case 6:
+            mode = ADDR_TM_PRT_2D_TILED_THIN1;
+            break;
+        case 8:
+            mode = ADDR_TM_2D_TILED_XTHICK;
+            break;
+        case 9:
+            mode = ADDR_TM_PRT_TILED_THICK;
+            break;
+        case 0xa:
+            mode = ADDR_TM_PRT_2D_TILED_THICK;
+            break;
+        case 0xb:
+            mode = ADDR_TM_PRT_3D_TILED_THIN1;
+            break;
+        case 0xe:
+            mode = ADDR_TM_3D_TILED_XTHICK;
+            break;
+        case 0xf:
+            mode = ADDR_TM_PRT_3D_TILED_THICK;
+            break;
+        default:
+            mode = static_cast<AddrTileMode>(arrayMode);
+            break;
+    }
+
+    const AddrTileType type = static_cast<AddrTileType>(reg.f.micro_tile_mode_new);
+
+    pCfg->mode = mode;
+    pCfg->type = type;
+    pCfg->info.pipeConfig = static_cast<AddrPipeCfg>(reg.f.pipe_config + 1);
+
+    // Depth entries take the split from tile_split, all other entries from sample_split
+    pCfg->info.tileSplitBytes = (type == ADDR_DEPTH_SAMPLE_ORDER) ?
+        (64 << reg.f.tile_split) : (1 << reg.f.sample_split);
+
+    // Fail-safe code for these always convert tile info, as the non-macro modes
+    // return the entry of tile mode table directly without looking up macro mode table
+    if (!IsMacroTiled(mode))
+    {
+        pCfg->info.banks = 2;
+        pCfg->info.bankWidth = 1;
+        pCfg->info.bankHeight = 1;
+        pCfg->info.macroAspectRatio = 1;
+        pCfg->info.tileSplitBytes = 64;
+    }
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::InitTileSettingTable
+*
+*   @brief
+*       Initialize the ADDR_TILE_CONFIG table; a table larger than TileTableSize is rejected.
+*   @return
+*       TRUE if tile table is correctly initialized
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::InitTileSettingTable(
+    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
+    UINT_32         noOfEntries     ///< [in] Number of entries in the table above
+    )
+{
+    BOOL_32 initOk = TRUE;
+
+    memset(m_tileTable, 0, sizeof(m_tileTable));
+
+    // A count of 0 selects the full table size
+    m_noOfEntries = (noOfEntries != 0) ? noOfEntries : static_cast<UINT_32>(TileTableSize);
+
+    if (m_noOfEntries > TileTableSize)
+    {
+        // More entries than m_tileTable can hold: fail here rather than write past the end of
+        // the table when asserts are compiled out; keep the stored count within bounds
+        ADDR_ASSERT_ALWAYS();
+        m_noOfEntries = TileTableSize;
+        initOk = FALSE;
+    }
+    else if (pCfg) // From Client
+    {
+        for (UINT_32 i = 0; i < m_noOfEntries; i++)
+        {
+            ReadGbTileMode(*(pCfg + i), &m_tileTable[i]);
+        }
+    }
+    else
+    {
+        ADDR_ASSERT_ALWAYS();
+        initOk = FALSE;
+    }
+
+    if (initOk)
+    {
+        ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED);
+
+        if (m_settings.isBonaire == FALSE)
+        {
+            // Check if entry 18 is "thick+thin" combination
+            if ((m_tileTable[18].mode == ADDR_TM_1D_TILED_THICK) &&
+                (m_tileTable[18].type == ADDR_NON_DISPLAYABLE))
+            {
+                m_allowNonDispThickModes = TRUE;
+                ADDR_ASSERT(m_tileTable[24].mode == ADDR_TM_2D_TILED_THICK);
+            }
+        }
+        else
+        {
+            m_allowNonDispThickModes = TRUE;
+        }
+
+        // Assume the first entry is always programmed with full pipes
+        m_pipes = HwlGetPipes(&m_tileTable[0].info);
+    }
+
+    return initOk;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::ReadGbMacroTileCfg
+*
+*   @brief
+*       Decode a GB_MACROTILE_MODE HW register value into the bank fields of an ADDR_TILEINFO.
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID CIAddrLib::ReadGbMacroTileCfg(
+    UINT_32             regValue,   ///< [in] GB_MACRO_TILE_MODE register
+    ADDR_TILEINFO*      pCfg        ///< [out] output structure
+    ) const
+{
+    GB_MACROTILE_MODE reg;
+    reg.val = regValue;
+
+    pCfg->banks            = 1 << (reg.f.num_banks + 1);
+    pCfg->bankWidth        = 1 << reg.f.bank_width;
+    pCfg->bankHeight       = 1 << reg.f.bank_height;
+    pCfg->macroAspectRatio = 1 << reg.f.macro_tile_aspect;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::InitMacroTileCfgTable
+*
+*   @brief
+*       Initialize the macro tile table; a table larger than MacroTileTableSize is rejected.
+*   @return
+*       TRUE if macro tile table is correctly initialized
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::InitMacroTileCfgTable(
+    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
+    UINT_32         noOfMacroEntries     ///< [in] Number of entries in the table above
+    )
+{
+    BOOL_32 initOk = TRUE;
+
+    memset(m_macroTileTable, 0, sizeof(m_macroTileTable));
+
+    m_noOfMacroEntries = (noOfMacroEntries != 0) ?
+        noOfMacroEntries : static_cast<UINT_32>(MacroTileTableSize); // 0 selects the full size
+
+    if (m_noOfMacroEntries > MacroTileTableSize)
+    {
+        // More entries than m_macroTileTable can hold: fail here rather than write past the end
+        // of the table when asserts are compiled out; keep the stored count within bounds
+        ADDR_ASSERT_ALWAYS();
+        m_noOfMacroEntries = MacroTileTableSize;
+        initOk = FALSE;
+    }
+    else if (pCfg) // From Client
+    {
+        for (UINT_32 i = 0; i < m_noOfMacroEntries; i++)
+        {
+            ReadGbMacroTileCfg(*(pCfg + i), &m_macroTileTable[i]);
+
+            m_macroTileTable[i].tileSplitBytes = 64 << (i % 8);
+        }
+    }
+    else
+    {
+        ADDR_ASSERT_ALWAYS();
+        initOk = FALSE;
+    }
+    return initOk;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeMacroModeIndex
+*
+*   @brief
+*       Computes macro tile mode index and fills *pTileInfo for the given tile index
+*
+*   @return
+*       Index into the macro tile table, or TileIndexNoMacroIndex for tc compatible stencil
+*       surfaces and for tile modes that are not macro tiled
+*
+*   @note
+*       For a tc compatible stencil surface none of the output parameters is written.
+*       Otherwise pTileInfo is dereferenced unconditionally and must not be NULL, while
+*       pTileMode and pTileType are optional and may be NULL.
+***************************************************************************************************
+*/
+INT_32 CIAddrLib::HwlComputeMacroModeIndex(
+    INT_32              tileIndex,      ///< [in] Tile mode index
+    ADDR_SURFACE_FLAGS  flags,          ///< [in] Surface flags
+    UINT_32             bpp,            ///< [in] Bit per pixel
+    UINT_32             numSamples,     ///< [in] Number of samples
+    ADDR_TILEINFO*      pTileInfo,      ///< [out] Pointer to ADDR_TILEINFO
+    AddrTileMode*       pTileMode,      ///< [out] Pointer to AddrTileMode
+    AddrTileType*       pTileType       ///< [out] Pointer to AddrTileType
+    ) const
+{
+    // Don't compute macroModeIndex for tc compatible stencil surface
+    if (flags.tcCompatible && flags.stencil)
+    {
+        return TileIndexNoMacroIndex;
+    }
+
+    const AddrTileMode tileMode  = m_tileTable[tileIndex].mode;
+    const AddrTileType tileType  = m_tileTable[tileIndex].type;
+    const UINT_32      thickness = ComputeSurfaceThickness(tileMode);
+
+    // Stays at "no macro index" unless the tile mode turns out to be macro tiled
+    INT_32 macroModeIndex = TileIndexNoMacroIndex;
+
+    if (IsMacroTiled(tileMode))
+    {
+        const BOOL_32 isDepthEntry = (tileType == ADDR_DEPTH_SAMPLE_ORDER);
+
+        // Bytes of one micro tile at a single sample per pixel
+        const UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness);
+
+        // Depth entries store real tileSplitBytes, non-depth entries store a split factor
+        // that is multiplied by the 1x tile size and floored at 256
+        const UINT_32 storedSplit = m_tileTable[tileIndex].info.tileSplitBytes;
+        const UINT_32 tileSplit   = isDepthEntry ?
+                                    storedSplit :
+                                    Max(256u, storedSplit * tileBytes1x);
+
+        // The effective split never exceeds the row size
+        const UINT_32 tileSplitC = Min(m_rowSize, tileSplit);
+
+        // fmask does not scale with the sample count, everything else does
+        UINT_32 unsplitBytes = tileBytes1x;
+
+        if (!flags.fmask)
+        {
+            unsplitBytes *= numSamples;
+        }
+
+        // Limit to the split size from above and to 64 bytes from below
+        const UINT_32 tileBytes = Max(64u, Min(tileSplitC, unsplitBytes));
+
+        // 64 bytes select entry 0, 128 bytes entry 1, and so on
+        macroModeIndex = Log2(tileBytes / 64);
+
+        if (flags.prt || IsPrtTileMode(tileMode))
+        {
+            // Unknown - assume it is 1/2 of table size
+            const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2;
+
+            macroModeIndex += PrtMacroModeOffset;
+        }
+
+        // Start from the selected macro tile table entry ...
+        *pTileInfo = m_macroTileTable[macroModeIndex];
+
+        // ... take the pipe config from the tile table ...
+        pTileInfo->pipeConfig = m_tileTable[tileIndex].info.pipeConfig;
+
+        // ... and the tile split: the stored value for depth, the limited one otherwise
+        if (isDepthEntry)
+        {
+            pTileInfo->tileSplitBytes = m_tileTable[tileIndex].info.tileSplitBytes;
+        }
+        else
+        {
+            pTileInfo->tileSplitBytes = tileSplitC;
+        }
+    }
+    else
+    {
+        // Not macro tiled: the tile table entry is used as is
+        *pTileInfo = m_tileTable[tileIndex].info;
+    }
+
+    // The tile mode and type outputs are optional
+    if (NULL != pTileMode)
+    {
+        *pTileMode = tileMode;
+    }
+
+    if (NULL != pTileType)
+    {
+        *pTileType = tileType;
+    }
+
+    return macroModeIndex;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeTileDataWidthAndHeightLinear
+*
+*   @brief
+*       Compute the square macro tile shape used for per-tile data (CMASK and HTILE) when the
+*       layout is linear
+*   @return
+*       N/A; results are written to *pMacroWidth and *pMacroHeight
+*   @note
+*       Both outputs are in pixels: 8 micro tiles per side for the pipe configs listed in the
+*       switch and 4 micro tiles per side otherwise. bpp is not used here.
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlComputeTileDataWidthAndHeightLinear(
+    UINT_32*        pMacroWidth,     ///< [out] macro tile width
+    UINT_32*        pMacroHeight,    ///< [out] macro tile height
+    UINT_32         bpp,             ///< [in] bits per pixel
+    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info
+    ) const
+{
+    ADDR_ASSERT(pTileInfo != NULL);
+
+    // Pipe configs that are not listed below keep this value
+    UINT_32 tilesPerSide = 4;
+
+    switch (pTileInfo->pipeConfig)
+    {
+        case ADDR_PIPECFG_P4_32x32:
+        case ADDR_PIPECFG_P8_32x32_8x16:
+        case ADDR_PIPECFG_P8_32x32_16x16:
+        case ADDR_PIPECFG_P8_32x32_16x32:
+        case ADDR_PIPECFG_P8_32x64_32x32:
+        case ADDR_PIPECFG_P16_32x32_8x16:
+        case ADDR_PIPECFG_P16_32x32_16x16:
+            tilesPerSide = 8;
+            break;
+        default:
+            break;
+    }
+
+    *pMacroWidth    = tilesPerSide * MicroTileWidth;
+    *pMacroHeight   = tilesPerSide * MicroTileHeight;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlStereoCheckRightOffsetPadding
+*
+*   @brief
+*       check if the height needs extra padding for stereo right eye offset, to avoid swizzling
+*
+*   @return
+*       TRUE if the extra padding is needed
+*
+*   @note
+*       Kalindi (Kabini) is the only one that needs this padding as there is an uncertain
+*       possible HW issue where the right eye displays incorrectly with some type of swizzles, if
+*       the right eye offset is not 64KB aligned - EPR#366461
+*       Other Kaveri APUs also need the padding according to DXX team's report otherwise
+*       corruption observed. - EPR#374788
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::HwlStereoCheckRightOffsetPadding() const
+{
+    // The answer depends only on the chip settings, not on any surface parameter
+    if (m_settings.isKaveri)
+    {
+        // Chips flagged as Kaveri get the extra right eye padding
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeMetadataNibbleAddress
+*
+*   @brief
+*        Calculate the meta data nibble address for a pixel address of a color surface
+*
+*   @param
+*        uncompressedDataByteAddress - address of a pixel in color surface
+*        dataBaseByteAddress         - base address of color surface
+*        metadataBaseByteAddress     - base address of meta ram
+*        metadataBitSize             - meta key size, 8 for DCC, 4 for cmask; must not be 0
+*        elementBitSize              - element size of color surface
+*        blockByteSize               - compression block size, 256 for DCC; must not be 0
+*        pipeInterleaveBytes         - pipe interleave size
+*        numOfPipes                  - number of pipes
+*        numOfBanks                  - number of banks
+*        numOfSamplesPerSplit        - number of samples per tile split
+*   @return
+*        meta data nibble address (nibble address is used to support DCC compatible cmask)
+*
+***************************************************************************************************
+*/
+UINT_64 CIAddrLib::HwlComputeMetadataNibbleAddress(
+    UINT_64 uncompressedDataByteAddress,
+    UINT_64 dataBaseByteAddress,
+    UINT_64 metadataBaseByteAddress,
+    UINT_32 metadataBitSize,
+    UINT_32 elementBitSize,
+    UINT_32 blockByteSize,
+    UINT_32 pipeInterleaveBytes,
+    UINT_32 numOfPipes,
+    UINT_32 numOfBanks,
+    UINT_32 numOfSamplesPerSplit) const
+{
+    ///--------------------------------------------------------------------------------------------
+    /// Get pipe interleave, bank and pipe bits
+    ///--------------------------------------------------------------------------------------------
+    UINT_32 pipeInterleaveBits  = Log2(pipeInterleaveBytes);
+    UINT_32 pipeBits            = Log2(numOfPipes);
+    UINT_32 bankBits            = Log2(numOfBanks);
+
+    ///--------------------------------------------------------------------------------------------
+    /// Clear pipe and bank swizzles
+    ///--------------------------------------------------------------------------------------------
+    UINT_32 dataMacrotileBits        = pipeInterleaveBits + pipeBits + bankBits;
+    UINT_32 metadataMacrotileBits    = pipeInterleaveBits + pipeBits + bankBits;
+    // Build the masks in 64 bits so the shift does not depend on the width of long
+    UINT_64 dataMacrotileClearMask     = ~((static_cast<UINT_64>(1) << dataMacrotileBits) - 1);
+    UINT_64 metadataMacrotileClearMask = ~((static_cast<UINT_64>(1) << metadataMacrotileBits) - 1);
+
+    UINT_64 dataBaseByteAddressNoSwizzle = dataBaseByteAddress & dataMacrotileClearMask;
+    UINT_64 metadataBaseByteAddressNoSwizzle = metadataBaseByteAddress & metadataMacrotileClearMask;
+
+    ///--------------------------------------------------------------------------------------------
+    /// Modify metadata base before adding in so that when final address is divided by data ratio,
+    /// the base address returns to where it should be
+    ///--------------------------------------------------------------------------------------------
+    ADDR_ASSERT((0 != metadataBitSize));
+    UINT_64 metadataBaseShifted = metadataBaseByteAddressNoSwizzle * blockByteSize * 8 /
+                                  metadataBitSize;
+    UINT_64 offset = uncompressedDataByteAddress -
+                     dataBaseByteAddressNoSwizzle +
+                     metadataBaseShifted;
+
+    ///--------------------------------------------------------------------------------------------
+    /// Save bank data bits
+    ///--------------------------------------------------------------------------------------------
+    UINT_32 lsb = pipeBits + pipeInterleaveBits;
+    UINT_32 msb = bankBits - 1 + lsb;
+
+    UINT_64 bankDataBits = AddrGetBits(offset, msb, lsb);
+
+    ///--------------------------------------------------------------------------------------------
+    /// Save pipe data bits
+    ///--------------------------------------------------------------------------------------------
+    lsb = pipeInterleaveBits;
+    msb = pipeBits - 1 + lsb;
+
+    UINT_64 pipeDataBits = AddrGetBits(offset, msb, lsb);
+
+    ///--------------------------------------------------------------------------------------------
+    /// Remove pipe and bank bits
+    ///--------------------------------------------------------------------------------------------
+    lsb = pipeInterleaveBits;
+    msb = dataMacrotileBits - 1;
+
+    UINT_64 offsetWithoutPipeBankBits = AddrRemoveBits(offset, msb, lsb);
+
+    ADDR_ASSERT((0 != blockByteSize));
+    UINT_64 blockInBankpipe = offsetWithoutPipeBankBits / blockByteSize;
+
+    UINT_32 tileSize = 8 * 8 * elementBitSize/8 * numOfSamplesPerSplit;
+    UINT_32 blocksInTile = tileSize / blockByteSize;
+
+    if (0 == blocksInTile)
+    {
+        lsb = 0;
+    }
+    else
+    {
+        lsb = Log2(blocksInTile);
+    }
+    msb = bankBits - 1 + lsb;
+
+    UINT_64 blockInBankpipeWithBankBits = AddrInsertBits(blockInBankpipe, bankDataBits, msb, lsb);
+
+    /// NOTE *2 because we are converting to Nibble address in this step
+    UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 8;
+
+
+    ///--------------------------------------------------------------------------------------------
+    /// Reinsert pipe bits back into the final address
+    ///--------------------------------------------------------------------------------------------
+    lsb = pipeInterleaveBits + 1; ///<+1 due to Nibble address now gives interleave bits extra lsb.
+    msb = pipeBits - 1 + lsb;
+    UINT_64 metadataAddress = AddrInsertBits(metaAddressInPipe, pipeDataBits, msb, lsb);
+
+    return metadataAddress;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlPadDimensions
+*
+*   @brief
+*       Helper function to pad dimensions; here only *pPitch may be increased, and only for
+*       multi-sample DCC compatible macro tiled base levels on Volcanic Islands chips
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlPadDimensions(
+    AddrTileMode        tileMode,    ///< [in] tile mode
+    UINT_32             bpp,         ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,       ///< [in] surface flags
+    UINT_32             numSamples,  ///< [in] number of samples
+    ADDR_TILEINFO*      pTileInfo,   ///< [in/out] bank structure.
+    UINT_32             padDims,     ///< [in] Dimensions to pad valid value 1,2,3 (unused here)
+    UINT_32             mipLevel,    ///< [in] MipLevel
+    UINT_32*            pPitch,      ///< [in/out] pitch in pixels
+    UINT_32             pitchAlign,  ///< [in] pitch alignment
+    UINT_32*            pHeight,     ///< [in/out] height in pixels (only read here)
+    UINT_32             heightAlign, ///< [in] height alignment
+    UINT_32*            pSlices,     ///< [in/out] number of slices (unused here)
+    UINT_32             sliceAlign   ///< [in] number of slice alignment (unused here)
+    ) const
+{
+    if (m_settings.isVolcanicIslands &&
+        flags.dccCompatible &&
+        (numSamples > 1) &&
+        (mipLevel == 0) &&
+        IsMacroTiled(tileMode))
+    {
+        UINT_32 tileSizePerSample = BITS_TO_BYTES(bpp * MicroTileWidth * MicroTileHeight);
+        UINT_32 samplesPerSplit  = pTileInfo->tileSplitBytes / tileSizePerSample;
+        // Only act when one tile split cannot hold all samples
+        if (samplesPerSplit < numSamples)
+        {
+            UINT_32 dccFastClearByteAlign = HwlGetPipes(pTileInfo) * m_pipeInterleaveBytes * 256;
+            UINT_32 bytesPerSplit = BITS_TO_BYTES((*pPitch) * (*pHeight) * bpp * samplesPerSplit);
+
+            ADDR_ASSERT(IsPow2(dccFastClearByteAlign));
+
+            if (0 != (bytesPerSplit & (dccFastClearByteAlign - 1)))
+            {
+                UINT_32 dccFastClearPixelAlign = dccFastClearByteAlign /
+                                                BITS_TO_BYTES(bpp) /
+                                                samplesPerSplit;
+                UINT_32 macroTilePixelAlign = pitchAlign * heightAlign;
+
+                if ((dccFastClearPixelAlign >= macroTilePixelAlign) &&
+                    ((dccFastClearPixelAlign % macroTilePixelAlign) == 0))
+                {
+                    UINT_32 dccFastClearPitchAlignInMacroTile =
+                        dccFastClearPixelAlign / macroTilePixelAlign;
+                    UINT_32 heightInMacroTile = *pHeight / heightAlign;
+                    UINT_32 dccFastClearPitchAlignInPixels;
+                    // Halve both factors together while each is even and greater than one
+                    while ((heightInMacroTile > 1) &&
+                           ((heightInMacroTile % 2) == 0) &&
+                           (dccFastClearPitchAlignInMacroTile > 1) &&
+                           ((dccFastClearPitchAlignInMacroTile % 2) == 0))
+                    {
+                        heightInMacroTile >>= 1;
+                        dccFastClearPitchAlignInMacroTile >>= 1;
+                    }
+
+                    dccFastClearPitchAlignInPixels = pitchAlign * dccFastClearPitchAlignInMacroTile;
+                    // Round the pitch up to a multiple of that alignment
+                    if (IsPow2(dccFastClearPitchAlignInPixels))
+                    {
+                        *pPitch = PowTwoAlign((*pPitch), dccFastClearPitchAlignInPixels);
+                    }
+                    else
+                    {
+                        *pPitch += (dccFastClearPitchAlignInPixels - 1);
+                        *pPitch /= dccFastClearPitchAlignInPixels;
+                        *pPitch *= dccFastClearPitchAlignInPixels;
+                    }
+                }
+            }
+        }
+    }
+}
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  ciaddrlib.h
+* @brief Contains the CIAddrLib class definition.
+***************************************************************************************************
+*/
+
+#ifndef __CI_ADDR_LIB_H__
+#define __CI_ADDR_LIB_H__
+
+#include "addrlib.h"
+#include "siaddrlib.h"
+
+/**
+***************************************************************************************************
+* @brief CI specific settings structure: one single-bit flag per chip family or variant.
+***************************************************************************************************
+*/
+struct CIChipSettings
+{
+    struct // anonymous, so the flags are accessed directly (e.g. m_settings.isKaveri)
+    {
+        UINT_32 isSeaIsland : 1;
+        UINT_32 isBonaire   : 1;
+        UINT_32 isKaveri    : 1;
+        UINT_32 isSpectre   : 1;
+        UINT_32 isSpooky    : 1;
+        UINT_32 isKalindi   : 1;
+        // Hawaii is GFXIP 7.2, similar to CI (Bonaire)
+        UINT_32 isHawaii    : 1;
+
+        // VI
+        UINT_32 isVolcanicIslands : 1;
+        UINT_32 isIceland         : 1;
+        UINT_32 isTonga           : 1;
+        UINT_32 isFiji            : 1;
+        // VI fusion (Carrizo)
+        UINT_32 isCarrizo         : 1;
+    };
+};
+
+/**
+***************************************************************************************************
+* @brief This class is the CI specific address library
+*        function set. Objects are created through CreateObj.
+***************************************************************************************************
+*/
+class CIAddrLib : public SIAddrLib
+{
+public:
+    /// Creates CIAddrLib object, allocating it with the operator new that takes the client
+    static AddrLib* CreateObj(const AddrClient* pClient)
+    {
+        return new(pClient) CIAddrLib(pClient);
+    }
+
+private: // constructor and destructor are private; CreateObj is the public way to create one
+    CIAddrLib(const AddrClient* pClient);
+    virtual ~CIAddrLib();
+
+protected:
+
+    // Hwl interface - defined in AddrLib
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+    virtual AddrChipFamily HwlConvertChipFamily(
+        UINT_32 uChipFamily, UINT_32 uChipRevision);
+
+    virtual BOOL_32 HwlInitGlobalParams(
+        const ADDR_CREATE_INPUT* pCreateIn);
+
+    virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
+        INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo,
+        AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
+
+    virtual VOID HwlComputeTileDataWidthAndHeightLinear(
+        UINT_32* pMacroWidth, UINT_32* pMacroHeight,
+        UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
+
+    virtual INT_32 HwlComputeMacroModeIndex(
+        INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
+        ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL
+        ) const;
+
+    // Sub-hwl interface - defined in EgBasedAddrLib
+    virtual VOID HwlSetupTileInfo(
+        AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
+        AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    virtual INT_32 HwlPostCheckTileIndex(
+        const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
+        INT curIndex = TileIndexInvalid) const;
+
+    virtual VOID   HwlFmaskPreThunkSurfInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
+        const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
+        ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
+
+    virtual VOID   HwlFmaskPostThunkSurfInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
+
+    virtual AddrTileMode HwlDegradeThickTileMode(
+        AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
+
+    virtual BOOL_32 HwlOverrideTileMode(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        AddrTileMode* pTileMode,
+        AddrTileType* pTileType) const;
+
+    virtual BOOL_32 HwlStereoCheckRightOffsetPadding() const; ///< TRUE when isKaveri is set
+
+    virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
+        const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+        ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
+        const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+protected: // NOTE(review): redundant, the section above is already protected
+    virtual VOID HwlPadDimensions(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+        UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
+        UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
+        UINT_32* pSlices, UINT_32 sliceAlign) const;
+
+private:
+    VOID ReadGbTileMode(
+        UINT_32 regValue, ADDR_TILECONFIG* pCfg) const;
+
+    VOID ReadGbMacroTileCfg(
+        UINT_32 regValue, ADDR_TILEINFO* pCfg) const; ///< Decodes the bank fields into pCfg
+
+    BOOL_32 InitTileSettingTable(
+        const UINT_32 *pSetting, UINT_32 noOfEntries);
+
+    BOOL_32 InitMacroTileCfgTable(
+        const UINT_32 *pSetting, UINT_32 noOfEntries); ///< Fills m_macroTileTable
+
+    UINT_64 HwlComputeMetadataNibbleAddress(
+        UINT_64 uncompressedDataByteAddress,
+        UINT_64 dataBaseByteAddress,
+        UINT_64 metadataBaseByteAddress,
+        UINT_32 metadataBitSize,
+        UINT_32 elementBitSize,
+        UINT_32 blockByteSize,
+        UINT_32 pipeInterleaveBytes,
+        UINT_32 numOfPipes,
+        UINT_32 numOfBanks,
+        UINT_32 numOfSamplesPerSplit) const; ///< Returns the meta data nibble address
+
+    static const UINT_32    MacroTileTableSize = 16;    ///< Capacity of m_macroTileTable
+    ADDR_TILEINFO           m_macroTileTable[MacroTileTableSize]; ///< Decoded macro tile configs
+    UINT_32                 m_noOfMacroEntries;         ///< Entries filled in m_macroTileTable
+    BOOL_32                 m_allowNonDispThickModes;   ///< Set while the tile table is set up
+
+    CIChipSettings          m_settings;                 ///< Chip flags
+};
+
+#endif
+
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,4575 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  egbaddrlib.cpp
+* @brief Contains the EgBasedAddrLib class implementation
+***************************************************************************************************
+*/
+
+#include "egbaddrlib.h"
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::EgBasedAddrLib
+*
+*   @brief
+*       Constructor
+*
+*   @note
+*       pClient is passed on to AddrLib; m_ranks and m_logicalBanks start at 0, m_bankInterleave at 1
+***************************************************************************************************
+*/
+EgBasedAddrLib::EgBasedAddrLib(const AddrClient* pClient) :
+    AddrLib(pClient),
+    m_ranks(0),
+    m_logicalBanks(0),
+    m_bankInterleave(1)
+{
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::~EgBasedAddrLib
+*
+*   @brief
+*       Destructor; the body is empty, nothing is released at this level
+***************************************************************************************************
+*/
+EgBasedAddrLib::~EgBasedAddrLib()
+{
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeSurfaceInfo
+*
+*   @brief
+*       Compute surface sizes including padded pitch, height, slices and total size in bytes;
+*       the output tile mode and base alignment might be changed in this call as well.
+*       Results are returned through output parameters.
+*
+*   @return
+*       Result of the tile mode specific helper; FALSE for an unrecognized tile mode
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::DispatchComputeSurfaceInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    AddrTileMode        tileMode      = pIn->tileMode;
+    UINT_32             bpp           = pIn->bpp;
+    UINT_32             numSamples    = pIn->numSamples;
+    UINT_32             numFrags      = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags);
+    UINT_32             pitch         = pIn->width;
+    UINT_32             height        = pIn->height;
+    UINT_32             numSlices     = pIn->numSlices;
+    UINT_32             mipLevel      = pIn->mipLevel;
+    ADDR_SURFACE_FLAGS  flags         = pIn->flags;
+
+    ADDR_TILEINFO       tileInfoDef   = {0}; // fallback storage if pOut->pTileInfo is NULL
+    ADDR_TILEINFO*      pTileInfo     = &tileInfoDef;
+
+    UINT_32             padDims = 0;
+    BOOL_32             valid; // assigned by every branch of the switch below
+
+    tileMode = DegradeLargeThickTile(tileMode, bpp);
+
+    // Only override numSamples for NI above
+    if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
+    {
+        if (numFrags != numSamples) // This means EQAA
+        {
+            // The real surface size needed is determined by number of fragments
+            numSamples = numFrags;
+        }
+
+        // Save altered numSamples in pOut
+        pOut->numSamples = numSamples;
+    }
+
+    // Caller makes sure pOut->pTileInfo is not NULL, see HwlComputeSurfaceInfo
+    ADDR_ASSERT(pOut->pTileInfo);
+
+    if (pOut->pTileInfo != NULL)
+    {
+        pTileInfo = pOut->pTileInfo;
+    }
+
+    // Seed the working tile info from the input, or zero it if no input tile info was given
+    if (pIn->pTileInfo != NULL)
+    {
+        if (pTileInfo != pIn->pTileInfo)
+        {
+            *pTileInfo = *pIn->pTileInfo;
+        }
+    }
+    else
+    {
+        memset(pTileInfo, 0, sizeof(ADDR_TILEINFO));
+    }
+
+    // For macro tile mode, we should calculate default tiling parameters
+    HwlSetupTileInfo(tileMode,
+                     flags,
+                     bpp,
+                     pitch,
+                     height,
+                     numSamples,
+                     pIn->pTileInfo,
+                     pTileInfo,
+                     pIn->tileType,
+                     pOut);
+
+    if (flags.cube)
+    {
+        if (mipLevel == 0)
+        {
+            padDims = 2;
+        }
+
+        if (numSlices == 1)
+        {
+            // One face only: clear cube in the local copy (NOTE(review): flags is not read again)
+            flags.cube = 0;
+        }
+    }
+
+    switch (tileMode)
+    {
+        case ADDR_TM_LINEAR_GENERAL://fall through
+        case ADDR_TM_LINEAR_ALIGNED:
+            valid = ComputeSurfaceInfoLinear(pIn, pOut, padDims);
+            break;
+
+        case ADDR_TM_1D_TILED_THIN1://fall through
+        case ADDR_TM_1D_TILED_THICK:
+            valid = ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, tileMode);
+            break;
+
+        case ADDR_TM_2D_TILED_THIN1:    //fall through
+        case ADDR_TM_2D_TILED_THICK:    //fall through
+        case ADDR_TM_3D_TILED_THIN1:    //fall through
+        case ADDR_TM_3D_TILED_THICK:    //fall through
+        case ADDR_TM_2D_TILED_XTHICK:   //fall through
+        case ADDR_TM_3D_TILED_XTHICK:   //fall through
+        case ADDR_TM_PRT_TILED_THIN1:   //fall through
+        case ADDR_TM_PRT_2D_TILED_THIN1://fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1://fall through
+        case ADDR_TM_PRT_TILED_THICK:   //fall through
+        case ADDR_TM_PRT_2D_TILED_THICK://fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            valid = ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, tileMode);
+            break;
+
+        default:
+            valid = FALSE;
+            ADDR_ASSERT_ALWAYS();
+            break;
+    }
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceInfoLinear
+*
+*   @brief
+*       Compute the padded pitch, height, slice count, total size in bytes and the alignments
+*       of a linear surface. The tile mode is never changed in linear mode, so pOut->tileMode
+*       is copied from pIn->tileMode. Results are returned through pOut.
+*
+*   @return
+*       Always TRUE; this function has no failure path
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoLinear(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] Input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,   ///< [in,out] Output structure; numSamples and pTileInfo are read
+    UINT_32                                 padDims ///< [in] Dimensions to pad
+    ) const
+{
+    UINT_32 expPitch = pIn->width;
+    UINT_32 expHeight = pIn->height;
+    UINT_32 expNumSlices = pIn->numSlices;
+
+    // No linear MSAA on real H/W, keep this for TGL
+    UINT_32 numSamples = pOut->numSamples; // taken from pOut, not from pIn
+
+    const UINT_32 microTileThickness = 1; // used as depthAlign and as the thickness for PadDimensions
+
+    //
+    // Compute the surface alignments (written straight into pOut; the return value is ignored).
+    //
+    ComputeSurfaceAlignmentsLinear(pIn->tileMode,
+                                   pIn->bpp,
+                                   pIn->flags,
+                                   &pOut->baseAlign,
+                                   &pOut->pitchAlign,
+                                   &pOut->heightAlign);
+
+    if ((pIn->tileMode == ADDR_TM_LINEAR_GENERAL) && pIn->flags.color && (pIn->height > 1))
+    {
+#if !ALT_TEST
+        // When linear_general surface is accessed in multiple lines, it requires 8 pixels in pitch
+        // alignment since PITCH_TILE_MAX is in unit of 8 pixels.
+        // It is OK if it is accessed per line.
+        ADDR_ASSERT((pIn->width % 8) == 0);
+#endif
+    }
+
+    pOut->depthAlign = microTileThickness;
+
+    expPitch = HwlPreHandleBaseLvl3xPitch(pIn, expPitch); // HWL hook applied before padding
+
+    //
+    // Pad pitch and height to the required granularities (the slice count is passed as well).
+    //
+    PadDimensions(pIn->tileMode,
+                  pIn->bpp,
+                  pIn->flags,
+                  numSamples,
+                  pOut->pTileInfo,
+                  padDims,
+                  pIn->mipLevel,
+                  &expPitch, pOut->pitchAlign,
+                  &expHeight, pOut->heightAlign,
+                  &expNumSlices, microTileThickness);
+
+    expPitch = HwlPostHandleBaseLvl3xPitch(pIn, expPitch); // HWL hook applied after padding
+
+    //
+    // Adjust per HWL: the hook receives pointers to pitch, height and heightAlign and returns
+    // the value used below as the size of one slice.
+
+    UINT_64 logicalSliceSize;
+
+    logicalSliceSize = HwlGetSizeAdjustmentLinear(pIn->tileMode,
+                                                  pIn->bpp,
+                                                  numSamples,
+                                                  pOut->baseAlign,
+                                                  pOut->pitchAlign,
+                                                  &expPitch,
+                                                  &expHeight,
+                                                  &pOut->heightAlign);
+
+
+    pOut->pitch = expPitch;
+    pOut->height = expHeight;
+    pOut->depth = expNumSlices;
+
+    pOut->surfSize = logicalSliceSize * expNumSlices; // 64-bit product: slice size times slice count
+
+    pOut->tileMode = pIn->tileMode; // linear mode is reported back unchanged
+
+    return TRUE;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceInfoMicroTiled
+*
+*   @brief
+*       Compute the padded pitch, height, slice count, total size in bytes and the alignments
+*       of a 1D (micro) tiled surface. For mip sub-levels a 1D thick mode may be degraded via
+*       HwlDegradeThickTileMode; the mode finally used is returned in pOut->tileMode.
+*
+*   @return
+*       Always TRUE; 'valid' is never cleared in this function
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMicroTiled(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,       ///< [in,out] Output structure; numSamples and pTileInfo are read
+    UINT_32                                 padDims,    ///< [in] Dimensions to pad
+    AddrTileMode                            expTileMode ///< [in] Expected tile mode
+    ) const
+{
+    BOOL_32 valid = TRUE;
+
+    UINT_32 microTileThickness;
+    UINT_32 expPitch = pIn->width;
+    UINT_32 expHeight = pIn->height;
+    UINT_32 expNumSlices = pIn->numSlices;
+
+    // No 1D MSAA on real H/W, keep this for TGL
+    UINT_32 numSamples = pOut->numSamples; // taken from pOut, not from pIn
+
+    //
+    // Compute the micro tile thickness.
+    //
+    microTileThickness = ComputeSurfaceThickness(expTileMode);
+
+    //
+    // Extra override for mip levels
+    //
+    if (pIn->mipLevel > 0)
+    {
+        //
+        // Reduce tiling mode from thick to thin if the number of slices is less than the
+        // micro tile thickness.
+        // The thickness only drops to 1 when the HWL hook actually changes the mode.
+        if ((expTileMode == ADDR_TM_1D_TILED_THICK) &&
+            (expNumSlices < ThickTileThickness))
+        {
+            expTileMode = HwlDegradeThickTileMode(ADDR_TM_1D_TILED_THICK, expNumSlices, NULL);
+            if (expTileMode != ADDR_TM_1D_TILED_THICK)
+            {
+                microTileThickness = 1;
+            }
+        }
+    }
+
+    //
+    // Compute the surface restrictions (alignments are written straight into pOut; the
+    // return value is ignored).
+    ComputeSurfaceAlignmentsMicroTiled(expTileMode,
+                                       pIn->bpp,
+                                       pIn->flags,
+                                       numSamples,
+                                       &pOut->baseAlign,
+                                       &pOut->pitchAlign,
+                                       &pOut->heightAlign);
+
+    pOut->depthAlign = microTileThickness;
+
+    //
+    // Pad pitch and height to the required granularities.
+    // Compute surface size.
+    // Return parameters.
+    //
+    PadDimensions(expTileMode,
+                  pIn->bpp,
+                  pIn->flags,
+                  numSamples,
+                  pOut->pTileInfo,
+                  padDims,
+                  pIn->mipLevel,
+                  &expPitch, pOut->pitchAlign,
+                  &expHeight, pOut->heightAlign,
+                  &expNumSlices, microTileThickness);
+
+    //
+    // Get HWL specific pitch adjustment; the hook receives pointers to pitch and height and
+    // returns the value used below as the size of one slice.
+    UINT_64 logicalSliceSize = HwlGetSizeAdjustmentMicroTiled(microTileThickness,
+                                                              pIn->bpp,
+                                                              pIn->flags,
+                                                              numSamples,
+                                                              pOut->baseAlign,
+                                                              pOut->pitchAlign,
+                                                              &expPitch,
+                                                              &expHeight);
+
+
+    pOut->pitch = expPitch;
+    pOut->height = expHeight;
+    pOut->depth = expNumSlices;
+
+    pOut->surfSize = logicalSliceSize * expNumSlices; // 64-bit product: slice size times slice count
+
+    pOut->tileMode = expTileMode; // may differ from the mode passed in (see mip-level override)
+
+    return valid;
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceInfoMacroTiled
+*
+*   @brief
+*       Compute the padded pitch, height, slice count, total size in bytes and the alignments
+*       of a 2D/3D (macro) tiled surface. For mip sub-levels the tile mode may change, and the
+*       computation is then redone or handed to ComputeSurfaceInfoMicroTiled.
+*
+*   @return
+*       TRUE if no error occurs (alignment computation result, or that of the delegated call)
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] Input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut,       ///< [in,out] Output structure; numSamples, pTileInfo, pStereoInfo are read
+    UINT_32                                 padDims,    ///< [in] Dimensions to pad
+    AddrTileMode                            expTileMode ///< [in] Expected tile mode
+    ) const
+{
+    BOOL_32 valid = TRUE;
+
+    AddrTileMode origTileMode = expTileMode; // remembered to detect a mode change further down
+    UINT_32 microTileThickness;
+
+    UINT_32 paddedPitch;
+    UINT_32 paddedHeight;
+    UINT_64 bytesPerSlice;
+
+    UINT_32 expPitch     = pIn->width;
+    UINT_32 expHeight    = pIn->height;
+    UINT_32 expNumSlices = pIn->numSlices;
+
+    UINT_32 numSamples = pOut->numSamples; // taken from pOut, not from pIn
+
+    //
+    // Compute the surface restrictions as base
+    // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled
+    //
+    valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode,
+                                               pIn->bpp,
+                                               pIn->flags,
+                                               pIn->mipLevel,
+                                               numSamples,
+                                               pOut->pTileInfo,
+                                               &pOut->baseAlign,
+                                               &pOut->pitchAlign,
+                                               &pOut->heightAlign);
+
+    if (valid)
+    {
+        //
+        // Compute the micro tile thickness.
+        //
+        microTileThickness = ComputeSurfaceThickness(expTileMode);
+
+        //
+        // Find the correct tiling mode for mip levels
+        //
+        if (pIn->mipLevel > 0)
+        {
+            //
+            // Try valid tile mode
+            //
+            expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode,
+                                                         pIn->bpp,
+                                                         expPitch,
+                                                         expHeight,
+                                                         expNumSlices,
+                                                         numSamples,
+                                                         pOut->pitchAlign,
+                                                         pOut->heightAlign,
+                                                         pOut->pTileInfo);
+
+            if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled
+            {
+                return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode);
+            }
+            else
+            {
+                if (microTileThickness != ComputeSurfaceThickness(expTileMode))
+                {
+                    //
+                    // Re-compute if thickness changed since bank-height may be changed!
+                    // (recursive call with the new mode as the expected tile mode)
+                    return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode);
+                }
+            }
+        }
+
+        paddedPitch     = expPitch;
+        paddedHeight    = expHeight;
+
+        //
+        // Re-calculate the alignments for the new mode. NOTE: the padding below still runs if
+        // this call clears 'valid'; the failure is only reported through the return value.
+        if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled
+        {
+            valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode,
+                                                       pIn->bpp,
+                                                       pIn->flags,
+                                                       pIn->mipLevel,
+                                                       numSamples,
+                                                       pOut->pTileInfo,
+                                                       &pOut->baseAlign,
+                                                       &pOut->pitchAlign,
+                                                       &pOut->heightAlign);
+        }
+
+        //
+        // Do padding
+        //
+        PadDimensions(expTileMode,
+                      pIn->bpp,
+                      pIn->flags,
+                      numSamples,
+                      pOut->pTileInfo,
+                      padDims,
+                      pIn->mipLevel,
+                      &paddedPitch, pOut->pitchAlign,
+                      &paddedHeight, pOut->heightAlign,
+                      &expNumSlices, microTileThickness);
+
+        if (pIn->flags.qbStereo &&
+            (pOut->pStereoInfo != NULL) &&
+            HwlStereoCheckRightOffsetPadding())
+        {
+            // Eye height's bank bits are different from y == 0?
+            // Since 3D rendering treats right eye buffer starting from y == "eye height" while
+            // display engine treats it to be 0, so the bank bits may be different, we pad
+            // more in height to make sure y == "eye height" has the same bank bits as y == 0.
+            UINT_32 checkMask = pOut->pTileInfo->banks - 1;
+            UINT_32 bankBits = 0;
+            do
+            {
+                bankBits = (paddedHeight / 8 / pOut->pTileInfo->bankHeight) & checkMask;
+
+                if (bankBits)
+                {
+                   paddedHeight += pOut->heightAlign;
+                }
+            } while (bankBits);
+        }
+
+        //
+        // Compute the size of a slice; the product is widened to 64 bits before multiplying.
+        //
+        bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(paddedPitch) *
+                                      paddedHeight * NextPow2(pIn->bpp) * numSamples);
+
+        pOut->pitch = paddedPitch;
+        // Put this check right here to workaround special mipmap cases which the original height
+        // is needed.
+        // The original height is pre-stored in pOut->height in PostComputeMipLevel and
+        // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too.
+        if (m_configFlags.checkLast2DLevel && numSamples == 1) // Don't check MSAA
+        {
+            // Set a TRUE in pOut if next Level is the first 1D sub level
+            HwlCheckLastMacroTiledLvl(pIn, pOut);
+        }
+        pOut->height = paddedHeight; // written only after the check above has seen the old value
+
+        pOut->depth = expNumSlices;
+
+        pOut->surfSize = bytesPerSlice * expNumSlices;
+
+        pOut->tileMode = expTileMode;
+
+        pOut->depthAlign = microTileThickness;
+
+    } // if (valid)
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceAlignmentsLinear
+*
+*   @brief
+*       Compute linear surface alignment, calculation results are returned through
+*       output parameters. An unhandled tile mode yields alignments of 1.
+*
+*   @return
+*       Always TRUE, including for the unhandled default case
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsLinear(
+    AddrTileMode        tileMode,          ///< [in] tile mode
+    UINT_32             bpp,               ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
+    UINT_32*            pBaseAlign,        ///< [out] base address alignment in bytes
+    UINT_32*            pPitchAlign,       ///< [out] pitch alignment in pixels
+    UINT_32*            pHeightAlign       ///< [out] height alignment in pixels
+    ) const
+{
+    BOOL_32 valid = TRUE; // never cleared below
+
+    switch (tileMode)
+    {
+        case ADDR_TM_LINEAR_GENERAL:
+            //
+            // The required base alignment and pitch and height granularities is to 1 element.
+            // For the base that is bpp / 8 bytes, or 1 byte when bpp is 8 or less.
+            *pBaseAlign   = (bpp > 8) ? bpp / 8 : 1;
+            *pPitchAlign  = 1;
+            *pHeightAlign = 1;
+            break;
+        case ADDR_TM_LINEAR_ALIGNED:
+            //
+            // The required alignment for base is the pipe interleave size.
+            // The required granularity for pitch is hwl dependent.
+            // The required granularity for height is one row.
+            //
+            *pBaseAlign     = m_pipeInterleaveBytes;
+            *pPitchAlign    = HwlGetPitchAlignmentLinear(bpp, flags);
+            *pHeightAlign   = 1;
+            break;
+        default:
+            *pBaseAlign     = 1;
+            *pPitchAlign    = 1;
+            *pHeightAlign   = 1;
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+
+    AdjustPitchAlignment(flags, pPitchAlign); // applied for every tile mode, default case included
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled
+*
+*   @brief
+*       Compute 1D tiled surface alignment, calculation results are returned through
+*       output parameters. Extra alignment is applied when flags.czDispCompatible is set.
+*
+*   @return
+*       Always TRUE; 'valid' is never cleared in this function
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled(
+    AddrTileMode        tileMode,          ///< [in] tile mode
+    UINT_32             bpp,               ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
+    UINT_32             numSamples,        ///< [in] number of samples
+    UINT_32*            pBaseAlign,        ///< [out] base address alignment in bytes
+    UINT_32*            pPitchAlign,       ///< [out] pitch alignment in pixels
+    UINT_32*            pHeightAlign       ///< [out] height alignment in pixels
+    ) const
+{
+    BOOL_32 valid = TRUE;
+
+    //
+    // The required alignment for base is the pipe interleave size.
+    //
+    *pBaseAlign   = m_pipeInterleaveBytes;
+
+    *pPitchAlign  = HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples);
+
+    *pHeightAlign = MicroTileHeight;
+
+    AdjustPitchAlignment(flags, pPitchAlign);
+
+    // ECR#393489 - Workaround 2 for 1D tiling: there is a HW bug for Carrizo where it requires
+    // the following alignments for 1D tiling. The pitch must be a multiple of 512 / bytes per
+    // pixel (a division, not a shift by the byte count) so that 8 lines span whole 4096 bytes.
+    if (flags.czDispCompatible)
+    {
+        *pBaseAlign  = PowTwoAlign(*pBaseAlign, 4096);                         //Base address MOD 4096 = 0
+        *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 / Max(1u, BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0
+    }
+    // end Carrizo workaround for 1D tiling
+
+    return valid;
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlReduceBankWidthHeight
+*
+*   @brief
+*       Halve bankWidth first, then bankHeight (never below bankHeightAlign), until
+*       tileSize*BANK_WIDTH*BANK_HEIGHT <= ROW_SIZE. pTileInfo is updated in place.
+*
+*   @return
+*       FALSE (with a warning) if still over ROW_SIZE and no 64-bit-depth early quit; else TRUE
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::HwlReduceBankWidthHeight(
+    UINT_32             tileSize,           ///< [in] tile size
+    UINT_32             bpp,                ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,              ///< [in] surface flags
+    UINT_32             numSamples,         ///< [in] number of samples
+    UINT_32             bankHeightAlign,    ///< [in] bank height alignment
+    UINT_32             pipes,              ///< [in] pipes
+    ADDR_TILEINFO*      pTileInfo           ///< [in/out] bank structure.
+    ) const
+{
+    UINT_32 macroAspectAlign;
+    BOOL_32 valid = TRUE;
+
+    if (tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize)
+    {
+        BOOL_32 stillGreater = TRUE; // TRUE while the product is still above m_rowSize
+
+        // Try reducing bankWidth first
+        if (stillGreater && pTileInfo->bankWidth > 1)
+        {
+            while (stillGreater && pTileInfo->bankWidth > 0)
+            {
+                pTileInfo->bankWidth >>= 1;
+
+                if (pTileInfo->bankWidth == 0) // went below 1: clamp and stop, stillGreater stays TRUE
+                {
+                    pTileInfo->bankWidth = 1;
+                    break;
+                }
+
+                stillGreater =
+                    tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize;
+            }
+
+            // bankWidth is reduced above, so we need to recalculate bankHeight and ratio
+            bankHeightAlign = Max(1u,
+                                  m_pipeInterleaveBytes * m_bankInterleave /
+                                  (tileSize * pTileInfo->bankWidth)
+                                  );
+
+            // We cannot increase bankHeight so just assert this case.
+            ADDR_ASSERT((pTileInfo->bankHeight % bankHeightAlign) == 0);
+
+            if (numSamples == 1) // macroAspectRatio is only re-aligned for single-sample surfaces
+            {
+                macroAspectAlign = Max(1u,
+                                   m_pipeInterleaveBytes * m_bankInterleave /
+                                   (tileSize * pipes * pTileInfo->bankWidth)
+                                   );
+                pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio,
+                                                          macroAspectAlign);
+            }
+        }
+
+        // Early quit bank_height degradation for "64" bit z buffer
+        if (flags.depth && bpp >= 64)
+        {
+            stillGreater = FALSE; // also makes the function return TRUE below
+        }
+
+        // Then try reducing bankHeight
+        if (stillGreater && pTileInfo->bankHeight > bankHeightAlign)
+        {
+            while (stillGreater && pTileInfo->bankHeight > bankHeightAlign)
+            {
+                pTileInfo->bankHeight >>= 1;
+
+                if (pTileInfo->bankHeight < bankHeightAlign) // clamp to the alignment and stop
+                {
+                    pTileInfo->bankHeight = bankHeightAlign;
+                    break;
+                }
+
+                stillGreater =
+                    tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize;
+            }
+        }
+
+        valid = !stillGreater;
+
+        // Generate a warning if we still fail to meet this constraint
+        if (!valid)
+        {
+            ADDR_WARN(
+                0, ("TILE_SIZE(%d)*BANK_WIDTH(%d)*BANK_HEIGHT(%d) <= ROW_SIZE(%d)",
+                tileSize, pTileInfo->bankWidth, pTileInfo->bankHeight, m_rowSize));
+        }
+    }
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled
+*
+*   @brief
+*       Compute 2D tiled surface alignment, calculation results are returned through
+*       output parameters. bankHeight/bankWidth/macroAspectRatio in pTileInfo may be adjusted.
+*
+*   @return
+*       TRUE if SanityCheckMacroTiled and HwlReduceBankWidthHeight both succeed
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled(
+    AddrTileMode        tileMode,           ///< [in] tile mode
+    UINT_32             bpp,                ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,              ///< [in] surface flags
+    UINT_32             mipLevel,           ///< [in] mip level
+    UINT_32             numSamples,         ///< [in] number of samples
+    ADDR_TILEINFO*      pTileInfo,          ///< [in/out] bank structure.
+    UINT_32*            pBaseAlign,         ///< [out] base address alignment in bytes
+    UINT_32*            pPitchAlign,        ///< [out] pitch alignment in pixels
+    UINT_32*            pHeightAlign        ///< [out] height alignment in pixels
+    ) const
+{
+    BOOL_32 valid = SanityCheckMacroTiled(pTileInfo); // outputs are left untouched if this fails
+
+    if (valid)
+    {
+        UINT_32 macroTileWidth;
+        UINT_32 macroTileHeight;
+
+        UINT_32 tileSize;
+        UINT_32 bankHeightAlign;
+        UINT_32 macroAspectAlign;
+
+        UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+        UINT_32 pipes = HwlGetPipes(pTileInfo);
+
+        //
+        // Align bank height first according to latest h/w spec
+        //
+
+        // tile_size = MIN(tile_split, 64 * tile_thickness * element_bytes * num_samples)
+        tileSize = Min(pTileInfo->tileSplitBytes,
+                       BITS_TO_BYTES(64 * thickness * bpp * numSamples));
+
+        // bank_height_align =
+        // MAX(1, (pipe_interleave_bytes * bank_interleave)/(tile_size*bank_width))
+        bankHeightAlign = Max(1u,
+                              m_pipeInterleaveBytes * m_bankInterleave /
+                              (tileSize * pTileInfo->bankWidth)
+                              );
+
+        pTileInfo->bankHeight = PowTwoAlign(pTileInfo->bankHeight, bankHeightAlign);
+
+        // num_pipes * bank_width * macro_tile_aspect >=
+        // (pipe_interleave_size * bank_interleave) / tile_size
+        if (numSamples == 1)
+        {
+            // this restriction is only for mipmap (mipmap's numSamples must be 1)
+            macroAspectAlign = Max(1u,
+                               m_pipeInterleaveBytes * m_bankInterleave /
+                               (tileSize * pipes * pTileInfo->bankWidth)
+                               );
+            pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, macroAspectAlign);
+        }
+
+        valid = HwlReduceBankWidthHeight(tileSize,
+                                      bpp,
+                                      flags,
+                                      numSamples,
+                                      bankHeightAlign,
+                                      pipes,
+                                      pTileInfo);
+
+        //
+        // The required granularity for pitch is the macro tile width.
+        // NOTE: the outputs below are written even when the call above cleared 'valid'.
+        macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes *
+            pTileInfo->macroAspectRatio;
+
+        *pPitchAlign = macroTileWidth;
+
+        AdjustPitchAlignment(flags, pPitchAlign);
+
+        //
+        // The required granularity for height is the macro tile height.
+        //
+        macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
+            pTileInfo->macroAspectRatio;
+
+        *pHeightAlign = macroTileHeight;
+
+        //
+        // Compute base alignment
+        //
+        *pBaseAlign = pipes *
+            pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize;
+
+        if ((mipLevel == 0) && (flags.prt) && (m_chipFamily == ADDR_CHIP_FAMILY_SI)) // SI PRT base level only
+        {
+            static const UINT_32 PrtTileSize = 0x10000; // 64 KiB
+
+            UINT_32 macroTileSize = macroTileWidth * macroTileHeight * numSamples * bpp / 8;
+
+            if (macroTileSize < PrtTileSize) // scale pitch and base alignment up to one PRT tile
+            {
+                UINT_32 numMacroTiles = PrtTileSize / macroTileSize;
+
+                ADDR_ASSERT((PrtTileSize % macroTileSize) == 0);
+
+                *pPitchAlign *= numMacroTiles;
+                *pBaseAlign  *= numMacroTiles;
+            }
+        }
+    }
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::SanityCheckMacroTiled
+*
+*   @brief
+*       Range-check banks, bankWidth, bankHeight, macroAspectRatio and tileSplitBytes
+*   @return
+*       TRUE if every check, including HwlSanityCheckMacroTiled, passes
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::SanityCheckMacroTiled(
+    ADDR_TILEINFO* pTileInfo   ///< [in] macro-tiled parameters
+    ) const
+{
+    BOOL_32 valid       = TRUE;
+    UINT_32 numPipes    = HwlGetPipes(pTileInfo); // only used by the guidance assert at the end
+
+    switch (pTileInfo->banks) // must be 2, 4, 8 or 16
+    {
+        case 2: //fall through
+        case 4: //fall through
+        case 8: //fall through
+        case 16:
+            break;
+        default:
+            valid = FALSE;
+            break;
+
+    }
+
+    if (valid)
+    {
+        switch (pTileInfo->bankWidth) // must be 1, 2, 4 or 8
+        {
+            case 1: //fall through
+            case 2: //fall through
+            case 4: //fall through
+            case 8:
+                break;
+            default:
+                valid = FALSE;
+                break;
+        }
+    }
+
+    if (valid)
+    {
+        switch (pTileInfo->bankHeight) // must be 1, 2, 4 or 8
+        {
+            case 1: //fall through
+            case 2: //fall through
+            case 4: //fall through
+            case 8:
+                break;
+            default:
+                valid = FALSE;
+                break;
+        }
+    }
+
+    if (valid)
+    {
+        switch (pTileInfo->macroAspectRatio) // must be 1, 2, 4 or 8
+        {
+            case 1: //fall through
+            case 2: //fall through
+            case 4: //fall through
+            case 8:
+                break;
+            default:
+                valid = FALSE;
+                break;
+        }
+    }
+
+    if (valid)
+    {
+        if (pTileInfo->banks < pTileInfo->macroAspectRatio)
+        {
+            // This will generate macro tile height <= 1
+            valid = FALSE;
+        }
+    }
+
+    if (valid)
+    {
+        if (pTileInfo->tileSplitBytes > m_rowSize) // a tile split may not exceed the row size
+        {
+            valid = FALSE;
+        }
+    }
+
+    if (valid)
+    {
+        valid = HwlSanityCheckMacroTiled(pTileInfo); // HWL-specific checks run last
+    }
+
+    ADDR_ASSERT(valid == TRUE);
+
+    // Add this assert for guidance
+    ADDR_ASSERT(numPipes * pTileInfo->banks >= 4);
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceMipLevelTileMode
+*
+*   @brief
+*       Compute valid tile mode for surface mipmap sub-levels
+*
+*   @return
+*       Suitable tile mode: baseTileMode, a thinner mode, or a 1D mode when the level is too small
+***************************************************************************************************
+*/
+AddrTileMode EgBasedAddrLib::ComputeSurfaceMipLevelTileMode(
+    AddrTileMode        baseTileMode,   ///< [in] base tile mode
+    UINT_32             bpp,            ///< [in] bits per pixels
+    UINT_32             pitch,          ///< [in] current level pitch
+    UINT_32             height,         ///< [in] current level height
+    UINT_32             numSlices,      ///< [in] current number of slices
+    UINT_32             numSamples,     ///< [in] number of samples
+    UINT_32             pitchAlign,     ///< [in] pitch alignment
+    UINT_32             heightAlign,    ///< [in] height alignment
+    ADDR_TILEINFO*      pTileInfo       ///< [in] ptr to bank structure
+    ) const
+{
+    UINT_32 bytesPerTile;
+
+    AddrTileMode expTileMode = baseTileMode;
+    UINT_32 microTileThickness = ComputeSurfaceThickness(expTileMode);
+    UINT_32 interleaveSize = m_pipeInterleaveBytes * m_bankInterleave;
+
+    //
+    // Compute the size of one micro tile in bytes.
+    //
+    bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples);
+
+    //
+    // Reduce tiling mode from thick to thin if the number of slices is less than the
+    // micro tile thickness. The HWL hook is also handed a pointer to bytesPerTile.
+    //
+    if (numSlices < microTileThickness)
+    {
+        expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile);
+    }
+
+    if (bytesPerTile > pTileInfo->tileSplitBytes) // clamp to the tile split size
+    {
+        bytesPerTile = pTileInfo->tileSplitBytes;
+    }
+
+    UINT_32 threshold1 =
+        bytesPerTile * HwlGetPipes(pTileInfo) * pTileInfo->bankWidth * pTileInfo->macroAspectRatio;
+
+    UINT_32 threshold2 =
+        bytesPerTile * pTileInfo->bankWidth * pTileInfo->bankHeight;
+
+    //
+    // Reduce the tile mode from 2D/3D to 1D in following conditions
+    // (the interleave-size thresholds are only tested for the thin modes)
+    switch (expTileMode)
+    {
+        case ADDR_TM_2D_TILED_THIN1: //fall through
+        case ADDR_TM_3D_TILED_THIN1: //fall through
+        case ADDR_TM_PRT_TILED_THIN1: //fall through
+        case ADDR_TM_PRT_2D_TILED_THIN1: //fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1:
+            if ((pitch < pitchAlign) ||
+                (height < heightAlign) ||
+                (interleaveSize > threshold1) ||
+                (interleaveSize > threshold2))
+            {
+                expTileMode = ADDR_TM_1D_TILED_THIN1;
+            }
+            break;
+        case ADDR_TM_2D_TILED_THICK: //fall through
+        case ADDR_TM_3D_TILED_THICK: //fall through
+        case ADDR_TM_2D_TILED_XTHICK: //fall through
+        case ADDR_TM_3D_TILED_XTHICK: //fall through
+        case ADDR_TM_PRT_TILED_THICK: //fall through
+        case ADDR_TM_PRT_2D_TILED_THICK: //fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            if ((pitch < pitchAlign) ||
+                (height < heightAlign))
+            {
+                expTileMode = ADDR_TM_1D_TILED_THICK;
+            }
+            break;
+        default: // any other mode is returned unchanged
+            break;
+    }
+
+    return expTileMode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlDegradeBaseLevel
+*   @brief
+*       Check if degrade is needed for base level
+*   @return
+*       TRUE if degrade is suggested
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const
+{
+    // Callers are expected to pass a macro-tiled mode together with valid tile info.
+    ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
+    ADDR_ASSERT(pIn->pTileInfo);
+
+    // Operate on a local copy of the tile info; the helpers below only ever see this copy.
+    ADDR_TILEINFO                    workTileInfo = *pIn->pTileInfo;
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT setupOut     = {0};
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        setupOut.tileIndex      = pIn->tileIndex;
+        setupOut.macroModeIndex = TileIndexInvalid;
+    }
+
+    // The same local copy serves as both the input and the output tile info.
+    HwlSetupTileInfo(pIn->tileMode,
+                     pIn->flags,
+                     pIn->bpp,
+                     pIn->width,
+                     pIn->height,
+                     pIn->numSamples,
+                     &workTileInfo,
+                     &workTileInfo,
+                     pIn->tileType,
+                     &setupOut);
+
+    UINT_32 alignBase;      // passed below as an out-pointer; its value is not read here
+    UINT_32 alignPitch;
+    UINT_32 alignHeight;
+
+    const BOOL_32 alignmentsOk =
+        ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode,
+                                           pIn->bpp,
+                                           pIn->flags,
+                                           pIn->mipLevel,
+                                           pIn->numSamples,
+                                           &workTileInfo,
+                                           &alignBase,
+                                           &alignPitch,
+                                           &alignHeight);
+
+    // Without usable alignments, degrading is always suggested.
+    if (!alignmentsOk)
+    {
+        return TRUE;
+    }
+
+    // Otherwise degrade only when the surface is smaller than the pitch/height alignment.
+    const BOOL_32 belowAlignment = (pIn->width < alignPitch) || (pIn->height < alignHeight);
+    return belowAlignment;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlDegradeThickTileMode
+*
+*   @brief
+*       Degrades valid tile mode for thick modes if needed
+*
+*   @return
+*       Suitable tile mode
+***************************************************************************************************
+*/
+AddrTileMode EgBasedAddrLib::HwlDegradeThickTileMode(
+    AddrTileMode        baseTileMode,   ///< [in] base tile mode
+    UINT_32             numSlices,      ///< [in] current number of slices
+    UINT_32*            pBytesPerTile   ///< [in/out] pointer to bytes per tile, may be NULL
+    ) const
+{
+    // This routine is only meant for surfaces with fewer slices than the mode's thickness.
+    ADDR_ASSERT(numSlices < ComputeSurfaceThickness(baseTileMode));
+
+    // XTHICK modes step down to THICK unless the slice count is below ThickTileThickness,
+    // in which case they drop all the way to THIN1.
+    const BOOL_32 belowThick = (numSlices < ThickTileThickness);
+
+    // Unhandled modes keep the input mode and leave the bytes-per-tile value unshifted.
+    AddrTileMode degradedMode = baseTileMode;
+    UINT_32      shrinkShift  = 0;   // right shift applied to the bytes-per-tile value
+
+    switch (baseTileMode)
+    {
+        // THICK modes always degrade to the matching THIN1 mode.
+        case ADDR_TM_1D_TILED_THICK:
+            degradedMode = ADDR_TM_1D_TILED_THIN1;
+            shrinkShift  = 2;
+            break;
+
+        case ADDR_TM_2D_TILED_THICK:
+            degradedMode = ADDR_TM_2D_TILED_THIN1;
+            shrinkShift  = 2;
+            break;
+
+        case ADDR_TM_3D_TILED_THICK:
+            degradedMode = ADDR_TM_3D_TILED_THIN1;
+            shrinkShift  = 2;
+            break;
+
+        // XTHICK modes degrade to THICK or THIN1 depending on the slice count.
+        case ADDR_TM_2D_TILED_XTHICK:
+            degradedMode = belowThick ? ADDR_TM_2D_TILED_THIN1 : ADDR_TM_2D_TILED_THICK;
+            shrinkShift  = belowThick ? 3 : 1;
+            break;
+
+        case ADDR_TM_3D_TILED_XTHICK:
+            degradedMode = belowThick ? ADDR_TM_3D_TILED_THIN1 : ADDR_TM_3D_TILED_THICK;
+            shrinkShift  = belowThick ? 3 : 1;
+            break;
+
+        default:
+            // Only thick and extra-thick modes are expected here.
+            ADDR_ASSERT_ALWAYS();
+            break;
+    }
+
+    // A NULL pBytesPerTile means the caller does not care about the tile size; otherwise
+    // the caller's value is shifted in place.
+    if (pBytesPerTile != NULL)
+    {
+        *pBytesPerTile >>= shrinkShift;
+    }
+
+    return degradedMode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord
+*
+*   @brief
+*       Compute surface address from given coord (x, y, slice,sample)
+*
+*   @return
+*       Address in bytes
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord(
+    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
+    ) const
+{
+    // Dispatches on the tile mode to the linear, micro-tiled or macro-tiled address routine.
+    const AddrTileMode  tileMode      = pIn->tileMode;
+    const AddrTileType  microTileType = pIn->tileType;
+    UINT_32* const      pBitPosition  = &pOut->bitPosition;
+
+    // A sample count of 0 is treated as 1.
+    UINT_32 numSamples = pIn->numSamples;
+    if (numSamples == 0)
+    {
+        numSamples = 1;
+    }
+
+    // A fragment count of 0 falls back to the (adjusted) sample count.
+    UINT_32 numFrags = pIn->numFrags;
+    if (numFrags == 0)
+    {
+        numFrags = numSamples;
+    }
+
+    // ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order
+    BOOL_32 isDepthSampleOrder = pIn->isDepth;
+    if (microTileType == ADDR_DEPTH_SAMPLE_ORDER)
+    {
+        isDepthSampleOrder = TRUE;
+    }
+
+    if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
+    {
+        // With a differing fragment count, the fragment count is used as the sample count.
+        if (numFrags != numSamples)
+        {
+            numSamples = numFrags;
+            ADDR_ASSERT(pIn->sample < numSamples);
+        }
+
+        /// @note
+        /// 128 bit/thick tiled surface doesn't support display tiling and
+        /// mipmap chain must have the same tileType, so please fill tileType correctly
+        if (!IsLinear(tileMode) &&
+            ((pIn->bpp >= 128) || (ComputeSurfaceThickness(tileMode) > 1)))
+        {
+            ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE);
+        }
+    }
+
+    UINT_64 addr = 0;
+
+    switch (tileMode)
+    {
+        case ADDR_TM_LINEAR_GENERAL://fall through
+        case ADDR_TM_LINEAR_ALIGNED:
+            addr = ComputeSurfaceAddrFromCoordLinear(pIn->x,
+                                                     pIn->y,
+                                                     pIn->slice,
+                                                     pIn->sample,
+                                                     pIn->bpp,
+                                                     pIn->pitch,
+                                                     pIn->height,
+                                                     pIn->numSlices,
+                                                     pBitPosition);
+            break;
+
+        case ADDR_TM_1D_TILED_THIN1://fall through
+        case ADDR_TM_1D_TILED_THICK:
+            addr = ComputeSurfaceAddrFromCoordMicroTiled(pIn->x,
+                                                         pIn->y,
+                                                         pIn->slice,
+                                                         pIn->sample,
+                                                         pIn->bpp,
+                                                         pIn->pitch,
+                                                         pIn->height,
+                                                         numSamples,
+                                                         tileMode,
+                                                         microTileType,
+                                                         isDepthSampleOrder,
+                                                         pBitPosition);
+            break;
+
+        case ADDR_TM_2D_TILED_THIN1:    //fall through
+        case ADDR_TM_2D_TILED_THICK:    //fall through
+        case ADDR_TM_3D_TILED_THIN1:    //fall through
+        case ADDR_TM_3D_TILED_THICK:    //fall through
+        case ADDR_TM_2D_TILED_XTHICK:   //fall through
+        case ADDR_TM_3D_TILED_XTHICK:   //fall through
+        case ADDR_TM_PRT_TILED_THIN1:   //fall through
+        case ADDR_TM_PRT_2D_TILED_THIN1://fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1://fall through
+        case ADDR_TM_PRT_TILED_THICK:   //fall through
+        case ADDR_TM_PRT_2D_TILED_THICK://fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+        {
+            UINT_32 pipeSwizzle;
+            UINT_32 bankSwizzle;
+
+            // Swizzles arrive either packed in tileSwizzle or as two separate fields.
+            if (m_configFlags.useCombinedSwizzle)
+            {
+                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+                                       &bankSwizzle, &pipeSwizzle);
+            }
+            else
+            {
+                pipeSwizzle = pIn->pipeSwizzle;
+                bankSwizzle = pIn->bankSwizzle;
+            }
+
+            addr = ComputeSurfaceAddrFromCoordMacroTiled(pIn->x,
+                                                         pIn->y,
+                                                         pIn->slice,
+                                                         pIn->sample,
+                                                         pIn->bpp,
+                                                         pIn->pitch,
+                                                         pIn->height,
+                                                         numSamples,
+                                                         tileMode,
+                                                         microTileType,
+                                                         pIn->ignoreSE,
+                                                         isDepthSampleOrder,
+                                                         pipeSwizzle,
+                                                         bankSwizzle,
+                                                         pIn->pTileInfo,
+                                                         pBitPosition);
+            break;
+        }
+
+        default:
+            // Unknown tile mode: addr stays 0.
+            ADDR_ASSERT_ALWAYS();
+            break;
+    }
+
+#if ADDR_AM_BUILD
+    if ((m_chipFamily >= ADDR_CHIP_FAMILY_NI) &&
+        pIn->addr5Swizzle && isDepthSampleOrder && pIn->is32ByteTile)
+    {
+        // Bits 1:0 of (x >> 3) and of (y >> 3), masked by addr5Swizzle, are passed through
+        // XorReduce; the result is OR-ed into the address at bit position 5.
+        const UINT_32 tileX    = pIn->x >> 3;
+        const UINT_32 tileY    = pIn->y >> 3;
+        const UINT_32 tileBits = (((tileY & 0x3) << 2) | (tileX & 0x3)) & pIn->addr5Swizzle;
+        const UINT_32 addr5Bit = XorReduce(tileBits, 4);
+
+        addr |= static_cast<UINT_64>(addr5Bit << 5);
+    }
+#endif
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceAddrFromCoordMacroTiled
+*
+*   @brief
+*       Computes the surface address and bit position from a
+*       coordinate for 2D tiled (macro tiled)
+*   @return
+*       The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeSurfaceAddrFromCoordMacroTiled(
+    UINT_32             x,                      ///< [in] x coordinate
+    UINT_32             y,                      ///< [in] y coordinate
+    UINT_32             slice,                  ///< [in] slice index
+    UINT_32             sample,                 ///< [in] sample index
+    UINT_32             bpp,                    ///< [in] bits per pixel
+    UINT_32             pitch,                  ///< [in] surface pitch, in pixels
+    UINT_32             height,                 ///< [in] surface height, in pixels
+    UINT_32             numSamples,             ///< [in] number of samples
+    AddrTileMode        tileMode,               ///< [in] tile mode
+    AddrTileType        microTileType,          ///< [in] micro tiling type
+    BOOL_32             ignoreSE,               ///< [in] TRUE if shader engines can be ignored
+    BOOL_32             isDepthSampleOrder,     ///< [in] TRUE if depth sample ordering is used
+    UINT_32             pipeSwizzle,            ///< [in] pipe swizzle
+    UINT_32             bankSwizzle,            ///< [in] bank swizzle
+    ADDR_TILEINFO*      pTileInfo,              ///< [in] bank structure
+                                                ///  **All fields to be valid on entry**
+    UINT_32*            pBitPosition            ///< [out] bit position, e.g. FMT_1 will use this
+    ) const
+{   // Builds the byte address of (x, y, slice, sample) on a macro-tiled surface.
+    UINT_64 addr;
+
+    UINT_32 microTileBytes;     // bytes per micro tile; clamped to tileSplitBytes on split
+    UINT_32 microTileBits;      // bits per micro tile, all samples included
+    UINT_32 sampleOffset;
+    UINT_32 pixelIndex;
+    UINT_32 pixelOffset;
+    UINT_32 elementOffset;      // pixel + sample offset: first in bits, then in bytes
+    UINT_32 tileSplitSlice;     // index of the tileSplitBytes-sized piece holding the element
+    UINT_32 pipe;
+    UINT_32 bank;
+    UINT_64 sliceBytes;
+    UINT_64 sliceOffset;
+    UINT_32 macroTilePitch;
+    UINT_32 macroTileHeight;
+    UINT_32 macroTilesPerRow;
+    UINT_32 macroTilesPerSlice;
+    UINT_64 macroTileBytes;
+    UINT_32 macroTileIndexX;
+    UINT_32 macroTileIndexY;
+    UINT_64 macroTileOffset;
+    UINT_64 totalOffset;        // linear byte offset before pipe/bank bits are inserted
+    UINT_64 pipeInterleaveMask;
+    UINT_64 bankInterleaveMask;
+    UINT_64 pipeInterleaveOffset;
+    UINT_32 bankInterleaveOffset;
+    UINT_64 offset;             // part of totalOffset placed above the bank bits
+    UINT_32 tileRowIndex;
+    UINT_32 tileColumnIndex;
+    UINT_32 tileIndex;
+    UINT_32 tileOffset;
+
+    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+    //
+    // Compute the number of group, pipe, and bank bits.
+    //
+    UINT_32 numPipes              = HwlGetPipes(pTileInfo);
+    UINT_32 numPipeInterleaveBits = Log2(m_pipeInterleaveBytes);
+    UINT_32 numPipeBits           = Log2(numPipes);
+    UINT_32 numBankInterleaveBits = Log2(m_bankInterleave);
+    UINT_32 numBankBits           = Log2(pTileInfo->banks);
+
+    //
+    // Compute the micro tile size.
+    //
+    microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples;
+
+    microTileBytes = microTileBits / 8;
+    //
+    // Compute the pixel index within the micro tile.
+    //
+    pixelIndex = ComputePixelIndexWithinMicroTile(x,
+                                                  y,
+                                                  slice,
+                                                  bpp,
+                                                  tileMode,
+                                                  microTileType);
+
+    //
+    // Compute the sample offset and pixel offset.
+    //
+    if (isDepthSampleOrder)
+    {
+        //
+        // For depth surfaces, samples are stored contiguously for each element, so the sample
+        // offset is the sample number times the element size.
+        //
+        sampleOffset = sample * bpp;
+        pixelOffset  = pixelIndex * bpp * numSamples;
+    }
+    else
+    {
+        //
+        // For color surfaces, all elements for a particular sample are stored contiguously, so
+        // the sample offset is the sample number times the micro tile size divided by the number
+        // of samples.
+        //
+        sampleOffset = sample * (microTileBits / numSamples);
+        pixelOffset  = pixelIndex * bpp;
+    }
+
+    //
+    // Compute the element offset (in bits at this point).
+    //
+    elementOffset = pixelOffset + sampleOffset;
+
+    *pBitPosition = static_cast<UINT_32>(elementOffset % 8);
+
+    elementOffset /= 8; //bit-to-byte
+
+    //
+    // Determine if tiles need to be split across slices.
+    //
+    // If the size of the micro tile is larger than the tile split size, then the tile will be
+    // split across multiple slices.
+    //
+    UINT_32 slicesPerTile = 1;
+
+    if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1))
+    {   // tile splitting is only applied when the micro tile thickness is 1
+
+        //
+        // Compute the number of slices per tile.
+        //
+        slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes;
+
+        //
+        // Compute the tile split slice number for use in rotating the bank.
+        //
+        tileSplitSlice = elementOffset / pTileInfo->tileSplitBytes;
+
+        //
+        // Adjust the element offset to account for the portion of the tile that is being moved to
+        // a new slice.
+        //
+        elementOffset %= pTileInfo->tileSplitBytes;
+
+        //
+        // Clamp microTileBytes to tileSplitBytes: from here on the size of one split piece
+        // is used wherever the micro tile size is needed.
+        //
+        microTileBytes = pTileInfo->tileSplitBytes;
+    }
+    else
+    {
+        tileSplitSlice = 0;
+    }
+
+    //
+    // Compute macro tile pitch and height.
+    //
+    macroTilePitch  =
+        (MicroTileWidth  * pTileInfo->bankWidth  * numPipes) * pTileInfo->macroAspectRatio;
+    macroTileHeight =
+        (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / pTileInfo->macroAspectRatio;
+
+    //
+    // Compute the number of bytes per macro tile. Note: the micro tile count is divided by
+    // (numPipes * banks), so presumably only bytes of the same pipe/bank are counted.
+    macroTileBytes =
+        static_cast<UINT_64>(microTileBytes) *
+        (macroTilePitch / MicroTileWidth) * (macroTileHeight / MicroTileHeight) /
+        (numPipes * pTileInfo->banks);
+
+    //
+    // Compute the number of macro tiles per row.
+    //
+    macroTilesPerRow = pitch / macroTilePitch;
+
+    //
+    // Compute the offset to the macro tile containing the specified coordinate.
+    //
+    macroTileIndexX = x / macroTilePitch;
+    macroTileIndexY = y / macroTileHeight;
+    macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes;
+
+    //
+    // Compute the number of macro tiles per slice.
+    //
+    macroTilesPerSlice = macroTilesPerRow  * (height / macroTileHeight);
+
+    //
+    // Compute the slice size.
+    //
+    sliceBytes = macroTilesPerSlice * macroTileBytes;
+
+    //
+    // Compute the slice offset.
+    //
+    sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness));
+
+    //
+    // Compute tile offset
+    //
+    tileRowIndex    = (y / MicroTileHeight) % pTileInfo->bankHeight;
+    tileColumnIndex = ((x / MicroTileWidth) / numPipes) % pTileInfo->bankWidth;
+    tileIndex        = (tileRowIndex * pTileInfo->bankWidth) + tileColumnIndex;
+    tileOffset       = tileIndex * microTileBytes;
+
+    //
+    // Combine the slice offset and macro tile offset with the pixel and sample offsets, accounting
+    // for the pipe and bank bits in the middle of the address.
+    //
+    totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset;
+
+    //
+    // Get the pipe and bank.
+    //
+
+    // For tile modes accepted by IsPrtNoRotationTileMode(), reduce x and y to one macro tile
+    if (IsPrtNoRotationTileMode(tileMode))
+    {
+        x = x % macroTilePitch;
+        y = y % macroTileHeight;
+    }
+
+    pipe = ComputePipeFromCoord(x,
+                                y,
+                                slice,
+                                tileMode,
+                                pipeSwizzle,
+                                ignoreSE,
+                                pTileInfo);
+
+    bank = ComputeBankFromCoord(x,
+                                y,
+                                slice,
+                                tileMode,
+                                bankSwizzle,
+                                tileSplitSlice,
+                                pTileInfo);
+
+
+    //
+    // Split the offset to put some bits below the pipe+bank bits and some above.
+    //
+    pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1;
+    bankInterleaveMask = (1 << numBankInterleaveBits) - 1;
+    pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
+    bankInterleaveOffset = static_cast<UINT_32>((totalOffset >> numPipeInterleaveBits) &
+                                                bankInterleaveMask);
+    offset               =  totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits);
+
+    //
+    // Assemble the address from its components.
+    //
+    addr  = pipeInterleaveOffset;
+    // Each shifted field gets its own typed local; this is to remove /analyze warnings
+    UINT_32 pipeBits            = pipe                 <<  numPipeInterleaveBits;
+    UINT_32 bankInterleaveBits  = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits);
+    UINT_32 bankBits            = bank                 << (numPipeInterleaveBits + numPipeBits +
+                                                           numBankInterleaveBits);
+    UINT_64 offsetBits          = offset               << (numPipeInterleaveBits + numPipeBits +
+                                                           numBankInterleaveBits + numBankBits);
+
+    addr |= pipeBits;
+    addr |= bankInterleaveBits;
+    addr |= bankBits;
+    addr |= offsetBits;
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled
+*
+*   @brief
+*       Computes the surface address and bit position from a coordinate for 1D tiled
+*       (micro tiled)
+*   @return
+*       The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled(
+    UINT_32             x,                      ///< [in] x coordinate
+    UINT_32             y,                      ///< [in] y coordinate
+    UINT_32             slice,                  ///< [in] slice index
+    UINT_32             sample,                 ///< [in] sample index
+    UINT_32             bpp,                    ///< [in] bits per pixel
+    UINT_32             pitch,                  ///< [in] pitch, in pixels
+    UINT_32             height,                 ///< [in] height, in pixels
+    UINT_32             numSamples,             ///< [in] number of samples
+    AddrTileMode        tileMode,               ///< [in] tile mode
+    AddrTileType        microTileType,          ///< [in] micro tiling type
+    BOOL_32             isDepthSampleOrder,     ///< [in] TRUE if depth sample ordering is used
+    UINT_32*            pBitPosition            ///< [out] bit position, e.g. FMT_1 will use this
+    ) const
+{
+    //
+    // The byte address is the sum of three parts: the offset of the slice group, the offset
+    // of the micro tile within that slice group, and the offset of the element within the
+    // micro tile. The element offset is first computed in bits; its sub-byte remainder is
+    // reported through pBitPosition.
+    //
+    const UINT_32 tileThickness = ComputeSurfaceThickness(tileMode);
+
+    //
+    // Size of one micro tile, in bytes: the product of MicroTilePixels, tileThickness, bpp
+    // and numSamples is a bit count, which BITS_TO_BYTES converts.
+    //
+    const UINT_32 tileBytes =
+        BITS_TO_BYTES(MicroTilePixels * tileThickness * bpp * numSamples);
+
+    //
+    // Size of one slice group (pitch x height pixels, tileThickness slices deep), in bytes.
+    //
+    const UINT_64 groupBytes =
+        BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * tileThickness * bpp * numSamples);
+
+    //
+    // Number of micro tiles in one row of the surface.
+    //
+    const UINT_32 tilesPerRow = pitch / MicroTileWidth;
+
+    //
+    // Micro tile coordinates of (x, y, slice): column and row within the slice group, and
+    // the index of the slice group itself.
+    //
+    const UINT_32 tileColumn = x     / MicroTileWidth;
+    const UINT_32 tileRow    = y     / MicroTileHeight;
+    const UINT_32 tileGroup  = slice / tileThickness;
+
+    //
+    // Byte offset of the slice group; the cast makes the multiplication 64-bit.
+    //
+    const UINT_64 groupOffset = static_cast<UINT_64>(tileGroup) * groupBytes;
+
+    //
+    // Byte offset of the micro tile inside its slice group, again computed in 64 bits.
+    //
+    const UINT_64 tileOffset =
+        (static_cast<UINT_64>(tileRow) * tilesPerRow + tileColumn) * tileBytes;
+
+    //
+    // Index of the pixel inside its micro tile.
+    //
+    const UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x,
+                                                                y,
+                                                                slice,
+                                                                bpp,
+                                                                tileMode,
+                                                                microTileType);
+
+    //
+    // Bit offsets contributed by the sample and by the pixel; how they are laid out depends
+    // on the sample ordering.
+    //
+    UINT_32 sampleBits;
+    UINT_32 pixelBits;
+
+    if (!isDepthSampleOrder)
+    {
+        //
+        // Color order: all elements of one sample are stored contiguously, so a sample
+        // advances by the micro tile size (in bits) divided by the number of samples.
+        //
+        sampleBits = sample * (tileBytes*8 / numSamples);
+        pixelBits  = pixelIndex * bpp;
+    }
+    else
+    {
+        //
+        // Depth order: the samples of one element are stored contiguously, so a sample
+        // advances by one element and a pixel by numSamples elements.
+        //
+        sampleBits = sample * bpp;
+        pixelBits  = pixelIndex * bpp * numSamples;
+    }
+
+    //
+    // Split the element offset into the bit position within its byte (returned through
+    // pBitPosition) and a whole-byte offset.
+    //
+    const UINT_32 elementBits  = sampleBits + pixelBits;
+    const UINT_32 elementBytes = elementBits / 8;
+
+    *pBitPosition = elementBits % 8;
+
+    //
+    // Combine the slice group offset, the micro tile offset and the element byte offset
+    // into the final address.
+    //
+    const UINT_64 addr = groupOffset + tileOffset + elementBytes;
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputePixelCoordFromOffset
+*
+*   @brief
+*       Compute pixel coordinate from offset inside a micro tile
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::HwlComputePixelCoordFromOffset(
+    UINT_32         offset,             ///< [in] offset inside micro tile in bits
+    UINT_32         bpp,                ///< [in] bits per pixel
+    UINT_32         numSamples,         ///< [in] number of samples
+    AddrTileMode    tileMode,           ///< [in] tile mode
+    UINT_32         tileBase,           ///< [in] base offset within a tile
+    UINT_32         compBits,           ///< [in] component bits actually needed(for planar surface)
+    UINT_32*        pX,                 ///< [out] x coordinate
+    UINT_32*        pY,                 ///< [out] y coordinate
+    UINT_32*        pSlice,             ///< [in/out] slice index; the decoded z is added to it
+    UINT_32*        pSample,            ///< [out] sample index
+    AddrTileType    microTileType,      ///< [in] micro tiling type
+    BOOL_32         isDepthSampleOrder  ///< [in] TRUE if depth sample order in microtile is used
+    ) const
+{   // Decodes a bit offset inside a micro tile into x/y, a slice delta and a sample index.
+    UINT_32 x = 0;
+    UINT_32 y = 0;
+    UINT_32 z = 0;
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+    // For planar surface, we adjust offset according to tile base
+    if ((bpp != compBits) && (compBits != 0) && isDepthSampleOrder)
+    {
+        offset -= tileBase;
+
+        ADDR_ASSERT(microTileType == ADDR_NON_DISPLAYABLE ||
+                    microTileType == ADDR_DEPTH_SAMPLE_ORDER);
+
+        bpp = compBits; // from here on the element size is the component size
+    }
+
+    UINT_32 sampleTileBits;
+    UINT_32 samplePixelBits;
+    UINT_32 pixelIndex;
+
+    if (isDepthSampleOrder)
+    {
+        samplePixelBits = bpp * numSamples; // bits holding all samples of one pixel
+        pixelIndex = offset / samplePixelBits;
+        *pSample = (offset % samplePixelBits) / bpp;
+    }
+    else
+    {
+        sampleTileBits = MicroTilePixels * bpp * thickness; // bits holding one sample of the tile
+        *pSample = offset / sampleTileBits;
+        pixelIndex = (offset % sampleTileBits) / bpp;
+    }
+
+    if (microTileType != ADDR_THICK)
+    {
+        if (microTileType == ADDR_DISPLAYABLE) // displayable
+        {
+            switch (bpp)
+            {
+                case 8:
+                    x = pixelIndex & 0x7;
+                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4));
+                    break;
+                case 16:
+                    x = pixelIndex & 0x7;
+                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3));
+                    break;
+                case 32:
+                    x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0));
+                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2));
+                    break;
+                case 64:
+                    x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
+                    break;
+                case 128:
+                    x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,1));
+                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,0));
+                    break;
+                default: // other bpp values leave x and y at 0, without asserting
+                    break;
+            }
+        }
+        else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER)
+        {   // element_index[5:0] = { y[2], x[2], y[1], x[1], y[0], x[0] } for every bpp
+            x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+            y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
+        }
+        else if (microTileType == ADDR_ROTATED)
+        {
+            /*
+                8-Bit Elements
+                element_index[5:0] = { x[2], x[0], x[1], y[2], y[1], y[0] }
+
+                16-Bit Elements
+                element_index[5:0] = { x[2], x[1], x[0], y[2], y[1], y[0] }
+
+                32-Bit Elements
+                element_index[5:0] = { x[2], x[1], y[2], x[0], y[1], y[0] }
+
+                64-Bit Elements
+                element_index[5:0] = { y[2], x[2], x[1], y[1], x[0], y[0] }
+            */
+            switch(bpp)
+            {
+                case 8:
+                    x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4));
+                    y = pixelIndex & 0x7;
+                    break;
+                case 16:
+                    x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3));
+                    y = pixelIndex & 0x7;
+                    break;
+                case 32:
+                    x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2));
+                    y = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0));
+                    break;
+                case 64:
+                    x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
+                    y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+                    break;
+                default: // no rotated layout is listed above for other bpp values (128 included)
+                    ADDR_ASSERT_ALWAYS();
+                    break;
+            }
+        }
+
+        if (thickness > 1) // thick: z is taken from the bits above the 6-bit x/y index
+        {
+            z = Bits2Number(3, _BIT(pixelIndex,8),_BIT(pixelIndex,7),_BIT(pixelIndex,6));
+        }
+    }
+    else
+    {
+        ADDR_ASSERT((m_chipFamily >= ADDR_CHIP_FAMILY_CI) && (thickness > 1));
+        /*
+            8-Bit Elements and 16-Bit Elements
+            element_index[7:0] = { y[2], x[2], z[1], z[0], y[1], x[1], y[0], x[0] }
+
+            32-Bit Elements
+            element_index[7:0] = { y[2], x[2], z[1], y[1], z[0], x[1], y[0], x[0] }
+
+            64-Bit Elements and 128-Bit Elements
+            element_index[7:0] = { y[2], x[2], z[1], y[1], x[1], z[0], y[0], x[0] }
+
+            The equation to compute the element index for the extra thick tile:
+            element_index[8] = z[2]
+        */
+        switch (bpp)
+        {
+            case 8:
+            case 16: // fall-through
+                x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+                y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
+                z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,4));
+                break;
+            case 32:
+                x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+                y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
+                z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,3));
+                break;
+            case 64:
+            case 128: // fall-through
+                x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,3),_BIT(pixelIndex,0));
+                y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
+                z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,2));
+                break;
+            default:
+                ADDR_ASSERT_ALWAYS();
+                break;
+        }
+
+        if (thickness == 8)
+        {
+            z += Bits2Number(3,_BIT(pixelIndex,8),0,0); // element_index[8] = z[2]
+        }
+    }
+
+    *pX = x;
+    *pY = y;
+    *pSlice += z; // accumulates: z is added to the value already stored in *pSlice
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeSurfaceCoordFromAddr
+*
+*   @brief
+*       Compute (x,y,slice,sample) coordinates from surface address, dispatching on the tile mode
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::DispatchComputeSurfaceCoordFromAddr(
+    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
+    ) const
+{
+    UINT_64             addr               = pIn->addr;
+    UINT_32             bitPosition        = pIn->bitPosition;
+    UINT_32             bpp                = pIn->bpp;
+    UINT_32             pitch              = pIn->pitch;
+    UINT_32             height             = pIn->height;
+    UINT_32             numSlices          = pIn->numSlices;
+    UINT_32             numSamples         = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); // 0 is treated as 1
+    UINT_32             numFrags           = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags); // 0 defaults to numSamples
+    AddrTileMode        tileMode           = pIn->tileMode;
+    UINT_32             tileBase           = pIn->tileBase;
+    UINT_32             compBits           = pIn->compBits;
+    AddrTileType        microTileType      = pIn->tileType;
+    BOOL_32             ignoreSE           = pIn->ignoreSE;
+    BOOL_32             isDepthSampleOrder = pIn->isDepth;
+    ADDR_TILEINFO*      pTileInfo          = pIn->pTileInfo;
+
+    UINT_32*            pX                 = &pOut->x;
+    UINT_32*            pY                 = &pOut->y;
+    UINT_32*            pSlice             = &pOut->slice;
+    UINT_32*            pSample            = &pOut->sample;
+
+    if (microTileType == ADDR_DEPTH_SAMPLE_ORDER)
+    {
+        isDepthSampleOrder = TRUE; // the tile type forces depth sample order regardless of pIn->isDepth
+    }
+
+    if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
+    {
+        if (numFrags != numSamples)
+        {
+            numSamples = numFrags; // for chip families >= NI the fragment count replaces the sample count
+        }
+
+        /// @note
+        /// 128 bit/thick tiled surface doesn't support display tiling and
+        /// mipmap chain must have the same tileType, so please fill tileType correctly
+        if (!IsLinear(pIn->tileMode))
+        {
+            if (bpp >= 128 || ComputeSurfaceThickness(tileMode) > 1)
+            {
+                ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE);
+            }
+        }
+    }
+
+    switch (tileMode)
+    {
+        case ADDR_TM_LINEAR_GENERAL://fall through
+        case ADDR_TM_LINEAR_ALIGNED:
+            ComputeSurfaceCoordFromAddrLinear(addr,
+                                              bitPosition,
+                                              bpp,
+                                              pitch,
+                                              height,
+                                              numSlices,
+                                              pX,
+                                              pY,
+                                              pSlice,
+                                              pSample);
+            break;
+        case ADDR_TM_1D_TILED_THIN1://fall through
+        case ADDR_TM_1D_TILED_THICK:
+            ComputeSurfaceCoordFromAddrMicroTiled(addr,
+                                                  bitPosition,
+                                                  bpp,
+                                                  pitch,
+                                                  height,
+                                                  numSamples,
+                                                  tileMode,
+                                                  tileBase,
+                                                  compBits,
+                                                  pX,
+                                                  pY,
+                                                  pSlice,
+                                                  pSample,
+                                                  microTileType,
+                                                  isDepthSampleOrder);
+            break;
+        case ADDR_TM_2D_TILED_THIN1:    //fall through
+        case ADDR_TM_2D_TILED_THICK:    //fall through
+        case ADDR_TM_3D_TILED_THIN1:    //fall through
+        case ADDR_TM_3D_TILED_THICK:    //fall through
+        case ADDR_TM_2D_TILED_XTHICK:   //fall through
+        case ADDR_TM_3D_TILED_XTHICK:   //fall through
+        case ADDR_TM_PRT_TILED_THIN1:   //fall through
+        case ADDR_TM_PRT_2D_TILED_THIN1://fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1://fall through
+        case ADDR_TM_PRT_TILED_THICK:   //fall through
+        case ADDR_TM_PRT_2D_TILED_THICK://fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            UINT_32 pipeSwizzle; // both swizzles are assigned by one of the two branches below
+            UINT_32 bankSwizzle;
+
+            if (m_configFlags.useCombinedSwizzle) // swizzles are decoded from pIn->tileSwizzle
+            {
+                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+                                       &bankSwizzle, &pipeSwizzle);
+            }
+            else // swizzles are supplied separately by the caller
+            {
+                pipeSwizzle = pIn->pipeSwizzle;
+                bankSwizzle = pIn->bankSwizzle;
+            }
+
+            ComputeSurfaceCoordFromAddrMacroTiled(addr,
+                                                  bitPosition,
+                                                  bpp,
+                                                  pitch,
+                                                  height,
+                                                  numSamples,
+                                                  tileMode,
+                                                  tileBase,
+                                                  compBits,
+                                                  microTileType,
+                                                  ignoreSE,
+                                                  isDepthSampleOrder,
+                                                  pipeSwizzle,
+                                                  bankSwizzle,
+                                                  pTileInfo,
+                                                  pX,
+                                                  pY,
+                                                  pSlice,
+                                                  pSample);
+            break;
+        default:
+            ADDR_ASSERT_ALWAYS(); // unhandled tile mode: this function writes nothing to pOut here
+    }
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceCoordFromAddrMacroTiled
+*
+*   @brief
+*       Compute surface coordinates (x, y, slice, sample) from address for macro tiled surface
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeSurfaceCoordFromAddrMacroTiled(
+    UINT_64             addr,               ///< [in] byte address
+    UINT_32             bitPosition,        ///< [in] bit position
+    UINT_32             bpp,                ///< [in] bits per pixel
+    UINT_32             pitch,              ///< [in] pitch in pixels
+    UINT_32             height,             ///< [in] height in pixels
+    UINT_32             numSamples,         ///< [in] number of samples
+    AddrTileMode        tileMode,           ///< [in] tile mode
+    UINT_32             tileBase,           ///< [in] tile base offset
+    UINT_32             compBits,           ///< [in] component bits (for planar surface)
+    AddrTileType        microTileType,      ///< [in] micro tiling type
+    BOOL_32             ignoreSE,           ///< [in] TRUE if shader engines can be ignored
+    BOOL_32             isDepthSampleOrder, ///< [in] TRUE if depth sample order is used
+    UINT_32             pipeSwizzle,        ///< [in] pipe swizzle
+    UINT_32             bankSwizzle,        ///< [in] bank swizzle
+    ADDR_TILEINFO*      pTileInfo,          ///< [in] bank structure.
+                                            ///  **All fields to be valid on entry**
+    UINT_32*            pX,                 ///< [out] X coord
+    UINT_32*            pY,                 ///< [out] Y coord
+    UINT_32*            pSlice,             ///< [out] slice index
+    UINT_32*            pSample             ///< [out] sample index
+    ) const
+{
+    UINT_32 mx;
+    UINT_32 my;
+    UINT_64 tileBits;
+    UINT_64 macroTileBits;
+    UINT_32 slices;
+    UINT_32 tileSlices;
+    UINT_64 elementOffset;
+    UINT_64 macroTileIndex;
+    UINT_32 tileIndex;
+    UINT_64 totalOffset;
+
+
+    UINT_32 bank;
+    UINT_32 pipe;
+    UINT_32 groupBits = m_pipeInterleaveBytes << 3; // pipe interleave size in bits
+    UINT_32 pipes = HwlGetPipes(pTileInfo);
+    UINT_32 banks = pTileInfo->banks;
+
+    UINT_32 bankInterleave = m_bankInterleave;
+
+    UINT_64 addrBits = BYTES_TO_BITS(addr) + bitPosition;
+
+    //
+    // remove bits for bank and pipe
+    //
+    totalOffset = (addrBits % groupBits) +
+        (((addrBits / groupBits / pipes) % bankInterleave) * groupBits) +
+        (((addrBits / groupBits / pipes) / bankInterleave) / banks) * groupBits * bankInterleave;
+
+    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+    UINT_32 microTileBits = bpp * microTileThickness * MicroTilePixels * numSamples;
+
+    UINT_32 microTileBytes = BITS_TO_BYTES(microTileBits);
+    //
+    // Determine if tiles need to be split across slices.
+    //
+    // If the size of the micro tile is larger than the tile split size, then the tile will be
+    // split across multiple slices.
+    //
+    UINT_32 slicesPerTile = 1; //_State->TileSlices
+
+    if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1))
+    {   //don't support for thick mode
+
+        //
+        // Compute the number of slices per tile.
+        //
+        slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes;
+    }
+
+    tileBits = microTileBits / slicesPerTile; // micro tile bits
+
+    // macro tile width in micro tiles (not yet multiplied by MicroTileWidth)
+    UINT_32 macroWidth  = pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio;
+    // in micro tiles as well
+    UINT_32 macroHeight = pTileInfo->bankHeight * banks / pTileInfo->macroAspectRatio;
+
+    UINT_32 pitchInMacroTiles = pitch / MicroTileWidth / macroWidth;
+
+    macroTileBits = (macroWidth * macroHeight) * tileBits / (banks * pipes);
+
+    macroTileIndex = totalOffset / macroTileBits;
+
+    // pitchMacros * height / heightMacros (height uses MicroTileHeight, like *pY below)
+    UINT_32 macroTilesPerSlice = (pitch / (macroWidth * MicroTileWidth)) * height /
+        (macroHeight * MicroTileHeight);
+
+    slices = static_cast<UINT_32>(macroTileIndex / macroTilesPerSlice);
+
+    *pSlice = static_cast<UINT_32>(slices / slicesPerTile * microTileThickness);
+
+    //
+    // calculate element offset and x[2:0], y[2:0], z[1:0] for thick
+    //
+    tileSlices = slices % slicesPerTile;
+
+    elementOffset  = tileSlices * tileBits;
+    elementOffset += totalOffset % tileBits;
+
+    UINT_32 coordZ = 0;
+
+    HwlComputePixelCoordFromOffset(static_cast<UINT_32>(elementOffset),
+                                   bpp,
+                                   numSamples,
+                                   tileMode,
+                                   tileBase,
+                                   compBits,
+                                   pX,
+                                   pY,
+                                   &coordZ,
+                                   pSample,
+                                   microTileType,
+                                   isDepthSampleOrder);
+
+    macroTileIndex = macroTileIndex % macroTilesPerSlice; // macro tile index within its slice
+    *pY += static_cast<UINT_32>(macroTileIndex / pitchInMacroTiles * macroHeight * MicroTileHeight);
+    *pX += static_cast<UINT_32>(macroTileIndex % pitchInMacroTiles * macroWidth * MicroTileWidth);
+
+    *pSlice += coordZ;
+
+    tileIndex = static_cast<UINT_32>((totalOffset % macroTileBits) / tileBits);
+
+    my = (tileIndex / pTileInfo->bankWidth) % pTileInfo->bankHeight * MicroTileHeight;
+    mx = (tileIndex % pTileInfo->bankWidth) * pipes * MicroTileWidth;
+
+    *pY += my;
+    *pX += mx;
+
+    bank = ComputeBankFromAddr(addr, banks, pipes);
+    pipe = ComputePipeFromAddr(addr, pipes);
+
+    HwlComputeSurfaceCoord2DFromBankPipe(tileMode, // receives pX/pY by pointer along with bank/pipe
+                                         pX,
+                                         pY,
+                                         *pSlice,
+                                         bank,
+                                         pipe,
+                                         bankSwizzle,
+                                         pipeSwizzle,
+                                         tileSlices,
+                                         ignoreSE,
+                                         pTileInfo);
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceCoord2DFromBankPipe
+*
+*   @brief
+*       Compute surface x,y coordinate bits from bank/pipe info and store them in pOutput
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeSurfaceCoord2DFromBankPipe(
+    AddrTileMode        tileMode,   ///< [in] tile mode
+    UINT_32             x,          ///< [in] x coordinate
+    UINT_32             y,          ///< [in] y coordinate
+    UINT_32             slice,      ///< [in] slice index
+    UINT_32             bank,       ///< [in] bank number
+    UINT_32             pipe,       ///< [in] pipe number
+    UINT_32             bankSwizzle,///< [in] bank swizzle
+    UINT_32             pipeSwizzle,///< [in] pipe swizzle
+    UINT_32             tileSlices, ///< [in] slices in a micro tile
+    ADDR_TILEINFO*      pTileInfo,  ///< [in] bank structure. **All fields to be valid on entry**
+    CoordFromBankPipe*  pOutput     ///< [out] pointer to extracted x/y bits
+    ) const
+{
+    UINT_32 yBit3 = 0;
+    UINT_32 yBit4 = 0;
+    UINT_32 yBit5 = 0;
+    UINT_32 yBit6 = 0;
+
+    UINT_32 xBit3 = 0;
+    UINT_32 xBit4 = 0;
+    UINT_32 xBit5 = 0;
+
+    UINT_32 tileSplitRotation;
+
+    UINT_32 numPipes = HwlGetPipes(pTileInfo);
+
+    UINT_32 bankRotation = ComputeBankRotation(tileMode,
+                                               pTileInfo->banks, numPipes);
+
+    UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes);
+
+    UINT_32 xBit = x / (MicroTileWidth * pTileInfo->bankWidth * numPipes); // bit 0 is "x3" in the comments below
+    UINT_32 yBit = y / (MicroTileHeight * pTileInfo->bankHeight); // bit 0 is "y3" in the comments below
+
+    //calculate the bank and pipe before rotation and swizzle
+
+    switch (tileMode)
+    {
+        case ADDR_TM_2D_TILED_THIN1:  //fall through
+        case ADDR_TM_2D_TILED_THICK:  //fall through
+        case ADDR_TM_2D_TILED_XTHICK: //fall through
+        case ADDR_TM_3D_TILED_THIN1:  //fall through
+        case ADDR_TM_3D_TILED_THICK:  //fall through
+        case ADDR_TM_3D_TILED_XTHICK:
+            tileSplitRotation = ((pTileInfo->banks / 2) + 1);
+            break;
+        default:
+            tileSplitRotation =  0; // all other tile modes (e.g. PRT): no tile split rotation
+            break;
+    }
+
+    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+    bank ^= tileSplitRotation * tileSlices;
+    if (pipeRotation == 0)
+    {
+        bank ^= bankRotation * (slice / microTileThickness) + bankSwizzle; // '+' binds before '^='
+        bank %= pTileInfo->banks;
+        pipe ^= pipeSwizzle; // NOTE(review): pipe is updated here but never read again in this function
+    }
+    else
+    {
+        bank ^= bankRotation * (slice / microTileThickness) / numPipes + bankSwizzle;
+        bank %= pTileInfo->banks;
+        pipe ^= pipeRotation * (slice / microTileThickness) + pipeSwizzle;
+    }
+
+    if (pTileInfo->macroAspectRatio == 1)
+    {
+        switch (pTileInfo->banks)
+        {
+            case 2:
+                yBit3 = _BIT(bank, 0) ^ _BIT(xBit,0); //y3 = b0 ^ x3
+                break;
+            case 4:
+                yBit4 = _BIT(bank, 0) ^ _BIT(xBit,0); //y4 = b0 ^ x3
+                yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1); //y3 = b1 ^ x4
+                break;
+            case 8:
+                yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2); //y3 = b2 ^ x5
+                yBit5 = _BIT(bank, 0) ^ _BIT(xBit,0); //y5 = b0 ^ x3
+                yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ yBit5; //y4 = b1 ^ x4 ^ y5
+                break;
+            case 16:
+                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6
+                yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5
+                yBit6 = _BIT(bank, 0) ^ _BIT(xBit, 0); //y6 = b0 ^ x3
+                yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ yBit6; //y5 = b1 ^ x4 ^ y6
+                break;
+            default: // other bank counts leave every extracted bit at 0
+                break;
+        }
+
+    }
+    else if (pTileInfo->macroAspectRatio == 2)
+    {
+        switch (pTileInfo->banks)
+        {
+            case 2: //xBit3 = yBit3^b0
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,0);
+                break;
+            case 4: //xBit3=yBit4^b0; yBit3=xBit4^b1
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1);
+                yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1);
+                break;
+            case 8: //xBit4, xBit5, yBit5 are known
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2);
+                yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
+                yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ _BIT(yBit, 2);
+                break;
+            case 16://x4,x5,x6,y6 are known
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3); //x3 = y6 ^ b0
+                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3
+                yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = x5 ^ b2
+                yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ _BIT(yBit, 3); //y5=x4^y6^b1
+                break;
+            default:
+                break;
+        }
+    }
+    else if (pTileInfo->macroAspectRatio == 4)
+    {
+        switch (pTileInfo->banks)
+        {
+            case 4: //yBit3, yBit4
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1);
+                xBit4 = _BIT(bank, 1) ^ _BIT(yBit,0);
+                break;
+            case 8: //xBit5, yBit4, yBit5
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2);
+                yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
+                xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^  _BIT(yBit,2);
+                break;
+            case 16: //xBit5, xBit6, yBit5, yBit6
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = b0 ^ y6
+                xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = b1 ^ y5 ^ y6;
+                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6;
+                yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5;
+                break;
+            default:
+                break;
+        }
+    }
+    else if (pTileInfo->macroAspectRatio == 8)
+    {
+        switch (pTileInfo->banks)
+        {
+            case 8: //yBit3, yBit4, yBit5
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); //x3 = b0 ^ y5;
+                xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit, 2);//x4 = b1 ^ y4 ^ y5;
+                xBit5 = _BIT(bank, 2) ^ _BIT(yBit,0); //x5 = b2 ^ y3
+                break;
+            case 16: //xBit6, yBit4, yBit5, yBit6
+                xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = y6 ^ b0
+                xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = y5 ^ y6 ^ b1
+                xBit5 = _BIT(bank, 2) ^ _BIT(yBit, 1);//x5 = y4 ^ b2
+                yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3
+                break;
+            default:
+                break;
+        }
+    }
+
+    pOutput->xBits = xBit; // scaled-down input coordinates are passed through unchanged
+    pOutput->yBits = yBit;
+
+    pOutput->xBit3 = xBit3; // bits no case above assigned stay at their initial 0
+    pOutput->xBit4 = xBit4;
+    pOutput->xBit5 = xBit5;
+    pOutput->yBit3 = yBit3;
+    pOutput->yBit4 = yBit4;
+    pOutput->yBit5 = yBit5;
+    pOutput->yBit6 = yBit6;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlExtractBankPipeSwizzle
+*   @brief
+*       Hwl entry that decodes pIn->base256b into pOut->bankSwizzle and pOut->pipeSwizzle
+*   @return
+*       ADDR_E_RETURNCODE (always ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlExtractBankPipeSwizzle(
+    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,   ///< [in] input structure
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut   ///< [out] output structure
+    ) const
+{
+    UINT_32* const pBankOut = &pOut->bankSwizzle;
+    UINT_32* const pPipeOut = &pOut->pipeSwizzle;
+
+    // The extraction helper returns nothing, so this entry always reports success.
+    ExtractBankPipeSwizzle(pIn->base256b, pIn->pTileInfo, pBankOut, pPipeOut);
+    return ADDR_OK;
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlCombineBankPipeSwizzle
+*   @brief
+*       Pack bank/pipe swizzle (and base address) into a single tile swizzle value
+*   @return
+*       ADDR_OK on success, ADDR_INVALIDPARAMS when pTileSwizzle is null
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlCombineBankPipeSwizzle(
+    UINT_32         bankSwizzle,    ///< [in] bank swizzle
+    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] tile info
+    UINT_64         baseAddr,       ///< [in] base address
+    UINT_32*        pTileSwizzle    ///< [out] combined swizzle
+    ) const
+{
+    // Without somewhere to store the result there is nothing to compute.
+    if (!pTileSwizzle)
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    const UINT_32 combinedSwizzle =
+        GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo);
+
+    *pTileSwizzle = combinedSwizzle;
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeBaseSwizzle
+*   @brief
+*       Derive a base tile swizzle from pIn->surfIndex and store it in pOut->tileSwizzle
+*   @return
+*       ADDR_E_RETURNCODE (the result of HwlCombineBankPipeSwizzle)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeBaseSwizzle(
+    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut
+    ) const
+{
+    ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
+
+    ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
+    ADDR_ASSERT(pIn->pTileInfo);
+
+    /// Swizzle rows, one per bank count (legacy misreading of h/w doc, kept as it doesn't hurt).
+    static const UINT_8 bankRotationArray[4][16] = {
+        { 0, 0,  0, 0,  0, 0,  0, 0, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_2_BANK
+        { 0, 1,  2, 3,  0, 0,  0, 0, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_4_BANK
+        { 0, 3,  6, 1,  4, 7,  2, 5, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_8_BANK
+        { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK
+    };
+
+    // Default to 2 banks when no tile info is available.
+    UINT_32 numBanks = 2;
+    if (pTileInfo)
+    {
+        numBanks = pTileInfo->banks;
+    }
+
+    // Uses less bank swizzle bits
+    if (pIn->option.reduceBankBit && (numBanks > 2))
+    {
+        numBanks /= 2;
+    }
+
+    // Pick the bankRotationArray row for this bank count (row 0 when the count is unexpected).
+    UINT_32 rotationRow = 0;
+    switch (numBanks)
+    {
+        case 4:
+            rotationRow = 1;
+            break;
+        case 8:
+            rotationRow = 2;
+            break;
+        case 16:
+            rotationRow = 3;
+            break;
+        case 2:
+            break;
+        default:
+            ADDR_ASSERT_ALWAYS();
+            break;
+    }
+
+    // The surface index masked by (numBanks - 1) selects the swizzle.
+    const UINT_32 bankIndex = pIn->surfIndex & (numBanks - 1);
+    // ADDR_SWIZZLE_GEN_LINEAR uses the index itself; any other option looks it up in the table.
+    const UINT_32 bankSwizzle = (pIn->option.genOption == ADDR_SWIZZLE_GEN_LINEAR) ?
+        bankIndex : bankRotationArray[rotationRow][bankIndex];
+    // Pipe swizzle is only derived for 3D macro tile modes; otherwise it stays 0.
+    UINT_32 pipeSwizzle = 0;
+    if (IsMacro3dTiled(pIn->tileMode))
+    {
+        pipeSwizzle = pIn->surfIndex & (HwlGetPipes(pTileInfo) - 1);
+    }
+
+    return HwlCombineBankPipeSwizzle(bankSwizzle, pipeSwizzle, pTileInfo, 0, &pOut->tileSwizzle);
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ExtractBankPipeSwizzle
+*   @brief
+*       Split a base256b value into its bank swizzle and pipe swizzle fields
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ExtractBankPipeSwizzle(
+    UINT_32         base256b,       ///< [in] input base256b register value
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] 2D tile parameters. Client must provide all data
+    UINT_32*        pBankSwizzle,   ///< [out] bank swizzle
+    UINT_32*        pPipeSwizzle    ///< [out] pipe swizzle
+    ) const
+{
+    UINT_32 pipeField = 0;
+    UINT_32 bankField = 0;
+
+    // A zero base256b leaves both swizzles at 0 and never touches pTileInfo.
+    if (base256b != 0)
+    {
+        const UINT_32 pipeCount      = HwlGetPipes(pTileInfo);
+        const UINT_32 bankMask       = (1 << QLog2(pTileInfo->banks)) - 1;
+        const UINT_32 pipeMask       = (1 << QLog2(pipeCount)) - 1;
+        const UINT_32 groupBytes     = m_pipeInterleaveBytes;
+        const UINT_32 bankInterleave = m_bankInterleave;
+
+        // base256b divided by the pipe interleave size, the latter scaled down by 256 (>> 8).
+        const UINT_32 groupIndex = base256b / (groupBytes >> 8);
+        pipeField = groupIndex & pipeMask;
+        bankField = (groupIndex / pipeCount / bankInterleave) & bankMask;
+    }
+
+    *pPipeSwizzle = pipeField;
+    *pBankSwizzle = bankField;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::GetBankPipeSwizzle
+*   @brief
+*       Merge bank/pipe swizzle into the base address and return it shifted right by 8 bits
+*   @return
+*       Base256b bits (only filled bank/pipe bits)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::GetBankPipeSwizzle(
+    UINT_32         bankSwizzle,    ///< [in] bank swizzle
+    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
+    UINT_64         baseAddr,       ///< [in] base address
+    ADDR_TILEINFO*  pTileInfo       ///< [in] tile info
+    ) const
+{
+    const UINT_32 pipeShift       = QLog2(HwlGetPipes(pTileInfo));
+    const UINT_32 interleaveShift = QLog2(m_bankInterleave);
+
+    // Pipe swizzle fills the low bits; bank swizzle sits above the pipe and interleave bits.
+    const UINT_32 combined = ((bankSwizzle << interleaveShift) << pipeShift) + pipeSwizzle;
+
+    // Scale by the pipe interleave size, XOR into the base address, then drop the low 8 bits.
+    return static_cast<UINT_32>((baseAddr ^ (combined * m_pipeInterleaveBytes)) >> 8);
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSliceTileSwizzle
+*   @brief
+*       Compute the tile swizzle of one slice (cubemap face / 3d texture slice) from a base swizzle
+*   @return
+*       Tile swizzle (0 when the tile mode is not macro tiled)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeSliceTileSwizzle(
+    AddrTileMode        tileMode,       ///< [in] Tile mode
+    UINT_32             baseSwizzle,    ///< [in] Base swizzle
+    UINT_32             slice,          ///< [in] Slice index, Cubemap face index, 0 means +X
+    UINT_64             baseAddr,       ///< [in] Base address
+    ADDR_TILEINFO*      pTileInfo       ///< [in] Bank structure
+    ) const
+{
+    // Swizzle only for macro tile mode; every other mode reports 0.
+    if (!IsMacroTiled(tileMode))
+    {
+        return 0;
+    }
+
+    // Index of the micro tile layer that contains this slice.
+    const UINT_32 tileLayer = slice / ComputeSurfaceThickness(tileMode);
+
+    const UINT_32 pipeCount = HwlGetPipes(pTileInfo);
+    const UINT_32 bankCount = pTileInfo->banks;
+
+    // Per-layer rotation steps for this tile mode.
+    const UINT_32 pipeStep = ComputePipeRotation(tileMode, pipeCount);
+    const UINT_32 bankStep = ComputeBankRotation(tileMode, bankCount, pipeCount);
+
+    // Start from the swizzle encoded in baseSwizzle, or from 0 when it is 0.
+    UINT_32 bankPart = 0;
+    UINT_32 pipePart = 0;
+
+    if (baseSwizzle != 0)
+    {
+        ExtractBankPipeSwizzle(baseSwizzle,
+                               pTileInfo,
+                               &bankPart,
+                               &pipePart);
+    }
+
+    if (pipeStep != 0) // 3D mode: both pipe and bank rotate with the layer
+    {
+        pipePart += tileLayer * pipeStep;
+        pipePart %= pipeCount;
+        bankPart += tileLayer * bankStep / pipeCount;
+        bankPart %= bankCount;
+    }
+    else // 2D mode: only the bank rotates
+    {
+        bankPart += tileLayer * bankStep;
+        bankPart %= bankCount;
+    }
+
+    // Fold the rotated swizzles and the base address into the returned value.
+    return GetBankPipeSwizzle(bankPart,
+                              pipePart,
+                              baseAddr,
+                              pTileInfo);
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeQbStereoRightSwizzle
+*
+*   @brief
+*       Compute right eye swizzle from the bank found at coordinate (0, pInfo->height)
+*   @return
+*       swizzle (0 unless the surface is macro tiled and has both stereo info and tile info)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlComputeQbStereoRightSwizzle(
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo  ///< [in] Surface info, must be valid
+    ) const
+{
+    UINT_32 swizzle = 0;
+
+    // The assumption is default swizzle for left eye is 0
+    if (!IsMacroTiled(pInfo->tileMode) || !pInfo->pStereoInfo || !pInfo->pTileInfo)
+    {
+        return swizzle;
+    }
+
+    // Bank number at x = 0, y = height, with zero slice, bank swizzle and tile split slice.
+    const UINT_32 bankAtHeight = ComputeBankFromCoord(0, pInfo->height, 0,
+                                                      pInfo->tileMode, 0, 0, pInfo->pTileInfo);
+    if (bankAtHeight != 0)
+    {
+        HwlCombineBankPipeSwizzle(bankAtHeight, 0, pInfo->pTileInfo, 0, &swizzle);
+    }
+    return swizzle;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeBankFromCoord
+*
+*   @brief
+*       Compute bank number from coordinates: hash of x/y bits, then slice and tile-split rotation
+*   @return
+*       Bank number, masked with (numBanks - 1)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeBankFromCoord(
+    UINT_32         x,              ///< [in] x coordinate
+    UINT_32         y,              ///< [in] y coordinate
+    UINT_32         slice,          ///< [in] slice index
+    AddrTileMode    tileMode,       ///< [in] tile mode
+    UINT_32         bankSwizzle,    ///< [in] bank swizzle
+    UINT_32         tileSplitSlice, ///< [in] If the size of the pixel offset is larger than the
+                                    ///  tile split size, then the pixel will be moved to a separate
+                                    ///  slice. This value equals pixelOffset / tileSplitBytes
+                                    ///  in this case. Otherwise this is 0.
+    ADDR_TILEINFO*  pTileInfo       ///< [in] tile info
+    ) const
+{
+    UINT_32 pipes = HwlGetPipes(pTileInfo);
+    UINT_32 bankBit0 = 0;
+    UINT_32 bankBit1 = 0;
+    UINT_32 bankBit2 = 0;
+    UINT_32 bankBit3 = 0;
+    UINT_32 sliceRotation;
+    UINT_32 tileSplitRotation;
+    UINT_32 bank;
+    UINT_32 numBanks    = pTileInfo->banks;
+    UINT_32 bankWidth   = pTileInfo->bankWidth;
+    UINT_32 bankHeight  = pTileInfo->bankHeight;
+
+    UINT_32 tx = x / MicroTileWidth / (bankWidth * pipes); // x in units of (bankWidth * pipes) micro tiles
+    UINT_32 ty = y / MicroTileHeight / bankHeight;         // y in units of bankHeight micro tiles
+
+    UINT_32 x3 = _BIT(tx,0); // NOTE(review): _BIT(v, n) presumably yields bit n of v - confirm
+    UINT_32 x4 = _BIT(tx,1);
+    UINT_32 x5 = _BIT(tx,2);
+    UINT_32 x6 = _BIT(tx,3);
+    UINT_32 y3 = _BIT(ty,0);
+    UINT_32 y4 = _BIT(ty,1);
+    UINT_32 y5 = _BIT(ty,2);
+    UINT_32 y6 = _BIT(ty,3);
+
+    switch (numBanks) // each bank bit is an XOR of tx bits with ty bits taken in reverse order
+    {
+        case 16:
+            bankBit0 = x3 ^ y6;
+            bankBit1 = x4 ^ y5 ^ y6;
+            bankBit2 = x5 ^ y4;
+            bankBit3 = x6 ^ y3;
+            break;
+        case 8:
+            bankBit0 = x3 ^ y5;
+            bankBit1 = x4 ^ y4 ^ y5;
+            bankBit2 = x5 ^ y3;
+            break;
+        case 4:
+            bankBit0 = x3 ^ y4;
+            bankBit1 = x4 ^ y3;
+            break;
+        case 2:
+            bankBit0 = x3 ^ y3;
+            break;
+        default: // unsupported bank count: assert, all bank bits stay 0
+            ADDR_ASSERT_ALWAYS();
+            break;
+    }
+
+    bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3);
+
+    //Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0);
+
+    bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo);
+    //
+    // Compute bank rotation for the slice.
+    //
+    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+    switch (tileMode)
+    {
+        case ADDR_TM_2D_TILED_THIN1:  // fall through
+        case ADDR_TM_2D_TILED_THICK:  // fall through
+        case ADDR_TM_2D_TILED_XTHICK:
+            sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness);
+            break;
+        case ADDR_TM_3D_TILED_THIN1:  // fall through
+        case ADDR_TM_3D_TILED_THICK:  // fall through
+        case ADDR_TM_3D_TILED_XTHICK:
+            sliceRotation =
+                Max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes;
+            break;
+        default: // every other mode, the PRT modes included, gets no slice rotation here
+            sliceRotation =  0;
+            break;
+    }
+
+
+    //
+    // Compute bank rotation for the tile split slice.
+    //
+    // The sample slice will be non-zero if samples must be split across multiple slices.
+    // This situation arises when the micro tile size multiplied by the number of samples exceeds
+    // the split size (set in GB_ADDR_CONFIG).
+    //
+    switch (tileMode)
+    {
+        case ADDR_TM_2D_TILED_THIN1: //fall through
+        case ADDR_TM_3D_TILED_THIN1: //fall through
+        case ADDR_TM_PRT_2D_TILED_THIN1: //fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1: // last label of the THIN1 group
+            tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice;
+            break;
+        default:
+            tileSplitRotation =  0;
+            break;
+    }
+
+    //
+    // Apply bank rotation for the slice and tile split slice.
+    //
+    bank ^= bankSwizzle + sliceRotation; // '+' binds tighter than '^=': XOR with the sum
+    bank ^= tileSplitRotation;
+
+    bank &= (numBanks - 1); // the bank counts accepted above (2/4/8/16) are powers of two
+
+    return bank;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeBankFromAddr
+*
+*   @brief
+*       Compute the bank number from an address by shifting off the lower fields and masking
+*   @return
+*       Bank number, masked with (numBanks - 1)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeBankFromAddr(
+    UINT_64 addr,       ///< [in] address
+    UINT_32 numBanks,   ///< [in] number of banks
+    UINT_32 numPipes    ///< [in] number of pipes
+    ) const
+{
+    UINT_32 bank;
+
+    //
+    // The LSBs of the address are arranged as follows:
+    //   bank | bankInterleave | pipe | pipeInterleave
+    //
+    // To get the bank number, shift off the pipe interleave, pipe, and bank interleave bits and
+    // mask the bank bits.
+    //
+    bank = static_cast<UINT_32>(
+        (addr >> Log2(m_pipeInterleaveBytes * numPipes * m_bankInterleave)) &
+        (numBanks - 1) // NOTE(review): the mask assumes numBanks is a power of two - confirm
+        );
+
+    return bank;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputePipeRotation
+*
+*   @brief
+*       Compute pipe rotation value; only the 3D tiled modes (PRT included) rotate pipes
+*   @return
+*       3D tiled modes: 1 if numPipes < 4, else numPipes / 2 - 1; every other mode: 0
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputePipeRotation(
+    AddrTileMode tileMode,  ///< [in] tile mode
+    UINT_32      numPipes   ///< [in] number of pipes
+    ) const
+{
+   UINT_32 rotation;
+
+    switch (tileMode)
+    {
+        case ADDR_TM_3D_TILED_THIN1:        //fall through
+        case ADDR_TM_3D_TILED_THICK:        //fall through
+        case ADDR_TM_3D_TILED_XTHICK:       //fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1:    //fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            rotation = (numPipes < 4) ? 1 : (numPipes / 2 - 1); // never below 1 for 3D modes
+            break;
+        default: // no pipe rotation outside the 3D tiled modes
+            rotation = 0;
+    }
+
+    return rotation;
+}
+
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeBankRotation
+*
+*   @brief
+*       Compute bank rotation value; 2D modes derive it from numBanks, 3D modes from numPipes
+*   @return
+*       2D modes: numBanks / 2 - 1; 3D modes: 1 if numPipes < 4 else numPipes / 2 - 1; else 0
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeBankRotation(
+    AddrTileMode tileMode,  ///< [in] tile mode
+    UINT_32      numBanks,  ///< [in] number of banks
+    UINT_32      numPipes   ///< [in] number of pipes
+    ) const
+{
+    UINT_32 rotation;
+
+    switch (tileMode)
+    {
+        case ADDR_TM_2D_TILED_THIN1: // fall through
+        case ADDR_TM_2D_TILED_THICK: // fall through
+        case ADDR_TM_2D_TILED_XTHICK: // fall through
+        case ADDR_TM_PRT_2D_TILED_THIN1: // fall through
+        case ADDR_TM_PRT_2D_TILED_THICK:
+            // Rotate banks per Z-slice by 1 for 4-bank or 3 for 8-bank
+            rotation =  numBanks / 2 - 1;
+            break;
+        case ADDR_TM_3D_TILED_THIN1: // fall through
+        case ADDR_TM_3D_TILED_THICK: // fall through
+        case ADDR_TM_3D_TILED_XTHICK: // fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1: // fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            rotation = (numPipes < 4) ? 1 : (numPipes / 2 - 1);    // rotate pipes & banks
+            break;
+        default: // no bank rotation for any other tile mode
+            rotation = 0;
+    }
+
+    return rotation;
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeHtileBytes
+*
+*   @brief
+*       Compute htile size in bytes; alignment is per slice or per surface (useHtileSliceAlign)
+*
+*   @return
+*       Htile size in bytes for all slices; the per-slice size is returned through sliceBytes
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeHtileBytes(
+    UINT_32 pitch,        ///< [in] pitch
+    UINT_32 height,       ///< [in] height
+    UINT_32 bpp,          ///< [in] bits per pixel
+    BOOL_32 isLinear,     ///< [in] if it is linear mode (not read by this implementation)
+    UINT_32 numSlices,    ///< [in] number of slices
+    UINT_64* sliceBytes,  ///< [out] bytes per slice
+    UINT_32 baseAlign     ///< [in] base alignments (not read by this implementation)
+    ) const
+{
+    UINT_64 surfBytes;
+
+    const UINT_64 HtileCacheLineSize = BITS_TO_BYTES(HtileCacheBits);
+
+    *sliceBytes = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp / 64); // 64-bit math; bpp bits per 64 pixels
+
+    if (m_configFlags.useHtileSliceAlign)
+    {
+        // Align the sliceSize to htilecachelinesize * pipes at first
+        *sliceBytes = PowTwoAlign(*sliceBytes, HtileCacheLineSize * m_pipes);
+        surfBytes  = *sliceBytes * numSlices;
+    }
+    else
+    {
+        // Align the surfSize to htilecachelinesize * pipes at last
+        surfBytes  = *sliceBytes * numSlices; // *sliceBytes itself stays unaligned on this path
+        surfBytes  = PowTwoAlign(surfBytes, HtileCacheLineSize * m_pipes);
+    }
+
+    return surfBytes;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeFmaskInfo
+*
+*   @brief
+*       Compute fmask sizes including padded pitch, height, slices and total size in bytes,
+*       and output the alignments as well, by treating the fmask as a surface and calling
+*       HwlComputeSurfaceInfo. Results are returned through output parameters.
+*
+*   @return
+*       ADDR_E_RETURNCODE from HwlComputeSurfaceInfo; pOut fields are only filled on ADDR_OK
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::DispatchComputeFmaskInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut)  ///< [out] output structure
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+    ADDR_COMPUTE_SURFACE_INFO_INPUT  surfIn     = {0};
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT surfOut    = {0};
+
+    // Setup input structure
+    surfIn.tileMode          = pIn->tileMode;
+    surfIn.width             = pIn->pitch; // the fmask pitch is used as the surface width
+    surfIn.height            = pIn->height;
+    surfIn.numSlices         = pIn->numSlices;
+    surfIn.pTileInfo         = pIn->pTileInfo;
+    surfIn.tileType          = ADDR_NON_DISPLAYABLE;
+    surfIn.flags.fmask       = 1;
+
+    // Setup output structure
+    surfOut.pTileInfo       = pOut->pTileInfo;
+
+    // Setup hwl specific fields
+    HwlFmaskPreThunkSurfInfo(pIn, pOut, &surfIn, &surfOut);
+
+    surfIn.bpp = HwlComputeFmaskBits(pIn, &surfIn.numSamples); // numSamples passed by pointer, presumably adjusted
+
+    // ComputeSurfaceInfo needs numSamples in surfOut as surface routines need adjusted numSamples
+    surfOut.numSamples = surfIn.numSamples;
+
+    retCode = HwlComputeSurfaceInfo(&surfIn, &surfOut);
+
+    // Save bpp field for surface dump support
+    surfOut.bpp = surfIn.bpp; // done regardless of retCode
+
+    if (retCode == ADDR_OK)
+    {
+        pOut->bpp               = surfOut.bpp;
+        pOut->pitch             = surfOut.pitch;
+        pOut->height            = surfOut.height;
+        pOut->numSlices         = surfOut.depth;
+        pOut->fmaskBytes        = surfOut.surfSize;
+        pOut->baseAlign         = surfOut.baseAlign;
+        pOut->pitchAlign        = surfOut.pitchAlign;
+        pOut->heightAlign       = surfOut.heightAlign;
+
+        if (surfOut.depth > 1)
+        {
+            // For fmask, expNumSlices is stored in depth.
+            pOut->sliceSize = surfOut.surfSize / surfOut.depth;
+        }
+        else
+        {
+            pOut->sliceSize = surfOut.surfSize; // depth of 0 or 1: the whole surface is one slice
+        }
+
+        // Save numSamples field for surface dump support
+        pOut->numSamples        = surfOut.numSamples;
+
+        HwlFmaskPostThunkSurfInfo(&surfOut, pOut);
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeFmaskInfo
+*   @brief
+*       Entry of EgBasedAddrLib ComputeFmaskInfo; lends pOut a temporary tile info if it has none
+*   @return
+*       ADDR_E_RETURNCODE from DispatchComputeFmaskInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut   ///< [out] output structure
+    )
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+    ADDR_TILEINFO tileInfo = {0}; // local scratch; must not stay referenced after return
+
+    // Use internal tile info if pOut does not have a valid pTileInfo
+    if (pOut->pTileInfo == NULL)
+    {
+        pOut->pTileInfo = &tileInfo;
+    }
+
+    retCode = DispatchComputeFmaskInfo(pIn, pOut);
+
+    if (retCode == ADDR_OK)
+    {
+        pOut->tileIndex =
+            HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE,
+                                  pOut->tileIndex);
+    }
+
+    // Resets pTileInfo to NULL if the internal tile info is used
+    if (pOut->pTileInfo == &tileInfo) // avoids handing the caller a pointer to a dead local
+    {
+        pOut->pTileInfo = NULL;
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeFmaskAddrFromCoord
+*   @brief
+*       Entry of EgBasedAddrLib ComputeFmaskAddrFromCoord; validates input, then dispatches
+*   @return
+*       ADDR_INVALIDPARAMS on bad input, else ADDR_OK (always ADDR_OK without ADDR_AM_BUILD)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskAddrFromCoord(
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK; // returned as-is, pOut untouched, when ADDR_AM_BUILD is off
+
+#if ADDR_AM_BUILD
+    if ((pIn->x > pIn->pitch)               || // NOTE(review): '>' lets x == pitch through - confirm
+        (pIn->y > pIn->height)              || // NOTE(review): '>' lets y == height through - confirm
+        (pIn->numSamples > m_maxSamples)    ||
+        (pIn->sample >= m_maxSamples))
+    {
+        retCode = ADDR_INVALIDPARAMS;
+    }
+    else
+    {
+        pOut->addr = DispatchComputeFmaskAddrFromCoord(pIn, pOut); // also writes pOut->bitPosition
+    }
+#endif
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeFmaskCoordFromAddr
+*   @brief
+*       Entry of EgBasedAddrLib ComputeFmaskCoordFromAddr; validates input, then dispatches
+*   @return
+*       ADDR_INVALIDPARAMS on bad input, else ADDR_OK (always ADDR_OK without ADDR_AM_BUILD)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskCoordFromAddr(
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK; // returned as-is, pOut untouched, when ADDR_AM_BUILD is off
+
+#if ADDR_AM_BUILD
+    if ((pIn->bitPosition >= 8) ||           // bit position must lie within one byte
+        (pIn->numSamples > m_maxSamples))
+    {
+        retCode = ADDR_INVALIDPARAMS;
+    }
+    else
+    {
+        DispatchComputeFmaskCoordFromAddr(pIn, pOut);
+    }
+#endif
+
+    return retCode;
+}
+
+#if ADDR_AM_BUILD
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeFmaskAddrFromCoord
+*
+*   @brief
+*       Computes the FMASK address and bit position from a coordinate, dispatching on tile mode.
+*   @return
+*       The byte address; 0 (with bit position 0) for tile modes other than 1D/2D/3D THIN1
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::DispatchComputeFmaskAddrFromCoord(
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    UINT_32             x                 = pIn->x;
+    UINT_32             y                 = pIn->y;
+    UINT_32             slice             = pIn->slice;
+    UINT_32             sample            = pIn->sample;
+    UINT_32             plane             = pIn->plane;
+    UINT_32             pitch             = pIn->pitch;
+    UINT_32             height            = pIn->height;
+    UINT_32             numSamples        = pIn->numSamples;
+    AddrTileMode        tileMode          = pIn->tileMode;
+    BOOL_32             ignoreSE          = pIn->ignoreSE;
+    ADDR_TILEINFO*      pTileInfo         = pIn->pTileInfo;
+    BOOL_32             resolved          = pIn->resolved;
+
+    UINT_32* pBitPosition = &pOut->bitPosition; // helpers write the bit position straight into pOut
+    UINT_64 addr          = 0;
+
+    ADDR_ASSERT(numSamples > 1);
+    ADDR_ASSERT(ComputeSurfaceThickness(tileMode) == 1); // only thin tile modes are expected here
+
+    switch (tileMode)
+    {
+        case ADDR_TM_1D_TILED_THIN1:
+            addr = ComputeFmaskAddrFromCoordMicroTiled(x,
+                                                       y,
+                                                       slice,
+                                                       sample,
+                                                       plane,
+                                                       pitch,
+                                                       height,
+                                                       numSamples,
+                                                       tileMode,
+                                                       resolved,
+                                                       pBitPosition);
+            break;
+        case ADDR_TM_2D_TILED_THIN1: //fall through
+        case ADDR_TM_3D_TILED_THIN1:
+            UINT_32 pipeSwizzle;
+            UINT_32 bankSwizzle;
+
+            if (m_configFlags.useCombinedSwizzle) // split the combined tileSwizzle into bank and pipe parts
+            {
+                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+                                       &bankSwizzle, &pipeSwizzle);
+            }
+            else
+            {
+                pipeSwizzle = pIn->pipeSwizzle;
+                bankSwizzle = pIn->bankSwizzle;
+            }
+
+            addr = ComputeFmaskAddrFromCoordMacroTiled(x,
+                                                       y,
+                                                       slice,
+                                                       sample,
+                                                       plane,
+                                                       pitch,
+                                                       height,
+                                                       numSamples,
+                                                       tileMode,
+                                                       pipeSwizzle,
+                                                       bankSwizzle,
+                                                       ignoreSE,
+                                                       pTileInfo,
+                                                       resolved,
+                                                       pBitPosition);
+            break;
+        default: // any other tile mode: addr stays 0 and the bit position is cleared
+            *pBitPosition = 0;
+            break;
+    }
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskAddrFromCoordMicroTiled
+*
+*   @brief
+*       Computes the FMASK address and bit position from a coordinate for 1D tiled (micro
+*       tiled)
+*   @return
+*       The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeFmaskAddrFromCoordMicroTiled(
+    UINT_32             x,              ///< [in] x coordinate
+    UINT_32             y,              ///< [in] y coordinate
+    UINT_32             slice,          ///< [in] slice index
+    UINT_32             sample,         ///< [in] sample number
+    UINT_32             plane,          ///< [in] plane number
+    UINT_32             pitch,          ///< [in] surface pitch in pixels
+    UINT_32             height,         ///< [in] surface height in pixels
+    UINT_32             numSamples,     ///< [in] number of samples
+    AddrTileMode        tileMode,       ///< [in] tile mode
+    BOOL_32             resolved,       ///< [in] TRUE if this is for resolved fmask
+    UINT_32*            pBitPosition    ///< [out] pointer to returned bit position
+    ) const
+{
+    UINT_64 addr = 0;
+    UINT_32 effectiveBpp;
+    UINT_32 effectiveSamples;
+
+    //
+    // 2xAA use the same layout as 4xAA
+    //
+    if (numSamples == 2)
+    {
+        numSamples = 4;
+    }
+
+    //
+    // Unresolved fmask is addressed per plane; resolved fmask is addressed per sample.
+    //
+    if (!resolved)
+    {
+        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
+        effectiveBpp = numSamples;
+
+        //
+        // Compute the address just like a color surface with numSamples bits per element and
+        // numPlanes samples.
+        //
+        addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
+                                                     y,
+                                                     slice,
+                                                     plane, // sample
+                                                     effectiveBpp,
+                                                     pitch,
+                                                     height,
+                                                     effectiveSamples,
+                                                     tileMode,
+                                                     ADDR_NON_DISPLAYABLE,
+                                                     FALSE,
+                                                     pBitPosition);
+
+        //
+        // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
+        //
+
+        //
+        // Compute the pixel index within the micro tile
+        //
+        UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x % 8,
+                                                              y % 8,
+                                                              slice,
+                                                              1,
+                                                              tileMode,
+                                                              ADDR_NON_DISPLAYABLE);
+
+        *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1); // overrides the value set above
+
+        UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition;
+
+        addr = bitAddr / 8;
+    }
+    else
+    {
+        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+        effectiveSamples = 1;
+
+        //
+        // Compute the address just like a color surface with the resolved fmask bpp per element
+        // and a single sample.
+        //
+        addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
+                                                     y,
+                                                     slice,
+                                                     sample,
+                                                     effectiveBpp,
+                                                     pitch,
+                                                     height,
+                                                     effectiveSamples,
+                                                     tileMode,
+                                                     ADDR_NON_DISPLAYABLE,
+                                                     TRUE,
+                                                     pBitPosition);
+    }
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskAddrFromCoordMacroTiled
+*
+*   @brief
+*       Computes the FMASK address and bit position from a coordinate for 2D tiled (macro
+*       tiled)
+*   @return
+*       The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeFmaskAddrFromCoordMacroTiled(
+    UINT_32             x,              ///< [in] x coordinate
+    UINT_32             y,              ///< [in] y coordinate
+    UINT_32             slice,          ///< [in] slice index
+    UINT_32             sample,         ///< [in] sample number
+    UINT_32             plane,          ///< [in] plane number
+    UINT_32             pitch,          ///< [in] surface pitch in pixels
+    UINT_32             height,         ///< [in] surface height in pixels
+    UINT_32             numSamples,     ///< [in] number of samples
+    AddrTileMode        tileMode,       ///< [in] tile mode
+    UINT_32             pipeSwizzle,    ///< [in] pipe swizzle
+    UINT_32             bankSwizzle,    ///< [in] bank swizzle
+    BOOL_32             ignoreSE,       ///< [in] TRUE if ignore shader engine
+    ADDR_TILEINFO*      pTileInfo,      ///< [in] bank structure.**All fields to be valid on entry**
+    BOOL_32             resolved,       ///< [in] TRUE if this is for resolved fmask
+    UINT_32*            pBitPosition    ///< [out] pointer to returned bit position
+    ) const
+{
+    UINT_64 addr = 0;
+    UINT_32 effectiveBpp;
+    UINT_32 effectiveSamples;
+
+    //
+    // 2xAA use the same layout as 4xAA
+    //
+    if (numSamples == 2)
+    {
+        numSamples = 4;
+    }
+
+    //
+    // Unresolved fmask is addressed per plane; resolved fmask is addressed per sample.
+    //
+    if (!resolved)
+    {
+        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
+        effectiveBpp = numSamples;
+
+        //
+        // Compute the address just like a color surface with numSamples bits per element and
+        // numPlanes samples.
+        //
+        addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
+                                                     y,
+                                                     slice,
+                                                     plane, // sample
+                                                     effectiveBpp,
+                                                     pitch,
+                                                     height,
+                                                     effectiveSamples,
+                                                     tileMode,
+                                                     ADDR_NON_DISPLAYABLE,// isdisp
+                                                     ignoreSE,// ignore_shader
+                                                     FALSE,// depth_sample_order
+                                                     pipeSwizzle,
+                                                     bankSwizzle,
+                                                     pTileInfo,
+                                                     pBitPosition);
+
+        //
+        // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
+        //
+
+
+        //
+        // Compute the pixel index within the micro tile
+        //
+        UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x , // NOTE(review): micro-tiled variant passes x % 8, y % 8
+                                                              y ,
+                                                              slice,
+                                                              effectiveBpp, // NOTE(review): micro-tiled variant passes 1 here
+                                                              tileMode,
+                                                              ADDR_NON_DISPLAYABLE);
+
+        *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1); // overrides the value set above
+
+        UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition;
+
+        addr = bitAddr / 8;
+
+    }
+    else
+    {
+        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+        effectiveSamples = 1;
+
+        //
+        // Compute the address just like a color surface with the resolved fmask bpp per element
+        // and a single sample.
+        //
+        addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
+                                                     y,
+                                                     slice,
+                                                     sample,
+                                                     effectiveBpp,
+                                                     pitch,
+                                                     height,
+                                                     effectiveSamples,
+                                                     tileMode,
+                                                     ADDR_NON_DISPLAYABLE,
+                                                     ignoreSE,
+                                                     TRUE,
+                                                     pipeSwizzle,
+                                                     bankSwizzle,
+                                                     pTileInfo,
+                                                     pBitPosition);
+    }
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskCoordFromAddrMicroTiled
+*
+*   @brief
+*       Compute (x,y,slice,sample,plane) coordinates from fmask address for micro tiled fmask
+*   @return
+*       N/A; results go to the output pointers (pPlane is not passed on in the resolved path)
+*
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeFmaskCoordFromAddrMicroTiled(
+    UINT_64             addr,       ///< [in] byte address
+    UINT_32             bitPosition,///< [in] bit position
+    UINT_32             pitch,      ///< [in] pitch in pixels
+    UINT_32             height,     ///< [in] height in pixels
+    UINT_32             numSamples, ///< [in] number of samples (of color buffer)
+    AddrTileMode        tileMode,   ///< [in] tile mode
+    BOOL_32             resolved,   ///< [in] TRUE if it is resolved fmask
+    UINT_32*            pX,         ///< [out] X coord
+    UINT_32*            pY,         ///< [out] Y coord
+    UINT_32*            pSlice,     ///< [out] slice index
+    UINT_32*            pSample,    ///< [out] sample index
+    UINT_32*            pPlane      ///< [out] plane index
+    ) const
+{
+    UINT_32 effectiveBpp;
+    UINT_32 effectiveSamples;
+
+    // 2xAA use the same layout as 4xAA
+    if (numSamples == 2)
+    {
+        numSamples = 4;
+    }
+
+    if (!resolved)
+    {
+        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples); // planes act as samples
+        effectiveBpp  = numSamples;                                         // numSamples bits per element
+
+        ComputeSurfaceCoordFromAddrMicroTiled(addr,
+                                              bitPosition,
+                                              effectiveBpp,
+                                              pitch,
+                                              height,
+                                              effectiveSamples,
+                                              tileMode,
+                                              0, // tileBase
+                                              0, // compBits
+                                              pX,
+                                              pY,
+                                              pSlice,
+                                              pPlane, // the surface routine's sample slot carries the plane
+                                              ADDR_NON_DISPLAYABLE, // microTileType
+                                              FALSE  // isDepthSampleOrder
+                                              );
+
+
+        if ( pSample ) // pSample is optional in the unresolved path
+        {
+            *pSample = bitPosition % numSamples;
+        }
+    }
+    else
+    {
+        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+        effectiveSamples = 1;
+
+        ComputeSurfaceCoordFromAddrMicroTiled(addr,
+                                              bitPosition,
+                                              effectiveBpp,
+                                              pitch,
+                                              height,
+                                              effectiveSamples,
+                                              tileMode,
+                                              0,     // tileBase
+                                              0,     // compBits
+                                              pX,
+                                              pY,
+                                              pSlice,
+                                              pSample,
+                                              ADDR_NON_DISPLAYABLE, // microTileType
+                                              TRUE   // isDepthSampleOrder
+                                              );
+    }
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskCoordFromAddrMacroTiled
+*
+*   @brief
+*       Compute (x,y,slice,sample,plane) coordinates from a macro-tiled fmask address by
+*       forwarding to ComputeSurfaceCoordFromAddrMacroTiled with fmask-specific bpp/samples
+*   @return
+*       N/A (outputs go through the pointer arguments; pPlane is not written when resolved)
+*
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeFmaskCoordFromAddrMacroTiled(
+    UINT_64             addr,       ///< [in] byte address
+    UINT_32             bitPosition,///< [in] bit position
+    UINT_32             pitch,      ///< [in] pitch in pixels
+    UINT_32             height,     ///< [in] height in pixels
+    UINT_32             numSamples, ///< [in] number of samples (of color buffer)
+    AddrTileMode        tileMode,   ///< [in] tile mode
+    UINT_32             pipeSwizzle,///< [in] pipe swizzle
+    UINT_32             bankSwizzle,///< [in] bank swizzle
+    BOOL_32             ignoreSE,   ///< [in] TRUE if ignore shader engine
+    ADDR_TILEINFO*      pTileInfo,  ///< [in] bank structure. **All fields to be valid on entry**
+    BOOL_32             resolved,   ///< [in] TRUE if it is resolved fmask
+    UINT_32*            pX,         ///< [out] X coord
+    UINT_32*            pY,         ///< [out] Y coord
+    UINT_32*            pSlice,     ///< [out] slice index
+    UINT_32*            pSample,    ///< [out] sample index
+    UINT_32*            pPlane      ///< [out] plane index
+    ) const
+{
+    UINT_32 effectiveBpp;
+    UINT_32 effectiveSamples;
+
+    // 2xAA use the same layout as 4xAA
+    if (numSamples == 2)
+    {
+        numSamples = 4;
+    }
+
+    //
+    // Compute the number of planes.
+    //
+    if (!resolved)
+    {
+        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples); // planes act as samples
+        effectiveBpp  = numSamples; // element width in bits equals the (adjusted) sample count
+
+        ComputeSurfaceCoordFromAddrMacroTiled(addr,
+                                              bitPosition,
+                                              effectiveBpp,
+                                              pitch,
+                                              height,
+                                              effectiveSamples,
+                                              tileMode,
+                                              0, // No tileBase
+                                              0, // No compBits
+                                              ADDR_NON_DISPLAYABLE,
+                                              ignoreSE,
+                                              FALSE, // NOTE(review): presumably isDepthSampleOrder - confirm
+                                              pipeSwizzle,
+                                              bankSwizzle,
+                                              pTileInfo,
+                                              pX,
+                                              pY,
+                                              pSlice,
+                                              pPlane); // same slot that receives pSample when resolved
+
+        if (pSample)
+        {
+            *pSample = bitPosition % numSamples; // uses numSamples after the 2 -> 4 adjustment
+        }
+    }
+    else
+    {
+        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+        effectiveSamples = 1; // a resolved fmask is addressed as a single-sample surface
+
+        ComputeSurfaceCoordFromAddrMacroTiled(addr,
+                                              bitPosition,
+                                              effectiveBpp,
+                                              pitch,
+                                              height,
+                                              effectiveSamples,
+                                              tileMode,
+                                              0, // No tileBase
+                                              0, // No compBits
+                                              ADDR_NON_DISPLAYABLE,
+                                              ignoreSE,
+                                              TRUE, // NOTE(review): presumably isDepthSampleOrder - confirm
+                                              pipeSwizzle,
+                                              bankSwizzle,
+                                              pTileInfo,
+                                              pX,
+                                              pY,
+                                              pSlice,
+                                              pSample);
+    }
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeFmaskCoordFromAddr
+*
+*   @brief
+*       Select the micro-tiled (1D thin1) or macro-tiled (2D/3D thin1) fmask coord-from-address
+*       routine based on pIn->tileMode; any other tile mode asserts and writes nothing to pOut
+*   @return
+*       N/A (x, y, slice, sample and plane are written into pOut)
+*
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::DispatchComputeFmaskCoordFromAddr(
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    UINT_64             addr              = pIn->addr;
+    UINT_32             bitPosition       = pIn->bitPosition;
+    UINT_32             pitch             = pIn->pitch;
+    UINT_32             height            = pIn->height;
+    UINT_32             numSamples        = pIn->numSamples;
+    AddrTileMode        tileMode          = pIn->tileMode;
+    BOOL_32             ignoreSE          = pIn->ignoreSE;
+    ADDR_TILEINFO*      pTileInfo         = pIn->pTileInfo;
+    BOOL_32             resolved          = pIn->resolved;
+
+    UINT_32*            pX      = &pOut->x;
+    UINT_32*            pY      = &pOut->y;
+    UINT_32*            pSlice  = &pOut->slice;
+    UINT_32*            pSample = &pOut->sample;
+    UINT_32*            pPlane  = &pOut->plane;
+
+    switch (tileMode)
+    {
+        case ADDR_TM_1D_TILED_THIN1:
+            ComputeFmaskCoordFromAddrMicroTiled(addr,
+                                                bitPosition,
+                                                pitch,
+                                                height,
+                                                numSamples,
+                                                tileMode,
+                                                resolved,
+                                                pX,
+                                                pY,
+                                                pSlice,
+                                                pSample,
+                                                pPlane);
+            break;
+        case ADDR_TM_2D_TILED_THIN1://fall through
+        case ADDR_TM_3D_TILED_THIN1:
+            UINT_32 pipeSwizzle;
+            UINT_32 bankSwizzle;
+
+            if (m_configFlags.useCombinedSwizzle) // swizzle arrives combined in pIn->tileSwizzle
+            {
+                ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+                                       &bankSwizzle, &pipeSwizzle);
+            }
+            else // swizzle arrives as two separate input fields
+            {
+                pipeSwizzle = pIn->pipeSwizzle;
+                bankSwizzle = pIn->bankSwizzle;
+            }
+
+            ComputeFmaskCoordFromAddrMacroTiled(addr,
+                                                bitPosition,
+                                                pitch,
+                                                height,
+                                                numSamples,
+                                                tileMode,
+                                                pipeSwizzle,
+                                                bankSwizzle,
+                                                ignoreSE,
+                                                pTileInfo,
+                                                resolved,
+                                                pX,
+                                                pY,
+                                                pSlice,
+                                                pSample,
+                                                pPlane);
+            break;
+        default:
+            ADDR_ASSERT_ALWAYS(); // unsupported tile mode for fmask; pOut is left as it was
+            break;
+
+    }
+}
+#endif
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskNumPlanesFromNumSamples
+*
+*   @brief
+*       Map a sample count (2, 4 or 8) to the number of fmask planes (1, 2 or 4)
+*
+*   @return
+*       Number of planes; 0 (after ADDR_UNHANDLED_CASE) for any other sample count
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeFmaskNumPlanesFromNumSamples(
+    UINT_32 numSamples)     ///< [in] number of samples
+{
+    UINT_32 planeCount;
+
+    //
+    // An FMASK micro tile is built from elements that are N bits wide, N being the sample
+    // count.  Each bit of the FMASK address has its own micro tile; these per-bit micro tiles
+    // (sometimes called planes) are laid out one after another.
+    // 2 samples: looks like a general surface with 2 bits per element.
+    // 4 samples: looks like a general surface with 4 bits per element and 2 samples.
+    // 8 samples: looks like a general surface with 8 bits per element and 4 samples.
+    // R6xx and R7xx kept only 3 planes for 8-sample FMASK surfaces; R8xx changed this to
+    // simplify the logic in the CB.
+    //
+    if (numSamples == 2)
+    {
+        planeCount = 1;
+    }
+    else if (numSamples == 4)
+    {
+        planeCount = 2;
+    }
+    else if (numSamples == 8)
+    {
+        planeCount = 4;
+    }
+    else
+    {
+        ADDR_UNHANDLED_CASE();
+        planeCount = 0;
+    }
+    return planeCount;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskResolvedBppFromNumSamples
+*
+*   @brief
+*       Map a sample count (2, 4 or 8) to the effective bpp of a resolved fmask (8, 8 or 32)
+*
+*   @return
+*       bpp; 0 (after ADDR_UNHANDLED_CASE) for any other sample count
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeFmaskResolvedBppFromNumSamples(
+    UINT_32 numSamples)     ///< number of samples
+{
+    UINT_32 resolvedBpp;
+
+    //
+    // A resolved FMASK surface is generated by the CB and read by the texture unit, which
+    // lets the texture unit read compressed multi-sample color data.
+    // Every element stores the packed index values and contains at least
+    // num_samples * log2(num_samples) bits.
+    // Addressing equivalents for resolved FMASK surfaces:
+    // 2-sample: like a color surface with 8 bits per element and 1 sample.
+    // 4-sample: like a color surface with 8 bits per element and 1 sample.
+    // 8-sample: like a color surface with 32 bits per element and 1 sample.
+
+    // 2 and 4 samples share the 8-bit element size
+    if ((numSamples == 2) || (numSamples == 4))
+    {
+        resolvedBpp = 8;
+    }
+    // 8 samples use a 32-bit element
+    else if (numSamples == 8)
+    {
+        resolvedBpp = 32;
+    }
+    // Anything else is reported and mapped to 0
+    else
+    {
+        ADDR_UNHANDLED_CASE();
+        resolvedBpp = 0;
+    }
+    return resolvedBpp;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::IsTileInfoAllZero
+*
+*   @brief
+*       Return TRUE if every checked tile info field (including pipeConfig) is zero
+*   @note
+*       A NULL input is considered to be all zero
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::IsTileInfoAllZero(
+    ADDR_TILEINFO* pTileInfo)
+{
+    // Nothing to inspect: a missing tile info counts as all zero
+    if (pTileInfo == NULL)
+    {
+        return TRUE;
+    }
+
+    // The answer is TRUE only when each of the six fields below is zero;
+    // a single non-zero field is enough to make it FALSE.
+    const BOOL_32 everyFieldZero =
+        (pTileInfo->banks            == 0)  &&
+        (pTileInfo->bankWidth        == 0)  &&
+        (pTileInfo->bankHeight       == 0)  &&
+        (pTileInfo->macroAspectRatio == 0)  &&
+        (pTileInfo->tileSplitBytes   == 0)  &&
+        (pTileInfo->pipeConfig       == 0);
+
+    return everyFieldZero ? TRUE : FALSE;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlTileInfoEqual
+*
+*   @brief
+*       Return TRUE if banks, bankWidth, bankHeight, macroAspectRatio and tileSplitBytes match
+*   @note
+*       Only takes care of current HWL's data; neither pointer is checked for NULL
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::HwlTileInfoEqual(
+    const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand
+    const ADDR_TILEINFO* pRight ///<[in] Right compare operand
+    ) const
+{
+    // A single mismatch among the five compared fields makes the operands unequal;
+    // pipeConfig does not take part in the comparison here.
+    if ((pLeft->banks            != pRight->banks)            ||
+        (pLeft->bankWidth        != pRight->bankWidth)        ||
+        (pLeft->bankHeight       != pRight->bankHeight)       ||
+        (pLeft->macroAspectRatio != pRight->macroAspectRatio) ||
+        (pLeft->tileSplitBytes   != pRight->tileSplitBytes))
+    {
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlConvertTileInfoToHW
+*   @brief
+*       Convert tile info fields from real values to encoded indices, or back if pIn->reverse
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS for a NULL tile info pointer or an unlisted field value
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlConvertTileInfoToHW(
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode   = ADDR_OK;
+
+    ADDR_TILEINFO *pTileInfoIn  = pIn->pTileInfo;
+    ADDR_TILEINFO *pTileInfoOut = pOut->pTileInfo;
+
+    if ((pTileInfoIn != NULL) && (pTileInfoOut != NULL))
+    {
+        if (pIn->reverse == FALSE) // forward direction: real value -> encoded index
+        {
+            switch (pTileInfoIn->banks) // 2/4/8/16 -> 0..3
+            {
+                case 2:
+                    pTileInfoOut->banks = 0;
+                    break;
+                case 4:
+                    pTileInfoOut->banks = 1;
+                    break;
+                case 8:
+                    pTileInfoOut->banks = 2;
+                    break;
+                case 16:
+                    pTileInfoOut->banks = 3;
+                    break;
+                default:
+                    // Unlisted value: record the error, store a fallback, keep converting
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->banks = 0;
+                    break;
+            }
+
+            switch (pTileInfoIn->bankWidth) // 1/2/4/8 -> 0..3
+            {
+                case 1:
+                    pTileInfoOut->bankWidth = 0;
+                    break;
+                case 2:
+                    pTileInfoOut->bankWidth = 1;
+                    break;
+                case 4:
+                    pTileInfoOut->bankWidth = 2;
+                    break;
+                case 8:
+                    pTileInfoOut->bankWidth = 3;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->bankWidth = 0;
+                    break;
+            }
+
+            switch (pTileInfoIn->bankHeight) // 1/2/4/8 -> 0..3
+            {
+                case 1:
+                    pTileInfoOut->bankHeight = 0;
+                    break;
+                case 2:
+                    pTileInfoOut->bankHeight = 1;
+                    break;
+                case 4:
+                    pTileInfoOut->bankHeight = 2;
+                    break;
+                case 8:
+                    pTileInfoOut->bankHeight = 3;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->bankHeight = 0;
+                    break;
+            }
+
+            switch (pTileInfoIn->macroAspectRatio) // 1/2/4/8 -> 0..3
+            {
+                case 1:
+                    pTileInfoOut->macroAspectRatio = 0;
+                    break;
+                case 2:
+                    pTileInfoOut->macroAspectRatio = 1;
+                    break;
+                case 4:
+                    pTileInfoOut->macroAspectRatio = 2;
+                    break;
+                case 8:
+                    pTileInfoOut->macroAspectRatio = 3;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->macroAspectRatio = 0;
+                    break;
+            }
+
+            switch (pTileInfoIn->tileSplitBytes) // 64..4096 (powers of two) -> 0..6
+            {
+                case 64:
+                    pTileInfoOut->tileSplitBytes = 0;
+                    break;
+                case 128:
+                    pTileInfoOut->tileSplitBytes = 1;
+                    break;
+                case 256:
+                    pTileInfoOut->tileSplitBytes = 2;
+                    break;
+                case 512:
+                    pTileInfoOut->tileSplitBytes = 3;
+                    break;
+                case 1024:
+                    pTileInfoOut->tileSplitBytes = 4;
+                    break;
+                case 2048:
+                    pTileInfoOut->tileSplitBytes = 5;
+                    break;
+                case 4096:
+                    pTileInfoOut->tileSplitBytes = 6;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->tileSplitBytes = 0;
+                    break;
+            }
+        }
+        else // reverse direction: encoded index -> real value
+        {
+            switch (pTileInfoIn->banks) // 0..3 -> 2/4/8/16
+            {
+                case 0:
+                    pTileInfoOut->banks = 2;
+                    break;
+                case 1:
+                    pTileInfoOut->banks = 4;
+                    break;
+                case 2:
+                    pTileInfoOut->banks = 8;
+                    break;
+                case 3:
+                    pTileInfoOut->banks = 16;
+                    break;
+                default:
+                    // Unlisted index: record the error, store a fallback, keep converting
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->banks = 2;
+                    break;
+            }
+
+            switch (pTileInfoIn->bankWidth) // 0..3 -> 1/2/4/8
+            {
+                case 0:
+                    pTileInfoOut->bankWidth = 1;
+                    break;
+                case 1:
+                    pTileInfoOut->bankWidth = 2;
+                    break;
+                case 2:
+                    pTileInfoOut->bankWidth = 4;
+                    break;
+                case 3:
+                    pTileInfoOut->bankWidth = 8;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->bankWidth = 1;
+                    break;
+            }
+
+            switch (pTileInfoIn->bankHeight) // 0..3 -> 1/2/4/8
+            {
+                case 0:
+                    pTileInfoOut->bankHeight = 1;
+                    break;
+                case 1:
+                    pTileInfoOut->bankHeight = 2;
+                    break;
+                case 2:
+                    pTileInfoOut->bankHeight = 4;
+                    break;
+                case 3:
+                    pTileInfoOut->bankHeight = 8;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->bankHeight = 1;
+                    break;
+            }
+
+            switch (pTileInfoIn->macroAspectRatio) // 0..3 -> 1/2/4/8
+            {
+                case 0:
+                    pTileInfoOut->macroAspectRatio = 1;
+                    break;
+                case 1:
+                    pTileInfoOut->macroAspectRatio = 2;
+                    break;
+                case 2:
+                    pTileInfoOut->macroAspectRatio = 4;
+                    break;
+                case 3:
+                    pTileInfoOut->macroAspectRatio = 8;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->macroAspectRatio = 1;
+                    break;
+            }
+
+            switch (pTileInfoIn->tileSplitBytes) // 0..6 -> 64..4096 (powers of two)
+            {
+                case 0:
+                    pTileInfoOut->tileSplitBytes = 64;
+                    break;
+                case 1:
+                    pTileInfoOut->tileSplitBytes = 128;
+                    break;
+                case 2:
+                    pTileInfoOut->tileSplitBytes = 256;
+                    break;
+                case 3:
+                    pTileInfoOut->tileSplitBytes = 512;
+                    break;
+                case 4:
+                    pTileInfoOut->tileSplitBytes = 1024;
+                    break;
+                case 5:
+                    pTileInfoOut->tileSplitBytes = 2048;
+                    break;
+                case 6:
+                    pTileInfoOut->tileSplitBytes = 4096;
+                    break;
+                default:
+                    ADDR_ASSERT_ALWAYS();
+                    retCode = ADDR_INVALIDPARAMS;
+                    pTileInfoOut->tileSplitBytes = 64;
+                    break;
+            }
+        }
+
+        if (pTileInfoIn != pTileInfoOut) // pipeConfig is copied as-is unless converting in place
+        {
+            pTileInfoOut->pipeConfig = pTileInfoIn->pipeConfig;
+        }
+    }
+    else
+    {
+        ADDR_ASSERT_ALWAYS(); // input or output tile info pointer is NULL
+        retCode = ADDR_INVALIDPARAMS;
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeSurfaceInfo
+*   @brief
+*       Entry of EgBasedAddrLib ComputeSurfaceInfo; also fills tileIndex and macroModeIndex
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS if numSamples < numFrags or the dispatch returns FALSE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+    if (pIn->numSamples < pIn->numFrags)
+    {
+        retCode = ADDR_INVALIDPARAMS;
+    }
+
+    ADDR_TILEINFO tileInfo = {0}; // local stand-in for pOut->pTileInfo when the caller gives none
+
+    if (retCode == ADDR_OK)
+    {
+        // Uses internal tile info if pOut does not have a valid pTileInfo
+        if (pOut->pTileInfo == NULL)
+        {
+            pOut->pTileInfo = &tileInfo;
+        }
+
+        if (!DispatchComputeSurfaceInfo(pIn, pOut))
+        {
+            retCode = ADDR_INVALIDPARAMS;
+        }
+
+        // Returns an index (this runs even when the dispatch above reported failure)
+        pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo,
+                                                pOut->tileMode,
+                                                pOut->tileType,
+                                                pOut->tileIndex);
+
+        if (IsMacroTiled(pOut->tileMode) && (pOut->macroModeIndex == TileIndexInvalid))
+        {
+            pOut->macroModeIndex = HwlComputeMacroModeIndex(pOut->tileIndex,
+                                                            pIn->flags,
+                                                            pIn->bpp,
+                                                            pIn->numSamples,
+                                                            pOut->pTileInfo);
+        }
+
+        // Resets pTileInfo to NULL if the internal tile info is used
+        if (pOut->pTileInfo == &tileInfo)
+        {
+#if DEBUG
+            // Client does not pass in a valid pTileInfo
+            if (IsMacroTiled(pOut->tileMode))
+            {
+                // If a valid index is returned, then no pTileInfo is okay
+                ADDR_ASSERT(!m_configFlags.useTileIndex || pOut->tileIndex != TileIndexInvalid);
+
+                if (!IsTileInfoAllZero(pIn->pTileInfo))
+                {
+                    // The initial value of pIn->pTileInfo is copied to tileInfo
+                    // We do not expect any of these values to be changed, nor any input to be 0
+                    ADDR_ASSERT(tileInfo.banks == pIn->pTileInfo->banks);
+                    ADDR_ASSERT(tileInfo.bankWidth == pIn->pTileInfo->bankWidth);
+                    ADDR_ASSERT(tileInfo.bankHeight == pIn->pTileInfo->bankHeight);
+                    ADDR_ASSERT(tileInfo.macroAspectRatio == pIn->pTileInfo->macroAspectRatio);
+                    ADDR_ASSERT(tileInfo.tileSplitBytes == pIn->pTileInfo->tileSplitBytes);
+                }
+            }
+#endif
+            pOut->pTileInfo = NULL; // never hand the address of the local tileInfo back to the caller
+        }
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeSurfaceAddrFromCoord
+*   @brief
+*       Entry of EgBasedAddrLib ComputeSurfaceAddrFromCoord; validates inputs, then dispatches
+*   @return
+*       ADDR_OK (pOut->addr is set), or ADDR_INVALIDPARAMS (pOut is not written here)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceAddrFromCoord(
+    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+    if (
+#if !ALT_TEST // Overflow test needs this out-of-boundary coord
+        (pIn->x > pIn->pitch)   ||  // NOTE(review): x == pitch passes - confirm '>' is intended
+        (pIn->y > pIn->height)  ||  // NOTE(review): y == height passes - confirm '>' is intended
+#endif
+        (pIn->numSamples > m_maxSamples))
+    {
+        retCode = ADDR_INVALIDPARAMS;
+    }
+    else
+    {
+        pOut->addr = DispatchComputeSurfaceAddrFromCoord(pIn, pOut);
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeSurfaceCoordFromAddr
+*   @brief
+*       Entry of EgBasedAddrLib ComputeSurfaceCoordFromAddr; validates inputs, then dispatches
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS if bitPosition >= 8 or numSamples > m_maxSamples
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceCoordFromAddr(
+    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] output structure
+    ) const
+{
+    // Accept only a bit position below 8 and a sample count within the supported maximum
+    const BOOL_32 inputsValid = (pIn->bitPosition < 8) &&
+                                (pIn->numSamples <= m_maxSamples);
+
+    if (!inputsValid)
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    DispatchComputeSurfaceCoordFromAddr(pIn, pOut);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeSliceTileSwizzle
+*   @brief
+*       Entry of EgBasedAddrLib ComputeSliceTileSwizzle
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS if pTileInfo is NULL or its bank count is zero
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSliceTileSwizzle(
+    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    // Tile info with a positive bank count is mandatory; when it is missing the call is
+    // rejected and pOut is left untouched.
+    if ((pIn->pTileInfo == NULL) || !(pIn->pTileInfo->banks > 0))
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    // Inputs are usable: the swizzle for the requested slice is the only output written
+    pOut->tileSwizzle = ComputeSliceTileSwizzle(
+        pIn->tileMode,
+        pIn->baseSwizzle,
+        pIn->slice,
+        pIn->baseAddr,
+        pIn->pTileInfo);
+
+    return ADDR_OK;
+
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeHtileBpp
+*
+*   @brief
+*       Compute htile bpp; only the 8x8 block mode is supported (asserted, not enforced)
+*
+*   @return
+*       Htile bpp, always 32 in this implementation
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlComputeHtileBpp(
+    BOOL_32 isWidth8,   ///< [in] TRUE if block width is 8
+    BOOL_32 isHeight8   ///< [in] TRUE if block height is 8
+    ) const
+{
+    // only support 8x8 mode
+    ADDR_ASSERT(isWidth8 && isHeight8);
+    return 32; // returned regardless of the flags; the assert above is the only check
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeHtileBaseAlign
+*
+*   @brief
+*       Compute htile base alignment: pipe interleave bytes * pipes (* banks if TC compatible)
+*
+*   @return
+*       Htile base alignment
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlComputeHtileBaseAlign(
+    BOOL_32         isTcCompatible, ///< [in] if TC compatible
+    BOOL_32         isLinear,       ///< [in] if it is linear mode
+    ADDR_TILEINFO*  pTileInfo       ///< [in] Tile info
+    ) const
+{
+    UINT_32 alignment = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo);
+
+    if (!isTcCompatible)
+    {
+        return alignment;
+    }
+
+    // TC-compatible htile additionally scales with the bank count.  Tile info is expected
+    // here; if it is missing anyway, the unscaled alignment is returned.
+    ADDR_ASSERT(pTileInfo != NULL);
+
+    return (pTileInfo != NULL) ? (alignment * pTileInfo->banks) : alignment;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled
+*
+*   @brief
+*       Compute 1D tiled surface pitch alignment from the pipe interleave size, bpp, sample
+*       count and micro tile thickness; the result is the return value.
+*
+*   @return
+*       pitch alignment
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled(
+    AddrTileMode        tileMode,          ///< [in] tile mode
+    UINT_32             bpp,               ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
+    UINT_32             numSamples         ///< [in] number of samples
+    ) const
+{
+    const UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+    //
+    // Depth/stencil workaround: pitch alignment depends on bpp, so a depth buffer that also
+    // has stencil is aligned as if it were 8 bpp, which satisfies the larger requirement of
+    // the stencil buffer.  A depth-only buffer keeps its own bpp.
+    //
+    // Note: this does not actually work for mipmaps, but a mipmapped depth texture is not
+    // really sampled with mipmaps.
+    //
+    if (flags.depth && !flags.noStencil)
+    {
+        bpp = 8;
+    }
+
+    // Pixels held by one micro tile, thickness included
+    const UINT_32 tilePixels = MicroTilePixels * thickness;
+
+    // Pixels, then whole micro tiles, covered by one pipe interleave
+    // NOTE(review): this divides by bpp * numSamples and by tilePixels, so it assumes none
+    // of them is zero - confirm against callers
+    const UINT_32 interleavePixels = BYTES_TO_BITS(m_pipeInterleaveBytes) / (bpp * numSamples);
+    const UINT_32 interleaveTiles  = interleavePixels / tilePixels;
+
+    // The alignment is never smaller than one micro tile width
+    const UINT_32 alignment = Max(MicroTileWidth, interleaveTiles * MicroTileWidth);
+
+    return alignment;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlGetSizeAdjustmentMicroTiled
+*
+*   @brief
+*       Compute the slice size of a 1D tiled surface; *pPitch and *pHeight are only read here
+*
+*   @return
+*       Logical slice size in bytes
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::HwlGetSizeAdjustmentMicroTiled(
+    UINT_32             thickness,      ///< [in] thickness
+    UINT_32             bpp,            ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,          ///< [in] surface flags
+    UINT_32             numSamples,     ///< [in] number of samples
+    UINT_32             baseAlign,      ///< [in] base alignment
+    UINT_32             pitchAlign,     ///< [in] pitch alignment
+    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
+    UINT_32*            pHeight         ///< [in/out] pointer to height
+    ) const
+{
+    UINT_64 logicalSliceSize;
+    UINT_64 physicalSliceSize;
+
+    UINT_32 pitch   = *pPitch;
+    UINT_32 height  = *pHeight;
+
+    // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1)
+    logicalSliceSize = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
+
+    // Physical slice: multiplied by thickness
+    physicalSliceSize =  logicalSliceSize * thickness;
+
+    //
+    // R800 will always pad physical slice size to baseAlign which is pipe_interleave_bytes
+    //
+    ADDR_ASSERT((physicalSliceSize % baseAlign) == 0);
+
+    return logicalSliceSize;
+}
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,411 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  egbaddrlib.h
+* @brief Contains the EgBasedAddrLib class definition.
+***************************************************************************************************
+*/
+
+#ifndef __EG_BASED_ADDR_LIB_H__
+#define __EG_BASED_ADDR_LIB_H__
+
+#include "addrlib.h"
+
+
+/// Structures for functions
+///
+/// Bit-field record of x/y coordinate bits. Within this header it is used only as the type of
+/// the pOutput parameter of EgBasedAddrLib::ComputeSurfaceCoord2DFromBankPipe.
+/// NOTE(review): the per-field meanings below are inferred from the field names only - confirm
+/// against the implementation that fills this structure.
+struct CoordFromBankPipe
+{
+    UINT_32 xBits : 3;  ///< 3-bit wide field (presumably describes the x bits - TODO confirm)
+    UINT_32 yBits : 4;  ///< 4-bit wide field (presumably describes the y bits - TODO confirm)
+
+    // Single-bit fields; presumably the individual coordinate bits x[3..5] and y[3..6].
+    UINT_32 xBit3 : 1;
+    UINT_32 xBit4 : 1;
+    UINT_32 xBit5 : 1;
+    UINT_32 yBit3 : 1;
+    UINT_32 yBit4 : 1;
+    UINT_32 yBit5 : 1;
+    UINT_32 yBit6 : 1;
+};
+
+/**
+***************************************************************************************************
+* @brief This class is the Evergreen based address library
+* @note  Abstract class
+***************************************************************************************************
+*/
+class EgBasedAddrLib : public AddrLib
+{
+protected:
+    // Construction and destruction are protected, so instances can only be created or destroyed
+    // through derived classes (or friends). The class also declares pure virtual methods below,
+    // so it cannot be instantiated directly.
+    EgBasedAddrLib(const AddrClient* pClient);
+    virtual ~EgBasedAddrLib();
+
+public:
+
+    /// Surface info functions
+
+    // NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output.
+    //       On input:
+    //       One or more fields may be 0 to be calculated/defaulted - pre-SI h/w.
+    //       H/W using tile mode index only accepts none or all 0's - SI and newer h/w.
+    //       It then returns the actual tiling configuration used.
+    //       Other methods' TileInfo must be valid on entry
+    BOOL_32 DispatchComputeSurfaceInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    // Note: unlike DispatchComputeSurfaceInfo above, this method is not const-qualified.
+    ADDR_E_RETURNCODE DispatchComputeFmaskInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+protected:
+    // Hwl interface
+    // Virtual methods declared here without "= 0" are given a definition by this class;
+    // the ones marked "= 0" further down must be supplied by a derived class.
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
+        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
+        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
+        const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
+        ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
+        const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
+        ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
+        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO*  pTileInfo,
+        UINT_64 baseAddr, UINT_32* pTileSwizzle) const;
+
+    virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
+        const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+        ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
+        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
+
+    virtual UINT_32 HwlComputeHtileBpp(
+        BOOL_32 isWidth8, BOOL_32 isHeight8) const;
+
+    virtual UINT_32 HwlComputeHtileBaseAlign(
+        BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const;
+
+    // Note: not const-qualified, matching DispatchComputeFmaskInfo above.
+    virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+    virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
+        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
+        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+    virtual BOOL_32 HwlDegradeBaseLevel(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
+    virtual UINT_32 HwlComputeQbStereoRightSwizzle(
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const;
+
+    virtual VOID HwlComputePixelCoordFromOffset(
+        UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
+        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
+        AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
+
+    /// Return Cmask block max
+    // NOTE(review): the declared return type is BOOL_32 although the value returned is a 14-bit
+    // maximum (16383), not a boolean - presumably this mirrors the signature being overridden in
+    // the base class; confirm before changing it.
+    virtual BOOL_32 HwlGetMaxCmaskBlockMax() const
+    {
+        return 16383; // 14 bits
+    }
+
+    // Sub-hwl interface
+    /// Pure virtual function to setup tile info (indices) if client requests to do so
+    virtual VOID HwlSetupTileInfo(
+        AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
+        AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+    /// Pure virtual function to get pitch alignment for linear modes
+    virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0;
+
+    /// Pure virtual function to get size adjustment for linear modes
+    virtual UINT_64 HwlGetSizeAdjustmentLinear(
+        AddrTileMode tileMode,
+        UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
+        UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0;
+
+    /// Virtual function (with an implementation in this class) to get pitch alignment for
+    /// micro-tiled modes
+    virtual UINT_32 HwlGetPitchAlignmentMicroTiled(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
+
+    /// Virtual function (with an implementation in this class) to get size adjustment for
+    /// micro-tiled modes
+    virtual UINT_64 HwlGetSizeAdjustmentMicroTiled(
+        UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+        UINT_32 baseAlign, UINT_32 pitchAlign,
+        UINT_32 *pPitch, UINT_32 *pHeight) const;
+
+    /// Pure virtual function to do extra sanity check
+    virtual BOOL_32 HwlSanityCheckMacroTiled(
+        ADDR_TILEINFO* pTileInfo) const = 0;
+
+    /// Pure virtual function to check current level to be the last macro tiled one
+    virtual VOID HwlCheckLastMacroTiledLvl(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+    /// Adjusts bank before bank is modified by rotation
+    virtual UINT_32 HwlPreAdjustBank(
+        UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO*  pTileInfo) const = 0;
+
+    /// Pure virtual counterpart of ComputeSurfaceCoord2DFromBankPipe (declared further down);
+    /// here x and y are passed by pointer (pX, pY) rather than by value.
+    virtual VOID HwlComputeSurfaceCoord2DFromBankPipe(
+        AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
+        UINT_32 bank, UINT_32 pipe,
+        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
+        BOOL_32 ignoreSE,
+        ADDR_TILEINFO* pTileInfo) const = 0;
+
+    virtual BOOL_32 HwlTileInfoEqual(
+        const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
+
+    virtual AddrTileMode HwlDegradeThickTileMode(
+        AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
+
+    /// Default implementation: ignores every argument and returns TileIndexInvalid
+    virtual INT_32 HwlPostCheckTileIndex(
+        const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
+        INT curIndex = TileIndexInvalid) const
+    {
+        return TileIndexInvalid;
+    }
+
+    /// Default implementation does nothing; derived classes may override it
+    virtual VOID HwlFmaskPreThunkSurfInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
+        const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
+        ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const
+    {
+    }
+
+    /// Default implementation does nothing; derived classes may override it
+    virtual VOID HwlFmaskPostThunkSurfInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const
+    {
+    }
+
+    /// Virtual function to check if the height needs extra padding
+    /// for stereo right eye offset, to avoid bank pipe swizzle
+    /// (the default implementation returns FALSE)
+    virtual BOOL_32 HwlStereoCheckRightOffsetPadding() const
+    {
+        return FALSE;
+    }
+
+    virtual BOOL_32 HwlReduceBankWidthHeight(
+        UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+        UINT_32 bankHeightAlign, UINT_32 pipes,
+        ADDR_TILEINFO* pTileInfo) const;
+
+    // Protected non-virtual functions
+
+    /// Mip level functions
+    AddrTileMode ComputeSurfaceMipLevelTileMode(
+        AddrTileMode baseTileMode, UINT_32 bpp,
+        UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples,
+        UINT_32 pitchAlign, UINT_32 heightAlign,
+        ADDR_TILEINFO* pTileInfo) const;
+
+    /// Swizzle functions
+    VOID ExtractBankPipeSwizzle(
+        UINT_32 base256b, ADDR_TILEINFO* pTileInfo,
+        UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const;
+
+    UINT_32 GetBankPipeSwizzle(
+        UINT_32 bankSwizzle, UINT_32 pipeSwizzle,
+        UINT_64 baseAddr, ADDR_TILEINFO*  pTileInfo) const;
+
+    UINT_32 ComputeSliceTileSwizzle(
+        AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr,
+        ADDR_TILEINFO* pTileInfo) const;
+
+    /// Addressing functions
+    // NOTE(review): the parameter name "tileSpitSlice" below is presumably a typo of
+    // "tileSplitSlice"; it is left untouched here.
+    UINT_32 ComputeBankFromCoord(
+        UINT_32 x, UINT_32 y, UINT_32 slice,
+        AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice,
+        ADDR_TILEINFO* pTileInfo) const;
+
+    UINT_32 ComputeBankFromAddr(
+        UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const;
+
+    UINT_32 ComputePipeRotation(
+        AddrTileMode tileMode, UINT_32 numPipes) const;
+
+    UINT_32 ComputeBankRotation(
+        AddrTileMode tileMode, UINT_32 numBanks,
+        UINT_32 numPipes) const;
+
+    // Takes x and y by value and reports through the CoordFromBankPipe bit-field record
+    // pointed to by pOutput.
+    VOID ComputeSurfaceCoord2DFromBankPipe(
+        AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice,
+        UINT_32 bank, UINT_32 pipe,
+        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
+        ADDR_TILEINFO* pTileInfo,
+        CoordFromBankPipe *pOutput) const;
+
+    /// Htile/Cmask functions
+    UINT_64 ComputeHtileBytes(
+        UINT_32 pitch, UINT_32 height, UINT_32 bpp,
+        BOOL_32 isLinear, UINT_32 numSlices, UINT_64* sliceBytes, UINT_32 baseAlign) const;
+
+    // Static functions
+    static BOOL_32 IsTileInfoAllZero(ADDR_TILEINFO* pTileInfo);
+    static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples);
+    static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples);
+
+private:
+
+    // Private helpers: surface info computation, one per tiling family
+    // (linear / micro tiled / macro tiled).
+    BOOL_32 ComputeSurfaceInfoLinear(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
+        UINT_32 padDims) const;
+
+    BOOL_32 ComputeSurfaceInfoMicroTiled(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
+        UINT_32 padDims,
+        AddrTileMode expTileMode) const;
+
+    BOOL_32 ComputeSurfaceInfoMacroTiled(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
+        UINT_32 padDims,
+        AddrTileMode expTileMode) const;
+
+    // Private helpers: alignment computation; each reports base, pitch and height alignment
+    // through its three pointer parameters.
+    BOOL_32 ComputeSurfaceAlignmentsLinear(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+        UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
+
+    BOOL_32 ComputeSurfaceAlignmentsMicroTiled(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+        UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
+
+    BOOL_32 ComputeSurfaceAlignmentsMacroTiled(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+        UINT_32 mipLevel, UINT_32 numSamples,
+        ADDR_TILEINFO* pTileInfo,
+        UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
+
+    /// Surface addressing functions
+    UINT_64 DispatchComputeSurfaceAddrFromCoord(
+        const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    VOID    DispatchComputeSurfaceCoordFromAddr(
+        const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
+
+    UINT_64 ComputeSurfaceAddrFromCoordMicroTiled(
+        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        AddrTileMode tileMode,
+        AddrTileType microTileType, BOOL_32 isDepthSampleOrder,
+        UINT_32* pBitPosition) const;
+
+    UINT_64 ComputeSurfaceAddrFromCoordMacroTiled(
+        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        AddrTileMode tileMode,
+        AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
+        UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+        ADDR_TILEINFO* pTileInfo,
+        UINT_32* pBitPosition) const;
+
+    VOID    ComputeSurfaceCoordFromAddrMacroTiled(
+        UINT_64 addr, UINT_32 bitPosition,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+        AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
+        UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+        ADDR_TILEINFO* pTileInfo,
+        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
+
+    /// Fmask functions
+    UINT_64 DispatchComputeFmaskAddrFromCoord(
+        const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+    VOID    DispatchComputeFmaskCoordFromAddr(
+        const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+        ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+    // FMASK related methods - private
+    UINT_64 ComputeFmaskAddrFromCoordMicroTiled(
+        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
+        UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
+        BOOL_32 resolved, UINT_32* pBitPosition) const;
+
+    VOID    ComputeFmaskCoordFromAddrMicroTiled(
+        UINT_64 addr, UINT_32 bitPosition,
+        UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        AddrTileMode tileMode, BOOL_32 resolved,
+        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
+
+    VOID    ComputeFmaskCoordFromAddrMacroTiled(
+        UINT_64 addr, UINT_32 bitPosition,
+        UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
+        UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+        BOOL_32 ignoreSE,
+        ADDR_TILEINFO* pTileInfo,
+        BOOL_32 resolved,
+        UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
+
+    UINT_64 ComputeFmaskAddrFromCoordMacroTiled(
+        UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
+        UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+        BOOL_32 ignoreSE,
+        ADDR_TILEINFO* pTileInfo,
+        BOOL_32 resolved,
+        UINT_32* pBitPosition) const;
+
+    /// Sanity check functions
+    BOOL_32 SanityCheckMacroTiled(
+        ADDR_TILEINFO* pTileInfo) const;
+
+protected:
+    // Data members, accessible to derived classes
+    UINT_32 m_ranks;                ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK
+    UINT_32 m_logicalBanks;         ///< Logical banks = m_banks * m_ranks if m_banks != 16
+    UINT_32 m_bankInterleave;       ///< Bank interleave, as a multiple of pipe interleave size
+};
+
+#endif
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,2818 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  siaddrlib.cpp
+* @brief Contains the implementation for the SIAddrLib class.
+***************************************************************************************************
+*/
+
+#include "siaddrlib.h"
+
+#include "si_gb_reg.h"
+
+#include "si_ci_vi_merged_enum.h"
+
+#if BRAHMA_BUILD
+#include "amdgpu_id.h"
+#else
+#include "si_id.h"
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrSIHwlInit
+*
+*   @brief
+*       Free-function entry point that builds the SI address library by forwarding the client
+*       to SIAddrLib::CreateObj.
+*
+*   @return
+*       The result of SIAddrLib::CreateObj(pClient), as an AddrLib pointer.
+***************************************************************************************************
+*/
+AddrLib* AddrSIHwlInit(const AddrClient* pClient)
+{
+    // Hold the freshly created object in a named local before handing it back to the caller.
+    AddrLib* pSiLib = SIAddrLib::CreateObj(pClient);
+
+    return pSiLib;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::SIAddrLib
+*
+*   @brief
+*       Constructor. Forwards pClient to the EgBasedAddrLib base, starts with zero entries
+*       (m_noOfEntries), zero-fills m_settings and tags the object as SI_ADDRLIB.
+*
+***************************************************************************************************
+*/
+SIAddrLib::SIAddrLib(const AddrClient* pClient)
+    : EgBasedAddrLib(pClient)
+    , m_noOfEntries(0)
+{
+    // Clear every byte of the settings member.
+    memset(&m_settings, 0, sizeof(m_settings));
+
+    // Record which address-library class this object is.
+    m_class = SI_ADDRLIB;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::~SIAddrLib
+*
+*   @brief
+*       Destructor
+***************************************************************************************************
+*/
+SIAddrLib::~SIAddrLib()
+{
+    // Intentionally empty: this destructor performs no explicit cleanup of its own.
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetPipes
+*
+*   @brief
+*       Report the number of pipes for a surface, derived from the pipe config stored in its
+*       tile info.
+*   @return
+*       Pipe count for pTileInfo->pipeConfig; when pTileInfo is null, asserts and falls back to
+*       the global m_pipes value.
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlGetPipes(
+    const ADDR_TILEINFO* pTileInfo    ///< [in] Tile info
+    ) const
+{
+    // Guard: without tile info there is no per-surface pipe config to look at.
+    if (!pTileInfo)
+    {
+        ADDR_ASSERT_ALWAYS();
+        return m_pipes; // Suppose we should still have a global pipes
+    }
+
+    return GetPipePerSurf(pTileInfo->pipeConfig);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::GetPipePerSurf
+*   @brief
+*       Map a pipe configuration enum value to the number of pipes it denotes.
+*   @return
+*       2, 4, 8 or 16 for the known P2/P4/P8/P16 configurations; for any other value the
+*       function asserts and returns the global m_pipes.
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::GetPipePerSurf(
+    AddrPipeCfg pipeConfig   ///< [in] pipe config
+    ) const
+{
+    switch (pipeConfig)
+    {
+        case ADDR_PIPECFG_P2:
+            return 2;
+
+        case ADDR_PIPECFG_P4_8x16:
+        case ADDR_PIPECFG_P4_16x16:
+        case ADDR_PIPECFG_P4_16x32:
+        case ADDR_PIPECFG_P4_32x32:
+            return 4;
+
+        case ADDR_PIPECFG_P8_16x16_8x16:
+        case ADDR_PIPECFG_P8_16x32_8x16:
+        case ADDR_PIPECFG_P8_32x32_8x16:
+        case ADDR_PIPECFG_P8_16x32_16x16:
+        case ADDR_PIPECFG_P8_32x32_16x16:
+        case ADDR_PIPECFG_P8_32x32_16x32:
+        case ADDR_PIPECFG_P8_32x64_32x32:
+            return 8;
+
+        case ADDR_PIPECFG_P16_32x32_8x16:
+        case ADDR_PIPECFG_P16_32x32_16x16:
+            return 16;
+
+        default:
+            break;
+    }
+
+    // Unknown configuration: flag it and fall back to the global pipe count.
+    ADDR_ASSERT(!"Invalid pipe config");
+    return m_pipes;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::ComputePipeFromCoord
+*
+*   @brief
+*       Compute pipe number from coordinates
+*   @return
+*       Pipe number
+***************************************************************************************************
+*/
+// Computes the pipe index for pixel (x, y) of a slice: the pipe bits are formed by XOR-ing
+// selected coordinate bits according to pTileInfo->pipeConfig, and the result is then XOR-ed
+// with the (slice-rotated, masked) pipe swizzle.
+// pTileInfo is dereferenced without a null check. ignoreSE is not referenced in this function.
+UINT_32 SIAddrLib::ComputePipeFromCoord(
+    UINT_32         x,              ///< [in] x coordinate
+    UINT_32         y,              ///< [in] y coordinate
+    UINT_32         slice,          ///< [in] slice index
+    AddrTileMode    tileMode,       ///< [in] tile mode
+    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
+    BOOL_32         ignoreSE,       ///< [in] TRUE if shader engines are ignored
+    ADDR_TILEINFO*  pTileInfo       ///< [in] Tile info
+    ) const
+{
+    UINT_32 pipe;
+    UINT_32 pipeBit0 = 0;
+    UINT_32 pipeBit1 = 0;
+    UINT_32 pipeBit2 = 0;
+    UINT_32 pipeBit3 = 0;
+    UINT_32 sliceRotation;
+    UINT_32 numPipes = 0;
+
+    // Coordinates in units of micro tiles. x3..x6 / y3..y6 are bits 0..3 of those tile
+    // coordinates (presumably named after the pixel-coordinate bit positions, assuming an
+    // 8x8 micro tile - TODO confirm MicroTileWidth/MicroTileHeight).
+    UINT_32 tx = x / MicroTileWidth;
+    UINT_32 ty = y / MicroTileHeight;
+    UINT_32 x3 = _BIT(tx,0);
+    UINT_32 x4 = _BIT(tx,1);
+    UINT_32 x5 = _BIT(tx,2);
+    UINT_32 x6 = _BIT(tx,3);
+    UINT_32 y3 = _BIT(ty,0);
+    UINT_32 y4 = _BIT(ty,1);
+    UINT_32 y5 = _BIT(ty,2);
+    UINT_32 y6 = _BIT(ty,3);
+
+    // Per-configuration bit equations. Pipe bits that a case does not assign stay 0.
+    switch (pTileInfo->pipeConfig)
+    {
+        case ADDR_PIPECFG_P2:
+            pipeBit0 = x3 ^ y3;
+            numPipes = 2;
+            break;
+        case ADDR_PIPECFG_P4_8x16:
+            pipeBit0 = x4 ^ y3;
+            pipeBit1 = x3 ^ y4;
+            numPipes = 4;
+            break;
+        case ADDR_PIPECFG_P4_16x16:
+            pipeBit0 = x3 ^ y3 ^ x4;
+            pipeBit1 = x4 ^ y4;
+            numPipes = 4;
+            break;
+        case ADDR_PIPECFG_P4_16x32:
+            pipeBit0 = x3 ^ y3 ^ x4;
+            pipeBit1 = x4 ^ y5;
+            numPipes = 4;
+            break;
+        case ADDR_PIPECFG_P4_32x32:
+            pipeBit0 = x3 ^ y3 ^ x5;
+            pipeBit1 = x5 ^ y5;
+            numPipes = 4;
+            break;
+        case ADDR_PIPECFG_P8_16x16_8x16:
+            // NOTE(review): unlike every other P8 case, pipeBit2 is not assigned here even
+            // though numPipes is 8, so bit 2 of the pre-swizzle pipe is always 0 for this
+            // configuration - confirm against the hardware documentation whether an equation
+            // for pipeBit2 is missing.
+            pipeBit0 = x4 ^ y3 ^ x5;
+            pipeBit1 = x3 ^ y5;
+            numPipes = 8;
+            break;
+        case ADDR_PIPECFG_P8_16x32_8x16:
+            pipeBit0 = x4 ^ y3 ^ x5;
+            pipeBit1 = x3 ^ y4;
+            pipeBit2 = x4 ^ y5;
+            numPipes = 8;
+            break;
+        case ADDR_PIPECFG_P8_16x32_16x16:
+            pipeBit0 = x3 ^ y3 ^ x4;
+            pipeBit1 = x5 ^ y4;
+            pipeBit2 = x4 ^ y5;
+            numPipes = 8;
+            break;
+        case ADDR_PIPECFG_P8_32x32_8x16:
+            pipeBit0 = x4 ^ y3 ^ x5;
+            pipeBit1 = x3 ^ y4;
+            pipeBit2 = x5 ^ y5;
+            numPipes = 8;
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x16:
+            pipeBit0 = x3 ^ y3 ^ x4;
+            pipeBit1 = x4 ^ y4;
+            pipeBit2 = x5 ^ y5;
+            numPipes = 8;
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x32:
+            pipeBit0 = x3 ^ y3 ^ x4;
+            pipeBit1 = x4 ^ y6;
+            pipeBit2 = x5 ^ y5;
+            numPipes = 8;
+            break;
+        case ADDR_PIPECFG_P8_32x64_32x32:
+            pipeBit0 = x3 ^ y3 ^ x5;
+            pipeBit1 = x6 ^ y5;
+            pipeBit2 = x5 ^ y6;
+            numPipes = 8;
+            break;
+        case ADDR_PIPECFG_P16_32x32_8x16:
+            pipeBit0 = x4 ^ y3;
+            pipeBit1 = x3 ^ y4;
+            pipeBit2 = x5 ^ y6;
+            pipeBit3 = x6 ^ y5;
+            numPipes = 16;
+            break;
+        case ADDR_PIPECFG_P16_32x32_16x16:
+            pipeBit0 = x3 ^ y3 ^ x4;
+            pipeBit1 = x4 ^ y4;
+            pipeBit2 = x5 ^ y6;
+            pipeBit3 = x6 ^ y5;
+            numPipes = 16;
+            break;
+        default:
+            // Unknown config: all pipe bits and numPipes remain 0.
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+    // Assemble the 4-bit pipe index from the individual bits.
+    pipe = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3);
+
+    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+    //
+    // Apply pipe rotation for the slice.
+    //
+    switch (tileMode)
+    {
+        case ADDR_TM_3D_TILED_THIN1:    //fall through thin
+        case ADDR_TM_3D_TILED_THICK:    //fall through thick
+        case ADDR_TM_3D_TILED_XTHICK:
+            // Rotation step is max(1, numPipes/2 - 1), applied once per group of
+            // microTileThickness slices.
+            sliceRotation =
+                Max(1, static_cast<INT_32>(numPipes / 2) - 1) * (slice / microTileThickness);
+            break;
+        default:
+            sliceRotation = 0;
+            break;
+    }
+    pipeSwizzle += sliceRotation;
+    // Wrap the swizzle into [0, numPipes). If the default case above was taken, numPipes is 0
+    // and (numPipes - 1) is an all-ones mask, so the swizzle is left unmasked.
+    pipeSwizzle &= (numPipes - 1);
+
+    pipe = pipe ^ pipeSwizzle;
+
+    return pipe;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::ComputeTileCoordFromPipeAndElemIdx
+*
+*   @brief
+*       Compute (x,y) of a tile within a macro tile from address
+*   @return
+*       N/A (the function returns VOID; the tile coordinates are written to pX and pY)
+***************************************************************************************************
+*/
+VOID SIAddrLib::ComputeTileCoordFromPipeAndElemIdx(
+    UINT_32         elemIdx,          ///< [in] per pipe element index within a macro tile
+    UINT_32         pipe,             ///< [in] pipe index
+    AddrPipeCfg     pipeCfg,          ///< [in] pipe config
+    UINT_32         pitchInMacroTile, ///< [in] surface pitch in macro tile
+    UINT_32         x,                ///< [in] x coordinate of the (0,0) tile in a macro tile
+    UINT_32         y,                ///< [in] y coordinate of the (0,0) tile in a macro tile
+    UINT_32*        pX,               ///< [out] x coordinate
+    UINT_32*        pY                ///< [out] y coordinate
+    ) const
+{
+    UINT_32 pipebit0 = _BIT(pipe,0);
+    UINT_32 pipebit1 = _BIT(pipe,1);
+    UINT_32 pipebit2 = _BIT(pipe,2);
+    UINT_32 pipebit3 = _BIT(pipe,3);
+    UINT_32 elemIdx0 = _BIT(elemIdx,0);
+    UINT_32 elemIdx1 = _BIT(elemIdx,1);
+    UINT_32 elemIdx2 = _BIT(elemIdx,2);
+    UINT_32 x3 = 0;
+    UINT_32 x4 = 0;
+    UINT_32 x5 = 0;
+    UINT_32 x6 = 0;
+    UINT_32 y3 = 0;
+    UINT_32 y4 = 0;
+    UINT_32 y5 = 0;
+    UINT_32 y6 = 0;
+
+    switch(pipeCfg)
+    {
+        case ADDR_PIPECFG_P2:
+            x4 = elemIdx2;
+            y4 = elemIdx1 ^ x4;
+            y3 = elemIdx0 ^ x4;
+            x3 = pipebit0 ^ y3;
+            *pY = Bits2Number(2, y4, y3);
+            *pX = Bits2Number(2, x4, x3);
+            break;
+        case ADDR_PIPECFG_P4_8x16:
+            x4 = elemIdx1;
+            y4 = elemIdx0 ^ x4;
+            x3 = pipebit1 ^ y4;
+            y3 = pipebit0 ^ x4;
+            *pY = Bits2Number(2, y4, y3);
+            *pX = Bits2Number(2, x4, x3);
+            break;
+        case ADDR_PIPECFG_P4_16x16:
+            x4 = elemIdx1;
+            y3 = elemIdx0 ^ x4;
+            y4 = pipebit1 ^ x4;
+            x3 = pipebit0 ^ y3 ^ x4;
+            *pY = Bits2Number(2, y4, y3);
+            *pX = Bits2Number(2, x4, x3);
+            break;
+        case ADDR_PIPECFG_P4_16x32:
+            x3 = elemIdx0 ^ pipebit0;
+            y5 = _BIT(y,5);
+            x4 = pipebit1 ^ y5;
+            y3 = pipebit0 ^ x3 ^ x4;
+            y4 = elemIdx1 ^ x4;
+            *pY = Bits2Number(2, y4, y3);
+            *pX = Bits2Number(2, x4, x3);
+            break;
+        case ADDR_PIPECFG_P4_32x32:
+            x4 = elemIdx2;
+            y3 = elemIdx0 ^ x4;
+            y4 = elemIdx1 ^ x4;
+            if((pitchInMacroTile % 2) == 0)
+            {   //even
+                y5 = _BIT(y,5);
+                x5 = pipebit1 ^ y5;
+                x3 = pipebit0 ^ y3 ^ x5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(3, x5, x4, x3);
+            }
+            else
+            {   //odd
+                x5 = _BIT(x,5);
+                x3 = pipebit0 ^ y3 ^ x5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(2, x4, x3);
+            }
+            break;
+        case ADDR_PIPECFG_P8_16x16_8x16:
+            x4 = elemIdx0;
+            y5 = _BIT(y,5);
+            x5 = _BIT(x,5);
+            x3 = pipebit1 ^ y5;
+            y4 = pipebit2 ^ x4;
+            y3 = pipebit0 ^ x5 ^ x4;
+            *pY = Bits2Number(2, y4, y3);
+            *pX = Bits2Number(2, x4, x3);
+            break;
+        case ADDR_PIPECFG_P8_16x32_8x16:
+            x3 = elemIdx0;
+            y4 = pipebit1 ^ x3;
+            y5 = _BIT(y,5);
+            x5 = _BIT(x,5);
+            x4 = pipebit2 ^ y5;
+            y3 = pipebit0 ^ x4 ^ x5;
+            *pY = Bits2Number(2, y4, y3);
+            *pX = Bits2Number(2, x4, x3);
+            break;
+        case ADDR_PIPECFG_P8_32x32_8x16:
+            x4 = elemIdx1;
+            y4 = elemIdx0 ^ x4;
+            x3 = pipebit1 ^ y4;
+            if((pitchInMacroTile % 2) == 0)
+            {  //even
+                y5 = _BIT(y,5);
+                x5 = _BIT(x,5);
+                x5 = pipebit2 ^ y5;
+                y3 = pipebit0 ^ x4 ^ x5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(3, x5, x4, x3);
+            }
+            else
+            {  //odd
+                x5 = _BIT(x,5);
+                y3 = pipebit0 ^ x4 ^ x5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(2, x4, x3);
+            }
+            break;
+        case ADDR_PIPECFG_P8_16x32_16x16:
+            x3 = elemIdx0;
+            x5 = _BIT(x,5);
+            y5 = _BIT(y,5);
+            x4 = pipebit2 ^ y5;
+            y4 = pipebit1 ^ x5;
+            y3 = pipebit0 ^ x3 ^ x4;
+            *pY = Bits2Number(2, y4, y3);
+            *pX = Bits2Number(2, x4, x3);
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x16:
+            x4 = elemIdx1;
+            y3 = elemIdx0 ^ x4;
+            x3 = y3^x4^pipebit0;
+            y4 = pipebit1 ^ x4;
+            if((pitchInMacroTile % 2) == 0)
+            {   //even
+                y5 = _BIT(y,5);
+                x5 = pipebit2 ^ y5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(3, x5, x4, x3);
+            }
+            else
+            {   //odd
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(2, x4, x3);
+            }
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x32:
+            if((pitchInMacroTile % 2) == 0)
+            {   //even
+                y5 = _BIT(y,5);
+                y6 = _BIT(y,6);
+                x4 = pipebit1 ^ y6;
+                y3 = elemIdx0 ^ x4;
+                y4 = elemIdx1 ^ x4;
+                x3 = pipebit0 ^ y3 ^ x4;
+                x5 = pipebit2 ^ y5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(3, x5, x4, x3);
+            }
+            else
+            {   //odd
+                y6 = _BIT(y,6);
+                x4 = pipebit1 ^ y6;
+                y3 = elemIdx0 ^ x4;
+                y4 = elemIdx1 ^ x4;
+                x3 = pipebit0 ^ y3 ^ x4;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(2, x4, x3);
+            }
+            break;
+        case ADDR_PIPECFG_P8_32x64_32x32:
+            x4 = elemIdx2;
+            y3 = elemIdx0 ^ x4;
+            y4 = elemIdx1 ^ x4;
+            if((pitchInMacroTile % 4) == 0)
+            {   //multiple of 4
+                y5 = _BIT(y,5);
+                y6 = _BIT(y,6);
+                x5 = pipebit2 ^ y6;
+                x6 = pipebit1 ^ y5;
+                x3 = pipebit0 ^ y3 ^ x5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(4, x6, x5, x4, x3);
+            }
+            else
+            {
+                y6 = _BIT(y,6);
+                x5 = pipebit2 ^ y6;
+                x3 = pipebit0 ^ y3 ^ x5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(3, x5, x4, x3);
+            }
+            break;
+        case ADDR_PIPECFG_P16_32x32_8x16:
+            x4 = elemIdx1;
+            y4 = elemIdx0 ^ x4;
+            y3 = pipebit0 ^ x4;
+            x3 = pipebit1 ^ y4;
+            if((pitchInMacroTile % 4) == 0)
+            {   //multiple of 4
+                y5 = _BIT(y,5);
+                y6 = _BIT(y,6);
+                x5 = pipebit2 ^ y6;
+                x6 = pipebit3 ^ y5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(4, x6, x5,x4, x3);
+            }
+            else
+            {
+                y6 = _BIT(y,6);
+                x5 = pipebit2 ^ y6;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(3, x5, x4, x3);
+            }
+            break;
+        case ADDR_PIPECFG_P16_32x32_16x16:
+            x4 = elemIdx1;
+            y3 = elemIdx0 ^ x4;
+            y4 = pipebit1 ^ x4;
+            x3 = pipebit0 ^ y3 ^ x4;
+            if((pitchInMacroTile % 4) == 0)
+            {   //multiple of 4
+                y5 = _BIT(y,5);
+                y6 = _BIT(y,6);
+                x5 = pipebit2 ^ y6;
+                x6 = pipebit3 ^ y5;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(4, x6, x5, x4, x3);
+            }
+            else
+            {
+                y6 = _BIT(y,6);
+                x5 = pipebit2 ^ y6;
+                *pY = Bits2Number(2, y4, y3);
+                *pX = Bits2Number(3, x5, x4, x3);
+            }
+            break;
+        default:
+            ADDR_UNHANDLED_CASE();
+    }
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::TileCoordToMaskElementIndex
+*
+*   @brief
+*       Compute element index from coordinates in tiles
+*   @return
+*       Element index
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::TileCoordToMaskElementIndex(
+    UINT_32         tx,                 ///< [in] x coord, in Tiles
+    UINT_32         ty,                 ///< [in] y coord, in Tiles
+    AddrPipeCfg     pipeConfig,         ///< [in] pipe config
+    UINT_32*        macroShift,         ///< [out] macro shift; 0 for an unhandled pipe config
+    UINT_32*        elemIdxBits         ///< [out] tile offset bits; 0 for an unhandled pipe config
+    ) const
+{
+    UINT_32 elemIdx = 0;
+    UINT_32 elemIdx0, elemIdx1, elemIdx2;
+
+    // Define the outputs up front so an unhandled pipe config cannot leave them indeterminate.
+    *macroShift  = 0;
+    *elemIdxBits = 0;
+
+    const UINT_32 tx0 = _BIT(tx,0), tx1 = _BIT(tx,1);
+    const UINT_32 ty0 = _BIT(ty,0), ty1 = _BIT(ty,1);
+
+    switch(pipeConfig)
+    {
+        case ADDR_PIPECFG_P2:
+            *macroShift = 3;
+            *elemIdxBits =3;
+            elemIdx2 = tx1;
+            elemIdx1 = tx1 ^ ty1;
+            elemIdx0 = tx1 ^ ty0;
+            elemIdx = Bits2Number(3,elemIdx2,elemIdx1,elemIdx0);
+            break;
+        case ADDR_PIPECFG_P4_8x16:
+            *macroShift = 2;
+            *elemIdxBits =2;
+            elemIdx1 = tx1;
+            elemIdx0 = tx1 ^ ty1;
+            elemIdx = Bits2Number(2,elemIdx1,elemIdx0);
+            break;
+        case ADDR_PIPECFG_P4_16x16:
+            *macroShift = 2;
+            *elemIdxBits =2;
+            elemIdx0 = tx1^ty0;
+            elemIdx1 = tx1;
+            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P4_16x32:
+            *macroShift = 2;
+            *elemIdxBits =2;
+            elemIdx0 = tx1^ty0;
+            elemIdx1 = tx1^ty1;
+            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P4_32x32:
+            *macroShift = 2;
+            *elemIdxBits =3;
+            elemIdx0 = tx1^ty0;
+            elemIdx1 = tx1^ty1;
+            elemIdx2 = tx1;
+            elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P8_16x16_8x16:
+            *macroShift = 1;
+            *elemIdxBits =1;
+            elemIdx0 = tx1;
+            elemIdx = elemIdx0;
+            break;
+        case ADDR_PIPECFG_P8_16x32_8x16:
+            *macroShift = 1;
+            *elemIdxBits =1;
+            elemIdx0 = tx0;
+            elemIdx = elemIdx0;
+            break;
+        case ADDR_PIPECFG_P8_32x32_8x16:
+            *macroShift = 1;
+            *elemIdxBits =2;
+            elemIdx1 = tx1;
+            elemIdx0 = tx1^ty1;
+            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P8_16x32_16x16:
+            *macroShift = 1;
+            *elemIdxBits =1;
+            elemIdx0 = tx0;
+            elemIdx = elemIdx0;
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x16:
+            *macroShift = 1;
+            *elemIdxBits =2;
+            elemIdx0 = tx1^ty0;
+            elemIdx1 = tx1;
+            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x32:
+            *macroShift = 1;
+            *elemIdxBits =2;
+            elemIdx0 =  tx1^ty0;
+            elemIdx1 = tx1^ty1;
+            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P8_32x64_32x32:
+            *macroShift = 1;
+            *elemIdxBits =3;
+            elemIdx0 = tx1^ty0;
+            elemIdx1 = tx1^ty1;
+            elemIdx2 = tx1;
+            elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P16_32x32_8x16:
+            *macroShift = 0;
+            *elemIdxBits =2;
+            elemIdx0 = tx1^ty1;
+            elemIdx1 = tx1;
+            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+            break;
+        case ADDR_PIPECFG_P16_32x32_16x16:
+            *macroShift = 0;
+            *elemIdxBits =2;
+            elemIdx0 = tx1^ty0;
+            elemIdx1 = tx1;
+            elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+            break;
+        default:
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+
+    return elemIdx;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeTileDataWidthAndHeightLinear
+*
+*   @brief
+*       Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
+*
+*   @return
+*       N/A
+*
+*   @note
+*       MacroWidth and macroHeight are measured in pixels
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlComputeTileDataWidthAndHeightLinear(
+    UINT_32*        pMacroWidth,     ///< [out] macro tile width
+    UINT_32*        pMacroHeight,    ///< [out] macro tile height
+    UINT_32         bpp,             ///< [in] bits per pixel
+    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info
+    ) const
+{
+    ADDR_ASSERT(pTileInfo != NULL);
+
+    /// Linear htile/cmask buffers are padded out to 4 tiles per side by default;
+    /// the pipe configs listed in the switch below are padded out to 8 tiles instead.
+    /// Actually there are more pipe configs which need 8-tile padding but SI family
+    /// has a bug which is fixed in CI family
+    UINT_32 tilesPerSide;
+
+    switch (pTileInfo->pipeConfig)
+    {
+        case ADDR_PIPECFG_P8_32x64_32x32:
+        case ADDR_PIPECFG_P16_32x32_8x16:
+        case ADDR_PIPECFG_P8_32x32_16x16:
+            tilesPerSide = 8;
+            break;
+        default:
+            tilesPerSide = 4;
+            break;
+    }
+
+    // Scale the tile count by the micro tile dimensions; bpp is not read here.
+    *pMacroWidth  = tilesPerSide * MicroTileWidth;
+    *pMacroHeight = tilesPerSide * MicroTileHeight;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeHtileBytes
+*
+*   @brief
+*       Compute htile size in bytes
+*
+*   @return
+*       Htile size in bytes
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlComputeHtileBytes(
+    UINT_32     pitch,          ///< [in] pitch
+    UINT_32     height,         ///< [in] height
+    UINT_32     bpp,            ///< [in] bits per pixel
+    BOOL_32     isLinear,       ///< [in] if it is linear mode
+    UINT_32     numSlices,      ///< [in] number of slices
+    UINT_64*    pSliceBytes,    ///< [out] bytes per slice
+    UINT_32     baseAlign       ///< [in] base alignments
+    ) const
+{   // Forwards every argument unchanged to ComputeHtileBytes and returns its result.
+    return ComputeHtileBytes(pitch, height, bpp, isLinear, numSlices, pSliceBytes, baseAlign);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeXmaskAddrFromCoord
+*
+*   @brief
+*       Compute address from coordinates for htile/cmask
+*   @return
+*       Byte address
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlComputeXmaskAddrFromCoord(
+    UINT_32        pitch,          ///< [in] pitch
+    UINT_32        height,         ///< [in] height
+    UINT_32        x,              ///< [in] x coord
+    UINT_32        y,              ///< [in] y coord
+    UINT_32        slice,          ///< [in] slice/depth index
+    UINT_32        numSlices,      ///< [in] number of slices
+    UINT_32        factor,         ///< [in] factor that indicates cmask(2) or htile(1)
+    BOOL_32        isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32        isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
+    BOOL_32        isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
+    ADDR_TILEINFO* pTileInfo,      ///< [in] Tile info
+    UINT_32*       pBitPosition    ///< [out] bit position inside a byte
+    ) const
+{   // NOTE: isWidth8 and isHeight8 are never read in this function.
+    UINT_32 tx = x / MicroTileWidth;
+    UINT_32 ty = y / MicroTileHeight;
+    UINT_32 newPitch;
+    UINT_32 newHeight;
+    UINT_64 totalBytes;
+    UINT_32 macroWidth;
+    UINT_32 macroHeight;
+    UINT_64 pSliceBytes;
+    UINT_32 pBaseAlign;
+    UINT_32 tileNumPerPipe;
+    UINT_32 elemBits;
+
+    if (factor == 2) //CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 256;
+
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroWidth,
+                         &macroHeight);
+        elemBits = CmaskElemBits;
+    }
+    else //HTile (taken for every factor other than 2)
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 512;
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         TRUE,
+                         TRUE,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroWidth,
+                         &macroHeight,
+                         &pSliceBytes,
+                         &pBaseAlign);
+        elemBits = 32;
+    }
+
+    const UINT_32 pitchInTile = newPitch / MicroTileWidth;
+    const UINT_32 heightInTile = newHeight / MicroTileHeight;
+    UINT_64 macroOffset; // Per pipe starting offset of the macro tile in which this tile lies.
+    UINT_64 microNumber; // Per pipe element number of this tile, relative to macroOffset.
+    UINT_32 microX;
+    UINT_32 microY;
+    UINT_64 microOffset;
+    UINT_32 microShift;
+    UINT_64 totalOffset;
+    UINT_32 elemIdxBits;
+    UINT_32 elemIdx =
+        TileCoordToMaskElementIndex(tx, ty, pTileInfo->pipeConfig, &microShift, &elemIdxBits);
+
+    UINT_32 numPipes = HwlGetPipes(pTileInfo);
+
+    if (isLinear)
+    {   //linear addressing
+        // Linear addressing is extremely wasteful of memory if slice > 1, since each pipe has the
+        // full slice memory foot print instead of divided by numPipes.
+        microX = tx / 4; // Macro Tile is 4x4
+        microY = ty / 4 ;
+        microNumber = static_cast<UINT_64>(microX + microY * (pitchInTile / 4)) << microShift;
+
+        UINT_32 sliceBits = pitchInTile * heightInTile;
+
+        // do htile single slice alignment if the flag is true
+        if (m_configFlags.useHtileSliceAlign && (factor == 1))  //Htile
+        {
+            sliceBits = PowTwoAlign(sliceBits, BITS_TO_BYTES(HtileCacheBits) * numPipes / elemBits);
+        }
+        macroOffset = static_cast<UINT_64>(slice) * (sliceBits / numPipes) * elemBits; // 64-bit math
+    }
+    else
+    {   //tiled addressing
+        const UINT_32 macroWidthInTile = macroWidth / MicroTileWidth; // Now in unit of Tiles
+        const UINT_32 macroHeightInTile = macroHeight / MicroTileHeight;
+        const UINT_32 pitchInCL = pitchInTile / macroWidthInTile;
+        const UINT_32 heightInCL = heightInTile / macroHeightInTile;
+
+        const UINT_32 macroX = x / macroWidth;
+        const UINT_32 macroY = y / macroHeight;
+        const UINT_32 macroNumber = macroX + macroY * pitchInCL + slice * pitchInCL * heightInCL;
+
+        // Per pipe starting offset of the cache line in which this tile lies.
+        microX = (x % macroWidth) / MicroTileWidth / 4; // Macro Tile is 4x4
+        microY = (y % macroHeight) / MicroTileHeight / 4 ;
+        microNumber = static_cast<UINT_64>(microX + microY * (macroWidth / MicroTileWidth / 4)) << microShift;
+
+        macroOffset = static_cast<UINT_64>(macroNumber) * tileNumPerPipe * elemBits; // 64-bit math
+    }
+
+    if(elemIdxBits == microShift)
+    {   // the low microShift bits are already zero, so the element index drops straight in
+        microNumber += elemIdx;
+    }
+    else
+    {   // clear the low elemIdxBits bits first, then merge the element index
+        microNumber >>= elemIdxBits;
+        microNumber <<= elemIdxBits;
+        microNumber += elemIdx;
+    }
+
+    microOffset = elemBits * microNumber;
+    totalOffset = microOffset + macroOffset;
+
+    UINT_32 pipe = ComputePipeFromCoord(x, y, 0, ADDR_TM_2D_TILED_THIN1, 0, FALSE, pTileInfo);
+    UINT_64 addrInBits = totalOffset % (m_pipeInterleaveBytes * 8) +
+                   pipe * (m_pipeInterleaveBytes * 8) +
+                   totalOffset / (m_pipeInterleaveBytes * 8) * (m_pipeInterleaveBytes * 8) * numPipes;
+    *pBitPosition = static_cast<UINT_32>(addrInBits) % 8;
+    UINT_64 addr = addrInBits / 8;
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeXmaskCoordFromAddr
+*
+*   @brief
+*       Compute the coord from an address of a cmask/htile
+*
+*   @return
+*       N/A
+*
+*   @note
+*       This method is reused by htile, so rename to Xmask
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlComputeXmaskCoordFromAddr(
+    UINT_64         addr,           ///< [in] address
+    UINT_32         bitPosition,    ///< [in] bitPosition in a byte
+    UINT_32         pitch,          ///< [in] pitch
+    UINT_32         height,         ///< [in] height
+    UINT_32         numSlices,      ///< [in] number of slices
+    UINT_32         factor,         ///< [in] factor that indicates cmask(2) or htile(1)
+    BOOL_32         isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32         isWidth8,       ///< [in] Not used by SI
+    BOOL_32         isHeight8,      ///< [in] Not used by SI
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
+    UINT_32*        pX,             ///< [out] x coord
+    UINT_32*        pY,             ///< [out] y coord
+    UINT_32*        pSlice          ///< [out] slice index
+    ) const
+{
+    UINT_32 newPitch;
+    UINT_32 newHeight;
+    UINT_64 totalBytes;
+    UINT_32 clWidth;
+    UINT_32 clHeight;
+    UINT_32 tileNumPerPipe;
+    UINT_64 sliceBytes;
+
+    *pX = 0;
+    *pY = 0;
+    *pSlice = 0;
+
+    if (factor == 2) //CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 256;
+
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &clWidth,
+                         &clHeight);
+    }
+    else //HTile (taken for every factor other than 2)
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 512;
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         TRUE,
+                         TRUE,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &clWidth,
+                         &clHeight,
+                         &sliceBytes);
+    }
+
+    const UINT_32 pitchInTile = newPitch / MicroTileWidth;
+    const UINT_32 heightInTile = newHeight / MicroTileHeight;
+    const UINT_32 pitchInMacroTile = pitchInTile / 4;
+    UINT_32 macroShift;
+    UINT_32 elemIdxBits;
+    // get macroShift and elemIdxBits
+    TileCoordToMaskElementIndex(0, 0, pTileInfo->pipeConfig, &macroShift, &elemIdxBits);
+
+    const UINT_32 numPipes = HwlGetPipes(pTileInfo);
+    const UINT_32 pipe = (UINT_32)((addr / m_pipeInterleaveBytes) % numPipes);
+    // per pipe
+    UINT_64 localOffset = (addr % m_pipeInterleaveBytes) +
+        (addr / m_pipeInterleaveBytes / numPipes)* m_pipeInterleaveBytes;
+
+    UINT_32 tileIndex;
+    if (factor == 2) //CMASK
+    {   // two elements per byte; a non-zero bitPosition selects the second one
+        tileIndex = (UINT_32)(localOffset * 2 + (bitPosition != 0));
+    }
+    else
+    {   // one element per 4 bytes
+        tileIndex = (UINT_32)(localOffset / 4);
+    }
+
+    UINT_32 macroOffset;
+    if (isLinear)
+    {
+        UINT_32 sliceSizeInTile = pitchInTile * heightInTile;
+
+        // do htile single slice alignment if the flag is true
+        if (m_configFlags.useHtileSliceAlign && (factor == 1))  //Htile
+        {
+            sliceSizeInTile = PowTwoAlign(sliceSizeInTile, static_cast<UINT_32>(sliceBytes) / 64);
+        }
+        *pSlice = tileIndex / (sliceSizeInTile / numPipes);
+        macroOffset = tileIndex % (sliceSizeInTile / numPipes);
+    }
+    else
+    {
+        const UINT_32 clWidthInTile = clWidth / MicroTileWidth; // Now in unit of Tiles
+        const UINT_32 clHeightInTile = clHeight / MicroTileHeight;
+        const UINT_32 pitchInCL = pitchInTile / clWidthInTile;
+        const UINT_32 heightInCL = heightInTile / clHeightInTile;
+        const UINT_32 clIndex = tileIndex / tileNumPerPipe;
+
+        UINT_32 clX = clIndex % pitchInCL;
+        UINT_32 clY = (clIndex % (heightInCL * pitchInCL)) / pitchInCL;
+
+        *pX = clX * clWidthInTile * MicroTileWidth;
+        *pY = clY * clHeightInTile * MicroTileHeight;
+        *pSlice = clIndex / (heightInCL * pitchInCL);
+
+        macroOffset = tileIndex % tileNumPerPipe;
+    }
+
+    UINT_32 elemIdx = macroOffset & 7; // keep the low 3 bits as the element index
+    macroOffset >>= elemIdxBits;
+
+    if (elemIdxBits != macroShift)
+    {
+        macroOffset <<= (elemIdxBits - macroShift);
+
+        UINT_32 pipebit1 = _BIT(pipe,1);
+        UINT_32 pipebit2 = _BIT(pipe,2);
+        UINT_32 pipebit3 = _BIT(pipe,3);
+        if (pitchInMacroTile % 2)
+        {   //odd
+            switch (pTileInfo->pipeConfig)
+            {
+                case ADDR_PIPECFG_P4_32x32:
+                    macroOffset |= pipebit1;
+                    break;
+                case ADDR_PIPECFG_P8_32x32_8x16:
+                case ADDR_PIPECFG_P8_32x32_16x16:
+                case ADDR_PIPECFG_P8_32x32_16x32:
+                    macroOffset |= pipebit2;
+                    break;
+                default:
+                    break;
+            }
+
+        }
+
+        if (pitchInMacroTile % 4)
+        {
+            if (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)
+            {
+                macroOffset |= (pipebit1<<1);
+            }
+            if((pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) ||
+               (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_16x16))
+            {
+                macroOffset |= (pipebit3<<1);
+            }
+        }
+    }
+
+    UINT_32 macroX;
+    UINT_32 macroY;
+
+    if (isLinear)
+    {
+        macroX = macroOffset % pitchInMacroTile;
+        macroY = macroOffset / pitchInMacroTile;
+    }
+    else
+    {
+        const UINT_32 clWidthInMacroTile = clWidth / (MicroTileWidth * 4);
+        macroX = macroOffset % clWidthInMacroTile;
+        macroY = macroOffset / clWidthInMacroTile;
+    }
+
+    *pX += macroX * 4 * MicroTileWidth;
+    *pY += macroY * 4 * MicroTileHeight;
+
+    UINT_32 microX;
+    UINT_32 microY;
+    ComputeTileCoordFromPipeAndElemIdx(elemIdx, pipe, pTileInfo->pipeConfig, pitchInMacroTile,
+                                       *pX, *pY, &microX, &microY);
+
+    *pX += microX * MicroTileWidth;
+    *pY += microY * MicroTileHeight;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetPitchAlignmentLinear
+*   @brief
+*       Get pitch alignment
+*   @return
+*       pitch alignment
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlGetPitchAlignmentLinear(
+    UINT_32             bpp,    ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags   ///< [in] surface flags
+    ) const
+{
+    // Size of one element as reported by BITS_TO_BYTES(bpp); both alignments below are
+    // expressed as a byte budget divided by this size.
+    const UINT_32 bytesPerElement = BITS_TO_BYTES(bpp);
+
+    if (!flags.interleaved)
+    {
+        // Default case: 64 bytes worth of elements, but never fewer than 8.
+        return Max(8u, 64 / bytesPerElement);
+    }
+
+    // Interleaved access requires a 256B aligned pitch, so fall back to pre-SI alignment:
+    // one pipe interleave (m_pipeInterleaveBytes) worth of elements, but never fewer
+    // than 64.
+    return Max(64u, m_pipeInterleaveBytes / bytesPerElement);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetSizeAdjustmentLinear
+*
+*   @brief
+*       Adjust linear surface pitch and slice size
+*
+*   @return
+*       Logical slice size in bytes
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlGetSizeAdjustmentLinear(
+    AddrTileMode        tileMode,       ///< [in] tile mode
+    UINT_32             bpp,            ///< [in] bits per pixel
+    UINT_32             numSamples,     ///< [in] number of samples
+    UINT_32             baseAlign,      ///< [in] base alignment
+    UINT_32             pitchAlign,     ///< [in] pitch alignment
+    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
+    UINT_32*            pHeight,        ///< [in/out] pointer to height
+    UINT_32*            pHeightAlign    ///< [in/out] pointer to height align
+    ) const
+{
+    // ADDR_TM_LINEAR_GENERAL: nothing is padded and no output is written; the size of the
+    // slice is reported exactly as the caller described it.
+    if (tileMode == ADDR_TM_LINEAR_GENERAL)
+    {
+        return BITS_TO_BYTES(static_cast<UINT_64>(*pPitch) * (*pHeight) * bpp * numSamples);
+    }
+
+    const UINT_32 rows     = *pHeight;
+    UINT_32       curPitch = *pPitch;
+
+    // A slice must hold a multiple of one pipe interleave worth of pixels, and at least 64.
+    const UINT_32 pixelsPerPipeInterleave = m_pipeInterleaveBytes / BITS_TO_BYTES(bpp);
+    const UINT_32 sliceAlignInPixel = (pixelsPerPipeInterleave > 64u) ? pixelsPerPipeInterleave : 64u;
+
+    // numSamples should be 1 in real cases (no MSAA for linear but TGL may pass non 1 value)
+    UINT_64 pixelPerSlice = static_cast<UINT_64>(curPitch) * rows * numSamples;
+
+    // Grow the pitch one pitchAlign step at a time until the slice meets that alignment.
+    while ((pixelPerSlice % sliceAlignInPixel) != 0)
+    {
+        curPitch     += pitchAlign;
+        pixelPerSlice = static_cast<UINT_64>(curPitch) * rows * numSamples;
+    }
+
+    *pPitch = curPitch;
+
+    // Smallest row count whose pixel total, at the padded pitch, is slice aligned.
+    UINT_32 rowAlign = 1;
+
+    while (((curPitch * rowAlign) % sliceAlignInPixel) != 0)
+    {
+        ++rowAlign;
+    }
+
+    *pHeightAlign = rowAlign;
+
+    return BITS_TO_BYTES(pixelPerSlice * bpp);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPreHandleBaseLvl3xPitch
+*
+*   @brief
+*       Pre-handler of 3x pitch (96 bit) adjustment
+*
+*   @return
+*       Expected pitch
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlPreHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
+    UINT_32                                 expPitch    ///< [in] pitch
+    ) const
+{
+    ADDR_ASSERT(pIn->width == expPitch);
+
+    // From SI, if pow2Pad is 1 the pitch is expanded 3x first, then padded to pow2, so nothing to
+    // do here
+    if (!pIn->flags.pow2Pad)
+    {
+        AddrLib::HwlPreHandleBaseLvl3xPitch(pIn, expPitch); // NOTE(review): result is discarded
+    }
+    else
+    {
+        ADDR_ASSERT(IsPow2(expPitch));
+    }
+
+    return expPitch; // expPitch is never modified in this function
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPostHandleBaseLvl3xPitch
+*
+*   @brief
+*       Post-handler of 3x pitch adjustment
+*
+*   @return
+*       Expected pitch
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlPostHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
+    UINT_32                                 expPitch    ///< [in] pitch
+    ) const
+{
+    /**
+     * @note The pitch will be divided by 3 in the end so the value will look odd but h/w should
+     *  be able to compute a correct pitch from it as h/w address library is doing the job.
+     */
+    // From SI, the pitch is expanded 3x first, then padded to pow2, so no special handler here
+    if (!pIn->flags.pow2Pad)
+    {
+        AddrLib::HwlPostHandleBaseLvl3xPitch(pIn, expPitch); // NOTE(review): result is discarded
+    }
+
+    return expPitch; // expPitch is never modified in this function
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetPitchAlignmentMicroTiled
+*
+*   @brief
+*       Compute 1D tiled surface pitch alignment
+*
+*   @return
+*       pitch alignment
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlGetPitchAlignmentMicroTiled(
+    AddrTileMode        tileMode,          ///< [in] tile mode
+    UINT_32             bpp,               ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
+    UINT_32             numSamples         ///< [in] number of samples
+    ) const
+{
+    // Fixed alignment used for every surface that is not flagged qbStereo.
+    const UINT_32 defaultPitchAlign = 8;
+
+    if (!flags.qbStereo)
+    {
+        return defaultPitchAlign;
+    }
+
+    // Surfaces flagged qbStereo take whatever the EgBasedAddrLib implementation computes
+    // from the same arguments; tileMode, bpp and numSamples are only read on this
+    // path.
+    return EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetSizeAdjustmentMicroTiled
+*
+*   @brief
+*       Adjust 1D tiled surface pitch and slice size
+*
+*   @return
+*       Logical slice size in bytes
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlGetSizeAdjustmentMicroTiled(
+    UINT_32             thickness,      ///< [in] thickness
+    UINT_32             bpp,            ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,          ///< [in] surface flags
+    UINT_32             numSamples,     ///< [in] number of samples
+    UINT_32             baseAlign,      ///< [in] base alignment
+    UINT_32             pitchAlign,     ///< [in] pitch alignment
+    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
+    UINT_32*            pHeight         ///< [in/out] pointer to height
+    ) const
+{
+    const UINT_32 rows     = *pHeight;
+    UINT_32       curPitch = *pPitch;
+    UINT_64       logicalSliceSize;
+
+    // Pitch alignment is always 8, so if slice size is not padded to base alignment
+    // (pipe_interleave_size), we need to increase pitch
+    for (;;)
+    {
+        // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually
+        // numSamples == 1)
+        logicalSliceSize =
+            BITS_TO_BYTES(static_cast<UINT_64>(curPitch) * rows * bpp * numSamples);
+
+        // Physical slice: multiplied by thickness
+        const UINT_64 physicalSliceSize = logicalSliceSize * thickness;
+
+        if ((physicalSliceSize % baseAlign) == 0)
+        {
+            break;
+        }
+
+        curPitch += pitchAlign;
+    }
+
+#if !ALT_TEST
+    //
+    // Special workaround for depth/stencil buffer, use 8 bpp to align depth buffer again since
+    // the stencil plane may have larger pitch if the slice size is smaller than base alignment.
+    //
+    // Note: this actually does not work for mipmap but mipmap depth texture is not really
+    // sampled with mipmap.
+    //
+    if (flags.depth && !flags.noStencil)
+    {
+        ADDR_ASSERT(numSamples == 1);
+
+        // 1 byte stencil
+        UINT_64 stencilSliceSize = static_cast<UINT_64>(curPitch) * rows;
+
+        // Stencil plane's pitch alignment is the same as depth plane's
+        while ((stencilSliceSize % baseAlign) != 0)
+        {
+            curPitch        += pitchAlign;
+            stencilSliceSize = static_cast<UINT_64>(curPitch) * rows;
+        }
+
+        if (curPitch != *pPitch)
+        {
+            // If this is a mipmap, this padded one cannot be sampled as a whole mipmap!
+            logicalSliceSize = stencilSliceSize * BITS_TO_BYTES(bpp);
+        }
+    }
+#endif
+    // No adjust for pHeight
+    *pPitch = curPitch;
+    return logicalSliceSize;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlConvertChipFamily
+*
+*   @brief
+*       Record in m_settings which SI ASIC the familyID (atiid.h) and revision denote
+*   @return
+*       AddrChipFamily (ADDR_CHIP_FAMILY_SI on every path)
+***************************************************************************************************
+*/
+AddrChipFamily SIAddrLib::HwlConvertChipFamily(
+    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
+    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
+{
+    if (uChipFamily == FAMILY_SI)
+    {
+        m_settings.isSouthernIsland = 1;
+        m_settings.isTahiti     = ASICREV_IS_TAHITI_P(uChipRevision);
+        m_settings.isPitCairn   = ASICREV_IS_PITCAIRN_PM(uChipRevision);
+        m_settings.isCapeVerde  = ASICREV_IS_CAPEVERDE_M(uChipRevision);
+        m_settings.isOland      = ASICREV_IS_OLAND_M(uChipRevision);
+        m_settings.isHainan     = ASICREV_IS_HAINAN_V(uChipRevision);
+    }
+    else
+    {
+        // Any other family is unexpected here; no m_settings flags are changed
+        ADDR_ASSERT(!"This should be a Fusion");
+    }
+
+    // ADDR_CHIP_FAMILY_SI is returned on both paths, including the asserting one,
+    // so the result does not tell the caller whether the family was recognized
+    return ADDR_CHIP_FAMILY_SI;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlSetupTileInfo
+*   @brief
+*       Setup default value of tile info for SI: when the tile info is all zero, a tile table
+*       index is chosen from tile mode/flags/bpp/samples and that entry is copied to the outputs
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlSetupTileInfo(
+    AddrTileMode                        tileMode,       ///< [in] Tile mode
+    ADDR_SURFACE_FLAGS                  flags,          ///< [in] Surface type flags
+    UINT_32                             bpp,            ///< [in] Bits per pixel
+    UINT_32                             pitch,          ///< [in] Pitch in pixels
+    UINT_32                             height,         ///< [in] Height in pixels
+    UINT_32                             numSamples,     ///< [in] Number of samples
+    ADDR_TILEINFO*                      pTileInfoIn,    ///< [in] Tile info input: NULL for default
+    ADDR_TILEINFO*                      pTileInfoOut,   ///< [out] Tile info output
+    AddrTileType                        inTileType,     ///< [in] Tile type
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut            ///< [out] Output
+    ) const
+{
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+    ADDR_TILEINFO* pTileInfo = pTileInfoOut;
+    INT index = TileIndexInvalid; // Remains invalid unless a table entry is chosen below
+
+    // Fail-safe code
+    if (!IsLinear(tileMode))
+    {
+        // 128 bpp/thick tiling must be non-displayable.
+        // Fmask reuse color buffer's entry but bank-height field can be from another entry
+        // To simplify the logic, fmask entry should be picked from non-displayable ones
+        if (bpp == 128 || thickness > 1 || flags.fmask || flags.prt)
+        {
+            inTileType = ADDR_NON_DISPLAYABLE;
+        }
+
+        if (flags.depth || flags.stencil)
+        {
+            inTileType = ADDR_DEPTH_SAMPLE_ORDER;
+        }
+    }
+
+    // Partial valid fields are not allowed for SI: defaults are chosen only for all-zero info.
+    if (IsTileInfoAllZero(pTileInfo))
+    {
+        if (IsMacroTiled(tileMode))
+        {
+            if (flags.prt)
+            {
+                if (numSamples == 1)
+                {
+                    if (flags.depth)
+                    {
+                        switch (bpp)
+                        {
+                            case 16:
+                                index = 3;
+                                break;
+                            case 32:
+                                index = 6;
+                                break;
+                            default:
+                                ADDR_ASSERT_ALWAYS();
+                                break;
+                        }
+                    }
+                    else
+                    {
+                        switch (bpp)
+                        {
+                            case 8:
+                                index = 21;
+                                break;
+                            case 16:
+                                index = 22;
+                                break;
+                            case 32:
+                                index = 23;
+                                break;
+                            case 64:
+                                index = 24;
+                                break;
+                            case 128:
+                                index = 25;
+                                break;
+                            default: // index is left as TileIndexInvalid, without asserting
+                                break;
+                        }
+
+                        if (thickness > 1)
+                        {
+                            ADDR_ASSERT(bpp != 128);
+                            index += 5; // NOTE(review): also applied if index is still invalid - confirm
+                        }
+                    }
+                }
+                else
+                {
+                    ADDR_ASSERT(numSamples == 4);
+
+                    if (flags.depth)
+                    {
+                        switch (bpp)
+                        {
+                            case 16:
+                                index = 5;
+                                break;
+                            case 32:
+                                index = 7;
+                                break;
+                            default:
+                                ADDR_ASSERT_ALWAYS();
+                                break;
+                        }
+                    }
+                    else
+                    {
+                        switch (bpp)
+                        {
+                            case 8:
+                                index = 23;
+                                break;
+                            case 16:
+                                index = 24;
+                                break;
+                            case 32:
+                                index = 25;
+                                break;
+                            case 64:
+                                index = 30;
+                                break;
+                            default:
+                                ADDR_ASSERT_ALWAYS();
+                                break;
+                        }
+                    }
+                }
+            }//end of PRT part
+            // See table entries 0-7
+            else if (flags.depth || flags.stencil)
+            {
+                if (flags.compressZ)
+                {
+                    if (flags.stencil)
+                    {
+                        index = 0;
+                    }
+                    else
+                    {
+                        // optimal tile index for compressed depth/stencil.
+                        switch (numSamples)
+                        {
+                            case 1:
+                                index = 0;
+                                break;
+                            case 2:
+                            case 4:
+                                index = 1;
+                                break;
+                            case 8:
+                                index = 2;
+                                break;
+                            default: // other sample counts leave index invalid
+                                break;
+                        }
+                    }
+                }
+                else // unCompressZ
+                {
+                    index = 3;
+                }
+            }
+            else //non PRT & non Depth & non Stencil
+            {
+                // See table entries 9-12
+                if (inTileType == ADDR_DISPLAYABLE)
+                {
+                    switch (bpp)
+                    {
+                        case 8:
+                            index = 10;
+                            break;
+                        case 16:
+                            index = 11;
+                            break;
+                        case 32:
+                            index = 12;
+                            break;
+                        case 64:
+                            index = 12; // same entry as 32 bpp
+                            break;
+                        default:
+                            break;
+                    }
+                }
+                else
+                {
+                    // See table entries 13-17
+                    if (thickness == 1)
+                    {
+                        if (flags.fmask)
+                        {
+                            UINT_32 fmaskPixelSize = bpp * numSamples;
+
+                            switch (fmaskPixelSize)
+                            {
+                                case 8:
+                                    index = 14;
+                                    break;
+                                case 16:
+                                    index = 15;
+                                    break;
+                                case 32:
+                                    index = 16;
+                                    break;
+                                case 64:
+                                    index = 17;
+                                    break;
+                                default:
+                                    ADDR_ASSERT_ALWAYS(); // last label, so no break follows
+                            }
+                        }
+                        else
+                        {
+                            switch (bpp)
+                            {
+                                case 8:
+                                    index = 14;
+                                    break;
+                                case 16:
+                                    index = 15;
+                                    break;
+                                case 32:
+                                    index = 16;
+                                    break;
+                                case 64:
+                                    index = 17;
+                                    break;
+                                case 128:
+                                    index = 17; // same entry as 64 bpp
+                                    break;
+                                default:
+                                    break;
+                            }
+                        }
+                    }
+                    else // thick tiling - entries 18-20
+                    {
+                        switch (thickness)
+                        {
+                            case 4:
+                                index = 20;
+                                break;
+                            case 8:
+                                index = 19;
+                                break;
+                            default:
+                                break;
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            if (tileMode == ADDR_TM_LINEAR_ALIGNED)
+            {
+                index = 8;
+            }
+            else if (tileMode == ADDR_TM_LINEAR_GENERAL)
+            {
+                index = TileIndexLinearGeneral;
+            }
+            else
+            {
+                if (flags.depth || flags.stencil)
+                {
+                    index = 4;
+                }
+                else if (inTileType == ADDR_DISPLAYABLE)
+                {
+                    index = 9;
+                }
+                else if (thickness == 1)
+                {
+                    index = 13;
+                }
+                else
+                {
+                    index = 18;
+                }
+            }
+        }
+
+        if (index >= 0 && index <= 31) // only indices 0..31 are looked up in m_tileTable
+        {
+            *pTileInfo      = m_tileTable[index].info;
+            pOut->tileType  = m_tileTable[index].type;
+        }
+
+        if (index == TileIndexLinearGeneral)
+        {
+            *pTileInfo      = m_tileTable[8].info; // same entry as ADDR_TM_LINEAR_ALIGNED above
+            pOut->tileType  = m_tileTable[8].type;
+        }
+    }
+    else
+    {
+        if (pTileInfoIn)
+        {
+            if (flags.stencil && pTileInfoIn->tileSplitBytes == 0)
+            {
+                // Stencil always uses index 0
+                *pTileInfo = m_tileTable[0].info;
+            }
+        }
+        // Pass through tile type
+        pOut->tileType = inTileType;
+    }
+
+    pOut->tileIndex = index; // still TileIndexInvalid if no default entry was selected above
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::DecodeGbRegs
+*
+*   @brief
+*       Decodes GB_ADDR_CONFIG and noOfBanks/noOfRanks into the corresponding members
+*
+*   @return
+*       TRUE if all settings are valid, FALSE if any field held an unhandled value
+*
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::DecodeGbRegs(
+    const ADDR_REGISTER_VALUE* pRegValue) ///< [in] create input
+{
+    GB_ADDR_CONFIG  gbAddrCfg;
+    BOOL_32         allValid = TRUE;
+
+    gbAddrCfg.val = pRegValue->gbAddrConfig;
+
+    switch (gbAddrCfg.f.pipe_interleave_size) // decoded into m_pipeInterleaveBytes
+    {
+        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
+            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
+            break;
+        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
+            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
+            break;
+        default:
+            allValid = FALSE;
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+
+    switch (gbAddrCfg.f.row_size) // decoded into m_rowSize
+    {
+        case ADDR_CONFIG_1KB_ROW:
+            m_rowSize = ADDR_ROWSIZE_1KB;
+            break;
+        case ADDR_CONFIG_2KB_ROW:
+            m_rowSize = ADDR_ROWSIZE_2KB;
+            break;
+        case ADDR_CONFIG_4KB_ROW:
+            m_rowSize = ADDR_ROWSIZE_4KB;
+            break;
+        default:
+            allValid = FALSE;
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+
+    switch (pRegValue->noOfBanks) // encoded bank count: 0/1/2 -> 4/8/16 banks
+    {
+        case 0:
+            m_banks = 4;
+            break;
+        case 1:
+            m_banks = 8;
+            break;
+        case 2:
+            m_banks = 16;
+            break;
+        default:
+            allValid = FALSE;
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+
+    switch (pRegValue->noOfRanks) // encoded rank count: 0/1 -> 1/2 ranks
+    {
+        case 0:
+            m_ranks = 1;
+            break;
+        case 1:
+            m_ranks = 2;
+            break;
+        default:
+            allValid = FALSE;
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+
+    m_logicalBanks = m_banks * m_ranks; // computed even when a field above was unhandled
+
+    ADDR_ASSERT(m_logicalBanks <= 16);
+
+    return allValid;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlInitGlobalParams
+*
+*   @brief
+*       Initializes global parameters: decoded GB registers, pipe count, tile setting table
+*
+*   @return
+*       TRUE if all settings are valid
+*
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlInitGlobalParams(
+    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
+{
+    const ADDR_REGISTER_VALUE& regValue = pCreateIn->regValue;
+
+    // Without valid GB register settings nothing else is initialized and the failure
+    // is reported to the caller
+    if (!DecodeGbRegs(&regValue))
+    {
+        return FALSE;
+    }
+
+    // The pipe count is chosen per ASIC:
+    //   Tahiti / Pitcairn  -> 8
+    //   Cape Verde / Oland -> 4
+    //   otherwise          -> 2 (Hainan is 2-pipe, m_settings.isHainan == 1)
+    const BOOL_32 has8Pipes = m_settings.isTahiti || m_settings.isPitCairn;
+    const BOOL_32 has4Pipes = m_settings.isCapeVerde || m_settings.isOland;
+
+    m_pipes = has8Pipes ? 8 : (has4Pipes ? 4 : 2);
+
+    // The tile setting table is initialized once m_pipes has been assigned; its result
+    // becomes the return value
+    const BOOL_32 tableValid =
+        InitTileSettingTable(regValue.pTileConfig, regValue.noOfEntries);
+
+    // m_maxSamples is set whether or not the table setup succeeded
+    m_maxSamples = 16;
+
+    return tableValid;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlConvertTileInfoToHW
+*   @brief
+*       Entry of si's ConvertTileInfoToHW: base class conversion, then pipeConfig is offset by one
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlConvertTileInfoToHW(
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode = EgBasedAddrLib::HwlConvertTileInfoToHW(pIn, pOut);
+
+    if (retCode != ADDR_OK)
+    {
+        return retCode;
+    }
+
+    if (pIn->reverse != FALSE)
+    {
+        // Reverse direction: the output pipe config is the input value plus one
+        pOut->pTileInfo->pipeConfig =
+            static_cast<AddrPipeCfg>(pIn->pTileInfo->pipeConfig + 1);
+    }
+    else if (pIn->pTileInfo->pipeConfig != ADDR_PIPECFG_INVALID)
+    {
+        // Forward direction: the output pipe config is the input value minus one
+        pOut->pTileInfo->pipeConfig =
+            static_cast<AddrPipeCfg>(pIn->pTileInfo->pipeConfig - 1);
+    }
+    else
+    {
+        // Forward direction with an invalid pipe config is rejected
+        retCode = ADDR_INVALIDPARAMS;
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeXmaskCoordYFrom8Pipe
+*
+*   @brief
+*       Compute the Y coord which will be added to Xmask Y
+*       coord. Not supported on SI: this implementation only asserts.
+*   @return
+*       Y coord (always 0 here)
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlComputeXmaskCoordYFrom8Pipe(
+    UINT_32         pipe,       ///< [in] pipe id
+    UINT_32         x           ///< [in] tile coord x, which is original x coord / 8
+    ) const
+{
+    // This function should never be called since it is 6xx/8xx specific.
+    // Keep this empty implementation to avoid any mis-use.
+    ADDR_ASSERT_ALWAYS();
+
+    return 0;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeSurfaceCoord2DFromBankPipe
+*
+*   @brief
+*       Compute surface x,y coordinates from bank/pipe info; the results are added to *pX/*pY
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlComputeSurfaceCoord2DFromBankPipe(
+    AddrTileMode        tileMode,   ///< [in] tile mode
+    UINT_32*            pX,         ///< [in/out] x coordinate
+    UINT_32*            pY,         ///< [in/out] y coordinate
+    UINT_32             slice,      ///< [in] slice index
+    UINT_32             bank,       ///< [in] bank number
+    UINT_32             pipe,       ///< [in] pipe number
+    UINT_32             bankSwizzle,///< [in] bank swizzle
+    UINT_32             pipeSwizzle,///< [in] pipe swizzle
+    UINT_32             tileSlices, ///< [in] slices in a micro tile
+    BOOL_32             ignoreSE,   ///< [in] TRUE if shader engines are ignored
+    ADDR_TILEINFO*      pTileInfo   ///< [in] bank structure. **All fields to be valid on entry**
+    ) const
+{
+    UINT_32 xBit;
+    UINT_32 yBit;
+    UINT_32 yBit3 = 0;
+    UINT_32 yBit4 = 0;
+    UINT_32 yBit5 = 0;
+    UINT_32 yBit6 = 0;
+
+    UINT_32 xBit3 = 0;
+    UINT_32 xBit4 = 0;
+    UINT_32 xBit5 = 0;
+
+    UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig);
+
+    CoordFromBankPipe xyBits = {0};
+    ComputeSurfaceCoord2DFromBankPipe(tileMode, *pX, *pY, slice, bank, pipe,
+                                      bankSwizzle, pipeSwizzle, tileSlices, pTileInfo,
+                                      &xyBits);
+    yBit3 = xyBits.yBit3;
+    yBit4 = xyBits.yBit4;
+    yBit5 = xyBits.yBit5;
+    yBit6 = xyBits.yBit6;
+
+    xBit3 = xyBits.xBit3;
+    xBit4 = xyBits.xBit4;
+    xBit5 = xyBits.xBit5;
+
+    yBit = xyBits.yBits; // sampled below for the two 32x32-based pipe configs
+
+    UINT_32 yBitTemp = 0;
+
+    if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) ||
+        (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))
+    {
+        ADDR_ASSERT(pTileInfo->bankWidth == 1 && pTileInfo->macroAspectRatio > 1);
+        UINT_32 yBitToCheck = QLog2(pTileInfo->banks) - 1;
+
+        ADDR_ASSERT(yBitToCheck <= 3);
+
+        yBitTemp = _BIT(yBit, yBitToCheck);
+
+        xBit3 = 0; // dropped here; x4 is instead derived from the bank bit in the switch below
+    }
+
+    yBit = Bits2Number(4, yBit6, yBit5, yBit4, yBit3); // yBit is reused for the combined value
+    xBit = Bits2Number(3, xBit5, xBit4, xBit3);
+
+    *pY += yBit * pTileInfo->bankHeight * MicroTileHeight;
+    *pX += xBit * numPipes * pTileInfo->bankWidth * MicroTileWidth;
+
+    //calculate the bank and pipe bits in x, y
+    UINT_32 xTile; //x in micro tile
+    UINT_32 x3 = 0;
+    UINT_32 x4 = 0;
+    UINT_32 x5 = 0;
+    UINT_32 x6 = 0; // only assigned by the P8_32x64_32x32 case
+    UINT_32 y = *pY; // the y value already updated above
+
+    UINT_32 pipeBit0 = _BIT(pipe,0);
+    UINT_32 pipeBit1 = _BIT(pipe,1);
+    UINT_32 pipeBit2 = _BIT(pipe,2);
+
+    UINT_32 y3 = _BIT(y, 3);
+    UINT_32 y4 = _BIT(y, 4);
+    UINT_32 y5 = _BIT(y, 5);
+    UINT_32 y6 = _BIT(y, 6);
+
+    // bankbit0 after ^x4^x5
+    UINT_32 bankBit00 = _BIT(bank,0);
+    UINT_32 bankBit0 = 0;
+
+    switch (pTileInfo->pipeConfig)
+    {
+        case ADDR_PIPECFG_P2:
+            x3 = pipeBit0 ^ y3;
+            break;
+        case ADDR_PIPECFG_P4_8x16:
+            x4 = pipeBit0 ^ y3;
+            x3 = pipeBit0 ^ y4;
+            break;
+        case ADDR_PIPECFG_P4_16x16:
+            x4 = pipeBit1 ^ y4;
+            x3 = pipeBit0 ^ y3 ^ x4;
+            break;
+        case ADDR_PIPECFG_P4_16x32:
+            x4 = pipeBit1 ^ y4;
+            x3 = pipeBit0 ^ y3 ^ x4;
+            break;
+        case ADDR_PIPECFG_P4_32x32:
+            x5 = pipeBit1 ^ y5;
+            x3 = pipeBit0 ^ y3 ^ x5;
+            bankBit0 = yBitTemp ^ x5;
+            x4 = bankBit00 ^ x5 ^ bankBit0;
+            *pX += x5 * 4 * 1 * 8; // x5 * num_pipes * bank_width * 8;
+            break;
+        case ADDR_PIPECFG_P8_16x16_8x16:
+            x3 = pipeBit1 ^ y5;
+            x4 = pipeBit2 ^ y4;
+            x5 = pipeBit0 ^ y3 ^ x4;
+            break;
+        case ADDR_PIPECFG_P8_16x32_8x16:
+            x3 = pipeBit1 ^ y4;
+            x4 = pipeBit2 ^ y5;
+            x5 = pipeBit0 ^ y3 ^ x4;
+            break;
+        case ADDR_PIPECFG_P8_32x32_8x16:
+            x3 = pipeBit1 ^ y4;
+            x5 = pipeBit2 ^ y5;
+            x4 = pipeBit0 ^ y3 ^ x5;
+            break;
+        case ADDR_PIPECFG_P8_16x32_16x16:
+            x4 = pipeBit2 ^ y5;
+            x5 = pipeBit1 ^ y4;
+            x3 = pipeBit0 ^ y3 ^ x4;
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x16:
+            x5 = pipeBit2 ^ y5;
+            x4 = pipeBit1 ^ y4;
+            x3 = pipeBit0 ^ y3 ^ x4;
+            break;
+        case ADDR_PIPECFG_P8_32x32_16x32:
+            x5 = pipeBit2 ^ y5;
+            x4 = pipeBit1 ^ y6;
+            x3 = pipeBit0 ^ y3 ^ x4;
+            break;
+        case ADDR_PIPECFG_P8_32x64_32x32:
+            x6 = pipeBit1 ^ y5;
+            x5 = pipeBit2 ^ y6;
+            x3 = pipeBit0 ^ y3 ^ x5;
+            bankBit0 = yBitTemp ^ x6;
+            x4 = bankBit00 ^ x5 ^ bankBit0;
+            *pX += x6 * 8 * 1 * 8; // x6 * num_pipes * bank_width * 8;
+            break;
+        default:
+            ADDR_ASSERT_ALWAYS(); // unknown pipe config: x3..x5 stay 0
+    }
+
+    xTile = Bits2Number(3, x5, x4, x3); // x6 is not included; it was added to *pX in its case
+
+    *pX += xTile << 3;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPreAdjustBank
+*
+*   @brief
+*       Adjust bank before calculating address according to bank/pipe
+*   @return
+*       Adjusted bank (unchanged unless pipeConfig is P4_32x32/P8_32x64_32x32 with bankWidth 1)
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlPreAdjustBank(
+    UINT_32         tileX,      ///< [in] x coordinate in unit of tile
+    UINT_32         bank,       ///< [in] bank
+    ADDR_TILEINFO*  pTileInfo   ///< [in] tile info
+    ) const
+{
+    if (((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) ||
+        (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) && (pTileInfo->bankWidth == 1))
+    {
+        UINT_32 bankBit0 = _BIT(bank, 0);
+        UINT_32 x4 = _BIT(tileX, 1);
+        UINT_32 x5 = _BIT(tileX, 2);
+        // Bank bit 0 is combined with tile x bits 1 and 2 (named x4/x5 here)
+        bankBit0 = bankBit0 ^ x4 ^ x5;
+        bank |= bankBit0; // NOTE(review): OR-ed in, so an already set bit 0 is never cleared
+
+        ADDR_ASSERT(pTileInfo->macroAspectRatio > 1);
+    }
+
+    return bank;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeSurfaceInfo
+*
+*   @brief
+*       Entry of si's ComputeSurfaceInfo: copies the input tile index, then delegates
+*   @return
+*       ADDR_E_RETURNCODE (the value returned by EgBasedAddrLib::HwlComputeSurfaceInfo)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlComputeSurfaceInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    pOut->tileIndex = pIn->tileIndex; // seeded with the caller's index before the base call
+
+    return EgBasedAddrLib::HwlComputeSurfaceInfo(pIn,pOut);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeMipLevel
+*   @brief
+*       Compute MipLevel info (including level 0); sub-level widths come from the base pitch
+*   @return
+*       TRUE if HWL's handled (always TRUE here)
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlComputeMipLevel(
+    ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in/out] Input structure
+    ) const
+{
+    // Level 0 is left alone because basePitch is itself calculated from level 0
+    const BOOL_32 isSubLevel = (pIn->mipLevel > 0);
+
+    // Expand 3x formats (96 bit) are excluded from the sanity check below: their base pitch
+    // is padded to pow2 and then divided by the expandX factor (3) to program the texture
+    // pitch, so basePitch is never pow2 for them even when pow2Pad is explicitly set.
+    if (isSubLevel && !AddrElemLib::IsExpand3x(pIn->format))
+    {
+        // On SI sub-level pitches are generated from the base level pitch rather than from
+        // the width. Nothing is asserted when pow2Pad is 0, as that is not really a mip chain.
+        ADDR_ASSERT(!pIn->flags.pow2Pad || ((pIn->basePitch != 0) && IsPow2(pIn->basePitch)));
+    }
+
+    // This level's width is the base pitch shifted down by the mip level, at least 1
+    if (isSubLevel && (pIn->basePitch != 0))
+    {
+        pIn->width = Max(1u, pIn->basePitch >> pIn->mipLevel);
+    }
+
+    // pow2Pad is done in PostComputeMipLevel
+
+    return TRUE;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlCheckLastMacroTiledLvl
+*
+*   @brief
+*       Sets pOut->last2DLevel to TRUE if the next mip level's tile mode is micro tiled
+*   @note
+*       Only acts on pow2Pad surfaces; pOut is left untouched otherwise
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlCheckLastMacroTiledLvl(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut      ///< [in/out] Output structure (used as input, too)
+    ) const
+{
+    // pow2Pad covers all mipmap cases, so nothing is done without it
+    if (!pIn->flags.pow2Pad)
+    {
+        return;
+    }
+
+    ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
+
+    // Next level's pitch. Default: base level or fail-safe case (basePitch == 0), where it
+    // is half of this level's output pitch.
+    UINT_32 nextPitch = pOut->pitch >> 1;
+
+    if ((pIn->mipLevel != 0) && (pIn->basePitch != 0))
+    {
+        // Sub levels: derived from the base pitch instead
+        nextPitch = pIn->basePitch >> (pIn->mipLevel + 1);
+    }
+
+    // nextHeight must be shifted from this level's original height rather than a pow2 padded
+    // one but this requires original height stored somewhere (pOut->height)
+    ADDR_ASSERT(pOut->height != 0);
+
+    // The next level's height is just the current level's >> 1 in pixels
+    UINT_32 nextHeight = pOut->height >> 1;
+
+    // Special formats such as FMT_1 and FMT_32_32_32 can be linear only, so only block
+    // compressed formats are considered: their height is divided by 4, rounding up
+    if (AddrElemLib::IsBlockCompressed(pIn->format))
+    {
+        nextHeight = (nextHeight + 3) / 4;
+    }
+
+    nextHeight = NextPow2(nextHeight);
+
+    // Volume surfaces halve the slice count for the next level; the shift may produce 0
+    // when this level has 1 slice, hence the clamp. Other surfaces keep numSlices.
+    UINT_32 nextSlices = pIn->numSlices;
+
+    if (pIn->flags.volume)
+    {
+        nextSlices = Max(1u, nextSlices >> 1);
+    }
+
+    // Query the tile mode the next level would get with the dimensions computed above
+    // and this level's alignments and tile info
+    const AddrTileMode nextTileMode =
+        ComputeSurfaceMipLevelTileMode(pIn->tileMode,
+                                       pIn->bpp,
+                                       nextPitch,
+                                       nextHeight,
+                                       nextSlices,
+                                       pIn->numSamples,
+                                       pOut->pitchAlign,
+                                       pOut->heightAlign,
+                                       pOut->pTileInfo);
+
+    // last2DLevel is set when the next level's tile mode is micro tiled
+    pOut->last2DLevel = IsMicroTiled(nextTileMode);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlDegradeThickTileMode
+*
+*   @brief
+*       Degrades valid tile mode for thick modes if needed; forwards to EgBasedAddrLib unchanged
+*
+*   @return
+*       Suitable tile mode
+***************************************************************************************************
+*/
+AddrTileMode SIAddrLib::HwlDegradeThickTileMode(
+    AddrTileMode        baseTileMode,   ///< [in] base tile mode
+    UINT_32             numSlices,      ///< [in] current number of slices
+    UINT_32*            pBytesPerTile   ///< [in/out] pointer to bytes per slice (TODO confirm unit)
+    ) const
+{
+    return EgBasedAddrLib::HwlDegradeThickTileMode(baseTileMode, numSlices, pBytesPerTile);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlTileInfoEqual
+*
+*   @brief
+*       Return TRUE if pipeConfig matches and the base class comparison also reports equality
+*   @note
+*       Only takes care of current HWL's data
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlTileInfoEqual(
+    const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand
+    const ADDR_TILEINFO* pRight ///<[in] Right compare operand
+    ) const
+{
+    // pipeConfig is compared here; a mismatch settles the result without the base call
+    if (pLeft->pipeConfig != pRight->pipeConfig)
+    {
+        return FALSE;
+    }
+
+    // Everything else is delegated to the base class comparison
+    return EgBasedAddrLib::HwlTileInfoEqual(pLeft, pRight);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::GetTileSetting
+*
+*   @brief
+*       Get tile setting infos by index.
+*   @return
+*       Tile setting info (pointer into m_tileTable).
+***************************************************************************************************
+*/
+const ADDR_TILECONFIG* SIAddrLib::GetTileSetting(
+    UINT_32 index          ///< [in] Tile index
+    ) const
+{
+    ADDR_ASSERT(index < m_noOfEntries); // the only range check; index is not clamped
+    return &m_tileTable[index];
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPostCheckTileIndex
+*
+*   @brief
+*       Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches
+*       tile mode/type/info and change the index if needed
+*   @return
+*       Tile index.
+***************************************************************************************************
+*/
+INT_32 SIAddrLib::HwlPostCheckTileIndex(
+    const ADDR_TILEINFO* pInfo,     ///< [in] Tile Info
+    AddrTileMode         mode,      ///< [in] Tile mode
+    AddrTileType         type,      ///< [in] Tile type
+    INT                  curIndex   ///< [in] Current index assigned in HwlSetupTileInfo
+    ) const
+{
+    // Start from the caller's index; it is kept when the table entry it names still
+    // matches the requested mode (and, for macro tiling, the requested tile info).
+    INT_32 index = curIndex;
+
+    if (mode == ADDR_TM_LINEAR_GENERAL)
+    {
+        index = TileIndexLinearGeneral;
+    }
+    else
+    {
+        const BOOL_32 macroTiled = IsMacroTiled(mode);
+        const INT_32  numEntries = static_cast<INT_32>(m_noOfEntries);
+
+        // curIndex may subscript m_tileTable only when it lies in [0, numEntries);
+        // TileIndexInvalid and every other out-of-range value mean "no usable index".
+        const BOOL_32 inRange = (index >= 0) && (index < numEntries);
+
+        // We need to find a new index if any of these is true
+        // 1. curIndex is invalid or outside the table
+        // 2. tile mode is changed
+        // 3. tile info does not match for macro tiled
+        if (!inRange                           ||
+            (mode != m_tileTable[index].mode)  ||
+            (macroTiled && !HwlTileInfoEqual(pInfo, &m_tileTable[index].info)))
+        {
+            for (index = 0; index < numEntries; index++)
+            {
+                // Every kind of tile mode needs the entry's mode to match.
+                if (mode != m_tileTable[index].mode)
+                {
+                    continue;
+                }
+
+                if (!macroTiled && (mode == ADDR_TM_LINEAR_ALIGNED))
+                {
+                    // linear mode only needs tile mode to match
+                    break;
+                }
+
+                // micro tile modes also need the tile type to match;
+                // macro tile modes need tile type and tile info to match
+                if ((type == m_tileTable[index].type) &&
+                    (!macroTiled || HwlTileInfoEqual(pInfo, &m_tileTable[index].info)))
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    // A search that runs off the end leaves index == m_noOfEntries: no entry matched.
+    ADDR_ASSERT(index < static_cast<INT_32>(m_noOfEntries));
+
+    if (index >= static_cast<INT_32>(m_noOfEntries))
+    {
+        index = TileIndexInvalid;
+    }
+
+    return index;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlSetupTileCfg
+*
+*   @brief
+*       Map tile index to tile setting.
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlSetupTileCfg(
+    INT_32          index,          ///< [in] Tile index
+    INT_32          macroModeIndex, ///< [in] Index in macro tile mode table(CI); not read in this function
+    ADDR_TILEINFO*  pInfo,          ///< [out] Tile Info (may be NULL)
+    AddrTileMode*   pMode,          ///< [out] Tile mode (may be NULL)
+    AddrTileType*   pType          ///< [out] Tile type (may be NULL)
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    // Global flag to control usage of tileIndex; when it says no, nothing is written and ADDR_OK is returned
+    if (UseTileIndex(index))
+    {
+        if (index == TileIndexLinearGeneral)
+        {
+            if (pMode)
+            {
+                *pMode = ADDR_TM_LINEAR_GENERAL;
+            }
+
+            if (pType)
+            {
+                *pType = ADDR_DISPLAYABLE;
+            }
+
+            if (pInfo) // fixed values are reported for linear general; the tile table is not consulted
+            {
+                pInfo->banks = 2;
+                pInfo->bankWidth = 1;
+                pInfo->bankHeight = 1;
+                pInfo->macroAspectRatio = 1;
+                pInfo->tileSplitBytes = 64;
+                pInfo->pipeConfig = ADDR_PIPECFG_P2;
+            }
+        }
+        else if (static_cast<UINT_32>(index) >= m_noOfEntries) // the unsigned cast also rejects negative indices
+        {
+            returnCode = ADDR_INVALIDPARAMS;
+        }
+        else
+        {
+            const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index);
+
+            if (pInfo)
+            {
+                *pInfo = pCfgTable->info;
+            }
+            else
+            {
+                if (IsMacroTiled(pCfgTable->mode)) // a macro tiled entry without a pInfo to fill is an error
+                {
+                    returnCode = ADDR_INVALIDPARAMS;
+                }
+            }
+
+            if (pMode) // mode and type are still written when the error above was raised
+            {
+                *pMode = pCfgTable->mode;
+            }
+
+            if (pType)
+            {
+                *pType = pCfgTable->type;
+            }
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::ReadGbTileMode
+*
+*   @brief
+*       Convert GB_TILE_MODE HW value to ADDR_TILECONFIG.
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID SIAddrLib::ReadGbTileMode(
+    UINT_32             regValue,   ///< [in] GB_TILE_MODE register
+    ADDR_TILECONFIG*    pCfg        ///< [out] output structure
+    ) const
+{
+    GB_TILE_MODE reg;
+    reg.val = regValue;
+    const UINT_32  arrayMode = reg.f.array_mode;
+    ADDR_TILEINFO& info      = pCfg->info;
+
+    // The register holds the tile info fields as shift counts or biased codes.
+    info.pipeConfig       = static_cast<AddrPipeCfg>(reg.f.pipe_config + 1);
+    info.tileSplitBytes   = 64 << reg.f.tile_split;
+    info.macroAspectRatio = 1 << reg.f.macro_tile_aspect;
+    info.banks            = 1 << (reg.f.num_banks + 1);
+    info.bankWidth        = 1 << reg.f.bank_width;
+    info.bankHeight       = 1 << reg.f.bank_height;
+    pCfg->type            = static_cast<AddrTileType>(reg.f.micro_tile_mode);
+
+    // array_mode converts directly to AddrTileMode except for value 8 and values from 14 up.
+    if (arrayMode == 8) //ARRAY_2D_TILED_XTHICK
+    {
+        pCfg->mode = ADDR_TM_2D_TILED_XTHICK;
+    }
+    else //ARRAY_3D_TILED_XTHICK (14) and above are offset by 3
+    {
+        pCfg->mode = static_cast<AddrTileMode>((arrayMode >= 14) ? (arrayMode + 3) : arrayMode);
+    }
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::InitTileSettingTable
+*
+*   @brief
+*       Initialize the ADDR_TILECONFIG table.
+*   @return
+*       TRUE if tile table is correctly initialized
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::InitTileSettingTable(
+    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
+    UINT_32         noOfEntries     ///< [in] Number of entries in the table above (0 selects TileTableSize)
+    )
+{
+    BOOL_32 initOk = TRUE;
+    ADDR_ASSERT(noOfEntries <= TileTableSize);
+    memset(m_tileTable, 0, sizeof(m_tileTable));
+
+    m_noOfEntries = noOfEntries;
+    if (m_noOfEntries == 0)
+    {
+        m_noOfEntries = TileTableSize;
+    }
+
+    if (m_noOfEntries > TileTableSize) // never overrun m_tileTable where the assert is compiled out
+    {
+        m_noOfEntries = 0;  // leave the table empty so later index lookups are rejected
+        initOk = FALSE;
+    }
+    else if (pCfg) // From Client
+    {
+        for (UINT_32 i = 0; i < m_noOfEntries; i++)
+        {
+            ReadGbTileMode(*(pCfg + i), &m_tileTable[i]);
+        }
+    }
+    else
+    {
+        ADDR_ASSERT_ALWAYS();
+        initOk = FALSE;
+    }
+
+    if (initOk)
+    {
+        ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED);
+    }
+
+    return initOk;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetTileIndex
+*
+*   @brief
+*       Return the virtual/real index for given mode/type/info
+*   @return
+*       ADDR_OK if successful.
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlGetTileIndex(
+    const ADDR_GET_TILEINDEX_INPUT* pIn,
+    ADDR_GET_TILEINDEX_OUTPUT*      pOut) const
+{
+    // No current index is passed, so the lookup is made from mode/type/info alone.
+    const INT_32 tileIndex = HwlPostCheckTileIndex(pIn->pTileInfo, pIn->tileMode, pIn->tileType);
+    pOut->index = tileIndex;
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlFmaskPreThunkSurfInfo
+*
+*   @brief
+*       Some preparation before thunking a ComputeSurfaceInfo call for Fmask
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlFmaskPreThunkSurfInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pFmaskIn,   ///< [in] Input of fmask info
+    const ADDR_COMPUTE_FMASK_INFO_OUTPUT*   pFmaskOut,  ///< [in] Output of fmask info (unused here)
+    ADDR_COMPUTE_SURFACE_INFO_INPUT*        pSurfIn,    ///< [out] Input of thunked surface info
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pSurfOut    ///< [out] Output of thunked surface info (unused here)
+    ) const
+{
+    pSurfIn->tileIndex = pFmaskIn->tileIndex; // the tile index is the only field carried over
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlFmaskPostThunkSurfInfo
+*
+*   @brief
+*       Copy hwl extra field after calling thunked ComputeSurfaceInfo
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlFmaskPostThunkSurfInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,   ///< [in] Output of surface info
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut           ///< [out] Output of fmask info
+    ) const
+{
+    pFmaskOut->macroModeIndex = TileIndexInvalid; // always reported as invalid by this implementation
+    pFmaskOut->tileIndex = pSurfOut->tileIndex;   // tile index computed for the thunked surface
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeFmaskBits
+*   @brief
+*       Computes fmask bits
+*   @return
+*       Fmask bits
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlComputeFmaskBits(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask input; numSamples, numFrags and resolved are read
+    UINT_32* pNumSamples                      ///< [out] Adjusted sample count, handed to SafeAssign
+    ) const
+{
+    UINT_32 numSamples = pIn->numSamples;
+    UINT_32 numFrags = GetNumFragments(numSamples, pIn->numFrags);
+    UINT_32 bpp; // assigned on every path below before it is returned
+
+    if (numFrags != numSamples) // EQAA
+    {
+        ADDR_ASSERT(numFrags <= 8);
+
+        if (!pIn->resolved)
+        {
+            if (numFrags == 1)
+            {
+                bpp          = 1;
+                numSamples   = numSamples == 16 ? 16 : 8; // anything other than 16 is reported as 8
+            }
+            else if (numFrags == 2)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp          = 2;
+                numSamples   = numSamples; // self-assignment: sample count is left unchanged
+            }
+            else if (numFrags == 4)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp          = 4;
+                numSamples   = numSamples; // self-assignment: sample count is left unchanged
+            }
+            else // numFrags == 8
+            {
+                ADDR_ASSERT(numSamples == 16);
+
+                bpp          = 4;
+                numSamples   = numSamples; // self-assignment: sample count is left unchanged
+            }
+        }
+        else
+        {
+            if (numFrags == 1)
+            {
+                bpp          = (numSamples == 16) ? 16 : 8;
+                numSamples   = 1;
+            }
+            else if (numFrags == 2)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp          = numSamples*2;
+                numSamples   = 1;
+            }
+            else if (numFrags == 4)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp          = numSamples*4;
+                numSamples   = 1;
+            }
+            else // numFrags == 8
+            {
+                ADDR_ASSERT(numSamples >= 16);
+
+                bpp          = 16*4;
+                numSamples   = 1;
+            }
+        }
+    }
+    else // Normal AA
+    {
+        if (!pIn->resolved)
+        {
+            bpp          = ComputeFmaskNumPlanesFromNumSamples(numSamples);
+            numSamples   = numSamples == 2 ? 8 : numSamples; // a 2-sample input is reported as 8 samples
+        }
+        else
+        {
+            // The same as 8XX
+            bpp          = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+            numSamples   = 1; // 1x sample
+        }
+    }
+
+    SafeAssign(pNumSamples, numSamples);
+
+    return bpp;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlOverrideTileMode
+*
+*   @brief
+*       Override tile modes (PRT only), so that a client cannot pass in a PRT mode invalid for SI.
+*
+*   @return
+*       TRUE if the tile mode was overridden
+*
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlOverrideTileMode(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,       ///< [in] input structure
+    AddrTileMode*                           pTileMode, ///< [in/out] pointer to the tile mode
+    AddrTileType*                           pTileType  ///< [in/out] pointer to the tile type
+    ) const
+{
+    const AddrTileMode requested   = *pTileMode;
+    AddrTileMode       replacement = requested;
+
+    // Each PRT mode is swapped for a non-PRT counterpart; any other mode passes through.
+    switch (requested)
+    {
+        case ADDR_TM_PRT_TILED_THIN1:
+            replacement = ADDR_TM_2D_TILED_THIN1;
+            break;
+
+        case ADDR_TM_PRT_TILED_THICK:    // both of these labels select
+        case ADDR_TM_PRT_2D_TILED_THICK: // the same 2D thick mode
+            replacement = ADDR_TM_2D_TILED_THICK;
+            break;
+
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            replacement = ADDR_TM_3D_TILED_THICK;
+            break;
+
+        default:
+            break;
+    }
+
+    if (replacement == requested)
+    {
+        return FALSE;
+    }
+
+    // Only the mode is rewritten; pTileType is not touched by this function.
+    *pTileMode = replacement;
+    ADDR_ASSERT(pIn->flags.prt == TRUE);
+
+    return TRUE;
+}
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,262 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file  siaddrlib.h
+* @brief Contains the SIAddrLib class definition.
+***************************************************************************************************
+*/
+
+#ifndef __SI_ADDR_LIB_H__
+#define __SI_ADDR_LIB_H__
+
+#include "addrlib.h"
+#include "egbaddrlib.h"
+
+/**
+***************************************************************************************************
+* @brief Describes the information in tile mode table
+***************************************************************************************************
+*/
+struct ADDR_TILECONFIG
+{
+    AddrTileMode  mode; ///< Tile mode of this table entry
+    AddrTileType  type; ///< Tile type of this table entry
+    ADDR_TILEINFO info; ///< Tile info of this table entry
+};
+
+/**
+***************************************************************************************************
+* @brief SI specific settings structure.
+***************************************************************************************************
+*/
+struct SIChipSettings
+{
+    struct // anonymous struct: every member below is a one-bit flag
+    {
+        UINT_32 isSouthernIsland    : 1; ///< NOTE(review): presumably the family-wide flag -- confirm where it is set
+        UINT_32 isTahiti            : 1;
+        UINT_32 isPitCairn          : 1;
+        UINT_32 isCapeVerde         : 1;
+        /// Oland/Hainan are of GFXIP 6.0, similar with SI
+        UINT_32 isOland             : 1;
+        UINT_32 isHainan            : 1;
+    };
+};
+
+/**
+***************************************************************************************************
+* @brief This class is the SI specific address library
+*        function set.
+***************************************************************************************************
+*/
+class SIAddrLib : public EgBasedAddrLib
+{
+public:
+    /// Creates SIAddrLib object (the only public member; the constructor is protected)
+    static AddrLib* CreateObj(const AddrClient* pClient)
+    {
+        return new(pClient) SIAddrLib(pClient); // uses the form of operator new that takes pClient
+    }
+
+protected:
+    SIAddrLib(const AddrClient* pClient);
+    virtual ~SIAddrLib();
+
+    // Hwl interface - defined in AddrLib
+    virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
+        const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+        ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
+
+    virtual UINT_64 HwlComputeXmaskAddrFromCoord(
+        UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices,
+        UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+        ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const;
+
+    virtual VOID HwlComputeXmaskCoordFromAddr(
+        UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+        UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+        ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
+
+    virtual ADDR_E_RETURNCODE HwlGetTileIndex(
+        const ADDR_GET_TILEINDEX_INPUT* pIn,
+        ADDR_GET_TILEINDEX_OUTPUT*      pOut) const;
+
+    virtual BOOL_32 HwlComputeMipLevel(
+        ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
+    virtual AddrChipFamily HwlConvertChipFamily(
+        UINT_32 uChipFamily, UINT_32 uChipRevision);
+
+    virtual BOOL_32 HwlInitGlobalParams(
+        const ADDR_CREATE_INPUT* pCreateIn);
+
+    virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
+        INT_32 index, INT_32 macroModeIndex,
+        ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
+
+    virtual VOID HwlComputeTileDataWidthAndHeightLinear(
+        UINT_32* pMacroWidth, UINT_32* pMacroHeight,
+        UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
+
+    virtual UINT_64 HwlComputeHtileBytes(
+        UINT_32 pitch, UINT_32 height, UINT_32 bpp,
+        BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const;
+
+    virtual UINT_32 ComputePipeFromCoord(
+        UINT_32 x, UINT_32 y, UINT_32 slice,
+        AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE,
+        ADDR_TILEINFO* pTileInfo) const;
+
+    virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const;
+
+    /// Pre-handler of 3x pitch (96 bit) adjustment
+    virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+    /// Post-handler of 3x pitch adjustment
+    virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+
+    /// Dummy function to finalize the inheritance
+    virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
+        UINT_32 pipe, UINT_32 x) const;
+
+    // Sub-hwl interface - defined in EgBasedAddrLib
+    virtual VOID HwlSetupTileInfo(
+        AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
+        UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+        ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
+        AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    virtual UINT_32 HwlGetPitchAlignmentMicroTiled(
+        AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
+
+    virtual UINT_64 HwlGetSizeAdjustmentMicroTiled(
+        UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+        UINT_32 baseAlign, UINT_32 pitchAlign,
+        UINT_32 *pPitch, UINT_32 *pHeight) const;
+
+    virtual VOID HwlCheckLastMacroTiledLvl(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+    virtual BOOL_32 HwlTileInfoEqual(
+        const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
+
+    virtual AddrTileMode HwlDegradeThickTileMode(
+        AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
+
+    virtual BOOL_32 HwlOverrideTileMode(
+        const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+        AddrTileMode* pTileMode,
+        AddrTileType* pTileType) const;
+
+    virtual BOOL_32 HwlSanityCheckMacroTiled(
+        ADDR_TILEINFO* pTileInfo) const
+    {
+        return TRUE; // no check is made at this level; pTileInfo is not examined
+    }
+
+    virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const;
+
+    virtual UINT_64 HwlGetSizeAdjustmentLinear(
+        AddrTileMode tileMode,
+        UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
+        UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const;
+
+    virtual VOID HwlComputeSurfaceCoord2DFromBankPipe(
+        AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
+        UINT_32 bank, UINT_32 pipe,
+        UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
+        BOOL_32 ignoreSE,
+        ADDR_TILEINFO* pTileInfo) const;
+
+    virtual UINT_32 HwlPreAdjustBank(
+        UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const;
+
+    virtual INT_32 HwlPostCheckTileIndex(
+        const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
+        INT curIndex = TileIndexInvalid) const;
+
+    virtual VOID   HwlFmaskPreThunkSurfInfo(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
+        const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
+        ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
+        ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
+
+    virtual VOID   HwlFmaskPostThunkSurfInfo(
+        const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
+        ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
+
+    virtual UINT_32 HwlComputeFmaskBits(
+        const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+        UINT_32* pNumSamples) const;
+
+    virtual BOOL_32 HwlReduceBankWidthHeight(
+        UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+        UINT_32 bankHeightAlign, UINT_32 pipes,
+        ADDR_TILEINFO* pTileInfo) const
+    {
+        return TRUE; // nothing is reduced at this level; the arguments are not examined
+    }
+
+    // Protected non-virtual functions
+    VOID ComputeTileCoordFromPipeAndElemIdx(
+        UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile,
+        UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const;
+
+    UINT_32 TileCoordToMaskElementIndex(
+        UINT_32 tx, UINT_32 ty, AddrPipeCfg  pipeConfig,
+        UINT_32 *macroShift, UINT_32 *elemIdxBits) const;
+
+    BOOL_32 DecodeGbRegs(
+        const ADDR_REGISTER_VALUE* pRegValue);
+
+    const ADDR_TILECONFIG* GetTileSetting(
+        UINT_32 index) const;
+
+    static const UINT_32    TileTableSize = 32;         ///< Capacity of m_tileTable
+    ADDR_TILECONFIG         m_tileTable[TileTableSize]; ///< Tile mode table, filled by InitTileSettingTable
+    UINT_32                 m_noOfEntries;              ///< Number of m_tileTable entries in use
+
+private:
+
+    UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const;
+
+    VOID ReadGbTileMode(
+        UINT_32 regValue, ADDR_TILECONFIG* pCfg) const;
+    BOOL_32 InitTileSettingTable(
+        const UINT_32 *pSetting, UINT_32 noOfEntries);
+
+    SIChipSettings          m_settings; ///< SI specific chip flags
+};
+
+#endif
+
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,784 @@
+/*
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#include "amdgpu_cs.h"
+
+#include "os/os_time.h"
+#include "state_tracker/drm_driver.h"
+#include <amdgpu_drm.h>
+#include <xf86drm.h>
+#include <stdio.h>
+
+static const struct pb_vtbl amdgpu_winsys_bo_vtbl;
+
+static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
+{
+   assert(bo->vtbl == &amdgpu_winsys_bo_vtbl); /* the cast is only meant for buffers carrying this vtable */
+   return (struct amdgpu_winsys_bo *)bo;       /* plain pointer cast; nothing is converted or copied */
+}
+
+struct amdgpu_bomgr {
+   struct pb_manager base;    /* first member, so a pb_manager pointer can be cast back (see get_winsys) */
+   struct amdgpu_winsys *rws; /* winsys this manager belongs to */
+};
+
+static struct amdgpu_winsys *get_winsys(struct pb_manager *mgr)
+{
+   return ((struct amdgpu_bomgr*)mgr)->rws; /* assumes mgr really is an amdgpu_bomgr; nothing checks it */
+}
+
+static struct amdgpu_winsys_bo *get_amdgpu_winsys_bo(struct pb_buffer *_buf)
+{
+   struct pb_buffer *base_buf;
+   pb_size offset;
+
+   /* The buffer itself carries our vtable: use it directly. */
+   if (_buf->vtbl == &amdgpu_winsys_bo_vtbl)
+      return amdgpu_winsys_bo(_buf);
+
+   /* Otherwise ask for its base buffer and accept that only if it is ours. */
+   pb_get_base_buffer(_buf, &base_buf, &offset);
+
+   if (base_buf->vtbl != &amdgpu_winsys_bo_vtbl)
+      return NULL;
+
+   return amdgpu_winsys_bo(base_buf);
+}
+
+static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
+                           enum radeon_bo_usage usage) /* usage is not consulted in this function */
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+   struct amdgpu_winsys *ws = bo->rws; /* NOTE(review): the lookup above can return NULL; not checked here */
+   int i;
+
+   if (bo->is_shared) {
+      /* We can't use user fences for shared buffers, because user fences
+       * are local to this process only. If we want to wait for all buffer
+       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
+       */
+      bool buffer_busy = true; /* value reported if the call below does not update it */
+      int r;
+
+      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
+      if (r)
+         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
+                 r);
+      return !buffer_busy;
+   }
+
+   if (timeout == 0) {
+      /* Timeout == 0 is quite simple: poll each fence once while holding the lock. */
+      pipe_mutex_lock(ws->bo_fence_lock);
+      for (i = 0; i < RING_LAST; i++)
+         if (bo->fence[i]) {
+            if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
+               /* Release the idle fence to avoid checking it again later. */
+               amdgpu_fence_reference(&bo->fence[i], NULL);
+            } else {
+               pipe_mutex_unlock(ws->bo_fence_lock); /* unlock before the early return */
+               return false;
+            }
+         }
+      pipe_mutex_unlock(ws->bo_fence_lock);
+      return true;
+
+   } else {
+      struct pipe_fence_handle *fence[RING_LAST] = {};
+      bool fence_idle[RING_LAST] = {};
+      bool buffer_idle = true;
+      int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+
+      /* Take references to all fences, so that we can wait for them
+       * without the lock. */
+      pipe_mutex_lock(ws->bo_fence_lock);
+      for (i = 0; i < RING_LAST; i++)
+         amdgpu_fence_reference(&fence[i], bo->fence[i]);
+      pipe_mutex_unlock(ws->bo_fence_lock);
+
+      /* Now wait for the fences; all of them share the one absolute deadline. */
+      for (i = 0; i < RING_LAST; i++) {
+         if (fence[i]) {
+            if (amdgpu_fence_wait(fence[i], abs_timeout, true))
+               fence_idle[i] = true;
+            else
+               buffer_idle = false;
+         }
+      }
+
+      /* Release idle fences to avoid checking them again later. */
+      pipe_mutex_lock(ws->bo_fence_lock);
+      for (i = 0; i < RING_LAST; i++) {
+         if (fence[i] == bo->fence[i] && fence_idle[i]) /* skip if bo->fence[i] changed meanwhile */
+            amdgpu_fence_reference(&bo->fence[i], NULL);
+
+         amdgpu_fence_reference(&fence[i], NULL); /* drop the local reference taken above */
+      }
+      pipe_mutex_unlock(ws->bo_fence_lock);
+
+      return buffer_idle;
+   }
+}
+
+static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
+      struct radeon_winsys_cs_handle *buf)
+{
+   return ((struct amdgpu_winsys_bo*)buf)->initial_domain; /* buf is cast directly to amdgpu_winsys_bo */
+}
+
+static void amdgpu_bo_destroy(struct pb_buffer *_buf)
+{
+   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+   int i;
+
+   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP); /* return value is ignored */
+   amdgpu_va_range_free(bo->va_handle);
+   amdgpu_bo_free(bo->bo);
+
+   for (i = 0; i < RING_LAST; i++)
+      amdgpu_fence_reference(&bo->fence[i], NULL); /* drop the fence reference held for each ring */
+
+   if (bo->initial_domain & RADEON_DOMAIN_VRAM) /* VRAM is tested first; GTT is adjusted only otherwise */
+      bo->rws->allocated_vram -= align(bo->base.size, bo->rws->gart_page_size);
+   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+      bo->rws->allocated_gtt -= align(bo->base.size, bo->rws->gart_page_size);
+   FREE(bo);
+}
+
+static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
+                           struct radeon_winsys_cs *rcs,
+                           enum pipe_transfer_usage usage)
+{
+   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
+   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
+   int r;
+   void *cpu = NULL;
+
+   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
+   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
+      if (usage & PIPE_TRANSFER_DONTBLOCK) {
+         if (!(usage & PIPE_TRANSFER_WRITE)) {
+            /* Mapping for read.
+             *
+             * Since we are mapping for read, we don't need to wait
+             * if the GPU is using the buffer for read too
+             * (neither one is changing it).
+             *
+             * Only check whether the buffer is being used for write. */
+            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
+                                                               RADEON_USAGE_WRITE)) {
+               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
+               return NULL;
+            }
+
+            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
+                                RADEON_USAGE_WRITE)) {
+               return NULL;
+            }
+         } else {
+            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
+               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
+               return NULL;
+            }
+
+            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
+                                RADEON_USAGE_READWRITE)) {
+               return NULL;
+            }
+         }
+      } else {
+         uint64_t time = os_time_get_nano();
+
+         if (!(usage & PIPE_TRANSFER_WRITE)) {
+            /* Mapping for read.
+             *
+             * Since we are mapping for read, we don't need to wait
+             * if the GPU is using the buffer for read too
+             * (neither one is changing it).
+             *
+             * Only check whether the buffer is being used for write. */
+            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
+                                                               RADEON_USAGE_WRITE)) {
+               cs->flush_cs(cs->flush_data, 0, NULL);
+            }
+            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
+                           RADEON_USAGE_WRITE);
+         } else {
+            /* Mapping for write. */
+            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo))
+               cs->flush_cs(cs->flush_data, 0, NULL);
+
+            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
+                           RADEON_USAGE_READWRITE);
+         }
+
+         bo->rws->buffer_wait_time += os_time_get_nano() - time;
+      }
+   }
+
+   /* If the buffer is created from user memory, return the user pointer. */
+   if (bo->user_ptr)
+       return bo->user_ptr;
+
+   r = amdgpu_bo_cpu_map(bo->bo, &cpu);
+   return r ? NULL : cpu;
+}
+
+static void amdgpu_bo_unmap(struct radeon_winsys_cs_handle *buf)
+{
+   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
+
+   amdgpu_bo_cpu_unmap(bo->bo);
+}
+
+static void amdgpu_bo_get_base_buffer(struct pb_buffer *buf,
+                                      struct pb_buffer **base_buf,
+                                      unsigned *offset)
+{
+   *base_buf = buf;
+   *offset = 0;
+}
+
+static enum pipe_error amdgpu_bo_validate(struct pb_buffer *_buf,
+                                          struct pb_validate *vl,
+                                          unsigned flags)
+{
+   /* Always pinned */
+   return PIPE_OK;
+}
+
+static void amdgpu_bo_fence(struct pb_buffer *buf,
+                            struct pipe_fence_handle *fence)
+{
+}
+
+static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
+   amdgpu_bo_destroy,
+   NULL, /* never called */
+   NULL, /* never called */
+   amdgpu_bo_validate,
+   amdgpu_bo_fence,
+   amdgpu_bo_get_base_buffer,
+};
+
+static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
+                                                pb_size size,
+                                                const struct pb_desc *desc)
+{
+   struct amdgpu_winsys *rws = get_winsys(_mgr);
+   struct amdgpu_bo_desc *rdesc = (struct amdgpu_bo_desc*)desc;
+   struct amdgpu_bo_alloc_request request = {0};
+   amdgpu_bo_handle buf_handle;
+   uint64_t va = 0;
+   struct amdgpu_winsys_bo *bo;
+   amdgpu_va_handle va_handle;
+   int r;
+
+   assert(rdesc->initial_domain & RADEON_DOMAIN_VRAM_GTT);
+   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+   if (!bo) {
+      return NULL;
+   }
+
+   request.alloc_size = size;
+   request.phys_alignment = desc->alignment;
+
+   if (rdesc->initial_domain & RADEON_DOMAIN_VRAM) {
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
+      if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
+         request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+   }
+   if (rdesc->initial_domain & RADEON_DOMAIN_GTT) {
+      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+      if (rdesc->flags & RADEON_FLAG_GTT_WC)
+         request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+   }
+
+   r = amdgpu_bo_alloc(rws->dev, &request, &buf_handle);
+   if (r) {
+      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
+      fprintf(stderr, "amdgpu:    size      : %d bytes\n", size);
+      fprintf(stderr, "amdgpu:    alignment : %d bytes\n", desc->alignment);
+      fprintf(stderr, "amdgpu:    domains   : %d\n", rdesc->initial_domain);
+      goto error_bo_alloc;
+   }
+
+   r = amdgpu_va_range_alloc(rws->dev, amdgpu_gpu_va_range_general,
+                             size, desc->alignment, 0, &va, &va_handle, 0);
+   if (r)
+      goto error_va_alloc;
+
+   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
+   if (r)
+      goto error_va_map;
+
+   pipe_reference_init(&bo->base.reference, 1);
+   bo->base.alignment = desc->alignment;
+   bo->base.usage = desc->usage;
+   bo->base.size = size;
+   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
+   bo->rws = rws;
+   bo->bo = buf_handle;
+   bo->va = va;
+   bo->va_handle = va_handle;
+   bo->initial_domain = rdesc->initial_domain;
+   bo->unique_id = __sync_fetch_and_add(&rws->next_bo_unique_id, 1);
+
+   if (rdesc->initial_domain & RADEON_DOMAIN_VRAM)
+      rws->allocated_vram += align(size, rws->gart_page_size);
+   else if (rdesc->initial_domain & RADEON_DOMAIN_GTT)
+      rws->allocated_gtt += align(size, rws->gart_page_size);
+
+   return &bo->base;
+
+error_va_map:
+   amdgpu_va_range_free(va_handle);
+
+error_va_alloc:
+   amdgpu_bo_free(buf_handle);
+
+error_bo_alloc:
+   FREE(bo);
+   return NULL;
+}
+
+static void amdgpu_bomgr_flush(struct pb_manager *mgr)
+{
+   /* NOP */
+}
+
+/* This is for the cache bufmgr. */
+static boolean amdgpu_bomgr_is_buffer_busy(struct pb_manager *_mgr,
+                                           struct pb_buffer *_buf)
+{
+   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+
+   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
+      return TRUE;
+   }
+
+   if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) {
+      return TRUE;
+   }
+
+   return FALSE;
+}
+
+static void amdgpu_bomgr_destroy(struct pb_manager *mgr)
+{
+   FREE(mgr);
+}
+
+struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws)
+{
+   struct amdgpu_bomgr *mgr;
+
+   mgr = CALLOC_STRUCT(amdgpu_bomgr);
+   if (!mgr)
+      return NULL;
+
+   mgr->base.destroy = amdgpu_bomgr_destroy;
+   mgr->base.create_buffer = amdgpu_bomgr_create_bo;
+   mgr->base.flush = amdgpu_bomgr_flush;
+   mgr->base.is_buffer_busy = amdgpu_bomgr_is_buffer_busy;
+
+   mgr->rws = rws;
+   return &mgr->base;
+}
+
+static unsigned eg_tile_split(unsigned tile_split)
+{ /* Decode a TILE_SPLIT field value: 0..6 map to 64..4096 (powers of two). */
+   switch (tile_split) {
+   case 0:     tile_split = 64;    break;
+   case 1:     tile_split = 128;   break;
+   case 2:     tile_split = 256;   break;
+   case 3:     tile_split = 512;   break;
+   default: /* any value outside 0..6 is treated like 4 (1024) */
+   case 4:     tile_split = 1024;  break;
+   case 5:     tile_split = 2048;  break;
+   case 6:     tile_split = 4096;  break;
+   }
+   return tile_split;
+}
+
+static unsigned eg_tile_split_rev(unsigned eg_tile_split)
+{ /* Inverse of eg_tile_split(): 64..4096 map back to field values 0..6. */
+   switch (eg_tile_split) {
+   case 64:    return 0;
+   case 128:   return 1;
+   case 256:   return 2;
+   case 512:   return 3;
+   default: /* any value not listed here encodes as 4 (1024) */
+   case 1024:  return 4;
+   case 2048:  return 5;
+   case 4096:  return 6;
+   }
+}
+
+/* Read back the BO's tiling metadata; nothing is written if the query fails.
+ * bankw/bankh/tile_split/mtilea are filled only when all four are non-NULL. */
+static void amdgpu_bo_get_tiling(struct pb_buffer *_buf,
+                                 enum radeon_bo_layout *microtiled,
+                                 enum radeon_bo_layout *macrotiled,
+                                 unsigned *bankw, unsigned *bankh,
+                                 unsigned *tile_split,
+                                 unsigned *stencil_tile_split,
+                                 unsigned *mtilea,
+                                 bool *scanout)
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+   struct amdgpu_bo_info info = {0};
+   uint32_t tiling_flags;
+
+   if (amdgpu_bo_query_info(bo->bo, &info))
+      return;
+
+   tiling_flags = info.metadata.tiling_info;
+
+   *microtiled = RADEON_LAYOUT_LINEAR;
+   *macrotiled = RADEON_LAYOUT_LINEAR;
+
+   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
+      *macrotiled = RADEON_LAYOUT_TILED;
+   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
+      *microtiled = RADEON_LAYOUT_TILED;
+
+   if (bankw && bankh && tile_split && mtilea) {
+      *bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+      *bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+      *tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
+      *mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+   }
+   if (scanout)
+      *scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
+}
+
+static void amdgpu_bo_set_tiling(struct pb_buffer *_buf,
+                                 struct radeon_winsys_cs *rcs,
+                                 enum radeon_bo_layout microtiled,
+                                 enum radeon_bo_layout macrotiled,
+                                 unsigned pipe_config,
+                                 unsigned bankw, unsigned bankh,
+                                 unsigned tile_split,
+                                 unsigned stencil_tile_split,
+                                 unsigned mtilea, unsigned num_banks,
+                                 uint32_t pitch,
+                                 bool scanout)
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+   struct amdgpu_bo_metadata metadata = {0};
+   uint32_t tiling_flags = 0;
+
+   if (macrotiled == RADEON_LAYOUT_TILED)
+      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
+   else if (microtiled == RADEON_LAYOUT_TILED)
+      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
+   else
+      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
+
+   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, pipe_config);
+   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(bankw));
+   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(bankh));
+   if (tile_split)
+      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(tile_split));
+   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(mtilea));
+   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(num_banks)-1);
+
+   if (scanout)
+      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
+   else
+      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
+
+   metadata.tiling_info = tiling_flags;
+
+   amdgpu_bo_set_metadata(bo->bo, &metadata);
+}
+
+static struct radeon_winsys_cs_handle *amdgpu_get_cs_handle(struct pb_buffer *_buf)
+{
+   /* return a direct pointer to amdgpu_winsys_bo. */
+   return (struct radeon_winsys_cs_handle*)get_amdgpu_winsys_bo(_buf);
+}
+
+/* Allocate a BO through ws->cman (use_reusable_pool) or ws->kman. */
+static struct pb_buffer *
+amdgpu_bo_create(struct radeon_winsys *rws,
+                 unsigned size,
+                 unsigned alignment,
+                 boolean use_reusable_pool,
+                 enum radeon_bo_domain domain,
+                 enum radeon_bo_flag flags)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_bo_desc desc;
+   struct pb_manager *provider;
+   struct pb_buffer *buffer;
+
+   /* Don't use VRAM if the GPU doesn't have much. This is only the initial
+    * domain. The kernel is free to move the buffer if it wants to.
+    * 64MB means no VRAM by today's standards.
+    */
+   if (domain & RADEON_DOMAIN_VRAM && ws->info.vram_size <= 64*1024*1024) {
+      domain = RADEON_DOMAIN_GTT;
+      flags = RADEON_FLAG_GTT_WC; /* replaces whatever flags the caller passed */
+   }
+
+   memset(&desc, 0, sizeof(desc));
+   desc.base.alignment = alignment;
+
+   /* Align size to page size. This is the minimum alignment for normal
+    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
+    * like constant/uniform buffers, can benefit from better and more reuse.
+    */
+   size = align(size, ws->gart_page_size);
+
+   /* Only set one usage bit each for domains and flags, or the cache manager
+    * might consider different sets of domains / flags compatible
+    */
+   if (domain == RADEON_DOMAIN_VRAM_GTT)
+      desc.base.usage = 1 << 2;
+   else
+      desc.base.usage = domain >> 1;
+   assert(flags < sizeof(desc.base.usage) * 8 - 3);
+   desc.base.usage |= 1 << (flags + 3);
+
+   desc.initial_domain = domain;
+   desc.flags = flags;
+
+   /* Assign a buffer manager. */
+   if (use_reusable_pool)
+      provider = ws->cman;
+   else
+      provider = ws->kman;
+
+   buffer = provider->create_buffer(provider, size, &desc.base);
+   if (!buffer)
+      return NULL;
+
+   return (struct pb_buffer*)buffer;
+}
+
+/* Import a BO shared by flink name or dma-buf fd and map it into the GPU VA
+ * space. Returns NULL on failure after releasing what was acquired so far. */
+static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
+                                               struct winsys_handle *whandle,
+                                               unsigned *stride)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_winsys_bo *bo;
+   enum amdgpu_bo_handle_type type;
+   struct amdgpu_bo_import_result result = {0};
+   uint64_t va;
+   amdgpu_va_handle va_handle;
+   struct amdgpu_bo_info info = {0};
+   enum radeon_bo_domain initial = 0;
+   int r;
+
+   /* Initialize the structure. */
+   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+   if (!bo)
+      return NULL;
+
+   switch (whandle->type) {
+   case DRM_API_HANDLE_TYPE_SHARED:
+      type = amdgpu_bo_handle_type_gem_flink_name;
+      break;
+   case DRM_API_HANDLE_TYPE_FD:
+      type = amdgpu_bo_handle_type_dma_buf_fd;
+      break;
+   default:
+      goto error; /* unsupported handle type; bo must still be freed */
+   }
+
+   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
+   if (r)
+      goto error;
+
+   /* Get initial domains. */
+   r = amdgpu_bo_query_info(result.buf_handle, &info);
+   if (r)
+      goto error_query;
+
+   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
+   if (r)
+      goto error_query;
+
+   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
+   if (r)
+      goto error_va_map;
+
+   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
+      initial |= RADEON_DOMAIN_VRAM;
+   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
+      initial |= RADEON_DOMAIN_GTT;
+
+   pipe_reference_init(&bo->base.reference, 1);
+   bo->base.alignment = info.phys_alignment;
+   bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
+   bo->bo = result.buf_handle;
+   bo->base.size = result.alloc_size;
+   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
+   bo->rws = ws;
+   bo->va = va;
+   bo->va_handle = va_handle;
+   bo->initial_domain = initial;
+   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+   bo->is_shared = true;
+
+   if (stride)
+      *stride = whandle->stride;
+
+   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+      ws->allocated_vram += align(bo->base.size, ws->gart_page_size);
+   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+      ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+
+   return &bo->base;
+
+error_va_map:
+   amdgpu_va_range_free(va_handle);
+
+error_query:
+   amdgpu_bo_free(result.buf_handle);
+
+error:
+   FREE(bo);
+   return NULL;
+}
+
+static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
+                                    unsigned stride,
+                                    struct winsys_handle *whandle)
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(buffer);
+   enum amdgpu_bo_handle_type type;
+   int r;
+
+   if ((void*)bo != (void*)buffer)
+      pb_cache_manager_remove_buffer(buffer);
+
+   switch (whandle->type) {
+   case DRM_API_HANDLE_TYPE_SHARED:
+      type = amdgpu_bo_handle_type_gem_flink_name;
+      break;
+   case DRM_API_HANDLE_TYPE_FD:
+      type = amdgpu_bo_handle_type_dma_buf_fd;
+      break;
+   case DRM_API_HANDLE_TYPE_KMS:
+      type = amdgpu_bo_handle_type_kms;
+      break;
+   default:
+      return FALSE;
+   }
+
+   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
+   if (r)
+      return FALSE;
+
+   whandle->stride = stride;
+   bo->is_shared = true;
+   return TRUE;
+}
+
+static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
+					    void *pointer, unsigned size)
+{
+    struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+    amdgpu_bo_handle buf_handle;
+    struct amdgpu_winsys_bo *bo;
+    uint64_t va;
+    amdgpu_va_handle va_handle;
+
+    bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+    if (!bo)
+        return NULL;
+
+    if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
+        goto error;
+
+    if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+                              size, 1 << 12, 0, &va, &va_handle, 0))
+        goto error_va_alloc;
+
+    if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
+        goto error_va_map;
+
+    /* Initialize it. */
+    pipe_reference_init(&bo->base.reference, 1);
+    bo->bo = buf_handle;
+    bo->base.alignment = 0;
+    bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
+    bo->base.size = size;
+    bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
+    bo->rws = ws;
+    bo->user_ptr = pointer;
+    bo->va = va;
+    bo->va_handle = va_handle;
+    bo->initial_domain = RADEON_DOMAIN_GTT;
+    bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+
+    ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+
+    return (struct pb_buffer*)bo;
+
+error_va_map:
+    amdgpu_va_range_free(va_handle);
+
+error_va_alloc:
+    amdgpu_bo_free(buf_handle);
+
+error:
+    FREE(bo);
+    return NULL;
+}
+
+static uint64_t amdgpu_bo_get_va(struct radeon_winsys_cs_handle *buf)
+{
+   return ((struct amdgpu_winsys_bo*)buf)->va;
+}
+
+void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws)
+{
+   ws->base.buffer_get_cs_handle = amdgpu_get_cs_handle;
+   ws->base.buffer_set_tiling = amdgpu_bo_set_tiling;
+   ws->base.buffer_get_tiling = amdgpu_bo_get_tiling;
+   ws->base.buffer_map = amdgpu_bo_map;
+   ws->base.buffer_unmap = amdgpu_bo_unmap;
+   ws->base.buffer_wait = amdgpu_bo_wait;
+   ws->base.buffer_create = amdgpu_bo_create;
+   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
+   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
+   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
+   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
+   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#ifndef AMDGPU_BO_H
+#define AMDGPU_BO_H
+
+#include "amdgpu_winsys.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+struct amdgpu_bo_desc {
+   struct pb_desc base;
+
+   enum radeon_bo_domain initial_domain;
+   unsigned flags;
+};
+
+struct amdgpu_winsys_bo {
+   struct pb_buffer base;
+
+   struct amdgpu_winsys *rws;
+   void *user_ptr; /* from buffer_from_ptr */
+
+   amdgpu_bo_handle bo;
+   uint32_t unique_id;
+   amdgpu_va_handle va_handle;
+   uint64_t va;
+   enum radeon_bo_domain initial_domain;
+
+   /* how many command streams is this bo referenced in? */
+   int num_cs_references;
+
+   /* whether buffer_get_handle or buffer_from_handle was called,
+    * it can only transition from false to true
+    */
+   volatile int is_shared; /* bool (int for atomicity) */
+
+   /* Fences for buffer synchronization. */
+   struct pipe_fence_handle *fence[RING_LAST];
+};
+
+struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws);
+void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws);
+
+static inline
+void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst,
+                                struct amdgpu_winsys_bo *src)
+{
+   pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
+}
+
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,704 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#include "amdgpu_cs.h"
+#include "os/os_time.h"
+#include <stdio.h>
+#include <amdgpu_drm.h>
+
+
+/* FENCES */
+
+static struct pipe_fence_handle *
+amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
+                    unsigned ip_instance, unsigned ring)
+{
+   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
+   /* NOTE(review): fence is dereferenced below without a NULL check. */
+   fence->reference.count = 1;
+   fence->ctx = ctx;
+   fence->fence.context = ctx->ctx;
+   fence->fence.ip_type = ip_type;
+   fence->fence.ip_instance = ip_instance;
+   fence->fence.ring = ring;
+   p_atomic_inc(&ctx->refcount); /* the new fence holds a reference on ctx */
+   return (struct pipe_fence_handle *)fence;
+}
+
+static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
+				struct amdgpu_cs_request* request,
+				uint64_t *user_fence_cpu_address)
+{
+   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
+
+   rfence->fence.fence = request->seq_no;
+   rfence->user_fence_cpu_address = user_fence_cpu_address;
+}
+
+static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
+{
+   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
+
+   rfence->signalled = true;
+}
+
+/* Returns true if the fence signalled, false on timeout or query failure. */
+bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
+                       bool absolute)
+{
+   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
+   uint32_t expired;
+   int64_t abs_timeout;
+   uint64_t *user_fence_cpu;
+   int r;
+
+   if (rfence->signalled)
+      return true;
+
+   if (absolute)
+      abs_timeout = timeout;
+   else
+      abs_timeout = os_time_get_absolute_timeout(timeout);
+   /* Fast path: check the CPU-visible user fence value before asking libdrm. */
+   user_fence_cpu = rfence->user_fence_cpu_address;
+   if (user_fence_cpu && *user_fence_cpu >= rfence->fence.fence) {
+      rfence->signalled = true;
+      return true;
+   }
+   /* Now use the libdrm query. */
+   r = amdgpu_cs_query_fence_status(&rfence->fence, abs_timeout,
+                                    AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE,
+                                    &expired);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
+      return false;
+   }
+
+   if (expired) {
+      /* This variable can only transition from false to true, so it doesn't
+       * matter if threads race for it. */
+      rfence->signalled = true;
+      return true;
+   }
+   return false;
+}
+
+static bool amdgpu_fence_wait_rel_timeout(struct radeon_winsys *rws,
+                                          struct pipe_fence_handle *fence,
+                                          uint64_t timeout)
+{
+   return amdgpu_fence_wait(fence, timeout, false);
+}
+
+/* CONTEXTS */
+
+/* Create a context and map its user-fence buffer. Returns NULL on failure. */
+static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws)
+{
+   struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx);
+   int r;
+   struct amdgpu_bo_alloc_request alloc_buffer = {};
+   amdgpu_bo_handle buf_handle;
+
+   if (!ctx)
+      return NULL;
+   ctx->ws = amdgpu_winsys(ws);
+   ctx->refcount = 1;
+   r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
+      FREE(ctx);
+      return NULL;
+   }
+
+   alloc_buffer.alloc_size = 4 * 1024;
+   alloc_buffer.phys_alignment = 4 *1024;
+   alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
+   r = amdgpu_bo_alloc(ctx->ws->dev, &alloc_buffer, &buf_handle);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_bo_alloc failed. (%i)\n", r);
+      amdgpu_cs_ctx_free(ctx->ctx);
+      FREE(ctx);
+      return NULL;
+   }
+
+   r = amdgpu_bo_cpu_map(buf_handle, (void**)&ctx->user_fence_cpu_address_base);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_bo_cpu_map failed. (%i)\n", r);
+      amdgpu_bo_free(buf_handle);
+      amdgpu_cs_ctx_free(ctx->ctx);
+      FREE(ctx);
+      return NULL;
+   }
+
+   memset(ctx->user_fence_cpu_address_base, 0, alloc_buffer.alloc_size);
+   ctx->user_fence_bo = buf_handle;
+   return (struct radeon_winsys_ctx*)ctx;
+}
+
+static void amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
+{
+   amdgpu_ctx_unref((struct amdgpu_ctx*)rwctx);
+}
+
+static enum pipe_reset_status
+amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
+{
+   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
+   uint32_t result, hangs;
+   int r;
+
+   r = amdgpu_cs_query_reset_state(ctx->ctx, &result, &hangs);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", r);
+      return PIPE_NO_RESET;
+   }
+
+   switch (result) {
+   case AMDGPU_CTX_GUILTY_RESET:
+      return PIPE_GUILTY_CONTEXT_RESET;
+   case AMDGPU_CTX_INNOCENT_RESET:
+      return PIPE_INNOCENT_CONTEXT_RESET;
+   case AMDGPU_CTX_UNKNOWN_RESET:
+      return PIPE_UNKNOWN_CONTEXT_RESET;
+   case AMDGPU_CTX_NO_RESET:
+   default:
+      return PIPE_NO_RESET;
+   }
+}
+
+/* COMMAND SUBMISSION */
+
+static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
+{
+   /* The maximum size is 4MB - 1B, which is unaligned.
+    * Use aligned size 4MB - 16B. */
+   const unsigned max_ib_size = (1024 * 1024 - 16) * 4;
+   const unsigned min_ib_size = 24 * 1024 * 4;
+
+   cs->base.cdw = 0;
+   cs->base.buf = NULL;
+
+   /* Allocate a new buffer for IBs if the current buffer is all used. */
+   if (!cs->big_ib_buffer ||
+       cs->used_ib_space + min_ib_size > cs->big_ib_buffer->size) {
+      struct radeon_winsys *ws = &cs->ctx->ws->base;
+      struct radeon_winsys_cs_handle *winsys_bo;
+
+      pb_reference(&cs->big_ib_buffer, NULL);
+      cs->big_ib_winsys_buffer = NULL;
+      cs->ib_mapped = NULL;
+      cs->used_ib_space = 0;
+
+      cs->big_ib_buffer = ws->buffer_create(ws, max_ib_size,
+                                            4096, true,
+                                            RADEON_DOMAIN_GTT,
+                                            RADEON_FLAG_CPU_ACCESS);
+      if (!cs->big_ib_buffer)
+         return false;
+
+      winsys_bo = ws->buffer_get_cs_handle(cs->big_ib_buffer);
+
+      cs->ib_mapped = ws->buffer_map(winsys_bo, NULL, PIPE_TRANSFER_WRITE);
+      if (!cs->ib_mapped) {
+         pb_reference(&cs->big_ib_buffer, NULL);
+         return false;
+      }
+
+      cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)winsys_bo;
+   }
+
+   cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
+   cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space);
+   cs->base.max_dw = (cs->big_ib_buffer->size - cs->used_ib_space) / 4;
+   return true;
+}
+
+static boolean amdgpu_init_cs_context(struct amdgpu_cs *cs,
+                                      enum ring_type ring_type)
+{
+   int i;
+
+   switch (ring_type) {
+   case RING_DMA:
+      cs->request.ip_type = AMDGPU_HW_IP_DMA;
+      break;
+
+   case RING_UVD:
+      cs->request.ip_type = AMDGPU_HW_IP_UVD;
+      break;
+
+   case RING_VCE:
+      cs->request.ip_type = AMDGPU_HW_IP_VCE;
+      break;
+
+   case RING_COMPUTE:
+      cs->request.ip_type = AMDGPU_HW_IP_COMPUTE;
+      break;
+
+   default:
+   case RING_GFX:
+      cs->request.ip_type = AMDGPU_HW_IP_GFX;
+      break;
+   }
+
+   cs->request.number_of_ibs = 1;
+   cs->request.ibs = &cs->ib;
+
+   cs->max_num_buffers = 512;
+   cs->buffers = (struct amdgpu_cs_buffer*)
+                  CALLOC(1, cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer));
+   if (!cs->buffers) {
+      return FALSE;
+   }
+
+   cs->handles = CALLOC(1, cs->max_num_buffers * sizeof(amdgpu_bo_handle));
+   if (!cs->handles) {
+      FREE(cs->buffers);
+      return FALSE;
+   }
+
+   cs->flags = CALLOC(1, cs->max_num_buffers);
+   if (!cs->flags) {
+      FREE(cs->handles);
+      FREE(cs->buffers);
+      return FALSE;
+   }
+
+   for (i = 0; i < Elements(cs->buffer_indices_hashlist); i++) {
+      cs->buffer_indices_hashlist[i] = -1;
+   }
+   return TRUE;
+}
+
+static void amdgpu_cs_context_cleanup(struct amdgpu_cs *cs)
+{
+   unsigned i;
+
+   for (i = 0; i < cs->num_buffers; i++) {
+      p_atomic_dec(&cs->buffers[i].bo->num_cs_references);
+      amdgpu_winsys_bo_reference(&cs->buffers[i].bo, NULL);
+      cs->handles[i] = NULL;
+      cs->flags[i] = 0;
+   }
+
+   cs->num_buffers = 0;
+   cs->used_gart = 0;
+   cs->used_vram = 0;
+
+   for (i = 0; i < Elements(cs->buffer_indices_hashlist); i++) {
+      cs->buffer_indices_hashlist[i] = -1;
+   }
+}
+
+/* Release everything the CS context owns, including the dependency array. */
+static void amdgpu_destroy_cs_context(struct amdgpu_cs *cs)
+{
+   amdgpu_cs_context_cleanup(cs); /* walks the arrays, so it runs before they are freed */
+   FREE(cs->request.dependencies);
+   FREE(cs->handles);
+   FREE(cs->buffers);
+   FREE(cs->flags);
+}
+
+/* cs_create hook: allocate a CS for "rwctx" on the given ring and store the
+ * flush callback with its argument. trace_buf is unused. NULL on failure. */
+static struct radeon_winsys_cs *
+amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
+                 enum ring_type ring_type,
+                 void (*flush)(void *ctx, unsigned flags,
+                               struct pipe_fence_handle **fence),
+                 void *flush_ctx,
+                 struct radeon_winsys_cs_handle *trace_buf)
+{
+   struct amdgpu_ctx *owner = (struct amdgpu_ctx*)rwctx;
+   struct amdgpu_cs *cs = CALLOC_STRUCT(amdgpu_cs);
+
+   if (!cs)
+      return NULL;
+
+   cs->base.ring_type = ring_type;
+   cs->flush_data = flush_ctx;
+   cs->flush_cs = flush;
+   cs->ctx = owner;
+
+   if (!amdgpu_init_cs_context(cs, ring_type))
+      goto fail;
+   if (!amdgpu_get_new_ib(cs)) {
+      amdgpu_destroy_cs_context(cs);
+      goto fail;
+   }
+
+   p_atomic_inc(&owner->ws->num_cs);
+   return &cs->base;
+
+fail:
+   FREE(cs);
+   return NULL;
+}
+
+#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) /* append one dword at cdw; "cs" is evaluated twice */
+
+/* Look up "bo" in the buffer list of "cs": returns its index, or -1 if it is
+ * not listed. The hash table keeps one candidate index per slot; a slot
+ * holding a different BO is resolved by scanning the list. */
+int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
+{
+   unsigned slot = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
+   int idx = cs->buffer_indices_hashlist[slot];
+
+   /* Unused slot: not found. */
+   if (idx == -1)
+      return -1;
+
+   /* The slot already points at this BO. */
+   if (cs->buffers[idx].bo == bo)
+      return idx;
+
+   /* Hash collision: scan from the newest entry towards the oldest. */
+   for (idx = cs->num_buffers - 1; idx >= 0; idx--) {
+      if (cs->buffers[idx].bo != bo)
+         continue;
+      /* Cache the hit, so repeated lookups of the same BO (e.g. the
+       * sequence AAAABBBBCCCC with A, B, C colliding) hit the slot again. */
+      cs->buffer_indices_hashlist[slot] = idx;
+      return idx;
+   }
+   return -1;
+}
+
+/* Add "bo" to the buffer list of "cs", or merge into its existing entry:
+ * usage and domains are OR-ed in and the larger priority (clamped to 15) is
+ * kept. *added_domains receives the domains not recorded for the BO before.
+ * Returns the entry's index. NOTE(review): realloc() results are unchecked. */
+static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
+                                 struct amdgpu_winsys_bo *bo,
+                                 enum radeon_bo_usage usage,
+                                 enum radeon_bo_domain domains,
+                                 unsigned priority,
+                                 enum radeon_bo_domain *added_domains)
+{
+   unsigned slot = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
+   struct amdgpu_cs_buffer *entry;
+   unsigned pos;
+   int found;
+
+   priority = MIN2(priority, 15);
+   *added_domains = 0;
+   found = amdgpu_get_reloc(cs, bo);
+   if (found >= 0) {
+      entry = &cs->buffers[found];
+      entry->usage |= usage;
+      *added_domains = domains & ~entry->domains;
+      entry->domains |= domains;
+      cs->flags[found] = MAX2(cs->flags[found], priority);
+      return found;
+   }
+
+   /* Not listed yet: when the arrays are full, grow all three by 10 entries. */
+   if (cs->num_buffers >= cs->max_num_buffers) {
+      uint32_t bytes;
+      cs->max_num_buffers += 10;
+      bytes = cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer);
+      cs->buffers = realloc(cs->buffers, bytes);
+      bytes = cs->max_num_buffers * sizeof(amdgpu_bo_handle);
+      cs->handles = realloc(cs->handles, bytes);
+      cs->flags = realloc(cs->flags, cs->max_num_buffers);
+   }
+
+   /* Fill the first free entry; it takes its own reference on the BO. */
+   pos = cs->num_buffers;
+   entry = &cs->buffers[pos];
+   entry->bo = NULL;
+   amdgpu_winsys_bo_reference(&entry->bo, bo);
+   p_atomic_inc(&bo->num_cs_references);
+   entry->bo = bo;
+   entry->usage = usage;
+   entry->domains = domains;
+   cs->handles[pos] = bo->bo;
+   cs->flags[pos] = priority;
+   cs->buffer_indices_hashlist[slot] = pos;
+   *added_domains = domains;
+   cs->num_buffers = pos + 1;
+   return pos;
+}
+
+/* cs_add_reloc hook: put "buf" into the CS's buffer list and add its size to
+ * the GART/VRAM totals for each domain that was not recorded for it before. */
+static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
+                                    struct radeon_winsys_cs_handle *buf,
+                                    enum radeon_bo_usage usage,
+                                    enum radeon_bo_domain domains,
+                                    enum radeon_bo_priority priority)
+{
+   struct amdgpu_cs *cs = amdgpu_cs(rcs);
+   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
+   enum radeon_bo_domain newly;
+   unsigned idx;
+
+   /* The "domains" argument is deliberately ignored: amdgpu cannot change
+    * buffer placement at submission, so the BO's initial domain is used. */
+   idx = amdgpu_add_reloc(cs, bo, usage, bo->initial_domain,
+                          priority, &newly);
+
+   cs->used_gart += (newly & RADEON_DOMAIN_GTT) ? bo->base.size : 0;
+   cs->used_vram += (newly & RADEON_DOMAIN_VRAM) ? bo->base.size : 0;
+   return idx;
+}
+
+/* cs_get_reloc hook: index of "buf" in this CS's buffer list, -1 if absent. */
+static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
+                               struct radeon_winsys_cs_handle *buf)
+{
+   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
+   return amdgpu_get_reloc(amdgpu_cs(rcs), bo);
+}
+
+static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
+{
+   return TRUE; /* cs_validate hook: unconditionally reports success; rcs is not examined */
+}
+
+/* TRUE while the memory already accounted to the CS plus the requested
+ * amounts stays below 70% of both gart_size and vram_size from ws->info. */
+static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
+{
+   struct amdgpu_cs *cs = amdgpu_cs(rcs);
+   struct amdgpu_winsys *ws = cs->ctx->ws;
+   return (cs->used_gart + gtt) < ws->info.gart_size * 0.7 &&
+          (cs->used_vram + vram) < ws->info.vram_size * 0.7;
+}
+
+static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
+                                    struct pipe_fence_handle **out_fence)
+{
+   struct amdgpu_winsys *ws = cs->ctx->ws;
+   struct pipe_fence_handle *fence;
+   int i, j, r;
+   /* Submit cs->request to the kernel. A fence is created for the submission
+    * up front and handed back through *out_fence when that is non-NULL. */
+   fence = amdgpu_fence_create(cs->ctx,
+                               cs->request.ip_type,
+                               cs->request.ip_instance,
+                               cs->request.ring);
+   if (out_fence)
+      amdgpu_fence_reference(out_fence, fence);
+
+   cs->request.number_of_dependencies = 0; /* the list is rebuilt for every submission */
+
+   /* Since the kernel driver doesn't synchronize execution between different
+    * rings automatically, we have to add fence dependencies manually. */
+   pipe_mutex_lock(ws->bo_fence_lock);
+   for (i = 0; i < cs->num_buffers; i++) {
+      for (j = 0; j < RING_LAST; j++) {
+         struct amdgpu_cs_fence *dep;
+         unsigned idx;
+
+         struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence[j];
+         if (!bo_fence)
+            continue; /* the buffer has no fence stored in slot j */
+         /* Skip fences from this same context, IP type, IP instance and ring. */
+         if (bo_fence->ctx == cs->ctx &&
+             bo_fence->fence.ip_type == cs->request.ip_type &&
+             bo_fence->fence.ip_instance == cs->request.ip_instance &&
+             bo_fence->fence.ring == cs->request.ring)
+            continue;
+         /* NOTE(review): presumably true means "already signalled" (timeout 0) - confirm. */
+         if (amdgpu_fence_wait((void *)bo_fence, 0, false))
+            continue;
+         /* Append the fence to request.dependencies, growing the array on demand. */
+         idx = cs->request.number_of_dependencies++;
+         if (idx >= cs->max_dependencies) {
+            unsigned size;
+
+            cs->max_dependencies = idx + 8;
+            size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
+            cs->request.dependencies = realloc(cs->request.dependencies, size); /* NOTE(review): result unchecked */
+         }
+
+         dep = &cs->request.dependencies[idx];
+         memcpy(dep, &bo_fence->fence, sizeof(*dep));
+      }
+   }
+   /* The user fence BO is only passed for IP types other than UVD and VCE. */
+   cs->request.fence_info.handle = NULL;
+   if (cs->request.ip_type != AMDGPU_HW_IP_UVD && cs->request.ip_type != AMDGPU_HW_IP_VCE) {
+	cs->request.fence_info.handle = cs->ctx->user_fence_bo;
+	cs->request.fence_info.offset = cs->base.ring_type;
+   }
+
+   r = amdgpu_cs_submit(cs->ctx->ctx, 0, &cs->request, 1);
+   if (r) {
+      if (r == -ENOMEM)
+         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
+      else
+         fprintf(stderr, "amdgpu: The CS has been rejected, "
+                 "see dmesg for more information.\n");
+
+      amdgpu_fence_signalled(fence); /* submission failed; presumably marks the fence as done - confirm */
+   } else {
+      /* Success. */
+      uint64_t *user_fence = NULL;
+      if (cs->request.ip_type != AMDGPU_HW_IP_UVD && cs->request.ip_type != AMDGPU_HW_IP_VCE)
+         user_fence = cs->ctx->user_fence_cpu_address_base +
+                      cs->request.fence_info.offset;
+      amdgpu_fence_submitted(fence, &cs->request, user_fence);
+      /* Store the new fence on every listed buffer, in this ring's slot. */
+      for (i = 0; i < cs->num_buffers; i++)
+         amdgpu_fence_reference(&cs->buffers[i].bo->fence[cs->base.ring_type],
+                                fence);
+   }
+   pipe_mutex_unlock(ws->bo_fence_lock);
+   amdgpu_fence_reference(&fence, NULL); /* drop the local reference */
+}
+
+static void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
+{
+   /* no-op (cs_sync_flush hook); amdgpu_cs_flush in this file submits directly from the calling thread */
+}
+
+DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
+
+static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
+                            unsigned flags,
+                            struct pipe_fence_handle **fence,
+                            uint32_t cs_trace_id)
+{
+   struct amdgpu_cs *cs = amdgpu_cs(rcs);
+   struct amdgpu_winsys *ws = cs->ctx->ws;
+   /* Pad the IB, submit it (unless empty, overflowed or RADEON_NOOP), then reset the CS. */
+   switch (cs->base.ring_type) {
+   case RING_DMA:
+      /* pad DMA ring to 8 DWs */
+      if (ws->info.chip_class <= SI) {
+         while (rcs->cdw & 7)
+            OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
+      } else {
+         while (rcs->cdw & 7)
+            OUT_CS(&cs->base, 0x00000000); /* NOP packet */
+      }
+      break;
+   case RING_GFX:
+      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
+       * r6xx, requires at least 4 dw alignment to avoid a hw bug.
+       */
+      if (ws->info.chip_class <= SI) {
+         while (rcs->cdw & 7)
+            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
+      } else {
+         while (rcs->cdw & 7)
+            OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
+      }
+      break;
+   case RING_UVD:
+      while (rcs->cdw & 15) /* pad to a multiple of 16 dwords */
+         OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
+      break;
+   default:
+      break;
+   }
+
+   if (rcs->cdw > rcs->max_dw) {
+      fprintf(stderr, "amdgpu: command stream overflowed\n");
+   }
+   /* The buffer backing the IB itself goes into the buffer list as well. */
+   amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
+		       RADEON_USAGE_READ, 0, RADEON_PRIO_MIN);
+
+   /* Submit only if the CS is non-empty, did not overflow and RADEON_NOOP is off. */
+   if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
+      int r;
+
+      r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
+                                cs->handles, cs->flags,
+                                &cs->request.resources);
+
+      if (r) {
+         fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r);
+         cs->request.resources = NULL;
+	 goto cleanup;
+      }
+      /* cdw counts dwords, while used_ib_space is tracked in bytes. */
+      cs->ib.size = cs->base.cdw;
+      cs->used_ib_space += cs->base.cdw * 4;
+
+      amdgpu_cs_do_submission(cs, fence);
+
+      /* Cleanup. */
+      if (cs->request.resources)
+         amdgpu_bo_list_destroy(cs->request.resources);
+   }
+
+cleanup:
+   amdgpu_cs_context_cleanup(cs);
+   amdgpu_get_new_ib(cs); /* NOTE(review): the result is ignored here, unlike in amdgpu_cs_create */
+
+   ws->num_cs_flushes++;
+}
+
+/* cs_destroy hook: free the buffer list, drop the IB buffer reference, free the CS. */
+static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
+{
+   struct amdgpu_cs *self = amdgpu_cs(rcs);
+   amdgpu_destroy_cs_context(self);
+   p_atomic_dec(&self->ctx->ws->num_cs);
+   pb_reference(&self->big_ib_buffer, NULL);
+   FREE(self);
+}
+
+/* cs_is_buffer_referenced hook: is "_buf" listed in this CS with a "usage" bit? */
+static boolean amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs,
+                                       struct radeon_winsys_cs_handle *_buf,
+                                       enum radeon_bo_usage usage)
+{
+   return amdgpu_bo_is_referenced_by_cs_with_usage(amdgpu_cs(rcs),
+                                                   (struct amdgpu_winsys_bo*)_buf,
+                                                   usage);
+}
+
+void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
+{
+   ws->base.fence_reference = amdgpu_fence_reference; /* fence hooks */
+   ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;
+   ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status; /* context hooks */
+   ws->base.ctx_destroy = amdgpu_ctx_destroy;
+   ws->base.ctx_create = amdgpu_ctx_create;
+   ws->base.cs_sync_flush = amdgpu_cs_sync_flush; /* command stream hooks */
+   ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
+   ws->base.cs_flush = amdgpu_cs_flush;
+   ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+   ws->base.cs_validate = amdgpu_cs_validate;
+   ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
+   ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
+   ws->base.cs_destroy = amdgpu_cs_destroy;
+   ws->base.cs_create = amdgpu_cs_create;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#ifndef AMDGPU_CS_H
+#define AMDGPU_CS_H
+
+#include "amdgpu_bo.h"
+#include "util/u_memory.h"
+
+struct amdgpu_ctx {
+   struct amdgpu_winsys *ws;
+   amdgpu_context_handle ctx; /* released with amdgpu_cs_ctx_free in amdgpu_ctx_unref */
+   amdgpu_bo_handle user_fence_bo; /* released with amdgpu_bo_free in amdgpu_ctx_unref */
+   uint64_t *user_fence_cpu_address_base;
+   int refcount; /* amdgpu_ctx_unref destroys the context once this drops to zero */
+};
+
+struct amdgpu_cs_buffer {
+   struct amdgpu_winsys_bo *bo; /* the buffer this entry refers to */
+   enum radeon_bo_usage usage; /* tested as a bit mask by amdgpu_bo_is_referenced_by_cs_with_usage */
+   enum radeon_bo_domain domains;
+};
+
+
+struct amdgpu_cs {
+   struct radeon_winsys_cs base; /* must stay first: amdgpu_cs() casts a base pointer to this struct */
+   struct amdgpu_ctx *ctx;
+
+   /* Flush CS. */
+   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
+   void *flush_data;
+
+   /* A buffer out of which new IBs are allocated. */
+   struct pb_buffer *big_ib_buffer; /* for holding the reference */
+   struct amdgpu_winsys_bo *big_ib_winsys_buffer;
+   uint8_t *ib_mapped;
+   unsigned used_ib_space; /* presumably bytes of big_ib_buffer already handed out - confirm */
+
+   /* amdgpu_cs_submit parameters */
+   struct amdgpu_cs_request    request;
+   struct amdgpu_cs_ib_info    ib;
+
+   /* Relocs. handles, flags and buffers are presumably parallel arrays (same index = same BO) - confirm. */
+   unsigned                    max_num_buffers;
+   unsigned                    num_buffers;
+   amdgpu_bo_handle            *handles;
+   uint8_t                     *flags;
+   struct amdgpu_cs_buffer     *buffers;
+   /* Indices into buffers[]; amdgpu_bo_is_referenced_by_cs treats -1 from amdgpu_get_reloc as "not listed". */
+   int                         buffer_indices_hashlist[512];
+
+   uint64_t                    used_vram;
+   uint64_t                    used_gart;
+
+   unsigned                    max_dependencies; /* presumably the capacity of request.dependencies - confirm */
+};
+
+struct amdgpu_fence {
+   struct pipe_reference reference; /* reference count, managed through amdgpu_fence_reference */
+
+   struct amdgpu_ctx *ctx;  /* submission context; unreferenced when the fence is freed */
+   struct amdgpu_cs_fence fence;
+   uint64_t *user_fence_cpu_address;
+
+   volatile int signalled;              /* bool (int for atomicity) */
+};
+
+static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
+{
+   if (!p_atomic_dec_zero(&ctx->refcount))
+      return; /* count decremented, but p_atomic_dec_zero did not report zero */
+   amdgpu_cs_ctx_free(ctx->ctx);
+   amdgpu_bo_free(ctx->user_fence_bo);
+   FREE(ctx);
+}
+
+static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
+                                          struct pipe_fence_handle *src)
+{
+   struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst;
+   struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src;
+   /* Make *dst refer to src; when pipe_reference reports true the old fence is freed. */
+   if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
+      amdgpu_ctx_unref((*rdst)->ctx); /* the dying fence gives up its context reference */
+      FREE(*rdst);
+   }
+   *rdst = rsrc;
+}
+
+int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
+
+static inline struct amdgpu_cs *
+amdgpu_cs(struct radeon_winsys_cs *base)
+{
+   return (struct amdgpu_cs*)base; /* valid because "base" is the first member of struct amdgpu_cs */
+}
+
+static inline boolean
+amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
+                              struct amdgpu_winsys_bo *bo)
+{
+   const int refs = bo->num_cs_references; /* sampled once for both tests */
+   const boolean in_every_cs = refs == bo->rws->num_cs; /* lets the lookup be skipped */
+   return in_every_cs || (refs && amdgpu_get_reloc(cs, bo) != -1);
+}
+
+/* TRUE if "bo" is in the buffer list of "cs" and its entry has at least one
+ * of the "usage" bits set. */
+static inline boolean
+amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
+                                         struct amdgpu_winsys_bo *bo,
+                                         enum radeon_bo_usage usage)
+{
+   int pos;
+
+   /* Cheap rejection: the BO's CS reference count is zero. */
+   if (bo->num_cs_references == 0)
+      return FALSE;
+
+   pos = amdgpu_get_reloc(cs, bo);
+   return pos != -1 && (cs->buffers[pos].usage & usage) != 0;
+}
+
+static inline boolean
+amdgpu_bo_is_referenced_by_any_cs(struct amdgpu_winsys_bo *bo)
+{
+   return bo->num_cs_references ? TRUE : FALSE; /* non-zero CS reference count */
+}
+
+bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
+                       bool absolute);
+void amdgpu_cs_init_functions(struct amdgpu_winsys *ws);
+
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_id.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_id.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_id.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+ * This file is included by addrlib. It adds GPU family definitions and
+ * macros compatible with addrlib.
+ */
+
+#ifndef AMDGPU_ID_H
+#define AMDGPU_ID_H
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_LITTLE_ENDIAN)
+#define LITTLEENDIAN_CPU
+#elif defined(PIPE_ARCH_BIG_ENDIAN)
+#define BIGENDIAN_CPU
+#endif
+
+enum {
+	FAMILY_UNKNOWN, /* = 0 */
+	FAMILY_SI,
+	FAMILY_CI,
+	FAMILY_KV,
+	FAMILY_VI,
+	FAMILY_CZ,
+	FAMILY_PI,
+	FAMILY_LAST, /* one past the last family value above */
+};
+
+/* SI specific rev IDs */
+enum {
+	SI_TAHITI_P_A11      = 1,
+	SI_TAHITI_P_A0       = SI_TAHITI_P_A11,      /*A0 is alias of A11*/
+	SI_TAHITI_P_A21      = 5,
+	SI_TAHITI_P_B0       = SI_TAHITI_P_A21,      /*B0 is alias of A21*/
+	SI_TAHITI_P_A22      = 6,
+	SI_TAHITI_P_B1       = SI_TAHITI_P_A22,      /*B1 is alias of A22*/
+
+	SI_PITCAIRN_PM_A11   = 20,
+	SI_PITCAIRN_PM_A0    = SI_PITCAIRN_PM_A11,   /*A0 is alias of A11*/
+	SI_PITCAIRN_PM_A12   = 21,
+	SI_PITCAIRN_PM_A1    = SI_PITCAIRN_PM_A12,   /*A1 is alias of A12*/
+
+	SI_CAPEVERDE_M_A11   = 40,
+	SI_CAPEVERDE_M_A0    = SI_CAPEVERDE_M_A11,   /*A0 is alias of A11*/
+	SI_CAPEVERDE_M_A12   = 41,
+	SI_CAPEVERDE_M_A1    = SI_CAPEVERDE_M_A12,   /*A1 is alias of A12*/
+
+	SI_OLAND_M_A0        = 60,
+
+	SI_HAINAN_V_A0       = 70,
+
+	SI_UNKNOWN           = 0xFF
+};
+
+/* Each chip owns the revision ID range up to the next chip's first ID; the argument is parenthesized so expressions are safe. */
+#define ASICREV_IS_TAHITI_P(eChipRev)	\
+	((eChipRev) < SI_PITCAIRN_PM_A11)
+#define ASICREV_IS_PITCAIRN_PM(eChipRev)	\
+	(((eChipRev) >= SI_PITCAIRN_PM_A11) && ((eChipRev) < SI_CAPEVERDE_M_A11))
+#define ASICREV_IS_CAPEVERDE_M(eChipRev)	\
+	(((eChipRev) >= SI_CAPEVERDE_M_A11) && ((eChipRev) < SI_OLAND_M_A0))
+#define ASICREV_IS_OLAND_M(eChipRev)	\
+	(((eChipRev) >= SI_OLAND_M_A0) && ((eChipRev) < SI_HAINAN_V_A0))
+#define ASICREV_IS_HAINAN_V(eChipRev)	\
+	((eChipRev) >= SI_HAINAN_V_A0)
+
+/* CI specific revIDs; the range macros below parenthesize their argument so expressions are safe */
+enum {
+	CI_BONAIRE_M_A0 = 20,
+	CI_BONAIRE_M_A1 = 21,
+
+	CI_HAWAII_P_A0  = 40,
+
+	CI_UNKNOWN      = 0xFF
+};
+
+#define ASICREV_IS_BONAIRE_M(eChipRev)	\
+	(((eChipRev) >= CI_BONAIRE_M_A0) && ((eChipRev) < CI_HAWAII_P_A0))
+#define ASICREV_IS_HAWAII_P(eChipRev)	\
+	((eChipRev) >= CI_HAWAII_P_A0)
+
+/* KV specific rev IDs; the range macros below parenthesize their argument so expressions are safe */
+enum {
+	KV_SPECTRE_A0      = 0x01,       /* KV1 with Spectre GFX core, 8-8-1-2 (CU-Pix-Primitive-RB) */
+	KV_SPOOKY_A0       = 0x41,       /* KV2 with Spooky GFX core, including downgraded from Spectre core, 3-4-1-1 (CU-Pix-Primitive-RB) */
+	KB_KALINDI_A0      = 0x81,       /* KB with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+	KB_KALINDI_A1      = 0x82,       /* KB with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+	BV_KALINDI_A2      = 0x85,       /* BV with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+	ML_GODAVARI_A0     = 0xa1,      /* ML with Godavari GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+	ML_GODAVARI_A1     = 0xa2,      /* ML with Godavari GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+	KV_UNKNOWN = 0xFF
+};
+
+#define ASICREV_IS_SPECTRE(eChipRev)	\
+	(((eChipRev) >= KV_SPECTRE_A0) && ((eChipRev) < KV_SPOOKY_A0))         /* identify all versions of SPECTRE and supported features set */
+#define ASICREV_IS_SPOOKY(eChipRev)	\
+	(((eChipRev) >= KV_SPOOKY_A0) && ((eChipRev) < KB_KALINDI_A0))          /* identify all versions of SPOOKY and supported features set */
+#define ASICREV_IS_KALINDI(eChipRev)	\
+	(((eChipRev) >= KB_KALINDI_A0) && ((eChipRev) < KV_UNKNOWN))           /* identify all versions of KALINDI and supported features set */
+
+/* Following macros are subset of ASICREV_IS_KALINDI macro */
+#define ASICREV_IS_KALINDI_BHAVANI(eChipRev)	\
+	(((eChipRev) >= BV_KALINDI_A2) && ((eChipRev) < ML_GODAVARI_A0))   /* identify all versions of BHAVANI and supported features set */
+#define ASICREV_IS_KALINDI_GODAVARI(eChipRev)	\
+	(((eChipRev) >= ML_GODAVARI_A0) && ((eChipRev) < KV_UNKNOWN)) /* identify all versions of GODAVARI and supported features set */
+
+/* VI specific rev IDs */
+enum {
+	VI_ICELAND_M_A0   = 1,
+
+	VI_TONGA_P_A0     = 20,
+	VI_TONGA_P_A1     = 21,
+
+	VI_FIJI_P_A0      = 60,
+
+	VI_UNKNOWN        = 0xFF
+};
+
+/* Each chip owns the revision ID range up to the next chip's first ID; the argument is parenthesized so expressions are safe. */
+#define ASICREV_IS_ICELAND_M(eChipRev)	\
+	((eChipRev) < VI_TONGA_P_A0)
+#define ASICREV_IS_TONGA_P(eChipRev)	\
+	(((eChipRev) >= VI_TONGA_P_A0) && ((eChipRev) < VI_FIJI_P_A0))
+#define ASICREV_IS_FIJI_P(eChipRev)	\
+	((eChipRev) >= VI_FIJI_P_A0)
+
+/* CZ specific rev IDs; ASICREV_IS_CARRIZO() is true for every ID from CZ_CARRIZO_A0 upwards */
+enum {
+	CZ_CARRIZO_A0      = 0x01,
+	CZ_UNKNOWN      = 0xFF
+};
+
+#define ASICREV_IS_CARRIZO(eChipRev) \
+	((eChipRev) >= CZ_CARRIZO_A0)
+
+#endif /* AMDGPU_ID_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_public.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_public.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_public.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#ifndef AMDGPU_PUBLIC_H
+#define AMDGPU_PUBLIC_H
+
+#include "pipe/p_defines.h"
+
+struct radeon_winsys;
+struct pipe_screen;
+
+typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *);
+
+struct radeon_winsys *
+amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create);
+
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,438 @@
+/*
+ * Copyright © 2011 Red Hat All Rights Reserved.
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/* Contact:
+ *     Marek Olšák <maraeo@gmail.com>
+ */
+
+#include "amdgpu_winsys.h"
+
+#ifndef NO_ENTRIES
+#define NO_ENTRIES 32
+#endif
+
+#ifndef NO_MACRO_ENTRIES
+#define NO_MACRO_ENTRIES 16
+#endif
+
+#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
+#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
+#endif
+
+
+/* Validate the caller-provided part of a surface description. Returns 0 if
+ * it is usable and -EINVAL otherwise: tile-mode-index flag missing, a zero
+ * dimension/array size/block dimension, a sample count other than 1, 2, 4
+ * or 8, an unknown surface type, or a dimension the type does not use being
+ * larger than 1. */
+static int amdgpu_surface_sanity(const struct radeon_surf *surf)
+{
+   unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
+   bool single_row = false;    /* npix_y must not exceed 1 */
+   bool single_slice = false;  /* npix_z must not exceed 1 */
+   bool single_layer = false;  /* array_size must not exceed 1 */
+
+   if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX))
+      return -EINVAL;
+
+   /* Every dimension and every block dimension must be non-zero. */
+   if (!surf->npix_x || !surf->npix_y || !surf->npix_z ||
+       !surf->array_size ||
+       !surf->blk_w || !surf->blk_h || !surf->blk_d)
+      return -EINVAL;
+
+   if (surf->nsamples != 1 && surf->nsamples != 2 &&
+       surf->nsamples != 4 && surf->nsamples != 8)
+      return -EINVAL;
+
+   switch (type) {
+   case RADEON_SURF_TYPE_1D:
+      single_row = true;
+      /* fall through */
+   case RADEON_SURF_TYPE_2D:
+   case RADEON_SURF_TYPE_CUBEMAP:
+      single_slice = single_layer = true;
+      break;
+   case RADEON_SURF_TYPE_3D:
+      single_layer = true;
+      break;
+   case RADEON_SURF_TYPE_1D_ARRAY:
+      single_row = true;
+      /* fall through */
+   case RADEON_SURF_TYPE_2D_ARRAY:
+      single_slice = true;
+      break;
+   default:
+      return -EINVAL;
+   }
+
+   if ((single_row && surf->npix_y > 1) ||
+       (single_slice && surf->npix_z > 1) ||
+       (single_layer && surf->array_size > 1))
+      return -EINVAL;
+   return 0;
+}
+
+static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
+{
+   return malloc(pInput->sizeInBytes); /* allocation callback installed by amdgpu_addr_create: plain malloc */
+}
+
+static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
+{
+   free(pInput->pVirtAddr); /* free callback installed by amdgpu_addr_create; pairs with malloc in allocSysMem */
+   return ADDR_OK; /* always reports success */
+}
+
+/**
+ * Number of banks for the surface (possible values: 2, 4, 8, 16), taken from
+ * the macro tile mode entry selected by the tile size in bytes.
+ */
+static uint32_t cik_num_banks(struct amdgpu_winsys *ws,
+                              struct radeon_surf *surf)
+{
+   unsigned tile_bytes = 8 * 8 * surf->bpe;
+   unsigned entry = 0;
+
+   tile_bytes = MIN2(surf->tile_split, tile_bytes);
+   /* entry = number of halvings needed to bring the tile size down to 64. */
+   while (tile_bytes > 64) {
+      tile_bytes >>= 1;
+      entry++;
+   }
+   assert(entry < 16);
+   return 2 << ((ws->amdinfo.gb_macro_tile_mode[entry] >> 6) & 0x3);
+}
+
+/* Create an addrlib instance for this winsys from the register values in
+ * ws->amdinfo. Returns the library handle, or NULL if AddrCreate fails. */
+ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
+{
+   ADDR_CREATE_INPUT in = {0};
+   ADDR_CREATE_OUTPUT out = {0};
+   ADDR_REGISTER_VALUE regs = {0};
+   ADDR_CREATE_FLAGS flags = {{0}};
+
+   in.size = sizeof(ADDR_CREATE_INPUT);
+   out.size = sizeof(ADDR_CREATE_OUTPUT);
+
+   regs.gbAddrConfig = ws->amdinfo.gb_addr_cfg;
+   regs.noOfBanks = ws->amdinfo.mc_arb_ramcfg & 0x3;
+   regs.noOfRanks = (ws->amdinfo.mc_arb_ramcfg & 0x4) >> 2;
+   regs.backendDisables = ws->amdinfo.backend_disable[0];
+
+   /* The tile mode tables are passed by pointer together with their lengths. */
+   regs.pTileConfig = ws->amdinfo.gb_tile_mode;
+   regs.noOfEntries = sizeof(ws->amdinfo.gb_tile_mode) /
+                      sizeof(ws->amdinfo.gb_tile_mode[0]);
+   regs.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
+   regs.noOfMacroEntries = sizeof(ws->amdinfo.gb_macro_tile_mode) /
+                           sizeof(ws->amdinfo.gb_macro_tile_mode[0]);
+
+   flags.value = 0;
+   flags.useTileIndex = 1;
+   flags.degradeBaseLevel = 1;
+
+   in.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
+   in.chipFamily = ws->family;
+   in.chipRevision = ws->rev_id;
+   in.createFlags = flags;
+   in.regValue = regs;
+   in.callbacks.allocSysMem = allocSysMem;
+   in.callbacks.freeSysMem = freeSysMem;
+   in.callbacks.debugPrint = 0;
+
+   if (AddrCreate(&in, &out) != ADDR_OK)
+      return NULL;
+   return out.hLib;
+}
+
+/* Compute the layout of one mip level with addrlib and record it in surf.
+ *
+ * The level (a stencil level when is_stencil is set) is placed at the current
+ * surf->bo_size, aligned to the base alignment reported by addrlib, and
+ * surf->bo_size then grows to the end of the level.
+ *
+ * Returns 0 on success, otherwise the addrlib return code.
+ */
+static int compute_level(struct amdgpu_winsys *ws,
+                         struct radeon_surf *surf, bool is_stencil,
+                         unsigned level, unsigned type, bool compressed,
+                         ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
+                         ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut)
+{
+   struct radeon_surf_level *levels =
+      is_stencil ? surf->stencil_level : surf->level;
+   struct radeon_surf_level *lvl = &levels[level];
+   ADDR_E_RETURNCODE status;
+
+   AddrSurfInfoIn->mipLevel = level;
+   AddrSurfInfoIn->width = u_minify(surf->npix_x, level);
+   AddrSurfInfoIn->height = u_minify(surf->npix_y, level);
+
+   if (type == RADEON_SURF_TYPE_CUBEMAP)
+      AddrSurfInfoIn->numSlices = 6;
+   else
+      AddrSurfInfoIn->numSlices = type == RADEON_SURF_TYPE_3D ?
+                                  u_minify(surf->npix_z, level) : surf->array_size;
+
+   /* Levels above the base are derived from the pitch of level 0, given in
+    * pixels: block counts of compressed formats are scaled by blk_w. */
+   if (level > 0) {
+      AddrSurfInfoIn->basePitch = levels[0].nblk_x;
+      if (compressed)
+         AddrSurfInfoIn->basePitch *= surf->blk_w;
+   }
+
+   status = AddrComputeSurfaceInfo(ws->addrlib, AddrSurfInfoIn,
+                                   AddrSurfInfoOut);
+   if (status != ADDR_OK)
+      return status;
+
+   lvl->offset = align(surf->bo_size, AddrSurfInfoOut->baseAlign);
+   lvl->slice_size = AddrSurfInfoOut->sliceSize;
+   lvl->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe);
+   lvl->npix_x = u_minify(surf->npix_x, level);
+   lvl->npix_y = u_minify(surf->npix_y, level);
+   lvl->npix_z = u_minify(surf->npix_z, level);
+   lvl->nblk_x = AddrSurfInfoOut->pitch;
+   lvl->nblk_y = AddrSurfInfoOut->height;
+   lvl->nblk_z = type == RADEON_SURF_TYPE_3D ? AddrSurfInfoOut->depth : 1;
+
+   /* Translate the tile mode addrlib settled on into the winsys enum. */
+   switch (AddrSurfInfoOut->tileMode) {
+   case ADDR_TM_LINEAR_GENERAL:
+      lvl->mode = RADEON_SURF_MODE_LINEAR;
+      break;
+   case ADDR_TM_LINEAR_ALIGNED:
+      lvl->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+      break;
+   case ADDR_TM_1D_TILED_THIN1:
+      lvl->mode = RADEON_SURF_MODE_1D;
+      break;
+   case ADDR_TM_2D_TILED_THIN1:
+      lvl->mode = RADEON_SURF_MODE_2D;
+      break;
+   default:
+      assert(0);
+   }
+
+   if (is_stencil)
+      surf->stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
+   else
+      surf->tiling_index[level] = AddrSurfInfoOut->tileIndex;
+
+   /* The buffer now ends where this level ends. */
+   surf->bo_size = lvl->offset + AddrSurfInfoOut->surfSize;
+   return 0;
+}
+
+static int amdgpu_surface_init(struct radeon_winsys *rws,
+                               struct radeon_surf *surf)
+{
+   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
+   unsigned level, mode, type;
+   bool compressed;
+   ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
+   ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
+   ADDR_TILEINFO AddrTileInfoIn = {0};
+   ADDR_TILEINFO AddrTileInfoOut = {0};
+   int r;
+   /* Compute surf's per-level layout, size and tiling parameters with addrlib. */
+   r = amdgpu_surface_sanity(surf);
+   if (r)
+      return r;
+
+   AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
+   AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
+   AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
+
+   type = RADEON_SURF_GET(surf->flags, TYPE);
+   mode = RADEON_SURF_GET(surf->flags, MODE);
+   compressed = surf->blk_w == 4 && surf->blk_h == 4; /* 4x4 blocks */
+
+   /* MSAA and FMASK require 2D tiling. */
+   if (surf->nsamples > 1 ||
+       (surf->flags & RADEON_SURF_FMASK))
+      mode = RADEON_SURF_MODE_2D;
+
+   /* DB doesn't support linear layouts. */
+   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
+       mode < RADEON_SURF_MODE_1D)
+      mode = RADEON_SURF_MODE_1D;
+
+   /* Set the requested tiling mode. */
+   switch (mode) {
+   case RADEON_SURF_MODE_LINEAR:
+      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_GENERAL;
+      break;
+   case RADEON_SURF_MODE_LINEAR_ALIGNED:
+      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
+      break;
+   case RADEON_SURF_MODE_1D:
+      AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
+      break;
+   case RADEON_SURF_MODE_2D:
+      AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
+      break;
+   default:
+      assert(0);
+   }
+
+   /* The format must be set correctly for the allocation of compressed
+    * textures to work. In other cases, setting the bpp is sufficient. */
+   if (compressed) {
+      switch (surf->bpe) {
+      case 8:
+         AddrSurfInfoIn.format = ADDR_FMT_BC1;
+         break;
+      case 16:
+         AddrSurfInfoIn.format = ADDR_FMT_BC3;
+         break;
+      default:
+         assert(0);
+      }
+   }
+   else {
+      AddrSurfInfoIn.bpp = surf->bpe * 8;
+   }
+
+   AddrSurfInfoIn.numSamples = surf->nsamples;
+   AddrSurfInfoIn.tileIndex = -1; /* replaced below when pTileInfo is set */
+
+   /* Set the micro tile type. */
+   if (surf->flags & RADEON_SURF_SCANOUT)
+      AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
+   else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
+      AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
+   else
+      AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
+
+   AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
+   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
+   AddrSurfInfoIn.flags.stencil = (surf->flags & RADEON_SURF_SBUFFER) != 0;
+   AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
+   AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
+   AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
+   AddrSurfInfoIn.flags.degrade4Space = 1;
+
+   /* This disables incorrect calculations (hacks) in addrlib. */
+   AddrSurfInfoIn.flags.noStencil = 1;
+
+   /* Set preferred macrotile parameters. This is usually required
+    * for shared resources. This is for 2D tiling only. */
+   if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
+       surf->bankw && surf->bankh && surf->mtilea && surf->tile_split) {
+      /* If any of these parameters are incorrect, the calculation
+       * will fail. */
+      AddrTileInfoIn.banks = cik_num_banks(ws, surf);
+      AddrTileInfoIn.bankWidth = surf->bankw;
+      AddrTileInfoIn.bankHeight = surf->bankh;
+      AddrTileInfoIn.macroAspectRatio = surf->mtilea;
+      AddrTileInfoIn.tileSplitBytes = surf->tile_split;
+      AddrSurfInfoIn.flags.degrade4Space = 0;
+      AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
+
+      /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
+       * the tile index, because we are expected to know it if
+       * we know the other parameters.
+       *
+       * This is something that can easily be fixed in Addrlib.
+       * For now, just figure it out here.
+       * Note that only 2D_TILE_THIN1 is handled here.
+       */
+      assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
+      assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
+
+      if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
+         AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
+      else
+         AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
+   }
+
+   surf->bo_size = 0;
+
+   /* Calculate texture layout information. */
+   for (level = 0; level <= surf->last_level; level++) {
+      r = compute_level(ws, surf, false, level, type, compressed,
+                        &AddrSurfInfoIn, &AddrSurfInfoOut);
+      if (r)
+         return r;
+
+      if (level == 0) {
+         surf->bo_alignment = AddrSurfInfoOut.baseAlign;
+         surf->pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1;
+
+         /* For 2D modes only. */
+         if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
+            surf->bankw = AddrSurfInfoOut.pTileInfo->bankWidth;
+            surf->bankh = AddrSurfInfoOut.pTileInfo->bankHeight;
+            surf->mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio;
+            surf->tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
+            surf->num_banks = AddrSurfInfoOut.pTileInfo->banks;
+         }
+      }
+   }
+
+   /* Calculate texture layout information for stencil. */
+   if (surf->flags & RADEON_SURF_SBUFFER) {
+      AddrSurfInfoIn.bpp = 8; /* stencil levels are computed with 8 bpp */
+      /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
+      AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
+
+      for (level = 0; level <= surf->last_level; level++) {
+         r = compute_level(ws, surf, true, level, type, compressed,
+                           &AddrSurfInfoIn, &AddrSurfInfoOut);
+         if (r)
+            return r;
+
+         if (level == 0) {
+            surf->stencil_offset = surf->stencil_level[0].offset;
+
+            /* For 2D modes only. */
+            if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
+               surf->stencil_tile_split =
+                     AddrSurfInfoOut.pTileInfo->tileSplitBytes;
+            }
+         }
+      }
+   }
+
+   return 0;
+}
+
+static int amdgpu_surface_best(struct radeon_winsys *rws,
+                               struct radeon_surf *surf)
+{
+   return 0; /* always returns 0 and leaves surf untouched */
+}
+
+void amdgpu_surface_init_functions(struct amdgpu_winsys *ws)
+{
+   ws->base.surface_init = amdgpu_surface_init; /* install this file's surface callbacks */
+   ws->base.surface_best = amdgpu_surface_best;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,514 @@
+/*
+ * Copyright © 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright © 2009 Joakim Sindholt <opensource@zhasha.com>
+ * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#include "amdgpu_cs.h"
+#include "amdgpu_public.h"
+
+#include "util/u_hash_table.h"
+#include <amdgpu_drm.h>
+#include <xf86drm.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include "amdgpu_id.h"
+
+#define CIK_TILE_MODE_COLOR_2D			14 /* index into gb_tile_mode[] */
+/* PIPE_CONFIG is the 5-bit field at bits 10:6 of a GB_TILE_MODE value. */
+#define CIK__GB_TILE_MODE__PIPE_CONFIG(x)        (((x) >> 6) & 0x1f)
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P2               0
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16          4
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16         5
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32         6
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32         7
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16    8
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16    9
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16    10
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16   11
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16   12
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32   13
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32   14
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16   16
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16  17
+/* Winsys instances keyed by device handle; accessed under dev_tab_mutex. */
+static struct util_hash_table *dev_tab = NULL;
+pipe_static_mutex(dev_tab_mutex);
+
+/* Tile pipe count per PIPE_CONFIG of the 2D color tile mode; 2 if unknown. */
+static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
+{
+   unsigned color_2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D];
+   switch (CIK__GB_TILE_MODE__PIPE_CONFIG(color_2d)) {
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32:
+      return 4;
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32:
+      return 8;
+   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
+      return 16;
+   case CIK__PIPE_CONFIG__ADDR_SURF_P2:
+   default:
+      return 2;
+   }
+}
+
+/* Pack fields of the Sea Islands registers GB_ADDR_CFG and MC_ARB_RAMCFG
+ * in the layout of GB_TILING_CONFIG, which is only present on R600-R700. */
+static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info)
+{
+   unsigned gb_addr_cfg = info->gb_addr_cfg;
+   unsigned tiling_config;
+
+   tiling_config  = gb_addr_cfg & 0x7;                 /* num_pipes */
+   tiling_config |= (info->mc_arb_ramcfg & 0x3) << 4;  /* num_banks */
+   tiling_config |= ((gb_addr_cfg >> 4) & 0x7) << 8;   /* pipe_interleave_bytes */
+   tiling_config |= ((gb_addr_cfg >> 28) & 0x3) << 12; /* row_size */
+   return tiling_config;
+}
+
+/* Query the device through libdrm and fill in ws->info; FALSE on any error. */
+static boolean do_winsys_init(struct amdgpu_winsys *ws)
+{
+   struct amdgpu_buffer_size_alignments alignment_info = {};
+   struct amdgpu_heap_info vram, gtt;
+   struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
+   uint32_t vce_version = 0, vce_feature = 0;
+   int r, i, j;
+
+   /* Query hardware and driver information. */
+   r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_buffer_size_alignment(ws->dev, &alignment_info);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_DMA, 0, &dma);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_UVD, 0, &uvd);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_VCE, 0, &vce);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_firmware_version(ws->dev, AMDGPU_INFO_FW_VCE, 0, 0,
+				     &vce_version, &vce_feature);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
+      goto fail;
+   }
+
+   /* Set chip identification. */
+   ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */
+   ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config;
+
+   switch (ws->info.pci_id) {
+#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; break;
+#include "pci_ids/radeonsi_pci_ids.h"
+#undef CHIPSET
+
+   default:
+      fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
+      goto fail;
+   }
+
+   if (ws->info.family >= CHIP_TONGA)
+      ws->info.chip_class = VI;
+   else if (ws->info.family >= CHIP_BONAIRE)
+      ws->info.chip_class = CIK;
+   else {
+      fprintf(stderr, "amdgpu: Unknown family.\n");
+      goto fail;
+   }
+
+   /* LLVM 3.6.1 or later is required for VI. */
+   if (ws->info.chip_class >= VI &&
+       (HAVE_LLVM < 0x0306 ||
+        (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1))) {
+      fprintf(stderr, "amdgpu: LLVM 3.6.1 is required, got LLVM %i.%i.%i\n",
+              HAVE_LLVM >> 8, HAVE_LLVM & 255, MESA_LLVM_VERSION_PATCH);
+      goto fail;
+   }
+
+   /* family and rev_id are for addrlib */
+   switch (ws->info.family) {
+   case CHIP_BONAIRE:
+      ws->family = FAMILY_CI;
+      ws->rev_id = CI_BONAIRE_M_A0;
+      break;
+   case CHIP_KAVERI:
+      ws->family = FAMILY_KV;
+      ws->rev_id = KV_SPECTRE_A0;
+      break;
+   case CHIP_KABINI:
+      ws->family = FAMILY_KV;
+      ws->rev_id = KB_KALINDI_A0;
+      break;
+   case CHIP_HAWAII:
+      ws->family = FAMILY_CI;
+      ws->rev_id = CI_HAWAII_P_A0;
+      break;
+   case CHIP_MULLINS:
+      ws->family = FAMILY_KV;
+      ws->rev_id = ML_GODAVARI_A0;
+      break;
+   case CHIP_TONGA:
+      ws->family = FAMILY_VI;
+      ws->rev_id = VI_TONGA_P_A0;
+      break;
+   case CHIP_ICELAND:
+      ws->family = FAMILY_VI;
+      ws->rev_id = VI_ICELAND_M_A0;
+      break;
+   case CHIP_CARRIZO:
+      ws->family = FAMILY_CZ;
+      ws->rev_id = CZ_CARRIZO_A0;
+      break;
+   case CHIP_FIJI:
+      ws->family = FAMILY_VI;
+      ws->rev_id = VI_FIJI_P_A0;
+      break;
+   default:
+      fprintf(stderr, "amdgpu: Unknown family.\n");
+      goto fail;
+   }
+
+   ws->addrlib = amdgpu_addr_create(ws);
+   if (!ws->addrlib) {
+      fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
+      goto fail;
+   }
+
+   /* Set hardware information. */
+   ws->info.gart_size = gtt.heap_size;
+   ws->info.vram_size = vram.heap_size;
+   /* convert the shader clock from KHz to MHz */
+   ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
+   ws->info.max_se = ws->amdinfo.num_shader_engines;
+   ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
+   ws->info.has_uvd = uvd.available_rings != 0;
+   ws->info.vce_fw_version =
+         vce.available_rings ? vce_version : 0;
+   ws->info.has_userptr = TRUE;
+   ws->info.r600_num_backends = ws->amdinfo.rb_pipes;
+   ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
+   ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo);
+   ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
+   ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */
+   ws->info.r600_virtual_address = TRUE;
+   ws->info.r600_has_dma = dma.available_rings != 0;
+
+   /* Guess the maximum compute unit number: take the highest enabled CU bit
+    * over all shader arrays and multiply it by the number of shader arrays.
+    */
+   for (i = 0; i < ws->info.max_se; i++)
+      for (j = 0; j < ws->info.max_sh_per_se; j++) {
+         unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
+
+         if (ws->info.max_compute_units < max)
+            ws->info.max_compute_units = max;
+      }
+   ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+
+   memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
+          sizeof(ws->amdinfo.gb_tile_mode));
+   ws->info.si_tile_mode_array_valid = TRUE;
+   ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask;
+
+   memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
+          sizeof(ws->amdinfo.gb_macro_tile_mode));
+   ws->info.cik_macrotile_mode_array_valid = TRUE;
+
+   ws->gart_page_size = alignment_info.size_remote;
+
+   return TRUE;
+
+fail:
+   if (ws->addrlib)
+      AddrDestroy(ws->addrlib);
+   amdgpu_device_deinitialize(ws->dev); /* the device is released here on failure */
+   ws->dev = NULL;
+   return FALSE;
+}
+
+/* Tear down a winsys: destroys the fence lock, both buffer managers and
+ * addrlib, releases the device handle, then frees the winsys itself. */
+static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
+{
+   struct amdgpu_winsys *aws = (struct amdgpu_winsys*)rws;
+
+   pipe_mutex_destroy(aws->bo_fence_lock);
+   aws->cman->destroy(aws->cman);
+   aws->kman->destroy(aws->kman);
+   AddrDestroy(aws->addrlib);
+   amdgpu_device_deinitialize(aws->dev);
+   FREE(rws);
+}
+
+static void amdgpu_winsys_query_info(struct radeon_winsys *rws,
+                                     struct radeon_info *info)
+{
+   *info = ((struct amdgpu_winsys *)rws)->info; /* struct copy of the cached info */
+}
+
+static boolean amdgpu_cs_request_feature(struct radeon_winsys_cs *rcs,
+                                         enum radeon_feature_id fid,
+                                         boolean enable)
+{
+   return FALSE; /* every request is answered with FALSE; arguments are unused */
+}
+
+/* Return the counter or statistic selected by "value" (0 if unsupported). */
+static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
+                                   enum radeon_value_id value)
+{
+   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
+   struct amdgpu_heap_info heap = {0}; /* a failed heap query then reports 0 */
+   uint64_t retval = 0;
+   switch (value) {
+   case RADEON_REQUESTED_VRAM_MEMORY:
+      return ws->allocated_vram;
+   case RADEON_REQUESTED_GTT_MEMORY:
+      return ws->allocated_gtt;
+   case RADEON_BUFFER_WAIT_TIME_NS:
+      return ws->buffer_wait_time;
+   case RADEON_TIMESTAMP:
+      amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
+      return retval;
+   case RADEON_NUM_CS_FLUSHES:
+      return ws->num_cs_flushes;
+   case RADEON_NUM_BYTES_MOVED:
+      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
+      return retval;
+   case RADEON_VRAM_USAGE:
+      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
+      return heap.heap_usage;
+   case RADEON_GTT_USAGE:
+      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
+      return heap.heap_usage;
+   case RADEON_GPU_TEMPERATURE:
+   case RADEON_CURRENT_SCLK:
+   case RADEON_CURRENT_MCLK:
+      return 0;
+   case RADEON_GPU_RESET_COUNTER:
+      assert(0);
+      return 0;
+   }
+   return 0;
+}
+
+/* Read num_registers registers starting at reg_offset into out; the offset
+ * is handed to libdrm divided by 4 (presumably bytes to dwords). */
+static void amdgpu_read_registers(struct radeon_winsys *rws,
+                                  unsigned reg_offset,
+                                  unsigned num_registers, uint32_t *out)
+{
+   amdgpu_read_mm_registers(((struct amdgpu_winsys*)rws)->dev, reg_offset / 4,
+                            num_registers, 0xffffffff, 0, out);
+}
+
+static unsigned hash_dev(void *key) /* hash callback given to dev_tab */
+{
+#if defined(PIPE_ARCH_X86_64)
+   return pointer_to_intptr(key) ^ (pointer_to_intptr(key) >> 32); /* fold in the high half */
+#else
+   return pointer_to_intptr(key);
+#endif /* PIPE_ARCH_X86_64 */
+}
+
+static int compare_dev(void *key1, void *key2)
+{
+   return (key1 == key2) ? 0 : 1; /* 0 only for identical pointers */
+}
+
+/* Drop one reference to the winsys. Returns true when it was the last one;
+ * the winsys is then also removed from dev_tab (if the table exists). */
+static bool amdgpu_winsys_unref(struct radeon_winsys *ws)
+{
+   struct amdgpu_winsys *aws = (struct amdgpu_winsys*)ws;
+   bool last_ref;
+
+   /* The counter is decremented and the table entry removed under
+    * dev_tab_mutex, so that amdgpu_winsys_create running in another
+    * thread cannot fetch this winsys from the table once its counter
+    * has reached zero. */
+   pipe_mutex_lock(dev_tab_mutex);
+   last_ref = pipe_reference(&aws->reference, NULL);
+   if (last_ref && dev_tab)
+      util_hash_table_remove(dev_tab, aws->dev);
+   pipe_mutex_unlock(dev_tab_mutex);
+
+   return last_ref;
+}
+
+/* Return the winsys of the amdgpu device behind fd, creating it on first use;
+ * an existing winsys only gets another reference. Returns NULL on failure. */
+PUBLIC struct radeon_winsys *
+amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
+{
+   struct amdgpu_winsys *ws;
+   drmVersionPtr version = drmGetVersion(fd);
+   bool is_amdgpu = version && version->version_major == 3;
+   amdgpu_device_handle dev;
+   uint32_t drm_major, drm_minor, r;
+
+   /* amdgpu's DRM driver version is 3.x.x; a failed query is rejected too. */
+   if (version)
+      drmFreeVersion(version);
+   if (!is_amdgpu)
+      return NULL;
+
+   /* Look up the winsys from the dev table. */
+   pipe_mutex_lock(dev_tab_mutex);
+   if (!dev_tab)
+      dev_tab = util_hash_table_create(hash_dev, compare_dev);
+
+   /* Initialize the amdgpu device. This should always return the same pointer
+    * for the same fd. */
+   r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
+   if (r) {
+      pipe_mutex_unlock(dev_tab_mutex);
+      fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
+      return NULL;
+   }
+
+   /* Lookup a winsys if we have already created one for this device. It owns
+    * a device reference of its own, so the one acquired above is released. */
+   ws = util_hash_table_get(dev_tab, dev);
+   if (ws) {
+      pipe_reference(NULL, &ws->reference);
+      amdgpu_device_deinitialize(dev);
+      pipe_mutex_unlock(dev_tab_mutex);
+      return &ws->base;
+   }
+
+   /* Create a new winsys. */
+   ws = CALLOC_STRUCT(amdgpu_winsys);
+   if (!ws) {
+      amdgpu_device_deinitialize(dev);
+      pipe_mutex_unlock(dev_tab_mutex);
+      return NULL;
+   }
+
+   ws->dev = dev;
+   ws->info.drm_major = drm_major;
+   ws->info.drm_minor = drm_minor;
+
+   /* do_winsys_init releases addrlib and the device itself when it fails. */
+   if (!do_winsys_init(ws))
+      goto fail_free;
+
+   /* Create managers. */
+   ws->kman = amdgpu_bomgr_create(ws);
+   if (!ws->kman)
+      goto fail;
+   ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
+			(ws->info.vram_size + ws->info.gart_size) / 8);
+   if (!ws->cman)
+      goto fail;
+
+   pipe_reference_init(&ws->reference, 1);
+
+   /* Set functions. */
+   ws->base.unref = amdgpu_winsys_unref;
+   ws->base.destroy = amdgpu_winsys_destroy;
+   ws->base.query_info = amdgpu_winsys_query_info;
+   ws->base.cs_request_feature = amdgpu_cs_request_feature;
+   ws->base.query_value = amdgpu_query_value;
+   ws->base.read_registers = amdgpu_read_registers;
+
+   amdgpu_bomgr_init_functions(ws);
+   amdgpu_cs_init_functions(ws);
+   amdgpu_surface_init_functions(ws);
+
+   pipe_mutex_init(ws->bo_fence_lock);
+
+   /* Create the screen last: the winsys must be completely initialized. */
+   ws->base.screen = screen_create(&ws->base);
+   if (!ws->base.screen) {
+      amdgpu_winsys_destroy(&ws->base);
+      pipe_mutex_unlock(dev_tab_mutex);
+      return NULL;
+   }
+
+   util_hash_table_set(dev_tab, dev, ws);
+   /* Unlock only now, so that other threads creating the winsys from the
+    * same fd get a fully initialized winsys and not a half-way one. */
+   pipe_mutex_unlock(dev_tab_mutex);
+   return &ws->base;
+
+fail:
+   /* Release what do_winsys_init and amdgpu_bomgr_create acquired. */
+   if (ws->kman)
+      ws->kman->destroy(ws->kman);
+   AddrDestroy(ws->addrlib);
+   amdgpu_device_deinitialize(ws->dev);
+fail_free:
+   pipe_mutex_unlock(dev_tab_mutex);
+   FREE(ws);
+   return NULL;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2009 Corbin Simpson
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#ifndef AMDGPU_WINSYS_H
+#define AMDGPU_WINSYS_H
+
+#include "gallium/drivers/radeon/radeon_winsys.h"
+#include "addrlib/addrinterface.h"
+#include "os/os_thread.h"
+#include <amdgpu.h>
+
+struct amdgpu_cs;
+
+struct amdgpu_winsys {
+   struct radeon_winsys base; /* first member: radeon_winsys pointers are cast to this struct */
+   struct pipe_reference reference; /* dropped through amdgpu_winsys_unref */
+
+   amdgpu_device_handle dev; /* libdrm device handle; key of the winsys in dev_tab */
+
+   pipe_mutex bo_fence_lock;
+
+   int num_cs; /* The number of command streams created. */
+   uint32_t next_bo_unique_id;
+   uint64_t allocated_vram;
+   uint64_t allocated_gtt;
+   uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
+   uint64_t num_cs_flushes;
+   unsigned gart_page_size; /* set from the queried size_remote alignment */
+
+   struct radeon_info info; /* filled in by do_winsys_init */
+
+   struct pb_manager *kman; /* created by amdgpu_bomgr_create */
+   struct pb_manager *cman; /* cache manager layered on top of kman */
+
+   struct amdgpu_gpu_info amdinfo; /* result of amdgpu_query_gpu_info */
+   ADDR_HANDLE addrlib; /* created by amdgpu_addr_create */
+   uint32_t rev_id; /* chip revision passed to addrlib */
+   unsigned family; /* chip family passed to addrlib */
+};
+
+static inline struct amdgpu_winsys *
+amdgpu_winsys(struct radeon_winsys *base)
+{
+   return (struct amdgpu_winsys*)base; /* plain downcast; nothing is checked */
+}
+
+void amdgpu_surface_init_functions(struct amdgpu_winsys *ws);
+ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws);
+
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/Android.mk	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,47 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2011 Chia-I Wu <olvaffe@gmail.com>
+# Copyright (C) 2011 LunarG Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES (the winsys and addrlib file list from Makefile.sources)
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_CFLAGS := \
+	$(AMDGPU_CFLAGS) \
+	-DBRAHMA_BUILD=1
+# Header directories of the bundled addrlib.
+LOCAL_C_INCLUDES := \
+	$(LOCAL_PATH)/addrlib \
+	$(LOCAL_PATH)/addrlib/core \
+	$(LOCAL_PATH)/addrlib/inc/chip/r800 \
+	$(LOCAL_PATH)/addrlib/r800/chip
+# Built as a static library; libdrm and libdrm_amdgpu are shared dependencies.
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_amdgpu
+LOCAL_MODULE := libmesa_winsys_amdgpu
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/Makefile.am	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,17 @@
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	$(GALLIUM_WINSYS_CFLAGS) \
+	$(AMDGPU_CFLAGS) \
+	-I$(srcdir)/addrlib \
+	-I$(srcdir)/addrlib/core \
+	-I$(srcdir)/addrlib/inc/chip/r800 \
+	-I$(srcdir)/addrlib/r800/chip \
+	-DBRAHMA_BUILD=1
+# C_SOURCES also lists addrlib .cpp files, so C++ gets the same flags as C.
+AM_CXXFLAGS = $(AM_CFLAGS)
+
+noinst_LTLIBRARIES = libamdgpuwinsys.la
+
+libamdgpuwinsys_la_SOURCES = $(C_SOURCES)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/amdgpu/drm/Makefile.sources	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/amdgpu/drm/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,29 @@
+C_SOURCES := \
+	addrlib/addrinterface.cpp \
+	addrlib/addrinterface.h \
+	addrlib/addrtypes.h \
+	addrlib/core/addrcommon.h \
+	addrlib/core/addrelemlib.cpp \
+	addrlib/core/addrelemlib.h \
+	addrlib/core/addrlib.cpp \
+	addrlib/core/addrlib.h \
+	addrlib/core/addrobject.cpp \
+	addrlib/core/addrobject.h \
+	addrlib/inc/chip/r800/si_gb_reg.h \
+	addrlib/inc/lnx_common_defs.h \
+	addrlib/r800/chip/si_ci_vi_merged_enum.h \
+	addrlib/r800/ciaddrlib.cpp \
+	addrlib/r800/ciaddrlib.h \
+	addrlib/r800/egbaddrlib.cpp \
+	addrlib/r800/egbaddrlib.h \
+	addrlib/r800/siaddrlib.cpp \
+	addrlib/r800/siaddrlib.h \
+	amdgpu_bo.c \
+	amdgpu_bo.h \
+	amdgpu_cs.c \
+	amdgpu_cs.h \
+	amdgpu_id.h \
+	amdgpu_public.h \
+	amdgpu_surface.c \
+	amdgpu_winsys.c \
+	amdgpu_winsys.h
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,7 +26,7 @@
    drm_intel_bo *bo;
 };
 
-static INLINE struct i915_drm_batchbuffer *
+static inline struct i915_drm_batchbuffer *
 i915_drm_batchbuffer(struct i915_winsys_batchbuffer *batch)
 {
    return (struct i915_drm_batchbuffer *)batch;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/i915/drm/i915_drm_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/i915/drm/i915_drm_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/i915/drm/i915_drm_winsys.h	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/i915/drm/i915_drm_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -28,7 +28,7 @@
    drm_intel_bufmgr *gem_manager;
 };
 
-static INLINE struct i915_drm_winsys *
+static inline struct i915_drm_winsys *
 i915_drm_winsys(struct i915_winsys *iws)
 {
    return (struct i915_drm_winsys *)iws;
@@ -58,13 +58,13 @@
    unsigned flink;
 };
 
-static INLINE struct i915_drm_buffer *
+static inline struct i915_drm_buffer *
 i915_drm_buffer(struct i915_winsys_buffer *buffer)
 {
    return (struct i915_drm_buffer *)buffer;
 }
 
-static INLINE drm_intel_bo *
+static inline drm_intel_bo *
 intel_bo(struct i915_winsys_buffer *buffer)
 {
    return i915_drm_buffer(buffer)->bo;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c	2015-07-27 05:57:14.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -17,7 +17,7 @@
 
 pipe_static_mutex(nouveau_screen_mutex);
 
-boolean nouveau_drm_screen_unref(struct nouveau_screen *screen)
+bool nouveau_drm_screen_unref(struct nouveau_screen *screen)
 {
 	int ret;
 	if (screen->refcount == -1)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_bo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_bo.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_bo.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,7 +44,7 @@
 
 static const struct pb_vtbl radeon_bo_vtbl;
 
-static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
+static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
 {
     assert(bo->vtbl == &radeon_bo_vtbl);
     return (struct radeon_bo *)bo;
@@ -78,7 +78,7 @@
     struct list_head va_holes;
 };
 
-static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
+static inline struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
 {
     return (struct radeon_bomgr *)mgr;
 }
@@ -101,33 +101,30 @@
     return bo;
 }
 
-static void radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage)
+static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
+                           enum radeon_bo_usage usage)
 {
-    struct radeon_bo *bo = get_radeon_bo(_buf);
-    struct drm_radeon_gem_wait_idle args = {0};
-
-    while (p_atomic_read(&bo->num_active_ioctls)) {
-        sched_yield();
-    }
+    struct radeon_bo *bo = get_radeon_bo(_buf);
 
-    args.handle = bo->handle;
-    while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
-                           &args, sizeof(args)) == -EBUSY);
-}
-
-static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
-                                 enum radeon_bo_usage usage)
-{
-    struct radeon_bo *bo = get_radeon_bo(_buf);
-    struct drm_radeon_gem_busy args = {0};
+    /* Wait if any ioctl is being submitted with this buffer. */
+    if (!os_wait_until_zero(&bo->num_active_ioctls, timeout))
+        return false;
+
+    /* TODO: handle arbitrary timeout; any non-zero value blocks in WAIT_IDLE */
+    if (!timeout) {
+        struct drm_radeon_gem_busy args = {0};
+
+        args.handle = bo->handle;
+        return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
+                                   &args, sizeof(args)) == 0;
+    } else {
+        struct drm_radeon_gem_wait_idle args = {0};
 
-    if (p_atomic_read(&bo->num_active_ioctls)) {
-        return TRUE;
+        args.handle = bo->handle;
+        while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
+                               &args, sizeof(args)) == -EBUSY);
+        return true;
     }
-
-    args.handle = bo->handle;
-    return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
-                               &args, sizeof(args)) != 0;
 }
 
 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
@@ -351,14 +348,11 @@
     if (bo->user_ptr)
         return bo->user_ptr;
 
-    /* Return the pointer if it's already mapped. */
-    if (bo->ptr)
-        return bo->ptr;
-
     /* Map the buffer. */
     pipe_mutex_lock(bo->map_mutex);
-    /* Return the pointer if it's already mapped (in case of a race). */
+    /* Return the pointer if it's already mapped. */
     if (bo->ptr) {
+        bo->map_count++;
         pipe_mutex_unlock(bo->map_mutex);
         return bo->ptr;
     }
@@ -383,6 +377,7 @@
         return NULL;
     }
     bo->ptr = ptr;
+    bo->map_count = 1;
     pipe_mutex_unlock(bo->map_mutex);
 
     return bo->ptr;
@@ -412,8 +407,8 @@
                     return NULL;
                 }
 
-                if (radeon_bo_is_busy((struct pb_buffer*)bo,
-                                      RADEON_USAGE_WRITE)) {
+                if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
+                                    RADEON_USAGE_WRITE)) {
                     return NULL;
                 }
             } else {
@@ -422,8 +417,8 @@
                     return NULL;
                 }
 
-                if (radeon_bo_is_busy((struct pb_buffer*)bo,
-                                      RADEON_USAGE_READWRITE)) {
+                if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
+                                    RADEON_USAGE_READWRITE)) {
                     return NULL;
                 }
             }
@@ -441,7 +436,7 @@
                 if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
                     cs->flush_cs(cs->flush_data, 0, NULL);
                 }
-                radeon_bo_wait((struct pb_buffer*)bo,
+                radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                                RADEON_USAGE_WRITE);
             } else {
                 /* Mapping for write. */
@@ -455,7 +450,8 @@
                     }
                 }
 
-                radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
+                radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
+                               RADEON_USAGE_READWRITE);
             }
 
             bo->mgr->rws->buffer_wait_time += os_time_get_nano() - time;
@@ -467,7 +463,26 @@
 
 static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
 {
-    /* NOP */
+    struct radeon_bo *bo = (struct radeon_bo*)_buf;
+    /* User-pointer buffers are skipped: nothing is unmapped for them here. */
+    if (bo->user_ptr)
+        return;
+
+    pipe_mutex_lock(bo->map_mutex);
+    if (!bo->ptr) {
+        pipe_mutex_unlock(bo->map_mutex);
+        return; /* it's not been mapped */
+    }
+
+    assert(bo->map_count);
+    if (--bo->map_count) {
+        pipe_mutex_unlock(bo->map_mutex);
+        return; /* it's been mapped multiple times */
+    }
+    /* map_count reached zero: drop the mapping and clear the pointer. */
+    os_munmap(bo->ptr, bo->base.size);
+    bo->ptr = NULL;
+    pipe_mutex_unlock(bo->map_mutex);
 }
 
 static void radeon_bo_get_base_buffer(struct pb_buffer *buf,
@@ -627,7 +642,7 @@
        return TRUE;
    }
 
-   if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) {
+   if (!radeon_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) {
        return TRUE;
    }
 
@@ -759,10 +774,11 @@
                                  struct radeon_winsys_cs *rcs,
                                  enum radeon_bo_layout microtiled,
                                  enum radeon_bo_layout macrotiled,
+                                 unsigned pipe_config,
                                  unsigned bankw, unsigned bankh,
                                  unsigned tile_split,
                                  unsigned stencil_tile_split,
-                                 unsigned mtilea,
+                                 unsigned mtilea, unsigned num_banks,
                                  uint32_t pitch,
                                  bool scanout)
 {
@@ -778,9 +794,7 @@
         cs->flush_cs(cs->flush_data, 0, NULL);
     }
 
-    while (p_atomic_read(&bo->num_active_ioctls)) {
-        sched_yield();
-    }
+    os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
 
     if (microtiled == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
@@ -840,6 +854,12 @@
     memset(&desc, 0, sizeof(desc));
     desc.base.alignment = alignment;
 
+    /* Align size to page size. This is the minimum alignment for normal
+     * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
+     * like constant/uniform buffers, can benefit from better and more reuse.
+     */
+    size = align(size, 4096);
+
     /* Only set one usage bit each for domains and flags, or the cache manager
      * might consider different sets of domains / flags compatible
      */
@@ -1106,6 +1126,9 @@
 
     memset(&flink, 0, sizeof(flink));
 
+    if ((void*)bo != (void*)buffer)
+       pb_cache_manager_remove_buffer(buffer);
+
     if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
         if (!bo->flink_name) {
             flink.handle = bo->handle;
@@ -1145,7 +1168,6 @@
     ws->base.buffer_map = radeon_bo_map;
     ws->base.buffer_unmap = radeon_bo_unmap;
     ws->base.buffer_wait = radeon_bo_wait;
-    ws->base.buffer_is_busy = radeon_bo_is_busy;
     ws->base.buffer_create = radeon_winsys_bo_create;
     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
     ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_bo.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_bo.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_bo.h	2015-09-16 14:36:09.000000000 +0000
@@ -54,6 +54,7 @@
 
     void *ptr;
     pipe_mutex map_mutex;
+    unsigned map_count;
 
     uint32_t handle;
     uint32_t flink_name;
@@ -71,7 +72,7 @@
 struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws);
 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws);
 
-static INLINE
+static inline
 void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
 {
     pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_cs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_cs.c	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_cs.c	2015-09-16 14:36:09.000000000 +0000
@@ -80,6 +80,18 @@
 static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                    struct pipe_fence_handle *src);
 
+static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
+{
+    /* No context support here. Just return the winsys pointer
+     * as the "context". */
+    return (struct radeon_winsys_ctx*)ws;
+}
+
+static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
+{
+    /* No context support here. */
+}
+
 static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                       struct radeon_drm_winsys *ws)
 {
@@ -152,14 +164,14 @@
 
 
 static struct radeon_winsys_cs *
-radeon_drm_cs_create(struct radeon_winsys *rws,
+radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                      enum ring_type ring_type,
                      void (*flush)(void *ctx, unsigned flags,
                                    struct pipe_fence_handle **fence),
                      void *flush_ctx,
                      struct radeon_winsys_cs_handle *trace_buf)
 {
-    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
     struct radeon_drm_cs *cs;
 
     cs = CALLOC_STRUCT(radeon_drm_cs);
@@ -188,6 +200,7 @@
     cs->cst = &cs->csc2;
     cs->base.buf = cs->csc->buf;
     cs->base.ring_type = ring_type;
+    cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);
 
     p_atomic_inc(&ws->num_cs);
     return &cs->base;
@@ -195,7 +208,7 @@
 
 #define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
 
-static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
+static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                 enum radeon_bo_domain rd,
                                 enum radeon_bo_domain wd,
                                 unsigned priority,
@@ -372,20 +385,29 @@
 static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
-    boolean status =
-        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
-        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
 
-    return status;
+    vram += cs->csc->used_vram;
+    gtt += cs->csc->used_gart;
+
+    /* Anything that goes above the VRAM size should go to GTT. */
+    if (vram > cs->ws->info.vram_size)
+        gtt += vram - cs->ws->info.vram_size;
+
+    /* Now we just need to check if we have enough GTT. */
+    return gtt < cs->ws->info.gart_size * 0.7;
 }
 
 void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
 {
     unsigned i;
+    int r;
 
-    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
-                            &csc->cs, sizeof(struct drm_radeon_cs))) {
-        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
+    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
+                            &csc->cs, sizeof(struct drm_radeon_cs));
+    if (r) {
+	if (r == -ENOMEM)
+	    fprintf(stderr, "radeon: Not enough memory for command submission.\n");
+	else if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
             unsigned i;
 
             fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
@@ -467,7 +489,7 @@
         break;
     }
 
-    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
+    if (rcs->cdw > rcs->max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
     }
 
@@ -486,7 +508,7 @@
     cs->cst->cs_trace_id = cs_trace_id;
 
     /* If the CS is not empty or overflowed, emit it in a separate thread. */
-    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
+    if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
         unsigned i, crelocs;
 
         crelocs = cs->cst->crelocs;
@@ -522,6 +544,7 @@
 
         default:
         case RING_GFX:
+        case RING_COMPUTE:
             cs->cst->flags[0] = 0;
             cs->cst->flags[1] = RADEON_CS_RING_GFX;
             cs->cst->cs.num_chunks = 2;
@@ -537,7 +560,7 @@
                 cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                 cs->cst->cs.num_chunks = 3;
             }
-            if (flags & RADEON_FLUSH_COMPUTE) {
+            if (cs->base.ring_type == RING_COMPUTE) {
                 cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                 cs->cst->cs.num_chunks = 3;
             }
@@ -625,7 +648,7 @@
     struct pb_buffer *rfence = (struct pb_buffer*)fence;
 
     if (timeout == 0)
-        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);
+        return ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE);
 
     if (timeout != PIPE_TIMEOUT_INFINITE) {
         int64_t start_time = os_time_get();
@@ -634,7 +657,7 @@
         timeout /= 1000;
 
         /* Wait in a loop. */
-        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
+        while (!ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE)) {
             if (os_time_get() - start_time >= timeout) {
                 return FALSE;
             }
@@ -643,7 +666,7 @@
         return TRUE;
     }
 
-    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
+    ws->buffer_wait(rfence, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
     return TRUE;
 }
 
@@ -655,6 +678,8 @@
 
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
 {
+    ws->base.ctx_create = radeon_drm_ctx_create;
+    ws->base.ctx_destroy = radeon_drm_ctx_destroy;
     ws->base.cs_create = radeon_drm_cs_create;
     ws->base.cs_destroy = radeon_drm_cs_destroy;
     ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_cs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_cs.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_cs.h	2015-09-16 14:36:09.000000000 +0000
@@ -30,7 +30,7 @@
 #include "radeon_drm_bo.h"
 
 struct radeon_cs_context {
-    uint32_t                    buf[RADEON_MAX_CMDBUF_DWORDS];
+    uint32_t                    buf[16 * 1024];
 
     int                         fd;
     struct drm_radeon_cs        cs;
@@ -79,13 +79,13 @@
 
 int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
 
-static INLINE struct radeon_drm_cs *
+static inline struct radeon_drm_cs *
 radeon_drm_cs(struct radeon_winsys_cs *base)
 {
     return (struct radeon_drm_cs*)base;
 }
 
-static INLINE boolean
+static inline boolean
 radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
                               struct radeon_bo *bo)
 {
@@ -94,7 +94,7 @@
            (num_refs && radeon_get_reloc(cs->csc, bo) != -1);
 }
 
-static INLINE boolean
+static inline boolean
 radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
                                         struct radeon_bo *bo)
 {
@@ -110,7 +110,7 @@
     return cs->csc->relocs[index].write_domain != 0;
 }
 
-static INLINE boolean
+static inline boolean
 radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
 {
     return bo->num_cs_references != 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -59,6 +59,10 @@
 
 #define RADEON_INFO_VA_UNMAP_WORKING	0x25
 
+#ifndef RADEON_INFO_GPU_RESET_COUNTER
+#define RADEON_INFO_GPU_RESET_COUNTER   0x26
+#endif
+
 static struct util_hash_table *fd_tab = NULL;
 pipe_static_mutex(fd_tab_mutex);
 
@@ -571,6 +575,10 @@
         radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
                              "current-gpu-mclk", (uint32_t*)&retval);
         return retval;
+    case RADEON_GPU_RESET_COUNTER:
+        radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
+                             "gpu-reset-counter", (uint32_t*)&retval);
+        return retval;
     }
     return 0;
 }
@@ -714,7 +722,7 @@
     if (!ws->kman)
         goto fail;
 
-    ws->cman = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0,
+    ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
                                        MIN2(ws->info.vram_size, ws->info.gart_size));
     if (!ws->cman)
         goto fail;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h	2015-09-16 14:36:09.000000000 +0000
@@ -97,7 +97,7 @@
     struct radeon_drm_cs *cs_stack[RING_LAST];
 };
 
-static INLINE struct radeon_drm_winsys *
+static inline struct radeon_drm_winsys *
 radeon_drm_winsys(struct radeon_winsys *base)
 {
     return (struct radeon_drm_winsys*)base;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c	2015-09-16 14:36:09.000000000 +0000
@@ -127,7 +127,7 @@
 };
 
 
-static INLINE struct fenced_manager *
+static inline struct fenced_manager *
 fenced_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -135,7 +135,7 @@
 }
 
 
-static INLINE struct fenced_buffer *
+static inline struct fenced_buffer *
 fenced_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -204,7 +204,7 @@
 }
 
 
-static INLINE void
+static inline void
 fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
                              struct fenced_buffer *fenced_buf)
 {
@@ -228,7 +228,7 @@
  *
  * Reference count should be incremented before calling this function.
  */
-static INLINE void
+static inline void
 fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
                          struct fenced_buffer *fenced_buf)
 {
@@ -252,7 +252,7 @@
  *
  * Returns TRUE if the buffer was detroyed.
  */
-static INLINE boolean
+static inline boolean
 fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
                             struct fenced_buffer *fenced_buf)
 {
@@ -289,7 +289,7 @@
  * This function will release and re-acquire the mutex, so any copy of mutable
  * state must be discarded after calling it.
  */
-static INLINE enum pipe_error
+static inline enum pipe_error
 fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
                             struct fenced_buffer *fenced_buf)
 {
@@ -430,7 +430,7 @@
  * This function is a shorthand around pb_manager::create_buffer for
  * fenced_buffer_create_gpu_storage_locked()'s benefit.
  */
-static INLINE boolean
+static inline boolean
 fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
                                             struct fenced_buffer *fenced_buf,
                                             const struct pb_desc *desc)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/SConscript	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/SConscript	2015-09-16 14:36:09.000000000 +0000
@@ -8,7 +8,6 @@
     env.Append(CCFLAGS = ['-fvisibility=hidden'])
     env.Append(CPPDEFINES = [
         'HAVE_STDINT_H', 
-        'HAVE_SYS_TYPES_H',
         '-D_FILE_OFFSET_BITS=64',
     ])
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_buffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_buffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_buffer.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_buffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -69,7 +69,7 @@
 extern const struct pb_vtbl vmw_gmr_buffer_vtbl;
 
 
-static INLINE struct vmw_gmr_buffer *
+static inline struct vmw_gmr_buffer *
 vmw_gmr_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -86,7 +86,7 @@
 };
 
 
-static INLINE struct vmw_gmr_bufmgr *
+static inline struct vmw_gmr_bufmgr *
 vmw_gmr_bufmgr(struct pb_manager *mgr)
 {
    assert(mgr);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_buffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_buffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_buffer.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_buffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -59,7 +59,7 @@
 vmw_debug_flush_buf(struct svga_winsys_buffer *buffer);
 
 #else
-static INLINE struct pb_buffer *
+static inline struct pb_buffer *
 vmw_pb_buffer(struct svga_winsys_buffer *buffer)
 {
    assert(buffer);
@@ -67,7 +67,7 @@
 }
 
 
-static INLINE struct svga_winsys_buffer *
+static inline struct svga_winsys_buffer *
 vmw_svga_winsys_buffer_wrap(struct pb_buffer *buffer)
 {
    return (struct svga_winsys_buffer *)buffer;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_context.c	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -152,7 +152,7 @@
 };
 
 
-static INLINE struct vmw_svga_winsys_context *
+static inline struct vmw_svga_winsys_context *
 vmw_svga_winsys_context(struct svga_winsys_context *swc)
 {
    assert(swc);
@@ -160,7 +160,7 @@
 }
 
 
-static INLINE unsigned
+static inline unsigned
 vmw_translate_to_pb_flags(unsigned flags)
 {
    unsigned f = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_fence.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_fence.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_fence.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_fence.c	2015-09-16 14:36:09.000000000 +0000
@@ -67,7 +67,7 @@
  * @ops: Pointer to a struct pb_fence_ops.
  *
  */
-static INLINE boolean
+static inline boolean
 vmw_fence_seq_is_signaled(uint32_t seq, uint32_t last, uint32_t cur)
 {
    return (cur - last <= cur - seq);
@@ -81,7 +81,7 @@
  * @ops: Pointer to a struct pb_fence_ops.
  *
  */
-static INLINE struct vmw_fence_ops *
+static inline struct vmw_fence_ops *
 vmw_fence_ops(struct pb_fence_ops *ops)
 {
    assert(ops);
@@ -162,7 +162,7 @@
  *
  * @fence: The opaque pipe fence handle.
  */
-static INLINE struct vmw_fence *
+static inline struct vmw_fence *
 vmw_fence(struct pipe_fence_handle *fence)
 {
    return (struct vmw_fence *) fence;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_screen_dri.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_screen_dri.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_screen_dri.c	2014-09-20 14:48:28.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_screen_dri.c	2015-09-16 14:36:09.000000000 +0000
@@ -126,7 +126,7 @@
    return NULL;
 }
 
-static INLINE boolean
+static inline boolean
 vmw_dri1_intersect_src_bbox(struct drm_clip_rect *dst,
 			    int dst_x,
 			    int dst_y,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_screen.h	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -102,7 +102,7 @@
 };
 
 
-static INLINE struct vmw_winsys_screen *
+static inline struct vmw_winsys_screen *
 vmw_winsys_screen(struct svga_winsys_screen *base)
 {
    return (struct vmw_winsys_screen *)base;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c	2015-09-16 14:36:09.000000000 +0000
@@ -650,7 +650,7 @@
       vmw_error("%s Failed\n", __FUNCTION__);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 vmw_drm_fence_flags(uint32_t flags)
 {
     uint32_t dflags = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_shader.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -47,14 +47,14 @@
    uint32_t shid;
 };
 
-static INLINE struct svga_winsys_gb_shader *
+static inline struct svga_winsys_gb_shader *
 svga_winsys_shader(struct vmw_svga_winsys_shader *shader)
 {
    assert(!shader || shader->shid != SVGA3D_INVALID_ID);
    return (struct svga_winsys_gb_shader *)shader;
 }
 
-static INLINE struct vmw_svga_winsys_shader *
+static inline struct vmw_svga_winsys_shader *
 vmw_svga_winsys_shader(struct svga_winsys_gb_shader *shader)
 {
    return (struct vmw_svga_winsys_shader *)shader;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_surface.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_surface.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/svga/drm/vmw_surface.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/svga/drm/vmw_surface.h	2015-09-16 14:36:09.000000000 +0000
@@ -68,7 +68,7 @@
 };
 
 
-static INLINE struct svga_winsys_surface *
+static inline struct svga_winsys_surface *
 svga_winsys_surface(struct vmw_svga_winsys_surface *surf)
 {
    assert(!surf || surf->sid != SVGA3D_INVALID_ID);
@@ -76,7 +76,7 @@
 }
 
 
-static INLINE struct vmw_svga_winsys_surface *
+static inline struct vmw_svga_winsys_surface *
 vmw_svga_winsys_surface(struct svga_winsys_surface *surf)
 {
    return (struct vmw_svga_winsys_surface *)surf;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/android/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/android/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/android/Android.mk	2012-01-02 08:23:27.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/android/Android.mk	1970-01-01 00:00:00.000000000 +0000
@@ -1,34 +0,0 @@
-# Mesa 3-D graphics library
-#
-# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
-# Copyright (C) 2010-2011 LunarG Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
-	android_sw_winsys.cpp
-
-LOCAL_MODULE := libmesa_winsys_sw_android
-
-include $(GALLIUM_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/android/android_sw_winsys.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/android/android_sw_winsys.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/android/android_sw_winsys.cpp	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/android/android_sw_winsys.cpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,264 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2010-2011 LunarG Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-#include "util/u_memory.h"
-#include "util/u_format.h"
-#include "state_tracker/sw_winsys.h"
-
-#include <hardware/gralloc.h>
-#include <utils/Errors.h>
-
-#if ANDROID_VERSION < 0x0300
-#include <private/ui/sw_gralloc_handle.h>
-#endif
-
-#include "android_sw_winsys.h"
-
-struct android_sw_winsys
-{
-   struct sw_winsys base;
-
-   const gralloc_module_t *grmod;
-};
-
-struct android_sw_displaytarget
-{
-   buffer_handle_t handle;
-   int stride;
-   int width, height;
-   int usage; /* gralloc usage */
-
-   void *mapped;
-};
-
-static INLINE struct android_sw_winsys *
-android_sw_winsys(struct sw_winsys *ws)
-{
-   return (struct android_sw_winsys *) ws;
-}
-
-static INLINE struct android_sw_displaytarget *
-android_sw_displaytarget(struct sw_displaytarget *dt)
-{
-   return (struct android_sw_displaytarget *) dt;
-}
-
-namespace android {
-
-static void
-android_displaytarget_display(struct sw_winsys *ws,
-                              struct sw_displaytarget *dt,
-                              void *context_private,
-                              struct pipe_box *box)
-{
-}
-
-static struct sw_displaytarget *
-android_displaytarget_create(struct sw_winsys *ws,
-                             unsigned tex_usage,
-                             enum pipe_format format,
-                             unsigned width, unsigned height,
-                             unsigned alignment,
-                             unsigned *stride)
-{
-   return NULL;
-}
-
-static void
-android_displaytarget_destroy(struct sw_winsys *ws,
-                              struct sw_displaytarget *dt)
-{
-   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
-
-   assert(!adt->mapped);
-   FREE(adt);
-}
-
-static void
-android_displaytarget_unmap(struct sw_winsys *ws,
-                            struct sw_displaytarget *dt)
-{
-   struct android_sw_winsys *droid = android_sw_winsys(ws);
-   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
-
-#if ANDROID_VERSION < 0x0300
-   /* try sw_gralloc first */
-   if (adt->mapped && sw_gralloc_handle_t::validate(adt->handle) >= 0) {
-      adt->mapped = NULL;
-      return;
-   }
-#endif
-
-   if (adt->mapped) {
-      droid->grmod->unlock(droid->grmod, adt->handle);
-      adt->mapped = NULL;
-   }
-}
-
-static void *
-android_displaytarget_map(struct sw_winsys *ws,
-                          struct sw_displaytarget *dt,
-                          unsigned flags)
-{
-   struct android_sw_winsys *droid = android_sw_winsys(ws);
-   struct android_sw_displaytarget *adt = android_sw_displaytarget(dt);
-
-#if ANDROID_VERSION < 0x0300
-   /* try sw_gralloc first */
-   if (sw_gralloc_handle_t::validate(adt->handle) >= 0) {
-      const sw_gralloc_handle_t *swhandle =
-         reinterpret_cast<const sw_gralloc_handle_t *>(adt->handle);
-      adt->mapped = reinterpret_cast<void *>(swhandle->base);
-
-      return adt->mapped;
-   }
-#endif
-
-   if (!adt->mapped) {
-      /* lock the buffer for CPU access */
-      droid->grmod->lock(droid->grmod, adt->handle,
-            adt->usage, 0, 0, adt->width, adt->height, &adt->mapped);
-   }
-
-   return adt->mapped;
-}
-
-static struct sw_displaytarget *
-android_displaytarget_from_handle(struct sw_winsys *ws,
-                                  const struct pipe_resource *templ,
-                                  struct winsys_handle *whandle,
-                                  unsigned *stride)
-{
-   struct android_winsys_handle *ahandle =
-      (struct android_winsys_handle *) whandle;
-   struct android_sw_displaytarget *adt;
-
-   adt = CALLOC_STRUCT(android_sw_displaytarget);
-   if (!adt)
-      return NULL;
-
-   adt->handle = ahandle->handle;
-   adt->stride = ahandle->stride;
-   adt->width = templ->width0;
-   adt->height = templ->height0;
-
-   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_TRANSFER_WRITE))
-      adt->usage |= GRALLOC_USAGE_SW_WRITE_OFTEN;
-   if (templ->bind & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_TRANSFER_READ))
-      adt->usage |= GRALLOC_USAGE_SW_READ_OFTEN;
-
-   if (stride)
-      *stride = adt->stride;
-
-   return reinterpret_cast<struct sw_displaytarget *>(adt);
-}
-
-static boolean
-android_displaytarget_get_handle(struct sw_winsys *ws,
-                                 struct sw_displaytarget *dt,
-                                 struct winsys_handle *whandle)
-{
-   return FALSE;
-}
-
-static boolean
-android_is_displaytarget_format_supported(struct sw_winsys *ws,
-                                          unsigned tex_usage,
-                                          enum pipe_format format)
-{
-   struct android_sw_winsys *droid = android_sw_winsys(ws);
-   int fmt = -1;
-
-   switch (format) {
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      fmt = HAL_PIXEL_FORMAT_RGBA_8888;
-      break;
-   case PIPE_FORMAT_R8G8B8X8_UNORM:
-      fmt = HAL_PIXEL_FORMAT_RGBX_8888;
-      break;
-   case PIPE_FORMAT_R8G8B8_UNORM:
-      fmt = HAL_PIXEL_FORMAT_RGB_888;
-      break;
-   case PIPE_FORMAT_B5G6R5_UNORM:
-      fmt = HAL_PIXEL_FORMAT_RGB_565;
-      break;
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-      fmt = HAL_PIXEL_FORMAT_BGRA_8888;
-      break;
-   default:
-      break;
-   }
-
-   return (fmt != -1);
-}
-
-static void
-android_destroy(struct sw_winsys *ws)
-{
-   struct android_sw_winsys *droid = android_sw_winsys(ws);
-
-   FREE(droid);
-}
-
-}; /* namespace android */
-
-using namespace android;
-
-struct sw_winsys *
-android_create_sw_winsys(void)
-{
-   struct android_sw_winsys *droid;
-   const hw_module_t *mod;
-
-   droid = CALLOC_STRUCT(android_sw_winsys);
-   if (!droid)
-      return NULL;
-
-   if (hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &mod)) {
-      FREE(droid);
-      return NULL;
-   }
-
-   droid->grmod = (const gralloc_module_t *) mod;
-
-   droid->base.destroy = android_destroy;
-   droid->base.is_displaytarget_format_supported =
-      android_is_displaytarget_format_supported;
-
-   droid->base.displaytarget_create = android_displaytarget_create;
-   droid->base.displaytarget_destroy = android_displaytarget_destroy;
-   droid->base.displaytarget_from_handle = android_displaytarget_from_handle;
-   droid->base.displaytarget_get_handle = android_displaytarget_get_handle;
-
-   droid->base.displaytarget_map = android_displaytarget_map;
-   droid->base.displaytarget_unmap = android_displaytarget_unmap;
-   droid->base.displaytarget_display = android_displaytarget_display;
-
-   return &droid->base;
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/android/android_sw_winsys.h mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/android/android_sw_winsys.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/android/android_sw_winsys.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/android/android_sw_winsys.h	1970-01-01 00:00:00.000000000 +0000
@@ -1,48 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2010-2011 LunarG Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Chia-I Wu <olv@lunarg.com>
- */
-
-#ifndef ANDROID_SW_WINSYS
-#define ANDROID_SW_WINSYS
-
-#include <sys/cdefs.h>
-#include <hardware/gralloc.h>
-
-__BEGIN_DECLS
-
-struct sw_winsys;
-
-struct android_winsys_handle {
-   buffer_handle_t handle;
-   int stride;
-};
-
-struct sw_winsys *
-android_create_sw_winsys(void);
-
-__END_DECLS
-
-#endif /* ANDROID_SW_WINSYS */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/dri/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/dri/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/dri/Android.mk	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/dri/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,35 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2015 Chih-Wei Huang <cwhuang@linux.org.tw>
+# Copyright (C) 2015 Android-x86 Open Source Project
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_MODULE := libmesa_winsys_sw_dri
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/dri/dri_sw_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/dri/dri_sw_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/dri/dri_sw_winsys.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/dri/dri_sw_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -55,13 +55,13 @@
    struct drisw_loader_funcs *lf;
 };
 
-static INLINE struct dri_sw_displaytarget *
+static inline struct dri_sw_displaytarget *
 dri_sw_displaytarget( struct sw_displaytarget *dt )
 {
    return (struct dri_sw_displaytarget *)dt;
 }
 
-static INLINE struct dri_sw_winsys *
+static inline struct dri_sw_winsys *
 dri_sw_winsys( struct sw_winsys *ws )
 {
    return (struct dri_sw_winsys *)ws;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -62,7 +62,7 @@
 
 
 /** Cast wrapper */
-static INLINE struct gdi_sw_displaytarget *
+static inline struct gdi_sw_displaytarget *
 gdi_sw_displaytarget( struct sw_displaytarget *buf )
 {
    return (struct gdi_sw_displaytarget *)buf;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -67,7 +67,7 @@
 
 
 // Cast
-static INLINE struct haiku_displaytarget*
+static inline struct haiku_displaytarget*
 hgl_sw_displaytarget(struct sw_displaytarget* target)
 {
 	return (struct haiku_displaytarget *)target;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -83,13 +83,13 @@
    struct list_head bo_list;
 };
 
-static INLINE struct kms_sw_displaytarget *
+static inline struct kms_sw_displaytarget *
 kms_sw_displaytarget( struct sw_displaytarget *dt )
 {
    return (struct kms_sw_displaytarget *)dt;
 }
 
-static INLINE struct kms_sw_winsys *
+static inline struct kms_sw_winsys *
 kms_sw_winsys( struct sw_winsys *ws )
 {
    return (struct kms_sw_winsys *)ws;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/kms-dri/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/kms-dri/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/kms-dri/Makefile.am	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/kms-dri/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -31,5 +31,3 @@
 noinst_LTLIBRARIES = libswkmsdri.la
 
 libswkmsdri_la_SOURCES = $(C_SOURCES)
-
-EXTRA_DIST = SConscript
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/kms-dri/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/kms-dri/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/kms-dri/SConscript	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/kms-dri/SConscript	1970-01-01 00:00:00.000000000 +0000
@@ -1,23 +0,0 @@
-#######################################################################
-# SConscript for kms-dri winsys
-
-
-Import('*')
-
-if env['platform'] not in ('linux'):
-    Return()
-
-env = env.Clone()
-
-env.PkgUseModules('DRM')
-
-env.Append(CPPPATH = [
-    '#/src/gallium/include',
-    '#/src/gallium/auxiliary',
-])
-
-ws_kms_dri = env.ConvenienceLibrary(
-    target = 'ws_kms_dri',
-    source = env.ParseSourceList('Makefile.sources', 'C_SOURCES'),
-)
-Export('ws_kms_dri')
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -66,13 +66,13 @@
    void *ptr;
 };
 
-static INLINE struct wrapper_sw_winsys *
+static inline struct wrapper_sw_winsys *
 wrapper_sw_winsys(struct sw_winsys *ws)
 {
    return (struct wrapper_sw_winsys *)ws;
 }
 
-static INLINE struct wrapper_sw_displaytarget *
+static inline struct wrapper_sw_displaytarget *
 wrapper_sw_displaytarget(struct sw_displaytarget *dt)
 {
    return (struct wrapper_sw_displaytarget *)dt;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c	2015-09-16 14:36:09.000000000 +0000
@@ -92,7 +92,7 @@
 
 
 /** Cast wrapper */
-static INLINE struct xlib_displaytarget *
+static inline struct xlib_displaytarget *
 xlib_displaytarget(struct sw_displaytarget *dt)
 {
    return (struct xlib_displaytarget *) dt;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/vc4/drm/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/vc4/drm/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gallium/winsys/vc4/drm/Android.mk	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gallium/winsys/vc4/drm/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,34 @@
+# Copyright (C) 2014 Emil Velikov <emil.l.velikov@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_MODULE := libmesa_winsys_vc4
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gbm/main/backend.c mesa-11.0.0~git20150916+11.0.c4bae579/src/gbm/main/backend.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gbm/main/backend.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gbm/main/backend.c	2015-09-16 14:36:09.000000000 +0000
@@ -65,7 +65,7 @@
 find_backend(const char *name)
 {
    const struct backend_desc *backend = NULL;
-   int i;
+   unsigned i;
 
    for (i = 0; i < ARRAY_SIZE(backends); ++i) {
       if (strcmp(backends[i].name, name) == 0) {
@@ -82,7 +82,7 @@
 {
    const struct gbm_backend *backend = NULL;
    struct gbm_device *dev = NULL;
-   int i;
+   unsigned i;
    const char *b;
 
    b = getenv("GBM_BACKEND");
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gbm/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gbm/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gbm/Makefile.am	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gbm/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = main/gbm.pc
 
@@ -41,18 +39,15 @@
 endif
 
 if HAVE_DRI2
-noinst_LTLIBRARIES = libgbm_dri.la
-libgbm_dri_la_SOURCES = \
+libgbm_la_SOURCES += \
 	backends/dri/gbm_dri.c \
 	backends/dri/gbm_driint.h
 
-libgbm_dri_la_CFLAGS = \
-	$(AM_CFLAGS) \
+AM_CFLAGS += \
 	-DDEFAULT_DRIVER_DIR='"$(DRI_DRIVER_SEARCH_DIR)"' \
 	$(LIBDRM_CFLAGS)
 
 libgbm_la_LIBADD += \
-	libgbm_dri.la \
 	$(LIBDRM_LIBS)
 endif
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Android.gen.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Android.gen.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Android.gen.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Android.gen.mk	2015-09-16 14:36:09.000000000 +0000
@@ -29,18 +29,7 @@
 
 intermediates := $(call local-generated-sources-dir)
 
-sources := \
-	glsl_lexer.cpp \
-	glsl_parser.cpp \
-	glcpp/glcpp-lex.c \
-	glcpp/glcpp-parse.c \
-	nir/nir_builder_opcodes.h \
-	nir/nir_constant_expressions.c \
-	nir/nir_opcodes.c \
-	nir/nir_opcodes.h \
-	nir/nir_opt_algebraic.c
-
-LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
 
 LOCAL_C_INCLUDES += \
 	$(intermediates)/glcpp \
@@ -51,8 +40,10 @@
 LOCAL_EXPORT_C_INCLUDE_DIRS += \
 	$(intermediates)/nir
 
-sources := $(addprefix $(intermediates)/, $(sources))
-LOCAL_GENERATED_SOURCES += $(sources)
+LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
+	$(LIBGLCPP_GENERATED_FILES) \
+	$(NIR_GENERATED_FILES) \
+	$(LIBGLSL_GENERATED_CXX_FILES))
 
 define local-l-or-ll-to-c-or-cpp
 	@mkdir -p $(dir $@)
@@ -102,8 +93,7 @@
 nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
 nir_constant_expressions_deps := \
 	$(LOCAL_PATH)/nir/nir_opcodes.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.h
+	$(LOCAL_PATH)/nir/nir_constant_expressions.py
 
 $(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
 	@mkdir -p $(dir $@)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Android.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -46,7 +46,6 @@
 
 LOCAL_MODULE := libmesa_glsl
 
-include external/stlport/libstlport.mk
 include $(LOCAL_PATH)/Android.gen.mk
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_array_index.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_array_index.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_array_index.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_array_index.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -107,6 +107,33 @@
 }
 
 
+static int
+get_implicit_array_size(struct _mesa_glsl_parse_state *state,
+                        ir_rvalue *array)
+{
+   ir_variable *var = array->variable_referenced();
+
+   /* Inputs in control shader are implicitly sized
+    * to the maximum patch size.
+    */
+   if (state->stage == MESA_SHADER_TESS_CTRL &&
+       var->data.mode == ir_var_shader_in) {
+      return state->Const.MaxPatchVertices;
+   }
+
+   /* Non-patch inputs in evaluation shader are implicitly sized
+    * to the maximum patch size.
+    */
+   if (state->stage == MESA_SHADER_TESS_EVAL &&
+       var->data.mode == ir_var_shader_in &&
+       !var->data.patch) {
+      return state->Const.MaxPatchVertices;
+   }
+
+   return 0;
+}
+
+
 ir_rvalue *
 _mesa_ast_array_index_to_hir(void *mem_ctx,
 			     struct _mesa_glsl_parse_state *state,
@@ -183,7 +210,25 @@
          update_max_array_access(array, idx, &loc, state);
    } else if (const_index == NULL && array->type->is_array()) {
       if (array->type->is_unsized_array()) {
-	 _mesa_glsl_error(&loc, state, "unsized array index must be constant");
+         int implicit_size = get_implicit_array_size(state, array);
+         if (implicit_size) {
+            ir_variable *v = array->whole_variable_referenced();
+            if (v != NULL)
+               v->data.max_array_access = implicit_size - 1;
+         }
+         else if (state->stage == MESA_SHADER_TESS_CTRL &&
+                  array->variable_referenced()->data.mode == ir_var_shader_out &&
+                  !array->variable_referenced()->data.patch) {
+            /* Tessellation control shader output non-patch arrays are
+             * initially unsized. Despite that, they are allowed to be
+             * indexed with a non-constant expression (typically
+             * "gl_InvocationID"). The array size will be determined
+             * by the linker.
+             */
+         }
+         else {
+            _mesa_glsl_error(&loc, state, "unsized array index must be constant");
+         }
       } else if (array->type->fields.array->is_interface()
                  && array->variable_referenced()->data.mode == ir_var_uniform
                  && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
@@ -245,6 +290,21 @@
                                   "1.30 and later");
          }
       }
+
+      /* From page 27 of the GLSL ES 3.1 specification:
+       *
+       * "When aggregated into arrays within a shader, images can only be
+       *  indexed with a constant integral expression."
+       *
+       * On the other hand the desktop GL specification extension allows
+       * non-constant indexing of image arrays, but behavior is left undefined
+       * in cases where the indexing expression is not dynamically uniform.
+       */
+      if (state->es_shader && array->type->without_array()->is_image()) {
+         _mesa_glsl_error(&loc, state,
+                          "image arrays indexed with non-constant "
+                          "expressions are forbidden in GLSL ES.");
+      }
    }
 
    /* After performing all of the error checking, generate the IR for the
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_function.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_function.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_function.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_function.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,7 @@
 #include "glsl_types.h"
 #include "ir.h"
 #include "main/core.h" /* for MIN2 */
+#include "main/shaderobj.h"
 
 static ir_rvalue *
 convert_component(ir_rvalue *src, const glsl_type *desired_type);
@@ -355,6 +356,8 @@
 static ir_rvalue *
 generate_call(exec_list *instructions, ir_function_signature *sig,
 	      exec_list *actual_parameters,
+              ir_variable *sub_var,
+	      ir_rvalue *array_idx,
 	      struct _mesa_glsl_parse_state *state)
 {
    void *ctx = state;
@@ -421,7 +424,8 @@
 
       deref = new(ctx) ir_dereference_variable(var);
    }
-   ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters);
+
+   ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters, sub_var, array_idx);
    instructions->push_tail(call);
 
    /* Also emit any necessary out-parameter conversions. */
@@ -489,6 +493,40 @@
    return sig;
 }
 
+static ir_function_signature *
+match_subroutine_by_name(const char *name,
+                         exec_list *actual_parameters,
+                         struct _mesa_glsl_parse_state *state,
+                         ir_variable **var_r)
+{
+   void *ctx = state;
+   ir_function_signature *sig = NULL;
+   ir_function *f, *found = NULL;
+   const char *new_name;
+   ir_variable *var;
+   bool is_exact = false;
+
+   new_name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), name);
+   var = state->symbols->get_variable(new_name);
+   if (!var)
+      return NULL;
+
+   for (int i = 0; i < state->num_subroutine_types; i++) {
+      f = state->subroutine_types[i];
+      if (strcmp(f->name, var->type->without_array()->name))
+         continue;
+      found = f;
+      break;
+   }
+
+   if (!found)
+      return NULL;
+   *var_r = var;
+   sig = found->matching_signature(state, actual_parameters,
+                                  false, &is_exact);
+   return sig;
+}
+
 static void
 print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc,
                           ir_function *f)
@@ -863,7 +901,7 @@
 
    if (is_unsized_array) {
       constructor_type =
-	 glsl_type::get_array_instance(constructor_type->element_type(),
+	 glsl_type::get_array_instance(constructor_type->fields.array,
 				       parameter_count);
       assert(constructor_type != NULL);
       assert(constructor_type->length == parameter_count);
@@ -876,7 +914,7 @@
       ir_rvalue *result = ir;
 
       const glsl_base_type element_base_type =
-         constructor_type->element_type()->base_type;
+         constructor_type->fields.array->base_type;
 
       /* Apply implicit conversions (not the scalar constructor rules!). See
        * the spec quote above. */
@@ -896,10 +934,10 @@
 	 }
       }
 
-      if (result->type != constructor_type->element_type()) {
+      if (result->type != constructor_type->fields.array) {
 	 _mesa_glsl_error(loc, state, "type error in array constructor: "
 			  "expected: %s, found %s",
-			  constructor_type->element_type()->name,
+			  constructor_type->fields.array->name,
 			  result->type->name);
          return ir_rvalue::error_value(ctx);
       }
@@ -993,11 +1031,15 @@
    ir_variable *var = new(ctx) ir_variable(type, "vec_ctor", ir_var_temporary);
    instructions->push_tail(var);
 
-   /* There are two kinds of vector constructors.
+   /* There are three kinds of vector constructors.
     *
     *  - Construct a vector from a single scalar by replicating that scalar to
     *    all components of the vector.
     *
+    *  - Construct a vector from at least a matrix. This case should already
+    *    have been taken care of in ast_function_expression::hir by breaking
+    *    down the matrix into a series of column vectors.
+    *
     *  - Construct a vector from an arbirary combination of vectors and
     *    scalars.  The components of the constructor parameters are assigned
     *    to the vector in order until the vector is full.
@@ -1091,6 +1133,14 @@
 	    rhs_components = lhs_components - base_component;
 	 }
 
+	 /* If we do not have any components left to copy, break out of the
+	  * loop. This can happen when initializing a vec4 with a mat3 as the
+	  * mat3 would have been broken into a series of column vectors.
+	  */
+	 if (rhs_components == 0) {
+	    break;
+	 }
+
 	 const ir_constant *const c = param->as_constant();
 	 if (c == NULL) {
 	    /* Mask of fields to be written in the assignment.
@@ -1519,6 +1569,65 @@
                                              &actual_parameters, state);
 }
 
+/**
+ * Lower a "method call" expression.  Only .length() is recognized; it yields
+ * a constant for sized arrays and, with ARB_shading_language_420pack, for
+ * vectors and matrices.  Every other case reports an error.
+ */
+ir_rvalue *
+ast_function_expression::handle_method(exec_list *instructions,
+                                       struct _mesa_glsl_parse_state *state)
+{
+   const ast_expression *field = subexpressions[0];
+   ir_rvalue *result = NULL;
+   void *ctx = state;
+   YYLTYPE loc = get_location();
+   /* Handle "method calls" in GLSL 1.20 - namely, array.length() */
+   state->check_version(120, 300, &loc, "methods not supported");
+
+   const char *method = field->primary_expression.identifier;
+   ir_rvalue *op = field->subexpressions[0]->hir(instructions, state);
+   if (strcmp(method, "length") == 0) {
+      if (!this->expressions.is_empty()) {
+         _mesa_glsl_error(&loc, state, "length method takes no arguments");
+         goto fail;
+      }
+      if (op->type->is_array()) {
+         if (op->type->is_unsized_array()) {
+            _mesa_glsl_error(&loc, state, "length called on unsized array");
+            goto fail;
+         }
+         result = new(ctx) ir_constant(op->type->array_size());
+      } else if (op->type->is_vector()) {
+         if (state->ARB_shading_language_420pack_enable) {
+            /* .length() returns int. */
+            result = new(ctx) ir_constant((int) op->type->vector_elements);
+         } else {
+            _mesa_glsl_error(&loc, state, "length method on vector only available "
+                             "with ARB_shading_language_420pack");
+            goto fail;
+         }
+      } else if (op->type->is_matrix()) {
+         if (state->ARB_shading_language_420pack_enable) {
+            /* .length() returns int. */
+            result = new(ctx) ir_constant((int) op->type->matrix_columns);
+         } else {
+            _mesa_glsl_error(&loc, state, "length method on matrix only available "
+                             "with ARB_shading_language_420pack");
+            goto fail;
+         }
+      } else {
+         _mesa_glsl_error(&loc, state, "length called on scalar.");
+         goto fail;
+      }
+   } else {
+      _mesa_glsl_error(&loc, state, "unknown method: `%s'", method);
+      goto fail;
+   }
+   return result;
+fail:
+   return ir_rvalue::error_value(ctx);
+}
 
 ir_rvalue *
 ast_function_expression::hir(exec_list *instructions,
@@ -1531,8 +1640,6 @@
     * 2. methods - Only the .length() method of array types.
     * 3. functions - Calls to regular old functions.
     *
-    * Method calls are actually detected when the ast_field_selection
-    * expression is handled.
     */
    if (is_constructor()) {
       const ast_type_specifier *type = (ast_type_specifier *) subexpressions[0];
@@ -1681,11 +1788,11 @@
 	 return ir_rvalue::error_value(ctx);
       }
 
-      /* Later, we cast each parameter to the same base type as the
-       * constructor.  Since there are no non-floating point matrices, we
-       * need to break them up into a series of column vectors.
+      /* Matrices can never be consumed as is by any constructor but matrix
+       * constructors. If the constructor type is not matrix, always break the
+       * matrix up into a series of column vectors.
        */
-      if (constructor_type->base_type != GLSL_TYPE_FLOAT) {
+      if (!constructor_type->is_matrix()) {
 	 foreach_in_list_safe(ir_rvalue, matrix, &actual_parameters) {
 	    if (!matrix->type->is_matrix())
 	       continue;
@@ -1753,11 +1860,22 @@
 					       &actual_parameters,
 					       ctx);
       }
+   } else if (subexpressions[0]->oper == ast_field_selection) {
+      return handle_method(instructions, state);
    } else {
       const ast_expression *id = subexpressions[0];
-      const char *func_name = id->primary_expression.identifier;
+      const char *func_name;
       YYLTYPE loc = get_location();
       exec_list actual_parameters;
+      ir_variable *sub_var = NULL;
+      ir_rvalue *array_idx = NULL;
+
+      if (id->oper == ast_array_index) {
+         func_name = id->subexpressions[0]->primary_expression.identifier;
+	 array_idx = id->subexpressions[1]->hir(instructions, state);
+      } else {
+         func_name = id->primary_expression.identifier;
+      }
 
       process_parameters(instructions, &actual_parameters, &this->expressions,
 			 state);
@@ -1767,13 +1885,17 @@
 
       ir_rvalue *value = NULL;
       if (sig == NULL) {
+         sig = match_subroutine_by_name(func_name, &actual_parameters, state, &sub_var);
+      }
+
+      if (sig == NULL) {
 	 no_matching_function_error(func_name, &loc, &actual_parameters, state);
 	 value = ir_rvalue::error_value(ctx);
       } else if (!verify_parameter_modes(state, sig, actual_parameters, this->expressions)) {
 	 /* an error has already been emitted */
 	 value = ir_rvalue::error_value(ctx);
       } else {
-         value = generate_call(instructions, sig, &actual_parameters, state);
+         value = generate_call(instructions, sig, &actual_parameters, sub_var, array_idx, state);
          if (!value) {
             ir_variable *const tmp = new(ctx) ir_variable(glsl_type::void_type,
                                                           "void_var",
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast.h	2015-09-16 14:36:09.000000000 +0000
@@ -304,6 +304,16 @@
     * Is this function call actually a constructor?
     */
    bool cons;
+   ir_rvalue *
+   handle_method(exec_list *instructions,
+                 struct _mesa_glsl_parse_state *state);
+};
+
+class ast_subroutine_list : public ast_node
+{
+public:
+   virtual void print(void) const;
+   exec_list declarations;
 };
 
 class ast_array_specifier : public ast_node {
@@ -434,7 +444,9 @@
 	 unsigned out:1;
 	 unsigned centroid:1;
          unsigned sample:1;
+	 unsigned patch:1;
 	 unsigned uniform:1;
+	 unsigned buffer:1;
 	 unsigned smooth:1;
 	 unsigned flat:1;
 	 unsigned noperspective:1;
@@ -514,6 +526,23 @@
          unsigned stream:1; /**< Has stream value assigned  */
          unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */
          /** \} */
+
+	 /** \name Layout qualifiers for GL_ARB_tessellation_shader */
+	 /** \{ */
+	 /* tess eval input layout */
+	 /* gs prim_type reused for primitive mode */
+	 unsigned vertex_spacing:1;
+	 unsigned ordering:1;
+	 unsigned point_mode:1;
+	 /* tess control output layout */
+	 unsigned vertices:1;
+	 /** \} */
+
+         /** \name Qualifiers for GL_ARB_shader_subroutine */
+	 /** \{ */
+         unsigned subroutine:1;  /**< Is this marked 'subroutine' */
+         unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a list of types */
+	 /** \} */
       }
       /** \brief Set of flags, accessed by name. */
       q;
@@ -549,7 +578,10 @@
    /** Stream in GLSL 1.50 geometry shaders. */
    unsigned stream;
 
-   /** Input or output primitive type in GLSL 1.50 geometry shaders */
+   /**
+    * Input or output primitive type in GLSL 1.50 geometry shaders
+    * and tessellation shaders.
+    */
    GLenum prim_type;
 
    /**
@@ -576,6 +608,18 @@
     */
    int local_size[3];
 
+   /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */
+   GLenum vertex_spacing;
+
+   /** Tessellation evaluation shader: vertex ordering (CW or CCW) */
+   GLenum ordering;
+
+   /** Tessellation evaluation shader: point mode */
+   bool point_mode;
+
+   /** Tessellation control shader: number of output vertices */
+   int vertices;
+
    /**
     * Image format specified with an ARB_shader_image_load_store
     * layout qualifier.
@@ -631,11 +675,17 @@
 			_mesa_glsl_parse_state *state,
 			ast_type_qualifier q);
 
+   bool merge_out_qualifier(YYLTYPE *loc,
+                           _mesa_glsl_parse_state *state,
+                           ast_type_qualifier q,
+                           ast_node* &node);
+
    bool merge_in_qualifier(YYLTYPE *loc,
                            _mesa_glsl_parse_state *state,
                            ast_type_qualifier q,
                            ast_node* &node);
 
+   ast_subroutine_list *subroutine_list;
 };
 
 class ast_declarator_list;
@@ -1030,6 +1080,27 @@
 };
 
 
+/**
+ * AST node representing a declaration of the output layout for tessellation
+ * control shaders.
+ */
+class ast_tcs_output_layout : public ast_node
+{
+public:
+   ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices)
+      : vertices(vertices)
+   {
+      set_location(locp); /* remember where the declaration appeared */
+   }
+
+   virtual ir_rvalue *hir(exec_list *instructions,
+                          struct _mesa_glsl_parse_state *state);
+
+private:
+   const int vertices; /**< Value handed to the constructor; never modified. */
+};
+
+
 /**
  * AST node representing a declaration of the input layout for geometry
  * shaders.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_to_hir.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_to_hir.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_to_hir.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_to_hir.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -54,6 +54,7 @@
 #include "ast.h"
 #include "glsl_types.h"
 #include "program/hash_table.h"
+#include "main/shaderobj.h"
 #include "ir.h"
 #include "ir_builder.h"
 
@@ -79,6 +80,7 @@
    state->toplevel_ir = instructions;
 
    state->gs_input_prim_type_specified = false;
+   state->tcs_output_vertices_specified = false;
    state->cs_input_local_size_specified = false;
 
    /* Section 4.2 of the GLSL 1.20 specification states:
@@ -639,6 +641,34 @@
 }
 
 /**
+ * Returns the innermost array index expression in an rvalue tree.
+ * This is the largest indexing level -- if an array of blocks, then
+ * it is the block index rather than an indexing expression for an
+ * array-typed member of an array of blocks.
+ */
+static ir_rvalue *
+find_innermost_array_index(ir_rvalue *rv)
+{
+   /* Last array dereference met while walking down the dereference chain;
+    * each step moves to the operand of the current node.
+    */
+   ir_dereference_array *deepest = NULL;
+   for (ir_rvalue *cur = rv; cur != NULL; ) {
+      if (ir_dereference_array *arr = cur->as_dereference_array()) {
+         deepest = arr;
+         cur = arr->array;
+      } else if (ir_dereference_record *rec = cur->as_dereference_record()) {
+         cur = rec->record;
+      } else if (ir_swizzle *swz = cur->as_swizzle()) {
+         cur = swz->val;
+      } else {
+         cur = NULL;
+      }
+   }
+   return deepest != NULL ? deepest->array_index : NULL;
+}
+
+/**
  * Validates that a value can be assigned to a location with a specified type
  *
  * Validates that \c rhs can be assigned to some location.  If the types are
@@ -654,9 +684,9 @@
  * In addition to being used for assignments, this function is used to
  * type-check return values.
  */
-ir_rvalue *
+static ir_rvalue *
 validate_assignment(struct _mesa_glsl_parse_state *state,
-                    YYLTYPE loc, const glsl_type *lhs_type,
+                    YYLTYPE loc, ir_rvalue *lhs,
                     ir_rvalue *rhs, bool is_initializer)
 {
    /* If there is already some error in the RHS, just return it.  Anything
@@ -665,9 +695,28 @@
    if (rhs->type->is_error())
       return rhs;
 
+   /* In the Tessellation Control Shader: if a per-vertex output variable is
+    * used as an l-value, it is an error if the expression indicating the
+    * vertex number is not the identifier `gl_InvocationID`.  Skip the check
+    * when the LHS references no variable (variable_referenced() is NULL).
+    */
+   if (state->stage == MESA_SHADER_TESS_CTRL) {
+      ir_variable *var = lhs->variable_referenced();
+      if (var && var->data.mode == ir_var_shader_out && !var->data.patch) {
+         ir_rvalue *index = find_innermost_array_index(lhs);
+         ir_variable *index_var = index ? index->variable_referenced() : NULL;
+         if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) {
+            _mesa_glsl_error(&loc, state,
+                             "Tessellation control shader outputs can only "
+                             "be indexed by gl_InvocationID");
+            return NULL;
+         }
+      }
+   }
+
    /* If the types are identical, the assignment can trivially proceed.
     */
-   if (rhs->type == lhs_type)
+   if (rhs->type == lhs->type)
       return rhs;
 
    /* If the array element types are the same and the LHS is unsized,
@@ -677,8 +726,8 @@
     * Note: Whole-array assignments are not permitted in GLSL 1.10, but this
     * is handled by ir_dereference::is_lvalue.
     */
-   if (lhs_type->is_unsized_array() && rhs->type->is_array()
-       && (lhs_type->element_type() == rhs->type->element_type())) {
+   if (lhs->type->is_unsized_array() && rhs->type->is_array()
+       && (lhs->type->fields.array == rhs->type->fields.array)) {
       if (is_initializer) {
          return rhs;
       } else {
@@ -689,8 +738,8 @@
    }
 
    /* Check for implicit conversion in GLSL 1.20 */
-   if (apply_implicit_conversion(lhs_type, rhs, state)) {
-      if (rhs->type == lhs_type)
+   if (apply_implicit_conversion(lhs->type, rhs, state)) {
+      if (rhs->type == lhs->type)
 	 return rhs;
    }
 
@@ -698,7 +747,7 @@
                     "%s of type %s cannot be assigned to "
                     "variable of type %s",
                     is_initializer ? "initializer" : "value",
-                    rhs->type->name, lhs_type->name);
+                    rhs->type->name, lhs->type->name);
 
    return NULL;
 }
@@ -733,7 +782,7 @@
 
       if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
          ir_rvalue *new_rhs =
-            validate_assignment(state, lhs_loc, lhs->type,
+            validate_assignment(state, lhs_loc, lhs,
                                 rhs, is_initializer);
 
          if (new_rhs == NULL) {
@@ -795,7 +844,7 @@
    }
 
    ir_rvalue *new_rhs =
-      validate_assignment(state, lhs_loc, lhs->type, rhs, is_initializer);
+      validate_assignment(state, lhs_loc, lhs, rhs, is_initializer);
    if (new_rhs != NULL) {
       rhs = new_rhs;
 
@@ -820,7 +869,7 @@
                              var->data.max_array_access);
          }
 
-         var->type = glsl_type::get_array_instance(lhs->type->element_type(),
+         var->type = glsl_type::get_array_instance(lhs->type->fields.array,
                                                    rhs->type->array_size());
          d->type = var->type;
       }
@@ -971,6 +1020,7 @@
    case GLSL_TYPE_IMAGE:
    case GLSL_TYPE_INTERFACE:
    case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_SUBROUTINE:
       /* I assume a comparison of a struct containing a sampler just
        * ignores the sampler present in the type.
        */
@@ -2014,7 +2064,7 @@
                                 const glsl_type *type,
                                 ir_variable *var)
 {
-   if (var && !var->is_in_uniform_block()) {
+   if (var && !var->is_in_buffer_block()) {
       /* Layout qualifiers may only apply to interface blocks and fields in
        * them.
        */
@@ -2048,12 +2098,13 @@
 static bool
 validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
                            YYLTYPE *loc,
-                           ir_variable *var,
+                           const glsl_type *type,
                            const ast_type_qualifier *qual)
 {
-   if (var->data.mode != ir_var_uniform) {
+   if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
       _mesa_glsl_error(loc, state,
-                       "the \"binding\" qualifier only applies to uniforms");
+                       "the \"binding\" qualifier only applies to uniforms and "
+                       "shader storage buffer objects");
       return false;
    }
 
@@ -2063,10 +2114,11 @@
    }
 
    const struct gl_context *const ctx = state->ctx;
-   unsigned elements = var->type->is_array() ? var->type->length : 1;
+   unsigned elements = type->is_array() ? type->length : 1;
    unsigned max_index = qual->binding + elements - 1;
+   const glsl_type *base_type = type->without_array();
 
-   if (var->type->is_interface()) {
+   if (base_type->is_interface()) {
       /* UBOs.  From page 60 of the GLSL 4.20 specification:
        * "If the binding point for any uniform block instance is less than zero,
        *  or greater than or equal to the implementation-dependent maximum
@@ -2077,15 +2129,33 @@
        *
        * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS.
        */
-      if (max_index >= ctx->Const.MaxUniformBufferBindings) {
+      if (qual->flags.q.uniform &&
+         max_index >= ctx->Const.MaxUniformBufferBindings) {
          _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds "
                           "the maximum number of UBO binding points (%d)",
                           qual->binding, elements,
                           ctx->Const.MaxUniformBufferBindings);
          return false;
       }
-   } else if (var->type->is_sampler() ||
-              (var->type->is_array() && var->type->fields.array->is_sampler())) {
+
+      /* SSBOs. From page 67 of the GLSL 4.30 specification:
+       * "If the binding point for any uniform or shader storage block instance
+       *  is less than zero, or greater than or equal to the
+       *  implementation-dependent maximum number of uniform buffer bindings, a
+       *  compile-time error will occur. When the binding identifier is used
+       *  with a uniform or shader storage block instanced as an array of size
+       *  N, all elements of the array from binding through binding + N – 1 must
+       *  be within this range."
+       */
+      if (qual->flags.q.buffer &&
+         max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
+         _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds "
+                          "the maximum number of SSBO binding points (%d)",
+                          qual->binding, elements,
+                          ctx->Const.MaxShaderStorageBufferBindings);
+         return false;
+      }
+   } else if (base_type->is_sampler()) {
       /* Samplers.  From page 63 of the GLSL 4.20 specification:
        * "If the binding is less than zero, or greater than or equal to the
        *  implementation-dependent maximum supported number of units, a
@@ -2102,7 +2172,7 @@
 
          return false;
       }
-   } else if (var->type->contains_atomic()) {
+   } else if (base_type->contains_atomic()) {
       assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS);
       if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) {
          _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the "
@@ -2112,10 +2182,19 @@
 
          return false;
       }
+   } else if (state->is_version(420, 310) && base_type->is_image()) {
+      assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
+      if (max_index >= ctx->Const.MaxImageUnits) {
+         _mesa_glsl_error(loc, state, "Image binding %d exceeds the "
+                          " maximum number of image units (%d)", max_index,
+                          ctx->Const.MaxImageUnits);
+         return false;
+      }
+
    } else {
       _mesa_glsl_error(loc, state,
                        "the \"binding\" qualifier only applies to uniform "
-                       "blocks, samplers, atomic counters, or arrays thereof");
+                       "blocks, opaque variables, or arrays thereof");
       return false;
    }
 
@@ -2212,6 +2291,8 @@
     *                     input            output
     *                     -----            ------
     * vertex              explicit_loc     sso
+    * tess control        sso              sso
+    * tess eval           sso              sso
     * geometry            sso              sso
     * fragment            sso              explicit_loc
     */
@@ -2234,6 +2315,8 @@
       fail = true;
       break;
 
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
    case MESA_SHADER_GEOMETRY:
       if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) {
          if (!state->check_separate_shader_objects_allowed(loc, var))
@@ -2293,8 +2376,13 @@
                : (qual->location + VARYING_SLOT_VAR0);
             break;
 
+         case MESA_SHADER_TESS_CTRL:
+         case MESA_SHADER_TESS_EVAL:
          case MESA_SHADER_GEOMETRY:
-            var->data.location = qual->location + VARYING_SLOT_VAR0;
+            if (var->data.patch)
+               var->data.location = qual->location + VARYING_SLOT_PATCH0;
+            else
+               var->data.location = qual->location + VARYING_SLOT_VAR0;
             break;
 
          case MESA_SHADER_FRAGMENT:
@@ -2337,8 +2425,7 @@
                                   struct _mesa_glsl_parse_state *state,
                                   YYLTYPE *loc)
 {
-   const glsl_type *base_type =
-      (var->type->is_array() ? var->type->element_type() : var->type);
+   const glsl_type *base_type = var->type->without_array();
 
    if (base_type->is_image()) {
       if (var->data.mode != ir_var_uniform &&
@@ -2368,14 +2455,38 @@
 
          var->data.image_format = qual->image_format;
       } else {
-         if (var->data.mode == ir_var_uniform && !qual->flags.q.write_only) {
-            _mesa_glsl_error(loc, state, "uniforms not qualified with "
-                             "`writeonly' must have a format layout "
-                             "qualifier");
+         if (var->data.mode == ir_var_uniform) {
+            if (state->es_shader) {
+               _mesa_glsl_error(loc, state, "all image uniforms "
+                                "must have a format layout qualifier");
+
+            } else if (!qual->flags.q.write_only) {
+               _mesa_glsl_error(loc, state, "image uniforms not qualified with "
+                                "`writeonly' must have a format layout "
+                                "qualifier");
+            }
          }
 
          var->data.image_format = GL_NONE;
       }
+
+      /* From page 70 of the GLSL ES 3.1 specification:
+       *
+       * "Except for image variables qualified with the format qualifiers
+       *  r32f, r32i, and r32ui, image variables must specify either memory
+       *  qualifier readonly or the memory qualifier writeonly."
+       */
+      if (state->es_shader &&
+          var->data.image_format != GL_R32F &&
+          var->data.image_format != GL_R32I &&
+          var->data.image_format != GL_R32UI &&
+          !var->data.image_read_only &&
+          !var->data.image_write_only) {
+         _mesa_glsl_error(loc, state, "image variables of format other than "
+                          "r32f, r32i or r32ui must be qualified `readonly' or "
+                          "`writeonly'");
+      }
+
    } else if (qual->flags.q.read_only ||
               qual->flags.q.write_only ||
               qual->flags.q.coherent ||
@@ -2446,6 +2557,12 @@
       }
    }
 
+   if (qual->flags.q.subroutine && !qual->flags.q.uniform) {
+      _mesa_glsl_error(loc, state,
+                       "`subroutine' may only be applied to uniforms, "
+                       "subroutine type declarations, or function definitions");
+   }
+
    if (qual->flags.q.constant || qual->flags.q.attribute
        || qual->flags.q.uniform
        || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT)))
@@ -2462,6 +2579,9 @@
       var->data.stream = qual->stream;
    }
 
+   if (qual->flags.q.patch)
+      var->data.patch = 1;
+
    if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) {
       var->type = glsl_type::error_type;
       _mesa_glsl_error(loc, state,
@@ -2509,6 +2629,8 @@
       var->data.mode = ir_var_shader_out;
    else if (qual->flags.q.uniform)
       var->data.mode = ir_var_uniform;
+   else if (qual->flags.q.buffer)
+      var->data.mode = ir_var_shader_storage;
 
    if (!is_parameter && is_varying_var(var, state->stage)) {
       /* User-defined ins/outs are not permitted in compute shaders. */
@@ -2572,7 +2694,9 @@
       case MESA_SHADER_VERTEX:
          if (var->data.mode == ir_var_shader_out)
             var->data.invariant = true;
-	      break;
+         break;
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
       case MESA_SHADER_GEOMETRY:
          if ((var->data.mode == ir_var_shader_in)
              || (var->data.mode == ir_var_shader_out))
@@ -2659,7 +2783,7 @@
    }
 
    if (qual->flags.q.explicit_binding &&
-       validate_binding_qualifier(state, loc, var, qual)) {
+       validate_binding_qualifier(state, loc, var->type, qual)) {
       var->data.explicit_binding = true;
       var->data.binding = qual->binding;
    }
@@ -2737,7 +2861,7 @@
     *    GL_ARB_conservative_depth
     *    GL_ARB_gpu_shader5
     *    GL_ARB_separate_shader_objects
-    *    GL_ARB_tesselation_shader
+    *    GL_ARB_tessellation_shader
     *    GL_ARB_transform_feedback3
     *    GL_ARB_uniform_buffer_object
     *
@@ -2862,7 +2986,7 @@
     *  type and specify a size."
     */
    if (earlier->type->is_unsized_array() && var->type->is_array()
-       && (var->type->element_type() == earlier->type->element_type())) {
+       && (var->type->fields.array == earlier->type->fields.array)) {
       /* FINISHME: This doesn't match the qualifiers on the two
        * FINISHME: declarations.  It's not 100% clear whether this is
        * FINISHME: required or not.
@@ -2982,6 +3106,15 @@
                            "cannot initialize uniforms");
    }
 
+   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+    *
+    *    "Buffer variables cannot have initializers."
+    */
+   if (var->data.mode == ir_var_shader_storage) {
+      _mesa_glsl_error(& initializer_loc, state,
+                       "SSBO variables cannot have initializers");
+   }
+
    /* From section 4.1.7 of the GLSL 4.40 spec:
     *
     *    "Opaque variables [...] are initialized only through the
@@ -3017,7 +3150,7 @@
    if (type->qualifier.flags.q.constant
        || type->qualifier.flags.q.uniform) {
       ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
-                                               var->type, rhs, true);
+                                               lhs, rhs, true);
       if (new_rhs != NULL) {
          rhs = new_rhs;
 
@@ -3103,30 +3236,13 @@
    return result;
 }
 
-
-/**
- * Do additional processing necessary for geometry shader input declarations
- * (this covers both interface blocks arrays and bare input variables).
- */
 static void
-handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
-                                  YYLTYPE loc, ir_variable *var)
+validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state,
+                                       YYLTYPE loc, ir_variable *var,
+                                       unsigned num_vertices,
+                                       unsigned *size,
+                                       const char *var_category)
 {
-   unsigned num_vertices = 0;
-   if (state->gs_input_prim_type_specified) {
-      num_vertices = vertices_per_prim(state->in_qualifier->prim_type);
-   }
-
-   /* Geometry shader input variables must be arrays.  Caller should have
-    * reported an error for this.
-    */
-   if (!var->type->is_array()) {
-      assert(state->error);
-
-      /* To avoid cascading failures, short circuit the checks below. */
-      return;
-   }
-
    if (var->type->is_unsized_array()) {
       /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says:
        *
@@ -3136,6 +3252,8 @@
        *
        * Followed by a table mapping each allowed input layout qualifier to
        * the corresponding input length.
+       *
+       * Similarly for tessellation control shader outputs.
        */
       if (num_vertices != 0)
          var->type = glsl_type::get_array_instance(var->type->fields.array,
@@ -3162,22 +3280,101 @@
        */
       if (num_vertices != 0 && var->type->length != num_vertices) {
          _mesa_glsl_error(&loc, state,
-                          "geometry shader input size contradicts previously"
-                          " declared layout (size is %u, but layout requires a"
-                          " size of %u)", var->type->length, num_vertices);
-      } else if (state->gs_input_size != 0 &&
-                 var->type->length != state->gs_input_size) {
+                          "%s size contradicts previously declared layout "
+                          "(size is %u, but layout requires a size of %u)",
+                          var_category, var->type->length, num_vertices);
+      } else if (*size != 0 && var->type->length != *size) {
          _mesa_glsl_error(&loc, state,
-                          "geometry shader input sizes are "
-                          "inconsistent (size is %u, but a previous "
-                          "declaration has size %u)",
-                          var->type->length, state->gs_input_size);
+                          "%s sizes are inconsistent (size is %u, but a "
+                          "previous declaration has size %u)",
+                          var_category, var->type->length, *size);
       } else {
-         state->gs_input_size = var->type->length;
+         *size = var->type->length;
       }
    }
 }
 
+static void
+handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
+                                    YYLTYPE loc, ir_variable *var)
+{
+   unsigned num_vertices = 0;
+   /* Stays 0 unless an output vertex count was already declared by a layout. */
+   if (state->tcs_output_vertices_specified) {
+      num_vertices = state->out_qualifier->vertices;
+   }
+   /* Only outputs marked 'patch' may be non-arrays; report anything else. */
+   if (!var->type->is_array() && !var->data.patch) {
+      _mesa_glsl_error(&loc, state,
+                       "tessellation control shader outputs must be arrays");
+
+      /* To avoid cascading failures, short circuit the checks below. */
+      return;
+   }
+   /* 'patch' outputs are exempt from the vertex-count size check below. */
+   if (var->data.patch)
+      return;
+   /* Size an unsized array from the count, or check a sized one against it. */
+   validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+                                          &state->tcs_output_size,
+                                          "tessellation control shader output");
+}
+
+/**
+ * Check a tessellation control/evaluation shader input declaration (a bare
+ * input variable or an interface block instance).  Inputs not marked 'patch'
+ * must be arrays; an unsized array gets size state->Const.MaxPatchVertices.
+ */
+static void
+handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state,
+                              YYLTYPE loc, ir_variable *var)
+{
+   if (!var->type->is_array() && !var->data.patch) {
+      _mesa_glsl_error(&loc, state,
+                       "per-vertex tessellation shader inputs must be arrays");
+      /* Avoid cascading failures. */
+      return;
+   }
+   /* 'patch' inputs skip the implicit array sizing below. */
+   if (var->data.patch)
+      return;
+
+   /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */
+   if (var->type->is_unsized_array()) {
+      var->type = glsl_type::get_array_instance(var->type->fields.array,
+            state->Const.MaxPatchVertices);
+   }
+}
+
+
+/**
+ * Do additional processing necessary for geometry shader input declarations
+ * (this covers both interface block arrays and bare input variables).
+ */
+static void
+handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
+                                  YYLTYPE loc, ir_variable *var)
+{
+   unsigned num_vertices = 0;
+   /* Stays 0 unless an input primitive type was already declared by a layout. */
+   if (state->gs_input_prim_type_specified) {
+      num_vertices = vertices_per_prim(state->in_qualifier->prim_type);
+   }
+
+   /* Geometry shader input variables must be arrays.  Caller should have
+    * reported an error for this.
+    */
+   if (!var->type->is_array()) {
+      assert(state->error);
+
+      /* To avoid cascading failures, short circuit the checks below. */
+      return;
+   }
+   /* Size an unsized array from the count, or check a sized one against it. */
+   validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+                                          &state->gs_input_size,
+                                          "geometry shader input");
+}
 
 void
 validate_identifier(const char *identifier, YYLTYPE loc,
@@ -3217,7 +3414,7 @@
 static bool
 precision_qualifier_allowed(const glsl_type *type)
 {
-   /* Precision qualifiers apply to floating point, integer and sampler
+   /* Precision qualifiers apply to floating point, integer and opaque
     * types.
     *
     * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says:
@@ -3247,7 +3444,7 @@
    return type->is_float()
        || type->is_integer()
        || type->is_record()
-       || type->is_sampler();
+       || type->contains_opaque();
 }
 
 ir_rvalue *
@@ -3356,6 +3553,18 @@
 
    decl_type = this->type->glsl_type(& type_name, state);
 
+   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+    *    "Buffer variables may only be declared inside interface blocks
+    *    (section 4.3.9 “Interface Blocks”), which are then referred to as
+    *    shader storage blocks. It is a compile-time error to declare buffer
+    *    variables at global scope (outside a block)."
+    */
+   if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) {
+      _mesa_glsl_error(&loc, state,
+                       "buffer variables cannot be declared outside "
+                       "interface blocks");
+   }
+
    /* An offset-qualified atomic counter declaration sets the default
     * offset for the next declaration within the same atomic counter
     * buffer.
@@ -3429,7 +3638,7 @@
    foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
       const struct glsl_type *var_type;
       ir_variable *var;
-
+      const char *identifier = decl->identifier;
       /* FINISHME: Emit a warning if a variable declaration shadows a
        * FINISHME: declaration at a higher scope.
        */
@@ -3447,10 +3656,24 @@
          continue;
       }
 
+      if (this->type->qualifier.flags.q.subroutine) {
+         const glsl_type *t;
+         const char *name;
+
+         t = state->symbols->get_type(this->type->specifier->type_name);
+         if (!t)
+            _mesa_glsl_error(& loc, state,
+                             "invalid type in declaration of `%s'",
+                             decl->identifier);
+         name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier);
+
+         identifier = name;
+
+      }
       var_type = process_array_type(&loc, decl_type, decl->array_specifier,
                                     state);
 
-      var = new(ctx) ir_variable(var_type, decl->identifier, ir_var_auto);
+      var = new(ctx) ir_variable(var_type, identifier, ir_var_auto);
 
       /* The 'varying in' and 'varying out' qualifiers can only be used with
        * ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support
@@ -3522,6 +3745,8 @@
           */
          if (this->type->qualifier.flags.q.attribute) {
             mode = "attribute";
+         } else if (this->type->qualifier.flags.q.subroutine) {
+            mode = "subroutine uniform";
          } else if (this->type->qualifier.flags.q.uniform) {
             mode = "uniform";
          } else if (this->type->qualifier.flags.q.varying) {
@@ -3615,6 +3840,54 @@
             }
 
             handle_geometry_shader_input_decl(state, loc, var);
+         } else if (state->stage == MESA_SHADER_FRAGMENT) {
+            /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec:
+             *
+             *     It is a compile-time error to declare a fragment shader
+             *     input with, or that contains, any of the following types:
+             *
+             *     * A boolean type
+             *     * An opaque type
+             *     * An array of arrays
+             *     * An array of structures
+             *     * A structure containing an array
+             *     * A structure containing a structure
+             */
+            if (state->es_shader) {
+               const glsl_type *check_type = var->type->without_array();
+               if (check_type->is_boolean() ||
+                   check_type->contains_opaque()) {
+                  _mesa_glsl_error(&loc, state,
+                                   "fragment shader input cannot have type %s",
+                                   check_type->name);
+               }
+               if (var->type->is_array() &&
+                   var->type->fields.array->is_array()) {
+                  _mesa_glsl_error(&loc, state,
+                                   "%s shader input "
+                                   "cannot have an array of arrays",
+                                   _mesa_shader_stage_to_string(state->stage));
+               }
+               if (var->type->is_array() &&
+                   var->type->fields.array->is_record()) {
+                  _mesa_glsl_error(&loc, state,
+                                   "fragment shader input "
+                                   "cannot have an array of structs");
+               }
+               if (var->type->is_record()) {
+                  for (unsigned i = 0; i < var->type->length; i++) {
+                     if (var->type->fields.structure[i].type->is_array() ||
+                         var->type->fields.structure[i].type->is_record())
+                        _mesa_glsl_error(&loc, state,
+                                         "fragment shader input cannot have "
+                                         "a struct that contains an "
+                                         "array or struct");
+                  }
+               }
+            }
+         } else if (state->stage == MESA_SHADER_TESS_CTRL ||
+                    state->stage == MESA_SHADER_TESS_EVAL) {
+            handle_tess_shader_input_decl(state, loc, var);
          }
       } else if (var->data.mode == ir_var_shader_out) {
          const glsl_type *check_type = var->type->without_array();
@@ -3649,7 +3922,7 @@
             if (check_type->is_record() || check_type->is_matrix())
                _mesa_glsl_error(&loc, state,
                                 "fragment shader output "
-                                "cannot have struct or array type");
+                                "cannot have struct or matrix type");
             switch (check_type->base_type) {
             case GLSL_TYPE_UINT:
             case GLSL_TYPE_INT:
@@ -3661,6 +3934,62 @@
                                 "type %s", check_type->name);
             }
          }
+
+         /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec:
+          *
+          *     It is a compile-time error to declare a vertex shader output
+          *     with, or that contains, any of the following types:
+          *
+          *     * A boolean type
+          *     * An opaque type
+          *     * An array of arrays
+          *     * An array of structures
+          *     * A structure containing an array
+          *     * A structure containing a structure
+          *
+          *     It is a compile-time error to declare a fragment shader output
+          *     with, or that contains, any of the following types:
+          *
+          *     * A boolean type
+          *     * An opaque type
+          *     * A matrix
+          *     * A structure
+          *     * An array of arrays
+          */
+         if (state->es_shader) {
+            if (var->type->is_array() &&
+                var->type->fields.array->is_array()) {
+               _mesa_glsl_error(&loc, state,
+                                "%s shader output "
+                                "cannot have an array of arrays",
+                                _mesa_shader_stage_to_string(state->stage));
+            }
+            if (state->stage == MESA_SHADER_VERTEX) {
+               if (var->type->is_array() &&
+                   var->type->fields.array->is_record()) {
+                  _mesa_glsl_error(&loc, state,
+                                   "vertex shader output "
+                                   "cannot have an array of structs");
+               }
+               if (var->type->is_record()) {
+                  for (unsigned i = 0; i < var->type->length; i++) {
+                     if (var->type->fields.structure[i].type->is_array() ||
+                         var->type->fields.structure[i].type->is_record())
+                        _mesa_glsl_error(&loc, state,
+                                         "vertex shader output cannot have a "
+                                         "struct that contains an "
+                                         "array or struct");
+                  }
+               }
+            }
+         }
+
+         if (state->stage == MESA_SHADER_TESS_CTRL) {
+            handle_tess_ctrl_shader_output_decl(state, loc, var);
+         }
+      } else if (var->type->contains_subroutine()) {
+         /* declare subroutine uniforms as hidden */
+         var->data.how_declared = ir_var_hidden;
       }
 
       /* Integer fragment inputs must be qualified with 'flat'.  In GLSL ES,
@@ -3784,6 +4113,33 @@
       }
 
 
+      /* From section 4.3.4 of the GLSL 4.00 spec:
+       *    "Input variables may not be declared using the patch in qualifier
+       *    in tessellation control or geometry shaders."
+       *
+       * From section 4.3.6 of the GLSL 4.00 spec:
+       *    "It is an error to use patch out in a vertex, tessellation
+       *    evaluation, or geometry shader."
+       *
+       * This doesn't explicitly forbid using them in a fragment shader, but
+       * that's probably just an oversight.
+       */
+      if (state->stage != MESA_SHADER_TESS_EVAL
+          && this->type->qualifier.flags.q.patch
+          && this->type->qualifier.flags.q.in) {
+
+         _mesa_glsl_error(&loc, state, "'patch in' can only be used in a "
+                          "tessellation evaluation shader");
+      }
+
+      if (state->stage != MESA_SHADER_TESS_CTRL
+          && this->type->qualifier.flags.q.patch
+          && this->type->qualifier.flags.q.out) {
+
+         _mesa_glsl_error(&loc, state, "'patch out' can only be used in a "
+                          "tessellation control shader");
+      }
+
       /* Precision qualifiers exists only in GLSL versions 1.00 and >= 1.30.
        */
       if (this->type->qualifier.precision != ast_precision_none) {
@@ -3795,13 +4151,11 @@
        * an array of that type.
        */
       if (!(this->type->qualifier.precision == ast_precision_none
-          || precision_qualifier_allowed(var->type)
-          || (var->type->is_array()
-	      && precision_qualifier_allowed(var->type->fields.array)))) {
+          || precision_qualifier_allowed(var->type->without_array()))) {
 
          _mesa_glsl_error(&loc, state,
                           "precision qualifiers apply only to floating point"
-                          ", integer and sampler types");
+                          ", integer and opaque types");
       }
 
       /* From section 4.1.7 of the GLSL 4.40 spec:
@@ -4100,6 +4454,7 @@
    ir_function *f = NULL;
    ir_function_signature *sig = NULL;
    exec_list hir_parameters;
+   YYLTYPE loc = this->get_location();
 
    const char *const name = identifier;
 
@@ -4151,6 +4506,17 @@
       return_type = glsl_type::error_type;
    }
 
+   /* ARB_shader_subroutine states:
+    *  "Subroutine declarations cannot be prototyped. It is an error to prepend
+    *   subroutine(...) to a function declaration."
+    */
+   if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) {
+      YYLTYPE loc = this->get_location();
+      _mesa_glsl_error(&loc, state,
+                       "function declaration `%s' cannot have subroutine prepended",
+                       name);
+   }
+
    /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec:
     * "No qualifier is allowed on the return type of a function."
     */
@@ -4188,15 +4554,15 @@
    f = state->symbols->get_function(name);
    if (f == NULL) {
       f = new(ctx) ir_function(name);
-      if (!state->symbols->add_function(f)) {
-         /* This function name shadows a non-function use of the same name. */
-         YYLTYPE loc = this->get_location();
-
-         _mesa_glsl_error(&loc, state, "function name `%s' conflicts with "
-                          "non-function", name);
-         return NULL;
+      if (!this->return_type->qualifier.flags.q.subroutine) {
+         if (!state->symbols->add_function(f)) {
+            /* This function name shadows a non-function use of the same name. */
+            YYLTYPE loc = this->get_location();
+            _mesa_glsl_error(&loc, state, "function name `%s' conflicts with "
+                             "non-function", name);
+            return NULL;
+         }
       }
-
       emit_function(state, f);
    }
 
@@ -4283,6 +4649,44 @@
    sig->replace_parameters(&hir_parameters);
    signature = sig;
 
+   if (this->return_type->qualifier.flags.q.subroutine_def) {
+      int idx;
+
+      f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
+      f->subroutine_types = ralloc_array(state, const struct glsl_type *,
+                                         f->num_subroutine_types);
+      idx = 0;
+      foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) {
+         const struct glsl_type *type;
+         /* the subroutine type must be already declared */
+         type = state->symbols->get_type(decl->identifier);
+         if (!type) {
+            _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier);
+         }
+         f->subroutine_types[idx++] = type;
+      }
+      state->subroutines = (ir_function **)reralloc(state, state->subroutines,
+                                                    ir_function *,
+                                                    state->num_subroutines + 1);
+      state->subroutines[state->num_subroutines] = f;
+      state->num_subroutines++;
+
+   }
+
+   if (this->return_type->qualifier.flags.q.subroutine) {
+      if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) {
+         _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier);
+         return NULL;
+      }
+      state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types,
+                                                         ir_function *,
+                                                         state->num_subroutine_types + 1);
+      state->subroutine_types[state->num_subroutine_types] = f;
+      state->num_subroutine_types++;
+
+      f->is_subroutine = true;
+   }
+
    /* Function declarations (prototypes) do not have r-values.
     */
    return NULL;
@@ -5059,6 +5463,8 @@
       /* "int" and "float" are valid, but vectors and matrices are not. */
       return type->vector_elements == 1 && type->matrix_columns == 1;
    case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_ATOMIC_UINT:
       return true;
    default:
       return false;
@@ -5107,7 +5513,7 @@
       if (!is_valid_default_precision_type(type)) {
          _mesa_glsl_error(&loc, state,
                           "default precision statements apply only to "
-                          "float, int, and sampler types");
+                          "float, int, and opaque types");
          return NULL;
       }
 
@@ -5181,8 +5587,9 @@
  * \c glsl_struct_field to describe the members.
  *
  * If we're processing an interface block, var_mode should be the type of the
- * interface block (ir_var_shader_in, ir_var_shader_out, or ir_var_uniform).
- * If we're processing a structure, var_mode should be ir_var_auto.
+ * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or
+ * ir_var_shader_storage).  If we're processing a structure, var_mode should be
+ * ir_var_auto.
  *
  * \return
  * The number of fields processed.  A pointer to the array structure fields is
@@ -5255,19 +5662,19 @@
          if (is_interface && field_type->contains_opaque()) {
             YYLTYPE loc = decl_list->get_location();
             _mesa_glsl_error(&loc, state,
-                             "uniform in non-default uniform block contains "
+                             "uniform/buffer in non-default interface block contains "
                              "opaque variable");
          }
 
          if (field_type->contains_atomic()) {
-            /* FINISHME: Add a spec quotation here once updated spec
-             * FINISHME: language is available.  See Khronos bug #10903
-             * FINISHME: on whether atomic counters are allowed in
-             * FINISHME: structures.
+            /* From section 4.1.7.3 of the GLSL 4.40 spec:
+             *
+             *    "Members of structures cannot be declared as atomic counter
+             *     types."
              */
             YYLTYPE loc = decl_list->get_location();
-            _mesa_glsl_error(&loc, state, "atomic counter in structure or "
-                             "uniform block");
+            _mesa_glsl_error(&loc, state, "atomic counter in structure, "
+                             "shader storage block or uniform block");
          }
 
          if (field_type->contains_image()) {
@@ -5277,7 +5684,8 @@
              */
             YYLTYPE loc = decl_list->get_location();
             _mesa_glsl_error(&loc, state,
-                             "image in structure or uniform block");
+                             "image in structure, shader storage block or "
+                             "uniform block");
          }
 
          const struct ast_type_qualifier *const qual =
@@ -5286,9 +5694,9 @@
              qual->flags.q.packed ||
              qual->flags.q.shared) {
             _mesa_glsl_error(&loc, state,
-                             "uniform block layout qualifiers std140, packed, and "
-                             "shared can only be applied to uniform blocks, not "
-                             "members");
+                             "uniform/shader storage block layout qualifiers "
+                             "std140, packed, and shared can only be applied "
+                             "to uniform/shader storage blocks, not members");
          }
 
          if (qual->flags.q.constant) {
@@ -5307,15 +5715,16 @@
             interpret_interpolation_qualifier(qual, var_mode, state, &loc);
          fields[i].centroid = qual->flags.q.centroid ? 1 : 0;
          fields[i].sample = qual->flags.q.sample ? 1 : 0;
+         fields[i].patch = qual->flags.q.patch ? 1 : 0;
 
          /* Only save explicitly defined streams in block's field */
          fields[i].stream = qual->flags.q.explicit_stream ? qual->stream : -1;
 
          if (qual->flags.q.row_major || qual->flags.q.column_major) {
-            if (!qual->flags.q.uniform) {
+            if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
                _mesa_glsl_error(&loc, state,
                                 "row_major and column_major can only be "
-                                "applied to uniform interface blocks");
+                                "applied to interface blocks");
             } else
                validate_matrix_layout_for_type(state, &loc, field_type, NULL);
          }
@@ -5512,6 +5921,9 @@
    } else if (this->layout.flags.q.uniform) {
       var_mode = ir_var_uniform;
       iface_type_name = "uniform";
+   } else if (this->layout.flags.q.buffer) {
+      var_mode = ir_var_shader_storage;
+      iface_type_name = "buffer";
    } else {
       var_mode = ir_var_auto;
       iface_type_name = "UNKNOWN";
@@ -5596,16 +6008,28 @@
          if (ir_variable *earlier_gl_Position =
              state->symbols->get_variable("gl_Position")) {
             earlier_per_vertex = earlier_gl_Position->get_interface_type();
+         } else if (ir_variable *earlier_gl_out =
+               state->symbols->get_variable("gl_out")) {
+            earlier_per_vertex = earlier_gl_out->get_interface_type();
          } else {
             _mesa_glsl_error(&loc, state,
                              "redeclaration of gl_PerVertex output not "
                              "allowed in the %s shader",
                              _mesa_shader_stage_to_string(state->stage));
          }
-         if (this->instance_name != NULL) {
-            _mesa_glsl_error(&loc, state,
-                             "gl_PerVertex output may not be redeclared with "
-                             "an instance name");
+         if (state->stage == MESA_SHADER_TESS_CTRL) {
+            if (this->instance_name == NULL ||
+                strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) {
+               _mesa_glsl_error(&loc, state,
+                                "gl_PerVertex output must be redeclared as "
+                                "gl_out[]");
+            }
+         } else {
+            if (this->instance_name != NULL) {
+               _mesa_glsl_error(&loc, state,
+                                "gl_PerVertex output may not be redeclared with "
+                                "an instance name");
+            }
          }
          break;
       default:
@@ -5638,6 +6062,8 @@
                earlier_per_vertex->fields.structure[j].centroid;
             fields[i].sample =
                earlier_per_vertex->fields.structure[j].sample;
+            fields[i].patch =
+               earlier_per_vertex->fields.structure[j].patch;
          }
       }
 
@@ -5667,6 +6093,8 @@
                                         num_variables,
                                         packing,
                                         this->block_name);
+   if (this->layout.flags.q.explicit_binding)
+      validate_binding_qualifier(state, &loc, block_type, &this->layout);
 
    if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
       YYLTYPE loc = this->get_location();
@@ -5691,8 +6119,18 @@
    if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL &&
        var_mode == ir_var_shader_in) {
       _mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays");
+   } else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+               state->stage == MESA_SHADER_TESS_EVAL) &&
+              this->array_specifier == NULL &&
+              var_mode == ir_var_shader_in) {
+      _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays");
+   } else if (state->stage == MESA_SHADER_TESS_CTRL &&
+              this->array_specifier == NULL &&
+              var_mode == ir_var_shader_out) {
+      _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays");
    }
 
+
    /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec
     * says:
     *
@@ -5738,21 +6176,59 @@
           *     geometry shader inputs. All other input and output block
           *     arrays must specify an array size.
           *
+          * The same applies to tessellation shaders.
+          *
           * The upshot of this is that the only circumstance where an
           * interface array size *doesn't* need to be specified is on a
-          * geometry shader input.
+          * geometry shader input, tessellation control shader input,
+          * tessellation control shader output, and tessellation evaluation
+          * shader input.
           */
-         if (this->array_specifier->is_unsized_array &&
-             (state->stage != MESA_SHADER_GEOMETRY || !this->layout.flags.q.in)) {
-            _mesa_glsl_error(&loc, state,
-                             "only geometry shader inputs may be unsized "
-                             "instance block arrays");
+         if (this->array_specifier->is_unsized_array) {
+            bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY ||
+                                state->stage == MESA_SHADER_TESS_CTRL ||
+                                state->stage == MESA_SHADER_TESS_EVAL;
+            bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL;
 
+            if (this->layout.flags.q.in) {
+               if (!allow_inputs)
+                  _mesa_glsl_error(&loc, state,
+                                   "unsized input block arrays not allowed in "
+                                   "%s shader",
+                                   _mesa_shader_stage_to_string(state->stage));
+            } else if (this->layout.flags.q.out) {
+               if (!allow_outputs)
+                  _mesa_glsl_error(&loc, state,
+                                   "unsized output block arrays not allowed in "
+                                   "%s shader",
+                                   _mesa_shader_stage_to_string(state->stage));
+            } else {
+               /* by elimination, this is a uniform block array */
+               _mesa_glsl_error(&loc, state,
+                                "unsized uniform block arrays not allowed in "
+                                "%s shader",
+                                _mesa_shader_stage_to_string(state->stage));
+            }
          }
 
          const glsl_type *block_array_type =
             process_array_type(&loc, block_type, this->array_specifier, state);
 
+         /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec:
+          *
+          *     * Arrays of arrays of blocks are not allowed
+          */
+         if (state->es_shader && block_array_type->is_array() &&
+             block_array_type->fields.array->is_array()) {
+            _mesa_glsl_error(&loc, state,
+                             "arrays of arrays interface blocks are "
+                             "not allowed");
+         }
+
+         if (this->layout.flags.q.explicit_binding)
+            validate_binding_qualifier(state, &loc, block_array_type,
+                                       &this->layout);
+
          var = new(state) ir_variable(block_array_type,
                                       this->instance_name,
                                       var_mode);
@@ -5770,6 +6246,11 @@
 
       if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in)
          handle_geometry_shader_input_decl(state, loc, var);
+      else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+           state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in)
+         handle_tess_shader_input_decl(state, loc, var);
+      else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
+         handle_tess_ctrl_shader_output_decl(state, loc, var);
 
       if (ir_variable *earlier =
           state->symbols->get_variable(this->instance_name)) {
@@ -5806,6 +6287,7 @@
          var->data.interpolation = fields[i].interpolation;
          var->data.centroid = fields[i].centroid;
          var->data.sample = fields[i].sample;
+         var->data.patch = fields[i].patch;
          var->init_interface_type(block_type);
 
          if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
@@ -5854,8 +6336,8 @@
          if (state->symbols->get_variable(var->name) != NULL)
             _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
 
-         /* Propagate the "binding" keyword into this UBO's fields;
-          * the UBO declaration itself doesn't get an ir_variable unless it
+         /* Propagate the "binding" keyword into this UBO/SSBO's fields.
+          * The UBO declaration itself doesn't get an ir_variable unless it
           * has an instance name.  This is ugly.
           */
          var->data.explicit_binding = this->layout.flags.q.explicit_binding;
@@ -5905,6 +6387,67 @@
       }
    }
 
+   return NULL;
+}
+
+
+ir_rvalue *
+ast_tcs_output_layout::hir(exec_list *instructions,
+			  struct _mesa_glsl_parse_state *state)
+{
+   YYLTYPE loc = this->get_location();
+
+   /* If any tessellation control output layout declaration preceded this
+    * one, make sure it was consistent with this one.
+    */
+   if (state->tcs_output_vertices_specified &&
+       state->out_qualifier->vertices != this->vertices) {
+      _mesa_glsl_error(&loc, state,
+		       "tessellation control shader output layout does not "
+		       "match previous declaration");
+      return NULL;
+   }
+
+   /* If any shader outputs occurred before this declaration and specified an
+    * array size, make sure the size they specified is consistent with the
+    * vertex count declared by this layout.
+    */
+   unsigned num_vertices = this->vertices;
+   if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
+      _mesa_glsl_error(&loc, state,
+		       "this tessellation control shader output layout "
+		       "specifies %u vertices, but a previous output "
+		       "is declared with size %u",
+		       num_vertices, state->tcs_output_size);
+      return NULL;
+   }
+
+   state->tcs_output_vertices_specified = true;
+
+   /* If any shader outputs occurred before this declaration and did not
+    * specify an array size, their size is determined now.
+    */
+   foreach_in_list (ir_instruction, node, instructions) {
+      ir_variable *var = node->as_variable();
+      if (var == NULL || var->data.mode != ir_var_shader_out)
+	 continue;
+
+      /* Note: Not all tessellation control shader outputs are arrays. */
+      if (!var->type->is_unsized_array() || var->data.patch)
+         continue;
+
+      if (var->data.max_array_access >= num_vertices) {
+	 _mesa_glsl_error(&loc, state,
+			  "this tessellation control shader output layout "
+			  "specifies %u vertices, but an access to element "
+			  "%u of output `%s' already exists", num_vertices,
+			  var->data.max_array_access, var->name);
+      } else {
+	 var->type = glsl_type::get_array_instance(var->type->fields.array,
+						   num_vertices);
+      }
+   }
+
    return NULL;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_type.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_type.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ast_type.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ast_type.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -40,7 +40,12 @@
 bool
 ast_fully_specified_type::has_qualifiers() const
 {
-   return this->qualifier.flags.i != 0;
+   /* 'subroutine' isn't a real qualifier. */
+   ast_type_qualifier subroutine_only;
+   subroutine_only.flags.i = 0;
+   subroutine_only.flags.q.subroutine = 1;
+   subroutine_only.flags.q.subroutine_def = 1;
+   return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0;
 }
 
 bool ast_type_qualifier::has_interpolation() const
@@ -78,14 +83,16 @@
           || this->flags.q.varying
           || this->flags.q.in
           || this->flags.q.out
-          || this->flags.q.uniform;
+          || this->flags.q.uniform
+          || this->flags.q.buffer;
 }
 
 bool
 ast_type_qualifier::has_auxiliary_storage() const
 {
    return this->flags.q.centroid
-          || this->flags.q.sample;
+          || this->flags.q.sample
+          || this->flags.q.patch;
 }
 
 const char*
@@ -211,6 +218,44 @@
       }
    }
 
+   if (q.flags.q.vertices) {
+      if (this->flags.q.vertices && this->vertices != q.vertices) {
+	 _mesa_glsl_error(loc, state,
+			  "tessellation control shader set conflicting "
+			  "vertices (%d and %d)",
+			  this->vertices, q.vertices);
+	 return false;
+      }
+      this->vertices = q.vertices;
+   }
+
+   if (q.flags.q.vertex_spacing) {
+      if (this->flags.q.vertex_spacing && this->vertex_spacing != q.vertex_spacing) {
+	 _mesa_glsl_error(loc, state,
+			  "conflicting vertex spacing used");
+	 return false;
+      }
+      this->vertex_spacing = q.vertex_spacing;
+   }
+
+   if (q.flags.q.ordering) {
+      if (this->flags.q.ordering && this->ordering != q.ordering) {
+	 _mesa_glsl_error(loc, state,
+			  "conflicting ordering used");
+	 return false;
+      }
+      this->ordering = q.ordering;
+   }
+
+   if (q.flags.q.point_mode) {
+      if (this->flags.q.point_mode && this->point_mode != q.point_mode) {
+	 _mesa_glsl_error(loc, state,
+			  "conflicting point mode used");
+	 return false;
+      }
+      this->point_mode = q.point_mode;
+   }
+
    if ((q.flags.i & ubo_mat_mask.flags.i) != 0)
       this->flags.i &= ~ubo_mat_mask.flags.i;
    if ((q.flags.i & ubo_layout_mask.flags.i) != 0)
@@ -256,6 +301,22 @@
 }
 
 bool
+ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
+                                        _mesa_glsl_parse_state *state,
+                                        ast_type_qualifier q,
+                                        ast_node* &node)
+{
+   void *mem_ctx = state;
+   const bool r = this->merge_qualifier(loc, state, q);
+
+   if (state->stage == MESA_SHADER_TESS_CTRL) {
+      node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices);
+   }
+
+   return r;
+}
+
+bool
 ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
                                        _mesa_glsl_parse_state *state,
                                        ast_type_qualifier q,
@@ -268,6 +329,27 @@
    valid_in_mask.flags.i = 0;
 
    switch (state->stage) {
+   case MESA_SHADER_TESS_EVAL:
+      if (q.flags.q.prim_type) {
+         /* Make sure this is a valid input primitive type. */
+         switch (q.prim_type) {
+         case GL_TRIANGLES:
+         case GL_QUADS:
+         case GL_ISOLINES:
+            break;
+         default:
+            _mesa_glsl_error(loc, state,
+                             "invalid tessellation evaluation "
+                             "shader input primitive type");
+            break;
+         }
+      }
+
+      valid_in_mask.flags.q.prim_type = 1;
+      valid_in_mask.flags.q.vertex_spacing = 1;
+      valid_in_mask.flags.q.ordering = 1;
+      valid_in_mask.flags.q.point_mode = 1;
+      break;
    case MESA_SHADER_GEOMETRY:
       if (q.flags.q.prim_type) {
          /* Make sure this is a valid input primitive type. */
@@ -323,7 +405,9 @@
       if (q.flags.q.prim_type &&
           this->prim_type != q.prim_type) {
          _mesa_glsl_error(loc, state,
-                          "conflicting input primitive types specified");
+                          "conflicting input primitive %s specified",
+                          state->stage == MESA_SHADER_GEOMETRY ?
+                          "type" : "mode");
       }
    } else if (q.flags.q.prim_type) {
       state->in_qualifier->flags.q.prim_type = 1;
@@ -345,6 +429,39 @@
       state->fs_early_fragment_tests = true;
    }
 
+   if (this->flags.q.vertex_spacing) {
+      if (q.flags.q.vertex_spacing &&
+          this->vertex_spacing != q.vertex_spacing) {
+         _mesa_glsl_error(loc, state,
+                          "conflicting vertex spacing specified");
+      }
+   } else if (q.flags.q.vertex_spacing) {
+      this->flags.q.vertex_spacing = 1;
+      this->vertex_spacing = q.vertex_spacing;
+   }
+
+   if (this->flags.q.ordering) {
+      if (q.flags.q.ordering &&
+          this->ordering != q.ordering) {
+         _mesa_glsl_error(loc, state,
+                          "conflicting ordering specified");
+      }
+   } else if (q.flags.q.ordering) {
+      this->flags.q.ordering = 1;
+      this->ordering = q.ordering;
+   }
+
+   if (this->flags.q.point_mode) {
+      if (q.flags.q.point_mode &&
+          this->point_mode != q.point_mode) {
+         _mesa_glsl_error(loc, state,
+                          "conflicting point mode specified");
+      }
+   } else if (q.flags.q.point_mode) {
+      this->flags.q.point_mode = 1;
+      this->point_mode = q.point_mode;
+   }
+
    if (create_gs_ast) {
       node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type);
    } else if (create_cs_ast) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/builtin_functions.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/builtin_functions.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/builtin_functions.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/builtin_functions.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -136,6 +136,13 @@
 }
 
 static bool
+v400_fs_only(const _mesa_glsl_parse_state *state)
+{
+   return state->is_version(400, 0) &&
+          state->stage == MESA_SHADER_FRAGMENT;
+}
+
+static bool
 es31(const _mesa_glsl_parse_state *state)
 {
    return state->is_version(0, 310);
@@ -270,6 +277,13 @@
 static bool
 texture_multisample(const _mesa_glsl_parse_state *state)
 {
+   return state->is_version(150, 310) ||
+          state->ARB_texture_multisample_enable;
+}
+
+static bool
+texture_multisample_array(const _mesa_glsl_parse_state *state)
+{
    return state->is_version(150, 0) ||
           state->ARB_texture_multisample_enable;
 }
@@ -394,11 +408,25 @@
 static bool
 shader_image_load_store(const _mesa_glsl_parse_state *state)
 {
+   return (state->is_version(420, 310) ||
+           state->ARB_shader_image_load_store_enable);
+}
+
+static bool
+shader_image_atomic(const _mesa_glsl_parse_state *state)
+{
    return (state->is_version(420, 0) ||
            state->ARB_shader_image_load_store_enable);
 }
 
 static bool
+shader_image_size(const _mesa_glsl_parse_state *state)
+{
+   return state->is_version(430, 310) ||
+           state->ARB_shader_image_size_enable;
+}
+
+static bool
 gs_streams(const _mesa_glsl_parse_state *state)
 {
    return gpu_shader5(state) && gs_only(state);
@@ -410,6 +438,13 @@
    return state->has_double();
 }
 
+static bool
+barrier_supported(const _mesa_glsl_parse_state *state)
+{
+   return state->stage == MESA_SHADER_COMPUTE ||
+          state->stage == MESA_SHADER_TESS_CTRL;
+}
+
 /** @} */
 
 /******************************************************************************/
@@ -485,13 +520,19 @@
    /** Create a new function and add the given signatures. */
    void add_function(const char *name, ...);
 
+   typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type,
+                                                                          const char *intrinsic_name,
+                                                                          unsigned num_arguments,
+                                                                          unsigned flags);
+
    enum image_function_flags {
       IMAGE_FUNCTION_EMIT_STUB = (1 << 0),
       IMAGE_FUNCTION_RETURNS_VOID = (1 << 1),
       IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2),
       IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3),
       IMAGE_FUNCTION_READ_ONLY = (1 << 4),
-      IMAGE_FUNCTION_WRITE_ONLY = (1 << 5)
+      IMAGE_FUNCTION_WRITE_ONLY = (1 << 5),
+      IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6)
    };
 
    /**
@@ -500,6 +541,7 @@
     */
    void add_image_function(const char *name,
                            const char *intrinsic_name,
+                           image_prototype_ctr prototype,
                            unsigned num_arguments,
                            unsigned flags);
 
@@ -654,8 +696,9 @@
                                             const glsl_type *stream_type);
    ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail,
                                               const glsl_type *stream_type);
+   B0(barrier)
 
-   B2(textureQueryLod);
+   BA2(textureQueryLod);
    B1(textureQueryLevels);
    B1(dFdx);
    B1(dFdy);
@@ -700,7 +743,12 @@
                                            const char *intrinsic_name,
                                            unsigned num_arguments,
                                            unsigned flags);
-   ir_function_signature *_image(const glsl_type *image_type,
+   ir_function_signature *_image_size_prototype(const glsl_type *image_type,
+                                                const char *intrinsic_name,
+                                                unsigned num_arguments,
+                                                unsigned flags);
+   ir_function_signature *_image(image_prototype_ctr prototype,
+                                 const glsl_type *image_type,
                                  const char *intrinsic_name,
                                  unsigned num_arguments,
                                  unsigned flags);
@@ -1359,9 +1407,9 @@
                 _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::isampler2DMS_type),
                 _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::usampler2DMS_type),
 
-                _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type),
-                _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type),
-                _textureSize(texture_multisample, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type),
+                _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type),
+                _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type),
+                _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type),
                 NULL);
 
    add_function("texture",
@@ -1624,9 +1672,9 @@
                 _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMS_type, glsl_type::ivec2_type),
                 _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMS_type, glsl_type::ivec2_type),
 
-                _texelFetch(texture_multisample, glsl_type::vec4_type,  glsl_type::sampler2DMSArray_type,  glsl_type::ivec3_type),
-                _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type),
-                _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type),
+                _texelFetch(texture_multisample_array, glsl_type::vec4_type,  glsl_type::sampler2DMSArray_type,  glsl_type::ivec3_type),
+                _texelFetch(texture_multisample_array, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type),
+                _texelFetch(texture_multisample_array, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type),
                 NULL);
 
    add_function("texelFetchOffset",
@@ -1933,42 +1981,80 @@
                 _EndStreamPrimitive(gs_streams, glsl_type::uint_type),
                 _EndStreamPrimitive(gs_streams, glsl_type::int_type),
                 NULL);
+   add_function("barrier", _barrier(), NULL);
 
    add_function("textureQueryLOD",
-                _textureQueryLod(glsl_type::sampler1D_type,  glsl_type::float_type),
-                _textureQueryLod(glsl_type::isampler1D_type, glsl_type::float_type),
-                _textureQueryLod(glsl_type::usampler1D_type, glsl_type::float_type),
-
-                _textureQueryLod(glsl_type::sampler2D_type,  glsl_type::vec2_type),
-                _textureQueryLod(glsl_type::isampler2D_type, glsl_type::vec2_type),
-                _textureQueryLod(glsl_type::usampler2D_type, glsl_type::vec2_type),
-
-                _textureQueryLod(glsl_type::sampler3D_type,  glsl_type::vec3_type),
-                _textureQueryLod(glsl_type::isampler3D_type, glsl_type::vec3_type),
-                _textureQueryLod(glsl_type::usampler3D_type, glsl_type::vec3_type),
-
-                _textureQueryLod(glsl_type::samplerCube_type,  glsl_type::vec3_type),
-                _textureQueryLod(glsl_type::isamplerCube_type, glsl_type::vec3_type),
-                _textureQueryLod(glsl_type::usamplerCube_type, glsl_type::vec3_type),
-
-                _textureQueryLod(glsl_type::sampler1DArray_type,  glsl_type::float_type),
-                _textureQueryLod(glsl_type::isampler1DArray_type, glsl_type::float_type),
-                _textureQueryLod(glsl_type::usampler1DArray_type, glsl_type::float_type),
-
-                _textureQueryLod(glsl_type::sampler2DArray_type,  glsl_type::vec2_type),
-                _textureQueryLod(glsl_type::isampler2DArray_type, glsl_type::vec2_type),
-                _textureQueryLod(glsl_type::usampler2DArray_type, glsl_type::vec2_type),
-
-                _textureQueryLod(glsl_type::samplerCubeArray_type,  glsl_type::vec3_type),
-                _textureQueryLod(glsl_type::isamplerCubeArray_type, glsl_type::vec3_type),
-                _textureQueryLod(glsl_type::usamplerCubeArray_type, glsl_type::vec3_type),
-
-                _textureQueryLod(glsl_type::sampler1DShadow_type, glsl_type::float_type),
-                _textureQueryLod(glsl_type::sampler2DShadow_type, glsl_type::vec2_type),
-                _textureQueryLod(glsl_type::samplerCubeShadow_type, glsl_type::vec3_type),
-                _textureQueryLod(glsl_type::sampler1DArrayShadow_type, glsl_type::float_type),
-                _textureQueryLod(glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type),
-                _textureQueryLod(glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::sampler1D_type,  glsl_type::float_type),
+                _textureQueryLod(texture_query_lod, glsl_type::isampler1D_type, glsl_type::float_type),
+                _textureQueryLod(texture_query_lod, glsl_type::usampler1D_type, glsl_type::float_type),
+
+                _textureQueryLod(texture_query_lod, glsl_type::sampler2D_type,  glsl_type::vec2_type),
+                _textureQueryLod(texture_query_lod, glsl_type::isampler2D_type, glsl_type::vec2_type),
+                _textureQueryLod(texture_query_lod, glsl_type::usampler2D_type, glsl_type::vec2_type),
+
+                _textureQueryLod(texture_query_lod, glsl_type::sampler3D_type,  glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::isampler3D_type, glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::usampler3D_type, glsl_type::vec3_type),
+
+                _textureQueryLod(texture_query_lod, glsl_type::samplerCube_type,  glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::isamplerCube_type, glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::usamplerCube_type, glsl_type::vec3_type),
+
+                _textureQueryLod(texture_query_lod, glsl_type::sampler1DArray_type,  glsl_type::float_type),
+                _textureQueryLod(texture_query_lod, glsl_type::isampler1DArray_type, glsl_type::float_type),
+                _textureQueryLod(texture_query_lod, glsl_type::usampler1DArray_type, glsl_type::float_type),
+
+                _textureQueryLod(texture_query_lod, glsl_type::sampler2DArray_type,  glsl_type::vec2_type),
+                _textureQueryLod(texture_query_lod, glsl_type::isampler2DArray_type, glsl_type::vec2_type),
+                _textureQueryLod(texture_query_lod, glsl_type::usampler2DArray_type, glsl_type::vec2_type),
+
+                _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArray_type,  glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type),
+
+                _textureQueryLod(texture_query_lod, glsl_type::sampler1DShadow_type, glsl_type::float_type),
+                _textureQueryLod(texture_query_lod, glsl_type::sampler2DShadow_type, glsl_type::vec2_type),
+                _textureQueryLod(texture_query_lod, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type),
+                _textureQueryLod(texture_query_lod, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type),
+                _textureQueryLod(texture_query_lod, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type),
+                _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type),
+                NULL);
+
+   add_function("textureQueryLod",
+                _textureQueryLod(v400_fs_only, glsl_type::sampler1D_type,  glsl_type::float_type),
+                _textureQueryLod(v400_fs_only, glsl_type::isampler1D_type, glsl_type::float_type),
+                _textureQueryLod(v400_fs_only, glsl_type::usampler1D_type, glsl_type::float_type),
+
+                _textureQueryLod(v400_fs_only, glsl_type::sampler2D_type,  glsl_type::vec2_type),
+                _textureQueryLod(v400_fs_only, glsl_type::isampler2D_type, glsl_type::vec2_type),
+                _textureQueryLod(v400_fs_only, glsl_type::usampler2D_type, glsl_type::vec2_type),
+
+                _textureQueryLod(v400_fs_only, glsl_type::sampler3D_type,  glsl_type::vec3_type),
+                _textureQueryLod(v400_fs_only, glsl_type::isampler3D_type, glsl_type::vec3_type),
+                _textureQueryLod(v400_fs_only, glsl_type::usampler3D_type, glsl_type::vec3_type),
+
+                _textureQueryLod(v400_fs_only, glsl_type::samplerCube_type,  glsl_type::vec3_type),
+                _textureQueryLod(v400_fs_only, glsl_type::isamplerCube_type, glsl_type::vec3_type),
+                _textureQueryLod(v400_fs_only, glsl_type::usamplerCube_type, glsl_type::vec3_type),
+
+                _textureQueryLod(v400_fs_only, glsl_type::sampler1DArray_type,  glsl_type::float_type),
+                _textureQueryLod(v400_fs_only, glsl_type::isampler1DArray_type, glsl_type::float_type),
+                _textureQueryLod(v400_fs_only, glsl_type::usampler1DArray_type, glsl_type::float_type),
+
+                _textureQueryLod(v400_fs_only, glsl_type::sampler2DArray_type,  glsl_type::vec2_type),
+                _textureQueryLod(v400_fs_only, glsl_type::isampler2DArray_type, glsl_type::vec2_type),
+                _textureQueryLod(v400_fs_only, glsl_type::usampler2DArray_type, glsl_type::vec2_type),
+
+                _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArray_type,  glsl_type::vec3_type),
+                _textureQueryLod(v400_fs_only, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type),
+                _textureQueryLod(v400_fs_only, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type),
+
+                _textureQueryLod(v400_fs_only, glsl_type::sampler1DShadow_type, glsl_type::float_type),
+                _textureQueryLod(v400_fs_only, glsl_type::sampler2DShadow_type, glsl_type::vec2_type),
+                _textureQueryLod(v400_fs_only, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type),
+                _textureQueryLod(v400_fs_only, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type),
+                _textureQueryLod(v400_fs_only, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type),
+                _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type),
                 NULL);
 
    add_function("textureQueryLevels",
@@ -2543,6 +2629,7 @@
 void
 builtin_builder::add_image_function(const char *name,
                                     const char *intrinsic_name,
+                                    image_prototype_ctr prototype,
                                     unsigned num_arguments,
                                     unsigned flags)
 {
@@ -2581,12 +2668,13 @@
       glsl_type::uimage2DMS_type,
       glsl_type::uimage2DMSArray_type
    };
+
    ir_function *f = new(mem_ctx) ir_function(name);
 
    for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) {
       if (types[i]->sampler_type != GLSL_TYPE_FLOAT ||
           (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE))
-         f->add_signature(_image(types[i], intrinsic_name,
+         f->add_signature(_image(prototype, types[i], intrinsic_name,
                                  num_arguments, flags));
    }
 
@@ -2599,43 +2687,60 @@
    const unsigned flags = (glsl ? IMAGE_FUNCTION_EMIT_STUB : 0);
 
    add_image_function(glsl ? "imageLoad" : "__intrinsic_image_load",
-                      "__intrinsic_image_load", 0,
-                      (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE |
+                       "__intrinsic_image_load",
+                       &builtin_builder::_image_prototype, 0,
+                       (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE |
                        IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE |
                        IMAGE_FUNCTION_READ_ONLY));
 
    add_image_function(glsl ? "imageStore" : "__intrinsic_image_store",
-                      "__intrinsic_image_store", 1,
+                      "__intrinsic_image_store",
+                      &builtin_builder::_image_prototype, 1,
                       (flags | IMAGE_FUNCTION_RETURNS_VOID |
                        IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE |
                        IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE |
                        IMAGE_FUNCTION_WRITE_ONLY));
 
+   const unsigned atom_flags = flags | IMAGE_FUNCTION_AVAIL_ATOMIC;
+
    add_image_function(glsl ? "imageAtomicAdd" : "__intrinsic_image_atomic_add",
-                      "__intrinsic_image_atomic_add", 1, flags);
+                      "__intrinsic_image_atomic_add",
+                      &builtin_builder::_image_prototype, 1, atom_flags);
 
    add_image_function(glsl ? "imageAtomicMin" : "__intrinsic_image_atomic_min",
-                      "__intrinsic_image_atomic_min", 1, flags);
+                      "__intrinsic_image_atomic_min",
+                      &builtin_builder::_image_prototype, 1, atom_flags);
 
    add_image_function(glsl ? "imageAtomicMax" : "__intrinsic_image_atomic_max",
-                      "__intrinsic_image_atomic_max", 1, flags);
+                      "__intrinsic_image_atomic_max",
+                      &builtin_builder::_image_prototype, 1, atom_flags);
 
    add_image_function(glsl ? "imageAtomicAnd" : "__intrinsic_image_atomic_and",
-                      "__intrinsic_image_atomic_and", 1, flags);
+                      "__intrinsic_image_atomic_and",
+                      &builtin_builder::_image_prototype, 1, atom_flags);
 
    add_image_function(glsl ? "imageAtomicOr" : "__intrinsic_image_atomic_or",
-                      "__intrinsic_image_atomic_or", 1, flags);
+                      "__intrinsic_image_atomic_or",
+                      &builtin_builder::_image_prototype, 1, atom_flags);
 
    add_image_function(glsl ? "imageAtomicXor" : "__intrinsic_image_atomic_xor",
-                      "__intrinsic_image_atomic_xor", 1, flags);
+                      "__intrinsic_image_atomic_xor",
+                      &builtin_builder::_image_prototype, 1, atom_flags);
 
    add_image_function((glsl ? "imageAtomicExchange" :
                        "__intrinsic_image_atomic_exchange"),
-                      "__intrinsic_image_atomic_exchange", 1, flags);
+                      "__intrinsic_image_atomic_exchange",
+                      &builtin_builder::_image_prototype, 1, atom_flags);
 
    add_image_function((glsl ? "imageAtomicCompSwap" :
                        "__intrinsic_image_atomic_comp_swap"),
-                      "__intrinsic_image_atomic_comp_swap", 2, flags);
+                      "__intrinsic_image_atomic_comp_swap",
+                      &builtin_builder::_image_prototype, 2, atom_flags);
+
+   add_image_function(glsl ? "imageSize" : "__intrinsic_image_size",
+                      "__intrinsic_image_size",
+                      &builtin_builder::_image_size_prototype, 1,
+                      flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE);
 }
 
 ir_variable *
@@ -4296,13 +4401,23 @@
 }
 
 ir_function_signature *
-builtin_builder::_textureQueryLod(const glsl_type *sampler_type,
+builtin_builder::_barrier()
+{
+   MAKE_SIG(glsl_type::void_type, barrier_supported, 0);
+
+   body.emit(new(mem_ctx) ir_barrier());
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_textureQueryLod(builtin_available_predicate avail,
+                                  const glsl_type *sampler_type,
                                   const glsl_type *coord_type)
 {
    ir_variable *s = in_var(sampler_type, "sampler");
    ir_variable *coord = in_var(coord_type, "coord");
    /* The sampler and coordinate always exist; add optional parameters later. */
-   MAKE_SIG(glsl_type::vec2_type, texture_query_lod, 2, s, coord);
+   MAKE_SIG(glsl_type::vec2_type, avail, 2, s, coord);
 
    ir_texture *tex = new(mem_ctx) ir_texture(ir_lod);
    tex->coordinate = var_ref(coord);
@@ -4769,8 +4884,10 @@
    ir_variable *coord = in_var(
       glsl_type::ivec(image_type->coordinate_components()), "coord");
 
-   ir_function_signature *sig = new_sig(
-      ret_type, shader_image_load_store, 2, image, coord);
+   const builtin_available_predicate avail =
+      (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic :
+       shader_image_load_store);
+   ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord);
 
    /* Sample index for multisample images. */
    if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS)
@@ -4800,13 +4917,55 @@
 }
 
 ir_function_signature *
-builtin_builder::_image(const glsl_type *image_type,
+builtin_builder::_image_size_prototype(const glsl_type *image_type,
+                                       const char *intrinsic_name,
+                                       unsigned num_arguments,
+                                       unsigned flags)
+{
+   const glsl_type *ret_type;
+   unsigned num_components = image_type->coordinate_components();
+
+   /* From the ARB_shader_image_size extension:
+    * "Cube images return the dimensions of one face."
+    */
+   if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
+       !image_type->sampler_array) {
+      num_components = 2;
+   }
+
+   /* FIXME: Add the highp precision qualifier for GLES 3.10 when it is
+    * supported by mesa.
+    */
+   ret_type = glsl_type::get_instance(GLSL_TYPE_INT, num_components, 1);
+
+   ir_variable *image = in_var(image_type, "image");
+   ir_function_signature *sig = new_sig(ret_type, shader_image_size, 1, image);
+
+   /* Set the maximal set of qualifiers allowed for this image
+    * built-in.  Function calls with arguments having fewer
+    * qualifiers than present in the prototype are allowed by the
+    * spec, but not with more, i.e. this will make the compiler
+    * accept everything that needs to be accepted, and reject cases
+    * like loads from write-only or stores to read-only images.
+    */
+   image->data.image_read_only = true;
+   image->data.image_write_only = true;
+   image->data.image_coherent = true;
+   image->data.image_volatile = true;
+   image->data.image_restrict = true;
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_image(image_prototype_ctr prototype,
+                        const glsl_type *image_type,
                         const char *intrinsic_name,
                         unsigned num_arguments,
                         unsigned flags)
 {
-   ir_function_signature *sig = _image_prototype(image_type, intrinsic_name,
-                                                 num_arguments, flags);
+   ir_function_signature *sig = (this->*prototype)(image_type, intrinsic_name,
+                                                   num_arguments, flags);
 
    if (flags & IMAGE_FUNCTION_EMIT_STUB) {
       ir_factory body(&sig->body, mem_ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/builtin_types.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/builtin_types.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/builtin_types.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/builtin_types.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -54,64 +54,64 @@
       &glsl_type::_struct_##NAME##_type;
 
 static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = {
-   { glsl_type::float_type, "near", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "far",  -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "diff", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::float_type, "near"),
+   glsl_struct_field(glsl_type::float_type, "far"),
+   glsl_struct_field(glsl_type::float_type, "diff"),
 };
 
 static const struct glsl_struct_field gl_PointParameters_fields[] = {
-   { glsl_type::float_type, "size", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "sizeMin", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "sizeMax", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "fadeThresholdSize", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "distanceConstantAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "distanceLinearAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "distanceQuadraticAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::float_type, "size"),
+   glsl_struct_field(glsl_type::float_type, "sizeMin"),
+   glsl_struct_field(glsl_type::float_type, "sizeMax"),
+   glsl_struct_field(glsl_type::float_type, "fadeThresholdSize"),
+   glsl_struct_field(glsl_type::float_type, "distanceConstantAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "distanceLinearAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "distanceQuadraticAttenuation"),
 };
 
 static const struct glsl_struct_field gl_MaterialParameters_fields[] = {
-   { glsl_type::vec4_type, "emission", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "diffuse", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "specular", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "shininess", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::vec4_type, "emission"),
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
+   glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+   glsl_struct_field(glsl_type::vec4_type, "specular"),
+   glsl_struct_field(glsl_type::float_type, "shininess"),
 };
 
 static const struct glsl_struct_field gl_LightSourceParameters_fields[] = {
-   { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "diffuse", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "specular", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "position", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "halfVector", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec3_type, "spotDirection", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "spotExponent", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "spotCutoff", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "spotCosCutoff", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "constantAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "linearAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "quadraticAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
+   glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+   glsl_struct_field(glsl_type::vec4_type, "specular"),
+   glsl_struct_field(glsl_type::vec4_type, "position"),
+   glsl_struct_field(glsl_type::vec4_type, "halfVector"),
+   glsl_struct_field(glsl_type::vec3_type, "spotDirection"),
+   glsl_struct_field(glsl_type::float_type, "spotExponent"),
+   glsl_struct_field(glsl_type::float_type, "spotCutoff"),
+   glsl_struct_field(glsl_type::float_type, "spotCosCutoff"),
+   glsl_struct_field(glsl_type::float_type, "constantAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "linearAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "quadraticAttenuation"),
 };
 
 static const struct glsl_struct_field gl_LightModelParameters_fields[] = {
-   { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
 };
 
 static const struct glsl_struct_field gl_LightModelProducts_fields[] = {
-   { glsl_type::vec4_type, "sceneColor", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::vec4_type, "sceneColor"),
 };
 
 static const struct glsl_struct_field gl_LightProducts_fields[] = {
-   { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "diffuse", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::vec4_type, "specular", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
+   glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+   glsl_struct_field(glsl_type::vec4_type, "specular"),
 };
 
 static const struct glsl_struct_field gl_FogParameters_fields[] = {
-   { glsl_type::vec4_type, "color", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "density", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "start", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "end", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
-   { glsl_type::float_type, "scale", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+   glsl_struct_field(glsl_type::vec4_type, "color"),
+   glsl_struct_field(glsl_type::float_type, "density"),
+   glsl_struct_field(glsl_type::float_type, "start"),
+   glsl_struct_field(glsl_type::float_type, "end"),
+   glsl_struct_field(glsl_type::float_type, "scale"),
 };
 
 #include "builtin_type_macros.h"
@@ -182,7 +182,7 @@
    T(samplerCubeArray,                400, 999)
    T(sampler2DRect,                   140, 999)
    T(samplerBuffer,                   140, 999)
-   T(sampler2DMS,                     150, 999)
+   T(sampler2DMS,                     150, 310)
    T(sampler2DMSArray,                150, 999)
 
    T(isampler1D,                      130, 999)
@@ -194,7 +194,7 @@
    T(isamplerCubeArray,               400, 999)
    T(isampler2DRect,                  140, 999)
    T(isamplerBuffer,                  140, 999)
-   T(isampler2DMS,                    150, 999)
+   T(isampler2DMS,                    150, 310)
    T(isampler2DMSArray,               150, 999)
 
    T(usampler1D,                      130, 999)
@@ -206,7 +206,7 @@
    T(usamplerCubeArray,               400, 999)
    T(usampler2DRect,                  140, 999)
    T(usamplerBuffer,                  140, 999)
-   T(usampler2DMS,                    150, 999)
+   T(usampler2DMS,                    150, 310)
    T(usampler2DMSArray,               150, 999)
 
    T(sampler1DShadow,                 110, 999)
@@ -220,40 +220,40 @@
    T(struct_gl_DepthRangeParameters,  110, 100)
 
    T(image1D,                         420, 999)
-   T(image2D,                         420, 999)
-   T(image3D,                         420, 999)
+   T(image2D,                         420, 310)
+   T(image3D,                         420, 310)
    T(image2DRect,                     420, 999)
-   T(imageCube,                       420, 999)
+   T(imageCube,                       420, 310)
    T(imageBuffer,                     420, 999)
    T(image1DArray,                    420, 999)
-   T(image2DArray,                    420, 999)
+   T(image2DArray,                    420, 310)
    T(imageCubeArray,                  420, 999)
    T(image2DMS,                       420, 999)
    T(image2DMSArray,                  420, 999)
    T(iimage1D,                        420, 999)
-   T(iimage2D,                        420, 999)
-   T(iimage3D,                        420, 999)
+   T(iimage2D,                        420, 310)
+   T(iimage3D,                        420, 310)
    T(iimage2DRect,                    420, 999)
-   T(iimageCube,                      420, 999)
+   T(iimageCube,                      420, 310)
    T(iimageBuffer,                    420, 999)
    T(iimage1DArray,                   420, 999)
-   T(iimage2DArray,                   420, 999)
+   T(iimage2DArray,                   420, 310)
    T(iimageCubeArray,                 420, 999)
    T(iimage2DMS,                      420, 999)
    T(iimage2DMSArray,                 420, 999)
    T(uimage1D,                        420, 999)
-   T(uimage2D,                        420, 999)
-   T(uimage3D,                        420, 999)
+   T(uimage2D,                        420, 310)
+   T(uimage3D,                        420, 310)
    T(uimage2DRect,                    420, 999)
-   T(uimageCube,                      420, 999)
+   T(uimageCube,                      420, 310)
    T(uimageBuffer,                    420, 999)
    T(uimage1DArray,                   420, 999)
-   T(uimage2DArray,                   420, 999)
+   T(uimage2DArray,                   420, 310)
    T(uimageCubeArray,                 420, 999)
    T(uimage2DMS,                      420, 999)
    T(uimage2DMSArray,                 420, 999)
 
-   T(atomic_uint,                     420, 999)
+   T(atomic_uint,                     420, 310)
 };
 
 static const glsl_type *const deprecated_types[] = {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/builtin_variables.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/builtin_variables.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/builtin_variables.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/builtin_variables.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -322,6 +322,7 @@
    this->fields[this->num_fields].interpolation = INTERP_QUALIFIER_NONE;
    this->fields[this->num_fields].centroid = 0;
    this->fields[this->num_fields].sample = 0;
+   this->fields[this->num_fields].patch = 0;
    this->num_fields++;
 }
 
@@ -343,6 +344,8 @@
    void generate_constants();
    void generate_uniforms();
    void generate_vs_special_vars();
+   void generate_tcs_special_vars();
+   void generate_tes_special_vars();
    void generate_gs_special_vars();
    void generate_fs_special_vars();
    void generate_cs_special_vars();
@@ -436,11 +439,12 @@
       var->data.read_only = true;
       break;
    case ir_var_shader_out:
+   case ir_var_shader_storage:
       break;
    default:
       /* The only variables that are added using this function should be
-       * uniforms, shader inputs, and shader outputs, constants (which use
-       * ir_var_auto), and system values.
+       * uniforms, shader storage, shader inputs, and shader outputs, constants
+       * (which use ir_var_auto), and system values.
        */
       assert(0);
       break;
@@ -669,8 +673,14 @@
       if (!state->es_shader) {
          add_const("gl_MaxGeometryAtomicCounters",
                    state->Const.MaxGeometryAtomicCounters);
-         add_const("gl_MaxTessControlAtomicCounters", 0);
-         add_const("gl_MaxTessEvaluationAtomicCounters", 0);
+
+	 if (state->is_version(400, 0) ||
+             state->ARB_tessellation_shader_enable) {
+		 add_const("gl_MaxTessControlAtomicCounters",
+                           state->Const.MaxTessControlAtomicCounters);
+		 add_const("gl_MaxTessEvaluationAtomicCounters",
+                           state->Const.MaxTessEvaluationAtomicCounters);
+	 }
       }
    }
 
@@ -690,8 +700,10 @@
       if (!state->es_shader) {
          add_const("gl_MaxGeometryAtomicCounterBuffers",
                    state->Const.MaxGeometryAtomicCounterBuffers);
-         add_const("gl_MaxTessControlAtomicCounterBuffers", 0);
-         add_const("gl_MaxTessEvaluationAtomicCounterBuffers", 0);
+         add_const("gl_MaxTessControlAtomicCounterBuffers",
+                   state->Const.MaxTessControlAtomicCounterBuffers);
+         add_const("gl_MaxTessEvaluationAtomicCounterBuffers",
+                   state->Const.MaxTessEvaluationAtomicCounterBuffers);
       }
    }
 
@@ -732,29 +744,59 @@
        */
    }
 
-   if (state->is_version(420, 0) ||
+   if (state->is_version(420, 310) ||
        state->ARB_shader_image_load_store_enable) {
       add_const("gl_MaxImageUnits",
                 state->Const.MaxImageUnits);
-      add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs",
-                state->Const.MaxCombinedImageUnitsAndFragmentOutputs);
-      add_const("gl_MaxImageSamples",
-                state->Const.MaxImageSamples);
       add_const("gl_MaxVertexImageUniforms",
                 state->Const.MaxVertexImageUniforms);
-      add_const("gl_MaxTessControlImageUniforms", 0);
-      add_const("gl_MaxTessEvaluationImageUniforms", 0);
-      add_const("gl_MaxGeometryImageUniforms",
-                state->Const.MaxGeometryImageUniforms);
       add_const("gl_MaxFragmentImageUniforms",
                 state->Const.MaxFragmentImageUniforms);
       add_const("gl_MaxCombinedImageUniforms",
                 state->Const.MaxCombinedImageUniforms);
+
+      if (!state->es_shader) {
+         add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs",
+                   state->Const.MaxCombinedShaderOutputResources);
+         add_const("gl_MaxImageSamples",
+                   state->Const.MaxImageSamples);
+         add_const("gl_MaxGeometryImageUniforms",
+                   state->Const.MaxGeometryImageUniforms);
+      }
+
+      if (state->is_version(450, 310)) {
+         add_const("gl_MaxCombinedShaderOutputResources",
+                   state->Const.MaxCombinedShaderOutputResources);
+      }
+
+      if (state->is_version(400, 0) ||
+          state->ARB_tessellation_shader_enable) {
+         add_const("gl_MaxTessControlImageUniforms",
+                   state->Const.MaxTessControlImageUniforms);
+         add_const("gl_MaxTessEvaluationImageUniforms",
+                   state->Const.MaxTessEvaluationImageUniforms);
+      }
    }
 
    if (state->is_version(410, 0) ||
        state->ARB_viewport_array_enable)
       add_const("gl_MaxViewports", state->Const.MaxViewports);
+
+   if (state->is_version(400, 0) ||
+       state->ARB_tessellation_shader_enable) {
+      add_const("gl_MaxPatchVertices", state->Const.MaxPatchVertices);
+      add_const("gl_MaxTessGenLevel", state->Const.MaxTessGenLevel);
+      add_const("gl_MaxTessControlInputComponents", state->Const.MaxTessControlInputComponents);
+      add_const("gl_MaxTessControlOutputComponents", state->Const.MaxTessControlOutputComponents);
+      add_const("gl_MaxTessControlTextureImageUnits", state->Const.MaxTessControlTextureImageUnits);
+      add_const("gl_MaxTessEvaluationInputComponents", state->Const.MaxTessEvaluationInputComponents);
+      add_const("gl_MaxTessEvaluationOutputComponents", state->Const.MaxTessEvaluationOutputComponents);
+      add_const("gl_MaxTessEvaluationTextureImageUnits", state->Const.MaxTessEvaluationTextureImageUnits);
+      add_const("gl_MaxTessPatchComponents", state->Const.MaxTessPatchComponents);
+      add_const("gl_MaxTessControlTotalOutputComponents", state->Const.MaxTessControlTotalOutputComponents);
+      add_const("gl_MaxTessControlUniformComponents", state->Const.MaxTessControlUniformComponents);
+      add_const("gl_MaxTessEvaluationUniformComponents", state->Const.MaxTessEvaluationUniformComponents);
+   }
 }
 
 
@@ -764,7 +806,8 @@
 void
 builtin_variable_generator::generate_uniforms()
 {
-   add_uniform(int_t, "gl_NumSamples");
+   if (state->is_version(400, 0) || state->ARB_sample_shading_enable)
+      add_uniform(int_t, "gl_NumSamples");
    add_uniform(type("gl_DepthRangeParameters"), "gl_DepthRange");
    add_uniform(array(vec4_t, VERT_ATTRIB_MAX), "gl_CurrentAttribVertMESA");
    add_uniform(array(vec4_t, VARYING_SLOT_MAX), "gl_CurrentAttribFragMESA");
@@ -870,6 +913,39 @@
 
 
 /**
+ * Generate variables which only exist in tessellation control shaders.
+ */
+void
+builtin_variable_generator::generate_tcs_special_vars()
+{
+   add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
+   add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn");
+   add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID");
+
+   add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4),
+              "gl_TessLevelOuter")->data.patch = 1;
+   add_output(VARYING_SLOT_TESS_LEVEL_INNER, array(float_t, 2),
+              "gl_TessLevelInner")->data.patch = 1;
+}
+
+
+/**
+ * Generate variables which only exist in tessellation evaluation shaders.
+ */
+void
+builtin_variable_generator::generate_tes_special_vars()
+{
+   add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
+   add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn");
+   add_system_value(SYSTEM_VALUE_TESS_COORD, vec3_t, "gl_TessCoord");
+   add_system_value(SYSTEM_VALUE_TESS_LEVEL_OUTER, array(float_t, 4),
+                    "gl_TessLevelOuter");
+   add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2),
+                    "gl_TessLevelInner");
+}
+
+
+/**
  * Generate variables which only exist in geometry shaders.
  */
 void
@@ -992,6 +1068,8 @@
                                         const char *name_as_gs_input)
 {
    switch (state->stage) {
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
    case MESA_SHADER_GEOMETRY:
       this->per_vertex_in.add_field(slot, type, name);
       /* FALLTHROUGH */
@@ -1044,13 +1122,40 @@
       }
    }
 
+   /* Section 7.1 (Built-In Language Variables) of the GLSL 4.00 spec
+    * says:
+    *
+    *    "In the tessellation control language, built-in variables are
+    *    intrinsically declared as:
+    *
+    *        in gl_PerVertex {
+    *            vec4 gl_Position;
+    *            float gl_PointSize;
+    *            float gl_ClipDistance[];
+    *        } gl_in[gl_MaxPatchVertices];"
+    */
+   if (state->stage == MESA_SHADER_TESS_CTRL ||
+       state->stage == MESA_SHADER_TESS_EVAL) {
+      const glsl_type *per_vertex_in_type =
+         this->per_vertex_in.construct_interface_instance();
+      add_variable("gl_in", array(per_vertex_in_type, state->Const.MaxPatchVertices),
+                   ir_var_shader_in, -1);
+   }
    if (state->stage == MESA_SHADER_GEOMETRY) {
       const glsl_type *per_vertex_in_type =
          this->per_vertex_in.construct_interface_instance();
       add_variable("gl_in", array(per_vertex_in_type, 0),
                    ir_var_shader_in, -1);
    }
-   if (state->stage == MESA_SHADER_VERTEX || state->stage == MESA_SHADER_GEOMETRY) {
+   if (state->stage == MESA_SHADER_TESS_CTRL) {
+      const glsl_type *per_vertex_out_type =
+         this->per_vertex_out.construct_interface_instance();
+      add_variable("gl_out", array(per_vertex_out_type, 0),
+                   ir_var_shader_out, -1);
+   }
+   if (state->stage == MESA_SHADER_VERTEX ||
+       state->stage == MESA_SHADER_TESS_EVAL ||
+       state->stage == MESA_SHADER_GEOMETRY) {
       const glsl_type *per_vertex_out_type =
          this->per_vertex_out.construct_interface_instance();
       const glsl_struct_field *fields = per_vertex_out_type->fields.structure;
@@ -1061,6 +1166,7 @@
          var->data.interpolation = fields[i].interpolation;
          var->data.centroid = fields[i].centroid;
          var->data.sample = fields[i].sample;
+         var->data.patch = fields[i].patch;
          var->init_interface_type(per_vertex_out_type);
       }
    }
@@ -1085,6 +1191,12 @@
    case MESA_SHADER_VERTEX:
       gen.generate_vs_special_vars();
       break;
+   case MESA_SHADER_TESS_CTRL:
+      gen.generate_tcs_special_vars();
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      gen.generate_tes_special_vars();
+      break;
    case MESA_SHADER_GEOMETRY:
       gen.generate_gs_special_vars();
       break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glcpp/glcpp.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glcpp/glcpp.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glcpp/glcpp.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glcpp/glcpp.c	2015-09-16 14:36:09.000000000 +0000
@@ -29,6 +29,7 @@
 #include "glcpp.h"
 #include "main/mtypes.h"
 #include "main/shaderobj.h"
+#include "util/strtod.h"
 
 extern int glcpp_parser_debug;
 
@@ -168,6 +169,8 @@
 	if (shader == NULL)
 	   return 1;
 
+	_mesa_locale_init();
+
 	ret = glcpp_preprocess(ctx, &shader, &info_log, NULL, &gl_ctx);
 
 	printf("%s", shader);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glcpp/glcpp-parse.y mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glcpp/glcpp-parse.y
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glcpp/glcpp-parse.y	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glcpp/glcpp-parse.y	2015-09-16 14:36:09.000000000 +0000
@@ -2478,11 +2478,23 @@
 	      if (extensions->ARB_shader_image_load_store)
 	         add_builtin_define(parser, "GL_ARB_shader_image_load_store", 1);
 
+              if (extensions->ARB_shader_image_size)
+                 add_builtin_define(parser, "GL_ARB_shader_image_size", 1);
+
               if (extensions->ARB_derivative_control)
                  add_builtin_define(parser, "GL_ARB_derivative_control", 1);
 
               if (extensions->ARB_shader_precision)
                  add_builtin_define(parser, "GL_ARB_shader_precision", 1);
+
+	      if (extensions->ARB_shader_storage_buffer_object)
+	         add_builtin_define(parser, "GL_ARB_shader_storage_buffer_object", 1);
+
+	      if (extensions->ARB_tessellation_shader)
+	         add_builtin_define(parser, "GL_ARB_tessellation_shader", 1);
+
+              if (extensions->ARB_shader_subroutine)
+                 add_builtin_define(parser, "GL_ARB_shader_subroutine", 1);
 	   }
 	}
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_lexer.ll mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_lexer.ll
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_lexer.ll	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_lexer.ll	2015-09-16 14:36:09.000000000 +0000
@@ -308,12 +308,14 @@
 out		return OUT_TOK;
 inout		return INOUT_TOK;
 uniform		return UNIFORM;
+buffer		return BUFFER;
 varying		DEPRECATED_ES_KEYWORD(VARYING);
 centroid	KEYWORD(120, 300, 120, 300, CENTROID);
 invariant	KEYWORD(120, 100, 120, 100, INVARIANT);
 flat		KEYWORD(130, 100, 130, 300, FLAT);
 smooth		KEYWORD(130, 300, 130, 300, SMOOTH);
 noperspective	KEYWORD(130, 300, 130, 0, NOPERSPECTIVE);
+patch		KEYWORD_WITH_ALT(0, 300, 400, 0, yyextra->ARB_tessellation_shader_enable, PATCH);
 
 sampler1D	DEPRECATED_ES_KEYWORD(SAMPLER1D);
 sampler2D	return SAMPLER2D;
@@ -341,9 +343,10 @@
 
    /* additional keywords in ARB_texture_multisample, included in GLSL 1.50 */
    /* these are reserved but not defined in GLSL 3.00 */
-sampler2DMS        KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS);
-isampler2DMS       KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS);
-usampler2DMS       KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS);
+   /* [iu]sampler2DMS are defined in GLSL ES 3.10 */
+sampler2DMS        KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS);
+isampler2DMS       KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS);
+usampler2DMS       KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS);
 sampler2DMSArray   KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, SAMPLER2DMSARRAY);
 isampler2DMSArray  KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMSARRAY);
 usampler2DMSArray  KEYWORD_WITH_ALT(150, 300, 150, 0, yyextra->ARB_texture_multisample_enable, USAMPLER2DMSARRAY);
@@ -366,35 +369,35 @@
 
    /* keywords available with ARB_shader_image_load_store */
 image1D         KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1D);
-image2D         KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2D);
-image3D         KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE3D);
+image2D         KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2D);
+image3D         KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE3D);
 image2DRect     KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DRECT);
-imageCube       KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE);
+imageCube       KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE);
 imageBuffer     KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGEBUFFER);
 image1DArray    KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1DARRAY);
-image2DArray    KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY);
+image2DArray    KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY);
 imageCubeArray  KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBEARRAY);
 image2DMS       KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMS);
 image2DMSArray  KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMSARRAY);
 iimage1D        KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1D);
-iimage2D        KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D);
-iimage3D        KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D);
+iimage2D        KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D);
+iimage3D        KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D);
 iimage2DRect    KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DRECT);
-iimageCube      KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE);
+iimageCube      KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE);
 iimageBuffer    KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGEBUFFER);
 iimage1DArray   KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1DARRAY);
-iimage2DArray   KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY);
+iimage2DArray   KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY);
 iimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBEARRAY);
 iimage2DMS      KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMS);
 iimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMSARRAY);
 uimage1D        KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1D);
-uimage2D        KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D);
-uimage3D        KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D);
+uimage2D        KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D);
+uimage3D        KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D);
 uimage2DRect    KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DRECT);
-uimageCube      KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE);
+uimageCube      KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE);
 uimageBuffer    KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGEBUFFER);
 uimage1DArray   KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1DARRAY);
-uimage2DArray   KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY);
+uimage2DArray   KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY);
 uimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBEARRAY);
 uimage2DMS      KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMS);
 uimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMSARRAY);
@@ -403,11 +406,11 @@
 image1DArrayShadow      KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW);
 image2DArrayShadow      KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW);
 
-coherent	KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, COHERENT);
-volatile	KEYWORD_WITH_ALT(110, 100, 420, 0, yyextra->ARB_shader_image_load_store_enable, VOLATILE);
-restrict	KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, RESTRICT);
-readonly	KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, READONLY);
-writeonly	KEYWORD_WITH_ALT(420, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, WRITEONLY);
+coherent       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, COHERENT);
+volatile       KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable, VOLATILE);
+restrict       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, RESTRICT);
+readonly       KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, READONLY);
+writeonly      KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, WRITEONLY);
 
 atomic_uint     KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT);
 
@@ -424,7 +427,8 @@
 		      || yyextra->ARB_uniform_buffer_object_enable
 		      || yyextra->ARB_fragment_coord_conventions_enable
                       || yyextra->ARB_shading_language_420pack_enable
-                      || yyextra->ARB_compute_shader_enable) {
+                      || yyextra->ARB_compute_shader_enable
+                      || yyextra->ARB_tessellation_shader_enable) {
 		      return LAYOUT_TOK;
 		   } else {
 		      void *mem_ctx = yyextra;
@@ -575,9 +579,8 @@
 
     /* Additional reserved words in GLSL ES 3.00 */
 resource	KEYWORD(0, 300, 0, 0, RESOURCE);
-patch		KEYWORD(0, 300, 0, 0, PATCH);
 sample		KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_gpu_shader5_enable, SAMPLE);
-subroutine	KEYWORD(0, 300, 0, 0, SUBROUTINE);
+subroutine	KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
 
 
 [_a-zA-Z][_a-zA-Z0-9]*	{
@@ -593,6 +596,10 @@
 			    return classify_identifier(state, yytext);
 			}
 
+\.			{ struct _mesa_glsl_parse_state *state = yyextra;
+			  state->is_field = true;
+			  return DOT_TOK; }
+
 .			{ return yytext[0]; }
 
 %%
@@ -600,6 +607,10 @@
 int
 classify_identifier(struct _mesa_glsl_parse_state *state, const char *name)
 {
+   if (state->is_field) {
+      state->is_field = false;
+      return FIELD_SELECTION;
+   }
    if (state->symbols->get_variable(name) || state->symbols->get_function(name))
       return IDENTIFIER;
    else if (state->symbols->get_type(name))
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_parser_extras.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_parser_extras.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_parser_extras.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_parser_extras.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -113,12 +113,18 @@
    this->Const.MaxGeometryUniformComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents;
 
    this->Const.MaxVertexAtomicCounters = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters;
+   this->Const.MaxTessControlAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters;
+   this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters;
    this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters;
    this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters;
    this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters;
    this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings;
    this->Const.MaxVertexAtomicCounterBuffers =
       ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers;
+   this->Const.MaxTessControlAtomicCounterBuffers =
+      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers;
+   this->Const.MaxTessEvaluationAtomicCounterBuffers =
+      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers;
    this->Const.MaxGeometryAtomicCounterBuffers =
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers;
    this->Const.MaxFragmentAtomicCounterBuffers =
@@ -135,9 +141,11 @@
       this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i];
 
    this->Const.MaxImageUnits = ctx->Const.MaxImageUnits;
-   this->Const.MaxCombinedImageUnitsAndFragmentOutputs = ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs;
+   this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources;
    this->Const.MaxImageSamples = ctx->Const.MaxImageSamples;
    this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms;
+   this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms;
+   this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms;
    this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms;
    this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
    this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms;
@@ -145,12 +153,30 @@
    /* ARB_viewport_array */
    this->Const.MaxViewports = ctx->Const.MaxViewports;
 
+   /* tessellation shader constants */
+   this->Const.MaxPatchVertices = ctx->Const.MaxPatchVertices;
+   this->Const.MaxTessGenLevel = ctx->Const.MaxTessGenLevel;
+   this->Const.MaxTessControlInputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents;
+   this->Const.MaxTessControlOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents;
+   this->Const.MaxTessControlTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits;
+   this->Const.MaxTessEvaluationInputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents;
+   this->Const.MaxTessEvaluationOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents;
+   this->Const.MaxTessEvaluationTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits;
+   this->Const.MaxTessPatchComponents = ctx->Const.MaxTessPatchComponents;
+   this->Const.MaxTessControlTotalOutputComponents = ctx->Const.MaxTessControlTotalOutputComponents;
+   this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents;
+   this->Const.MaxTessEvaluationUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents;
+
    this->current_function = NULL;
    this->toplevel_ir = NULL;
    this->found_return = false;
    this->all_invariant = false;
    this->user_structures = NULL;
    this->num_user_structures = 0;
+   this->num_subroutines = 0;
+   this->subroutines = NULL;
+   this->num_subroutine_types = 0;
+   this->subroutine_types = NULL;
 
    /* supported_versions should be large enough to support the known desktop
     * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10))
@@ -224,6 +250,7 @@
    this->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = false;
 
    this->gs_input_prim_type_specified = false;
+   this->tcs_output_vertices_specified = false;
    this->gs_input_size = 0;
    this->in_qualifier = new(this) ast_type_qualifier();
    this->out_qualifier = new(this) ast_type_qualifier();
@@ -389,6 +416,8 @@
    case MESA_SHADER_FRAGMENT: return "fragment";
    case MESA_SHADER_GEOMETRY: return "geometry";
    case MESA_SHADER_COMPUTE:  return "compute";
+   case MESA_SHADER_TESS_CTRL: return "tess ctrl";
+   case MESA_SHADER_TESS_EVAL: return "tess eval";
    }
 
    unreachable("Unknown shader stage.");
@@ -406,6 +435,8 @@
    case MESA_SHADER_FRAGMENT: return "FS";
    case MESA_SHADER_GEOMETRY: return "GS";
    case MESA_SHADER_COMPUTE:  return "CS";
+   case MESA_SHADER_TESS_CTRL: return "TCS";
+   case MESA_SHADER_TESS_EVAL: return "TES";
    }
 
    unreachable("Unknown shader stage.");
@@ -551,37 +582,41 @@
 
    /* ARB extensions go here, sorted alphabetically.
     */
-   EXT(ARB_arrays_of_arrays,           true,  false,     ARB_arrays_of_arrays),
-   EXT(ARB_compute_shader,             true,  false,     ARB_compute_shader),
-   EXT(ARB_conservative_depth,         true,  false,     ARB_conservative_depth),
-   EXT(ARB_derivative_control,         true,  false,     ARB_derivative_control),
-   EXT(ARB_draw_buffers,               true,  false,     dummy_true),
-   EXT(ARB_draw_instanced,             true,  false,     ARB_draw_instanced),
-   EXT(ARB_explicit_attrib_location,   true,  false,     ARB_explicit_attrib_location),
-   EXT(ARB_explicit_uniform_location,  true,  false,     ARB_explicit_uniform_location),
-   EXT(ARB_fragment_coord_conventions, true,  false,     ARB_fragment_coord_conventions),
-   EXT(ARB_fragment_layer_viewport,    true,  false,     ARB_fragment_layer_viewport),
-   EXT(ARB_gpu_shader5,                true,  false,     ARB_gpu_shader5),
-   EXT(ARB_gpu_shader_fp64,            true,  false,     ARB_gpu_shader_fp64),
-   EXT(ARB_sample_shading,             true,  false,     ARB_sample_shading),
-   EXT(ARB_separate_shader_objects,    true,  false,     dummy_true),
-   EXT(ARB_shader_atomic_counters,     true,  false,     ARB_shader_atomic_counters),
-   EXT(ARB_shader_bit_encoding,        true,  false,     ARB_shader_bit_encoding),
-   EXT(ARB_shader_image_load_store,    true,  false,     ARB_shader_image_load_store),
-   EXT(ARB_shader_precision,           true,  false,     ARB_shader_precision),
-   EXT(ARB_shader_stencil_export,      true,  false,     ARB_shader_stencil_export),
-   EXT(ARB_shader_texture_lod,         true,  false,     ARB_shader_texture_lod),
-   EXT(ARB_shading_language_420pack,   true,  false,     ARB_shading_language_420pack),
-   EXT(ARB_shading_language_packing,   true,  false,     ARB_shading_language_packing),
-   EXT(ARB_texture_cube_map_array,     true,  false,     ARB_texture_cube_map_array),
-   EXT(ARB_texture_gather,             true,  false,     ARB_texture_gather),
-   EXT(ARB_texture_multisample,        true,  false,     ARB_texture_multisample),
-   EXT(ARB_texture_query_levels,       true,  false,     ARB_texture_query_levels),
-   EXT(ARB_texture_query_lod,          true,  false,     ARB_texture_query_lod),
-   EXT(ARB_texture_rectangle,          true,  false,     dummy_true),
-   EXT(ARB_uniform_buffer_object,      true,  false,     ARB_uniform_buffer_object),
-   EXT(ARB_vertex_attrib_64bit,        true,  false,     ARB_vertex_attrib_64bit),
-   EXT(ARB_viewport_array,             true,  false,     ARB_viewport_array),
+   EXT(ARB_arrays_of_arrays,             true,  false,     ARB_arrays_of_arrays),
+   EXT(ARB_compute_shader,               true,  false,     ARB_compute_shader),
+   EXT(ARB_conservative_depth,           true,  false,     ARB_conservative_depth),
+   EXT(ARB_derivative_control,           true,  false,     ARB_derivative_control),
+   EXT(ARB_draw_buffers,                 true,  false,     dummy_true),
+   EXT(ARB_draw_instanced,               true,  false,     ARB_draw_instanced),
+   EXT(ARB_explicit_attrib_location,     true,  false,     ARB_explicit_attrib_location),
+   EXT(ARB_explicit_uniform_location,    true,  false,     ARB_explicit_uniform_location),
+   EXT(ARB_fragment_coord_conventions,   true,  false,     ARB_fragment_coord_conventions),
+   EXT(ARB_fragment_layer_viewport,      true,  false,     ARB_fragment_layer_viewport),
+   EXT(ARB_gpu_shader5,                  true,  false,     ARB_gpu_shader5),
+   EXT(ARB_gpu_shader_fp64,              true,  false,     ARB_gpu_shader_fp64),
+   EXT(ARB_sample_shading,               true,  false,     ARB_sample_shading),
+   EXT(ARB_separate_shader_objects,      true,  false,     dummy_true),
+   EXT(ARB_shader_atomic_counters,       true,  false,     ARB_shader_atomic_counters),
+   EXT(ARB_shader_bit_encoding,          true,  false,     ARB_shader_bit_encoding),
+   EXT(ARB_shader_image_load_store,      true,  false,     ARB_shader_image_load_store),
+   EXT(ARB_shader_image_size,            true,  false,     ARB_shader_image_size),
+   EXT(ARB_shader_precision,             true,  false,     ARB_shader_precision),
+   EXT(ARB_shader_stencil_export,        true,  false,     ARB_shader_stencil_export),
+   EXT(ARB_shader_storage_buffer_object, true,  false,     ARB_shader_storage_buffer_object),
+   EXT(ARB_shader_subroutine,            true,  false,     ARB_shader_subroutine),
+   EXT(ARB_shader_texture_lod,           true,  false,     ARB_shader_texture_lod),
+   EXT(ARB_shading_language_420pack,     true,  false,     ARB_shading_language_420pack),
+   EXT(ARB_shading_language_packing,     true,  false,     ARB_shading_language_packing),
+   EXT(ARB_tessellation_shader,          true,  false,     ARB_tessellation_shader),
+   EXT(ARB_texture_cube_map_array,       true,  false,     ARB_texture_cube_map_array),
+   EXT(ARB_texture_gather,               true,  false,     ARB_texture_gather),
+   EXT(ARB_texture_multisample,          true,  false,     ARB_texture_multisample),
+   EXT(ARB_texture_query_levels,         true,  false,     ARB_texture_query_levels),
+   EXT(ARB_texture_query_lod,            true,  false,     ARB_texture_query_lod),
+   EXT(ARB_texture_rectangle,            true,  false,     dummy_true),
+   EXT(ARB_uniform_buffer_object,        true,  false,     ARB_uniform_buffer_object),
+   EXT(ARB_vertex_attrib_64bit,          true,  false,     ARB_vertex_attrib_64bit),
+   EXT(ARB_viewport_array,               true,  false,     ARB_viewport_array),
 
    /* KHR extensions go here, sorted alphabetically.
     */
@@ -778,7 +813,7 @@
 
    /* If the aggregate is an array, recursively set its elements' types. */
    if (type->is_array()) {
-      /* Each array element has the type type->element_type().
+      /* Each array element has the type type->fields.array.
        *
        * E.g., if <type> if struct S[2] we want to set each element's type to
        * struct S.
@@ -790,7 +825,7 @@
                                                link);
 
          if (expr->oper == ast_aggregate)
-            _mesa_ast_set_aggregate_type(type->element_type(), expr);
+            _mesa_ast_set_aggregate_type(type->fields.array, expr);
       }
 
    /* If the aggregate is a struct, recursively set its fields' types. */
@@ -825,6 +860,15 @@
 void
 _mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q)
 {
+   if (q->flags.q.subroutine)
+      printf("subroutine ");
+   /* Subroutine definition: print the attached subroutine list in parens. */
+   if (q->flags.q.subroutine_def) {
+      printf("subroutine (");
+      q->subroutine_list->print();
+      printf(") ");   /* trailing space, like every other qualifier here */
+   }
+
    if (q->flags.q.constant)
       printf("const ");
 
@@ -851,8 +895,12 @@
       printf("centroid ");
    if (q->flags.q.sample)
       printf("sample ");
+   if (q->flags.q.patch)
+      printf("patch ");
    if (q->flags.q.uniform)
       printf("uniform ");
+   if (q->flags.q.buffer)
+      printf("buffer ");
    if (q->flags.q.smooth)
       printf("smooth ");
    if (q->flags.q.flat)
@@ -1413,12 +1461,25 @@
    is_declaration = true;
 }
 
+void ast_subroutine_list::print(void) const
+{
+   /* Print the entries comma-separated; the head entry gets no separator. */
+   foreach_list_typed (ast_node, entry, link, & this->declarations) {
+      printf("%s", &entry->link == this->declarations.get_head() ? "" : ", ");
+      entry->print();
+   }
+}
+
 static void
 set_shader_inout_layout(struct gl_shader *shader,
 		     struct _mesa_glsl_parse_state *state)
 {
-   if (shader->Stage != MESA_SHADER_GEOMETRY) {
-      /* Should have been prevented by the parser. */
+   /* Should have been prevented by the parser. */
+   if (shader->Stage == MESA_SHADER_TESS_CTRL) {
+      assert(!state->in_qualifier->flags.i);
+   } else if (shader->Stage == MESA_SHADER_TESS_EVAL) {
+      assert(!state->out_qualifier->flags.i);
+   } else if (shader->Stage != MESA_SHADER_GEOMETRY) {
       assert(!state->in_qualifier->flags.i);
       assert(!state->out_qualifier->flags.i);
    }
@@ -1438,6 +1499,28 @@
    }
 
    switch (shader->Stage) {
+   case MESA_SHADER_TESS_CTRL:
+      shader->TessCtrl.VerticesOut = 0;
+      if (state->tcs_output_vertices_specified)
+         shader->TessCtrl.VerticesOut = state->out_qualifier->vertices;
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      shader->TessEval.PrimitiveMode = PRIM_UNKNOWN;
+      if (state->in_qualifier->flags.q.prim_type)
+         shader->TessEval.PrimitiveMode = state->in_qualifier->prim_type;
+
+      shader->TessEval.Spacing = 0;
+      if (state->in_qualifier->flags.q.vertex_spacing)
+         shader->TessEval.Spacing = state->in_qualifier->vertex_spacing;
+
+      shader->TessEval.VertexOrder = 0;
+      if (state->in_qualifier->flags.q.ordering)
+         shader->TessEval.VertexOrder = state->in_qualifier->ordering;
+
+      shader->TessEval.PointMode = -1;
+      if (state->in_qualifier->flags.q.point_mode)
+         shader->TessEval.PointMode = state->in_qualifier->point_mode;
+      break;
    case MESA_SHADER_GEOMETRY:
       shader->Geom.VerticesOut = 0;
       if (state->out_qualifier->flags.q.max_vertices)
@@ -1535,6 +1618,7 @@
       struct gl_shader_compiler_options *options =
          &ctx->Const.ShaderCompilerOptions[shader->Stage];
 
+      lower_subroutine(shader->ir, state);
       /* Do some optimization at compile time to reduce shader IR size
        * and reduce later work if the same shader is linked multiple times
        */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_parser_extras.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_parser_extras.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_parser_extras.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_parser_extras.h	2015-09-16 14:36:09.000000000 +0000
@@ -129,7 +129,7 @@
    bool check_explicit_attrib_stream_allowed(YYLTYPE *locp)
    {
       if (!this->has_explicit_attrib_stream()) {
-         const char *const requirement = "GL_ARB_gpu_shader5 extension or GLSL 400";
+         const char *const requirement = "GL_ARB_gpu_shader5 extension or GLSL 4.00";
 
          _mesa_glsl_error(locp, this, "explicit stream requires %s",
                           requirement);
@@ -144,8 +144,8 @@
    {
       if (!this->has_explicit_attrib_location()) {
          const char *const requirement = this->es_shader
-            ? "GLSL ES 300"
-            : "GL_ARB_explicit_attrib_location extension or GLSL 330";
+            ? "GLSL ES 3.00"
+            : "GL_ARB_explicit_attrib_location extension or GLSL 3.30";
 
          _mesa_glsl_error(locp, this, "%s explicit location requires %s",
                           mode_string(var), requirement);
@@ -160,8 +160,8 @@
    {
       if (!this->has_separate_shader_objects()) {
          const char *const requirement = this->es_shader
-            ? "GL_EXT_separate_shader_objects extension or GLSL ES 310"
-            : "GL_ARB_separate_shader_objects extension or GLSL 420";
+            ? "GL_EXT_separate_shader_objects extension or GLSL ES 3.10"
+            : "GL_ARB_separate_shader_objects extension or GLSL 4.20";
 
          _mesa_glsl_error(locp, this, "%s explicit location requires %s",
                           mode_string(var), requirement);
@@ -177,9 +177,9 @@
       if (!this->has_explicit_attrib_location() ||
           !this->has_explicit_uniform_location()) {
          const char *const requirement = this->es_shader
-            ? "GLSL ES 310"
+            ? "GLSL ES 3.10"
             : "GL_ARB_explicit_uniform_location and either "
-              "GL_ARB_explicit_attrib_location or GLSL 330.";
+              "GL_ARB_explicit_attrib_location or GLSL 3.30.";
 
          _mesa_glsl_error(locp, this,
                           "uniform explicit location requires %s",
@@ -215,6 +215,11 @@
       return ARB_uniform_buffer_object_enable || is_version(140, 300);
    }
 
+   bool has_shader_storage_buffer_objects() const
+   {
+      return ARB_shader_storage_buffer_object_enable || is_version(430, 0);
+   }
+
    bool has_separate_shader_objects() const
    {
       return ARB_separate_shader_objects_enable || is_version(410, 310)
@@ -226,6 +231,16 @@
       return ARB_gpu_shader_fp64_enable || is_version(400, 0);
    }
 
+   bool has_420pack() const
+   {
+      return ARB_shading_language_420pack_enable || is_version(420, 0);
+   }
+
+   bool has_compute_shader() const
+   {
+      return ARB_compute_shader_enable || is_version(430, 310);
+   }
+
    void process_version_directive(YYLTYPE *locp, int version,
                                   const char *ident);
 
@@ -272,15 +287,19 @@
    bool fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
 
    /**
-    * True if a geometry shader input primitive type was specified using a
-    * layout directive.
+    * True if a geometry shader input primitive type or tessellation control
+    * output vertices were specified using a layout directive.
     *
-    * Note: this value is computed at ast_to_hir time rather than at parse
+    * Note: these values are computed at ast_to_hir time rather than at parse
     * time.
     */
    bool gs_input_prim_type_specified;
+   bool tcs_output_vertices_specified;
 
-   /** Input layout qualifiers from GLSL 1.50. (geometry shader controls)*/
+   /**
+    * Input layout qualifiers from GLSL 1.50 (geometry shader controls),
+    * and GLSL 4.00 (tessellation evaluation shader).
+    */
    struct ast_type_qualifier *in_qualifier;
 
    /**
@@ -298,7 +317,10 @@
     */
    unsigned cs_input_local_size[3];
 
-   /** Output layout qualifiers from GLSL 1.50. (geometry shader controls)*/
+   /**
+    * Output layout qualifiers from GLSL 1.50 (geometry shader controls),
+    * and GLSL 4.00 (tessellation control shader).
+    */
    struct ast_type_qualifier *out_qualifier;
 
    /**
@@ -348,6 +370,8 @@
 
       /* ARB_shader_atomic_counters */
       unsigned MaxVertexAtomicCounters;
+      unsigned MaxTessControlAtomicCounters;
+      unsigned MaxTessEvaluationAtomicCounters;
       unsigned MaxGeometryAtomicCounters;
       unsigned MaxFragmentAtomicCounters;
       unsigned MaxCombinedAtomicCounters;
@@ -358,6 +382,8 @@
        * 3.10.
        */
       unsigned MaxVertexAtomicCounterBuffers;
+      unsigned MaxTessControlAtomicCounterBuffers;
+      unsigned MaxTessEvaluationAtomicCounterBuffers;
       unsigned MaxGeometryAtomicCounterBuffers;
       unsigned MaxFragmentAtomicCounterBuffers;
       unsigned MaxCombinedAtomicCounterBuffers;
@@ -369,15 +395,31 @@
 
       /* ARB_shader_image_load_store */
       unsigned MaxImageUnits;
-      unsigned MaxCombinedImageUnitsAndFragmentOutputs;
+      unsigned MaxCombinedShaderOutputResources;
       unsigned MaxImageSamples;
       unsigned MaxVertexImageUniforms;
+      unsigned MaxTessControlImageUniforms;
+      unsigned MaxTessEvaluationImageUniforms;
       unsigned MaxGeometryImageUniforms;
       unsigned MaxFragmentImageUniforms;
       unsigned MaxCombinedImageUniforms;
 
       /* ARB_viewport_array */
       unsigned MaxViewports;
+
+      /* ARB_tessellation_shader */
+      unsigned MaxPatchVertices;
+      unsigned MaxTessGenLevel;
+      unsigned MaxTessControlInputComponents;
+      unsigned MaxTessControlOutputComponents;
+      unsigned MaxTessControlTextureImageUnits;
+      unsigned MaxTessEvaluationInputComponents;
+      unsigned MaxTessEvaluationOutputComponents;
+      unsigned MaxTessEvaluationTextureImageUnits;
+      unsigned MaxTessPatchComponents;
+      unsigned MaxTessControlTotalOutputComponents;
+      unsigned MaxTessControlUniformComponents;
+      unsigned MaxTessEvaluationUniformComponents;
    } Const;
 
    /**
@@ -458,16 +500,24 @@
    bool ARB_shader_bit_encoding_warn;
    bool ARB_shader_image_load_store_enable;
    bool ARB_shader_image_load_store_warn;
+   bool ARB_shader_image_size_enable;
+   bool ARB_shader_image_size_warn;
    bool ARB_shader_precision_enable;
    bool ARB_shader_precision_warn;
    bool ARB_shader_stencil_export_enable;
    bool ARB_shader_stencil_export_warn;
+   bool ARB_shader_storage_buffer_object_enable;
+   bool ARB_shader_storage_buffer_object_warn;
+   bool ARB_shader_subroutine_enable;
+   bool ARB_shader_subroutine_warn;
    bool ARB_shader_texture_lod_enable;
    bool ARB_shader_texture_lod_warn;
    bool ARB_shading_language_420pack_enable;
    bool ARB_shading_language_420pack_warn;
    bool ARB_shading_language_packing_enable;
    bool ARB_shading_language_packing_warn;
+   bool ARB_tessellation_shader_enable;
+   bool ARB_tessellation_shader_warn;
    bool ARB_texture_cube_map_array_enable;
    bool ARB_texture_cube_map_array_warn;
    bool ARB_texture_gather_enable;
@@ -538,10 +588,38 @@
 
    bool fs_early_fragment_tests;
 
+   /**
+    * For tessellation control shaders, size of the most recently seen output
+    * declaration that was a sized array, or 0 if no sized output array
+    * declarations have been seen.
+    *
+    * Unused for other shader types.
+    */
+   unsigned tcs_output_size;
+
    /** Atomic counter offsets by binding */
    unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS];
 
    bool allow_extension_directive_midshader;
+
+   /**
+    * Known subroutine type declarations.
+    */
+   int num_subroutine_types;
+   ir_function **subroutine_types;
+
+   /**
+    * Functions that are associated with
+    * subroutine types.
+    */
+   int num_subroutines;
+   ir_function **subroutines;
+
+   /**
+    * Temporary lexer state for field selection: set when a '.' token has
+    * just been scanned, so the next identifier lexes as FIELD_SELECTION.
+    */
+   bool is_field;
 };
 
 # define YYLLOC_DEFAULT(Current, Rhs, N)			\
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_parser.yy mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_parser.yy
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_parser.yy	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_parser.yy	2015-09-16 14:36:09.000000000 +0000
@@ -121,7 +121,7 @@
    ast_case_statement *case_statement;
    ast_case_statement_list *case_statement_list;
    ast_interface_block *interface_block;
-
+   ast_subroutine_list *subroutine_list;
    struct {
       ast_node *cond;
       ast_expression *rest;
@@ -134,7 +134,7 @@
 }
 
 %token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK
-%token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
+%token BREAK BUFFER CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
 %token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4
 %token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE
 %token NOPERSPECTIVE FLAT SMOOTH
@@ -186,7 +186,7 @@
 %token PRAGMA_OPTIMIZE_ON PRAGMA_OPTIMIZE_OFF
 %token PRAGMA_INVARIANT_ALL
 %token LAYOUT_TOK
-
+%token DOT_TOK
    /* Reserved words that are not actually used in the grammar.
     */
 %token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO
@@ -215,6 +215,8 @@
 %type <type_qualifier> layout_qualifier_id_list layout_qualifier_id
 %type <type_qualifier> interface_block_layout_qualifier
 %type <type_qualifier> memory_qualifier
+%type <type_qualifier> subroutine_qualifier
+%type <subroutine_list> subroutine_type_list
 %type <type_qualifier> interface_qualifier
 %type <type_specifier> type_specifier
 %type <type_specifier> type_specifier_nonarray
@@ -260,10 +262,6 @@
 %type <expression> function_call_generic
 %type <expression> function_call_or_method
 %type <expression> function_call
-%type <expression> method_call_generic
-%type <expression> method_call_header_with_parameters
-%type <expression> method_call_header_no_parameters
-%type <expression> method_call_header
 %type <n> assignment_operator
 %type <n> unary_operator
 %type <expression> function_identifier
@@ -476,7 +474,7 @@
    {
       $$ = $1;
    }
-   | postfix_expression '.' any_identifier
+   | postfix_expression DOT_TOK FIELD_SELECTION
    {
       void *ctx = state;
       $$ = new(ctx) ast_expression(ast_field_selection, $1, NULL, NULL);
@@ -507,12 +505,6 @@
 
 function_call_or_method:
    function_call_generic
-   | postfix_expression '.' method_call_generic
-   {
-      void *ctx = state;
-      $$ = new(ctx) ast_expression(ast_field_selection, $1, $3, NULL);
-      $$->set_location_range(@1, @3);
-   }
    ;
 
 function_call_generic:
@@ -554,62 +546,17 @@
       $$ = new(ctx) ast_function_expression($1);
       $$->set_location(@1);
       }
-   | variable_identifier
+   | postfix_expression
    {
       void *ctx = state;
-      ast_expression *callee = new(ctx) ast_expression($1);
-      callee->set_location(@1);
-      $$ = new(ctx) ast_function_expression(callee);
-      $$->set_location(@1);
-      }
-   | FIELD_SELECTION
-   {
-      void *ctx = state;
-      ast_expression *callee = new(ctx) ast_expression($1);
-      callee->set_location(@1);
-      $$ = new(ctx) ast_function_expression(callee);
+      $$ = new(ctx) ast_function_expression($1);
       $$->set_location(@1);
       }
    ;
 
-method_call_generic:
-   method_call_header_with_parameters ')'
-   | method_call_header_no_parameters ')'
-   ;
-
-method_call_header_no_parameters:
-   method_call_header VOID_TOK
-   | method_call_header
-   ;
-
-method_call_header_with_parameters:
-   method_call_header assignment_expression
-   {
-      $$ = $1;
-      $$->set_location(@1);
-      $$->expressions.push_tail(& $2->link);
-   }
-   | method_call_header_with_parameters ',' assignment_expression
-   {
-      $$ = $1;
-      $$->set_location(@1);
-      $$->expressions.push_tail(& $3->link);
-   }
-   ;
-
    // Grammar Note: Constructors look like methods, but lexical
    // analysis recognized most of them as keywords. They are now
    // recognized through "type_specifier".
-method_call_header:
-   variable_identifier '('
-   {
-      void *ctx = state;
-      ast_expression *callee = new(ctx) ast_expression($1);
-      callee->set_location(@1);
-      $$ = new(ctx) ast_function_expression(callee);
-      $$->set_location(@1);
-   }
-   ;
 
    // Grammar Note: No traditional style type casts.
 unary_expression:
@@ -910,7 +857,11 @@
       $$->return_type = $1;
       $$->identifier = $2;
 
-      state->symbols->add_function(new(state) ir_function($2));
+      if ($1->qualifier.flags.q.subroutine) {
+         /* add type for IDENTIFIER search */
+         state->symbols->add_type($2, glsl_type::get_subroutine_instance($2));
+      } else
+         state->symbols->add_function(new(state) ir_function($2));
       state->symbols->push_scope();
    }
    ;
@@ -983,7 +934,7 @@
       if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out))
          _mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier");
 
-      if (!state->ARB_shading_language_420pack_enable && $2.flags.q.constant)
+      if (!state->has_420pack() && $2.flags.q.constant)
          _mesa_glsl_error(&@1, state, "in/out/inout must come after const "
                                       "or precise");
 
@@ -995,7 +946,7 @@
       if ($2.precision != ast_precision_none)
          _mesa_glsl_error(&@1, state, "duplicate precision qualifier");
 
-      if (!state->ARB_shading_language_420pack_enable && $2.flags.i != 0)
+      if (!state->has_420pack() && $2.flags.i != 0)
          _mesa_glsl_error(&@1, state, "precision qualifiers must come last");
 
       $$ = $2;
@@ -1215,7 +1166,8 @@
       /* Layout qualifiers for AMD/ARB_conservative_depth. */
       if (!$$.flags.i &&
           (state->AMD_conservative_depth_enable ||
-           state->ARB_conservative_depth_enable)) {
+           state->ARB_conservative_depth_enable ||
+           state->is_version(420, 0))) {
          if (match_layout_qualifier($1, "depth_any", state) == 0) {
             $$.flags.q.depth_any = 1;
          } else if (match_layout_qualifier($1, "depth_greater", state) == 0) {
@@ -1306,56 +1258,65 @@
 
       /* Layout qualifiers for ARB_shader_image_load_store. */
       if (state->ARB_shader_image_load_store_enable ||
-          state->is_version(420, 0)) {
+          state->is_version(420, 310)) {
          if (!$$.flags.i) {
             static const struct {
                const char *name;
                GLenum format;
                glsl_base_type base_type;
+               /** Minimum desktop GLSL version required for the image
+                * format.  Use 130 if already present in the original
+                * ARB extension.
+                */
+               unsigned required_glsl;
+               /** Minimum GLSL ES version required for the image format. */
+               unsigned required_essl;
             } map[] = {
-               { "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT },
-               { "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT },
-               { "rg32f", GL_RG32F, GLSL_TYPE_FLOAT },
-               { "rg16f", GL_RG16F, GLSL_TYPE_FLOAT },
-               { "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT },
-               { "r32f", GL_R32F, GLSL_TYPE_FLOAT },
-               { "r16f", GL_R16F, GLSL_TYPE_FLOAT },
-               { "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT },
-               { "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT },
-               { "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT },
-               { "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT },
-               { "rg32ui", GL_RG32UI, GLSL_TYPE_UINT },
-               { "rg16ui", GL_RG16UI, GLSL_TYPE_UINT },
-               { "rg8ui", GL_RG8UI, GLSL_TYPE_UINT },
-               { "r32ui", GL_R32UI, GLSL_TYPE_UINT },
-               { "r16ui", GL_R16UI, GLSL_TYPE_UINT },
-               { "r8ui", GL_R8UI, GLSL_TYPE_UINT },
-               { "rgba32i", GL_RGBA32I, GLSL_TYPE_INT },
-               { "rgba16i", GL_RGBA16I, GLSL_TYPE_INT },
-               { "rgba8i", GL_RGBA8I, GLSL_TYPE_INT },
-               { "rg32i", GL_RG32I, GLSL_TYPE_INT },
-               { "rg16i", GL_RG16I, GLSL_TYPE_INT },
-               { "rg8i", GL_RG8I, GLSL_TYPE_INT },
-               { "r32i", GL_R32I, GLSL_TYPE_INT },
-               { "r16i", GL_R16I, GLSL_TYPE_INT },
-               { "r8i", GL_R8I, GLSL_TYPE_INT },
-               { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT },
-               { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT },
-               { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT },
-               { "rg16", GL_RG16, GLSL_TYPE_FLOAT },
-               { "rg8", GL_RG8, GLSL_TYPE_FLOAT },
-               { "r16", GL_R16, GLSL_TYPE_FLOAT },
-               { "r8", GL_R8, GLSL_TYPE_FLOAT },
-               { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT },
-               { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT },
-               { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT },
-               { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT },
-               { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT },
-               { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT }
+               { "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT, 130, 310 },
+               { "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT, 130, 310 },
+               { "rg32f", GL_RG32F, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rg16f", GL_RG16F, GLSL_TYPE_FLOAT, 130, 0 },
+               { "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT, 130, 0 },
+               { "r32f", GL_R32F, GLSL_TYPE_FLOAT, 130, 310 },
+               { "r16f", GL_R16F, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT, 130, 310 },
+               { "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT, 130, 310 },
+               { "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT, 130, 0 },
+               { "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT, 130, 310 },
+               { "rg32ui", GL_RG32UI, GLSL_TYPE_UINT, 130, 0 },
+               { "rg16ui", GL_RG16UI, GLSL_TYPE_UINT, 130, 0 },
+               { "rg8ui", GL_RG8UI, GLSL_TYPE_UINT, 130, 0 },
+               { "r32ui", GL_R32UI, GLSL_TYPE_UINT, 130, 310 },
+               { "r16ui", GL_R16UI, GLSL_TYPE_UINT, 130, 0 },
+               { "r8ui", GL_R8UI, GLSL_TYPE_UINT, 130, 0 },
+               { "rgba32i", GL_RGBA32I, GLSL_TYPE_INT, 130, 310 },
+               { "rgba16i", GL_RGBA16I, GLSL_TYPE_INT, 130, 310 },
+               { "rgba8i", GL_RGBA8I, GLSL_TYPE_INT, 130, 310 },
+               { "rg32i", GL_RG32I, GLSL_TYPE_INT, 130, 0 },
+               { "rg16i", GL_RG16I, GLSL_TYPE_INT, 130, 0 },
+               { "rg8i", GL_RG8I, GLSL_TYPE_INT, 130, 0 },
+               { "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310 },
+               { "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0 },
+               { "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0 },
+               { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310 },
+               { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0 },
+               { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0 },
+               { "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310 },
+               { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0 },
+               { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0 },
+               { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0 },
+               { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0 }
             };
 
             for (unsigned i = 0; i < ARRAY_SIZE(map); i++) {
-               if (match_layout_qualifier($1, map[i].name, state) == 0) {
+               if (state->is_version(map[i].required_glsl,
+                                     map[i].required_essl) &&
+                   match_layout_qualifier($1, map[i].name, state) == 0) {
                   $$.flags.q.explicit_image_format = 1;
                   $$.image_format = map[i].format;
                   $$.image_base_type = map[i].base_type;
@@ -1385,6 +1346,89 @@
          }
       }
 
+      /* Layout qualifiers for tessellation evaluation shaders. */
+      if (!$$.flags.i) {
+         struct {
+            const char *s;
+            GLenum e;
+         } map[] = {
+                 /* triangles already parsed by gs-specific code */
+                 { "quads", GL_QUADS },
+                 { "isolines", GL_ISOLINES },
+         };
+         for (unsigned i = 0; i < ARRAY_SIZE(map); i++) {
+            if (match_layout_qualifier($1, map[i].s, state) == 0) {
+               $$.flags.q.prim_type = 1;
+               $$.prim_type = map[i].e;
+               break;
+            }
+         }
+
+         if ($$.flags.i &&
+             !state->ARB_tessellation_shader_enable &&
+             !state->is_version(400, 0)) {
+            _mesa_glsl_error(& @1, state,
+                             "primitive mode qualifier `%s' requires "
+                             "GLSL 4.00 or ARB_tessellation_shader", $1);
+         }
+      }
+      if (!$$.flags.i) {
+         struct {
+            const char *s;
+            GLenum e;
+         } map[] = {
+                 { "equal_spacing", GL_EQUAL },
+                 { "fractional_odd_spacing", GL_FRACTIONAL_ODD },
+                 { "fractional_even_spacing", GL_FRACTIONAL_EVEN },
+         };
+         for (unsigned i = 0; i < ARRAY_SIZE(map); i++) {
+            if (match_layout_qualifier($1, map[i].s, state) == 0) {
+               $$.flags.q.vertex_spacing = 1;
+               $$.vertex_spacing = map[i].e;
+               break;
+            }
+         }
+
+         if ($$.flags.i &&
+             !state->ARB_tessellation_shader_enable &&
+             !state->is_version(400, 0)) {
+            _mesa_glsl_error(& @1, state,
+                             "vertex spacing qualifier `%s' requires "
+                             "GLSL 4.00 or ARB_tessellation_shader", $1);
+         }
+      }
+      if (!$$.flags.i) {
+         if (match_layout_qualifier($1, "cw", state) == 0) {
+            $$.flags.q.ordering = 1;
+            $$.ordering = GL_CW;
+         } else if (match_layout_qualifier($1, "ccw", state) == 0) {
+            $$.flags.q.ordering = 1;
+            $$.ordering = GL_CCW;
+         }
+
+         if ($$.flags.i &&
+             !state->ARB_tessellation_shader_enable &&
+             !state->is_version(400, 0)) {
+            _mesa_glsl_error(& @1, state,
+                             "ordering qualifier `%s' requires "
+                             "GLSL 4.00 or ARB_tessellation_shader", $1);
+         }
+      }
+      if (!$$.flags.i) {
+         if (match_layout_qualifier($1, "point_mode", state) == 0) {
+            $$.flags.q.point_mode = 1;
+            $$.point_mode = true;
+         }
+
+         if ($$.flags.i &&
+             !state->ARB_tessellation_shader_enable &&
+             !state->is_version(400, 0)) {
+            _mesa_glsl_error(& @1, state,
+                             "qualifier `point_mode' requires "
+                             "GLSL 4.00 or ARB_tessellation_shader");
+         }
+      }
+
       if (!$$.flags.i) {
          _mesa_glsl_error(& @1, state, "unrecognized layout identifier "
                           "`%s'", $1);
@@ -1424,8 +1468,9 @@
          }
       }
 
-      if ((state->ARB_shading_language_420pack_enable ||
-           state->has_atomic_counters()) &&
+      if ((state->has_420pack() ||
+           state->has_atomic_counters() ||
+           state->has_shader_storage_buffer_objects()) &&
           match_layout_qualifier("binding", $1, state) == 0) {
          $$.flags.q.explicit_binding = 1;
          $$.binding = $3;
@@ -1483,11 +1528,10 @@
                                 "invalid %s of %d specified",
                                 local_size_qualifiers[i], $3);
                YYERROR;
-            } else if (!state->is_version(430, 0) &&
-                       !state->ARB_compute_shader_enable) {
+            } else if (!state->has_compute_shader()) {
                _mesa_glsl_error(& @3, state,
                                 "%s qualifier requires GLSL 4.30 or "
-                                "ARB_compute_shader",
+                                "GLSL ES 3.10 or ARB_compute_shader",
                                 local_size_qualifiers[i]);
                YYERROR;
             } else {
@@ -1521,6 +1565,30 @@
          }
       }
 
+      /* Layout qualifiers for tessellation control shaders. */
+      if (match_layout_qualifier("vertices", $1, state) == 0) {
+         $$.flags.q.vertices = 1;
+
+         if ($3 <= 0) {
+            _mesa_glsl_error(& @3, state,
+                             "invalid vertices (%d) specified", $3);
+            YYERROR;
+         } else if ($3 > (int)state->Const.MaxPatchVertices) {
+            _mesa_glsl_error(& @3, state,
+                             "vertices (%d) exceeds "
+                             "GL_MAX_PATCH_VERTICES", $3);
+            YYERROR;
+         } else {
+            $$.vertices = $3;
+            if (!state->ARB_tessellation_shader_enable &&
+                !state->is_version(400, 0)) {
+               _mesa_glsl_error(& @1, state,
+                                "vertices qualifier requires GLSL 4.00 or "
+                                "ARB_tessellation_shader");
+            }
+         }
+      }
+
       /* If the identifier didn't match any known layout identifiers,
        * emit an error.
        */
@@ -1568,6 +1636,41 @@
    }
    ;
 
+subroutine_qualifier:
+   SUBROUTINE
+   {
+      memset(& $$, 0, sizeof($$));
+      $$.flags.q.subroutine = 1;
+   }
+   | SUBROUTINE '(' subroutine_type_list ')'
+   {
+      memset(& $$, 0, sizeof($$));
+      $$.flags.q.subroutine_def = 1;
+      $$.subroutine_list = $3;
+   }
+   ;
+
+subroutine_type_list:
+   any_identifier
+   {
+        void *ctx = state;
+        ast_declaration *decl = new(ctx)  ast_declaration($1, NULL, NULL);
+        decl->set_location(@1);
+
+        $$ = new(ctx) ast_subroutine_list();
+        $$->declarations.push_tail(&decl->link);
+   }
+   | subroutine_type_list ',' any_identifier
+   {
+        void *ctx = state;
+        ast_declaration *decl = new(ctx)  ast_declaration($3, NULL, NULL);
+        decl->set_location(@3);
+
+        $$ = $1;
+        $$->declarations.push_tail(&decl->link);
+   }
+   ;
+
 interpolation_qualifier:
    SMOOTH
    {
@@ -1603,6 +1706,7 @@
    | interpolation_qualifier
    | layout_qualifier
    | memory_qualifier
+   | subroutine_qualifier
    | precision_qualifier
    {
       memset(&$$, 0, sizeof($$));
@@ -1634,7 +1738,7 @@
       if ($2.flags.q.invariant)
          _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier");
 
-      if (!state->ARB_shading_language_420pack_enable && $2.flags.q.precise)
+      if (!state->has_420pack() && $2.flags.q.precise)
          _mesa_glsl_error(&@1, state,
                           "\"invariant\" must come after \"precise\"");
 
@@ -1667,7 +1771,7 @@
       if ($2.has_interpolation())
          _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier");
 
-      if (!state->ARB_shading_language_420pack_enable &&
+      if (!state->has_420pack() &&
           ($2.flags.q.precise || $2.flags.q.invariant)) {
          _mesa_glsl_error(&@1, state, "interpolation qualifiers must come "
                           "after \"precise\" or \"invariant\"");
@@ -1687,12 +1791,17 @@
        * precise qualifiers since these are useful in ARB_separate_shader_objects.
        * There is no clear spec guidance on this either.
        */
-      if (!state->ARB_shading_language_420pack_enable && $2.has_layout())
+      if (!state->has_420pack() && $2.has_layout())
          _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
 
       $$ = $1;
       $$.merge_qualifier(&@1, state, $2);
    }
+   | subroutine_qualifier type_qualifier
+   {
+      $$ = $1;
+      $$.merge_qualifier(&@1, state, $2);
+   }
    | auxiliary_storage_qualifier type_qualifier
    {
       if ($2.has_auxiliary_storage()) {
@@ -1700,7 +1809,7 @@
                           "duplicate auxiliary storage qualifier (centroid or sample)");
       }
 
-      if (!state->ARB_shading_language_420pack_enable &&
+      if (!state->has_420pack() &&
           ($2.flags.q.precise || $2.flags.q.invariant ||
            $2.has_interpolation() || $2.has_layout())) {
          _mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come "
@@ -1718,7 +1827,7 @@
       if ($2.has_storage())
          _mesa_glsl_error(&@1, state, "duplicate storage qualifier");
 
-      if (!state->ARB_shading_language_420pack_enable &&
+      if (!state->has_420pack() &&
           ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() ||
            $2.has_layout() || $2.has_auxiliary_storage())) {
          _mesa_glsl_error(&@1, state, "storage qualifiers must come after "
@@ -1734,7 +1843,7 @@
       if ($2.precision != ast_precision_none)
          _mesa_glsl_error(&@1, state, "duplicate precision qualifier");
 
-      if (!state->ARB_shading_language_420pack_enable && $2.flags.i != 0)
+      if (!state->has_420pack() && $2.flags.i != 0)
          _mesa_glsl_error(&@1, state, "precision qualifiers must come last");
 
       $$ = $2;
@@ -1758,7 +1867,11 @@
       memset(& $$, 0, sizeof($$));
       $$.flags.q.sample = 1;
    }
-   /* TODO: "patch" also goes here someday. */
+   | PATCH
+   {
+      memset(& $$, 0, sizeof($$));
+      $$.flags.q.patch = 1;
+   }
 
 storage_qualifier:
    CONST_TOK
@@ -1805,6 +1918,11 @@
       memset(& $$, 0, sizeof($$));
       $$.flags.q.uniform = 1;
    }
+   | BUFFER
+   {
+      memset(& $$, 0, sizeof($$));
+      $$.flags.q.buffer = 1;
+   }
    ;
 
 memory_qualifier:
@@ -2507,7 +2625,17 @@
       block->block_name = $2;
       block->declarations.push_degenerate_list_at_head(& $4->link);
 
-      if ($1.flags.q.uniform) {
+      if ($1.flags.q.buffer) {
+         if (!state->has_shader_storage_buffer_objects()) {
+            _mesa_glsl_error(& @1, state,
+                             "#version 430 / GL_ARB_shader_storage_buffer_object "
+                             "required for defining shader storage blocks");
+         } else if (state->ARB_shader_storage_buffer_object_warn) {
+            _mesa_glsl_warning(& @1, state,
+                               "#version 430 / GL_ARB_shader_storage_buffer_object "
+                               "required for defining shader storage blocks");
+         }
+      } else if ($1.flags.q.uniform) {
          if (!state->has_uniform_buffer_objects()) {
             _mesa_glsl_error(& @1, state,
                              "#version 140 / GL_ARB_uniform_buffer_object "
@@ -2551,11 +2679,13 @@
       uint64_t interface_type_mask;
       struct ast_type_qualifier temp_type_qualifier;
 
-      /* Get a bitmask containing only the in/out/uniform flags, allowing us
-       * to ignore other irrelevant flags like interpolation qualifiers.
+      /* Get a bitmask containing only the in/out/uniform/buffer
+       * flags, allowing us to ignore other irrelevant flags like
+       * interpolation qualifiers.
        */
       temp_type_qualifier.flags.i = 0;
       temp_type_qualifier.flags.q.uniform = true;
+      temp_type_qualifier.flags.q.buffer = true;
       temp_type_qualifier.flags.q.in = true;
       temp_type_qualifier.flags.q.out = true;
       interface_type_mask = temp_type_qualifier.flags.i;
@@ -2642,6 +2772,11 @@
       memset(& $$, 0, sizeof($$));
       $$.flags.q.uniform = 1;
    }
+   | BUFFER
+   {
+      memset(& $$, 0, sizeof($$));
+      $$.flags.q.buffer = 1;
+   }
    ;
 
 instance_name_opt:
@@ -2720,11 +2855,8 @@
 
    | layout_qualifier OUT_TOK ';'
    {
-      if (state->stage != MESA_SHADER_GEOMETRY) {
-         _mesa_glsl_error(& @1, state,
-                          "out layout qualifiers only valid in "
-                          "geometry shaders");
-      } else {
+      $$ = NULL;
+      if (state->stage == MESA_SHADER_GEOMETRY) {
          if ($1.flags.q.prim_type) {
             /* Make sure this is a valid output primitive type. */
             switch ($1.prim_type) {
@@ -2743,6 +2875,12 @@
 
          /* Allow future assigments of global out's stream id value */
          state->out_qualifier->flags.q.explicit_stream = 0;
+      } else if (state->stage == MESA_SHADER_TESS_CTRL) {
+         if (!state->out_qualifier->merge_out_qualifier(& @1, state, $1, $$))
+            YYERROR;
+      } else {
+         _mesa_glsl_error(& @1, state,
+                          "out layout qualifiers only valid in "
+                          "tessellation control or geometry shaders");
       }
-      $$ = NULL;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_symbol_table.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_symbol_table.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_symbol_table.cpp	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_symbol_table.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -36,6 +36,9 @@
       case ir_var_uniform:
          dest = &ibu;
          break;
+      case ir_var_shader_storage:
+         dest = &iss;
+         break;
       case ir_var_shader_in:
          dest = &ibi;
          break;
@@ -60,6 +63,8 @@
       switch (mode) {
       case ir_var_uniform:
          return ibu;
+      case ir_var_shader_storage:
+         return iss;
       case ir_var_shader_in:
          return ibi;
       case ir_var_shader_out:
@@ -71,24 +76,25 @@
    }
 
    symbol_table_entry(ir_variable *v)               :
-      v(v), f(0), t(0), ibu(0), ibi(0), ibo(0), a(0) {}
+      v(v), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {}
    symbol_table_entry(ir_function *f)               :
-      v(0), f(f), t(0), ibu(0), ibi(0), ibo(0), a(0) {}
+      v(0), f(f), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {}
    symbol_table_entry(const glsl_type *t)           :
-      v(0), f(0), t(t), ibu(0), ibi(0), ibo(0), a(0) {}
+      v(0), f(0), t(t), ibu(0), iss(0), ibi(0), ibo(0), a(0) {}
    symbol_table_entry(const glsl_type *t, enum ir_variable_mode mode) :
-      v(0), f(0), t(0), ibu(0), ibi(0), ibo(0), a(0)
+      v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0)
    {
       assert(t->is_interface());
       add_interface(t, mode);
    }
    symbol_table_entry(const class ast_type_specifier *a):
-      v(0), f(0), t(0), ibu(0), ibi(0), ibo(0), a(a) {}
+      v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(a) {}
 
    ir_variable *v;
    ir_function *f;
    const glsl_type *t;
    const glsl_type *ibu;
+   const glsl_type *iss;
    const glsl_type *ibi;
    const glsl_type *ibo;
    const class ast_type_specifier *a;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_types.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_types.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_types.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_types.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -25,13 +25,14 @@
 #include "main/core.h" /* for Elements, MAX2 */
 #include "glsl_parser_extras.h"
 #include "glsl_types.h"
-#include "program/hash_table.h"
+#include "util/hash_table.h"
 
 
 mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
 hash_table *glsl_type::array_types = NULL;
 hash_table *glsl_type::record_types = NULL;
 hash_table *glsl_type::interface_types = NULL;
+hash_table *glsl_type::subroutine_types = NULL;
 void *glsl_type::mem_ctx = NULL;
 
 void
@@ -122,6 +123,7 @@
       this->fields.structure[i].centroid = fields[i].centroid;
       this->fields.structure[i].sample = fields[i].sample;
       this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
+      this->fields.structure[i].patch = fields[i].patch;
    }
 
    mtx_unlock(&glsl_type::mutex);
@@ -154,11 +156,28 @@
       this->fields.structure[i].centroid = fields[i].centroid;
       this->fields.structure[i].sample = fields[i].sample;
       this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
+      this->fields.structure[i].patch = fields[i].patch;
    }
 
    mtx_unlock(&glsl_type::mutex);
 }
 
+glsl_type::glsl_type(const char *subroutine_name) :
+   gl_type(0),
+   base_type(GLSL_TYPE_SUBROUTINE),
+   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+   sampler_type(0), interface_packing(0),
+   vector_elements(0), matrix_columns(0),
+   length(0)
+{
+   mtx_lock(&glsl_type::mutex);
+
+   init_ralloc_type_ctx();
+   assert(subroutine_name != NULL);
+   this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
+   this->vector_elements = 1;
+   mtx_unlock(&glsl_type::mutex);
+}
 
 bool
 glsl_type::contains_sampler() const
@@ -217,7 +236,7 @@
    case GLSL_TYPE_ATOMIC_UINT:
       return true;
    case GLSL_TYPE_ARRAY:
-      return element_type()->contains_opaque();
+      return fields.array->contains_opaque();
    case GLSL_TYPE_STRUCT:
       for (unsigned int i = 0; i < length; i++) {
          if (fields.structure[i].type->contains_opaque())
@@ -229,6 +248,22 @@
    }
 }
 
+bool
+glsl_type::contains_subroutine() const
+{
+   if (this->is_array()) {
+      return this->fields.array->contains_subroutine();
+   } else if (this->is_record()) {
+      for (unsigned int i = 0; i < this->length; i++) {
+	 if (this->fields.structure[i].type->contains_subroutine())
+	    return true;
+      }
+      return false;
+   } else {
+      return this->is_subroutine();
+   }
+}
+
 gl_texture_index
 glsl_type::sampler_index() const
 {
@@ -324,19 +359,24 @@
 void
 _mesa_glsl_release_types(void)
 {
-   mtx_lock(&glsl_type::mutex);
-
+   /* Should only be called during atexit (either when unloading shared
+    * object, or if process terminates), so no mutex-locking should be
+    * necessary.
+    */
    if (glsl_type::array_types != NULL) {
-      hash_table_dtor(glsl_type::array_types);
+      _mesa_hash_table_destroy(glsl_type::array_types, NULL);
       glsl_type::array_types = NULL;
    }
 
    if (glsl_type::record_types != NULL) {
-      hash_table_dtor(glsl_type::record_types);
+      _mesa_hash_table_destroy(glsl_type::record_types, NULL);
       glsl_type::record_types = NULL;
    }
 
-   mtx_unlock(&glsl_type::mutex);
+   if (glsl_type::interface_types != NULL) {
+      _mesa_hash_table_destroy(glsl_type::interface_types, NULL);
+      glsl_type::interface_types = NULL;
+   }
 }
 
 
@@ -648,27 +688,28 @@
    mtx_lock(&glsl_type::mutex);
 
    if (array_types == NULL) {
-      array_types = hash_table_ctor(64, hash_table_string_hash,
-				    hash_table_string_compare);
+      array_types = _mesa_hash_table_create(NULL, _mesa_key_hash_string,
+                                            _mesa_key_string_equal);
    }
 
-   const glsl_type *t = (glsl_type *) hash_table_find(array_types, key);
-
-   if (t == NULL) {
+   const struct hash_entry *entry = _mesa_hash_table_search(array_types, key);
+   if (entry == NULL) {
       mtx_unlock(&glsl_type::mutex);
-      t = new glsl_type(base, array_size);
+      const glsl_type *t = new glsl_type(base, array_size);
       mtx_lock(&glsl_type::mutex);
 
-      hash_table_insert(array_types, (void *) t, ralloc_strdup(mem_ctx, key));
+      entry = _mesa_hash_table_insert(array_types,
+                                      ralloc_strdup(mem_ctx, key),
+                                      (void *) t);
    }
 
-   assert(t->base_type == GLSL_TYPE_ARRAY);
-   assert(t->length == array_size);
-   assert(t->fields.array == base);
+   assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_ARRAY);
+   assert(((glsl_type *) entry->data)->length == array_size);
+   assert(((glsl_type *) entry->data)->fields.array == base);
 
    mtx_unlock(&glsl_type::mutex);
 
-   return t;
+   return (glsl_type *) entry->data;
 }
 
 
@@ -716,25 +757,22 @@
       if (this->fields.structure[i].sample
           != b->fields.structure[i].sample)
          return false;
+      if (this->fields.structure[i].patch
+          != b->fields.structure[i].patch)
+         return false;
    }
 
    return true;
 }
 
 
-int
+bool
 glsl_type::record_key_compare(const void *a, const void *b)
 {
    const glsl_type *const key1 = (glsl_type *) a;
    const glsl_type *const key2 = (glsl_type *) b;
 
-   /* Return zero is the types match (there is zero difference) or non-zero
-    * otherwise.
-    */
-   if (strcmp(key1->name, key2->name) != 0)
-      return 1;
-
-   return !key1->record_compare(key2);
+   return strcmp(key1->name, key2->name) == 0 && key1->record_compare(key2);
 }
 
 
@@ -772,25 +810,27 @@
    mtx_lock(&glsl_type::mutex);
 
    if (record_types == NULL) {
-      record_types = hash_table_ctor(64, record_key_hash, record_key_compare);
+      record_types = _mesa_hash_table_create(NULL, record_key_hash,
+                                             record_key_compare);
    }
 
-   const glsl_type *t = (glsl_type *) hash_table_find(record_types, & key);
-   if (t == NULL) {
+   const struct hash_entry *entry = _mesa_hash_table_search(record_types,
+                                                            &key);
+   if (entry == NULL) {
       mtx_unlock(&glsl_type::mutex);
-      t = new glsl_type(fields, num_fields, name);
+      const glsl_type *t = new glsl_type(fields, num_fields, name);
       mtx_lock(&glsl_type::mutex);
 
-      hash_table_insert(record_types, (void *) t, t);
+      entry = _mesa_hash_table_insert(record_types, t, (void *) t);
    }
 
-   assert(t->base_type == GLSL_TYPE_STRUCT);
-   assert(t->length == num_fields);
-   assert(strcmp(t->name, name) == 0);
+   assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT);
+   assert(((glsl_type *) entry->data)->length == num_fields);
+   assert(strcmp(((glsl_type *) entry->data)->name, name) == 0);
 
    mtx_unlock(&glsl_type::mutex);
 
-   return t;
+   return (glsl_type *) entry->data;
 }
 
 
@@ -805,25 +845,58 @@
    mtx_lock(&glsl_type::mutex);
 
    if (interface_types == NULL) {
-      interface_types = hash_table_ctor(64, record_key_hash, record_key_compare);
+      interface_types = _mesa_hash_table_create(NULL, record_key_hash,
+                                                record_key_compare);
+   }
+
+   const struct hash_entry *entry = _mesa_hash_table_search(interface_types,
+                                                            &key);
+   if (entry == NULL) {
+      mtx_unlock(&glsl_type::mutex);
+      const glsl_type *t = new glsl_type(fields, num_fields,
+                                         packing, block_name);
+      mtx_lock(&glsl_type::mutex);
+
+      entry = _mesa_hash_table_insert(interface_types, t, (void *) t);
    }
 
-   const glsl_type *t = (glsl_type *) hash_table_find(interface_types, & key);
-   if (t == NULL) {
+   assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_INTERFACE);
+   assert(((glsl_type *) entry->data)->length == num_fields);
+   assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0);
+
+   mtx_unlock(&glsl_type::mutex);
+
+   return (glsl_type *) entry->data;
+}
+
+const glsl_type *
+glsl_type::get_subroutine_instance(const char *subroutine_name)
+{
+   const glsl_type key(subroutine_name);
+
+   mtx_lock(&glsl_type::mutex);
+
+   if (subroutine_types == NULL) {
+      subroutine_types = _mesa_hash_table_create(NULL, record_key_hash,
+                                                 record_key_compare);
+   }
+
+   const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types,
+                                                            &key);
+   if (entry == NULL) {
       mtx_unlock(&glsl_type::mutex);
-      t = new glsl_type(fields, num_fields, packing, block_name);
+      const glsl_type *t = new glsl_type(subroutine_name);
       mtx_lock(&glsl_type::mutex);
 
-      hash_table_insert(interface_types, (void *) t, t);
+      entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t);
    }
 
-   assert(t->base_type == GLSL_TYPE_INTERFACE);
-   assert(t->length == num_fields);
-   assert(strcmp(t->name, block_name) == 0);
+   assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE);
+   assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0);
 
    mtx_unlock(&glsl_type::mutex);
 
-   return t;
+   return (glsl_type *) entry->data;
 }
 
 
@@ -954,7 +1027,8 @@
 
    case GLSL_TYPE_IMAGE:
       return 1;
-
+   case GLSL_TYPE_SUBROUTINE:
+     return 1;
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_ATOMIC_UINT:
    case GLSL_TYPE_VOID:
@@ -978,6 +1052,7 @@
    case GLSL_TYPE_BOOL:
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SUBROUTINE:
       return 1;
 
    case GLSL_TYPE_STRUCT:
@@ -1086,7 +1161,8 @@
 	  this->fields.array->is_matrix()) {
 	 return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
       } else {
-	 assert(this->fields.array->is_record());
+	 assert(this->fields.array->is_record() ||
+                this->fields.array->is_array());
 	 return this->fields.array->std140_base_alignment(row_major);
       }
    }
@@ -1330,6 +1406,7 @@
    case GLSL_TYPE_IMAGE:
    case GLSL_TYPE_ATOMIC_UINT:
    case GLSL_TYPE_VOID:
+   case GLSL_TYPE_SUBROUTINE:
    case GLSL_TYPE_ERROR:
       break;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_types.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_types.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/glsl_types.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/glsl_types.h	2015-09-16 14:36:09.000000000 +0000
@@ -59,6 +59,7 @@
    GLSL_TYPE_INTERFACE,
    GLSL_TYPE_ARRAY,
    GLSL_TYPE_VOID,
+   GLSL_TYPE_SUBROUTINE,
    GLSL_TYPE_ERROR
 };
 
@@ -228,18 +229,6 @@
    const glsl_type *get_scalar_type() const;
 
    /**
-    * Query the type of elements in an array
-    *
-    * \return
-    * Pointer to the type of elements in the array for array types, or \c NULL
-    * for non-array types.
-    */
-   const glsl_type *element_type() const
-   {
-      return is_array() ? fields.array : NULL;
-   }
-
-   /**
     * Get the instance of a built-in scalar, vector, or matrix type
     */
    static const glsl_type *get_instance(unsigned base_type, unsigned rows,
@@ -276,6 +265,11 @@
 						  const char *block_name);
 
    /**
+    * Get the instance of a subroutine type
+    */
+   static const glsl_type *get_subroutine_instance(const char *subroutine_name);
+
+   /**
     * Get the type resulting from a multiplication of \p type_a * \p type_b
     */
    static const glsl_type *get_mul_type(const glsl_type *type_a,
@@ -526,6 +520,13 @@
    /**
     * Query if a type is unnamed/anonymous (named by the parser)
     */
+
+   bool is_subroutine() const
+   {
+      return base_type == GLSL_TYPE_SUBROUTINE;
+   }
+   bool contains_subroutine() const;
+
    bool is_anonymous() const
    {
       return !strncmp(name, "#anon", 5);
@@ -556,7 +557,7 @@
       if (base_type == GLSL_TYPE_ATOMIC_UINT)
          return ATOMIC_COUNTER_SIZE;
       else if (is_array())
-         return length * element_type()->atomic_size();
+         return length * fields.array->atomic_size();
       else
          return 0;
    }
@@ -691,6 +692,9 @@
    /** Constructor for array types */
    glsl_type(const glsl_type *array, unsigned length);
 
+   /** Constructor for subroutine types */
+   glsl_type(const char *name);
+
    /** Hash table containing the known array types. */
    static struct hash_table *array_types;
 
@@ -700,7 +704,10 @@
    /** Hash table containing the known interface types. */
    static struct hash_table *interface_types;
 
-   static int record_key_compare(const void *a, const void *b);
+   /** Hash table containing the known subroutine types. */
+   static struct hash_table *subroutine_types;
+
+   static bool record_key_compare(const void *a, const void *b);
    static unsigned record_key_hash(const void *key);
 
    /**
@@ -764,10 +771,29 @@
    unsigned matrix_layout:2;
 
    /**
+    * For interface blocks, 1 if this variable is a per-patch input or output
+    * (as in ir_variable::patch). 0 otherwise.
+    */
+   unsigned patch:1;
+
+   /**
     * For interface blocks, it has a value if this variable uses multiple vertex
     * streams (as in ir_variable::stream). -1 otherwise.
     */
    int stream;
+
+   glsl_struct_field(const struct glsl_type *_type, const char *_name)
+      : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
+        sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+        stream(-1)
+   {
+      /* empty */
+   }
+
+   glsl_struct_field()
+   {
+      /* empty */
+   }
 };
 
 static inline unsigned int
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/hir_field_selection.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/hir_field_selection.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/hir_field_selection.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/hir_field_selection.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -56,45 +56,6 @@
 			  "structure",
 			  expr->primary_expression.identifier);
       }
-   } else if (expr->subexpressions[1] != NULL) {
-      /* Handle "method calls" in GLSL 1.20 - namely, array.length() */
-      state->check_version(120, 300, &loc, "methods not supported");
-
-      ast_expression *call = expr->subexpressions[1];
-      assert(call->oper == ast_function_call);
-
-      const char *method;
-      method = call->subexpressions[0]->primary_expression.identifier;
-
-      if (strcmp(method, "length") == 0) {
-         if (!call->expressions.is_empty())
-            _mesa_glsl_error(&loc, state, "length method takes no arguments");
-
-         if (op->type->is_array()) {
-            if (op->type->is_unsized_array())
-               _mesa_glsl_error(&loc, state, "length called on unsized array");
-
-            result = new(ctx) ir_constant(op->type->array_size());
-         } else if (op->type->is_vector()) {
-            if (state->ARB_shading_language_420pack_enable) {
-               /* .length() returns int. */
-               result = new(ctx) ir_constant((int) op->type->vector_elements);
-            } else {
-               _mesa_glsl_error(&loc, state, "length method on matrix only available"
-                                             "with ARB_shading_language_420pack");
-            }
-         } else if (op->type->is_matrix()) {
-            if (state->ARB_shading_language_420pack_enable) {
-               /* .length() returns int. */
-               result = new(ctx) ir_constant((int) op->type->matrix_columns);
-            } else {
-               _mesa_glsl_error(&loc, state, "length method on matrix only available"
-                                             "with ARB_shading_language_420pack");
-            }
-         }
-      } else {
-	 _mesa_glsl_error(&loc, state, "unknown method: `%s'", method);
-      }
    } else if (op->type->is_vector() ||
               (state->ARB_shading_language_420pack_enable &&
                op->type->is_scalar())) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_builder.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_builder.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_builder.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_builder.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -338,6 +338,12 @@
    return expr(ir_unop_sign, a);
 }
 
+ir_expression *
+subr_to_int(operand a)
+{
+   return expr(ir_unop_subroutine_to_int, a);
+}
+
 ir_expression*
 equal(operand a, operand b)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_builder.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_builder.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_builder.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_builder.h	2015-09-16 14:36:09.000000000 +0000
@@ -153,6 +153,7 @@
 ir_expression *log(operand a);
 ir_expression *sign(operand a);
 
+ir_expression *subr_to_int(operand a);
 ir_expression *equal(operand a, operand b);
 ir_expression *nequal(operand a, operand b);
 ir_expression *less(operand a, operand b);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_clone.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_clone.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_clone.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_clone.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -267,6 +267,12 @@
 {
    ir_function *copy = new(mem_ctx) ir_function(this->name);
 
+   copy->is_subroutine = this->is_subroutine;
+   copy->num_subroutine_types = this->num_subroutine_types;
+   copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
+   for (int i = 0; i < copy->num_subroutine_types; i++)
+     copy->subroutine_types[i] = this->subroutine_types[i];
+
    foreach_in_list(const ir_function_signature, sig, &this->signatures) {
       ir_function_signature *sig_copy = sig->clone(mem_ctx, ht);
       copy->add_signature(sig_copy);
@@ -362,6 +368,7 @@
    case GLSL_TYPE_ATOMIC_UINT:
    case GLSL_TYPE_VOID:
    case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_SUBROUTINE:
    case GLSL_TYPE_INTERFACE:
       assert(!"Should not get here.");
       break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_constant_expression.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_constant_expression.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_constant_expression.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_constant_expression.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -40,12 +40,7 @@
 #include "glsl_types.h"
 #include "program/hash_table.h"
 
-#if defined(_MSC_VER) && (_MSC_VER < 1800)
-static int isnormal(double x)
-{
-   return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN;
-}
-#elif defined(__SUNPRO_CC) && !defined(isnormal)
+#if defined(__SUNPRO_CC) && !defined(isnormal)
 #include <ieeefp.h>
 static int isnormal(double x)
 {
@@ -53,13 +48,6 @@
 }
 #endif
 
-#if defined(_MSC_VER)
-static double copysign(double x, double y)
-{
-   return _copysign(x, y);
-}
-#endif
-
 static float
 dot_f(ir_constant *op0, ir_constant *op1)
 {
@@ -242,12 +230,9 @@
      *    follows:
      *
      *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
-     *
-     * We must first cast the float to an int, because casting a negative
-     * float to a uint is undefined.
      */
-   return (uint8_t) (int)
-          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
+   return (uint8_t)
+          _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
 }
 
 /**
@@ -264,12 +249,9 @@
      *    follows:
      *
      *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
-     *
-     * We must first cast the float to an int, because casting a negative
-     * float to a uint is undefined.
      */
-   return (uint16_t) (int)
-          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+   return (uint16_t)
+          _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
 }
 
 /**
@@ -1674,10 +1656,10 @@
             if (!isnormal(data.d[c]))
                data.d[c] = copysign(0.0, op[0]->value.d[c]);
          } else {
-            data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
+            data.f[c] = ldexpf(op[0]->value.f[c], op[1]->value.i[c]);
             /* Flush subnormal values to zero. */
             if (!isnormal(data.f[c]))
-               data.f[c] = copysign(0.0f, op[0]->value.f[c]);
+               data.f[c] = copysignf(0.0f, op[0]->value.f[c]);
          }
       }
       break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -260,6 +260,7 @@
    case ir_unop_bit_count:
    case ir_unop_find_msb:
    case ir_unop_find_lsb:
+   case ir_unop_subroutine_to_int:
       this->type = glsl_type::get_instance(GLSL_TYPE_INT,
 					   op0->type->vector_elements, 1);
       break;
@@ -568,6 +569,7 @@
    "frexp_sig",
    "frexp_exp",
    "noise",
+   "subroutine_to_int",
    "interpolate_at_centroid",
    "+",
    "-",
@@ -912,7 +914,7 @@
       c->array_elements = ralloc_array(c, ir_constant *, type->length);
 
       for (unsigned i = 0; i < type->length; i++)
-	 c->array_elements[i] = ir_constant::zero(c, type->element_type());
+	 c->array_elements[i] = ir_constant::zero(c, type->fields.array);
    }
 
    if (type->is_record()) {
@@ -1341,7 +1343,7 @@
    const glsl_type *const vt = this->array->type;
 
    if (vt->is_array()) {
-      type = vt->element_type();
+      type = vt->fields.array;
    } else if (vt->is_matrix()) {
       type = vt->column_type();
    } else if (vt->is_vector()) {
@@ -1643,6 +1645,7 @@
    this->data.read_only = false;
    this->data.centroid = false;
    this->data.sample = false;
+   this->data.patch = false;
    this->data.invariant = false;
    this->data.how_declared = ir_var_declared_normally;
    this->data.mode = mode;
@@ -1785,6 +1788,7 @@
 	  a->data.interpolation != b->data.interpolation ||
 	  a->data.centroid != b->data.centroid ||
           a->data.sample != b->data.sample ||
+          a->data.patch != b->data.patch ||
           a->data.image_read_only != b->data.image_read_only ||
           a->data.image_write_only != b->data.image_write_only ||
           a->data.image_coherent != b->data.image_coherent ||
@@ -1851,6 +1855,7 @@
 steal_memory(ir_instruction *ir, void *new_ctx)
 {
    ir_variable *var = ir->as_variable();
+   ir_function *fn = ir->as_function();
    ir_constant *constant = ir->as_constant();
    if (var != NULL && var->constant_value != NULL)
       steal_memory(var->constant_value, ir);
@@ -1858,6 +1863,9 @@
    if (var != NULL && var->constant_initializer != NULL)
       steal_memory(var->constant_initializer, ir);
 
+   if (fn != NULL && fn->subroutine_types)
+      ralloc_steal(new_ctx, fn->subroutine_types);
+
    /* The components of aggregate constants are not visited by the normal
     * visitor, so steal their values by hand.
     */
@@ -1975,6 +1983,9 @@
    case ir_var_uniform:
       return "uniform";
 
+   case ir_var_shader_storage:
+      return "buffer";
+
    case ir_var_shader_in:
       return "shader input";
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_function.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_function.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_function.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_function.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -72,6 +72,7 @@
       switch ((enum ir_variable_mode)(param->data.mode)) {
       case ir_var_auto:
       case ir_var_uniform:
+      case ir_var_shader_storage:
       case ir_var_temporary:
 	 /* These are all error conditions.  It is invalid for a parameter to
 	  * a function to be declared as auto (not in, out, or inout) or
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir.h	2015-09-16 14:36:09.000000000 +0000
@@ -78,6 +78,7 @@
    ir_type_discard,
    ir_type_emit_vertex,
    ir_type_end_primitive,
+   ir_type_barrier,
    ir_type_max, /**< maximum ir_type enum number, for validation */
    ir_type_unset = ir_type_max
 };
@@ -323,6 +324,7 @@
 enum ir_variable_mode {
    ir_var_auto = 0,     /**< Function local variables and globals. */
    ir_var_uniform,      /**< Variable declared as a uniform. */
+   ir_var_shader_storage,   /**< Variable declared as an ssbo. */
    ir_var_shader_in,
    ir_var_shader_out,
    ir_var_function_in,
@@ -440,11 +442,14 @@
    glsl_interp_qualifier determine_interpolation_mode(bool flat_shade);
 
    /**
-    * Determine whether or not a variable is part of a uniform block.
+    * Determine whether or not a variable is part of a uniform or
+    * shader storage block.
     */
-   inline bool is_in_uniform_block() const
+   inline bool is_in_buffer_block() const
    {
-      return this->data.mode == ir_var_uniform && this->interface_type != NULL;
+      return (this->data.mode == ir_var_uniform ||
+              this->data.mode == ir_var_shader_storage) &&
+             this->interface_type != NULL;
    }
 
    /**
@@ -617,6 +622,7 @@
       unsigned read_only:1;
       unsigned centroid:1;
       unsigned sample:1;
+      unsigned patch:1;
       unsigned invariant:1;
       unsigned precise:1;
 
@@ -1120,6 +1126,21 @@
     * List of ir_function_signature for each overloaded function with this name.
     */
    struct exec_list signatures;
+
+   /**
+    * is this function a subroutine type declaration
+    * e.g. subroutine void type1(float arg1);
+    */
+   bool is_subroutine;
+
+   /**
+    * is this function associated to a subroutine type
+    * e.g. subroutine (type1, type2) function_name { function_body };
+    * would have num_subroutine_types 2,
+    * and pointers to the type1 and type2 types.
+    */
+   int num_subroutine_types;
+   const struct glsl_type **subroutine_types;
 };
 
 inline const char *ir_function_signature::function_name() const
@@ -1379,6 +1400,7 @@
 
    ir_unop_noise,
 
+   ir_unop_subroutine_to_int,
    /**
     * Interpolate fs input at centroid
     *
@@ -1690,7 +1712,18 @@
    ir_call(ir_function_signature *callee,
 	   ir_dereference_variable *return_deref,
 	   exec_list *actual_parameters)
-      : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee)
+      : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(NULL), array_idx(NULL)
+   {
+      assert(callee->return_type != NULL);
+      actual_parameters->move_nodes_to(& this->actual_parameters);
+      this->use_builtin = callee->is_builtin();
+   }
+
+   ir_call(ir_function_signature *callee,
+	   ir_dereference_variable *return_deref,
+	   exec_list *actual_parameters,
+	   ir_variable *var, ir_rvalue *array_idx)
+      : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(var), array_idx(array_idx)
    {
       assert(callee->return_type != NULL);
       actual_parameters->move_nodes_to(& this->actual_parameters);
@@ -1738,6 +1771,14 @@
 
    /** Should this call only bind to a built-in function? */
    bool use_builtin;
+
+   /*
+    * ARB_shader_subroutine support -
+    * the subroutine uniform variable and array index
+    * rvalue to be used in the lowering pass later.
+    */
+   ir_variable *sub_var;
+   ir_rvalue *array_idx;
 };
 
 
@@ -2396,6 +2437,29 @@
    ir_rvalue *stream;
 };
 
+/**
+ * IR instruction for tessellation control and compute shader barrier.
+ */
+class ir_barrier : public ir_instruction {
+public:
+   ir_barrier()
+      : ir_instruction(ir_type_barrier)
+   {
+   }
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const
+   {
+      return new(mem_ctx) ir_barrier();
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+};
+
 /*@}*/
 
 /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_hierarchical_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_hierarchical_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_hierarchical_visitor.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_hierarchical_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -80,6 +80,15 @@
 }
 
 ir_visitor_status
+ir_hierarchical_visitor::visit(ir_barrier *ir)
+{
+   if (this->callback_enter != NULL)
+      this->callback_enter(ir, this->data_enter);
+
+   return visit_continue;
+}
+
+ir_visitor_status
 ir_hierarchical_visitor::visit_enter(ir_loop *ir)
 {
    if (this->callback_enter != NULL)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_hierarchical_visitor.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_hierarchical_visitor.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_hierarchical_visitor.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_hierarchical_visitor.h	2015-09-16 14:36:09.000000000 +0000
@@ -59,7 +59,7 @@
  * in the composite's \c accept method.  The \c accept method for a leaf-node
  * class will simply call the \c visit method, as usual, and pass its return
  * value on.  The \c accept method for internal-node classes will call the \c
- * visit_enter method, call the \c accpet method of each child node, and,
+ * visit_enter method, call the \c accept method of each child node, and,
  * finally, call the \c visit_leave method.  If any of these return a value
  * other that \c visit_continue, the correct action must be taken.
  *
@@ -87,6 +87,7 @@
    virtual ir_visitor_status visit(class ir_variable *);
    virtual ir_visitor_status visit(class ir_constant *);
    virtual ir_visitor_status visit(class ir_loop_jump *);
+   virtual ir_visitor_status visit(class ir_barrier *);
 
    /**
     * ir_dereference_variable isn't technically a leaf, but it is treated as a
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_hv_accept.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_hv_accept.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_hv_accept.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_hv_accept.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -429,3 +429,9 @@
 
    return (s == visit_stop) ? s : v->visit_leave(this);
 }
+
+ir_visitor_status
+ir_barrier::accept(ir_hierarchical_visitor *v)
+{
+   return v->visit(this);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_optimization.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_optimization.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_optimization.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_optimization.h	2015-09-16 14:36:09.000000000 +0000
@@ -114,12 +114,13 @@
 void lower_discard_flow(exec_list *instructions);
 bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
 bool lower_noise(exec_list *instructions);
-bool lower_variable_index_to_cond_assign(exec_list *instructions,
-    bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
+bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
+    exec_list *instructions, bool lower_input, bool lower_output,
+    bool lower_temp, bool lower_uniform);
 bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool lower_const_arrays_to_uniforms(exec_list *instructions);
 bool lower_clip_distance(gl_shader *shader);
-void lower_output_reads(exec_list *instructions);
+void lower_output_reads(unsigned stage, exec_list *instructions);
 bool lower_packing_builtins(exec_list *instructions, int op_mask);
 void lower_ubo_reference(struct gl_shader *shader, exec_list *instructions);
 void lower_packed_varyings(void *mem_ctx,
@@ -132,9 +133,12 @@
 bool lower_offset_arrays(exec_list *instructions);
 void optimize_dead_builtin_variables(exec_list *instructions,
                                      enum ir_variable_mode other);
+bool lower_tess_level(gl_shader *shader);
 
 bool lower_vertex_id(gl_shader *shader);
 
+bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
+
 ir_rvalue *
 compare_index_block(exec_list *instructions, ir_variable *index,
 		    unsigned base, unsigned components, void *mem_ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_print_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_print_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_print_visitor.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_print_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -25,7 +25,7 @@
 #include "glsl_types.h"
 #include "glsl_parser_extras.h"
 #include "main/macros.h"
-#include "program/hash_table.h"
+#include "util/hash_table.h"
 
 static void print_type(FILE *f, const glsl_type *t);
 
@@ -72,7 +72,7 @@
       if (ir->ir_type != ir_type_function)
 	 fprintf(f, "\n");
    }
-   fprintf(f, "\n)");
+   fprintf(f, ")\n");
 }
 
 void
@@ -89,14 +89,14 @@
 {
    indentation = 0;
    printable_names =
-      hash_table_ctor(32, hash_table_pointer_hash, hash_table_pointer_compare);
+      _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
    symbols = _mesa_symbol_table_ctor();
    mem_ctx = ralloc_context(NULL);
 }
 
 ir_print_visitor::~ir_print_visitor()
 {
-   hash_table_dtor(printable_names);
+   _mesa_hash_table_destroy(printable_names, NULL);
    _mesa_symbol_table_dtor(symbols);
    ralloc_free(mem_ctx);
 }
@@ -121,18 +121,22 @@
    }
 
    /* Do we already have a name for this variable? */
-   const char *name = (const char *) hash_table_find(this->printable_names, var);
-   if (name != NULL)
-      return name;
+   struct hash_entry * entry =
+      _mesa_hash_table_search(this->printable_names, var);
+
+   if (entry != NULL) {
+      return (const char *) entry->data;
+   }
 
    /* If there's no conflict, just use the original name */
+   const char* name = NULL;
    if (_mesa_symbol_table_find_symbol(this->symbols, -1, var->name) == NULL) {
       name = var->name;
    } else {
       static unsigned i = 1;
       name = ralloc_asprintf(this->mem_ctx, "%s@%u", var->name, ++i);
    }
-   hash_table_insert(this->printable_names, (void *) name, var);
+   _mesa_hash_table_insert(this->printable_names, var, (void *) name);
    _mesa_symbol_table_add_symbol(this->symbols, -1, name, var);
    return name;
 }
@@ -161,10 +165,16 @@
 {
    fprintf(f, "(declare ");
 
+   char loc[256] = {0};
+   if (ir->data.location != -1)
+      snprintf(loc, sizeof(loc), "location=%i ", ir->data.location);
+
    const char *const cent = (ir->data.centroid) ? "centroid " : "";
    const char *const samp = (ir->data.sample) ? "sample " : "";
+   const char *const patc = (ir->data.patch) ? "patch " : "";
    const char *const inv = (ir->data.invariant) ? "invariant " : "";
-   const char *const mode[] = { "", "uniform ", "shader_in ", "shader_out ",
+   const char *const mode[] = { "", "uniform ", "shader_storage ",
+                                "shader_in ", "shader_out ",
                                 "in ", "out ", "inout ",
 			        "const_in ", "sys ", "temporary " };
    STATIC_ASSERT(ARRAY_SIZE(mode) == ir_var_mode_count);
@@ -172,8 +182,8 @@
    const char *const interp[] = { "", "smooth", "flat", "noperspective" };
    STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_QUALIFIER_COUNT);
 
-   fprintf(f, "(%s%s%s%s%s%s) ",
-           cent, samp, inv, mode[ir->data.mode],
+   fprintf(f, "(%s%s%s%s%s%s%s%s) ",
+           loc, cent, samp, patc, inv, mode[ir->data.mode],
            stream[ir->data.stream],
            interp[ir->data.interpolation]);
 
@@ -225,7 +235,7 @@
 
 void ir_print_visitor::visit(ir_function *ir)
 {
-   fprintf(f, "(function %s\n", ir->name);
+   fprintf(f, "(%sfunction %s\n", ir->is_subroutine ? "subroutine " : "", ir->name);
    indentation++;
    foreach_in_list(ir_function_signature, sig, &ir->signatures) {
       indent();
@@ -573,5 +583,10 @@
    fprintf(f, "(end-primitive ");
    ir->stream->accept(this);
    fprintf(f, ")\n");
+}
 
+void
+ir_print_visitor::visit(ir_barrier *ir)
+{
+   fprintf(f, "(barrier)\n");
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_print_visitor.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_print_visitor.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_print_visitor.h	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_print_visitor.h	2015-09-16 14:36:09.000000000 +0000
@@ -71,6 +71,7 @@
    virtual void visit(ir_loop_jump *);
    virtual void visit(ir_emit_vertex *);
    virtual void visit(ir_end_primitive *);
+   virtual void visit(ir_barrier *);
    /*@}*/
 
 private:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_reader.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_reader.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_reader.cpp	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_reader.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -63,6 +63,7 @@
    ir_texture *read_texture(s_expression *);
    ir_emit_vertex *read_emit_vertex(s_expression *);
    ir_end_primitive *read_end_primitive(s_expression *);
+   ir_barrier *read_barrier(s_expression *);
 
    ir_dereference *read_dereference(s_expression *);
    ir_dereference_variable *read_var_ref(s_expression *);
@@ -375,6 +376,8 @@
       inst = read_emit_vertex(list);
    } else if (strcmp(tag->value(), "end-primitive") == 0) {
       inst = read_end_primitive(list);
+   } else if (strcmp(tag->value(), "barrier") == 0) {
+      inst = read_barrier(list);
    } else {
       inst = read_rvalue(list);
       if (inst == NULL)
@@ -414,10 +417,14 @@
 	 var->data.centroid = 1;
       } else if (strcmp(qualifier->value(), "sample") == 0) {
          var->data.sample = 1;
+      } else if (strcmp(qualifier->value(), "patch") == 0) {
+         var->data.patch = 1;
       } else if (strcmp(qualifier->value(), "invariant") == 0) {
 	 var->data.invariant = 1;
       } else if (strcmp(qualifier->value(), "uniform") == 0) {
 	 var->data.mode = ir_var_uniform;
+      } else if (strcmp(qualifier->value(), "shader_storage") == 0) {
+	 var->data.mode = ir_var_shader_storage;
       } else if (strcmp(qualifier->value(), "auto") == 0) {
 	 var->data.mode = ir_var_auto;
       } else if (strcmp(qualifier->value(), "in") == 0) {
@@ -1142,3 +1149,15 @@
    ir_read_error(NULL, "when reading end-primitive");
    return NULL;
 }
+
+ir_barrier *
+ir_reader::read_barrier(s_expression *expr)
+{
+   s_pattern pat[] = { "barrier" };
+
+   if (MATCH(expr, pat)) {
+      return new(mem_ctx) ir_barrier();
+   }
+   ir_read_error(NULL, "when reading barrier");
+   return NULL;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_set_program_inouts.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_set_program_inouts.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_set_program_inouts.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_set_program_inouts.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -103,10 +103,26 @@
    for (int i = 0; i < len; i++) {
       bool dual_slot = is_dual_slot(var);
       int idx = var->data.location + var->data.index + offset + i;
-      GLbitfield64 bitfield = BITFIELD64_BIT(idx);
+      bool is_patch_generic = var->data.patch &&
+                              idx != VARYING_SLOT_TESS_LEVEL_INNER &&
+                              idx != VARYING_SLOT_TESS_LEVEL_OUTER;
+      GLbitfield64 bitfield;
+
+      if (is_patch_generic) {
+         assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX);
+         bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0);
+      }
+      else {
+         assert(idx < VARYING_SLOT_MAX);
+         bitfield = BITFIELD64_BIT(idx);
+      }
 
       if (var->data.mode == ir_var_shader_in) {
-         prog->InputsRead |= bitfield;
+         if (is_patch_generic)
+            prog->PatchInputsRead |= bitfield;
+         else
+            prog->InputsRead |= bitfield;
+
          if (dual_slot)
             prog->DoubleInputsRead |= bitfield;
          if (is_fragment_shader) {
@@ -122,7 +138,10 @@
          prog->SystemValuesRead |= bitfield;
       } else {
          assert(var->data.mode == ir_var_shader_out);
-	 prog->OutputsWritten |= bitfield;
+         if (is_patch_generic)
+            prog->PatchOutputsWritten |= bitfield;
+         else
+            prog->OutputsWritten |= bitfield;
       }
    }
 }
@@ -140,6 +159,24 @@
       type = type->fields.array;
    }
 
+   if (this->shader_stage == MESA_SHADER_TESS_CTRL &&
+       var->data.mode == ir_var_shader_in) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
+   if (this->shader_stage == MESA_SHADER_TESS_CTRL &&
+       var->data.mode == ir_var_shader_out && !var->data.patch) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
+   if (this->shader_stage == MESA_SHADER_TESS_EVAL &&
+       var->data.mode == ir_var_shader_in && !var->data.patch) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
    mark(this->prog, var, 0, type->count_attribute_slots(),
         this->shader_stage == MESA_SHADER_FRAGMENT);
 }
@@ -165,6 +202,9 @@
  *
  * *Except gl_PrimitiveIDIn, as noted below.
  *
+ * For tessellation control shaders all inputs and non-patch outputs are
+ * arrays. For tessellation evaluation shaders non-patch inputs are arrays.
+ *
  * If the index can't be interpreted as a constant, or some other problem
  * occurs, then nothing will be marked and false will be returned.
  */
@@ -184,6 +224,24 @@
       type = type->fields.array;
    }
 
+   if (this->shader_stage == MESA_SHADER_TESS_CTRL &&
+       var->data.mode == ir_var_shader_in) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
+   if (this->shader_stage == MESA_SHADER_TESS_CTRL &&
+       var->data.mode == ir_var_shader_out && !var->data.patch) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
+   if (this->shader_stage == MESA_SHADER_TESS_EVAL &&
+       var->data.mode == ir_var_shader_in && !var->data.patch) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
    /* The code below only handles:
     *
     * - Indexing into matrices
@@ -242,6 +300,22 @@
    return true;
 }
 
+/* Is \c var arrayed per vertex (rather than per patch) in \c stage? */
+static bool
+is_multiple_vertices(gl_shader_stage stage, ir_variable *var)
+{
+   const bool tess_ctrl = (stage == MESA_SHADER_TESS_CTRL);
+   if (var->data.patch)
+      return false;
+   if (var->data.mode == ir_var_shader_out)
+      return tess_ctrl;
+   if (var->data.mode != ir_var_shader_in)
+      return false;
+
+   return tess_ctrl || stage == MESA_SHADER_GEOMETRY ||
+          stage == MESA_SHADER_TESS_EVAL;
+}
+
 ir_visitor_status
 ir_set_program_inouts_visitor::visit_enter(ir_dereference_array *ir)
 {
@@ -256,10 +330,9 @@
        */
       if (ir_dereference_variable * const deref_var =
           inner_array->array->as_dereference_variable()) {
-         if (this->shader_stage == MESA_SHADER_GEOMETRY &&
-             deref_var->var->data.mode == ir_var_shader_in) {
-            /* foo is a geometry shader input, so i is the vertex, and j the
-             * part of the input we're accessing.
+         if (is_multiple_vertices(this->shader_stage, deref_var->var)) {
+            /* foo is a geometry or tessellation shader input, so i is
+             * the vertex, and j the part of the input we're accessing.
              */
             if (try_mark_partial_variable(deref_var->var, ir->array_index))
             {
@@ -275,10 +348,9 @@
    } else if (ir_dereference_variable * const deref_var =
               ir->array->as_dereference_variable()) {
       /* ir => foo[i], where foo is a variable. */
-      if (this->shader_stage == MESA_SHADER_GEOMETRY &&
-          deref_var->var->data.mode == ir_var_shader_in) {
-         /* foo is a geometry shader input, so i is the vertex, and we're
-          * accessing the entire input.
+      if (is_multiple_vertices(this->shader_stage, deref_var->var)) {
+         /* foo is a geometry or tessellation shader input, so i is
+          * the vertex, and we're accessing the entire input.
           */
          mark_whole_variable(deref_var->var);
          /* We've now taken care of foo, but i might contain a subexpression
@@ -353,6 +425,8 @@
 
    prog->InputsRead = 0;
    prog->OutputsWritten = 0;
+   prog->PatchInputsRead = 0;
+   prog->PatchOutputsWritten = 0;
    prog->SystemValuesRead = 0;
    if (shader_stage == MESA_SHADER_FRAGMENT) {
       gl_fragment_program *fprog = (gl_fragment_program *) prog;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_uniform.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_uniform.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_uniform.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_uniform.h	2015-09-16 14:36:09.000000000 +0000
@@ -114,6 +114,8 @@
 
    struct gl_opaque_uniform_index image[MESA_SHADER_STAGES];
 
+   struct gl_opaque_uniform_index subroutine[MESA_SHADER_STAGES];
+
    /**
     * Storage used by the driver for the uniform
     */
@@ -173,14 +175,25 @@
    /**
     * The 'base location' for this uniform in the uniform remap table. For
     * arrays this is the first element in the array.
+    * For subroutines it refers to the shader's subroutine uniform remap table.
     */
    unsigned remap_location;
 
    /**
+    * The number of compatible subroutines with this subroutine uniform.
+    */
+   unsigned num_compatible_subroutines;
+
+   /**
     * This is a compiler-generated uniform that should not be advertised
     * via the API.
     */
    bool hidden;
+
+   /**
+    * This is a built-in uniform that should not be modified through any gl API.
+    */
+   bool builtin;
 };
 
 #ifdef __cplusplus
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_validate.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_validate.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_validate.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_validate.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -35,7 +35,8 @@
 
 #include "ir.h"
 #include "ir_hierarchical_visitor.h"
-#include "program/hash_table.h"
+#include "util/hash_table.h"
+#include "util/set.h"
 #include "glsl_types.h"
 
 namespace {
@@ -44,18 +45,18 @@
 public:
    ir_validate()
    {
-      this->ht = hash_table_ctor(0, hash_table_pointer_hash,
-				 hash_table_pointer_compare);
+      this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
 
       this->current_function = NULL;
 
       this->callback_enter = ir_validate::validate_ir;
-      this->data_enter = ht;
+      this->data_enter = ir_set;
    }
 
    ~ir_validate()
    {
-      hash_table_dtor(this->ht);
+      _mesa_set_destroy(this->ir_set, NULL);
    }
 
    virtual ir_visitor_status visit(ir_variable *v);
@@ -80,7 +81,7 @@
 
    ir_function *current_function;
 
-   struct hash_table *ht;
+   struct set *ir_set;
 };
 
 } /* anonymous namespace */
@@ -94,7 +95,7 @@
       abort();
    }
 
-   if (hash_table_find(ht, ir->var) == NULL) {
+   if (_mesa_set_search(ir_set, ir->var) == NULL) {
       printf("ir_dereference_variable @ %p specifies undeclared variable "
 	     "`%s' @ %p\n",
 	     (void *) ir, ir->var->name, (void *) ir->var);
@@ -447,6 +448,10 @@
              ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->type->base_type == GLSL_TYPE_INT);
       break;
+   case ir_unop_subroutine_to_int:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_SUBROUTINE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
    case ir_binop_add:
    case ir_binop_sub:
    case ir_binop_mul:
@@ -730,8 +735,7 @@
    if (ir->name && ir->is_name_ralloced())
       assert(ralloc_parent(ir->name) == ir);
 
-   hash_table_insert(ht, ir, ir);
-
+   _mesa_set_add(ir_set, ir);
 
    /* If a variable is an array, verify that the maximum array index is in
     * bounds.  There was once an error in AST-to-HIR conversion that set this
@@ -885,15 +889,15 @@
 void
 ir_validate::validate_ir(ir_instruction *ir, void *data)
 {
-   struct hash_table *ht = (struct hash_table *) data;
+   struct set *ir_set = (struct set *) data;
 
-   if (hash_table_find(ht, ir)) {
+   if (_mesa_set_search(ir_set, ir)) {
       printf("Instruction node present twice in ir tree:\n");
       ir->print();
       printf("\n");
       abort();
    }
-   hash_table_insert(ht, ir, ir);
+   _mesa_set_add(ir_set, ir);
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_visitor.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_visitor.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/ir_visitor.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/ir_visitor.h	2015-09-16 14:36:09.000000000 +0000
@@ -65,6 +65,7 @@
    virtual void visit(class ir_loop_jump *) = 0;
    virtual void visit(class ir_emit_vertex *) = 0;
    virtual void visit(class ir_end_primitive *) = 0;
+   virtual void visit(class ir_barrier *) = 0;
    /*@}*/
 };
 
@@ -85,6 +86,7 @@
    virtual void visit(class ir_call *) {}
    virtual void visit(class ir_emit_vertex *) {}
    virtual void visit(class ir_end_primitive *) {}
+   virtual void visit(class ir_barrier *) {}
 };
 #endif /* __cplusplus */
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_atomics.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_atomics.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_atomics.cpp	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_atomics.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -207,7 +207,7 @@
          storage->atomic_buffer_index = i;
          storage->offset = var->data.atomic.offset;
          storage->array_stride = (var->type->is_array() ?
-                                  var->type->element_type()->atomic_size() : 0);
+                                  var->type->without_array()->atomic_size() : 0);
       }
 
       /* Assign stage-specific fields. */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/linker.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/linker.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/linker.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/linker.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -224,7 +224,7 @@
          return visit_continue;
       }
 
-      var->type = glsl_type::get_array_instance(var->type->element_type(),
+      var->type = glsl_type::get_array_instance(var->type->fields.array,
                                                 this->num_vertices);
       var->data.max_array_access = this->num_vertices - 1;
 
@@ -245,11 +245,149 @@
    {
       const glsl_type *const vt = ir->array->type;
       if (vt->is_array())
-         ir->type = vt->element_type();
+         ir->type = vt->fields.array;
       return visit_continue;
    }
 };
 
+/* Resizes arrayed, non-patch TES inputs to a fixed vertex count and updates
+ * the types of the dereferences that access them. */
+class tess_eval_array_resize_visitor : public ir_hierarchical_visitor {
+public:
+   unsigned num_vertices;
+   gl_shader_program *prog;
+
+   tess_eval_array_resize_visitor(unsigned num_vertices, gl_shader_program *prog)
+      : num_vertices(num_vertices), prog(prog)
+   {
+   }
+
+   virtual ~tess_eval_array_resize_visitor() {}
+
+   virtual ir_visitor_status visit(ir_variable *var)
+   {
+      /* Only arrayed, non-patch shader inputs get resized. */
+      const bool per_vertex_input = var->type->is_array() &&
+                                    var->data.mode == ir_var_shader_in &&
+                                    !var->data.patch;
+      if (per_vertex_input) {
+         var->type = glsl_type::get_array_instance(var->type->fields.array,
+                                                   this->num_vertices);
+         var->data.max_array_access = this->num_vertices - 1;
+      }
+      return visit_continue;
+   }
+
+   /* A variable dereference always takes the (possibly resized) type of the
+    * variable it refers to. */
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+      ir->type = ir->var->type;
+      return visit_continue;
+   }
+
+   /* On leaving an array dereference, give it the element type of the
+    * (possibly resized) array it indexes. */
+   virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
+   {
+      const glsl_type *const array_type = ir->array->type;
+      if (array_type->is_array())
+         ir->type = array_type->fields.array;
+      return visit_continue;
+   }
+};
+
+/* Linker-time check that barrier() is only called from main(), before any
+ * return statement and outside of if/loop control flow.
+ */
+class barrier_use_visitor : public ir_hierarchical_visitor {
+public:
+   barrier_use_visitor(gl_shader_program *prog)
+      : prog(prog), in_main(false), after_return(false), control_flow(0)
+   {
+   }
+
+   virtual ~barrier_use_visitor() {}
+
+   virtual ir_visitor_status visit_enter(ir_function *ir)
+   {
+      if (strcmp(ir->name, "main") == 0)
+         in_main = true;
+      return visit_continue;
+   }
+
+   /* Leaving any function resets the per-function state. */
+   virtual ir_visitor_status visit_leave(ir_function *ir)
+   {
+      after_return = false;
+      in_main = false;
+      return visit_continue;
+   }
+
+   /* Everything visited after a return counts as "after return". */
+   virtual ir_visitor_status visit_leave(ir_return *ir)
+   {
+      after_return = true;
+      return visit_continue;
+   }
+
+   /* if and loop bodies both raise the control-flow nesting depth. */
+   virtual ir_visitor_status visit_enter(ir_if *ir)
+   {
+      control_flow++;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_if *ir)
+   {
+      control_flow--;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_loop *ir)
+   {
+      control_flow++;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_loop *ir)
+   {
+      control_flow--;
+      return visit_continue;
+   }
+
+   /* FINISHME: `switch` does not exist at the IR level -- by now it has
+    * been lowered to a series of `if`s. barrier() in a conditional path of
+    * the switch is therefore rejected correctly, but barrier() in a path
+    * that is always taken is not.
+    */
+
+   virtual ir_visitor_status visit_enter(ir_call *ir)
+   {
+      /* Only calls to the barrier() builtin are of interest. */
+      if (!ir->use_builtin || strcmp(ir->callee_name(), "barrier") != 0)
+         return visit_continue;
+
+      if (!in_main) {
+         linker_error(prog, "Builtin barrier() may only be used in main");
+         return visit_stop;
+      } else if (after_return) {
+         linker_error(prog, "Builtin barrier() may not be used after return");
+         return visit_stop;
+      } else if (control_flow != 0) {
+         linker_error(prog, "Builtin barrier() may not be used inside control flow");
+         return visit_stop;
+      }
+      return visit_continue;
+   }
+
+private:
+   gl_shader_program *prog;
+   bool in_main;
+   bool after_return;
+   int control_flow;
+};
+
 /**
  * Visitor that determines the highest stream id to which a (geometry) shader
  * emits vertices. It also checks whether End{Stream}Primitive is ever called.
@@ -462,6 +600,10 @@
    if (array_index < 0)
       return -1;
 
+   /* Check for leading zero */
+   if (name[i] == '0' && name[i+1] != ']')
+      return -1;
+
    *out_base_name_end = name + (i - 1);
    return array_index;
 }
@@ -615,6 +757,17 @@
                       &prog->Vert.ClipDistanceArraySize);
 }
 
+/* Analyzes clip usage of the tessellation evaluation shader, if present. */
+void
+validate_tess_eval_shader_executable(struct gl_shader_program *prog,
+                                     struct gl_shader *shader)
+{
+   if (shader != NULL) {
+      analyze_clip_usage(prog, shader, &prog->TessEval.UsesClipDistance,
+                         &prog->TessEval.ClipDistanceArraySize);
+   }
+}
+
 
 /**
  * Verify that a fragment shader executable meets all semantic requirements
@@ -777,9 +930,13 @@
 	 if (var == NULL)
 	    continue;
 
-	 if (uniforms_only && (var->data.mode != ir_var_uniform))
+	 if (uniforms_only && (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage))
 	    continue;
 
+         /* don't cross validate subroutine uniforms */
+         if (var->type->contains_subroutine())
+            continue;
+
 	 /* Don't cross validate temporaries that are at global scope.  These
 	  * will eventually get pulled into the shaders 'main'.
 	  */
@@ -1250,8 +1407,7 @@
                resize_interface_members(var->type->fields.array,
                                         var->get_max_ifc_array_access());
             var->change_interface_type(new_type);
-            var->type =
-               glsl_type::get_array_instance(new_type, var->type->length);
+            var->type = update_interface_members_array(var->type, new_type);
          }
       } else if (const glsl_type *ifc_type = var->get_interface_type()) {
          /* Store a pointer to the variable in the unnamed_interfaces
@@ -1299,6 +1455,21 @@
       }
    }
 
+   /* Rebuilds the (possibly nested) array type \c type around
+    * \c new_interface_type, keeping the length of every array dimension.
+    */
+   static const glsl_type *
+   update_interface_members_array(const glsl_type *type,
+                                  const glsl_type *new_interface_type)
+   {
+      const glsl_type *inner = type->fields.array;
+      if (inner->is_array())
+         inner = update_interface_members_array(inner, new_interface_type);
+      else
+         inner = new_interface_type;
+      return glsl_type::get_array_instance(inner, type->length);
+   }
+
    /**
     * Determine whether the given interface type contains unsized arrays (if
     * it doesn't, array_sizing_visitor doesn't need to process it).
@@ -1383,6 +1554,167 @@
    hash_table *unnamed_interfaces;
 };
 
+
+/**
+ * Performs the cross-validation of tessellation control shader vertices and
+ * layout qualifiers for the attached tessellation control shaders,
+ * and propagates them to the linked TCS and linked shader program.
+ */
+static void
+link_tcs_out_layout_qualifiers(struct gl_shader_program *prog,
+                               struct gl_shader *linked_shader,
+                               struct gl_shader **shader_list,
+                               unsigned num_shaders)
+{
+   linked_shader->TessCtrl.VerticesOut = 0;
+   if (linked_shader->Stage != MESA_SHADER_TESS_CTRL)
+      return;
+
+   /* From the GLSL 4.0 spec (chapter 4.3.8.2):
+    *
+    *     "All tessellation control shader layout declarations in a program
+    *      must specify the same output patch vertex count.  There must be at
+    *      least one layout qualifier specifying an output patch vertex count
+    *      in any program containing tessellation control shaders; however,
+    *      such a declaration is not required in all tessellation control
+    *      shaders."
+    */
+
+   for (unsigned i = 0; i < num_shaders; i++) {
+      const struct gl_shader *const sh = shader_list[i];
+      /* Units without a vertices layout qualifier are skipped. */
+      if (sh->TessCtrl.VerticesOut == 0)
+         continue;
+
+      const bool already_set = linked_shader->TessCtrl.VerticesOut != 0;
+      if (already_set &&
+          linked_shader->TessCtrl.VerticesOut != sh->TessCtrl.VerticesOut) {
+         linker_error(prog, "tessellation control shader defined with "
+                      "conflicting output vertex count (%d and %d)\n",
+                      linked_shader->TessCtrl.VerticesOut,
+                      sh->TessCtrl.VerticesOut);
+         return;
+      }
+      linked_shader->TessCtrl.VerticesOut = sh->TessCtrl.VerticesOut;
+   }
+
+   /* At least one compilation unit must have declared the count; once that
+    * is known, propagate it from the linked shader to the whole program.
+    */
+   if (linked_shader->TessCtrl.VerticesOut == 0) {
+      linker_error(prog, "tessellation control shader didn't declare "
+                   "vertices out layout qualifier\n");
+      return;
+   }
+   prog->TessCtrl.VerticesOut = linked_shader->TessCtrl.VerticesOut;
+}
+
+
+/**
+ * Cross-validates the primitive mode, vertex spacing, vertex ordering and
+ * point mode layout qualifiers of all attached tessellation evaluation
+ * shaders, applies defaults to the optional ones, and stores the result in
+ * the linked TES and in \c prog.  Conflicts are reported via linker_error().
+ */
+static void
+link_tes_in_layout_qualifiers(struct gl_shader_program *prog,
+				struct gl_shader *linked_shader,
+				struct gl_shader **shader_list,
+				unsigned num_shaders)
+{
+   linked_shader->TessEval.PrimitiveMode = PRIM_UNKNOWN;
+   linked_shader->TessEval.Spacing = 0;
+   linked_shader->TessEval.VertexOrder = 0;
+   linked_shader->TessEval.PointMode = -1; /* -1: not declared by any shader yet */
+
+   if (linked_shader->Stage != MESA_SHADER_TESS_EVAL)
+      return;
+
+   /* From the GLSL 4.0 spec (chapter 4.3.8.1):
+    *
+    *     "At least one tessellation evaluation shader (compilation unit) in
+    *      a program must declare a primitive mode in its input layout.
+    *      Declaration vertex spacing, ordering, and point mode identifiers is
+    *      optional.  It is not required that all tessellation evaluation
+    *      shaders in a program declare a primitive mode.  If spacing or
+    *      vertex ordering declarations are omitted, the tessellation
+    *      primitive generator will use equal spacing or counter-clockwise
+    *      vertex ordering, respectively.  If a point mode declaration is
+    *      omitted, the tessellation primitive generator will produce lines or
+    *      triangles according to the primitive mode."
+    */
+
+   for (unsigned i = 0; i < num_shaders; i++) {
+      struct gl_shader *shader = shader_list[i];
+
+      if (shader->TessEval.PrimitiveMode != PRIM_UNKNOWN) {
+	 if (linked_shader->TessEval.PrimitiveMode != PRIM_UNKNOWN &&
+	     linked_shader->TessEval.PrimitiveMode != shader->TessEval.PrimitiveMode) {
+	    linker_error(prog, "tessellation evaluation shader defined with "
+			 "conflicting input primitive modes.\n");
+	    return;
+	 }
+	 linked_shader->TessEval.PrimitiveMode = shader->TessEval.PrimitiveMode;
+      }
+
+      if (shader->TessEval.Spacing != 0) {
+	 if (linked_shader->TessEval.Spacing != 0 &&
+	     linked_shader->TessEval.Spacing != shader->TessEval.Spacing) {
+	    linker_error(prog, "tessellation evaluation shader defined with "
+			 "conflicting vertex spacing.\n");
+	    return;
+	 }
+	 linked_shader->TessEval.Spacing = shader->TessEval.Spacing;
+      }
+
+      if (shader->TessEval.VertexOrder != 0) {
+	 if (linked_shader->TessEval.VertexOrder != 0 &&
+	     linked_shader->TessEval.VertexOrder != shader->TessEval.VertexOrder) {
+	    linker_error(prog, "tessellation evaluation shader defined with "
+			 "conflicting ordering.\n");
+	    return;
+	 }
+	 linked_shader->TessEval.VertexOrder = shader->TessEval.VertexOrder;
+      }
+
+      if (shader->TessEval.PointMode != -1) {
+	 if (linked_shader->TessEval.PointMode != -1 &&
+	     linked_shader->TessEval.PointMode != shader->TessEval.PointMode) {
+	    linker_error(prog, "tessellation evaluation shader defined with "
+			 "conflicting point modes.\n");
+	    return;
+	 }
+	 linked_shader->TessEval.PointMode = shader->TessEval.PointMode;
+      }
+
+   }
+
+   /* A primitive mode is mandatory.  Spacing, ordering and point mode fall
+    * back to GL_EQUAL, GL_CCW and GL_FALSE when no shader declared them.
+    * The final values are copied from the linked shader into the program.
+    */
+   if (linked_shader->TessEval.PrimitiveMode == PRIM_UNKNOWN) {
+      linker_error(prog,
+		   "tessellation evaluation shader didn't declare input "
+		   "primitive modes.\n");
+      return;
+   }
+   prog->TessEval.PrimitiveMode = linked_shader->TessEval.PrimitiveMode;
+
+   if (linked_shader->TessEval.Spacing == 0)
+      linked_shader->TessEval.Spacing = GL_EQUAL;
+   prog->TessEval.Spacing = linked_shader->TessEval.Spacing;
+
+   if (linked_shader->TessEval.VertexOrder == 0)
+      linked_shader->TessEval.VertexOrder = GL_CCW;
+   prog->TessEval.VertexOrder = linked_shader->TessEval.VertexOrder;
+
+   if (linked_shader->TessEval.PointMode == -1)
+      linked_shader->TessEval.PointMode = GL_FALSE;
+   prog->TessEval.PointMode = linked_shader->TessEval.PointMode;
+}
+
+
 /**
  * Performs the cross-validation of layout qualifiers specified in
  * redeclaration of gl_FragCoord for the attached fragment shaders,
@@ -1433,8 +1765,8 @@
                       "layout qualifiers for gl_FragCoord\n");
       }
 
-      /* Update the linked shader state.  Note that uses_gl_fragcoord should
-       * accumulate the results.  The other values should replace.  If there
+      /* Update the linked shader state.  Note that uses_gl_fragcoord should
+       * accumulate the results.  The other values should replace.  If there
        * are multiple redeclarations, all the fields except uses_gl_fragcoord
        * are already known to be the same.
        */
@@ -1729,6 +2061,8 @@
    ralloc_steal(linked, linked->UniformBlocks);
 
    link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders);
+   link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders);
+   link_tes_in_layout_qualifiers(prog, linked, shader_list, num_shaders);
    link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders);
    link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders);
 
@@ -1811,6 +2145,14 @@
    if (ctx->Const.VertexID_is_zero_based)
       lower_vertex_id(linked);
 
+   /* Validate correct usage of barrier() in the tess control shader */
+   if (linked->Stage == MESA_SHADER_TESS_CTRL) {
+      barrier_use_visitor visitor(prog);
+      foreach_in_list(ir_instruction, ir, linked->ir) {
+         ir->accept(&visitor);
+      }
+   }
+
    /* Make a pass over all variable declarations to ensure that arrays with
     * unspecified sizes have a size specified.  The size is inferred from the
     * max_array_access field.
@@ -1858,8 +2200,11 @@
           * Atomic counters are supposed to get deterministic
           * locations assigned based on the declaration ordering and
           * sizes, array compaction would mess that up.
+          *
+          * Subroutine uniforms are not removed.
 	  */
-	 if (var->is_in_uniform_block() || var->type->contains_atomic())
+	 if (var->is_in_buffer_block() || var->type->contains_atomic() ||
+	     var->type->contains_subroutine())
 	    continue;
 
 	 unsigned int size = var->data.max_array_access;
@@ -1905,6 +2250,34 @@
 }
 
 /**
+ * Resize tessellation evaluation per-vertex inputs to the size of
+ * tessellation control per-vertex outputs.
+ */
+static void
+resize_tes_inputs(struct gl_context *ctx,
+                  struct gl_shader_program *prog)
+{
+   gl_shader *const tes = prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+   if (tes == NULL)
+      return;
+
+   /* With a control shader, its declared output vertex count sizes the
+    * arrays.  Without one, MaxPatchVertices is used because the actual
+    * patch size is not known until draw time.
+    */
+   gl_shader *const tcs = prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+   int num_vertices;
+   if (tcs != NULL)
+      num_vertices = tcs->TessCtrl.VerticesOut;
+   else
+      num_vertices = ctx->Const.MaxPatchVertices;
+
+   tess_eval_array_resize_visitor resizer(num_vertices, prog);
+   foreach_in_list(ir_instruction, ir, tes->ir)
+      ir->accept(&resizer);
+}
+
+/**
  * Find a contiguous set of available bits in a bitmask.
  *
  * \param used_mask     Bits representing used (1) and unused (0) locations
@@ -1940,12 +2313,10 @@
  * Assign locations for either VS inputs or FS outputs
  *
  * \param prog          Shader program whose variables need locations assigned
+ * \param constants     Driver specific constant values for the program.
  * \param target_index  Selector for the program target to receive location
  *                      assignmnets.  Must be either \c MESA_SHADER_VERTEX or
  *                      \c MESA_SHADER_FRAGMENT.
- * \param max_index     Maximum number of generic locations.  This corresponds
- *                      to either the maximum number of draw buffers or the
- *                      maximum number of generic attributes.
  *
  * \return
  * If locations are successfully assigned, true is returned.  Otherwise an
@@ -1953,13 +2324,22 @@
  */
 bool
 assign_attribute_or_color_locations(gl_shader_program *prog,
-				    unsigned target_index,
-				    unsigned max_index)
+                                    struct gl_constants *constants,
+                                    unsigned target_index)
 {
+   /* Maximum number of generic locations.  This corresponds to either the
+    * maximum number of draw buffers or the maximum number of generic
+    * attributes.
+    */
+   unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
+      constants->Program[target_index].MaxAttribs :
+      MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);
+
    /* Mark invalid locations as being used.
     */
    unsigned used_locations = (max_index >= 32)
       ? ~0 : ~((1 << max_index) - 1);
+   unsigned double_storage_locations = 0;
 
    assert((target_index == MESA_SHADER_VERTEX)
 	  || (target_index == MESA_SHADER_FRAGMENT));
@@ -2052,36 +2432,27 @@
 	 }
       }
 
-      const unsigned slots = var->type->count_attribute_slots();
-
-      /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
+      /* From GL4.5 core spec, section 15.2 (Shader Execution):
        *
-       * "A program with more than the value of MAX_VERTEX_ATTRIBS active
-       * attribute variables may fail to link, unless device-dependent
-       * optimizations are able to make the program fit within available
-       * hardware resources. For the purposes of this test, attribute variables
-       * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
-       * and dmat4 may count as consuming twice as many attributes as equivalent
-       * single-precision types. While these types use the same number of
-       * generic attributes as their single-precision equivalents,
-       * implementations are permitted to consume two single-precision vectors
-       * of internal storage for each three- or four-component double-precision
-       * vector."
-       * Until someone has a good reason in Mesa, enforce that now.
-       */
-      if (target_index == MESA_SHADER_VERTEX) {
-	 total_attribs_size += slots;
-	 if (var->type->without_array() == glsl_type::dvec3_type ||
-	     var->type->without_array() == glsl_type::dvec4_type ||
-	     var->type->without_array() == glsl_type::dmat2x3_type ||
-	     var->type->without_array() == glsl_type::dmat2x4_type ||
-	     var->type->without_array() == glsl_type::dmat3_type ||
-	     var->type->without_array() == glsl_type::dmat3x4_type ||
-	     var->type->without_array() == glsl_type::dmat4x3_type ||
-	     var->type->without_array() == glsl_type::dmat4_type)
-	    total_attribs_size += slots;
+       *     "Output binding assignments will cause LinkProgram to fail:
+       *     ...
+       *     If the program has an active output assigned to a location greater
+       *     than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has
+       *     an active output assigned an index greater than or equal to one;"
+       */
+      if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
+          var->data.location - generic_base >=
+          (int) constants->MaxDualSourceDrawBuffers) {
+         linker_error(prog,
+                      "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
+                      "with index %u for %s\n",
+                      var->data.location - generic_base, var->data.index,
+                      var->name);
+         return false;
       }
 
+      const unsigned slots = var->type->count_attribute_slots();
+
       /* If the variable is not a built-in and has a location statically
        * assigned in the shader (presumably via a layout qualifier), make sure
        * that it doesn't collide with other assigned locations.  Otherwise,
@@ -2196,6 +2567,38 @@
 	    }
 
 	    used_locations |= (use_mask << attr);
+
+            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+             *
+             * "A program with more than the value of MAX_VERTEX_ATTRIBS
+             *  active attribute variables may fail to link, unless
+             *  device-dependent optimizations are able to make the program
+             *  fit within available hardware resources. For the purposes
+             *  of this test, attribute variables of the type dvec3, dvec4,
+             *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+             *  count as consuming twice as many attributes as equivalent
+             *  single-precision types. While these types use the same number
+             *  of generic attributes as their single-precision equivalents,
+             *  implementations are permitted to consume two single-precision
+             *  vectors of internal storage for each three- or four-component
+             *  double-precision vector."
+             *
+             * Mark this attribute slot as taking up twice as much space
+             * so we can count it properly against limits.  According to
+             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+             * is optional behavior, but it seems preferable.
+             */
+            const glsl_type *type = var->type->without_array();
+            if (type == glsl_type::dvec3_type ||
+                type == glsl_type::dvec4_type ||
+                type == glsl_type::dmat2x3_type ||
+                type == glsl_type::dmat2x4_type ||
+                type == glsl_type::dmat3_type ||
+                type == glsl_type::dmat3x4_type ||
+                type == glsl_type::dmat4x3_type ||
+                type == glsl_type::dmat4_type) {
+               double_storage_locations |= (use_mask << attr);
+            }
 	 }
 
 	 continue;
@@ -2207,6 +2610,9 @@
    }
 
    if (target_index == MESA_SHADER_VERTEX) {
+      /* A shift by >= 32 is undefined; mirror the used_locations guard. */
+      unsigned total_attribs_size = _mesa_bitcount(double_storage_locations) +
+         _mesa_bitcount(used_locations & ((max_index >= 32) ? ~0u : (1u << max_index) - 1));
       if (total_attribs_size > max_index) {
 	 linker_error(prog,
 		      "attempt to use %d vertex attribute slots only %d available ",
@@ -2388,6 +2794,13 @@
    unsigned total_uniform_blocks = 0;
 
    for (unsigned i = 0; i < prog->NumUniformBlocks; i++) {
+      if (prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) {
+         linker_error(prog, "Uniform block %s too big (%d/%d)\n",
+                      prog->UniformBlocks[i].Name,
+                      prog->UniformBlocks[i].UniformBufferSize,
+                      ctx->Const.MaxUniformBlockSize);
+      }
+
       for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
 	 if (prog->UniformBlockStageIndex[j][i] != -1) {
 	    blocks[j]++;
@@ -2415,6 +2828,49 @@
    }
 }
 
+/** For each subroutine uniform of every linked stage, count the stage's
+ * subroutine functions whose compatible types include the uniform's type. */
+static void
+link_calculate_subroutine_compat(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_shader *sh = prog->_LinkedShaders[i];
+      if (!sh)
+         continue;
+      for (unsigned j = 0; j < sh->NumSubroutineUniformRemapTable; j++) {
+         struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[j];
+         /* Skip empty slots and explicit locations that are still flagged
+          * with the reserved-but-inactive marker instead of real storage. */
+         if (!uni || uni == INACTIVE_UNIFORM_EXPLICIT_LOCATION)
+            continue;
+         int count = 0;
+         for (unsigned f = 0; f < sh->NumSubroutineFunctions; f++) {
+            struct gl_subroutine_function *fn = &sh->SubroutineFunctions[f];
+            for (int k = 0; k < fn->num_compat_types; k++) {
+               if (fn->types[k] == uni->type) {
+                  count++;
+                  break;   /* a function counts once per uniform */
+               }
+            }
+         }
+         uni->num_compatible_subroutines = count;
+      }
+   }
+}
+
+/** Flag a linker error for each stage with too many subroutine uniform locations. */
+static void
+check_subroutine_resources(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+      const struct gl_shader *linked = prog->_LinkedShaders[stage];
+      if (linked == NULL)
+         continue;
+      if (linked->NumSubroutineUniformRemapTable > MAX_SUBROUTINE_UNIFORM_LOCATIONS)
+         linker_error(prog, "Too many %s shader subroutine uniforms\n",
+                      _mesa_shader_stage_to_string(stage));
+   }
+}
 /**
  * Validate shader image resources.
  */
@@ -2432,8 +2888,9 @@
 
       if (sh) {
          if (sh->NumImages > ctx->Const.Program[i].MaxImageUniforms)
-            linker_error(prog, "Too many %s shader image uniforms\n",
-                         _mesa_shader_stage_to_string(i));
+            linker_error(prog, "Too many %s shader image uniforms (%u > %u)\n",
+                         _mesa_shader_stage_to_string(i), sh->NumImages,
+                         ctx->Const.Program[i].MaxImageUniforms);
 
          total_image_units += sh->NumImages;
 
@@ -2451,7 +2908,7 @@
       linker_error(prog, "Too many combined image uniforms\n");
 
    if (total_image_units + fragment_outputs >
-       ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs)
+       ctx->Const.MaxCombinedShaderOutputResources)
       linker_error(prog, "Too many combined image uniforms and fragment outputs\n");
 }
 
@@ -2523,6 +2980,59 @@
    return true;
 }
 
+/**
+ * Reserve the explicit locations of subroutine uniform \c var in the remap
+ * table of \c sh; returns false after flagging a linker error on failure.
+ */
+static bool
+reserve_subroutine_explicit_locations(struct gl_shader_program *prog,
+                                      struct gl_shader *sh,
+                                      ir_variable *var)
+{
+   const unsigned num_slots = var->type->uniform_locations();
+   const unsigned first_loc = var->data.location;
+   const unsigned table_size = first_loc + num_slots;
+
+   /* Grow the remap table when the requested range ends past it. */
+   if (table_size > sh->NumSubroutineUniformRemapTable) {
+      sh->SubroutineUniformRemapTable =
+         reralloc(sh, sh->SubroutineUniformRemapTable,
+                  gl_uniform_storage *, table_size);
+
+      if (sh->SubroutineUniformRemapTable == NULL) {
+         linker_error(prog, "Out of memory during linking.\n");
+         return false;
+      }
+
+      /* Freshly added entries start out empty. */
+      unsigned idx = sh->NumSubroutineUniformRemapTable;
+      while (idx < table_size)
+         sh->SubroutineUniformRemapTable[idx++] = NULL;
+
+      sh->NumSubroutineUniformRemapTable = table_size;
+   }
+
+   for (unsigned n = 0; n < num_slots; n++) {
+      gl_uniform_storage **slot = &sh->SubroutineUniformRemapTable[first_loc + n];
+      /* ARB_explicit_uniform_location specification states:
+       *     "No two subroutine uniform variables can have the same location
+       *     in the same shader stage, otherwise a compiler or linker error
+       *     will be generated."
+       */
+      if (*slot == INACTIVE_UNIFORM_EXPLICIT_LOCATION) {
+         linker_error(prog,
+                      "location qualifier for uniform %s overlaps "
+                      "previously used location\n",
+                      var->name);
+         return false;
+      }
+
+      /* Mark as inactive before optimization rounds and location assignment. */
+      *slot = INACTIVE_UNIFORM_EXPLICIT_LOCATION;
+   }
+
+   return true;
+}
 /**
  * Check and reserve all explicit uniform locations, called before
  * any optimizations happen to handle also inactive uniforms and
@@ -2553,9 +3063,14 @@
 
       foreach_in_list(ir_instruction, node, sh->ir) {
          ir_variable *var = node->as_variable();
-         if ((var && var->data.mode == ir_var_uniform) &&
+         if (var && (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) &&
              var->data.explicit_location) {
-            if (!reserve_explicit_locations(prog, uniform_map, var)) {
+            bool ret;
+            if (var->type->is_subroutine())
+               ret = reserve_subroutine_explicit_locations(prog, sh, var);
+            else
+               ret = reserve_explicit_locations(prog, uniform_map, var);
+            if (!ret) {
                delete uniform_map;
                return;
             }
@@ -2604,7 +3119,8 @@
  * Function builds a stage reference bitmask from variable name.
  */
 static uint8_t
-build_stageref(struct gl_shader_program *shProg, const char *name)
+build_stageref(struct gl_shader_program *shProg, const char *name,
+               unsigned mode)
 {
    uint8_t stages = 0;
 
@@ -2617,9 +3133,34 @@
       struct gl_shader *sh = shProg->_LinkedShaders[i];
       if (!sh)
          continue;
-      ir_variable *var = sh->symbols->get_variable(name);
-      if (var)
-         stages |= (1 << i);
+
+      /* Shader symbol table may contain variables that have
+       * been optimized away. Search IR for the variable instead.
+       */
+      foreach_in_list(ir_instruction, node, sh->ir) {
+         ir_variable *var = node->as_variable();
+         if (var) {
+            unsigned baselen = strlen(var->name);
+
+            /* Type needs to match if specified, otherwise we might
+             * pick a variable with same name but different interface.
+             */
+            if (var->data.mode != mode)
+               continue;
+
+            if (strncmp(var->name, name, baselen) == 0) {
+               /* Check for exact name matches but also check for arrays and
+                * structs.
+                */
+               if (name[baselen] == '\0' ||
+                   name[baselen] == '[' ||
+                   name[baselen] == '.') {
+                  stages |= (1 << i);
+                  break;
+               }
+            }
+         }
+      }
    }
    return stages;
 }
@@ -2664,7 +3205,8 @@
       };
 
       if (!add_program_resource(shProg, programInterface, var,
-                                build_stageref(shProg, var->name) | mask))
+                                build_stageref(shProg, var->name,
+                                               var->data.mode) | mask))
          return false;
    }
    return true;
@@ -2674,7 +3216,7 @@
  * Builds up a list of program resources that point to existing
  * resource data.
  */
-static void
+void
 build_program_resource_list(struct gl_context *ctx,
                             struct gl_shader_program *shProg)
 {
@@ -2715,24 +3257,22 @@
    /* Add transform feedback varyings. */
    if (shProg->LinkedTransformFeedback.NumVarying > 0) {
       for (int i = 0; i < shProg->LinkedTransformFeedback.NumVarying; i++) {
-         uint8_t stageref =
-            build_stageref(shProg,
-                           shProg->LinkedTransformFeedback.Varyings[i].Name);
          if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_VARYING,
                                    &shProg->LinkedTransformFeedback.Varyings[i],
-                                   stageref))
+                                   0))
          return;
       }
    }
 
    /* Add uniforms from uniform storage. */
-   for (unsigned i = 0; i < shProg->NumUserUniformStorage; i++) {
+   for (unsigned i = 0; i < shProg->NumUniformStorage; i++) {
       /* Do not add uniforms internally used by Mesa. */
       if (shProg->UniformStorage[i].hidden)
          continue;
 
       uint8_t stageref =
-         build_stageref(shProg, shProg->UniformStorage[i].name);
+         build_stageref(shProg, shProg->UniformStorage[i].name,
+                        ir_var_uniform);
 
       /* Add stagereferences for uniforms in a uniform block. */
       int block_index = shProg->UniformStorage[i].block_index;
@@ -2762,10 +3302,39 @@
          return;
    }
 
+   for (unsigned i = 0; i < shProg->NumUniformStorage; i++) {
+      GLenum type;
+      if (!shProg->UniformStorage[i].hidden)
+         continue;
+
+      for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) {
+         if (!shProg->UniformStorage[i].subroutine[j].active)
+            continue;
+
+         type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j);
+         /* add shader subroutines */
+         if (!add_program_resource(shProg, type, &shProg->UniformStorage[i], 0))
+            return;
+      }
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_shader *sh = shProg->_LinkedShaders[i];
+      GLuint type;
+
+      if (!sh)
+         continue;
+
+      type = _mesa_shader_stage_to_subroutine((gl_shader_stage)i);
+      for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+         if (!add_program_resource(shProg, type, &sh->SubroutineFunctions[j], 0))
+            return;
+      }
+   }
+
    /* TODO - following extensions will require more resource types:
     *
     *    GL_ARB_shader_storage_buffer_object
-    *    GL_ARB_shader_subroutine
     */
 }
 
@@ -2803,6 +3372,41 @@
    return true;
 }
 
+/** Per linked stage: count is_subroutine functions and append every function
+ * with subroutine types to SubroutineFunctions (linker error on OOM). */
+void
+link_assign_subroutine_types(struct gl_context *ctx,
+                             struct gl_shader_program *prog)
+{
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      gl_shader *sh = prog->_LinkedShaders[i];
+      if (sh == NULL)
+         continue;
+      foreach_in_list(ir_instruction, node, sh->ir) {
+         ir_function *fn = node->as_function();
+         if (fn && fn->is_subroutine)
+            sh->NumSubroutineUniformTypes++;
+         if (!fn || !fn->num_subroutine_types)
+            continue;
+         struct gl_subroutine_function *fns =
+            reralloc(sh, sh->SubroutineFunctions, struct gl_subroutine_function,
+                     sh->NumSubroutineFunctions + 1);
+         if (!fns) {
+            linker_error(prog, "Out of memory during linking.\n");
+            return;
+         }
+         sh->SubroutineFunctions = fns;
+         struct gl_subroutine_function *entry = &fns[sh->NumSubroutineFunctions];
+         entry->name = ralloc_strdup(sh, fn->name);
+         entry->num_compat_types = fn->num_subroutine_types;
+         entry->types = ralloc_array(sh, const struct glsl_type *,
+                                     fn->num_subroutine_types);
+         for (int j = 0; j < fn->num_subroutine_types; j++)
+            entry->types[j] = fn->subroutine_types[j];
+         sh->NumSubroutineFunctions++;
+      }
+   }
+}
 
 void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
@@ -2864,7 +3468,7 @@
    prog->Version = max_version;
    prog->IsES = is_es_prog;
 
-   /* Geometry shaders have to be linked with vertex shaders.
+   /* Some shaders have to be linked with some other shaders present.
     */
    if (num_shaders[MESA_SHADER_GEOMETRY] > 0 &&
        num_shaders[MESA_SHADER_VERTEX] == 0 &&
@@ -2873,6 +3477,44 @@
 		   "vertex shader\n");
       goto done;
    }
+   if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 &&
+       num_shaders[MESA_SHADER_VERTEX] == 0 &&
+       !prog->SeparateShader) {
+      linker_error(prog, "Tessellation evaluation shader must be linked with "
+		   "vertex shader\n");
+      goto done;
+   }
+   if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
+       num_shaders[MESA_SHADER_VERTEX] == 0 &&
+       !prog->SeparateShader) {
+      linker_error(prog, "Tessellation control shader must be linked with "
+		   "vertex shader\n");
+      goto done;
+   }
+
+   /* The spec is self-contradictory here. It allows linking without a tess
+    * eval shader, but that can only be used with transform feedback and
+    * rasterization disabled. However, transform feedback isn't allowed
+    * with GL_PATCHES, so it can't be used.
+    *
+    * More investigation showed that the idea of transform feedback after
+    * a tess control shader was dropped, because some hw vendors couldn't
+    * support tessellation without a tess eval shader, but the linker section
+    * wasn't updated to reflect that.
+    *
+    * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this
+    * spec bug.
+    *
+    * Do what's reasonable and always require a tess eval shader if a tess
+    * control shader is present.
+    */
+   if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 &&
+       num_shaders[MESA_SHADER_TESS_EVAL] == 0 &&
+       !prog->SeparateShader) {
+      linker_error(prog, "Tessellation control shader must be linked with "
+		   "tessellation evaluation shader\n");
+      goto done;
+   }
 
    /* Compute shaders have additional restrictions. */
    if (num_shaders[MESA_SHADER_COMPUTE] > 0 &&
@@ -2906,6 +3548,12 @@
          case MESA_SHADER_VERTEX:
             validate_vertex_shader_executable(prog, sh);
             break;
+         case MESA_SHADER_TESS_CTRL:
+            /* nothing to be done */
+            break;
+         case MESA_SHADER_TESS_EVAL:
+            validate_tess_eval_shader_executable(prog, sh);
+            break;
          case MESA_SHADER_GEOMETRY:
             validate_geometry_shader_executable(prog, sh);
             break;
@@ -2925,6 +3573,8 @@
 
    if (num_shaders[MESA_SHADER_GEOMETRY] > 0)
       prog->LastClipDistanceArraySize = prog->Geom.ClipDistanceArraySize;
+   else if (num_shaders[MESA_SHADER_TESS_EVAL] > 0)
+      prog->LastClipDistanceArraySize = prog->TessEval.ClipDistanceArraySize;
    else if (num_shaders[MESA_SHADER_VERTEX] > 0)
       prog->LastClipDistanceArraySize = prog->Vert.ClipDistanceArraySize;
    else
@@ -2946,9 +3596,13 @@
    }
 
    check_explicit_uniform_locations(ctx, prog);
+   link_assign_subroutine_types(ctx, prog);
+
    if (!prog->LinkStatus)
       goto done;
 
+   resize_tes_inputs(ctx, prog);
+
    /* Validate the inputs of each stage with the output of the preceding
     * stage.
     */
@@ -3013,6 +3667,10 @@
          lower_clip_distance(prog->_LinkedShaders[i]);
       }
 
+      if (ctx->Const.LowerTessLevel) {
+         lower_tess_level(prog->_LinkedShaders[i]);
+      }
+
       while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false,
                                     &ctx->Const.ShaderCompilerOptions[i],
                                     ctx->Const.NativeIntegers))
@@ -3041,16 +3699,13 @@
       }
    }
 
-   /* FINISHME: The value of the max_attribute_index parameter is
-    * FINISHME: implementation dependent based on the value of
-    * FINISHME: GL_MAX_VERTEX_ATTRIBS.  GL_MAX_VERTEX_ATTRIBS must be
-    * FINISHME: at least 16, so hardcode 16 for now.
-    */
-   if (!assign_attribute_or_color_locations(prog, MESA_SHADER_VERTEX, 16)) {
+   if (!assign_attribute_or_color_locations(prog, &ctx->Const,
+                                            MESA_SHADER_VERTEX)) {
       goto done;
    }
 
-   if (!assign_attribute_or_color_locations(prog, MESA_SHADER_FRAGMENT, MAX2(ctx->Const.MaxDrawBuffers, ctx->Const.MaxDualSourceDrawBuffers))) {
+   if (!assign_attribute_or_color_locations(prog, &ctx->Const,
+                                            MESA_SHADER_FRAGMENT)) {
       goto done;
    }
 
@@ -3109,8 +3764,7 @@
           */
          if (!assign_varying_locations(ctx, mem_ctx, prog,
                                        NULL, prog->_LinkedShaders[first],
-                                       num_tfeedback_decls, tfeedback_decls,
-                                       prog->Geom.VerticesIn))
+                                       num_tfeedback_decls, tfeedback_decls))
             goto done;
       }
 
@@ -3121,8 +3775,7 @@
           */
          if (!assign_varying_locations(ctx, mem_ctx, prog,
                                        sh, NULL,
-                                       num_tfeedback_decls, tfeedback_decls,
-                                       0))
+                                       num_tfeedback_decls, tfeedback_decls))
             goto done;
       }
 
@@ -3150,8 +3803,7 @@
                                        NULL /* producer */,
                                        sh /* consumer */,
                                        0 /* num_tfeedback_decls */,
-                                       NULL /* tfeedback_decls */,
-                                       0 /* gs_input_vertices */))
+                                       NULL /* tfeedback_decls */))
             goto done;
       } else
          demote_shader_inputs_and_outputs(sh, ir_var_shader_in);
@@ -3167,12 +3819,10 @@
 
       gl_shader *const sh_i = prog->_LinkedShaders[i];
       gl_shader *const sh_next = prog->_LinkedShaders[next];
-      unsigned gs_input_vertices =
-         next == MESA_SHADER_GEOMETRY ? prog->Geom.VerticesIn : 0;
 
       if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
                 next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
-                tfeedback_decls, gs_input_vertices))
+                tfeedback_decls))
          goto done;
 
       do_dead_builtin_varyings(ctx, sh_i, sh_next,
@@ -3206,7 +3856,9 @@
    link_assign_atomic_counter_resources(ctx, prog);
    store_fragdepth_layout(prog);
 
+   link_calculate_subroutine_compat(ctx, prog);
    check_resources(ctx, prog);
+   check_subroutine_resources(ctx, prog);
    check_image_resources(ctx, prog);
    link_check_atomic_counter_resources(ctx, prog);
 
@@ -3227,10 +3879,6 @@
       }
    }
 
-   build_program_resource_list(ctx, prog);
-   if (!prog->LinkStatus)
-      goto done;
-
    /* FINISHME: Assign fragment shader output locations. */
 
 done:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_interface_blocks.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_interface_blocks.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_interface_blocks.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_interface_blocks.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -112,7 +112,8 @@
     * it's not clear from the spec whether they need to match, but
     * Mesa's implementation relies on them matching.
     */
-   if (a->instance_name != NULL && mode != ir_var_uniform &&
+   if (a->instance_name != NULL &&
+       mode != ir_var_uniform && mode != ir_var_shader_storage &&
        strcmp(a->instance_name, b->instance_name) != 0) {
       return false;
    }
@@ -133,9 +134,9 @@
  * Check if two interfaces match, according to interstage (in/out) interface
  * matching rules.
  *
- * If \c extra_array_level is true, then vertex-to-geometry shader matching
- * rules are enforced (i.e. a successful match requires the consumer interface
- * to be an array and the producer interface to be a non-array).
+ * If \c extra_array_level is true, the consumer interface is required to be
+ * an array and the producer interface is required to be a non-array.
+ * This is used for tessellation control and geometry shader consumers.
  */
 bool
 interstage_match(const interface_block_definition *producer,
@@ -253,6 +254,7 @@
    interface_block_definitions in_interfaces;
    interface_block_definitions out_interfaces;
    interface_block_definitions uniform_interfaces;
+   interface_block_definitions buffer_interfaces;
 
    for (unsigned int i = 0; i < num_shaders; i++) {
       if (shader_list[i] == NULL)
@@ -279,6 +281,9 @@
          case ir_var_uniform:
             definitions = &uniform_interfaces;
             break;
+         case ir_var_shader_storage:
+            definitions = &buffer_interfaces;
+            break;
          default:
             /* Only in, out, and uniform interfaces are legal, so we should
              * never get here.
@@ -313,7 +318,10 @@
                                  const gl_shader *consumer)
 {
    interface_block_definitions definitions;
-   const bool extra_array_level = consumer->Stage == MESA_SHADER_GEOMETRY;
+   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
+   const bool extra_array_level = (producer->Stage == MESA_SHADER_VERTEX &&
+                                   consumer->Stage != MESA_SHADER_FRAGMENT) ||
+                                  consumer->Stage == MESA_SHADER_GEOMETRY;
 
    /* Add input interfaces from the consumer to the symbol table. */
    foreach_in_list(ir_instruction, node, consumer->ir) {
@@ -361,7 +369,9 @@
       const gl_shader *stage = stages[i];
       foreach_in_list(ir_instruction, node, stage->ir) {
          ir_variable *var = node->as_variable();
-         if (!var || !var->get_interface_type() || var->data.mode != ir_var_uniform)
+         if (!var || !var->get_interface_type() ||
+             (var->data.mode != ir_var_uniform &&
+              var->data.mode != ir_var_shader_storage))
             continue;
 
          interface_block_definition *old_def =
@@ -374,7 +384,9 @@
              * uniform matchin rules (for uniforms, it is as though all
              * shaders are in the same shader stage).
              */
-            if (!intrastage_match(old_def, &new_def, ir_var_uniform, prog)) {
+            if (!intrastage_match(old_def, &new_def,
+                                  (ir_variable_mode) var->data.mode,
+                                  prog)) {
                linker_error(prog, "definitions of interface block `%s' do not "
                             "match\n", var->get_interface_type()->name);
                return;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_block_active_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_block_active_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_block_active_visitor.cpp	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_block_active_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -44,6 +44,7 @@
 
       b->type = block_type;
       b->has_instance_name = var->is_interface_instance();
+      b->is_shader_storage = var->data.mode == ir_var_shader_storage;
 
       if (var->data.explicit_binding) {
          b->has_binding = true;
@@ -73,7 +74,7 @@
 ir_visitor_status
 link_uniform_block_active_visitor::visit(ir_variable *var)
 {
-   if (!var->is_in_uniform_block())
+   if (!var->is_in_buffer_block())
       return visit_continue;
 
    const glsl_type *const block_type = var->is_interface_instance()
@@ -124,7 +125,7 @@
     * function.
     */
    if (var == NULL
-       || !var->is_in_uniform_block()
+       || !var->is_in_buffer_block()
        || !var->is_interface_instance())
       return visit_continue;
 
@@ -194,7 +195,7 @@
 {
    ir_variable *var = ir->var;
 
-   if (!var->is_in_uniform_block())
+   if (!var->is_in_buffer_block())
       return visit_continue;
 
    assert(!var->is_interface_instance() || !var->type->is_array());
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_block_active_visitor.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_block_active_visitor.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_block_active_visitor.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_block_active_visitor.h	2015-09-16 14:36:09.000000000 +0000
@@ -38,6 +38,7 @@
 
    bool has_instance_name;
    bool has_binding;
+   bool is_shader_storage;
 };
 
 class link_uniform_block_active_visitor : public ir_hierarchical_visitor {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_blocks.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_blocks.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_blocks.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_blocks.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -293,6 +293,8 @@
             blocks[i].NumUniforms =
                (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
 
+            blocks[i].IsShaderStorage = b->is_shader_storage;
+
             i++;
          }
       } else {
@@ -311,6 +313,8 @@
          blocks[i].NumUniforms =
             (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
 
+         blocks[i].IsShaderStorage = b->is_shader_storage;
+
          i++;
       }
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_initializers.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_initializers.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniform_initializers.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniform_initializers.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -89,6 +89,7 @@
       case GLSL_TYPE_ATOMIC_UINT:
       case GLSL_TYPE_INTERFACE:
       case GLSL_TYPE_VOID:
+      case GLSL_TYPE_SUBROUTINE:
       case GLSL_TYPE_ERROR:
 	 /* All other types should have already been filtered by other
 	  * paths in the caller.
@@ -99,11 +100,16 @@
    }
 }
 
+/**
+ * Initialize an opaque uniform from the value of an explicit binding
+ * qualifier specified in the shader.  Atomic counters are different because
+ * they have no storage and should be handled elsewhere.
+ */
 void
-set_sampler_binding(gl_shader_program *prog, const char *name, int binding)
+set_opaque_binding(gl_shader_program *prog, const char *name, int binding)
 {
    struct gl_uniform_storage *const storage =
-      get_storage(prog->UniformStorage, prog->NumUserUniformStorage, name);
+      get_storage(prog->UniformStorage, prog->NumUniformStorage, name);
 
    if (storage == NULL) {
       assert(storage != NULL);
@@ -126,11 +132,20 @@
    for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) {
       gl_shader *shader = prog->_LinkedShaders[sh];
 
-      if (shader && storage->sampler[sh].active) {
-         for (unsigned i = 0; i < elements; i++) {
-            unsigned index = storage->sampler[sh].index + i;
+      if (shader) {
+         if (storage->type->base_type == GLSL_TYPE_SAMPLER &&
+             storage->sampler[sh].active) {
+            for (unsigned i = 0; i < elements; i++) {
+               const unsigned index = storage->sampler[sh].index + i;
+               shader->SamplerUnits[index] = storage->storage[i].i;
+            }
 
-            shader->SamplerUnits[index] = storage->storage[i].i;
+         } else if (storage->type->base_type == GLSL_TYPE_IMAGE &&
+                    storage->image[sh].active) {
+            for (unsigned i = 0; i < elements; i++) {
+               const unsigned index = storage->image[sh].index + i;
+               shader->ImageUnits[index] = storage->storage[i].i;
+            }
          }
       }
    }
@@ -193,7 +208,7 @@
 
    struct gl_uniform_storage *const storage =
       get_storage(prog->UniformStorage,
-		  prog->NumUserUniformStorage,
+                  prog->NumUniformStorage,
 		  name);
    if (storage == NULL) {
       assert(storage != NULL);
@@ -256,7 +271,8 @@
       foreach_in_list(ir_instruction, node, shader->ir) {
 	 ir_variable *const var = node->as_variable();
 
-	 if (!var || var->data.mode != ir_var_uniform)
+	 if (!var || (var->data.mode != ir_var_uniform &&
+	     var->data.mode != ir_var_shader_storage))
 	    continue;
 
 	 if (!mem_ctx)
@@ -265,9 +281,10 @@
          if (var->data.explicit_binding) {
             const glsl_type *const type = var->type;
 
-            if (type->without_array()->is_sampler()) {
-               linker::set_sampler_binding(prog, var->name, var->data.binding);
-            } else if (var->is_in_uniform_block()) {
+            if (type->without_array()->is_sampler() ||
+                type->without_array()->is_image()) {
+               linker::set_opaque_binding(prog, var->name, var->data.binding);
+            } else if (var->is_in_buffer_block()) {
                const glsl_type *const iface_type = var->get_interface_type();
 
                /* If the variable is an array and it is an interface instance,
@@ -280,7 +297,7 @@
                 *         float f[4];
                 *     };
                 *
-                * In this case "f" would pass is_in_uniform_block (above) and
+                * In this case "f" would pass is_in_buffer_block (above) and
                 * type->is_array(), but it will fail is_interface_instance().
                 */
                if (var->is_interface_instance() && var->type->is_array()) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniforms.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniforms.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_uniforms.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_uniforms.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -47,9 +47,10 @@
 static unsigned
 values_for_type(const glsl_type *type)
 {
-   if (type->is_sampler()) {
+   if (type->is_sampler() || type->is_subroutine()) {
       return 1;
-   } else if (type->is_array() && type->fields.array->is_sampler()) {
+   } else if (type->is_array() && (type->fields.array->is_sampler() ||
+                                   type->fields.array->is_subroutine())) {
       return type->array_size();
    } else {
       return type->component_slots();
@@ -284,6 +285,7 @@
    count_uniform_size(struct string_to_uint_map *map)
       : num_active_uniforms(0), num_values(0), num_shader_samplers(0),
         num_shader_images(0), num_shader_uniform_components(0),
+        num_shader_subroutines(0),
         is_ubo_var(false), map(map)
    {
       /* empty */
@@ -294,11 +296,12 @@
       this->num_shader_samplers = 0;
       this->num_shader_images = 0;
       this->num_shader_uniform_components = 0;
+      this->num_shader_subroutines = 0;
    }
 
    void process(ir_variable *var)
    {
-      this->is_ubo_var = var->is_in_uniform_block();
+      this->is_ubo_var = var->is_in_buffer_block();
       if (var->is_interface_instance())
          program_resource_visitor::process(var->get_interface_type(),
                                            var->get_interface_type()->name);
@@ -331,6 +334,11 @@
     */
    unsigned num_shader_uniform_components;
 
+   /**
+    * Number of subroutine uniforms used
+    */
+   unsigned num_shader_subroutines;
+
    bool is_ubo_var;
 
 private:
@@ -348,7 +356,9 @@
        * count it for each shader target.
        */
       const unsigned values = values_for_type(type);
-      if (type->contains_sampler()) {
+      if (type->contains_subroutine()) {
+         this->num_shader_subroutines += values;
+      } else if (type->contains_sampler()) {
          this->num_shader_samplers += values;
       } else if (type->contains_image()) {
          this->num_shader_images += values;
@@ -421,6 +431,7 @@
       this->shader_shadow_samplers = 0;
       this->next_sampler = 0;
       this->next_image = 0;
+      this->next_subroutine = 0;
       memset(this->targets, 0, sizeof(this->targets));
    }
 
@@ -431,7 +442,7 @@
       field_counter = 0;
 
       ubo_block_index = -1;
-      if (var->is_in_uniform_block()) {
+      if (var->is_in_buffer_block()) {
          if (var->is_interface_instance() && var->type->is_array()) {
             unsigned l = strlen(var->get_interface_type()->name);
 
@@ -535,6 +546,24 @@
       }
    }
 
+   void handle_subroutines(const glsl_type *base_type,
+                           struct gl_uniform_storage *uniform)
+   {
+      if (base_type->is_subroutine()) {
+         uniform->subroutine[shader_type].index = this->next_subroutine;
+         uniform->subroutine[shader_type].active = true;
+
+         /* Increment the subroutine index by 1 for non-arrays and by the
+          * number of array elements for arrays.
+          */
+         this->next_subroutine += MAX2(1, uniform->array_elements);
+
+      } else {
+         uniform->subroutine[shader_type].index = ~0;
+         uniform->subroutine[shader_type].active = false;
+      }
+   }
+
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major)
    {
@@ -588,13 +617,15 @@
       /* This assigns uniform indices to sampler and image uniforms. */
       handle_samplers(base_type, &this->uniforms[id]);
       handle_images(base_type, &this->uniforms[id]);
+      handle_subroutines(base_type, &this->uniforms[id]);
 
-      /* If there is already storage associated with this uniform, it means
-       * that it was set while processing an earlier shader stage.  For
-       * example, we may be processing the uniform in the fragment shader, but
-       * the uniform was already processed in the vertex shader.
+      /* If there is already storage associated with this uniform or if the
+       * uniform is set as builtin, it means that it was set while processing
+       * an earlier shader stage.  For example, we may be processing the
+       * uniform in the fragment shader, but the uniform was already processed
+       * in the vertex shader.
        */
-      if (this->uniforms[id].storage != NULL) {
+      if (this->uniforms[id].storage != NULL || this->uniforms[id].builtin) {
          return;
       }
 
@@ -619,10 +650,15 @@
       this->uniforms[id].initialized = 0;
       this->uniforms[id].num_driver_storage = 0;
       this->uniforms[id].driver_storage = NULL;
-      this->uniforms[id].storage = this->values;
       this->uniforms[id].atomic_buffer_index = -1;
       this->uniforms[id].hidden =
          current_var->data.how_declared == ir_var_hidden;
+      this->uniforms[id].builtin = is_gl_identifier(name);
+
+      /* Do not assign storage if the uniform is builtin */
+      if (!this->uniforms[id].builtin)
+         this->uniforms[id].storage = this->values;
+
       if (this->ubo_block_index != -1) {
 	 this->uniforms[id].block_index = this->ubo_block_index;
 
@@ -666,6 +702,7 @@
    struct gl_uniform_storage *uniforms;
    unsigned next_sampler;
    unsigned next_image;
+   unsigned next_subroutine;
 
 public:
    union gl_constant_value *values;
@@ -757,10 +794,11 @@
    foreach_in_list(ir_instruction, node, shader->ir) {
       ir_variable *const var = node->as_variable();
 
-      if ((var == NULL) || !var->is_in_uniform_block())
+      if ((var == NULL) || !var->is_in_buffer_block())
 	 continue;
 
-      assert(var->data.mode == ir_var_uniform);
+      assert(var->data.mode == ir_var_uniform ||
+             var->data.mode == ir_var_shader_storage);
 
       if (var->is_interface_instance()) {
          var->data.location = 0;
@@ -894,7 +932,7 @@
 {
    ralloc_free(prog->UniformStorage);
    prog->UniformStorage = NULL;
-   prog->NumUserUniformStorage = 0;
+   prog->NumUniformStorage = 0;
 
    if (prog->UniformHash != NULL) {
       prog->UniformHash->clear();
@@ -937,41 +975,34 @@
       foreach_in_list(ir_instruction, node, sh->ir) {
 	 ir_variable *const var = node->as_variable();
 
-	 if ((var == NULL) || (var->data.mode != ir_var_uniform))
+	 if ((var == NULL) || (var->data.mode != ir_var_uniform &&
+	                       var->data.mode != ir_var_shader_storage))
 	    continue;
 
-	 /* FINISHME: Update code to process built-in uniforms!
-	  */
-	 if (is_gl_identifier(var->name)) {
-	    uniform_size.num_shader_uniform_components +=
-	       var->type->component_slots();
-	    continue;
-	 }
-
 	 uniform_size.process(var);
       }
 
       sh->num_samplers = uniform_size.num_shader_samplers;
       sh->NumImages = uniform_size.num_shader_images;
       sh->num_uniform_components = uniform_size.num_shader_uniform_components;
-
       sh->num_combined_uniform_components = sh->num_uniform_components;
+
       for (unsigned i = 0; i < sh->NumUniformBlocks; i++) {
 	 sh->num_combined_uniform_components +=
 	    sh->UniformBlocks[i].UniformBufferSize / 4;
       }
    }
 
-   const unsigned num_user_uniforms = uniform_size.num_active_uniforms;
+   const unsigned num_uniforms = uniform_size.num_active_uniforms;
    const unsigned num_data_slots = uniform_size.num_values;
 
    /* On the outside chance that there were no uniforms, bail out.
     */
-   if (num_user_uniforms == 0)
+   if (num_uniforms == 0)
       return;
 
    struct gl_uniform_storage *uniforms =
-      rzalloc_array(prog, struct gl_uniform_storage, num_user_uniforms);
+      rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
    union gl_constant_value *data =
       rzalloc_array(uniforms, union gl_constant_value, num_data_slots);
 #ifndef NDEBUG
@@ -989,12 +1020,7 @@
       foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
 	 ir_variable *const var = node->as_variable();
 
-	 if ((var == NULL) || (var->data.mode != ir_var_uniform))
-	    continue;
-
-	 /* FINISHME: Update code to process built-in uniforms!
-	  */
-	 if (is_gl_identifier(var->name))
+	 if ((var == NULL) || (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage))
 	    continue;
 
 	 parcel.set_and_process(prog, var);
@@ -1009,10 +1035,13 @@
    }
 
    const unsigned hidden_uniforms =
-      move_hidden_uniforms_to_end(prog, uniforms, num_user_uniforms);
+      move_hidden_uniforms_to_end(prog, uniforms, num_uniforms);
 
    /* Reserve all the explicit locations of the active uniforms. */
-   for (unsigned i = 0; i < num_user_uniforms; i++) {
+   for (unsigned i = 0; i < num_uniforms; i++) {
+      if (uniforms[i].type->is_subroutine())
+         continue;
+
       if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
          /* How many new entries for this uniform? */
          const unsigned entries = MAX2(1, uniforms[i].array_elements);
@@ -1028,7 +1057,13 @@
    }
 
    /* Reserve locations for rest of the uniforms. */
-   for (unsigned i = 0; i < num_user_uniforms; i++) {
+   for (unsigned i = 0; i < num_uniforms; i++) {
+
+      if (uniforms[i].type->is_subroutine())
+         continue;
+      /* Built-in uniforms should not get any location. */
+      if (uniforms[i].builtin)
+         continue;
 
       /* Explicit ones have been set already. */
       if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC)
@@ -1054,15 +1089,74 @@
       prog->NumUniformRemapTable += entries;
    }
 
+   /* Reserve all the explicit locations of the active subroutine uniforms. */
+   for (unsigned i = 0; i < num_uniforms; i++) {
+      if (!uniforms[i].type->is_subroutine())
+         continue;
+
+      if (uniforms[i].remap_location == UNMAPPED_UNIFORM_LOC)
+         continue;
+
+      for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
+         struct gl_shader *sh = prog->_LinkedShaders[j];
+         if (!sh)
+            continue;
+
+         if (!uniforms[i].subroutine[j].active)
+            continue;
+
+         /* How many new entries for this uniform? */
+         const unsigned entries = MAX2(1, uniforms[i].array_elements);
+
+         /* Point the remap table entries at the correct gl_uniform_storage. */
+         for (unsigned k = 0; k < entries; k++) {
+            unsigned element_loc = uniforms[i].remap_location + k;
+            assert(sh->SubroutineUniformRemapTable[element_loc] ==
+                   INACTIVE_UNIFORM_EXPLICIT_LOCATION);
+            sh->SubroutineUniformRemapTable[element_loc] = &uniforms[i];
+         }
+      }
+   }
+
+   /* Reserve locations for the rest of the subroutine uniforms. */
+   for (unsigned i = 0; i < num_uniforms; i++) {
+
+      if (!uniforms[i].type->is_subroutine())
+         continue;
+      const unsigned entries = MAX2(1, uniforms[i].array_elements);
+
+      if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC)
+         continue;
+      for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
+         struct gl_shader *sh = prog->_LinkedShaders[j];
+         if (!sh)
+            continue;
+
+         if (!uniforms[i].subroutine[j].active)
+            continue;
+
+         sh->SubroutineUniformRemapTable =
+            reralloc(sh,
+                     sh->SubroutineUniformRemapTable,
+                     gl_uniform_storage *,
+                     sh->NumSubroutineUniformRemapTable + entries);
+
+         for (unsigned k = 0; k < entries; k++)
+            sh->SubroutineUniformRemapTable[sh->NumSubroutineUniformRemapTable + k] = &uniforms[i];
+         uniforms[i].remap_location = sh->NumSubroutineUniformRemapTable;
+         sh->NumSubroutineUniformRemapTable += entries;
+      }
+   }
+
 #ifndef NDEBUG
-   for (unsigned i = 0; i < num_user_uniforms; i++) {
-      assert(uniforms[i].storage != NULL);
+   for (unsigned i = 0; i < num_uniforms; i++) {
+      assert(uniforms[i].storage != NULL || uniforms[i].builtin);
    }
 
    assert(parcel.values == data_end);
 #endif
 
-   prog->NumUserUniformStorage = num_user_uniforms;
+   prog->NumUniformStorage = num_uniforms;
    prog->NumHiddenUniforms = hidden_uniforms;
    prog->UniformStorage = uniforms;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_varyings.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_varyings.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_varyings.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_varyings.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -54,10 +54,16 @@
    /* Check that the types match between stages.
     */
    const glsl_type *type_to_match = input->type;
-   if (consumer_stage == MESA_SHADER_GEOMETRY) {
-      assert(type_to_match->is_array()); /* Enforced by ast_to_hir */
-      type_to_match = type_to_match->element_type();
+
+   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
+   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
+                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
+                                  consumer_stage == MESA_SHADER_GEOMETRY;
+   if (extra_array_level) {
+      assert(type_to_match->is_array());
+      type_to_match = type_to_match->fields.array;
    }
+
    if (type_to_match != output->type) {
       /* There is a bit of a special case for gl_TexCoord.  This
        * built-in is unsized by default.  Applications that variable
@@ -116,6 +122,18 @@
       return;
    }
 
+   if (input->data.patch != output->data.patch) {
+      linker_error(prog,
+                   "%s shader output `%s' %s patch qualifier, "
+                   "but %s shader input %s patch qualifier\n",
+                   _mesa_shader_stage_to_string(producer_stage),
+                   output->name,
+                   (output->data.patch) ? "has" : "lacks",
+                   _mesa_shader_stage_to_string(consumer_stage),
+                   (input->data.patch) ? "has" : "lacks");
+      return;
+   }
+
    if (!prog->IsES && input->data.invariant != output->data.invariant) {
       linker_error(prog,
                    "%s shader output `%s' %s invariant qualifier, "
@@ -128,7 +146,17 @@
       return;
    }
 
-   if (input->data.interpolation != output->data.interpolation) {
+   /* GLSL >= 4.40 removes the text requiring interpolation qualifiers to
+    * match across stages; they must only match within the same stage.
+    *
+    * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
+    *
+    *     "It is a link-time error if, within the same stage, the interpolation
+    *     qualifiers of variables of the same name do not match."
+    *
+    */
+   if (input->data.interpolation != output->data.interpolation &&
+       prog->Version < 440) {
       linker_error(prog,
                    "%s shader output `%s' specifies %s "
                    "interpolation qualifier, "
@@ -300,7 +328,7 @@
 
    this->location = -1;
    this->orig_name = input;
-   this->is_clip_distance_mesa = false;
+   this->lowered_builtin_array_variable = none;
    this->skip_components = 0;
    this->next_buffer_separator = false;
    this->matched_candidate = NULL;
@@ -349,8 +377,15 @@
     */
    if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerClipDistance &&
        strcmp(this->var_name, "gl_ClipDistance") == 0) {
-      this->is_clip_distance_mesa = true;
+      this->lowered_builtin_array_variable = clip_distance;
    }
+
+   if (ctx->Const.LowerTessLevel &&
+       (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
+      this->lowered_builtin_array_variable = tess_level_outer;
+   if (ctx->Const.LowerTessLevel &&
+       (strcmp(this->var_name, "gl_TessLevelInner") == 0))
+      this->lowered_builtin_array_variable = tess_level_inner;
 }
 
 
@@ -397,9 +432,22 @@
          this->matched_candidate->type->fields.array->matrix_columns;
       const unsigned vector_elements =
          this->matched_candidate->type->fields.array->vector_elements;
-      unsigned actual_array_size = this->is_clip_distance_mesa ?
-         prog->LastClipDistanceArraySize :
-         this->matched_candidate->type->array_size();
+      unsigned actual_array_size;
+      switch (this->lowered_builtin_array_variable) {
+      case clip_distance:
+         actual_array_size = prog->LastClipDistanceArraySize;
+         break;
+      case tess_level_outer:
+         actual_array_size = 4;
+         break;
+      case tess_level_inner:
+         actual_array_size = 2;
+         break;
+      case none:
+      default:
+         actual_array_size = this->matched_candidate->type->array_size();
+         break;
+      }
 
       if (this->is_subscripted) {
          /* Check array bounds. */
@@ -410,7 +458,7 @@
                          actual_array_size);
             return false;
          }
-         unsigned array_elem_size = this->is_clip_distance_mesa ?
+         unsigned array_elem_size = this->lowered_builtin_array_variable ?
             1 : vector_elements * matrix_cols;
          fine_location += array_elem_size * this->array_subscript;
          this->size = 1;
@@ -419,7 +467,7 @@
       }
       this->vector_elements = vector_elements;
       this->matrix_columns = matrix_cols;
-      if (this->is_clip_distance_mesa)
+      if (this->lowered_builtin_array_variable)
          this->type = GL_FLOAT;
       else
          this->type = this->matched_candidate->type->fields.array->gl_type;
@@ -524,6 +572,7 @@
       info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer];
       ++info->NumOutputs;
       info->BufferStride[buffer] += output_size;
+      info->BufferStream[buffer] = this->stream_id;
       num_components -= output_size;
       location++;
       location_frac = 0;
@@ -542,8 +591,21 @@
 tfeedback_decl::find_candidate(gl_shader_program *prog,
                                hash_table *tfeedback_candidates)
 {
-   const char *name = this->is_clip_distance_mesa
-      ? "gl_ClipDistanceMESA" : this->var_name;
+   const char *name = this->var_name;
+   switch (this->lowered_builtin_array_variable) {
+   case none:
+      name = this->var_name;
+      break;
+   case clip_distance:
+      name = "gl_ClipDistanceMESA";
+      break;
+   case tess_level_outer:
+      name = "gl_TessLevelOuterMESA";
+      break;
+   case tess_level_inner:
+      name = "gl_TessLevelInnerMESA";
+      break;
+   }
    this->matched_candidate = (const tfeedback_candidate *)
       hash_table_find(tfeedback_candidates, name);
    if (!this->matched_candidate) {
@@ -699,7 +761,9 @@
 class varying_matches
 {
 public:
-   varying_matches(bool disable_varying_packing, bool consumer_is_fs);
+   varying_matches(bool disable_varying_packing,
+                   gl_shader_stage producer_stage,
+                   gl_shader_stage consumer_stage);
    ~varying_matches();
    void record(ir_variable *producer_var, ir_variable *consumer_var);
    unsigned assign_locations();
@@ -780,15 +844,18 @@
     */
    unsigned matches_capacity;
 
-   const bool consumer_is_fs;
+   gl_shader_stage producer_stage;
+   gl_shader_stage consumer_stage;
 };
 
 } /* anonymous namespace */
 
 varying_matches::varying_matches(bool disable_varying_packing,
-                                 bool consumer_is_fs)
+                                 gl_shader_stage producer_stage,
+                                 gl_shader_stage consumer_stage)
    : disable_varying_packing(disable_varying_packing),
-     consumer_is_fs(consumer_is_fs)
+     producer_stage(producer_stage),
+     consumer_stage(consumer_stage)
 {
    /* Note: this initial capacity is rather arbitrarily chosen to be large
     * enough for many cases without wasting an unreasonable amount of space.
@@ -839,7 +906,7 @@
    }
 
    if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
-       !consumer_is_fs) {
+       consumer_stage != MESA_SHADER_FRAGMENT) {
       /* Since this varying is not being consumed by the fragment shader, its
        * interpolation type varying cannot possibly affect rendering.  Also,
        * this variable is non-flat and is (or contains) an integer.
@@ -876,9 +943,22 @@
    this->matches[this->num_matches].packing_order
       = this->compute_packing_order(var);
    if (this->disable_varying_packing) {
-      unsigned slots = var->type->is_array()
-         ? (var->type->length * var->type->fields.array->matrix_columns)
-         : var->type->matrix_columns;
+      const struct glsl_type *type = var->type;
+      unsigned slots;
+
+      /* Some shader stages have 2-dimensional varyings. Use the inner type. */
+      if (!var->data.patch &&
+          ((var == producer_var && producer_stage == MESA_SHADER_TESS_CTRL) ||
+           (var == consumer_var && (consumer_stage == MESA_SHADER_TESS_CTRL ||
+                                    consumer_stage == MESA_SHADER_TESS_EVAL ||
+                                    consumer_stage == MESA_SHADER_GEOMETRY)))) {
+         assert(type->is_array());
+         type = type->fields.array;
+      }
+
+      slots = (type->is_array()
+            ? (type->length * type->fields.array->matrix_columns)
+            : type->matrix_columns);
       this->matches[this->num_matches].num_components = 4 * slots;
    } else {
       this->matches[this->num_matches].num_components
@@ -906,8 +986,17 @@
          &varying_matches::match_comparator);
 
    unsigned generic_location = 0;
+   unsigned generic_patch_location = MAX_VARYING*4;
 
    for (unsigned i = 0; i < this->num_matches; i++) {
+      unsigned *location = &generic_location;
+
+      if ((this->matches[i].consumer_var &&
+           this->matches[i].consumer_var->data.patch) ||
+          (this->matches[i].producer_var &&
+           this->matches[i].producer_var->data.patch))
+         location = &generic_patch_location;
+
       /* Advance to the next slot if this varying has a different packing
        * class than the previous one, and we're not already on a slot
        * boundary.
@@ -915,12 +1004,12 @@
       if (i > 0 &&
           this->matches[i - 1].packing_class
           != this->matches[i].packing_class) {
-         generic_location = ALIGN(generic_location, 4);
+         *location = ALIGN(*location, 4);
       }
 
-      this->matches[i].generic_location = generic_location;
+      this->matches[i].generic_location = *location;
 
-      generic_location += this->matches[i].num_components;
+      *location += this->matches[i].num_components;
    }
 
    return (generic_location + 3) / 4;
@@ -979,7 +1068,8 @@
     *
     * Therefore, the packing class depends only on the interpolation type.
     */
-   unsigned packing_class = var->data.centroid | (var->data.sample << 1);
+   unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
+                            (var->data.patch << 2);
    packing_class *= 4;
    packing_class += var->data.interpolation;
    return packing_class;
@@ -1133,11 +1223,11 @@
 populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
                              hash_table *consumer_inputs,
                              hash_table *consumer_interface_inputs,
-                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX])
+                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
 {
    memset(consumer_inputs_with_locations,
           0,
-          sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_MAX);
+          sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
 
    foreach_in_list(ir_instruction, node, ir) {
       ir_variable *const input_var = node->as_variable();
@@ -1193,7 +1283,7 @@
                    const ir_variable *output_var,
                    hash_table *consumer_inputs,
                    hash_table *consumer_interface_inputs,
-                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX])
+                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
 {
    ir_variable *input_var;
 
@@ -1294,9 +1384,6 @@
  *        each of these objects that matches one of the outputs of the
  *        producer.
  *
- * \param gs_input_vertices: if \c consumer is a geometry shader, this is the
- *        number of input vertices it accepts.  Otherwise zero.
- *
  * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
  * be NULL.  In this case, varying locations are assigned solely based on the
  * requirements of transform feedback.
@@ -1307,21 +1394,44 @@
 			 struct gl_shader_program *prog,
 			 gl_shader *producer, gl_shader *consumer,
                          unsigned num_tfeedback_decls,
-                         tfeedback_decl *tfeedback_decls,
-                         unsigned gs_input_vertices)
+                         tfeedback_decl *tfeedback_decls)
 {
-   varying_matches matches(ctx->Const.DisableVaryingPacking,
-                           consumer && consumer->Stage == MESA_SHADER_FRAGMENT);
+   if (ctx->Const.DisableVaryingPacking) {
+      /* Transform feedback code assumes varyings are packed, so if the driver
+       * has disabled varying packing, make sure it does not support transform
+       * feedback.
+       */
+      assert(!ctx->Extensions.EXT_transform_feedback);
+   }
+
+   /* Tessellation shaders treat inputs and outputs as shared memory and can
+    * access inputs and outputs of other invocations.
+    * Therefore, they can't be lowered to temps easily (and definitely not
+    * efficiently).
+    */
+   bool disable_varying_packing =
+      ctx->Const.DisableVaryingPacking ||
+      (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
+      (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
+      (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
+
+   varying_matches matches(disable_varying_packing,
+                           producer ? producer->Stage : (gl_shader_stage)-1,
+                           consumer ? consumer->Stage : (gl_shader_stage)-1);
    hash_table *tfeedback_candidates
       = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
    hash_table *consumer_inputs
       = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
    hash_table *consumer_interface_inputs
       = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
-   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX] = {
+   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
       NULL,
    };
 
+   unsigned consumer_vertices = 0;
+   if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
+      consumer_vertices = prog->Geom.VerticesIn;
+
    /* Operate in a total of four passes.
     *
     * 1. Sort inputs / outputs into a canonical order.  This is necessary so
@@ -1380,8 +1490,12 @@
          /* If a matching input variable was found, add this ouptut (and the
           * input) to the set.  If this is a separable program and there is no
           * consumer stage, add the output.
+          *
+          * Always add TCS outputs. They are shared by all invocations
+          * within a patch and can be used as shared memory.
           */
-         if (input_var || (prog->SeparateShader && consumer == NULL)) {
+         if (input_var || (prog->SeparateShader && consumer == NULL) ||
+             producer->Type == GL_TESS_CONTROL_SHADER) {
             matches.record(output_var, input_var);
          }
 
@@ -1448,20 +1562,14 @@
    hash_table_dtor(consumer_inputs);
    hash_table_dtor(consumer_interface_inputs);
 
-   if (ctx->Const.DisableVaryingPacking) {
-      /* Transform feedback code assumes varyings are packed, so if the driver
-       * has disabled varying packing, make sure it does not support transform
-       * feedback.
-       */
-      assert(!ctx->Extensions.EXT_transform_feedback);
-   } else {
+   if (!disable_varying_packing) {
       if (producer) {
          lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
                                0, producer);
       }
       if (consumer) {
          lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
-                               gs_input_vertices, consumer);
+                               consumer_vertices, consumer);
       }
    }
 
@@ -1540,13 +1648,15 @@
    const unsigned output_components = output_vectors * 4;
    if (output_components > max_output_components) {
       if (ctx->API == API_OPENGLES2 || prog->IsES)
-         linker_error(prog, "shader uses too many output vectors "
+         linker_error(prog, "%s shader uses too many output vectors "
                       "(%u > %u)\n",
+                      _mesa_shader_stage_to_string(producer->Stage),
                       output_vectors,
                       max_output_components / 4);
       else
-         linker_error(prog, "shader uses too many output components "
+         linker_error(prog, "%s shader uses too many output components "
                       "(%u > %u)\n",
+                      _mesa_shader_stage_to_string(producer->Stage),
                       output_components,
                       max_output_components);
 
@@ -1579,13 +1689,15 @@
    const unsigned input_components = input_vectors * 4;
    if (input_components > max_input_components) {
       if (ctx->API == API_OPENGLES2 || prog->IsES)
-         linker_error(prog, "shader uses too many input vectors "
+         linker_error(prog, "%s shader uses too many input vectors "
                       "(%u > %u)\n",
+                      _mesa_shader_stage_to_string(consumer->Stage),
                       input_vectors,
                       max_input_components / 4);
       else
-         linker_error(prog, "shader uses too many input components "
+         linker_error(prog, "%s shader uses too many input components "
                       "(%u > %u)\n",
+                      _mesa_shader_stage_to_string(consumer->Stage),
                       input_components,
                       max_input_components);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_varyings.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_varyings.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/link_varyings.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/link_varyings.h	2015-09-16 14:36:09.000000000 +0000
@@ -128,7 +128,7 @@
     */
    unsigned num_components() const
    {
-      if (this->is_clip_distance_mesa)
+      if (this->lowered_builtin_array_variable)
          return this->size;
       else
          return this->vector_elements * this->matrix_columns * this->size;
@@ -161,10 +161,15 @@
    unsigned array_subscript;
 
    /**
-    * True if the variable is gl_ClipDistance and the driver lowers
-    * gl_ClipDistance to gl_ClipDistanceMESA.
+    * Non-zero if the variable is gl_ClipDistance, gl_TessLevelOuter or
+    * gl_TessLevelInner and the driver lowers it to gl_*MESA.
     */
-   bool is_clip_distance_mesa;
+   enum {
+      none,
+      clip_distance,
+      tess_level_outer,
+      tess_level_inner,
+   } lowered_builtin_array_variable;
 
    /**
     * The vertex shader output location that the linker assigned for this
@@ -250,8 +255,7 @@
 			 struct gl_shader_program *prog,
 			 gl_shader *producer, gl_shader *consumer,
                          unsigned num_tfeedback_decls,
-                         tfeedback_decl *tfeedback_decls,
-                         unsigned gs_input_vertices);
+                         tfeedback_decl *tfeedback_decls);
 
 bool
 check_against_output_limit(struct gl_context *ctx,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/loop_unroll.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/loop_unroll.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/loop_unroll.cpp	2015-07-27 05:57:14.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/loop_unroll.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -145,6 +145,7 @@
                   unsupported_variable_indexing = true;
                break;
             case ir_var_uniform:
+            case ir_var_shader_storage:
                if (options->EmitNoIndirectUniform)
                   unsupported_variable_indexing = true;
                break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_clip_distance.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_clip_distance.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_clip_distance.cpp	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_clip_distance.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -55,9 +55,9 @@
 class lower_clip_distance_visitor : public ir_rvalue_visitor {
 public:
    explicit lower_clip_distance_visitor(gl_shader_stage shader_stage)
-      : progress(false), old_clip_distance_1d_var(NULL),
-        old_clip_distance_2d_var(NULL), new_clip_distance_1d_var(NULL),
-        new_clip_distance_2d_var(NULL), shader_stage(shader_stage)
+      : progress(false), old_clip_distance_out_var(NULL),
+        old_clip_distance_in_var(NULL), new_clip_distance_out_var(NULL),
+        new_clip_distance_in_var(NULL), shader_stage(shader_stage)
    {
    }
 
@@ -80,20 +80,21 @@
     *
     * Note:
     *
-    * - the 2d_var is for geometry shader input only.
+    * - the in_var is for geometry and both tessellation shader inputs only.
     *
-    * - since gl_ClipDistance is available in geometry shaders as both an
-    *   input and an output, it's possible for both old_clip_distance_1d_var
-    *   and old_clip_distance_2d_var to be non-null.
+    * - since gl_ClipDistance is available in tessellation control,
+    *   tessellation evaluation and geometry shaders as both an input
+    *   and an output, it's possible for both old_clip_distance_out_var
+    *   and old_clip_distance_in_var to be non-null.
     */
-   ir_variable *old_clip_distance_1d_var;
-   ir_variable *old_clip_distance_2d_var;
+   ir_variable *old_clip_distance_out_var;
+   ir_variable *old_clip_distance_in_var;
 
    /**
     * Pointer to the newly-created gl_ClipDistanceMESA variable.
     */
-   ir_variable *new_clip_distance_1d_var;
-   ir_variable *new_clip_distance_2d_var;
+   ir_variable *new_clip_distance_out_var;
+   ir_variable *new_clip_distance_in_var;
 
    /**
     * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX)
@@ -110,62 +111,81 @@
 ir_visitor_status
 lower_clip_distance_visitor::visit(ir_variable *ir)
 {
+   ir_variable **old_var;
+   ir_variable **new_var;
+
    if (!ir->name || strcmp(ir->name, "gl_ClipDistance") != 0)
       return visit_continue;
    assert (ir->type->is_array());
 
-   if (!ir->type->element_type()->is_array()) {
-      /* 1D gl_ClipDistance (used for vertex and geometry output, and fragment
-       * input).
-       */
-      if (this->old_clip_distance_1d_var)
+   if (ir->data.mode == ir_var_shader_out) {
+      if (this->old_clip_distance_out_var)
+         return visit_continue;
+      old_var = &old_clip_distance_out_var;
+      new_var = &new_clip_distance_out_var;
+   } else if (ir->data.mode == ir_var_shader_in) {
+      if (this->old_clip_distance_in_var)
          return visit_continue;
+      old_var = &old_clip_distance_in_var;
+      new_var = &new_clip_distance_in_var;
+   } else {
+      unreachable("not reached");
+   }
 
-      this->progress = true;
-      this->old_clip_distance_1d_var = ir;
-      assert (ir->type->element_type() == glsl_type::float_type);
+   this->progress = true;
+
+   if (!ir->type->fields.array->is_array()) {
+      /* gl_ClipDistance (used for vertex, tessellation evaluation and
+       * geometry output, and fragment input).
+       */
+      assert((ir->data.mode == ir_var_shader_in &&
+              this->shader_stage == MESA_SHADER_FRAGMENT) ||
+             (ir->data.mode == ir_var_shader_out &&
+              (this->shader_stage == MESA_SHADER_VERTEX ||
+               this->shader_stage == MESA_SHADER_TESS_EVAL ||
+               this->shader_stage == MESA_SHADER_GEOMETRY)));
+
+      *old_var = ir;
+      assert (ir->type->fields.array == glsl_type::float_type);
       unsigned new_size = (ir->type->array_size() + 3) / 4;
 
       /* Clone the old var so that we inherit all of its properties */
-      this->new_clip_distance_1d_var = ir->clone(ralloc_parent(ir), NULL);
+      *new_var = ir->clone(ralloc_parent(ir), NULL);
 
       /* And change the properties that we need to change */
-      this->new_clip_distance_1d_var->name
-         = ralloc_strdup(this->new_clip_distance_1d_var,
-                         "gl_ClipDistanceMESA");
-      this->new_clip_distance_1d_var->type
-         = glsl_type::get_array_instance(glsl_type::vec4_type, new_size);
-      this->new_clip_distance_1d_var->data.max_array_access
-         = ir->data.max_array_access / 4;
+      (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA");
+      (*new_var)->type = glsl_type::get_array_instance(glsl_type::vec4_type,
+                                                       new_size);
+      (*new_var)->data.max_array_access = ir->data.max_array_access / 4;
 
-      ir->replace_with(this->new_clip_distance_1d_var);
+      ir->replace_with(*new_var);
    } else {
-      /* 2D gl_ClipDistance (used for geometry input). */
-      assert(ir->data.mode == ir_var_shader_in &&
-             this->shader_stage == MESA_SHADER_GEOMETRY);
-      if (this->old_clip_distance_2d_var)
-         return visit_continue;
-
-      this->progress = true;
-      this->old_clip_distance_2d_var = ir;
-      assert (ir->type->element_type()->element_type() == glsl_type::float_type);
-      unsigned new_size = (ir->type->element_type()->array_size() + 3) / 4;
+      /* 2D gl_ClipDistance (used for tessellation control, tessellation
+       * evaluation and geometry input, and tessellation control output).
+       */
+      assert((ir->data.mode == ir_var_shader_in &&
+              (this->shader_stage == MESA_SHADER_GEOMETRY ||
+               this->shader_stage == MESA_SHADER_TESS_EVAL)) ||
+             this->shader_stage == MESA_SHADER_TESS_CTRL);
+
+      *old_var = ir;
+      assert (ir->type->fields.array->fields.array == glsl_type::float_type);
+      unsigned new_size = (ir->type->fields.array->array_size() + 3) / 4;
 
       /* Clone the old var so that we inherit all of its properties */
-      this->new_clip_distance_2d_var = ir->clone(ralloc_parent(ir), NULL);
+      *new_var = ir->clone(ralloc_parent(ir), NULL);
 
       /* And change the properties that we need to change */
-      this->new_clip_distance_2d_var->name
-         = ralloc_strdup(this->new_clip_distance_2d_var, "gl_ClipDistanceMESA");
-      this->new_clip_distance_2d_var->type = glsl_type::get_array_instance(
+      (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA");
+      (*new_var)->type = glsl_type::get_array_instance(
          glsl_type::get_array_instance(glsl_type::vec4_type,
             new_size),
          ir->type->array_size());
-      this->new_clip_distance_2d_var->data.max_array_access
-         = ir->data.max_array_access / 4;
+      (*new_var)->data.max_array_access = ir->data.max_array_access / 4;
 
-      ir->replace_with(this->new_clip_distance_2d_var);
+      ir->replace_with(*new_var);
    }
+
    return visit_continue;
 }
 
@@ -242,26 +262,27 @@
 {
    /* Note that geometry shaders contain gl_ClipDistance both as an input
     * (which is a 2D array) and an output (which is a 1D array), so it's
-    * possible for both this->old_clip_distance_1d_var and
-    * this->old_clip_distance_2d_var to be non-NULL in the same shader.
+    * possible for both this->old_clip_distance_out_var and
+    * this->old_clip_distance_in_var to be non-NULL in the same shader.
     */
 
-   if (this->old_clip_distance_1d_var) {
-      ir_dereference_variable *var_ref = ir->as_dereference_variable();
-      if (var_ref && var_ref->var == this->old_clip_distance_1d_var)
+   if (!ir->type->is_array())
+      return false;
+   if (ir->type->fields.array != glsl_type::float_type)
+      return false;
+
+   if (this->old_clip_distance_out_var) {
+      if (ir->variable_referenced() == this->old_clip_distance_out_var)
          return true;
    }
-   if (this->old_clip_distance_2d_var) {
-      /* 2D clip distance is only possible as a geometry input */
-      assert(this->shader_stage == MESA_SHADER_GEOMETRY);
+   if (this->old_clip_distance_in_var) {
+      assert(this->shader_stage == MESA_SHADER_TESS_CTRL ||
+             this->shader_stage == MESA_SHADER_TESS_EVAL ||
+             this->shader_stage == MESA_SHADER_GEOMETRY ||
+             this->shader_stage == MESA_SHADER_FRAGMENT);
 
-      ir_dereference_array *array_ref = ir->as_dereference_array();
-      if (array_ref) {
-         ir_dereference_variable *var_ref =
-            array_ref->array->as_dereference_variable();
-         if (var_ref && var_ref->var == this->old_clip_distance_2d_var)
-            return true;
-      }
+      if (ir->variable_referenced() == this->old_clip_distance_in_var)
+         return true;
    }
    return false;
 }
@@ -279,29 +300,33 @@
 ir_rvalue *
 lower_clip_distance_visitor::lower_clip_distance_vec8(ir_rvalue *ir)
 {
-   if (this->old_clip_distance_1d_var) {
-      ir_dereference_variable *var_ref = ir->as_dereference_variable();
-      if (var_ref && var_ref->var == this->old_clip_distance_1d_var) {
-         return new(ralloc_parent(ir))
-            ir_dereference_variable(this->new_clip_distance_1d_var);
-      }
+   if (!ir->type->is_array())
+      return NULL;
+   if (ir->type->fields.array != glsl_type::float_type)
+      return NULL;
+
+   ir_variable **new_var = NULL;
+   if (this->old_clip_distance_out_var) {
+      if (ir->variable_referenced() == this->old_clip_distance_out_var)
+         new_var = &this->new_clip_distance_out_var;
+   }
+   if (this->old_clip_distance_in_var) {
+      if (ir->variable_referenced() == this->old_clip_distance_in_var)
+         new_var = &this->new_clip_distance_in_var;
    }
-   if (this->old_clip_distance_2d_var) {
-      /* 2D clip distance is only possible as a geometry input */
-      assert(this->shader_stage == MESA_SHADER_GEOMETRY);
+   if (new_var == NULL)
+      return NULL;
 
+   if (ir->as_dereference_variable()) {
+      return new(ralloc_parent(ir)) ir_dereference_variable(*new_var);
+   } else {
       ir_dereference_array *array_ref = ir->as_dereference_array();
-      if (array_ref) {
-         ir_dereference_variable *var_ref =
-            array_ref->array->as_dereference_variable();
-         if (var_ref && var_ref->var == this->old_clip_distance_2d_var) {
-            return new(ralloc_parent(ir))
-               ir_dereference_array(this->new_clip_distance_2d_var,
-                                    array_ref->array_index);
-         }
-      }
+      assert(array_ref);
+      assert(array_ref->array->as_dereference_variable());
+
+      return new(ralloc_parent(ir))
+         ir_dereference_array(*new_var, array_ref->array_index);
    }
-   return NULL;
 }
 
 
@@ -540,10 +565,10 @@
 
    visit_list_elements(&v, shader->ir);
 
-   if (v.new_clip_distance_1d_var)
-      shader->symbols->add_variable(v.new_clip_distance_1d_var);
-   if (v.new_clip_distance_2d_var)
-      shader->symbols->add_variable(v.new_clip_distance_2d_var);
+   if (v.new_clip_distance_out_var)
+      shader->symbols->add_variable(v.new_clip_distance_out_var);
+   if (v.new_clip_distance_in_var)
+      shader->symbols->add_variable(v.new_clip_distance_in_var);
 
    return v.progress;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_named_interface_blocks.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_named_interface_blocks.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_named_interface_blocks.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_named_interface_blocks.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -108,7 +108,8 @@
        * but, this will require changes to the other uniform block
        * support code.
        */
-      if (var->data.mode == ir_var_uniform)
+      if (var->data.mode == ir_var_uniform ||
+          var->data.mode == ir_var_shader_storage)
          continue;
 
       const glsl_type * iface_t = var->type;
@@ -125,7 +126,8 @@
       for (unsigned i = 0; i < iface_t->length; i++) {
          const char * field_name = iface_t->fields.structure[i].name;
          char *iface_field_name =
-            ralloc_asprintf(mem_ctx, "%s.%s.%s",
+            ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
+                            var->data.mode == ir_var_shader_in ? "in" : "out",
                             iface_t->name, var->name, field_name);
 
          ir_variable *found_var =
@@ -158,6 +160,7 @@
                iface_t->fields.structure[i].interpolation;
             new_var->data.centroid = iface_t->fields.structure[i].centroid;
             new_var->data.sample = iface_t->fields.structure[i].sample;
+            new_var->data.patch = iface_t->fields.structure[i].patch;
 
             new_var->init_interface_type(iface_t);
             hash_table_insert(interface_namespace, new_var,
@@ -212,12 +215,14 @@
     * but, this will require changes to the other uniform block
     * support code.
     */
-   if (var->data.mode == ir_var_uniform)
+   if (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage)
       return;
 
    if (var->get_interface_type() != NULL) {
       char *iface_field_name =
-         ralloc_asprintf(mem_ctx, "%s.%s.%s", var->get_interface_type()->name,
+         ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
+                         var->data.mode == ir_var_shader_in ? "in" : "out",
+                         var->get_interface_type()->name,
                          var->name, ir->field);
       /* Find the variable in the set of flattened interface blocks */
       ir_variable *found_var =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_output_reads.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_output_reads.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_output_reads.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_output_reads.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -48,8 +48,10 @@
    hash_table *replacements;
 
    void *mem_ctx;
+
+   unsigned stage;
 public:
-   output_read_remover();
+   output_read_remover(unsigned stage);
    ~output_read_remover();
    virtual ir_visitor_status visit(class ir_dereference_variable *);
    virtual ir_visitor_status visit_leave(class ir_emit_vertex *);
@@ -75,8 +77,9 @@
    return hash_table_string_hash(var->name);
 }
 
-output_read_remover::output_read_remover()
+output_read_remover::output_read_remover(unsigned stage)
 {
+   this->stage = stage;
    mem_ctx = ralloc_context(NULL);
    replacements =
       hash_table_ctor(0, hash_table_var_hash, hash_table_pointer_compare);
@@ -93,6 +96,8 @@
 {
    if (ir->var->data.mode != ir_var_shader_out)
       return visit_continue;
+   if (stage == MESA_SHADER_TESS_CTRL)
+      return visit_continue;
 
    ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var);
 
@@ -166,8 +171,8 @@
 }
 
 void
-lower_output_reads(exec_list *instructions)
+lower_output_reads(unsigned stage, exec_list *instructions)
 {
-   output_read_remover v;
+   output_read_remover v(stage);
    visit_list_elements(&v, instructions);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_packed_varyings.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_packed_varyings.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_packed_varyings.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_packed_varyings.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -610,6 +610,7 @@
       }
       packed_var->data.centroid = unpacked_var->data.centroid;
       packed_var->data.sample = unpacked_var->data.sample;
+      packed_var->data.patch = unpacked_var->data.patch;
       packed_var->data.interpolation = unpacked_var->data.interpolation;
       packed_var->data.location = location;
       unpacked_var->insert_before(packed_var);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_subroutine.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_subroutine.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_subroutine.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_subroutine.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_subroutine.cpp
+ *
+ * lowers subroutines to an if ladder.
+ */
+
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "ir.h"
+#include "ir_builder.h"
+
+using namespace ir_builder;
+namespace {
+
+class lower_subroutine_visitor : public ir_hierarchical_visitor {
+public:
+   lower_subroutine_visitor(struct _mesa_glsl_parse_state *state)
+      : state(state)
+   {
+      this->progress = false;
+   }
+
+   ir_visitor_status visit_leave(ir_call *);
+   bool progress;
+   struct _mesa_glsl_parse_state *state;
+};
+
+}
+
+bool
+lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state)
+{
+   lower_subroutine_visitor v(state);
+   visit_list_elements(&v, instructions);
+   return v.progress;
+}
+
+ir_visitor_status
+lower_subroutine_visitor::visit_leave(ir_call *ir)
+{
+   if (!ir->sub_var)
+      return visit_continue;
+
+   void *mem_ctx = ralloc_parent(ir);
+   ir_if *last_branch = NULL;
+   ir_dereference_variable *return_deref = ir->return_deref;
+
+   for (int s = this->state->num_subroutines - 1; s >= 0; s--) {
+      ir_rvalue *var;
+      ir_constant *lc = new(mem_ctx)ir_constant(s);
+      ir_function *fn = this->state->subroutines[s];
+      bool is_compat = false;
+
+      for (int i = 0; i < fn->num_subroutine_types; i++) {
+         if (ir->sub_var->type->without_array() == fn->subroutine_types[i]) {
+            is_compat = true;
+            break;
+         }
+      }
+      if (is_compat == false)
+         continue;
+
+      if (ir->array_idx != NULL)
+         var = new(mem_ctx) ir_dereference_array(ir->sub_var, ir->array_idx->clone(mem_ctx, NULL));
+      else
+         var = new(mem_ctx) ir_dereference_variable(ir->sub_var);
+
+      ir_function_signature *sub_sig =
+         fn->exact_matching_signature(this->state,
+                                      &ir->actual_parameters);
+
+      ir_call *new_call = new(mem_ctx) ir_call(sub_sig, return_deref, &ir->actual_parameters);
+      if (!last_branch)
+         last_branch = if_tree(equal(subr_to_int(var), lc), new_call);
+      else
+         last_branch = if_tree(equal(subr_to_int(var), lc), new_call, last_branch);
+
+      if (return_deref && s > 0)
+        return_deref = return_deref->clone(mem_ctx, NULL);
+   }
+   if (last_branch)
+      ir->insert_before(last_branch);
+   ir->remove();
+
+   return visit_continue;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_tess_level.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_tess_level.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_tess_level.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_tess_level.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,459 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_tess_level.cpp
+ *
+ * This pass accounts for the difference between the way gl_TessLevelOuter
+ * and gl_TessLevelInner is declared in standard GLSL (as an array of
+ * floats), and the way it is frequently implemented in hardware (as a vec4
+ * and vec2).
+ *
+ * The declaration of gl_TessLevel* is replaced with a declaration
+ * of gl_TessLevel*MESA, and any references to gl_TessLevel* are
+ * translated to refer to gl_TessLevel*MESA with the appropriate
+ * swizzling of array indices.  For instance:
+ *
+ *   gl_TessLevelOuter[i]
+ *
+ * is translated into:
+ *
+ *   gl_TessLevelOuterMESA[i]
+ *
+ * Since some hardware may not internally represent gl_TessLevel* as a vec4
+ * and a vec2, this lowering pass is optional.  To enable it, set the
+ * LowerTessLevel flag in gl_shader_compiler_options to true.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir_rvalue_visitor.h"
+#include "ir.h"
+#include "program/prog_instruction.h" /* For WRITEMASK_* */
+
+namespace {
+
+class lower_tess_level_visitor : public ir_rvalue_visitor {
+public:
+   explicit lower_tess_level_visitor(gl_shader_stage shader_stage)
+      : progress(false), old_tess_level_outer_var(NULL),
+        old_tess_level_inner_var(NULL), new_tess_level_outer_var(NULL),
+        new_tess_level_inner_var(NULL), shader_stage(shader_stage)
+   {
+   }
+
+   virtual ir_visitor_status visit(ir_variable *);
+   bool is_tess_level_array(ir_rvalue *ir);
+   ir_rvalue *lower_tess_level_array(ir_rvalue *ir);
+   virtual ir_visitor_status visit_leave(ir_assignment *);
+   void visit_new_assignment(ir_assignment *ir);
+   virtual ir_visitor_status visit_leave(ir_call *);
+
+   virtual void handle_rvalue(ir_rvalue **rvalue);
+
+   void fix_lhs(ir_assignment *);
+
+   bool progress;
+
+   /**
+    * Pointer to the declaration of gl_TessLevel*, if found.
+    */
+   ir_variable *old_tess_level_outer_var;
+   ir_variable *old_tess_level_inner_var;
+
+   /**
+    * Pointer to the newly-created gl_TessLevel*MESA variables.
+    */
+   ir_variable *new_tess_level_outer_var;
+   ir_variable *new_tess_level_inner_var;
+
+   /**
+    * Type of shader we are compiling (e.g. MESA_SHADER_TESS_CTRL)
+    */
+   const gl_shader_stage shader_stage;
+};
+
+} /* anonymous namespace */
+
+/**
+ * Replace any declaration of gl_TessLevel* as an array of floats with a
+ * declaration of gl_TessLevel*MESA as a vec4 (outer) or vec2 (inner).
+ */
+ir_visitor_status
+lower_tess_level_visitor::visit(ir_variable *ir)
+{
+   if ((!ir->name) ||
+       ((strcmp(ir->name, "gl_TessLevelInner") != 0) &&
+        (strcmp(ir->name, "gl_TessLevelOuter") != 0)))
+      return visit_continue;
+
+   assert (ir->type->is_array());
+
+   if (strcmp(ir->name, "gl_TessLevelOuter") == 0) {
+      if (this->old_tess_level_outer_var)
+         return visit_continue;
+
+      old_tess_level_outer_var = ir;
+      assert(ir->type->fields.array == glsl_type::float_type);
+
+      /* Clone the old var so that we inherit all of its properties */
+      new_tess_level_outer_var = ir->clone(ralloc_parent(ir), NULL);
+
+      /* And change the properties that we need to change */
+      new_tess_level_outer_var->name = ralloc_strdup(new_tess_level_outer_var,
+                                                "gl_TessLevelOuterMESA");
+      new_tess_level_outer_var->type = glsl_type::vec4_type;
+      new_tess_level_outer_var->data.max_array_access = 0;
+
+      ir->replace_with(new_tess_level_outer_var);
+   } else if (strcmp(ir->name, "gl_TessLevelInner") == 0) {
+      if (this->old_tess_level_inner_var)
+         return visit_continue;
+
+      old_tess_level_inner_var = ir;
+      assert(ir->type->fields.array == glsl_type::float_type);
+
+      /* Clone the old var so that we inherit all of its properties */
+      new_tess_level_inner_var = ir->clone(ralloc_parent(ir), NULL);
+
+      /* And change the properties that we need to change */
+      new_tess_level_inner_var->name = ralloc_strdup(new_tess_level_inner_var,
+                                                "gl_TessLevelInnerMESA");
+      new_tess_level_inner_var->type = glsl_type::vec2_type;
+      new_tess_level_inner_var->data.max_array_access = 0;
+
+      ir->replace_with(new_tess_level_inner_var);
+   } else {
+      assert(0);
+   }
+
+   this->progress = true;
+
+   return visit_continue;
+}
+
+
+/**
+ * Determine whether the given rvalue describes an array of floats that
+ * needs to be lowered to a vec4 or vec2; that is, determine whether it
+ * matches one of the following patterns:
+ *
+ * - gl_TessLevelOuter
+ * - gl_TessLevelInner
+ */
+bool
+lower_tess_level_visitor::is_tess_level_array(ir_rvalue *ir)
+{
+   if (!ir->type->is_array())
+      return false;
+   if (ir->type->fields.array != glsl_type::float_type)
+      return false;
+
+   if (this->old_tess_level_outer_var) {
+      if (ir->variable_referenced() == this->old_tess_level_outer_var)
+         return true;
+   }
+   if (this->old_tess_level_inner_var) {
+      if (ir->variable_referenced() == this->old_tess_level_inner_var)
+         return true;
+   }
+   return false;
+}
+
+
+/**
+ * If the given ir satisfies is_tess_level_array(), return new ir
+ * representing its lowered equivalent.  That is, map:
+ *
+ * - gl_TessLevelOuter => gl_TessLevelOuterMESA
+ * - gl_TessLevelInner => gl_TessLevelInnerMESA
+ *
+ * Otherwise return NULL.
+ */
+ir_rvalue *
+lower_tess_level_visitor::lower_tess_level_array(ir_rvalue *ir)
+{
+   if (!ir->type->is_array())
+      return NULL;
+   if (ir->type->fields.array != glsl_type::float_type)
+      return NULL;
+
+   ir_variable **new_var = NULL;
+
+   if (this->old_tess_level_outer_var) {
+      if (ir->variable_referenced() == this->old_tess_level_outer_var)
+         new_var = &this->new_tess_level_outer_var;
+   }
+   if (this->old_tess_level_inner_var) {
+      if (ir->variable_referenced() == this->old_tess_level_inner_var)
+         new_var = &this->new_tess_level_inner_var;
+   }
+
+   if (new_var == NULL)
+      return NULL;
+
+   assert(ir->as_dereference_variable());
+   return new(ralloc_parent(ir)) ir_dereference_variable(*new_var);
+}
+
+
+void
+lower_tess_level_visitor::handle_rvalue(ir_rvalue **rv)
+{
+   if (*rv == NULL)
+      return;
+
+   ir_dereference_array *const array_deref = (*rv)->as_dereference_array();
+   if (array_deref == NULL)
+      return;
+
+   /* Replace any expression that indexes one of the floats in gl_TessLevel*
+    * with an expression that extracts the corresponding component from
+    * the matching gl_TessLevel*MESA vector.
+    */
+   ir_rvalue *lowered_vec4 =
+      this->lower_tess_level_array(array_deref->array);
+   if (lowered_vec4 != NULL) {
+      this->progress = true;
+      void *mem_ctx = ralloc_parent(array_deref);
+
+      ir_expression *const expr =
+         new(mem_ctx) ir_expression(ir_binop_vector_extract,
+                                    lowered_vec4,
+                                    array_deref->array_index);
+
+      *rv = expr;
+   }
+}
+
+void
+lower_tess_level_visitor::fix_lhs(ir_assignment *ir)
+{
+   if (ir->lhs->ir_type != ir_type_expression)
+      return;
+   void *mem_ctx = ralloc_parent(ir);
+   ir_expression *const expr = (ir_expression *) ir->lhs;
+
+   /* The expression must be of the form:
+    *
+    *     (vector_extract gl_TessLevel*MESA, j).
+    */
+   assert(expr->operation == ir_binop_vector_extract);
+   assert(expr->operands[0]->ir_type == ir_type_dereference_variable);
+   assert((expr->operands[0]->type == glsl_type::vec4_type) ||
+          (expr->operands[0]->type == glsl_type::vec2_type));
+
+   ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0];
+
+   ir_constant *old_index_constant = expr->operands[1]->constant_expression_value();
+   if (!old_index_constant) {
+      ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
+                                           expr->operands[0]->type,
+                                           new_lhs->clone(mem_ctx, NULL),
+                                           ir->rhs,
+                                           expr->operands[1]);
+   }
+   ir->set_lhs(new_lhs);
+
+   if (old_index_constant) {
+      /* gl_TessLevel* is being accessed via a constant index.  Don't bother
+       * creating a vector insert op. Just use a write mask.
+       */
+      ir->write_mask = 1 << old_index_constant->get_int_component(0);
+   } else {
+      ir->write_mask = (1 << expr->operands[0]->type->vector_elements) - 1;
+   }
+}
+
+/**
+ * Replace any assignment having a gl_TessLevel* (undereferenced) as
+ * its LHS or RHS with a sequence of assignments, one for each component of
+ * the array.  Each of these assignments is lowered to refer to
+ * gl_TessLevel*MESA as appropriate.
+ */
+ir_visitor_status
+lower_tess_level_visitor::visit_leave(ir_assignment *ir)
+{
+   /* First invoke the base class visitor.  This causes handle_rvalue() to be
+    * called on ir->rhs and ir->condition.
+    */
+   ir_rvalue_visitor::visit_leave(ir);
+
+   if (this->is_tess_level_array(ir->lhs) ||
+       this->is_tess_level_array(ir->rhs)) {
+      /* LHS or RHS of the assignment is the entire gl_TessLevel* array.
+       * Since we are reshaping gl_TessLevel* from an array of floats to a
+       * vec4 or vec2, this isn't going to work as a bulk assignment
+       * anymore, so unroll it to element-by-element assignments and lower
+       * each of them.
+       *
+       * Note: to unroll into element-by-element assignments, we need to make
+       * clones of the LHS and RHS.  This is safe because expressions and
+       * l-values are side-effect free.
+       */
+      void *ctx = ralloc_parent(ir);
+      int array_size = ir->lhs->type->array_size();
+      for (int i = 0; i < array_size; ++i) {
+         ir_dereference_array *new_lhs = new(ctx) ir_dereference_array(
+            ir->lhs->clone(ctx, NULL), new(ctx) ir_constant(i));
+         ir_dereference_array *new_rhs = new(ctx) ir_dereference_array(
+            ir->rhs->clone(ctx, NULL), new(ctx) ir_constant(i));
+         this->handle_rvalue((ir_rvalue **) &new_rhs);
+
+         /* Handle the LHS after creating the new assignment.  This must
+          * happen in this order because handle_rvalue may replace the old LHS
+          * with an ir_expression of ir_binop_vector_extract.  Since this is
+          * not a valid l-value, this will cause an assertion in the
+          * ir_assignment constructor to fail.
+          *
+          * If this occurs, replace the mangled LHS with a dereference of the
+          * vector, and replace the RHS with an ir_triop_vector_insert.
+          */
+         ir_assignment *const assign = new(ctx) ir_assignment(new_lhs, new_rhs);
+         this->handle_rvalue((ir_rvalue **) &assign->lhs);
+         this->fix_lhs(assign);
+
+         this->base_ir->insert_before(assign);
+      }
+      ir->remove();
+
+      return visit_continue;
+   }
+
+   /* Handle the LHS as if it were an r-value.  Normally
+    * rvalue_visit(ir_assignment *) only visits the RHS, but we need to lower
+    * expressions in the LHS as well.
+    *
+    * This may cause the LHS to get replaced with an ir_expression of
+    * ir_binop_vector_extract.  If this occurs, replace it with a dereference
+    * of the vector, and replace the RHS with an ir_triop_vector_insert.
+    */
+   handle_rvalue((ir_rvalue **)&ir->lhs);
+   this->fix_lhs(ir);
+
+   return rvalue_visit(ir);
+}
+
+
+/**
+ * Set up base_ir properly and call visit_leave() on a newly created
+ * ir_assignment node.  This is used in cases where we have to insert an
+ * ir_assignment in a place where we know the hierarchical visitor won't see
+ * it.
+ */
+void
+lower_tess_level_visitor::visit_new_assignment(ir_assignment *ir)
+{
+   ir_instruction *old_base_ir = this->base_ir;
+   this->base_ir = ir;
+   ir->accept(this);
+   this->base_ir = old_base_ir;
+}
+
+
+/**
+ * If a gl_TessLevel* variable appears as an argument in an ir_call
+ * expression, replace it with a temporary variable, and make sure the ir_call
+ * is preceded and/or followed by assignments that copy the contents of the
+ * temporary variable to and/or from gl_TessLevel*.  Each of these
+ * assignments is then lowered to refer to gl_TessLevel*MESA.
+ */
+ir_visitor_status
+lower_tess_level_visitor::visit_leave(ir_call *ir)
+{
+   void *ctx = ralloc_parent(ir);
+
+   const exec_node *formal_param_node = ir->callee->parameters.head;
+   const exec_node *actual_param_node = ir->actual_parameters.head;
+   while (!actual_param_node->is_tail_sentinel()) {
+      ir_variable *formal_param = (ir_variable *) formal_param_node;
+      ir_rvalue *actual_param = (ir_rvalue *) actual_param_node;
+
+      /* Advance formal_param_node and actual_param_node now so that we can
+       * safely replace actual_param with another node, if necessary, below.
+       */
+      formal_param_node = formal_param_node->next;
+      actual_param_node = actual_param_node->next;
+
+      if (!this->is_tess_level_array(actual_param))
+         continue;
+
+      /* User is trying to pass a whole gl_TessLevel* array to a function
+       * call.  Since we are reshaping gl_TessLevel* from an array of floats
+       * to a vec4, this isn't going to work anymore, so use a temporary
+       * array instead.
+       */
+      ir_variable *temp = new(ctx) ir_variable(
+         actual_param->type, "temp_tess_level", ir_var_temporary);
+      this->base_ir->insert_before(temp);
+      actual_param->replace_with(
+         new(ctx) ir_dereference_variable(temp));
+      if (formal_param->data.mode == ir_var_function_in
+          || formal_param->data.mode == ir_var_function_inout) {
+         /* Copy from gl_TessLevel* to the temporary before the call.
+          * Since we are going to insert this copy before the current
+          * instruction, we need to visit it afterwards to make sure it
+          * gets lowered.
+          */
+         ir_assignment *new_assignment = new(ctx) ir_assignment(
+            new(ctx) ir_dereference_variable(temp),
+            actual_param->clone(ctx, NULL));
+         this->base_ir->insert_before(new_assignment);
+         this->visit_new_assignment(new_assignment);
+      }
+      if (formal_param->data.mode == ir_var_function_out
+          || formal_param->data.mode == ir_var_function_inout) {
+         /* Copy from the temporary to gl_TessLevel* after the call.
+          * Since visit_list_elements() has already decided which
+          * instruction it's going to visit next, we need to visit
+          * afterwards to make sure it gets lowered.
+          */
+         ir_assignment *new_assignment = new(ctx) ir_assignment(
+            actual_param->clone(ctx, NULL),
+            new(ctx) ir_dereference_variable(temp));
+         this->base_ir->insert_after(new_assignment);
+         this->visit_new_assignment(new_assignment);
+      }
+   }
+
+   return rvalue_visit(ir);
+}
+
+
+bool
+lower_tess_level(gl_shader *shader)
+{
+   if ((shader->Stage != MESA_SHADER_TESS_CTRL) &&
+       (shader->Stage != MESA_SHADER_TESS_EVAL))
+      return false;
+
+   lower_tess_level_visitor v(shader->Stage);
+
+   visit_list_elements(&v, shader->ir);
+
+   if (v.new_tess_level_outer_var)
+      shader->symbols->add_variable(v.new_tess_level_outer_var);
+   if (v.new_tess_level_inner_var)
+      shader->symbols->add_variable(v.new_tess_level_inner_var);
+
+   return v.progress;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_ubo_reference.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_ubo_reference.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_ubo_reference.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_ubo_reference.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -37,6 +37,7 @@
 #include "ir_builder.h"
 #include "ir_rvalue_visitor.h"
 #include "main/macros.h"
+#include "glsl_parser_extras.h"
 
 using namespace ir_builder;
 
@@ -139,17 +140,38 @@
    }
 
    void handle_rvalue(ir_rvalue **rvalue);
-   void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset,
-                       unsigned int deref_offset, bool row_major,
-                       int matrix_columns);
+   ir_visitor_status visit_enter(ir_assignment *ir);
+
+   void setup_for_load_or_store(ir_variable *var,
+                                ir_dereference *deref,
+                                ir_rvalue **offset,
+                                unsigned *const_offset,
+                                bool *row_major,
+                                int *matrix_columns);
    ir_expression *ubo_load(const struct glsl_type *type,
 			   ir_rvalue *offset);
+   ir_call *ssbo_load(const struct glsl_type *type,
+                      ir_rvalue *offset);
+
+   void check_for_ssbo_store(ir_assignment *ir);
+   void write_to_memory(ir_dereference *deref,
+                        ir_variable *var,
+                        ir_variable *write_var,
+                        unsigned write_mask);
+   ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset,
+                       unsigned write_mask);
+
+   void emit_access(bool is_write, ir_dereference *deref,
+                    ir_variable *base_offset, unsigned int deref_offset,
+                    bool row_major, int matrix_columns,
+                    unsigned write_mask);
 
    void *mem_ctx;
    struct gl_shader *shader;
    struct gl_uniform_buffer_variable *ubo_var;
    ir_rvalue *uniform_block;
    bool progress;
+   bool is_shader_storage;
 };
 
 /**
@@ -218,26 +240,20 @@
 }
 
 void
-lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
+lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
+                                                     ir_dereference *deref,
+                                                     ir_rvalue **offset,
+                                                     unsigned *const_offset,
+                                                     bool *row_major,
+                                                     int *matrix_columns)
 {
-   if (!*rvalue)
-      return;
-
-   ir_dereference *deref = (*rvalue)->as_dereference();
-   if (!deref)
-      return;
-
-   ir_variable *var = deref->variable_referenced();
-   if (!var || !var->is_in_uniform_block())
-      return;
-
-   mem_ctx = ralloc_parent(*rvalue);
-
+   /* Determine the name of the interface block */
    ir_rvalue *nonconst_block_index;
    const char *const field_name =
       interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                            deref, &nonconst_block_index);
 
+   /* Locate the ubo block by interface name */
    this->uniform_block = NULL;
    for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
       if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) {
@@ -252,6 +268,8 @@
             this->uniform_block = index;
          }
 
+         this->is_shader_storage = shader->UniformBlocks[i].IsShaderStorage;
+
          struct gl_uniform_block *block = &shader->UniformBlocks[i];
 
          this->ubo_var = var->is_interface_instance()
@@ -263,10 +281,10 @@
 
    assert(this->uniform_block);
 
-   ir_rvalue *offset = new(mem_ctx) ir_constant(0u);
-   unsigned const_offset = 0;
-   bool row_major = is_dereferenced_thing_row_major(deref);
-   int matrix_columns = 1;
+   *offset = new(mem_ctx) ir_constant(0u);
+   *const_offset = 0;
+   *row_major = is_dereferenced_thing_row_major(deref);
+   *matrix_columns = 1;
 
    /* Calculate the offset to the start of the region of the UBO
     * dereferenced by *rvalue.  This may be a variable offset if an
@@ -275,76 +293,76 @@
    while (deref) {
       switch (deref->ir_type) {
       case ir_type_dereference_variable: {
-	 const_offset += ubo_var->Offset;
-	 deref = NULL;
-	 break;
+         *const_offset += ubo_var->Offset;
+         deref = NULL;
+         break;
       }
 
       case ir_type_dereference_array: {
-	 ir_dereference_array *deref_array = (ir_dereference_array *)deref;
-	 unsigned array_stride;
-	 if (deref_array->array->type->is_matrix() && row_major) {
-	    /* When loading a vector out of a row major matrix, the
-	     * step between the columns (vectors) is the size of a
-	     * float, while the step between the rows (elements of a
-	     * vector) is handled below in emit_ubo_loads.
-	     */
-	    array_stride = 4;
+         ir_dereference_array *deref_array = (ir_dereference_array *) deref;
+         unsigned array_stride;
+         if (deref_array->array->type->is_matrix() && *row_major) {
+            /* When loading a vector out of a row major matrix, the
+             * step between the columns (vectors) is the size of a
+             * float, while the step between the rows (elements of a
+             * vector) is handled below in emit_access.
+             */
+            array_stride = 4;
             if (deref_array->array->type->is_double())
                array_stride *= 2;
-            matrix_columns = deref_array->array->type->matrix_columns;
+            *matrix_columns = deref_array->array->type->matrix_columns;
          } else if (deref_array->type->is_interface()) {
             /* We're processing an array dereference of an interface instance
-	     * array.  The thing being dereferenced *must* be a variable
-	     * dereference because intefaces cannot be embedded an other
-	     * types.  In terms of calculating the offsets for the lowering
-	     * pass, we don't care about the array index.  All elements of an
-	     * interface instance array will have the same offsets relative to
-	     * the base of the block that backs them.
+             * array. The thing being dereferenced *must* be a variable
+             * dereference because interfaces cannot be embedded in other
+             * types. In terms of calculating the offsets for the lowering
+             * pass, we don't care about the array index. All elements of an
+             * interface instance array will have the same offsets relative to
+             * the base of the block that backs them.
              */
             assert(deref_array->array->as_dereference_variable());
             deref = deref_array->array->as_dereference();
             break;
-	 } else {
+         } else {
             /* Whether or not the field is row-major (because it might be a
-             * bvec2 or something) does not affect the array itself.  We need
+             * bvec2 or something) does not affect the array itself. We need
              * to know whether an array element in its entirety is row-major.
              */
             const bool array_row_major =
                is_dereferenced_thing_row_major(deref_array);
 
-	    array_stride = deref_array->type->std140_size(array_row_major);
-	    array_stride = glsl_align(array_stride, 16);
-	 }
+            array_stride = deref_array->type->std140_size(array_row_major);
+            array_stride = glsl_align(array_stride, 16);
+         }
 
          ir_rvalue *array_index = deref_array->array_index;
          if (array_index->type->base_type == GLSL_TYPE_INT)
             array_index = i2u(array_index);
 
-	 ir_constant *const_index =
+         ir_constant *const_index =
             array_index->constant_expression_value(NULL);
-	 if (const_index) {
-	    const_offset += array_stride * const_index->value.u[0];
-	 } else {
-	    offset = add(offset,
-			 mul(array_index,
-			     new(mem_ctx) ir_constant(array_stride)));
-	 }
-	 deref = deref_array->array->as_dereference();
-	 break;
+         if (const_index) {
+            *const_offset += array_stride * const_index->value.u[0];
+         } else {
+            *offset = add(*offset,
+                          mul(array_index,
+                              new(mem_ctx) ir_constant(array_stride)));
+         }
+         deref = deref_array->array->as_dereference();
+         break;
       }
 
       case ir_type_dereference_record: {
-	 ir_dereference_record *deref_record = (ir_dereference_record *)deref;
-	 const glsl_type *struct_type = deref_record->record->type;
-	 unsigned intra_struct_offset = 0;
-
-	 for (unsigned int i = 0; i < struct_type->length; i++) {
-	    const glsl_type *type = struct_type->fields.structure[i].type;
-
-            ir_dereference_record *field_deref =
-               new(mem_ctx) ir_dereference_record(deref_record->record,
-                                                  struct_type->fields.structure[i].name);
+         ir_dereference_record *deref_record = (ir_dereference_record *) deref;
+         const glsl_type *struct_type = deref_record->record->type;
+         unsigned intra_struct_offset = 0;
+
+         for (unsigned int i = 0; i < struct_type->length; i++) {
+            const glsl_type *type = struct_type->fields.structure[i].type;
+
+            ir_dereference_record *field_deref = new(mem_ctx)
+               ir_dereference_record(deref_record->record,
+                                     struct_type->fields.structure[i].name);
             const bool field_row_major =
                is_dereferenced_thing_row_major(field_deref);
 
@@ -352,11 +370,12 @@
 
             unsigned field_align = type->std140_base_alignment(field_row_major);
 
-	    intra_struct_offset = glsl_align(intra_struct_offset, field_align);
+            intra_struct_offset = glsl_align(intra_struct_offset, field_align);
+
+            if (strcmp(struct_type->fields.structure[i].name,
+                       deref_record->field) == 0)
+               break;
 
-	    if (strcmp(struct_type->fields.structure[i].name,
-		       deref_record->field) == 0)
-	       break;
             intra_struct_offset += type->std140_size(field_row_major);
 
             /* If the field just examined was itself a structure, apply rule
@@ -371,19 +390,49 @@
                                                 field_align);
 
             }
-	 }
-
-	 const_offset += intra_struct_offset;
+         }
 
-	 deref = deref_record->record->as_dereference();
-	 break;
+         *const_offset += intra_struct_offset;
+         deref = deref_record->record->as_dereference();
+         break;
       }
+
       default:
-	 assert(!"not reached");
-	 deref = NULL;
-	 break;
+         assert(!"not reached");
+         deref = NULL;
+         break;
       }
    }
+}
+
+void
+lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+
+   ir_dereference *deref = (*rvalue)->as_dereference();
+   if (!deref)
+      return;
+
+   ir_variable *var = deref->variable_referenced();
+   if (!var || !var->is_in_buffer_block())
+      return;
+
+   mem_ctx = ralloc_parent(shader->ir);
+
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+
+   /* Compute the offset to the start of the dereference as well as other
+    * information we need to configure the load
+    */
+   setup_for_load_or_store(var, deref,
+                           &offset, &const_offset,
+                           &row_major, &matrix_columns);
+   assert(offset);
 
    /* Now that we've calculated the offset to the start of the
     * dereference, walk over the type and emit loads into a temporary.
@@ -401,7 +450,8 @@
    base_ir->insert_before(assign(load_offset, offset));
 
    deref = new(mem_ctx) ir_dereference_variable(load_var);
-   emit_ubo_loads(deref, load_offset, const_offset, row_major, matrix_columns);
+   emit_access(false, deref, load_offset, const_offset,
+               row_major, matrix_columns, 0);
    *rvalue = deref;
 
    progress = true;
@@ -420,74 +470,163 @@
 
 }
 
+static bool
+shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
+{  /* Passed as 2nd ctor argument of the ir_function_signature objects below. */
+   return state->ARB_shader_storage_buffer_object_enable;  /* SSBO enable flag */
+}
+
+ir_call *
+lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref,
+                                        ir_rvalue *offset,
+                                        unsigned write_mask)
+{  /* Builds and returns a call to __intrinsic_store_ssbo; it is not inserted here. */
+   exec_list sig_params;
+   /* Formal parameters, in order: block_ref, offset, value, write_mask. */
+   ir_variable *block_ref = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+   sig_params.push_tail(block_ref);
+
+   ir_variable *offset_ref = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
+   sig_params.push_tail(offset_ref);
+
+   ir_variable *val_ref = new(mem_ctx)
+      ir_variable(deref->type, "value" , ir_var_function_in);  /* typed like deref */
+   sig_params.push_tail(val_ref);
+
+   ir_variable *writemask_ref = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
+   sig_params.push_tail(writemask_ref);
+   /* void signature, flagged as an intrinsic, owning the parameters above. */
+   ir_function_signature *sig = new(mem_ctx)
+      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
+   assert(sig);
+   sig->replace_parameters(&sig_params);
+   sig->is_intrinsic = true;
+
+   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
+   f->add_signature(sig);  /* f is not referenced again in this function */
+   /* Actual arguments are clones, in the same order as the formals. */
+   exec_list call_params;
+   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
+   call_params.push_tail(offset->clone(mem_ctx, NULL));
+   call_params.push_tail(deref->clone(mem_ctx, NULL));
+   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
+   return new(mem_ctx) ir_call(sig, NULL, &call_params);  /* NULL where ssbo_load() passes its result deref */
+}
+
+ir_call *
+lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type,
+                                       ir_rvalue *offset)
+{  /* Builds and returns a call to __intrinsic_load_ssbo; the caller inserts it. */
+   exec_list sig_params;
+   /* Formal parameters, in order: block_ref, offset_ref (both uint). */
+   ir_variable *block_ref = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+   sig_params.push_tail(block_ref);
+
+   ir_variable *offset_ref = new(mem_ctx)
+      ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
+   sig_params.push_tail(offset_ref);
+   /* Signature returning 'type', flagged as an intrinsic. */
+   ir_function_signature *sig =
+      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
+   assert(sig);
+   sig->replace_parameters(&sig_params);
+   sig->is_intrinsic = true;
+
+   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
+   f->add_signature(sig);  /* f is not referenced again in this function */
+   /* Temporary receiving the loaded value; declared just before base_ir. */
+   ir_variable *result = new(mem_ctx)
+      ir_variable(type, "ssbo_load_result", ir_var_temporary);
+   base_ir->insert_before(result);
+   ir_dereference_variable *deref_result = new(mem_ctx)
+      ir_dereference_variable(result);
+
+   exec_list call_params;  /* actual arguments: cloned block index and offset */
+   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
+   call_params.push_tail(offset->clone(mem_ctx, NULL));
+
+   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
+}
+
+static inline int
+writemask_for_size(unsigned n)
+{  /* Callers in emit_access() pass a type's vector_elements count as n. */
+   return ((1 << n) - 1);  /* low n bits set, e.g. n = 3 gives 0x7 */
+}
+
 /**
- * Takes LHS and emits a series of assignments into its components
- * from the UBO variable at variable_offset + deref_offset.
- *
- * Recursively calls itself to break the deref down to the point that
- * the ir_binop_ubo_load expressions generated are contiguous scalars
- * or vectors.
+ * Takes a deref and recursively calls itself to break the deref down to the
+ * point that the reads or writes generated are contiguous scalars or vectors.
  */
 void
-lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
-					    ir_variable *base_offset,
-                                            unsigned int deref_offset,
-                                            bool row_major,
-                                            int matrix_columns)
+lower_ubo_reference_visitor::emit_access(bool is_write,
+                                         ir_dereference *deref,
+                                         ir_variable *base_offset,
+                                         unsigned int deref_offset,
+                                         bool row_major,
+                                         int matrix_columns,
+                                         unsigned write_mask)
 {
    if (deref->type->is_record()) {
       unsigned int field_offset = 0;
 
       for (unsigned i = 0; i < deref->type->length; i++) {
-	 const struct glsl_struct_field *field =
-	    &deref->type->fields.structure[i];
-	 ir_dereference *field_deref =
-	    new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
-					       field->name);
+         const struct glsl_struct_field *field =
+            &deref->type->fields.structure[i];
+         ir_dereference *field_deref =
+            new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
+                                               field->name);
 
-	 field_offset =
-	    glsl_align(field_offset,
+         field_offset =
+            glsl_align(field_offset,
                        field->type->std140_base_alignment(row_major));
 
-	 emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset,
-                        row_major, 1);
+         emit_access(is_write, field_deref, base_offset,
+                     deref_offset + field_offset,
+                     row_major, 1,
+                     writemask_for_size(field_deref->type->vector_elements));
 
-	 field_offset += field->type->std140_size(row_major);
+         field_offset += field->type->std140_size(row_major);
       }
       return;
    }
 
    if (deref->type->is_array()) {
       unsigned array_stride =
-	 glsl_align(deref->type->fields.array->std140_size(row_major),
-		    16);
+         glsl_align(deref->type->fields.array->std140_size(row_major), 16);
 
       for (unsigned i = 0; i < deref->type->length; i++) {
-	 ir_constant *element = new(mem_ctx) ir_constant(i);
-	 ir_dereference *element_deref =
-	    new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
-					      element);
-	 emit_ubo_loads(element_deref, base_offset,
-			deref_offset + i * array_stride,
-                        row_major, 1);
+         ir_constant *element = new(mem_ctx) ir_constant(i);
+         ir_dereference *element_deref =
+            new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
+                                              element);
+         emit_access(is_write, element_deref, base_offset,
+                     deref_offset + i * array_stride,
+                     row_major, 1,
+                     writemask_for_size(element_deref->type->vector_elements));
       }
       return;
    }
 
    if (deref->type->is_matrix()) {
       for (unsigned i = 0; i < deref->type->matrix_columns; i++) {
-	 ir_constant *col = new(mem_ctx) ir_constant(i);
-	 ir_dereference *col_deref =
-	    new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
-					      col);
+         ir_constant *col = new(mem_ctx) ir_constant(i);
+         ir_dereference *col_deref =
+            new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col);
 
          if (row_major) {
             /* For a row-major matrix, the next column starts at the next
              * element.
              */
             int size_mul = deref->type->is_double() ? 8 : 4;
-            emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul,
-                           row_major, deref->type->matrix_columns);
+            emit_access(is_write, col_deref, base_offset,
+                        deref_offset + i * size_mul,
+                        row_major, deref->type->matrix_columns,
+                        writemask_for_size(col_deref->type->vector_elements));
          } else {
             /* std140 always rounds the stride of arrays (and matrices) to a
              * vec4, so matrices are always 16 between columns/rows. With
@@ -495,21 +634,33 @@
              */
             int size_mul = (deref->type->is_double() &&
                             deref->type->vector_elements > 2) ? 32 : 16;
-            emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul,
-                           row_major, deref->type->matrix_columns);
+            emit_access(is_write, col_deref, base_offset,
+                        deref_offset + i * size_mul,
+                        row_major, deref->type->matrix_columns,
+                        writemask_for_size(col_deref->type->vector_elements));
          }
       }
       return;
    }
 
-   assert(deref->type->is_scalar() ||
-	  deref->type->is_vector());
+   assert(deref->type->is_scalar() || deref->type->is_vector());
 
    if (!row_major) {
-      ir_rvalue *offset = add(base_offset,
-			      new(mem_ctx) ir_constant(deref_offset));
-      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
-				    ubo_load(deref->type, offset)));
+      ir_rvalue *offset =
+         add(base_offset, new(mem_ctx) ir_constant(deref_offset));
+      if (is_write)
+         base_ir->insert_after(ssbo_store(deref, offset, write_mask));
+      else {
+         if (!this->is_shader_storage) {
+             base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+                                           ubo_load(deref->type, offset)));
+         } else {
+            ir_call *load_ssbo = ssbo_load(deref->type, offset);
+            base_ir->insert_before(load_ssbo);
+            ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
+            base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), value));
+         }
+      }
    } else {
       unsigned N = deref->type->is_double() ? 8 : 4;
 
@@ -527,22 +678,109 @@
       assert(matrix_columns <= 4);
       unsigned matrix_stride = glsl_align(matrix_columns * N, 16);
 
-      const glsl_type *ubo_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
+      const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
          glsl_type::float_type : glsl_type::double_type;
 
       for (unsigned i = 0; i < deref->type->vector_elements; i++) {
-	 ir_rvalue *chan_offset =
-	    add(base_offset,
-		new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
-
-	 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
-				       ubo_load(ubo_type,
-						chan_offset),
-				       (1U << i)));
+         ir_rvalue *chan_offset =
+            add(base_offset,
+                new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
+         if (is_write) {
+            base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1));
+         } else {
+            if (!this->is_shader_storage) {
+               base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+                                             ubo_load(deref_type, chan_offset),
+                                             (1U << i)));
+            } else {
+               ir_call *load_ssbo = ssbo_load(deref_type, chan_offset);
+               base_ir->insert_before(load_ssbo);
+               ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
+               base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+                                             value,
+                                             (1U << i)));
+            }
+         }
       }
    }
 }
 
+void
+lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
+                                             ir_variable *var,
+                                             ir_variable *write_var,
+                                             unsigned write_mask)
+{  /* Emits the stores that copy write_var to the buffer location named by deref. */
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+
+   /* Compute the offset to the start of the dereference as well as other
+    * information we need to configure the write
+    */
+   setup_for_load_or_store(var, deref,
+                           &offset, &const_offset,
+                           &row_major, &matrix_columns);
+   assert(offset);
+
+   /* Now emit writes from the temporary to memory */
+   ir_variable *write_offset =
+      new(mem_ctx) ir_variable(glsl_type::uint_type,
+                               "ssbo_store_temp_offset",
+                               ir_var_temporary);
+   /* The offset temporary is declared and assigned ahead of base_ir. */
+   base_ir->insert_before(write_offset);
+   base_ir->insert_before(assign(write_offset, offset));
+
+   deref = new(mem_ctx) ir_dereference_variable(write_var);  /* deref now names the temporary */
+   emit_access(true, deref, write_offset, const_offset,  /* true selects is_write */
+               row_major, matrix_columns, write_mask);
+}
+
+void
+lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
+{  /* If ir assigns to a buffer-block variable, redirect it through a temporary. */
+   if (!ir || !ir->lhs)
+      return;
+
+   ir_rvalue *rvalue = ir->lhs->as_rvalue();
+   if (!rvalue)
+      return;
+
+   ir_dereference *deref = ir->lhs->as_dereference();
+   if (!deref)
+      return;
+
+   ir_variable *var = ir->lhs->variable_referenced();
+   if (!var || !var->is_in_buffer_block())
+      return;  /* not a buffer-block write: the assignment is left untouched */
+
+   /* We have a write to a buffer variable, so declare a temporary and rewrite
+    * the assignment so that the temporary is the LHS.
+    */
+   mem_ctx = ralloc_parent(shader->ir);
+
+   const glsl_type *type = rvalue->type;  /* rvalue still refers to the original LHS */
+   ir_variable *write_var = new(mem_ctx) ir_variable(type,
+                                                     "ssbo_store_temp",
+                                                     ir_var_temporary);
+   base_ir->insert_before(write_var);
+   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
+
+   /* Now we have to write the value assigned to the temporary back to memory */
+   write_to_memory(deref, var, write_var, ir->write_mask);  /* deref: the original LHS */
+   progress = true;
+}
+
+
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
+{  /* Assignment hook: handle a buffer-block LHS before the usual rvalue visit. */
+   check_for_ssbo_store(ir);  /* may replace ir->lhs with a temporary */
+   return rvalue_visit(ir);   /* status comes from rvalue_visit() (defined elsewhere) */
+}
+
 } /* unnamed namespace */
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_variable_index_to_cond_assign.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_variable_index_to_cond_assign.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/lower_variable_index_to_cond_assign.cpp	2014-09-20 14:48:28.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/lower_variable_index_to_cond_assign.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -335,12 +335,14 @@
 
 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
 public:
-   variable_index_to_cond_assign_visitor(bool lower_input,
-					 bool lower_output,
-					 bool lower_temp,
-					 bool lower_uniform)
+   variable_index_to_cond_assign_visitor(gl_shader_stage stage,
+                                         bool lower_input,
+                                         bool lower_output,
+                                         bool lower_temp,
+                                         bool lower_uniform)
    {
       this->progress = false;
+      this->stage = stage;
       this->lower_inputs = lower_input;
       this->lower_outputs = lower_output;
       this->lower_temps = lower_temp;
@@ -348,6 +350,8 @@
    }
 
    bool progress;
+
+   gl_shader_stage stage;
    bool lower_inputs;
    bool lower_outputs;
    bool lower_temps;
@@ -369,17 +373,44 @@
       case ir_var_auto:
       case ir_var_temporary:
 	 return this->lower_temps;
+
       case ir_var_uniform:
+      case ir_var_shader_storage:
 	 return this->lower_uniforms;
+
       case ir_var_function_in:
       case ir_var_const_in:
          return this->lower_temps;
+
       case ir_var_shader_in:
+         /* The input array size is unknown at compile time for non-patch
+          * inputs in TCS and TES. The arrays are sized to
+          * the implementation-dependent limit "gl_MaxPatchVertices", but
+          * the real size is stored in the "gl_PatchVerticesIn" built-in
+          * uniform.
+          *
+          * The TCS input array size is specified by
+          * glPatchParameteri(GL_PATCH_VERTICES).
+          *
+          * The TES input array size is specified by the "vertices" output
+          * layout qualifier in TCS.
+          */
+         if ((stage == MESA_SHADER_TESS_CTRL ||
+              stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
+            return false;
          return this->lower_inputs;
+
       case ir_var_function_out:
+         /* TCS non-patch outputs can only be indexed with "gl_InvocationID".
+          * Other expressions are not allowed.
+          */
+         if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
+            return false;
          return this->lower_temps;
+
       case ir_var_shader_out:
          return this->lower_outputs;
+
       case ir_var_function_inout:
 	 return this->lower_temps;
       }
@@ -522,16 +553,18 @@
 } /* anonymous namespace */
 
 bool
-lower_variable_index_to_cond_assign(exec_list *instructions,
-				    bool lower_input,
-				    bool lower_output,
-				    bool lower_temp,
-				    bool lower_uniform)
+lower_variable_index_to_cond_assign(gl_shader_stage stage,
+                                    exec_list *instructions,
+                                    bool lower_input,
+                                    bool lower_output,
+                                    bool lower_temp,
+                                    bool lower_uniform)
 {
-   variable_index_to_cond_assign_visitor v(lower_input,
-					   lower_output,
-					   lower_temp,
-					   lower_uniform);
+   variable_index_to_cond_assign_visitor v(stage,
+                                           lower_input,
+                                           lower_output,
+                                           lower_temp,
+                                           lower_uniform);
 
    /* Continue lowering until no progress is made.  If there are multiple
     * levels of indirection (e.g., non-constant indexing of array elements and
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/main.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/main.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/main.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/main.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -204,6 +204,8 @@
    }
 
    ctx->Const.GenerateTemporaryNames = true;
+   ctx->Const.MaxPatchVertices = 32;
+
    ctx->Driver.NewShader = _mesa_new_shader;
 }
 
@@ -273,10 +275,10 @@
 {
 
    const char *header =
-      "usage: %s [options] <file.vert | file.geom | file.frag>\n"
+      "usage: %s [options] <file.vert | file.tesc | file.tese | file.geom | file.frag | file.comp>\n"
       "\n"
       "Possible options are:\n";
-   printf(header, name, name);
+   printf(header, name);
    for (const struct option *o = compiler_opts; o->name != 0; ++o) {
       printf("    --%s\n", o->name);
    }
@@ -373,6 +375,10 @@
       const char *const ext = & argv[optind][len - 5];
       if (strncmp(".vert", ext, 5) == 0 || strncmp(".glsl", ext, 5) == 0)
 	 shader->Type = GL_VERTEX_SHADER;
+      else if (strncmp(".tesc", ext, 5) == 0)
+	 shader->Type = GL_TESS_CONTROL_SHADER;
+      else if (strncmp(".tese", ext, 5) == 0)
+	 shader->Type = GL_TESS_EVALUATION_SHADER;
       else if (strncmp(".geom", ext, 5) == 0)
 	 shader->Type = GL_GEOMETRY_SHADER;
       else if (strncmp(".frag", ext, 5) == 0)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -19,8 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
@@ -142,13 +140,16 @@
 	glsl_parser.cpp					\
 	glsl_parser.h					\
 	$(LIBGLSL_FILES)				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)
+
 
 libnir_la_SOURCES =					\
 	glsl_types.cpp					\
 	builtin_types.cpp				\
 	glsl_symbol_table.cpp				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)
 
 glsl_compiler_SOURCES = \
 	$(GLSL_COMPILER_CXX_FILES)
@@ -199,19 +200,23 @@
 am__v_YACC_0 = @echo "  YACC    " $@;
 am__v_YACC_1 =
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
 glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+	$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
 
 glsl_lexer.cpp: glsl_lexer.ll
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
 
 glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
 
 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
 
 # Only the parsers (specifically the header files generated at the same time)
 # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
@@ -224,11 +229,7 @@
 	glsl_lexer.cpp					\
 	glcpp/glcpp-parse.c				\
 	glcpp/glcpp-lex.c				\
-	nir/nir_builder_opcodes.h				\
-	nir/nir_constant_expressions.c			\
-	nir/nir_opcodes.c				\
-	nir/nir_opcodes.h				\
-	nir/nir_opt_algebraic.c
+	$(NIR_GENERATED_FILES)
 CLEANFILES =						\
 	glcpp/glcpp-parse.h				\
 	glsl_parser.h					\
@@ -241,22 +242,24 @@
 	$(RM) glcpp/tests/*.out
 	$(RM) glcpp/tests/subtest*/*.out
 
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-	$(MKDIR_P) nir;							\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false)
 
-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
-	$(MKDIR_P) nir;							\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false)
 
 nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-	$(MKDIR_P) nir;							\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false)
 
 nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-	$(MKDIR_P) nir;							\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false)
 
 nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-	$(MKDIR_P) nir;							\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -33,6 +33,7 @@
 	nir/nir_lower_alu_to_scalar.c \
 	nir/nir_lower_atomics.c \
 	nir/nir_lower_global_vars_to_local.c \
+	nir/nir_lower_load_const_to_scalar.c \
 	nir/nir_lower_locals_to_regs.c \
 	nir/nir_lower_idiv.c \
 	nir/nir_lower_io.c \
@@ -55,6 +56,7 @@
 	nir/nir_opt_peephole_ffma.c \
 	nir/nir_opt_peephole_select.c \
 	nir/nir_opt_remove_phis.c \
+	nir/nir_opt_undef.c \
 	nir/nir_print.c \
 	nir/nir_remove_dead_variables.c \
 	nir/nir_search.c \
@@ -67,8 +69,7 @@
 	nir/nir_vla.h \
 	nir/nir_worklist.c \
 	nir/nir_worklist.h \
-	nir/nir_types.cpp \
-	$(NIR_GENERATED_FILES)
+	nir/nir_types.cpp
 
 # libglsl
 
@@ -154,6 +155,8 @@
 	lower_packed_varyings.cpp \
 	lower_named_interface_blocks.cpp \
 	lower_packing_builtins.cpp \
+	lower_subroutine.cpp \
+	lower_tess_level.cpp \
 	lower_texture_projection.cpp \
 	lower_variable_index_to_cond_assign.cpp \
 	lower_vec_index_to_cond_assign.cpp \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/glsl_to_nir.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/glsl_to_nir.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/glsl_to_nir.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/glsl_to_nir.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -65,6 +65,7 @@
    virtual void visit(ir_dereference_variable *);
    virtual void visit(ir_dereference_record *);
    virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_barrier *);
 
    void create_function(ir_function *ir);
 
@@ -279,6 +280,9 @@
       var->data.mode = nir_var_uniform;
       break;
 
+   case ir_var_shader_storage:
+      var->data.mode = nir_var_shader_storage;
+      break;
 
    case ir_var_system_value:
       var->data.mode = nir_var_system_value;
@@ -370,6 +374,7 @@
       break;
 
    case nir_var_uniform:
+   case nir_var_shader_storage:
       exec_list_push_tail(&shader->uniforms, &var->node);
       break;
 
@@ -636,6 +641,8 @@
          op = nir_intrinsic_image_atomic_comp_swap;
       } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) {
          op = nir_intrinsic_memory_barrier;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) {
+         op = nir_intrinsic_image_size;
       } else {
          unreachable("not reached");
       }
@@ -661,7 +668,8 @@
       case nir_intrinsic_image_atomic_or:
       case nir_intrinsic_image_atomic_xor:
       case nir_intrinsic_image_atomic_exchange:
-      case nir_intrinsic_image_atomic_comp_swap: {
+      case nir_intrinsic_image_atomic_comp_swap:
+      case nir_intrinsic_image_size: {
          nir_ssa_undef_instr *instr_undef =
             nir_ssa_undef_instr_create(shader, 1);
          nir_instr_insert_after_cf_list(this->cf_node_list,
@@ -676,6 +684,17 @@
          instr->variables[0] = evaluate_deref(&instr->instr, image);
          param = param->get_next();
 
+         /* Set the intrinsic destination. */
+         if (ir->return_deref) {
+            const nir_intrinsic_info *info =
+                    &nir_intrinsic_infos[instr->intrinsic];
+            nir_ssa_dest_init(&instr->instr, &instr->dest,
+                              info->dest_components, NULL);
+         }
+
+         if (op == nir_intrinsic_image_size)
+            break;
+
          /* Set the address argument, extending the coordinate vector to four
           * components.
           */
@@ -716,11 +735,6 @@
             instr->src[3] = evaluate_rvalue((ir_dereference *)param);
             param = param->get_next();
          }
-
-         /* Set the intrinsic destination. */
-         if (ir->return_deref)
-            nir_ssa_dest_init(&instr->instr, &instr->dest,
-                              ir->return_deref->type->vector_elements, NULL);
          break;
       }
       case nir_intrinsic_memory_barrier:
@@ -930,13 +944,9 @@
    }
 
    nir_dest *dest = get_instr_dest(this->result);
-
    assert(dest->is_ssa);
-   nir_src src = NIR_SRC_INIT;
-   src.is_ssa = true;
-   src.ssa = &dest->ssa;
 
-   return src;
+   return nir_src_for_ssa(&dest->ssa);
 }
 
 nir_alu_instr *
@@ -1170,6 +1180,7 @@
    case ir_unop_bitcast_f2i:
    case ir_unop_bitcast_u2f:
    case ir_unop_bitcast_f2u:
+   case ir_unop_subroutine_to_int:
       /* no-op */
       emit(nir_op_imov, dest_size, srcs);
       break;
@@ -1893,3 +1904,11 @@
    ralloc_steal(this->deref_tail, deref);
    this->deref_tail = &deref->deref;
 }
+
+void
+nir_visitor::visit(ir_barrier *ir)
+{
+   /* ir is not inspected: a barrier lowers to a bare intrinsic. */
+   nir_instr_insert_after_cf_list(this->cf_node_list,
+      &nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier)->instr);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,6 @@
 {
    nir_register *reg = ralloc(mem_ctx, nir_register);
 
-   reg->parent_instr = NULL;
    list_inithead(&reg->uses);
    list_inithead(&reg->defs);
    list_inithead(&reg->if_uses);
@@ -148,18 +147,18 @@
 
 void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
 {
-   dest->is_ssa = src->is_ssa;
-   if (src->is_ssa) {
-      dest->ssa = src->ssa;
+   /* Copying an SSA definition makes no sense whatsoever. */
+   assert(!src->is_ssa);
+
+   dest->is_ssa = false;
+
+   dest->reg.base_offset = src->reg.base_offset;
+   dest->reg.reg = src->reg.reg;
+   if (src->reg.indirect) {
+      dest->reg.indirect = ralloc(mem_ctx, nir_src);
+      nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
    } else {
-      dest->reg.base_offset = src->reg.base_offset;
-      dest->reg.reg = src->reg.reg;
-      if (src->reg.indirect) {
-         dest->reg.indirect = ralloc(mem_ctx, nir_src);
-         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
-      } else {
-         dest->reg.indirect = NULL;
-      }
+      dest->reg.indirect = NULL;
    }
 }
 
@@ -451,7 +450,7 @@
 nir_tex_instr *
 nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
 {
-   nir_tex_instr *instr = ralloc(shader, nir_tex_instr);
+   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
    instr_init(&instr->instr, nir_instr_type_tex);
 
    dest_init(&instr->dest);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_constant_expressions.py mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_constant_expressions.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_constant_expressions.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_constant_expressions.py	2015-09-16 14:36:09.000000000 +0000
@@ -31,12 +31,7 @@
 #include "util/rounding.h" /* for _mesa_roundeven */
 #include "nir_constant_expressions.h"
 
-#if defined(_MSC_VER) && (_MSC_VER < 1800)
-static int isnormal(double x)
-{
-   return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN;
-}
-#elif defined(__SUNPRO_CC)
+#if defined(__SUNPRO_CC)
 #include <ieeefp.h>
 static int isnormal(double x)
 {
@@ -44,13 +39,6 @@
 }
 #endif
 
-#if defined(_MSC_VER)
-static double copysign(double x, double y)
-{
-   return _copysign(x, y);
-}
-#endif
-
 /**
  * Evaluate one component of packSnorm4x8.
  */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_from_ssa.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_from_ssa.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_from_ssa.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_from_ssa.c	2015-09-16 14:36:09.000000000 +0000
@@ -37,6 +37,7 @@
 struct from_ssa_state {
    void *mem_ctx;
    void *dead_ctx;
+   bool phi_webs_only;
    struct hash_table *merge_node_table;
    nir_instr *instr;
    nir_function_impl *impl;
@@ -482,6 +483,9 @@
 
       reg = node->set->reg;
    } else {
+      if (state->phi_webs_only)
+         return true;
+
       /* We leave load_const SSA values alone.  They act as immediates to
        * the backend.  If it got coalesced into a phi, that's ok.
        */
@@ -492,21 +496,20 @@
       reg->name = def->name;
       reg->num_components = def->num_components;
       reg->num_array_elems = 0;
-
-      /* This register comes from an SSA definition that is defined and not
-       * part of a phi-web.  Therefore, we know it has a single unique
-       * definition that dominates all of its uses; we can copy the
-       * parent_instr from the SSA def safely.
-       */
-      if (def->parent_instr->type != nir_instr_type_ssa_undef)
-         reg->parent_instr = def->parent_instr;
    }
 
    nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg), state->mem_ctx);
    assert(list_empty(&def->uses) && list_empty(&def->if_uses));
 
-   if (def->parent_instr->type == nir_instr_type_ssa_undef)
+   if (def->parent_instr->type == nir_instr_type_ssa_undef) {
+      /* If it's an ssa_undef instruction, remove it since we know we just got
+       * rid of all its uses.
+       */
+      nir_instr *parent_instr = def->parent_instr;
+      nir_instr_remove(parent_instr);
+      ralloc_steal(state->dead_ctx, parent_instr);
       return true;
+   }
 
    assert(def->parent_instr->type != nir_instr_type_load_const);
 
@@ -523,7 +526,7 @@
 }
 
 /* Resolves ssa definitions to registers.  While we're at it, we also
- * remove phi nodes and ssa_undef instructions
+ * remove phi nodes.
  */
 static bool
 resolve_registers_block(nir_block *block, void *void_state)
@@ -534,8 +537,7 @@
       state->instr = instr;
       nir_foreach_ssa_def(instr, rewrite_ssa_def, state);
 
-      if (instr->type == nir_instr_type_ssa_undef ||
-          instr->type == nir_instr_type_phi) {
+      if (instr->type == nir_instr_type_phi) {
          nir_instr_remove(instr);
          ralloc_steal(state->dead_ctx, instr);
       }
@@ -765,13 +767,14 @@
 }
 
 static void
-nir_convert_from_ssa_impl(nir_function_impl *impl)
+nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
 {
    struct from_ssa_state state;
 
    state.mem_ctx = ralloc_parent(impl);
    state.dead_ctx = ralloc_context(NULL);
    state.impl = impl;
+   state.phi_webs_only = phi_webs_only;
    state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                     _mesa_key_pointer_equal);
 
@@ -801,10 +804,10 @@
 }
 
 void
-nir_convert_from_ssa(nir_shader *shader)
+nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
 {
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         nir_convert_from_ssa_impl(overload->impl);
+         nir_convert_from_ssa_impl(overload->impl, phi_webs_only);
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir.h	2015-09-16 14:36:09.000000000 +0000
@@ -87,6 +87,7 @@
    nir_var_global,
    nir_var_local,
    nir_var_uniform,
+   nir_var_shader_storage,
    nir_var_system_value
 } nir_variable_mode;
 
@@ -389,14 +390,6 @@
     */
    bool is_packed;
 
-   /**
-    * If this pointer is non-NULL then this register has exactly one
-    * definition and that definition dominates all of its uses.  This is
-    * set by the out-of-SSA pass so that backends can get SSA-like
-    * information even once they have gone out of SSA.
-    */
-   struct nir_instr *parent_instr;
-
    /** set of nir_instr's where this register is used (read from) */
    struct list_head uses;
 
@@ -450,6 +443,18 @@
       return exec_node_data(nir_instr, prev, node);
 }
 
+static inline bool
+nir_instr_is_first(nir_instr *instr)
+{
+   return exec_node_is_head_sentinel(exec_node_get_prev(&instr->node));
+}
+
+static inline bool
+nir_instr_is_last(nir_instr *instr)
+{
+   return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node));
+}
+
 typedef struct {
    /** for debugging only, can be NULL */
    const char* name;
@@ -565,16 +570,6 @@
    return src;
 }
 
-static inline nir_instr *
-nir_src_get_parent_instr(const nir_src *src)
-{
-   if (src->is_ssa) {
-      return src->ssa->parent_instr;
-   } else {
-      return src->reg.reg->parent_instr;
-   }
-}
-
 static inline nir_dest
 nir_dest_for_reg(nir_register *reg)
 {
@@ -1238,6 +1233,8 @@
    foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
 #define nir_foreach_instr_safe(block, instr) \
    foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_safe_reverse(block, instr) \
+   foreach_list_typed_safe_reverse(nir_instr, instr, node, &(block)->instr_list)
 
 typedef struct nir_if {
    nir_cf_node cf_node;
@@ -1640,14 +1637,16 @@
 
 void nir_lower_locals_to_regs(nir_shader *shader);
 
-void nir_assign_var_locations_scalar(struct exec_list *var_list,
-                                     unsigned *size);
-void nir_assign_var_locations_scalar_direct_first(nir_shader *shader,
-                                                  struct exec_list *var_list,
-                                                  unsigned *direct_size,
-                                                  unsigned *size);
+void nir_assign_var_locations(struct exec_list *var_list,
+                              unsigned *size,
+                              bool is_scalar);
+void nir_assign_var_locations_direct_first(nir_shader *shader,
+                                           struct exec_list *var_list,
+                                           unsigned *direct_size,
+                                           unsigned *size,
+                                           bool is_scalar);
 
-void nir_lower_io(nir_shader *shader);
+void nir_lower_io(nir_shader *shader, bool is_scalar);
 
 void nir_lower_vars_to_ssa(nir_shader *shader);
 
@@ -1655,6 +1654,7 @@
 
 void nir_lower_vec_to_movs(nir_shader *shader);
 void nir_lower_alu_to_scalar(nir_shader *shader);
+void nir_lower_load_const_to_scalar(nir_shader *shader);
 
 void nir_lower_phis_to_scalar(nir_shader *shader);
 
@@ -1676,7 +1676,12 @@
 
 void nir_convert_to_ssa_impl(nir_function_impl *impl);
 void nir_convert_to_ssa(nir_shader *shader);
-void nir_convert_from_ssa(nir_shader *shader);
+
+/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
+ * registers.  If false, convert all values (even those not involved in a phi
+ * node) to registers.
+ */
+void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);
 
 bool nir_opt_algebraic(nir_shader *shader);
 bool nir_opt_algebraic_late(nir_shader *shader);
@@ -1699,6 +1704,8 @@
 
 bool nir_opt_remove_phis(nir_shader *shader);
 
+bool nir_opt_undef(nir_shader *shader);
+
 void nir_sweep(nir_shader *shader);
 
 #ifdef __cplusplus
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_intrinsics.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_intrinsics.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_intrinsics.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_intrinsics.h	2015-09-16 14:36:09.000000000 +0000
@@ -67,6 +67,7 @@
  */
 #define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
 
+BARRIER(barrier)
 BARRIER(discard)
 
 /*
@@ -122,6 +123,8 @@
 INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
 INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0)
 INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0)
+INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 #define SYSTEM_VALUE(name, components) \
    INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
@@ -138,12 +141,11 @@
 SYSTEM_VALUE(invocation_id, 1)
 
 /*
- * The first index is the address to load from, and the second index is the
- * number of array elements to load.  Indirect loads have an additional
- * register input, which is added to the constant address to compute the
- * final address to load from.  For UBO's (and SSBO's), the first source is
- * the (possibly constant) UBO buffer index and the indirect (if it exists)
- * is the second source.
+ * The first and only index is the base address to load from.  Indirect
+ * loads have an additional register input, which is added to the constant
+ * address to compute the final address to load from.  For UBO's (and
+ * SSBO's), the first source is the (possibly constant) UBO buffer index
+ * and the indirect (if it exists) is the second source.
  *
  * For vector backends, the address is in terms of one vec4, and so each array
  * element is +4 scalar components from the previous array element. For scalar
@@ -152,9 +154,9 @@
  */
 
 #define LOAD(name, extra_srcs, flags) \
-   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 1, flags) \
    INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
-             true, 0, 0, 2, flags)
+             true, 0, 0, 1, flags)
 
 LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
@@ -172,7 +174,7 @@
    INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
              num_indices, flags) \
 
-STORE(output, 2, 0)
-/* STORE(ssbo, 3, 0) */
+STORE(output, 1, 0)
+/* STORE(ssbo, 2, 0) */
 
 LAST_INTRINSIC(store_output_indirect)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_alu_to_scalar.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_alu_to_scalar.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_alu_to_scalar.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_alu_to_scalar.c	2015-09-16 14:36:09.000000000 +0000
@@ -100,6 +100,21 @@
        */
       return;
 
+   case nir_op_unpack_unorm_4x8:
+   case nir_op_unpack_snorm_4x8:
+   case nir_op_unpack_unorm_2x16:
+   case nir_op_unpack_snorm_2x16:
+      /* There is no scalar version of these ops, unless we were to break them
+       * down to bitshifts and math (which is definitely not intended).
+       */
+      return;
+
+   case nir_op_unpack_half_2x16:
+      /* We could split this into unpack_half_2x16_split_[xy], but should
+       * we?
+       */
+      return;
+
       LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
       LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
       LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
@@ -164,7 +179,7 @@
 {
    nir_foreach_instr_safe(block, instr) {
       if (instr->type == nir_instr_type_alu)
-         lower_alu_instr_scalar((nir_alu_instr *)instr, data);
+         lower_alu_instr_scalar(nir_instr_as_alu(instr), data);
    }
 
    return true;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_atomics.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_atomics.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_atomics.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_atomics.c	2015-09-16 14:36:09.000000000 +0000
@@ -55,7 +55,8 @@
       return;
    }
 
-   if (instr->variables[0]->var->data.mode != nir_var_uniform)
+   if (instr->variables[0]->var->data.mode != nir_var_uniform &&
+       instr->variables[0]->var->data.mode != nir_var_shader_storage)
       return; /* atomics passed as function arguments can't be lowered */
 
    void *mem_ctx = ralloc_parent(instr);
@@ -109,7 +110,7 @@
    }
 
    new_instr->src[0].is_ssa = true;
-   new_instr->src[0].ssa = offset_def;;
+   new_instr->src[0].ssa = offset_def;
 
    if (instr->dest.is_ssa) {
       nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_io.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_io.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_io.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_io.c	2015-09-16 14:36:09.000000000 +0000
@@ -29,19 +29,60 @@
 /*
  * This lowering pass converts references to input/output variables with
  * loads/stores to actual input/output intrinsics.
- *
- * NOTE: This pass really only works for scalar backends at the moment due
- * to the way it packes the input/output data.
  */
 
 #include "nir.h"
+#include "nir_builder.h"
 
 struct lower_io_state {
+   nir_builder builder;
    void *mem_ctx;
+   bool is_scalar;
 };
 
+/* Size of a type in vec4 slots, as used by vector backends: a scalar or
+ * vector takes one slot, a matrix one slot per column, an array or struct
+ * the sum of its elements; samplers and atomic counters take none.  Returns
+ * unsigned to agree with type_size_scalar() and type_size().
+ */
+static unsigned
+type_size_vec4(const struct glsl_type *type)
+{
+   unsigned int size, i;
+
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      return glsl_type_is_matrix(type) ? glsl_get_matrix_columns(type) : 1;
+   case GLSL_TYPE_ARRAY:
+      return type_size_vec4(glsl_get_array_element(type)) * glsl_get_length(type);
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < glsl_get_length(type); i++) {
+         size += type_size_vec4(glsl_get_struct_field(type, i));
+      }
+      return size;
+   case GLSL_TYPE_SUBROUTINE:
+      return 1;
+   case GLSL_TYPE_SAMPLER:
+      return 0;
+   case GLSL_TYPE_ATOMIC_UINT:
+      return 0;
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_INTERFACE:
+      unreachable("not reached");
+   }
+
+   return 0;
+}
+
 static unsigned
-type_size(const struct glsl_type *type)
+type_size_scalar(const struct glsl_type *type)
 {
    unsigned int size, i;
 
@@ -52,13 +93,15 @@
    case GLSL_TYPE_BOOL:
       return glsl_get_components(type);
    case GLSL_TYPE_ARRAY:
-      return type_size(glsl_get_array_element(type)) * glsl_get_length(type);
+      return type_size_scalar(glsl_get_array_element(type)) * glsl_get_length(type);
    case GLSL_TYPE_STRUCT:
       size = 0;
       for (i = 0; i < glsl_get_length(type); i++) {
-         size += type_size(glsl_get_struct_field(type, i));
+         size += type_size_scalar(glsl_get_struct_field(type, i));
       }
       return size;
+   case GLSL_TYPE_SUBROUTINE:
+      return 1;
    case GLSL_TYPE_SAMPLER:
       return 0;
    case GLSL_TYPE_ATOMIC_UINT:
@@ -76,8 +119,17 @@
    return 0;
 }
 
+/* Size of a type in the units the backend addresses with: scalar components
+ * when is_scalar is set, vec4 slots otherwise.
+ */
+static unsigned
+type_size(const struct glsl_type *type, bool is_scalar)
+{
+   return is_scalar ? type_size_scalar(type) : type_size_vec4(type);
+}
+
 void
-nir_assign_var_locations_scalar(struct exec_list *var_list, unsigned *size)
+nir_assign_var_locations(struct exec_list *var_list, unsigned *size, bool is_scalar)
 {
    unsigned location = 0;
 
@@ -86,11 +138,12 @@
        * UBO's have their own address spaces, so don't count them towards the
        * number of global uniforms
        */
-      if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
+      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
+          var->interface_type != NULL)
          continue;
 
       var->data.driver_location = location;
-      location += type_size(var->type);
+      location += type_size(var->type, is_scalar);
    }
 
    *size = location;
@@ -136,10 +189,11 @@
  * assigns locations to variables that are used indirectly.
  */
 void
-nir_assign_var_locations_scalar_direct_first(nir_shader *shader,
-                                             struct exec_list *var_list,
-                                             unsigned *direct_size,
-                                             unsigned *size)
+nir_assign_var_locations_direct_first(nir_shader *shader,
+                                      struct exec_list *var_list,
+                                      unsigned *direct_size,
+                                      unsigned *size,
+                                      bool is_scalar)
 {
    struct set *indirect_set = _mesa_set_create(NULL, _mesa_hash_pointer,
                                                _mesa_key_pointer_equal);
@@ -153,27 +207,29 @@
    unsigned location = 0;
 
    foreach_list_typed(nir_variable, var, node, var_list) {
-      if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
+      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
+          var->interface_type != NULL)
          continue;
 
       if (_mesa_set_search(indirect_set, var))
          continue;
 
       var->data.driver_location = location;
-      location += type_size(var->type);
+      location += type_size(var->type, is_scalar);
    }
 
    *direct_size = location;
 
    foreach_list_typed(nir_variable, var, node, var_list) {
-      if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
+      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
+          var->interface_type != NULL)
          continue;
 
       if (!_mesa_set_search(indirect_set, var))
          continue;
 
       var->data.driver_location = location;
-      location += type_size(var->type);
+      location += type_size(var->type, is_scalar);
    }
 
    *size = location;
@@ -188,6 +244,9 @@
    bool found_indirect = false;
    unsigned base_offset = 0;
 
+   nir_builder *b = &state->builder;
+   nir_builder_insert_before_instr(b, instr);
+
    nir_deref *tail = &deref->deref;
    while (tail->child != NULL) {
       const struct glsl_type *parent_type = tail->type;
@@ -195,55 +254,55 @@
 
       if (tail->deref_type == nir_deref_type_array) {
          nir_deref_array *deref_array = nir_deref_as_array(tail);
-         unsigned size = type_size(tail->type);
+         unsigned size = type_size(tail->type, state->is_scalar);
 
          base_offset += size * deref_array->base_offset;
 
          if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
-            nir_load_const_instr *load_const =
-               nir_load_const_instr_create(state->mem_ctx, 1);
-            load_const->value.u[0] = size;
-            nir_instr_insert_before(instr, &load_const->instr);
-
-            nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx,
-                                                      nir_op_imul);
-            mul->src[0].src.is_ssa = true;
-            mul->src[0].src.ssa = &load_const->def;
-            nir_src_copy(&mul->src[1].src, &deref_array->indirect,
-                         state->mem_ctx);
-            mul->dest.write_mask = 1;
-            nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
-            nir_instr_insert_before(instr, &mul->instr);
+            nir_ssa_def *mul =
+               nir_imul(b, nir_imm_int(b, size),
+                        nir_ssa_for_src(b, deref_array->indirect, 1));
 
             if (found_indirect) {
-               nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx,
-                                                         nir_op_iadd);
-               add->src[0].src = *indirect;
-               add->src[1].src.is_ssa = true;
-               add->src[1].src.ssa = &mul->dest.dest.ssa;
-               add->dest.write_mask = 1;
-               nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
-               nir_instr_insert_before(instr, &add->instr);
-
-               indirect->is_ssa = true;
-               indirect->ssa = &add->dest.dest.ssa;
+               indirect->ssa =
+                  nir_iadd(b, nir_ssa_for_src(b, *indirect, 1), mul);
             } else {
-               indirect->is_ssa = true;
-               indirect->ssa = &mul->dest.dest.ssa;
-               found_indirect = true;
+               indirect->ssa = mul;
             }
+            indirect->is_ssa = true;
+            found_indirect = true;
          }
       } else if (tail->deref_type == nir_deref_type_struct) {
          nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
 
          for (unsigned i = 0; i < deref_struct->index; i++)
-            base_offset += type_size(glsl_get_struct_field(parent_type, i));
+            base_offset += type_size(glsl_get_struct_field(parent_type, i),
+                                     state->is_scalar);
       }
    }
 
    return base_offset;
 }
 
+static nir_intrinsic_op
+load_op(nir_variable_mode mode, bool has_indirect)
+{
+   nir_intrinsic_op direct, indirect; /* plain / _indirect load opcodes */
+   switch (mode) {
+   case nir_var_shader_in:
+      direct = nir_intrinsic_load_input;
+      indirect = nir_intrinsic_load_input_indirect;
+      break;
+   case nir_var_uniform:
+      direct = nir_intrinsic_load_uniform;
+      indirect = nir_intrinsic_load_uniform_indirect;
+      break;
+   default:
+      unreachable("Unknown variable mode");
+   }
+   return has_indirect ? indirect : direct;
+}
+
 static bool
 nir_lower_io_block(nir_block *block, void *void_state)
 {
@@ -263,23 +322,9 @@
 
          bool has_indirect = deref_has_indirect(intrin->variables[0]);
 
-         /* Figure out the opcode */
-         nir_intrinsic_op load_op;
-         switch (mode) {
-         case nir_var_shader_in:
-            load_op = has_indirect ? nir_intrinsic_load_input_indirect :
-                                     nir_intrinsic_load_input;
-            break;
-         case nir_var_uniform:
-            load_op = has_indirect ? nir_intrinsic_load_uniform_indirect :
-                                     nir_intrinsic_load_uniform;
-            break;
-         default:
-            unreachable("Unknown variable mode");
-         }
-
-         nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
-                                                                load_op);
+         nir_intrinsic_instr *load =
+            nir_intrinsic_instr_create(state->mem_ctx,
+                                       load_op(mode, has_indirect));
          load->num_components = intrin->num_components;
 
          nir_src indirect;
@@ -288,7 +333,6 @@
          offset += intrin->variables[0]->var->data.driver_location;
 
          load->const_index[0] = offset;
-         load->const_index[1] = 1;
 
          if (has_indirect)
             load->src[0] = indirect;
@@ -331,7 +375,6 @@
          offset += intrin->variables[0]->var->data.driver_location;
 
          store->const_index[0] = offset;
-         store->const_index[1] = 1;
 
          nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
 
@@ -352,11 +395,13 @@
 }
 
 static void
-nir_lower_io_impl(nir_function_impl *impl)
+nir_lower_io_impl(nir_function_impl *impl, bool is_scalar)
 {
    struct lower_io_state state;
 
+   nir_builder_init(&state.builder, impl);
    state.mem_ctx = ralloc_parent(impl);
+   state.is_scalar = is_scalar;
 
    nir_foreach_block(impl, nir_lower_io_block, &state);
 
@@ -365,10 +410,10 @@
 }
 
 void
-nir_lower_io(nir_shader *shader)
+nir_lower_io(nir_shader *shader, bool is_scalar)
 {
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         nir_lower_io_impl(overload->impl);
+         nir_lower_io_impl(overload->impl, is_scalar);
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_load_const_to_scalar.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_load_const_to_scalar.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_load_const_to_scalar.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_load_const_to_scalar.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/macros.h"
+#include "nir.h"
+#include "nir_builder.h"
+
+/** @file nir_lower_load_const_to_scalar.c
+ *
+ * Replaces vector nir_load_const instructions with a series of loads and a
+ * vec[234] to reconstruct the original vector (on the assumption that
+ * nir_lower_alu_to_scalar() will then be used to split it up).
+ *
+ * This gives NIR a chance to CSE more operations on a scalar shader, when the
+ * same value was used in different vector constant loads.
+ */
+
+static void
+lower_load_const_instr_scalar(nir_load_const_instr *lower)
+{
+   if (lower->def.num_components == 1)
+      return;
+
+   nir_builder b;
+   nir_builder_init(&b, nir_cf_node_get_function(&lower->instr.block->cf_node));
+   nir_builder_insert_before_instr(&b, &lower->instr);
+
+   /* Emit the individual loads. */
+   nir_ssa_def *loads[4];
+   for (unsigned i = 0; i < lower->def.num_components; i++) {
+      nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1);
+      load_comp->value.u[0] = lower->value.u[i];
+      nir_builder_instr_insert(&b, &load_comp->instr);
+      loads[i] = &load_comp->def;
+   }
+
+   /* Batch things back together into a vector. */
+   nir_ssa_def *vec;
+   switch (lower->def.num_components) {
+   case 2:
+      vec = nir_vec2(&b, loads[0], loads[1]);
+      break;
+   case 3:
+      vec = nir_vec3(&b, loads[0], loads[1], loads[2]);
+      break;
+   case 4:
+      vec = nir_vec4(&b, loads[0], loads[1], loads[2], loads[3]);
+      break;
+   default:
+      unreachable("Unknown load_const component count.");
+   }
+
+   /* Replace the old load with a reference to our reconstructed vector. */
+   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec),
+                            ralloc_parent(b.impl));
+   nir_instr_remove(&lower->instr);
+}
+
+static bool
+lower_load_const_to_scalar_block(nir_block *block, void *data)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_load_const)
+         lower_load_const_instr_scalar(nir_instr_as_load_const(instr));
+   }
+
+   return true;
+}
+
+static void
+nir_lower_load_const_to_scalar_impl(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, lower_load_const_to_scalar_block, NULL);
+}
+
+void
+nir_lower_load_const_to_scalar(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_load_const_to_scalar_impl(overload->impl);
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_phis_to_scalar.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_phis_to_scalar.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_phis_to_scalar.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_phis_to_scalar.c	2015-09-16 14:36:09.000000000 +0000
@@ -75,6 +75,7 @@
       return should_lower_phi(nir_instr_as_phi(src_instr), state);
 
    case nir_instr_type_load_const:
+   case nir_instr_type_ssa_undef:
       /* These are trivially scalarizable */
       return true;
 
@@ -153,6 +154,11 @@
          break;
    }
 
+   /* The hash table entry for 'phi' may have changed while recursing the
+    * dependence graph, so we need to look it up again */
+   entry = _mesa_hash_table_search(state->phi_table, phi);
+   assert(entry);
+
    entry->data = (void *)(intptr_t)scalarizable;
 
    return scalarizable;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_vec_to_movs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_vec_to_movs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_lower_vec_to_movs.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_lower_vec_to_movs.c	2015-09-16 14:36:09.000000000 +0000
@@ -90,7 +90,7 @@
       if (instr->type != nir_instr_type_alu)
          continue;
 
-      nir_alu_instr *vec = (nir_alu_instr *)instr;
+      nir_alu_instr *vec = nir_instr_as_alu(instr);
 
       switch (vec->op) {
       case nir_op_vec2:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opcodes.py mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opcodes.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opcodes.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opcodes.py	2015-09-16 14:36:09.000000000 +0000
@@ -474,10 +474,10 @@
 """)
 
 opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
-dst = ldexp(src0, src1);
+dst = ldexpf(src0, src1);
 /* flush denormals to zero. */
 if (!isnormal(dst))
-   dst = copysign(0.0f, src0);
+   dst = copysignf(0.0f, src0);
 """)
 
 # Combines the first component of each input to make a 2-component vector.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_algebraic.py mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_algebraic.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_algebraic.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_algebraic.py	2015-09-16 14:36:09.000000000 +0000
@@ -101,6 +101,7 @@
    (('umin', a, a), a),
    (('umax', a, a), a),
    (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+   (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
    (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
    (('fsat', ('fsat', a)), ('fsat', a)),
    (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
@@ -112,6 +113,8 @@
    (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
    (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
    (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+   (('fne', ('fneg', a), a), ('fne', a, 0.0)),
+   (('feq', ('fneg', a), a), ('feq', a, 0.0)),
    # Emulating booleans
    (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))),
    (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
@@ -131,6 +134,7 @@
    # Logical and bit operations
    (('fand', a, 0.0), 0.0),
    (('iand', a, a), a),
+   (('iand', a, ~0), a),
    (('iand', a, 0), 0),
    (('ior', a, a), a),
    (('ior', a, 0), a),
@@ -156,6 +160,8 @@
    (('fpow', a, 2.0), ('fmul', a, a)),
    (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
    (('fpow', 2.0, a), ('fexp2', a)),
+   (('fpow', ('fpow', a, 2.2), 0.454545), a),
+   (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
    (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
    (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
    (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_cse.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_cse.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_cse.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_cse.c	2015-09-16 14:36:09.000000000 +0000
@@ -86,8 +86,41 @@
       }
       return true;
    }
-   case nir_instr_type_tex:
-      return false;
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
+      nir_tex_instr *tex2 = nir_instr_as_tex(instr2);
+
+      if (tex1->op != tex2->op)
+         return false;
+
+      if (tex1->num_srcs != tex2->num_srcs)
+         return false;
+      for (unsigned i = 0; i < tex1->num_srcs; i++) {
+         if (tex1->src[i].src_type != tex2->src[i].src_type ||
+             !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
+            return false;
+         }
+      }
+
+      if (tex1->coord_components != tex2->coord_components ||
+          tex1->sampler_dim != tex2->sampler_dim ||
+          tex1->is_array != tex2->is_array ||
+          tex1->is_shadow != tex2->is_shadow ||
+          tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
+          memcmp(tex1->const_offset, tex2->const_offset,
+                 sizeof(tex1->const_offset)) != 0 ||
+          tex1->component != tex2->component ||
+         tex1->sampler_index != tex2->sampler_index ||
+         tex1->sampler_array_size != tex2->sampler_array_size) {
+         return false;
+      }
+
+      /* Don't support un-lowered sampler derefs currently. */
+      if (tex1->sampler || tex2->sampler)
+         return false;
+
+      return true;
+   }
    case nir_instr_type_load_const: {
       nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
       nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
@@ -181,11 +214,10 @@
 
    switch (instr->type) {
    case nir_instr_type_alu:
+   case nir_instr_type_tex:
    case nir_instr_type_load_const:
    case nir_instr_type_phi:
       return true;
-   case nir_instr_type_tex:
-      return false; /* TODO */
    case nir_instr_type_intrinsic: {
       const nir_intrinsic_info *info =
          &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
@@ -212,6 +244,9 @@
    case nir_instr_type_alu:
       assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
       return &nir_instr_as_alu(instr)->dest.dest.ssa;
+   case nir_instr_type_tex:
+      assert(nir_instr_as_tex(instr)->dest.is_ssa);
+      return &nir_instr_as_tex(instr)->dest.ssa;
    case nir_instr_type_load_const:
       return &nir_instr_as_load_const(instr)->def;
    case nir_instr_type_phi:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_peephole_ffma.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_peephole_ffma.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_peephole_ffma.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_peephole_ffma.c	2015-09-16 14:36:09.000000000 +0000
@@ -73,8 +73,10 @@
 }
 
 static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+get_mul_for_src(nir_alu_src *src, int num_components,
+                uint8_t swizzle[4], bool *negate, bool *abs)
 {
+   uint8_t swizzle_tmp[4];
    assert(src->src.is_ssa && !src->abs && !src->negate);
 
    nir_instr *instr = src->src.ssa->parent_instr;
@@ -85,16 +87,16 @@
    switch (alu->op) {
    case nir_op_imov:
    case nir_op_fmov:
-      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
       break;
 
    case nir_op_fneg:
-      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
       *negate = !*negate;
       break;
 
    case nir_op_fabs:
-      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
       *negate = false;
       *abs = true;
       break;
@@ -115,12 +117,18 @@
    if (!alu)
       return NULL;
 
-   for (unsigned i = 0; i < 4; i++) {
-      if (!(alu->dest.write_mask & (1 << i)))
-         break;
-
-      swizzle[i] = swizzle[src->swizzle[i]];
-   }
+   /* Copy swizzle data before overwriting it to avoid setting a wrong swizzle.
+    *
+    * Example:
+    *   Former swizzle[] = xyzw
+    *   src->swizzle[] = zyxx
+    *
+    *   Expected output swizzle = zyxx
+    *   If we reuse swizzle in the loop, then output swizzle would be zyzz.
+    */
+   memcpy(swizzle_tmp, swizzle, 4*sizeof(uint8_t));
+   for (unsigned i = 0; i < num_components; i++)
+      swizzle[i] = swizzle_tmp[src->swizzle[i]];
 
    return alu;
 }
@@ -160,7 +168,9 @@
          negate = false;
          abs = false;
 
-         mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+         mul = get_mul_for_src(&add->src[add_mul_src],
+                               add->dest.dest.ssa.num_components,
+                               swizzle, &negate, &abs);
 
          if (mul != NULL)
             break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_peephole_select.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_peephole_select.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_peephole_select.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_peephole_select.c	2015-09-16 14:36:09.000000000 +0000
@@ -82,12 +82,22 @@
          break;
 
       case nir_instr_type_alu: {
-         /* It must be a move operation */
          nir_alu_instr *mov = nir_instr_as_alu(instr);
-         if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
-             mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
-             mov->op != nir_op_fabs && mov->op != nir_op_iabs)
+         switch (mov->op) {
+         case nir_op_fmov:
+         case nir_op_imov:
+         case nir_op_fneg:
+         case nir_op_ineg:
+         case nir_op_fabs:
+         case nir_op_iabs:
+         case nir_op_vec2:
+         case nir_op_vec3:
+         case nir_op_vec4:
+            /* It must be a move-like operation. */
+            break;
+         default:
             return false;
+         }
 
          /* Can't handle saturate */
          if (mov->dest.saturate)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_undef.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_undef.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_opt_undef.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_opt_undef.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/** @file nir_opt_undef.c
+ *
+ * Handles optimization of operations involving ssa_undef.  For now, we just
+ * make sure that csels between undef and some other value just give the other
+ * value (on the assumption that the condition's going to be choosing the
+ * defined value).  This reduces work after if flattening when each side of
+ * the if is defining a variable.
+ *
+ * Some day, we may find some use for making other operations consuming an
+ * undef arg output undef, but I don't know of any cases currently.
+ */
+
+static bool
+opt_undef_alu(nir_alu_instr *instr)
+{
+   if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel)
+      return false;
+
+   assert(instr->dest.dest.is_ssa);
+
+   for (int i = 1; i <= 2; i++) {
+      if (!instr->src[i].src.is_ssa)
+         continue;
+
+      nir_instr *parent = instr->src[i].src.ssa->parent_instr;
+      if (parent->type != nir_instr_type_ssa_undef)
+         continue;
+
+      /* We can't just use nir_alu_src_copy, because we need the def/use
+       * updated.
+       */
+      nir_instr_rewrite_src(&instr->instr, &instr->src[0].src,
+                            instr->src[i == 1 ? 2 : 1].src);
+      nir_alu_src_copy(&instr->src[0], &instr->src[i == 1 ? 2 : 1],
+                       ralloc_parent(instr));
+
+      nir_src empty_src;
+      memset(&empty_src, 0, sizeof(empty_src));
+      nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src);
+      nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src);
+      instr->op = nir_op_imov;
+
+      return true;
+   }
+
+   return false;
+}
+
+static bool
+opt_undef_block(nir_block *block, void *data)
+{
+   bool *progress = data;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_alu)
+         if (opt_undef_alu(nir_instr_as_alu(instr)))
+             (*progress) = true;
+   }
+
+   return true;
+}
+
+bool
+nir_opt_undef(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         nir_foreach_block(overload->impl, opt_undef_block, &progress);
+         if (progress)
+            nir_metadata_preserve(overload->impl,
+                                  nir_metadata_block_index |
+                                  nir_metadata_dominance);
+      }
+   }
+
+   return progress;
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_print.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_print.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_print.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_print.c	2015-09-16 14:36:09.000000000 +0000
@@ -214,7 +214,7 @@
    const char *const samp = (var->data.sample) ? "sample " : "";
    const char *const inv = (var->data.invariant) ? "invariant " : "";
    const char *const mode[] = { "shader_in ", "shader_out ", "", "",
-                                "uniform ", "system " };
+                                "uniform ", "shader_storage", "system " };
    const char *const interp[] = { "", "smooth", "flat", "noperspective" };
 
    fprintf(fp, "%s%s%s%s%s ",
@@ -239,7 +239,8 @@
 
    if (var->data.mode == nir_var_shader_in ||
        var->data.mode == nir_var_shader_out ||
-       var->data.mode == nir_var_uniform) {
+       var->data.mode == nir_var_uniform ||
+       var->data.mode == nir_var_shader_storage) {
       fprintf(fp, " (%u, %u)", var->data.location, var->data.driver_location);
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_search.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_search.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_search.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_search.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,7 +48,7 @@
       return false;
    if (src.ssa->parent_instr->type != nir_instr_type_alu)
       return false;
-   return alu_instr_is_bool((nir_alu_instr *)src.ssa->parent_instr);
+   return alu_instr_is_bool(nir_instr_as_alu(src.ssa->parent_instr));
 }
 
 static bool
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_types.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_types.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_types.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_types.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -155,6 +155,12 @@
 }
 
 const glsl_type *
+glsl_uint_type(void)
+{
+   return glsl_type::uint_type;
+}
+
+const glsl_type *
 glsl_array_type(const glsl_type *base, unsigned elements)
 {
    return glsl_type::get_array_instance(base, elements);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_types.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_types.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_types.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_types.h	2015-09-16 14:36:09.000000000 +0000
@@ -71,6 +71,7 @@
 const struct glsl_type *glsl_void_type(void);
 const struct glsl_type *glsl_float_type(void);
 const struct glsl_type *glsl_vec4_type(void);
+const struct glsl_type *glsl_uint_type(void);
 const struct glsl_type *glsl_array_type(const struct glsl_type *base,
                                         unsigned elements);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/nir/nir_validate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/nir/nir_validate.c	2015-09-16 14:36:09.000000000 +0000
@@ -400,11 +400,13 @@
       break;
    case nir_intrinsic_store_var:
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
-             instr->variables[0]->var->data.mode != nir_var_uniform);
+             instr->variables[0]->var->data.mode != nir_var_uniform &&
+             instr->variables[0]->var->data.mode != nir_var_shader_storage);
       break;
    case nir_intrinsic_copy_var:
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
-             instr->variables[0]->var->data.mode != nir_var_uniform);
+             instr->variables[0]->var->data.mode != nir_var_uniform &&
+             instr->variables[0]->var->data.mode != nir_var_shader_storage);
       assert(instr->variables[1]->var->data.mode != nir_var_shader_out);
       break;
    default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_constant_propagation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_constant_propagation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_constant_propagation.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_constant_propagation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -40,6 +40,7 @@
 #include "ir_basic_block.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"
 
 namespace {
 
@@ -95,7 +96,8 @@
       killed_all = false;
       mem_ctx = ralloc_context(0);
       this->acp = new(mem_ctx) exec_list;
-      this->kills = new(mem_ctx) exec_list;
+      this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
    }
    ~ir_constant_propagation_visitor()
    {
@@ -110,6 +112,8 @@
    virtual ir_visitor_status visit_enter(class ir_if *);
 
    void add_constant(ir_assignment *ir);
+   void constant_folding(ir_rvalue **rvalue);
+   void constant_propagation(ir_rvalue **rvalue);
    void kill(ir_variable *ir, unsigned write_mask);
    void handle_if_block(exec_list *instructions);
    void handle_rvalue(ir_rvalue **rvalue);
@@ -121,7 +125,7 @@
     * List of kill_entry: The masks of variables whose values were
     * killed in this block.
     */
-   exec_list *kills;
+   hash_table *kills;
 
    bool progress;
 
@@ -132,8 +136,38 @@
 
 
 void
-ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
+ir_constant_propagation_visitor::constant_folding(ir_rvalue **rvalue) {
+
+   if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant)
+      return;
+
+   /* Note that we visit rvalues on leaving.  So if an expression has a
+    * non-constant operand, no need to go looking down it to find if it's
+    * constant.  This cuts the time of this pass down drastically.
+    */
+   ir_expression *expr = (*rvalue)->as_expression();
+   if (expr) {
+      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+	 if (!expr->operands[i]->as_constant())
+	    return;
+      }
+   }
+
+   /* Ditto for swizzles. */
+   ir_swizzle *swiz = (*rvalue)->as_swizzle();
+   if (swiz && !swiz->val->as_constant())
+      return;
+
+   ir_constant *constant = (*rvalue)->constant_expression_value();
+   if (constant) {
+      *rvalue = constant;
+      this->progress = true;
+   }
+}
+
+void
+ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) {
+
    if (this->in_assignee || !*rvalue)
       return;
 
@@ -216,6 +250,13 @@
    this->progress = true;
 }
 
+void
+ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   constant_propagation(rvalue);
+   constant_folding(rvalue);
+}
+
 ir_visitor_status
 ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
 {
@@ -224,11 +265,12 @@
     * main() at link time, so they're irrelevant to us.
     */
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    visit_list_elements(this, &ir->body);
@@ -243,6 +285,8 @@
 ir_visitor_status
 ir_constant_propagation_visitor::visit_leave(ir_assignment *ir)
 {
+  constant_folding(&ir->rhs);
+
    if (this->in_assignee)
       return visit_continue;
 
@@ -311,11 +355,12 @@
 ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
 {
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    /* Populate the initial acp with a constant of the original */
@@ -329,12 +374,14 @@
       orig_acp->make_empty();
    }
 
-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
       kill(k->var, k->write_mask);
    }
 }
@@ -356,7 +403,7 @@
 ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
 {
    exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
    bool orig_killed_all = this->killed_all;
 
    /* FINISHME: For now, the initial acp for loops is totally empty.
@@ -364,7 +411,8 @@
     * cloned minus the killed entries after the first run through.
     */
    this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    this->killed_all = false;
 
    visit_list_elements(this, &ir->body_instructions);
@@ -373,12 +421,14 @@
       orig_acp->make_empty();
    }
 
-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
       kill(k->var, k->write_mask);
    }
 
@@ -407,14 +457,15 @@
    /* Add this writemask of the variable to the list of killed
     * variables in this block.
     */
-   foreach_in_list(kill_entry, entry, this->kills) {
-      if (entry->var == var) {
-	 entry->write_mask |= write_mask;
-	 return;
-      }
+   hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
+   if (kill_hash_entry) {
+      kill_entry *entry = (kill_entry *) kill_hash_entry->data;
+      entry->write_mask |= write_mask;
+      return;
    }
    /* Not already in the list.  Make new entry. */
-   this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
+   _mesa_hash_table_insert(this->kills, var,
+                           new(this->mem_ctx) kill_entry(var, write_mask));
 }
 
 /**
@@ -444,6 +495,14 @@
    if (!deref->var->type->is_vector() && !deref->var->type->is_scalar())
       return;
 
+   /* We can't do constant propagation on buffer variables: the underlying
+    * memory storage is shared across multiple threads, so we can't be sure
+    * that the variable value isn't modified between this assignment and the
+    * next instruction where its value is read.
+    */
+   if (deref->var->data.mode == ir_var_shader_storage)
+      return;
+
    entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant);
    this->acp->push_tail(entry);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_constant_variable.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_constant_variable.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_constant_variable.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_constant_variable.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -36,11 +36,11 @@
 #include "ir_visitor.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"
 
 namespace {
 
 struct assignment_entry {
-   exec_node link;
    int assignment_count;
    ir_variable *var;
    ir_constant *constval;
@@ -54,31 +54,32 @@
    virtual ir_visitor_status visit_enter(ir_assignment *);
    virtual ir_visitor_status visit_enter(ir_call *);
 
-   exec_list list;
+   struct hash_table *ht;
 };
 
 } /* unnamed namespace */
 
 static struct assignment_entry *
-get_assignment_entry(ir_variable *var, exec_list *list)
+get_assignment_entry(ir_variable *var, struct hash_table *ht)
 {
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
    struct assignment_entry *entry;
 
-   foreach_list_typed(struct assignment_entry, entry, link, list) {
-      if (entry->var == var)
-	 return entry;
+   if (hte) {
+      entry = (struct assignment_entry *) hte->data;
+   } else {
+      entry = (struct assignment_entry *) calloc(1, sizeof(*entry));
+      entry->var = var;
+      _mesa_hash_table_insert(ht, var, entry);
    }
 
-   entry = (struct assignment_entry *)calloc(1, sizeof(*entry));
-   entry->var = var;
-   list->push_head(&entry->link);
    return entry;
 }
 
 ir_visitor_status
 ir_constant_variable_visitor::visit(ir_variable *ir)
 {
-   struct assignment_entry *entry = get_assignment_entry(ir, &this->list);
+   struct assignment_entry *entry = get_assignment_entry(ir, this->ht);
    entry->our_scope = true;
    return visit_continue;
 }
@@ -97,7 +98,7 @@
    ir_constant *constval;
    struct assignment_entry *entry;
 
-   entry = get_assignment_entry(ir->lhs->variable_referenced(), &this->list);
+   entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht);
    assert(entry);
    entry->assignment_count++;
 
@@ -115,6 +116,13 @@
    if (!var)
       return visit_continue;
 
+   /* Ignore buffer variables, since the underlying storage is shared
+    * and we can't be sure that this variable won't be written by another
+    * thread.
+    */
+   if (var->data.mode == ir_var_shader_storage)
+      return visit_continue;
+
    constval = ir->rhs->constant_expression_value();
    if (!constval)
       return visit_continue;
@@ -143,7 +151,7 @@
 	 struct assignment_entry *entry;
 
 	 assert(var);
-	 entry = get_assignment_entry(var, &this->list);
+	 entry = get_assignment_entry(var, this->ht);
 	 entry->assignment_count++;
       }
    }
@@ -154,7 +162,7 @@
       struct assignment_entry *entry;
 
       assert(var);
-      entry = get_assignment_entry(var, &this->list);
+      entry = get_assignment_entry(var, this->ht);
       entry->assignment_count++;
    }
 
@@ -170,20 +178,22 @@
    bool progress = false;
    ir_constant_variable_visitor v;
 
+   v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                  _mesa_key_pointer_equal);
    v.run(instructions);
 
-   while (!v.list.is_empty()) {
-
-      struct assignment_entry *entry;
-      entry = exec_node_data(struct assignment_entry, v.list.head, link);
+   struct hash_entry *hte;
+   hash_table_foreach(v.ht, hte) {
+      struct assignment_entry *entry = (struct assignment_entry *) hte->data;
 
       if (entry->assignment_count == 1 && entry->constval && entry->our_scope) {
 	 entry->var->constant_value = entry->constval;
 	 progress = true;
       }
-      entry->link.remove();
+      hte->data = NULL;
       free(entry);
    }
+   _mesa_hash_table_destroy(v.ht, NULL);
 
    return progress;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_copy_propagation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_copy_propagation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_copy_propagation.cpp	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_copy_propagation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -330,7 +330,7 @@
 	  */
 	 ir->condition = new(ralloc_parent(ir)) ir_constant(false);
 	 this->progress = true;
-      } else {
+      } else if (lhs_var->data.mode != ir_var_shader_storage) {
 	 entry = new(this->acp) acp_entry(lhs_var, rhs_var);
 	 this->acp->push_tail(entry);
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_dead_code.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_dead_code.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_dead_code.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_dead_code.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -77,11 +77,13 @@
 
       if (entry->assign) {
 	 /* Remove a single dead assignment to the variable we found.
-	  * Don't do so if it's a shader or function output, though.
+	  * Don't do so if it's a shader or function output or a shader
+	  * storage variable though.
 	  */
 	 if (entry->var->data.mode != ir_var_function_out &&
 	     entry->var->data.mode != ir_var_function_inout &&
-             entry->var->data.mode != ir_var_shader_out) {
+             entry->var->data.mode != ir_var_shader_out &&
+             entry->var->data.mode != ir_var_shader_storage) {
 	    entry->assign->remove();
 	    progress = true;
 
@@ -99,7 +101,8 @@
 	  * stage.  Also, once uniform locations have been assigned, the
 	  * declaration cannot be deleted.
 	  */
-         if (entry->var->data.mode == ir_var_uniform) {
+         if (entry->var->data.mode == ir_var_uniform ||
+             entry->var->data.mode == ir_var_shader_storage) {
             if (uniform_locations_assigned || entry->var->constant_value)
                continue;
 
@@ -115,7 +118,7 @@
              * If the variable is in a uniform block with one of those
              * layouts, do not eliminate it.
              */
-            if (entry->var->is_in_uniform_block()) {
+            if (entry->var->is_in_buffer_block()) {
                const glsl_type *const block_type =
                   entry->var->is_interface_instance()
                   ? entry->var->type : entry->var->get_interface_type();
@@ -123,6 +126,9 @@
                if (block_type->interface_packing != GLSL_INTERFACE_PACKING_PACKED)
                   continue;
             }
+
+            if (entry->var->type->is_subroutine())
+               continue;
          }
 
 	 entry->var->remove();
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_structure_splitting.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_structure_splitting.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_structure_splitting.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_structure_splitting.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -103,8 +103,9 @@
 {
    assert(var);
 
-   if (!var->type->is_record() || var->data.mode == ir_var_uniform
-       || var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out)
+   if (!var->type->is_record() ||
+       var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage ||
+       var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out)
       return NULL;
 
    foreach_in_list(variable_entry, entry, &this->variable_list) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_tree_grafting.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_tree_grafting.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/opt_tree_grafting.cpp	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/opt_tree_grafting.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -359,10 +359,11 @@
       if (!lhs_var)
 	 continue;
 
-      if (lhs_var->data.mode == ir_var_function_out ||
-	  lhs_var->data.mode == ir_var_function_inout ||
-          lhs_var->data.mode == ir_var_shader_out)
-	 continue;
+      if (lhs_var->data.mode == ir_var_function_out ||
+          lhs_var->data.mode == ir_var_function_inout ||
+          lhs_var->data.mode == ir_var_shader_out ||
+          lhs_var->data.mode == ir_var_shader_storage)
+         continue;
 
       ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/program.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/program.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/program.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/program.h	2015-09-16 14:36:09.000000000 +0000
@@ -40,6 +40,10 @@
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
 
 extern void
+build_program_resource_list(struct gl_context *ctx,
+                            struct gl_shader_program *shProg);
+
+extern void
 linker_error(struct gl_shader_program *prog, const char *fmt, ...)
    PRINTFLIKE(2, 3);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/shader_enums.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/shader_enums.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/shader_enums.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/shader_enums.h	2015-09-16 14:36:09.000000000 +0000
@@ -36,14 +36,95 @@
 typedef enum
 {
    MESA_SHADER_VERTEX = 0,
-   MESA_SHADER_GEOMETRY = 1,
-   MESA_SHADER_FRAGMENT = 2,
-   MESA_SHADER_COMPUTE = 3,
+   MESA_SHADER_TESS_CTRL = 1,
+   MESA_SHADER_TESS_EVAL = 2,
+   MESA_SHADER_GEOMETRY = 3,
+   MESA_SHADER_FRAGMENT = 4,
+   MESA_SHADER_COMPUTE = 5,
 } gl_shader_stage;
 
 #define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
 
 /**
+ * Indexes for vertex shader outputs, geometry shader inputs/outputs, and
+ * fragment shader inputs.
+ *
+ * Note that some of these values are not available to all pipeline stages.
+ *
+ * When this enum is updated, the following code must be updated too:
+ * - vertResults (in prog_print.c's arb_output_attrib_string())
+ * - fragAttribs (in prog_print.c's arb_input_attrib_string())
+ * - _mesa_varying_slot_in_fs()
+ */
+typedef enum
+{
+   VARYING_SLOT_POS,
+   VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */
+   VARYING_SLOT_COL1,
+   VARYING_SLOT_FOGC,
+   VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */
+   VARYING_SLOT_TEX1,
+   VARYING_SLOT_TEX2,
+   VARYING_SLOT_TEX3,
+   VARYING_SLOT_TEX4,
+   VARYING_SLOT_TEX5,
+   VARYING_SLOT_TEX6,
+   VARYING_SLOT_TEX7,
+   VARYING_SLOT_PSIZ, /* Does not appear in FS */
+   VARYING_SLOT_BFC0, /* Does not appear in FS */
+   VARYING_SLOT_BFC1, /* Does not appear in FS */
+   VARYING_SLOT_EDGE, /* Does not appear in FS */
+   VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */
+   VARYING_SLOT_CLIP_DIST0,
+   VARYING_SLOT_CLIP_DIST1,
+   VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */
+   VARYING_SLOT_LAYER, /* Appears as VS or GS output */
+   VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */
+   VARYING_SLOT_FACE, /* FS only */
+   VARYING_SLOT_PNTC, /* FS only */
+   VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */
+   VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */
+   VARYING_SLOT_VAR0, /* First generic varying slot */
+} gl_varying_slot;
+
+
+/**
+ * Bitflags for varying slots.
+ */
+/*@{*/
+#define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS)
+#define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0)
+#define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1)
+#define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC)
+#define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0)
+#define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1)
+#define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2)
+#define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3)
+#define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4)
+#define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5)
+#define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6)
+#define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7)
+#define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U))
+#define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \
+                                              MAX_TEXTURE_COORD_UNITS)
+#define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
+#define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
+#define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
+#define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
+#define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
+#define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
+#define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)
+#define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID)
+#define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER)
+#define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)
+#define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE)
+#define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC)
+#define VARYING_BIT_TESS_LEVEL_OUTER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_OUTER)
+#define VARYING_BIT_TESS_LEVEL_INNER BITFIELD64_BIT(VARYING_SLOT_TESS_LEVEL_INNER)
+#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V))
+/*@}*/
+
+/**
  * Bitflags for system values.
  */
 #define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
@@ -150,7 +231,7 @@
     * \name Geometry shader system values
     */
    /*@{*/
-   SYSTEM_VALUE_INVOCATION_ID,
+   SYSTEM_VALUE_INVOCATION_ID,  /**< (Also in Tessellation Control shader) */
    /*@}*/
 
    /**
@@ -163,6 +244,17 @@
    SYSTEM_VALUE_SAMPLE_MASK_IN,
    /*@}*/
 
+   /**
+    * \name Tessellation Evaluation shader system values
+    */
+   /*@{*/
+   SYSTEM_VALUE_TESS_COORD,
+   SYSTEM_VALUE_VERTICES_IN,    /**< Tessellation vertices in input patch */
+   SYSTEM_VALUE_PRIMITIVE_ID,   /**< (currently not used by GS) */
+   SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */
+   SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
+   /*@}*/
+
    SYSTEM_VALUE_MAX             /**< Number of values */
 } gl_system_value;
 
@@ -183,5 +275,24 @@
    INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
 };
 
+/**
+ * Fragment program results
+ */
+typedef enum
+{
+   FRAG_RESULT_DEPTH = 0,
+   FRAG_RESULT_STENCIL = 1,
+   /* If a single color should be written to all render targets, this
+    * register is written.  No FRAG_RESULT_DATAn will be written.
+    */
+   FRAG_RESULT_COLOR = 2,
+   FRAG_RESULT_SAMPLE_MASK = 3,
+
+   /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
+    * or ARB_fragment_program fragment.color[n]) color results.  If
+    * any are written, FRAG_RESULT_COLOR will not be written.
+    */
+   FRAG_RESULT_DATA0 = 4,
+} gl_frag_result;
 
 #endif /* SHADER_ENUMS_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/standalone_scaffolding.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/standalone_scaffolding.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/standalone_scaffolding.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/standalone_scaffolding.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -33,6 +33,7 @@
 #include <stdio.h>
 #include <string.h>
 #include "util/ralloc.h"
+#include "util/strtod.h"
 
 void
 _mesa_warning(struct gl_context *ctx, const char *fmt, ...)
@@ -89,7 +90,7 @@
 {
    unsigned i;
 
-   shProg->NumUserUniformStorage = 0;
+   shProg->NumUniformStorage = 0;
    shProg->UniformStorage = NULL;
    shProg->NumUniformRemapTable = 0;
    shProg->UniformRemapTable = NULL;
@@ -132,9 +133,11 @@
    ctx->Extensions.ARB_sample_shading = true;
    ctx->Extensions.ARB_shader_bit_encoding = true;
    ctx->Extensions.ARB_shader_stencil_export = true;
+   ctx->Extensions.ARB_shader_subroutine = true;
    ctx->Extensions.ARB_shader_texture_lod = true;
    ctx->Extensions.ARB_shading_language_420pack = true;
    ctx->Extensions.ARB_shading_language_packing = true;
+   ctx->Extensions.ARB_tessellation_shader = true;
    ctx->Extensions.ARB_texture_cube_map_array = true;
    ctx->Extensions.ARB_texture_gather = true;
    ctx->Extensions.ARB_texture_multisample = true;
@@ -191,4 +194,6 @@
 
    for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh)
       memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options));
+
+   _mesa_locale_init();
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/standalone_scaffolding.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/standalone_scaffolding.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/standalone_scaffolding.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/standalone_scaffolding.h	2015-09-16 14:36:09.000000000 +0000
@@ -61,6 +61,10 @@
       return MESA_SHADER_FRAGMENT;
    case GL_GEOMETRY_SHADER:
       return MESA_SHADER_GEOMETRY;
+   case GL_TESS_CONTROL_SHADER:
+      return MESA_SHADER_TESS_CTRL;
+   case GL_TESS_EVALUATION_SHADER:
+      return MESA_SHADER_TESS_EVAL;
    case GL_COMPUTE_SHADER:
       return MESA_SHADER_COMPUTE;
    default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/test_optpass.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/test_optpass.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/test_optpass.cpp	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/test_optpass.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -124,7 +124,8 @@
    } else if (sscanf(optimization, "lower_variable_index_to_cond_assign "
                      "( %d , %d , %d , %d ) ", &int_0, &int_1, &int_2,
                      &int_3) == 4) {
-      return lower_variable_index_to_cond_assign(ir, int_0 != 0, int_1 != 0,
+      return lower_variable_index_to_cond_assign(MESA_SHADER_VERTEX, ir,
+                                                 int_0 != 0, int_1 != 0,
                                                  int_2 != 0, int_3 != 0);
    } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ",
                      &int_0) == 1) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/general_ir_test.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/general_ir_test.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/general_ir_test.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/general_ir_test.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -31,11 +31,7 @@
    void *mem_ctx = ralloc_context(NULL);
 
    static const glsl_struct_field f[] = {
-      {
-         glsl_type::vec(4),
-         "v",
-         false
-      }
+      glsl_struct_field(glsl_type::vec(4), "v")
    };
 
    const glsl_type *const interface =
@@ -60,11 +56,7 @@
    void *mem_ctx = ralloc_context(NULL);
 
    static const glsl_struct_field f[] = {
-      {
-         glsl_type::vec(4),
-         "v",
-         false
-      }
+      glsl_struct_field(glsl_type::vec(4), "v")
    };
 
    const glsl_type *const interface =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/set_uniform_initializer_tests.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/set_uniform_initializer_tests.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/set_uniform_initializer_tests.cpp	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/set_uniform_initializer_tests.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -110,7 +110,7 @@
 
    prog->UniformStorage = rzalloc_array(prog, struct gl_uniform_storage,
 					num_storage);
-   prog->NumUserUniformStorage = num_storage;
+   prog->NumUniformStorage = num_storage;
 
    prog->UniformStorage[index_to_set].name = (char *) name;
    prog->UniformStorage[index_to_set].type = type;
@@ -155,7 +155,7 @@
 static void
 verify_initialization(struct gl_shader_program *prog, unsigned actual_index)
 {
-   for (unsigned i = 0; i < prog->NumUserUniformStorage; i++) {
+   for (unsigned i = 0; i < prog->NumUniformStorage; i++) {
       if (i == actual_index) {
 	 EXPECT_TRUE(prog->UniformStorage[actual_index].initialized);
       } else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/uniform_initializer_utils.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/uniform_initializer_utils.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/uniform_initializer_utils.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/uniform_initializer_utils.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -102,6 +102,7 @@
       case GLSL_TYPE_VOID:
       case GLSL_TYPE_ERROR:
       case GLSL_TYPE_INTERFACE:
+      case GLSL_TYPE_SUBROUTINE:
 	 ASSERT_TRUE(false);
 	 break;
       }
@@ -134,6 +135,7 @@
       case GLSL_TYPE_VOID:
       case GLSL_TYPE_ERROR:
       case GLSL_TYPE_INTERFACE:
+      case GLSL_TYPE_SUBROUTINE:
 	 ASSERT_TRUE(false);
 	 break;
       }
@@ -238,6 +240,7 @@
 	 case GLSL_TYPE_VOID:
 	 case GLSL_TYPE_ERROR:
 	 case GLSL_TYPE_INTERFACE:
+	 case GLSL_TYPE_SUBROUTINE:
 	    ASSERT_TRUE(false);
 	    break;
 	 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/varyings_test.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/varyings_test.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glsl/tests/varyings_test.cpp	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glsl/tests/varyings_test.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -70,21 +70,13 @@
    hash_table *consumer_interface_inputs;
 
    const glsl_type *simple_interface;
-   ir_variable *junk[VARYING_SLOT_MAX];
+   ir_variable *junk[VARYING_SLOT_TESS_MAX];
 };
 
 link_varyings::link_varyings()
 {
    static const glsl_struct_field f[] = {
-      {
-         glsl_type::vec(4),
-         "v",
-         false,
-         0,
-         0,
-         0,
-         0
-      }
+      glsl_struct_field(glsl_type::vec(4), "v")
    };
 
    this->simple_interface =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/compsize.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/compsize.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/compsize.c	2012-06-29 19:28:36.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/compsize.c	2015-09-16 14:36:09.000000000 +0000
@@ -65,6 +65,8 @@
    switch (format) {
    case GL_RGB:
    case GL_BGR:
+   case GL_RGB_INTEGER_EXT:
+   case GL_BGR_INTEGER_EXT:
       return 3;
    case GL_RG:
    case GL_422_EXT:
@@ -74,10 +76,13 @@
    case GL_DEPTH_STENCIL_NV:
    case GL_YCBCR_422_APPLE:
    case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
       return 2;
    case GL_RGBA:
    case GL_BGRA:
    case GL_ABGR_EXT:
+   case GL_RGBA_INTEGER_EXT:
+   case GL_BGRA_INTEGER_EXT:
       return 4;
    case GL_COLOR_INDEX:
    case GL_STENCIL_INDEX:
@@ -88,6 +93,11 @@
    case GL_ALPHA:
    case GL_LUMINANCE:
    case GL_INTENSITY:
+   case GL_RED_INTEGER_EXT:
+   case GL_GREEN_INTEGER_EXT:
+   case GL_BLUE_INTEGER_EXT:
+   case GL_ALPHA_INTEGER_EXT:
+   case GL_LUMINANCE_INTEGER_EXT:
       return 1;
    default:
       return 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri2_glx.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri2_glx.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri2_glx.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri2_glx.c	2015-09-16 14:36:09.000000000 +0000
@@ -1183,15 +1183,7 @@
       return NULL;
    }
 
-#ifdef O_CLOEXEC
-   psc->fd = open(deviceName, O_RDWR | O_CLOEXEC);
-   if (psc->fd == -1 && errno == EINVAL)
-#endif
-   {
-      psc->fd = open(deviceName, O_RDWR);
-      if (psc->fd != -1)
-         fcntl(psc->fd, F_SETFD, fcntl(psc->fd, F_GETFD) | FD_CLOEXEC);
-   }
+   psc->fd = loader_open_device(deviceName);
    if (psc->fd < 0) {
       ErrorMessageF("failed to open drm device: %s\n", strerror(errno));
       goto handle_error;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri3_glx.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri3_glx.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri3_glx.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri3_glx.c	2015-09-16 14:36:09.000000000 +0000
@@ -1679,6 +1679,8 @@
    fd = xcb_dri3_open_reply_fds(c, reply)[0];
    fcntl(fd, F_SETFD, FD_CLOEXEC);
 
+   free(reply);
+
    return fd;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri_common.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri_common.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri_common.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri_common.c	2015-09-16 14:36:09.000000000 +0000
@@ -253,8 +253,7 @@
 static int
 scalarEqual(struct glx_config *mode, unsigned int attrib, unsigned int value)
 {
-   unsigned int glxValue;
-   int i;
+   unsigned glxValue, i;
 
    for (i = 0; i < ARRAY_SIZE(attribMap); i++)
       if (attribMap[i].attrib == attrib) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri_common_query_renderer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri_common_query_renderer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/dri_common_query_renderer.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/dri_common_query_renderer.c	2015-09-16 14:36:09.000000000 +0000
@@ -56,7 +56,7 @@
 static int
 dri2_convert_glx_query_renderer_attribs(int attribute)
 {
-   int i;
+   unsigned i;
 
    for (i = 0; i < ARRAY_SIZE(query_renderer_map); i++)
       if (query_renderer_map[i].glx_attrib == attribute)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/glxextensions.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/glxextensions.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/glxextensions.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/glxextensions.c	2015-09-16 14:36:09.000000000 +0000
@@ -241,6 +241,7 @@
    { GL(EXT_texture_env_combine),        VER(1,3), Y, N, N, N },
    { GL(EXT_texture_env_dot3),           VER(0,0), Y, N, N, N },
    { GL(EXT_texture_filter_anisotropic), VER(0,0), Y, N, N, N },
+   { GL(EXT_texture_integer),            VER(0,0), Y, N, N, N },
    { GL(EXT_texture_lod),                VER(1,2), Y, N, N, N },
    { GL(EXT_texture_lod_bias),           VER(1,4), Y, N, N, N },
    { GL(EXT_texture_mirror_clamp),       VER(0,0), Y, N, N, N },
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/glxextensions.h mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/glxextensions.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/glxextensions.h	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/glxextensions.h	2015-09-16 14:36:09.000000000 +0000
@@ -146,6 +146,7 @@
    GL_EXT_texture_env_combine_bit,
    GL_EXT_texture_env_dot3_bit,
    GL_EXT_texture_filter_anisotropic_bit,
+   GL_EXT_texture_integer_bit,
    GL_EXT_texture_lod_bit,
    GL_EXT_texture_lod_bias_bit,
    GL_EXT_texture_mirror_clamp_bit,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/pixelstore.c mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/pixelstore.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/pixelstore.c	2012-01-02 08:22:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/pixelstore.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,7 @@
 
 #include "glxclient.h"
 #include "indirect.h"
+#include "util/rounding.h"
 
 #if !defined(__GNUC__)
 #  define __builtin_expect(x, y) x
@@ -77,7 +78,7 @@
 
    switch (pname) {
    case GL_PACK_ROW_LENGTH:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -85,7 +86,7 @@
       state->storePack.rowLength = a;
       break;
    case GL_PACK_IMAGE_HEIGHT:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -93,7 +94,7 @@
       state->storePack.imageHeight = a;
       break;
    case GL_PACK_SKIP_ROWS:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -101,7 +102,7 @@
       state->storePack.skipRows = a;
       break;
    case GL_PACK_SKIP_PIXELS:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -109,7 +110,7 @@
       state->storePack.skipPixels = a;
       break;
    case GL_PACK_SKIP_IMAGES:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -117,7 +118,7 @@
       state->storePack.skipImages = a;
       break;
    case GL_PACK_ALIGNMENT:
-      a = (GLint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       switch (a) {
       case 1:
       case 2:
@@ -138,7 +139,7 @@
       break;
 
    case GL_UNPACK_ROW_LENGTH:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -146,7 +147,7 @@
       state->storeUnpack.rowLength = a;
       break;
    case GL_UNPACK_IMAGE_HEIGHT:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -154,7 +155,7 @@
       state->storeUnpack.imageHeight = a;
       break;
    case GL_UNPACK_SKIP_ROWS:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -162,7 +163,7 @@
       state->storeUnpack.skipRows = a;
       break;
    case GL_UNPACK_SKIP_PIXELS:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -170,7 +171,7 @@
       state->storeUnpack.skipPixels = a;
       break;
    case GL_UNPACK_SKIP_IMAGES:
-      a = (GLuint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       if (((GLint) a) < 0) {
          __glXSetError(gc, GL_INVALID_VALUE);
          return;
@@ -178,7 +179,7 @@
       state->storeUnpack.skipImages = a;
       break;
    case GL_UNPACK_ALIGNMENT:
-      a = (GLint) (param + 0.5);
+      a = _mesa_lroundevenf(param);
       switch (a) {
       case 1:
       case 2:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/glx/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/glx/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/glx/SConscript	2015-09-16 14:36:09.000000000 +0000
@@ -125,7 +125,7 @@
     target = 'indirect_size.h',
     script = GLAPI + 'gen/glX_proto_size.py',
     source = sources,
-    command = python_cmd + ' $SCRIPT -f $SOURCE -m size_h --only-set -h _INDIRECT_SIZE_H > $TARGET'
+    command = python_cmd + ' $SCRIPT -f $SOURCE -m size_h --only-set --header-tag _INDIRECT_SIZE_H > $TARGET'
 )
 
 env.CodeGenerate(
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/gtest/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/gtest/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/gtest/Makefile.am	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/gtest/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -19,8 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 AM_CFLAGS = $(DEFINES) -I$(top_srcdir)/src/gtest/include
 AM_CXXFLAGS = $(DEFINES) -I$(top_srcdir)/src/gtest/include
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/loader/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/loader/Android.mk	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -33,13 +33,13 @@
 LOCAL_SRC_FILES := \
 	$(LOADER_C_FILES)
 
-# swrast only
-ifeq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_CFLAGS += -D__NOT_HAVE_DRM_H
-else
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
 LOCAL_SHARED_LIBRARIES := libdrm
 endif
 
+LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
+
 LOCAL_MODULE := libmesa_loader
 
 include $(MESA_COMMON_MK)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/loader/loader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/loader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/loader/loader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/loader.c	2015-09-16 14:36:09.000000000 +0000
@@ -64,6 +64,8 @@
  *    Rob Clark <robclark@freedesktop.org>
  */
 
+#include <errno.h>
+#include <fcntl.h>
 #include <sys/stat.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -71,10 +73,8 @@
 #ifdef HAVE_LIBUDEV
 #include <assert.h>
 #include <dlfcn.h>
-#include <fcntl.h>
 #include <unistd.h>
 #include <stdlib.h>
-#include <errno.h>
 #ifdef USE_DRICONF
 #include "xmlconfig.h"
 #include "xmlpool.h"
@@ -85,7 +85,7 @@
 #endif
 #include "loader.h"
 
-#ifndef __NOT_HAVE_DRM_H
+#ifdef HAVE_LIBDRM
 #include <xf86drm.h>
 #endif
 
@@ -104,6 +104,22 @@
 
 static void (*log_)(int level, const char *fmt, ...) = default_logger;
 
+int
+loader_open_device(const char *device_name)
+{
+   int fd;
+#ifdef O_CLOEXEC
+   fd = open(device_name, O_RDWR | O_CLOEXEC);
+   if (fd == -1 && errno == EINVAL)
+#endif
+   {
+      fd = open(device_name, O_RDWR);
+      if (fd != -1)
+         fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+   }
+   return fd;
+}
+
 #ifdef HAVE_LIBUDEV
 #include <libudev.h>
 
@@ -257,6 +273,8 @@
                (struct udev_enumerate *));
    UDEV_SYMBOL(struct udev_list_entry *, udev_enumerate_get_list_entry,
                (struct udev_enumerate *));
+   UDEV_SYMBOL(void, udev_enumerate_unref,
+               (struct udev_enumerate *));
    UDEV_SYMBOL(struct udev_list_entry *, udev_list_entry_get_next,
                (struct udev_list_entry *));
    UDEV_SYMBOL(const char *, udev_list_entry_get_name,
@@ -291,6 +309,8 @@
       udev_device_unref(device);
    }
 
+   udev_enumerate_unref(e);
+
    if (found) {
       path_res = strdup(udev_device_get_devnode(device));
       udev_device_unref(device);
@@ -324,22 +344,6 @@
    return id_path_tag;
 }
 
-static int
-drm_open_device(const char *device_name)
-{
-   int fd;
-#ifdef O_CLOEXEC
-   fd = open(device_name, O_RDWR | O_CLOEXEC);
-   if (fd == -1 && errno == EINVAL)
-#endif
-   {
-      fd = open(device_name, O_RDWR);
-      if (fd != -1)
-         fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
-   }
-   return fd;
-}
-
 #ifdef USE_DRICONF
 const char __driConfigOptionsLoader[] =
 DRI_CONF_BEGIN
@@ -414,7 +418,7 @@
       goto default_device_clean;
    }
 
-   fd = drm_open_device(device_name);
+   fd = loader_open_device(device_name);
    if (fd >= 0) {
       close(default_fd);
    } else {
@@ -501,7 +505,7 @@
 }
 #endif
 
-#if !defined(__NOT_HAVE_DRM_H)
+#if defined(HAVE_LIBDRM)
 /* for i915 */
 #include <i915_drm.h>
 /* for radeon */
@@ -584,7 +588,7 @@
    if (sysfs_get_pci_id_for_fd(fd, vendor_id, chip_id))
       return 1;
 #endif
-#if !defined(__NOT_HAVE_DRM_H)
+#ifdef HAVE_LIBDRM
    if (drm_get_pci_id_for_fd(fd, vendor_id, chip_id))
       return 1;
 #endif
@@ -695,7 +699,7 @@
 
    if (!loader_get_pci_id_for_fd(fd, &vendor_id, &chip_id)) {
 
-#ifndef __NOT_HAVE_DRM_H
+#ifdef HAVE_LIBDRM
       /* fallback to drmGetVersion(): */
       drmVersionPtr version = drmGetVersion(fd);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/loader/loader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/loader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/loader/loader.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/loader.h	2015-09-16 14:36:09.000000000 +0000
@@ -37,6 +37,9 @@
 #define _LOADER_GALLIUM      (1 << 1)
 
 int
+loader_open_device(const char *);
+
+int
 loader_get_pci_id_for_fd(int fd, int *vendor_id, int *chip_id);
 
 char *
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/loader/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/loader/Makefile.am	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -48,10 +48,7 @@
 
 endif
 
-if !HAVE_LIBDRM
-libloader_la_CPPFLAGS += \
-	-D__NOT_HAVE_DRM_H
-else
+if HAVE_LIBDRM
 libloader_la_CPPFLAGS += \
 	$(LIBDRM_CFLAGS)
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/loader/pci_id_driver_map.c mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/pci_id_driver_map.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/loader/pci_id_driver_map.c	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/pci_id_driver_map.c	2015-09-16 14:36:09.000000000 +0000
@@ -23,7 +23,7 @@
 
 int is_nouveau_vieux(int fd);
 
-#ifndef __NOT_HAVE_DRM_H
+#ifdef HAVE_LIBDRM
 
 #include <xf86drm.h>
 #include <nouveau_drm.h>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/loader/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/loader/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/loader/SConscript	2015-09-16 14:36:09.000000000 +0000
@@ -8,8 +8,6 @@
     '#include'
 ])
 
-env.Append(CPPDEFINES = ['__NOT_HAVE_DRM_H'])
-
 if env['udev']:
     env.PkgUseModules('UDEV')
     env.Append(CPPDEFINES = ['HAVE_LIBUDEV'])
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/Makefile.am	2015-09-16 14:36:08.000000000 +0000
@@ -19,8 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 SUBDIRS = . gtest util mapi/glapi/gen mapi
 
 if NEED_OPENGL_COMMON
@@ -37,16 +35,12 @@
 SUBDIRS += egl/wayland/wayland-egl egl/wayland/wayland-drm
 endif
 
-if HAVE_EGL_DRIVER_DRI2
-SUBDIRS += egl/drivers/dri2
-endif
-
 if HAVE_GBM
 SUBDIRS += gbm
 endif
 
 if HAVE_EGL
-SUBDIRS += egl/main
+SUBDIRS += egl
 endif
 
 if HAVE_GALLIUM
@@ -54,8 +48,6 @@
 endif
 
 EXTRA_DIST = \
-	egl/drivers/haiku \
-	egl/docs \
 	getopt hgl SConscript
 
 AM_CFLAGS = $(VISIBILITY_CFLAGS)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/apiexec.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/apiexec.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/apiexec.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/apiexec.py	2015-09-16 14:36:09.000000000 +0000
@@ -74,6 +74,17 @@
     # GL_ARB_geometry_shader4, so OpenGL 3.2 is required.
     "FramebufferTexture": exec_info(core=32),
 
+    # OpenGL 4.0 / GL_ARB_shader_subroutine. Mesa only exposes this
+    # extension with core profile.
+    "GetSubroutineUniformLocation": exec_info(core=31),
+    "GetSubroutineIndex": exec_info(core=31),
+    "GetActiveSubroutineUniformiv": exec_info(core=31),
+    "GetActiveSubroutineUniformName": exec_info(core=31),
+    "GetActiveSubroutineName": exec_info(core=31),
+    "UniformSubroutinesuiv": exec_info(core=31),
+    "GetUniformSubroutineuiv": exec_info(core=31),
+    "GetProgramStageiv": exec_info(core=31),
+
     # OpenGL 4.0 / GL_ARB_gpu_shader_fp64.  The extension spec says:
     #
     #     "OpenGL 3.2 and GLSL 1.50 are required."
@@ -138,6 +149,11 @@
     # with OpenGL 3.1.
     "TexBufferRange": exec_info(core=31),
 
+    # OpenGL 4.3 / GL_ARB_framebuffer_no_attachments.  Mesa can expose the
+    # extension with OpenGL 3.0.
+    "FramebufferParameteri": exec_info(compatibility=30, core=31),
+    "GetFramebufferParameteriv": exec_info(compatibility=30, core=31),
+
     # OpenGL 4.5 / GL_ARB_direct_state_access.   Mesa can expose the extension
     # with core profile.
     "CreateTransformFeedbacks": exec_info(core=31),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_compute_shader.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_compute_shader.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_compute_shader.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_compute_shader.xml	2015-09-16 14:36:09.000000000 +0000
@@ -26,13 +26,13 @@
   <enum name="DISPATCH_INDIRECT_BUFFER_BINDING"                value="0x90EF"/>
   <enum name="COMPUTE_SHADER_BIT"                              value="0x00000020"/>
 
-  <function name="DispatchCompute">
+  <function name="DispatchCompute" es2="3.1">
     <param name="num_groups_x" type="GLuint"/>
     <param name="num_groups_y" type="GLuint"/>
     <param name="num_groups_z" type="GLuint"/>
   </function>
 
-  <function name="DispatchComputeIndirect">
+  <function name="DispatchComputeIndirect" es2="3.1">
     <param name="indirect" type="GLintptr"/>
   </function>
 </category>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_draw_indirect.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_draw_indirect.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_draw_indirect.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_draw_indirect.xml	2015-09-16 14:36:09.000000000 +0000
@@ -8,12 +8,12 @@
     <enum name="DRAW_INDIRECT_BUFFER"                   value="0x8F3F"/>
     <enum name="DRAW_INDIRECT_BUFFER_BINDING"           value="0x8F43"/>
 
-    <function name="DrawArraysIndirect" exec="dynamic">
+    <function name="DrawArraysIndirect" exec="dynamic" es2="3.1">
         <param name="mode" type="GLenum"/>
         <param name="indirect" type="const GLvoid *"/>
     </function>
 
-    <function name="DrawElementsIndirect" exec="dynamic">
+    <function name="DrawElementsIndirect" exec="dynamic" es2="3.1">
         <param name="mode" type="GLenum"/>
         <param name="type" type="GLenum"/>
         <param name="indirect" type="const GLvoid *"/>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_framebuffer_no_attachments.xml	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<OpenGLAPI>
+
+<category name="GL_ARB_framebuffer_no_attachments" number="130">
+
+   <enum name="FRAMEBUFFER_DEFAULT_WIDTH"                  value="0x9310" />
+   <enum name="FRAMEBUFFER_DEFAULT_HEIGHT"                 value="0x9311" />
+   <enum name="FRAMEBUFFER_DEFAULT_LAYERS"                 value="0x9312" />
+   <enum name="FRAMEBUFFER_DEFAULT_SAMPLES"                value="0x9313" />
+   <enum name="FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS" value="0x9314" />
+   <enum name="MAX_FRAMEBUFFER_WIDTH"                      value="0x9315" />
+   <enum name="MAX_FRAMEBUFFER_HEIGHT"                     value="0x9316" />
+   <enum name="MAX_FRAMEBUFFER_LAYERS"                     value="0x9317" />
+   <enum name="MAX_FRAMEBUFFER_SAMPLES"                    value="0x9318" />
+
+    <function name="FramebufferParameteri">
+       <param name="target" type="GLenum"/>
+       <param name="pname"  type="GLenum"/>
+       <param name="param"  type="GLint" />
+    </function>
+
+    <function name="GetFramebufferParameteriv">
+       <param name="target" type="GLenum" />
+       <param name="pname"  type="GLenum" />
+       <param name="params" type="GLint *" output="true" />
+    </function>
+
+</category>
+
+</OpenGLAPI>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_get_texture_sub_image.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_get_texture_sub_image.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_get_texture_sub_image.xml	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_get_texture_sub_image.xml	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<!-- This is included by gl_and_es_API.xml.  Could be moved to gl_API.xml. -->
+
+<OpenGLAPI>
+
+<category name="GL_ARB_get_texture_sub_image" number="165">
+
+    <function name="GetTextureSubImage" offset="assign">
+        <param name="texture" type="GLuint"/>
+        <param name="level" type="GLint"/>
+        <param name="xoffset" type="GLint"/>
+        <param name="yoffset" type="GLint"/>
+        <param name="zoffset" type="GLint"/>
+        <param name="width" type="GLsizei"/>
+        <param name="height" type="GLsizei"/>
+        <param name="depth" type="GLsizei"/>
+        <param name="format" type="GLenum"/>
+        <param name="type" type="GLenum"/>
+        <param name="bufSize" type="GLsizei"/>
+        <param name="pixels" type="GLvoid *"/>
+    </function>
+
+    <function name="GetCompressedTextureSubImage" offset="assign">
+        <param name="texture" type="GLuint"/>
+        <param name="level" type="GLint"/>
+        <param name="xoffset" type="GLint"/>
+        <param name="yoffset" type="GLint"/>
+        <param name="zoffset" type="GLint"/>
+        <param name="width" type="GLsizei"/>
+        <param name="height" type="GLsizei"/>
+        <param name="depth" type="GLsizei"/>
+        <param name="bufSize" type="GLsizei"/>
+        <param name="pixels" type="GLvoid *"/>
+    </function>
+
+</category>
+
+</OpenGLAPI>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_program_interface_query.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_program_interface_query.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_program_interface_query.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_program_interface_query.xml	2015-09-16 14:36:09.000000000 +0000
@@ -56,21 +56,21 @@
     <enum name="NUM_COMPATIBLE_SUBROUTINES"                      value="0x8E4A"/>
     <enum name="COMPATIBLE_SUBROUTINES"                          value="0x8E4B"/>
 
-    <function name="GetProgramInterfaceiv">
+    <function name="GetProgramInterfaceiv" es2="3.1">
         <param name="program" type="GLuint"/>
         <param name="programInterface" type="GLenum"/>
         <param name="pname" type="GLenum"/>
         <param name="params" type="GLint *" output="true"/>
     </function>
 
-    <function name="GetProgramResourceIndex">
+    <function name="GetProgramResourceIndex" es2="3.1">
         <param name="program" type="GLuint"/>
         <param name="programInterface" type="GLenum"/>
         <param name="name" type="const GLchar *"/>
         <return type="GLuint"/>
     </function>
 
-    <function name="GetProgramResourceName">
+    <function name="GetProgramResourceName" es2="3.1">
         <param name="program" type="GLuint"/>
         <param name="programInterface" type="GLenum"/>
         <param name="index" type="GLuint"/>
@@ -79,7 +79,7 @@
         <param name="name" type="GLchar *" output="true"/>
     </function>
 
-    <function name="GetProgramResourceiv">
+    <function name="GetProgramResourceiv" es2="3.1">
         <param name="program" type="GLuint"/>
         <param name="programInterface" type="GLenum"/>
         <param name="index" type="GLuint"/>
@@ -90,7 +90,7 @@
         <param name="params" type="GLint *" output="true"/>
     </function>
 
-    <function name="GetProgramResourceLocation">
+    <function name="GetProgramResourceLocation" es2="3.1">
         <param name="program" type="GLuint"/>
         <param name="programInterface" type="GLenum"/>
         <param name="name" type="const GLchar *"/>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_separate_shader_objects.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_separate_shader_objects.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_separate_shader_objects.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_separate_shader_objects.xml	2015-09-16 14:36:09.000000000 +0000
@@ -15,33 +15,33 @@
       <enum   name="ALL_SHADER_BITS"                              value="0xFFFFFFFF"/>
       <enum   name="PROGRAM_SEPARABLE"                            value="0x8258"/>
 
-      <function name="UseProgramStages">
+      <function name="UseProgramStages" es2="3.1">
          <param name="pipeline" type="GLuint" />
          <param name="stages" type="GLbitfield" />
          <param name="program" type="GLuint" />
       </function>
-      <function name="ActiveShaderProgram">
+      <function name="ActiveShaderProgram" es2="3.1">
          <param name="pipeline" type="GLuint" />
          <param name="program" type="GLuint" />
       </function>
-      <function name="CreateShaderProgramv">
+      <function name="CreateShaderProgramv" es2="3.1">
          <param name="type" type="GLenum" />
          <param name="count" type="GLsizei" />
          <param name="strings" type="const GLchar * const *" />
          <return type="GLuint"/>
       </function>
-      <function name="BindProgramPipeline">
+      <function name="BindProgramPipeline" es2="3.1">
          <param name="pipeline" type="GLuint" />
       </function>
-      <function name="DeleteProgramPipelines">
+      <function name="DeleteProgramPipelines" es2="3.1">
          <param name="n" type="GLsizei" />
          <param name="pipelines" type="const GLuint *" />
       </function>
-      <function name="GenProgramPipelines">
+      <function name="GenProgramPipelines" es2="3.1">
          <param name="n" type="GLsizei" />
          <param name="pipelines" type="GLuint *" />
       </function>
-      <function name="IsProgramPipeline">
+      <function name="IsProgramPipeline" es2="3.1">
          <param name="pipeline" type="GLuint" />
          <return type="GLboolean"/>
       </function>
@@ -54,30 +54,30 @@
          <param name="value" type="GLint"/>
       </function>
       -->
-      <function name="GetProgramPipelineiv">
+      <function name="GetProgramPipelineiv" es2="3.1">
          <param name="pipeline" type="GLuint" />
          <param name="pname" type="GLenum" />
          <param name="params" type="GLint *" />
       </function>
-      <function name="ProgramUniform1i">
+      <function name="ProgramUniform1i" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLint" />
       </function>
-      <function name="ProgramUniform2i">
+      <function name="ProgramUniform2i" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLint" />
          <param name="y" type="GLint" />
       </function>
-      <function name="ProgramUniform3i">
+      <function name="ProgramUniform3i" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLint" />
          <param name="y" type="GLint" />
          <param name="z" type="GLint" />
       </function>
-      <function name="ProgramUniform4i">
+      <function name="ProgramUniform4i" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLint" />
@@ -85,25 +85,25 @@
          <param name="z" type="GLint" />
          <param name="w" type="GLint" />
       </function>
-      <function name="ProgramUniform1ui">
+      <function name="ProgramUniform1ui" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLuint" />
       </function>
-      <function name="ProgramUniform2ui">
+      <function name="ProgramUniform2ui" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLuint" />
          <param name="y" type="GLuint" />
       </function>
-      <function name="ProgramUniform3ui">
+      <function name="ProgramUniform3ui" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLuint" />
          <param name="y" type="GLuint" />
          <param name="z" type="GLuint" />
       </function>
-      <function name="ProgramUniform4ui">
+      <function name="ProgramUniform4ui" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLuint" />
@@ -111,25 +111,25 @@
          <param name="z" type="GLuint" />
          <param name="w" type="GLuint" />
       </function>
-      <function name="ProgramUniform1f">
+      <function name="ProgramUniform1f" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLfloat" />
       </function>
-      <function name="ProgramUniform2f">
+      <function name="ProgramUniform2f" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLfloat" />
          <param name="y" type="GLfloat" />
       </function>
-      <function name="ProgramUniform3f">
+      <function name="ProgramUniform3f" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLfloat" />
          <param name="y" type="GLfloat" />
          <param name="z" type="GLfloat" />
       </function>
-      <function name="ProgramUniform4f">
+      <function name="ProgramUniform4f" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="x" type="GLfloat" />
@@ -137,145 +137,145 @@
          <param name="z" type="GLfloat" />
          <param name="w" type="GLfloat" />
       </function>
-      <function name="ProgramUniform1iv">
+      <function name="ProgramUniform1iv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLint *" />
       </function>
-      <function name="ProgramUniform2iv">
+      <function name="ProgramUniform2iv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLint *" />
       </function>
-      <function name="ProgramUniform3iv">
+      <function name="ProgramUniform3iv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLint *" />
       </function>
-      <function name="ProgramUniform4iv">
+      <function name="ProgramUniform4iv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLint *" />
       </function>
-      <function name="ProgramUniform1uiv">
+      <function name="ProgramUniform1uiv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLuint *" />
       </function>
-      <function name="ProgramUniform2uiv">
+      <function name="ProgramUniform2uiv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLuint *" />
       </function>
-      <function name="ProgramUniform3uiv">
+      <function name="ProgramUniform3uiv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLuint *" />
       </function>
-      <function name="ProgramUniform4uiv">
+      <function name="ProgramUniform4uiv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLuint *" />
       </function>
-      <function name="ProgramUniform1fv">
+      <function name="ProgramUniform1fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniform2fv">
+      <function name="ProgramUniform2fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniform3fv">
+      <function name="ProgramUniform3fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniform4fv">
+      <function name="ProgramUniform4fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix2fv">
+      <function name="ProgramUniformMatrix2fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix3fv">
+      <function name="ProgramUniformMatrix3fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix4fv">
+      <function name="ProgramUniformMatrix4fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix2x3fv">
+      <function name="ProgramUniformMatrix2x3fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix3x2fv">
+      <function name="ProgramUniformMatrix3x2fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix2x4fv">
+      <function name="ProgramUniformMatrix2x4fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix4x2fv">
+      <function name="ProgramUniformMatrix4x2fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix3x4fv">
+      <function name="ProgramUniformMatrix3x4fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ProgramUniformMatrix4x3fv">
+      <function name="ProgramUniformMatrix4x3fv" es2="3.1">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
          <param name="count" type="GLsizei" />
          <param name="transpose" type="GLboolean" />
          <param name="value" type="const GLfloat *" />
       </function>
-      <function name="ValidateProgramPipeline">
+      <function name="ValidateProgramPipeline" es2="3.1">
          <param name="pipeline" type="GLuint" />
       </function>
-      <function name="GetProgramPipelineInfoLog">
+      <function name="GetProgramPipelineInfoLog" es2="3.1">
          <param name="pipeline" type="GLuint" />
          <param name="bufSize" type="GLsizei" />
          <param name="length" type="GLsizei *" />
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_shader_image_load_store.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_shader_image_load_store.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_shader_image_load_store.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_shader_image_load_store.xml	2015-09-16 14:36:09.000000000 +0000
@@ -70,7 +70,7 @@
 <enum name="MAX_FRAGMENT_IMAGE_UNIFORMS" value="0x90CE"/>
 <enum name="MAX_COMBINED_IMAGE_UNIFORMS" value="0x90CF"/>
 
-<function name="BindImageTexture">
+<function name="BindImageTexture" es2="3.1">
   <param name="unit" type="GLuint"/>
   <param name="texture" type="GLuint"/>
   <param name="level" type="GLint"/>
@@ -80,7 +80,7 @@
   <param name="format" type="GLenum"/>
 </function>
 
-<function name="MemoryBarrier">
+<function name="MemoryBarrier" es2="3.1">
   <param name="barriers" type="GLbitfield"/>
 </function>
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_shader_subroutine.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_shader_subroutine.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_shader_subroutine.xml	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_shader_subroutine.xml	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,84 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<!-- Note: no GLX protocol info yet. -->
+
+<OpenGLAPI>
+
+<category name="GL_ARB_shader_subroutine" number="90">
+
+    <function name="GetSubroutineUniformLocation" offset="assign">
+        <param name="program" type="GLuint"/>
+        <param name="shadertype" type="GLenum"/>
+        <param name="name" type="const GLchar *"/>
+        <return type="GLint"/>
+    </function>
+
+    <function name="GetSubroutineIndex" offset="assign">
+        <param name="program" type="GLuint"/>
+        <param name="shadertype" type="GLenum"/>
+        <param name="name" type="const GLchar *"/>
+        <return type="GLuint"/>
+    </function>
+
+    <function name="GetActiveSubroutineUniformiv" offset="assign">
+        <param name="program" type="GLuint"/>
+        <param name="shadertype" type="GLenum"/>
+        <param name="index" type="GLuint"/>
+        <param name="pname" type="GLenum"/>
+        <param name="values" type="GLint *" output="true"/>
+    </function>
+
+    <function name="GetActiveSubroutineUniformName" offset="assign">
+        <param name="program" type="GLuint"/>
+        <param name="shadertype" type="GLenum"/>
+        <param name="index" type="GLuint"/>
+        <param name="bufsize" type="GLsizei"/>
+        <param name="length" type="GLsizei *" output="true"/>
+        <param name="name" type="GLchar *" output="true"/>
+    </function>
+
+    <function name="GetActiveSubroutineName" offset="assign">
+        <param name="program" type="GLuint"/>
+        <param name="shadertype" type="GLenum"/>
+        <param name="index" type="GLuint"/>
+        <param name="bufsize" type="GLsizei"/>
+        <param name="length" type="GLsizei *" output="true"/>
+        <param name="name" type="GLchar *" output="true"/>
+    </function>
+
+    <function name="UniformSubroutinesuiv" offset="assign">
+        <param name="shadertype" type="GLenum"/>
+        <param name="count" type="GLsizei"/>
+        <param name="indices" type="const GLuint *"/>
+    </function>
+
+    <function name="GetUniformSubroutineuiv" offset="assign">
+        <param name="shadertype" type="GLenum"/>
+        <param name="location" type="GLint"/>
+        <param name="params" type="GLuint *" output="true"/>
+    </function>
+
+    <function name="GetProgramStageiv" offset="assign">
+        <param name="program" type="GLuint"/>
+        <param name="shadertype" type="GLenum"/>
+        <param name="pname" type="GLenum"/>
+        <param name="values" type="GLint *" output="true"/>
+    </function>
+
+    <enum name="ACTIVE_SUBROUTINES" value="0x8DE5"/>
+    <enum name="ACTIVE_SUBROUTINE_UNIFORMS" value="0x8DE6"/>
+    <enum name="ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS" value="0x8E47"/>
+    <enum name="ACTIVE_SUBROUTINE_MAX_LENGTH" value="0x8E48"/>
+    <enum name="ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH" value="0x8E49"/>
+
+    <enum name="MAX_SUBROUTINES" value="0x8DE7"/>
+    <enum name="MAX_SUBROUTINE_UNIFORM_LOCATIONS" value="0x8DE8"/>
+
+    <enum name="NUM_COMPATIBLE_SUBROUTINES" value="0x8E4A"/>
+    <enum name="COMPATIBLE_SUBROUTINES" value="0x8E4B"/>
+
+    <!-- UNIFORM_SIZE, UNIFORM_NAME_LENGTH already in GL3.1 -->
+
+</category>
+</OpenGLAPI>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_tessellation_shader.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_tessellation_shader.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_tessellation_shader.xml	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_tessellation_shader.xml	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<!-- Note: no GLX protocol info yet. -->
+
+<OpenGLAPI>
+
+
+<category name="GL_ARB_tessellation_shader" number="91">
+
+    <!--<enum value="0" name="FALSE"/>
+    <enum value="1" name="TRUE"/>
+    <enum value="0x0004" name="TRIANGLES"/>
+    <enum value="0x0007" name="QUADS"/>
+    <enum value="0x0202" name="EQUAL"/>
+    <enum value="0x0900" name="CW"/>
+    <enum value="0x0901" name="CCW"/>-->
+
+    <enum value="0x000E" name="PATCHES"/>
+    <enum value="0x84F0" name="UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER"/>
+    <enum value="0x84F1" name="UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER"/>
+    <enum value="0x886C" name="MAX_TESS_CONTROL_INPUT_COMPONENTS"/>
+    <enum value="0x886D" name="MAX_TESS_EVALUATION_INPUT_COMPONENTS"/>
+    <enum value="0x8E1E" name="MAX_COMBINED_TESS_CONTROL_UNIFORM_COMPONENTS"/>
+    <enum value="0x8E1F" name="MAX_COMBINED_TESS_EVALUATION_UNIFORM_COMPONENTS"/>
+    <enum value="0x8E72" name="PATCH_VERTICES"/>
+    <enum value="0x8E73" name="PATCH_DEFAULT_INNER_LEVEL"/>
+    <enum value="0x8E74" name="PATCH_DEFAULT_OUTER_LEVEL"/>
+    <enum value="0x8E75" name="TESS_CONTROL_OUTPUT_VERTICES"/>
+    <enum value="0x8E76" name="TESS_GEN_MODE"/>
+    <enum value="0x8E77" name="TESS_GEN_SPACING"/>
+    <enum value="0x8E78" name="TESS_GEN_VERTEX_ORDER"/>
+    <enum value="0x8E79" name="TESS_GEN_POINT_MODE"/>
+    <enum value="0x8E7A" name="ISOLINES"/>
+    <enum value="0x8E7B" name="FRACTIONAL_ODD"/>
+    <enum value="0x8E7C" name="FRACTIONAL_EVEN"/>
+    <enum value="0x8E7D" name="MAX_PATCH_VERTICES"/>
+    <enum value="0x8E7E" name="MAX_TESS_GEN_LEVEL"/>
+    <enum value="0x8E7F" name="MAX_TESS_CONTROL_UNIFORM_COMPONENTS"/>
+    <enum value="0x8E80" name="MAX_TESS_EVALUATION_UNIFORM_COMPONENTS"/>
+    <enum value="0x8E81" name="MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS"/>
+    <enum value="0x8E82" name="MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS"/>
+    <enum value="0x8E83" name="MAX_TESS_CONTROL_OUTPUT_COMPONENTS"/>
+    <enum value="0x8E84" name="MAX_TESS_PATCH_COMPONENTS"/>
+    <enum value="0x8E85" name="MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS"/>
+    <enum value="0x8E86" name="MAX_TESS_EVALUATION_OUTPUT_COMPONENTS"/>
+    <enum value="0x8E87" name="TESS_EVALUATION_SHADER"/>
+    <enum value="0x8E88" name="TESS_CONTROL_SHADER"/>
+    <enum value="0x8E89" name="MAX_TESS_CONTROL_UNIFORM_BLOCKS"/>
+    <enum value="0x8E8A" name="MAX_TESS_EVALUATION_UNIFORM_BLOCKS"/>
+
+    <function name="PatchParameteri" offset="assign">
+        <param name="pname" type="GLenum"/>
+        <param name="value" type="GLint"/>
+    </function>
+    <function name="PatchParameterfv" offset="assign">
+        <param name="pname" type="GLenum"/>
+        <param name="values" type="const GLfloat *"/>
+    </function>
+</category>
+
+</OpenGLAPI>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_texture_multisample.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_texture_multisample.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_texture_multisample.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_texture_multisample.xml	2015-09-16 14:36:09.000000000 +0000
@@ -53,13 +53,13 @@
       <param name="fixedsamplelocations" type="GLboolean"/>
    </function>
 
-   <function name="GetMultisamplefv">
+   <function name="GetMultisamplefv" es2="3.1">
       <param name="pname" type="GLenum"/>
       <param name="index" type="GLuint"/>
       <param name="val" type="GLfloat *"/>
    </function>
 
-   <function name="SampleMaski">
+   <function name="SampleMaski" es2="3.1">
       <param name="index" type="GLuint"/>
       <param name="mask" type="GLbitfield"/>
    </function>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_texture_storage_multisample.xml	2015-09-16 14:36:09.000000000 +0000
@@ -7,7 +7,7 @@
 
 <category name="GL_ARB_texture_storage_multisample" number="141">
 
-   <function name="TexStorage2DMultisample">
+   <function name="TexStorage2DMultisample" es2="3.1">
       <param name="target" type="GLenum"/>
       <param name="samples" type="GLsizei"/>
       <param name="internalformat" type="GLenum"/>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/ARB_vertex_attrib_binding.xml	2015-09-16 14:36:09.000000000 +0000
@@ -7,14 +7,14 @@
 
 <category name="GL_ARB_vertex_attrib_binding" number="125">
 
-    <function name="BindVertexBuffer">
+    <function name="BindVertexBuffer" es2="3.1">
         <param name="bindingindex" type="GLuint"/>
         <param name="buffer" type="GLuint"/>
         <param name="offset" type="GLintptr"/>
         <param name="stride" type="GLsizei"/>
     </function>
 
-    <function name="VertexAttribFormat">
+    <function name="VertexAttribFormat" es2="3.1">
         <param name="attribindex" type="GLuint"/>
         <param name="size" type="GLint"/>
         <param name="type" type="GLenum"/>
@@ -22,7 +22,7 @@
         <param name="relativeoffset" type="GLuint"/>
     </function>
 
-    <function name="VertexAttribIFormat">
+    <function name="VertexAttribIFormat" es2="3.1">
         <param name="attribindex" type="GLuint"/>
         <param name="size" type="GLint"/>
         <param name="type" type="GLenum"/>
@@ -36,12 +36,12 @@
         <param name="relativeoffset" type="GLuint"/>
     </function>
 
-    <function name="VertexAttribBinding">
+    <function name="VertexAttribBinding" es2="3.1">
         <param name="attribindex" type="GLuint"/>
         <param name="bindingindex" type="GLuint"/>
     </function>
 
-    <function name="VertexBindingDivisor">
+    <function name="VertexBindingDivisor" es2="3.1">
         <param name="attribindex" type="GLuint"/>
         <param name="divisor" type="GLuint"/>
     </function>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/GL3x.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/GL3x.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/GL3x.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/GL3x.xml	2015-09-16 14:36:09.000000000 +0000
@@ -166,7 +166,7 @@
     <param name="a" type="GLboolean"/>
   </function>
 
-  <function name="GetBooleani_v">
+  <function name="GetBooleani_v" es2="3.1">
     <param name="value" type="GLenum"/>
     <param name="index" type="GLuint"/>
     <param name="data" type="GLboolean *"/>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/GL4x.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/GL4x.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/GL4x.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/GL4x.xml	2015-09-16 14:36:09.000000000 +0000
@@ -44,4 +44,10 @@
   <enum name="DEPTH_STENCIL_TEXTURE_MODE"              value="0x90EA"/>
 </category>
 
+<category name="4.5">
+  <function name="MemoryBarrierByRegion" es2="3.1">
+    <param name="barriers" type="GLbitfield"/>
+  </function>
+</category>
+
 </OpenGLAPI>
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_apitemp.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_apitemp.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_apitemp.py	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_apitemp.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,9 +25,10 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+
 import gl_XML, glX_XML
 import license
-import sys, getopt
 
 class PrintGlOffsets(gl_XML.gl_print_base):
     def __init__(self, es=False):
@@ -301,27 +302,30 @@
         return
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-c]" % sys.argv[0]
-    print "-c          Enable compatibility with OpenGL ES."
-    sys.exit(1)
+def _parser():
+    """Parse arguments and return a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        metavar='<input file name>',
+                        dest='filename',
+                        default="gl_API.xml",
+                        help="An XML file describing the API.")
+    parser.add_argument('-c',
+                        action='store_true',
+                        dest='es',
+                        help="Enable OpenGL ES compatibility")
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
 
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:c")
-    except Exception,e:
-        show_usage()
-
-    es = False
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-c":
-            es = True
+    printer = PrintGlOffsets(args.es)
+    printer.Print(api)
 
-    api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory())
 
-    printer = PrintGlOffsets(es)
-    printer.Print(api)
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_API.xml mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_API.xml
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_API.xml	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_API.xml	2015-09-16 14:36:09.000000000 +0000
@@ -2824,7 +2824,7 @@
         <glx sop="137"/>
     </function>
 
-    <function name="GetTexLevelParameterfv">
+    <function name="GetTexLevelParameterfv" es2="3.1">
         <param name="target" type="GLenum"/>
         <param name="level" type="GLint"/>
         <param name="pname" type="GLenum"/>
@@ -2832,7 +2832,7 @@
         <glx sop="138"/>
     </function>
 
-    <function name="GetTexLevelParameteriv">
+    <function name="GetTexLevelParameteriv" es2="3.1">
         <param name="target" type="GLenum"/>
         <param name="level" type="GLint"/>
         <param name="pname" type="GLenum"/>
@@ -8072,7 +8072,13 @@
 
 <xi:include href="ARB_vertex_type_2_10_10_10_rev.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
-<!-- ARB extensions #86...#93 -->
+<!-- ARB extensions #86...#89 -->
+
+<xi:include href="ARB_shader_subroutine.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<xi:include href="ARB_tessellation_shader.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<!-- ARB extensions #92...#93 -->
 
 <xi:include href="ARB_draw_indirect.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
@@ -8188,7 +8194,9 @@
     <!-- No new functions, types, enums. -->
 </category>
 
-<!-- ARB extensions #130..#131 -->
+<xi:include href="ARB_framebuffer_no_attachments.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<!-- ARB extension #131 -->
 
 <category name="GL_ARB_explicit_uniform_location" number="128">
     <enum name="MAX_UNIFORM_LOCATIONS" count="1" value="0x826E" >
@@ -8251,7 +8259,9 @@
 
 <xi:include href="ARB_direct_state_access.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
-<!-- ARB extensions 165 - 166 -->
+<xi:include href="ARB_get_texture_sub_image.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<!-- ARB extension 166 -->
 
 <xi:include href="ARB_texture_barrier.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_enums.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_enums.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_enums.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_enums.py	2015-09-16 14:36:09.000000000 +0000
@@ -1,8 +1,8 @@
 #!/usr/bin/python2
 # -*- Mode: Python; py-indent-offset: 8 -*-
 
-# (C) Copyright Zack Rusin 2005
-# All Rights Reserved.
+# (C) Copyright Zack Rusin 2005. All Rights Reserved.
+# Copyright (C) 2015 Intel Corporation
 # 
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -26,6 +26,8 @@
 # Authors:
 #    Zack Rusin <zack@kde.org>
 
+import argparse
+
 import license
 import gl_XML
 import sys, getopt
@@ -76,7 +78,7 @@
 
 static char token_tmp[20];
 
-const char *_mesa_lookup_enum_by_nr( int nr )
+const char *_mesa_enum_to_string( int nr )
 {
    enum_elt *elt;
 
@@ -116,6 +118,7 @@
    "GL_LINE_STRIP_ADJACENCY",
    "GL_TRIANGLES_ADJACENCY",
    "GL_TRIANGLE_STRIP_ADJACENCY",
+   "GL_PATCHES",
    "outside begin/end",
    "unknown state"
 };
@@ -201,21 +204,21 @@
                 enum.append( [name, priority] )
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name]" % sys.argv[0]
-    sys.exit(1)
+def _parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--input_file',
+                        required=True,
+                        help="Choose an xml file to parse.")
+    return parser.parse_args()
 
-if __name__ == '__main__':
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:")
-    except Exception,e:
-        show_usage()
-
-    api_list = []
-    for (arg,val) in args:
-        if arg == "-f":
-            api = gl_XML.parse_GL_API( val )
-            api_list.append(api);
+
+def main():
+    args = _parser()
+    api_list = [gl_XML.parse_GL_API(args.input_file)]
 
     printer = PrintGlEnums()
-    printer.Print( api_list )
+    printer.Print(api_list)
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_genexec.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_genexec.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_genexec.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_genexec.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,10 +25,11 @@
 # _mesa_initialize_exec_table().  It is responsible for populating all
 # entries in the "exec" dispatch table that aren't dynamic.
 
+import argparse
 import collections
 import license
 import gl_XML
-import sys, getopt
+import sys
 import apiexec
 
 
@@ -239,24 +240,23 @@
             print '   }'
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name]" % sys.argv[0]
-    sys.exit(1)
+def _parser():
+    """Parse arguments and return namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        dest='filename',
+                        default='gl_and_es_API.xml',
+                        help='an xml file describing an API')
+    return parser.parse_args()
 
 
-if __name__ == '__main__':
-    file_name = "gl_and_es_API.xml"
-
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "m:f:")
-    except Exception,e:
-        show_usage()
-
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-
+def main():
+    """Main function."""
+    args = _parser()
     printer = PrintCode()
-
-    api = gl_XML.parse_GL_API(file_name)
+    api = gl_XML.parse_GL_API(args.filename)
     printer.Print(api)
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_gentable.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_gentable.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_gentable.py	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_gentable.py	2015-09-16 14:36:09.000000000 +0000
@@ -2,6 +2,7 @@
 
 # (C) Copyright IBM Corporation 2004, 2005
 # (C) Copyright Apple Inc. 2011
+# Copyright (C) 2015 Intel Corporation
 # All Rights Reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,9 +30,10 @@
 # Based on code ogiginally by:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+
 import license
 import gl_XML, glX_XML
-import sys, getopt
 
 header = """/* GLXEXT is the define used in the xserver when the GLX extension is being
  * built.  Hijack this to determine whether this file is being built for the
@@ -186,23 +188,27 @@
                 print body_template % vars
         return
 
-def show_usage():
-    print "Usage: %s [-f input_file_name]" % sys.argv[0]
-    sys.exit(1)
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
+def _parser():
+    """Parse arguments and return a namespace object."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        dest='filename',
+                        default='gl_API.xml',
+                        help='An XML file description of an API')
+
+    return parser.parse_args()
+
 
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "m:f:")
-    except Exception,e:
-        show_usage()
-
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
+def main():
+    """Main function."""
+    args = _parser()
 
     printer = PrintCode()
 
-    api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory())
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
     printer.Print(api)
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_procs.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_procs.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_procs.py	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_procs.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,9 +25,12 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+
 import license
-import gl_XML, glX_XML
-import sys, getopt
+import gl_XML
+import glX_XML
+
 
 class PrintGlProcs(gl_XML.gl_print_base):
     def __init__(self, es=False):
@@ -39,7 +42,6 @@
 """Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 (C) Copyright IBM Corporation 2004, 2006""", "BRIAN PAUL, IBM")
 
-
     def printRealHeader(self):
         print """
 /* This file is only included by glapi.c and is used for
@@ -161,26 +163,28 @@
         return
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-c]" % sys.argv[0]
-    print "-c          Enable compatibility with OpenGL ES."
-    sys.exit(1)
+def _parser():
+    """Parse arguments and return a namespace."""
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--filename',
+                        default='gl_API.xml',
+                        metavar="input_file_name",
+                        dest='file_name',
+                        help="Path to an XML description of OpenGL API.")
+    parser.add_argument('-c', '--es-version',
+                        dest='es',
+                        action="store_true",
+                        help="filter functions for es")
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
+    api = gl_XML.parse_GL_API(args.file_name, glX_XML.glx_item_factory())
+    PrintGlProcs(args.es).Print(api)
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
 
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:c")
-    except Exception,e:
-        show_usage()
-
-    es = False
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-c":
-            es = True
-
-    api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory())
-    printer = PrintGlProcs(es)
-    printer.Print(api)
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_SPARC_asm.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_SPARC_asm.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_SPARC_asm.py	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_SPARC_asm.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,9 +25,10 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+
 import license
 import gl_XML, glX_XML
-import sys, getopt
 
 class PrintGenericStubs(gl_XML.gl_print_base):
     def __init__(self):
@@ -244,30 +245,24 @@
         return
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-m output_mode]" % sys.argv[0]
-    sys.exit(1)
+def _parser():
+    """Parse arguments and return a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        dest='filename',
+                        default='gl_API.xml',
+                        help='An XML description of an API.')
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
+    printer = PrintGenericStubs()
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-    mode = "generic"
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
+    printer.Print(api)
 
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "m:f:")
-    except Exception,e:
-        show_usage()
-
-    for (arg,val) in args:
-        if arg == '-m':
-            mode = val
-        elif arg == "-f":
-            file_name = val
-
-    if mode == "generic":
-        printer = PrintGenericStubs()
-    else:
-        print "ERROR: Invalid mode \"%s\" specified." % mode
-        show_usage()
 
-    api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory())
-    printer.Print(api)
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_table.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_table.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_table.py	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_table.py	2015-09-16 14:36:09.000000000 +0000
@@ -2,6 +2,7 @@
 
 # (C) Copyright IBM Corporation 2004
 # All Rights Reserved.
+# Copyright (c) 2014 Intel Corporation
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -25,35 +26,35 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+
 import gl_XML
 import license
-import sys, getopt
+
 
 class PrintGlTable(gl_XML.gl_print_base):
-    def __init__(self, es=False):
+    def __init__(self):
         gl_XML.gl_print_base.__init__(self)
 
-        self.es = es
         self.header_tag = '_GLAPI_TABLE_H_'
         self.name = "gl_table.py (from Mesa)"
         self.license = license.bsd_license_template % ( \
 """Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
 (C) Copyright IBM Corporation 2004""", "BRIAN PAUL, IBM")
-        self.ifdef_emitted = False;
+        self.ifdef_emitted = False
         return
 
-
     def printBody(self, api):
         for f in api.functionIterateByOffset():
             if not f.is_abi() and not self.ifdef_emitted:
                 print '#if !defined HAVE_SHARED_GLAPI'
                 self.ifdef_emitted = True
             arg_string = f.get_parameter_string()
-            print '   %s (GLAPIENTRYP %s)(%s); /* %d */' % (f.return_type, f.name, arg_string, f.offset)
+            print '   %s (GLAPIENTRYP %s)(%s); /* %d */' % (
+                f.return_type, f.name, arg_string, f.offset)
 
         print '#endif /* !defined HAVE_SHARED_GLAPI */'
 
-
     def printRealHeader(self):
         print '#ifndef GLAPIENTRYP'
         print '# ifndef GLAPIENTRY'
@@ -68,20 +69,19 @@
         print '{'
         return
 
-
     def printRealFooter(self):
         print '};'
         return
 
 
 class PrintRemapTable(gl_XML.gl_print_base):
-    def __init__(self, es=False):
+    def __init__(self):
         gl_XML.gl_print_base.__init__(self)
 
-        self.es = es
         self.header_tag = '_DISPATCH_H_'
         self.name = "gl_table.py (from Mesa)"
-        self.license = license.bsd_license_template % ("(C) Copyright IBM Corporation 2005", "IBM")
+        self.license = license.bsd_license_template % (
+            "(C) Copyright IBM Corporation 2005", "IBM")
         return
 
 
@@ -100,6 +100,7 @@
 """
         return
 
+
     def printBody(self, api):
         print '#define CALL_by_offset(disp, cast, offset, parameters) \\'
         print '    (*(cast (GET_by_offset(disp, offset)))) parameters'
@@ -120,19 +121,13 @@
 
         functions = []
         abi_functions = []
-        alias_functions = []
         count = 0
         for f in api.functionIterateByOffset():
             if not f.is_abi():
-                functions.append( [f, count] )
+                functions.append([f, count])
                 count += 1
             else:
-                abi_functions.append( [f, -1] )
-
-            if self.es:
-                # remember functions with aliases
-                if len(f.entry_points) > 1:
-                    alias_functions.append(f)
+                abi_functions.append([f, -1])
 
         print '/* total number of offsets below */'
         print '#define _gloffset_COUNT %d' % (len(abi_functions + functions))
@@ -141,18 +136,11 @@
         for f, index in abi_functions:
             print '#define _gloffset_%s %d' % (f.name, f.offset)
 
-        if self.es:
-            remap_table = "esLocalRemapTable"
+        remap_table = "driDispatchRemapTable"
 
-            print '#define %s_size %u' % (remap_table, count)
-            print 'static int %s[ %s_size ];' % (remap_table, remap_table)
-            print ''
-        else:
-            remap_table = "driDispatchRemapTable"
-
-            print '#define %s_size %u' % (remap_table, count)
-            print 'extern int %s[ %s_size ];' % (remap_table, remap_table)
-            print ''
+        print '#define %s_size %u' % (remap_table, count)
+        print 'extern int %s[ %s_size ];' % (remap_table, remap_table)
+        print ''
 
         for f, index in functions:
             print '#define %s_remap_index %u' % (f.name, index)
@@ -165,7 +153,7 @@
         print ''
 
         for f, index in abi_functions + functions:
-            arg_string = gl_XML.create_parameter_string( f.parameters, 0 )
+            arg_string = gl_XML.create_parameter_string(f.parameters, 0)
 
             print 'typedef %s (GLAPIENTRYP _glptr_%s)(%s);' % (f.return_type, f.name, arg_string)
             print '#define CALL_%s(disp, parameters) \\' % (f.name)
@@ -179,60 +167,38 @@
             print '}'
             print
 
-        if alias_functions:
-            print ''
-            print '/* define aliases for compatibility */'
-            for f in alias_functions:
-                for name in f.entry_points:
-                    if name != f.name:
-                        print '#define CALL_%s(disp, parameters) CALL_%s(disp, parameters)' % (name, f.name)
-                        print '#define GET_%s(disp) GET_%s(disp)' % (name, f.name)
-                        print '#define SET_%s(disp, fn) SET_%s(disp, fn)' % (name, f.name)
-            print ''
-
-            for f in alias_functions:
-                for name in f.entry_points:
-                    if name != f.name:
-                        print '#define %s_remap_index %s_remap_index' % (name, f.name)
-            print ''
-
         return
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-m mode] [-c ver]" % sys.argv[0]
-    print "    -m mode   Mode can be 'table' or 'remap_table'."
-    print "    -c ver    Version can be 'es1' or 'es2'."
-    sys.exit(1)
-
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:m:c:")
-    except Exception,e:
-        show_usage()
-
-    mode = "table"
-    es = None
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-m":
-            mode = val
-        elif arg == "-c":
-            es = val
-
-    if mode == "table":
-        printer = PrintGlTable(es)
-    elif mode == "remap_table":
-        printer = PrintRemapTable(es)
-    else:
-        show_usage()
+def _parser():
+    """Parse arguments and return a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--filename',
+                        default='gl_API.xml',
+                        metavar="input_file_name",
+                        dest='file_name',
+                        help="Path to an XML description of OpenGL API.")
+    parser.add_argument('-m', '--mode',
+                        choices=['table', 'remap_table'],
+                        default='table',
+                        metavar="mode",
+                        help="Generate either a table or a remap_table")
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
+
+    api = gl_XML.parse_GL_API(args.file_name)
+
+    if args.mode == "table":
+        printer = PrintGlTable()
+    elif args.mode == "remap_table":
+        printer = PrintRemapTable()
 
-    api = gl_XML.parse_GL_API( file_name )
+    printer.Print(api)
 
-    if es is not None:
-        api.filter_functions_by_api(es)
 
-    printer.Print( api )
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_x86-64_asm.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_x86-64_asm.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_x86-64_asm.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_x86-64_asm.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,9 +25,11 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+import copy
+
 import license
 import gl_XML, glX_XML
-import sys, getopt, copy
 
 def should_use_push(registers):
     for [reg, offset] in registers:
@@ -289,30 +291,25 @@
 
         return
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-m output_mode]" % sys.argv[0]
-    sys.exit(1)
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-    mode = "generic"
+def _parser():
+    """Parse arguments and return a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        default='gl_API.xml',
+                        dest='filename',
+                        help='An XML file describing an API')
+    return parser.parse_args()
 
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "m:f:")
-    except Exception,e:
-        show_usage()
-
-    for (arg,val) in args:
-        if arg == '-m':
-            mode = val
-        elif arg == "-f":
-            file_name = val
 
-    if mode == "generic":
-        printer = PrintGenericStubs()
-    else:
-        print "ERROR: Invalid mode \"%s\" specified." % mode
-        show_usage()
+def main():
+    """Main function."""
+    args = _parser()
+    printer = PrintGenericStubs()
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
 
-    api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory())
     printer.Print(api)
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_x86_asm.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_x86_asm.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/gl_x86_asm.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/gl_x86_asm.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,9 +25,10 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+
 import license
 import gl_XML, glX_XML
-import sys, getopt
 
 class PrintGenericStubs(gl_XML.gl_print_base):
 
@@ -217,30 +218,22 @@
 
         return
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-m output_mode]" % sys.argv[0]
-    sys.exit(1)
+def _parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        dest='filename',
+                        default='gl_API.xml',
+                        help='An XML file describing an API.')
+    return parser.parse_args()
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-    mode = "generic"
 
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "m:f:")
-    except Exception,e:
-        show_usage()
-
-    for (arg,val) in args:
-        if arg == '-m':
-            mode = val
-        elif arg == "-f":
-            file_name = val
-
-    if mode == "generic":
-        printer = PrintGenericStubs()
-    else:
-        print "ERROR: Invalid mode \"%s\" specified." % mode
-        show_usage()
+def main():
+    args = _parser()
+    printer = PrintGenericStubs()
 
-    api = gl_XML.parse_GL_API(file_name, glX_XML.glx_item_factory())
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
     printer.Print(api)
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_proto_recv.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_proto_recv.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_proto_recv.py	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_proto_recv.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,8 +25,10 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+import string
+
 import gl_XML, glX_XML, glX_proto_common, license
-import sys, getopt, string
 
 
 class PrintGlxDispatch_h(gl_XML.gl_print_base):
@@ -524,31 +526,39 @@
         return
 
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:m:s")
-    except Exception,e:
-        show_usage()
-
-    mode = "dispatch_c"
-    do_swap = 0
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-m":
-            mode = val
-        elif arg == "-s":
-            do_swap = 1
-
-    if mode == "dispatch_c":
-        printer = PrintGlxDispatchFunctions(do_swap)
-    elif mode == "dispatch_h":
+def _parser():
+    """Parse any arguments passed and return a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        dest='filename',
+                        default='gl_API.xml',
+                        help='an xml file describing an OpenGL API')
+    parser.add_argument('-m',
+                        dest='mode',
+                        default='dispatch_c',
+                        choices=['dispatch_c', 'dispatch_h'],
+                        help='what file to generate')
+    parser.add_argument('-s',
+                        dest='swap',
+                        action='store_true',
+                        help='emit swap in GlXDispatchFunctions')
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
+
+    if args.mode == "dispatch_c":
+        printer = PrintGlxDispatchFunctions(args.swap)
+    elif args.mode == "dispatch_h":
         printer = PrintGlxDispatch_h()
-    else:
-        show_usage()
 
-    api = gl_XML.parse_GL_API( file_name, glX_proto_common.glx_proto_item_factory() )
+    api = gl_XML.parse_GL_API(
+        args.filename, glX_proto_common.glx_proto_item_factory())
 
-    printer.Print( api )
+    printer.Print(api)
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_proto_send.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_proto_send.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_proto_send.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_proto_send.py	2015-09-16 14:36:09.000000000 +0000
@@ -2,6 +2,7 @@
 
 # (C) Copyright IBM Corporation 2004, 2005
 # All Rights Reserved.
+# Copyright (c) 2015 Intel Corporation
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -26,8 +27,10 @@
 #    Ian Romanick <idr@us.ibm.com>
 #    Jeremy Kolb <jkolb@brandeis.edu>
 
+import argparse
+
 import gl_XML, glX_XML, glX_proto_common, license
-import sys, getopt, copy, string
+import copy, string
 
 def convertStringForXCB(str):
     tmp = ""
@@ -1085,42 +1088,41 @@
         print '#endif'
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-m output_mode] [-d]" % sys.argv[0]
-    print "    -m output_mode   Output mode can be one of 'proto', 'init_c' or 'init_h'."
-    print "    -d               Enable extra debug information in the generated code."
-    sys.exit(1)
-
-
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:m:d")
-    except Exception,e:
-        show_usage()
-
-    debug = 0
-    mode = "proto"
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-m":
-            mode = val
-        elif arg == "-d":
-            debug = 1
+def _parser():
+    """Parse input and return a parsed namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        default='gl_API.xml',
+                        dest='filename',
+                        help='An XML file describing an API')
+    parser.add_argument('-m',
+                        required=True,
+                        dest='mode',
+                        choices=frozenset(['proto', 'init_c', 'init_h']),
+                        help='which file to generate')
+    parser.add_argument('-d',
+                        action='store_true',
+                        dest='debug',
+                        help='turn debug mode on.')
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
 
-    if mode == "proto":
+    if args.mode == "proto":
         printer = PrintGlxProtoStubs()
-    elif mode == "init_c":
+    elif args.mode == "init_c":
         printer = PrintGlxProtoInit_c()
-    elif mode == "init_h":
+    elif args.mode == "init_h":
         printer = PrintGlxProtoInit_h()
-    else:
-        show_usage()
 
-
-    printer.debug = debug
-    api = gl_XML.parse_GL_API( file_name, glX_XML.glx_item_factory() )
+    printer.debug = args.debug
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
 
     printer.Print( api )
+
+
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_proto_size.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_proto_size.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_proto_size.py	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_proto_size.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,9 +25,11 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+import sys, string
+
 import gl_XML, glX_XML
 import license
-import sys, getopt, copy, string
 
 
 class glx_enum_function(object):
@@ -650,54 +652,57 @@
         return alias
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] -m output_mode [--only-get | --only-set] [--get-alias-set]" % sys.argv[0]
-    print "    -m output_mode   Output mode can be one of 'size_c' or 'size_h'."
-    print "    --only-get       Only emit 'get'-type functions."
-    print "    --only-set       Only emit 'set'-type functions."
-    print ""
-    print "By default, both 'get' and 'set'-type functions are emitted."
-    sys.exit(1)
-
-
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:m:h:", ["only-get", "only-set", "header-tag"])
-    except Exception,e:
-        show_usage()
-
-    mode = None
-    header_tag = None
-    which_functions = PrintGlxSizeStubs_common.do_get | PrintGlxSizeStubs_common.do_set
-
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-m":
-            mode = val
-        elif arg == "--only-get":
-            which_functions = PrintGlxSizeStubs_common.do_get
-        elif arg == "--only-set":
-            which_functions = PrintGlxSizeStubs_common.do_set
-        elif (arg == '-h') or (arg == "--header-tag"):
-            header_tag = val
-
-    if mode == "size_c":
-        printer = PrintGlxSizeStubs_c( which_functions )
-    elif mode == "size_h":
-        printer = PrintGlxSizeStubs_h( which_functions )
-        if header_tag:
-            printer.header_tag = header_tag
-    elif mode == "reqsize_c":
+def _parser():
+    """Parse arguments and return a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.set_defaults(which_functions=(PrintGlxSizeStubs_common.do_get |
+                                         PrintGlxSizeStubs_common.do_set))
+    parser.add_argument('-f',
+                        dest='filename',
+                        default='gl_API.xml',
+                        help='an XML file describing an OpenGL API.')
+    parser.add_argument('-m',
+                        dest='mode',
+                        choices=['size_c', 'size_h', 'reqsize_c', 'reqsize_h'],
+                        help='Which file to generate')
+    getset = parser.add_mutually_exclusive_group()
+    getset.add_argument('--only-get',
+                        dest='which_functions',
+                        action='store_const',
+                        const=PrintGlxSizeStubs_common.do_get,
+                        help='only emit "get-type" functions')
+    getset.add_argument('--only-set',
+                        dest='which_functions',
+                        action='store_const',
+                        const=PrintGlxSizeStubs_common.do_set,
+                        help='only emit "set-type" functions')
+    parser.add_argument('--header-tag',
+                        dest='header_tag',
+                        action='store',
+                        default=None,
+                        help='set header tag value')
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
+
+    if args.mode == "size_c":
+        printer = PrintGlxSizeStubs_c(args.which_functions)
+    elif args.mode == "size_h":
+        printer = PrintGlxSizeStubs_h(args.which_functions)
+        if args.header_tag is not None:
+            printer.header_tag = args.header_tag
+    elif args.mode == "reqsize_c":
         printer = PrintGlxReqSize_c()
-    elif mode == "reqsize_h":
+    elif args.mode == "reqsize_h":
         printer = PrintGlxReqSize_h()
-    else:
-        show_usage()
 
-    api = gl_XML.parse_GL_API( file_name, glX_XML.glx_item_factory() )
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
+
+    printer.Print(api)
 
 
-    printer.Print( api )
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_server_table.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_server_table.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/glX_server_table.py	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/glX_server_table.py	2015-09-16 14:36:09.000000000 +0000
@@ -25,8 +25,9 @@
 # Authors:
 #    Ian Romanick <idr@us.ibm.com>
 
+import argparse
+
 import gl_XML, glX_XML, glX_proto_common, license
-import sys, getopt
 
 
 def log2(value):
@@ -383,28 +384,19 @@
         return
 
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:m")
-    except Exception,e:
-        show_usage()
-
-    mode = "table_c"
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-m":
-            mode = val
+def _parser():
+    """Parse arguments and return namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f',
+                        dest='filename',
+                        default='gl_API.xml',
+                        help='An XML file describing an API.')
+    return parser.parse_args()
 
-    if mode == "table_c":
-        printer = PrintGlxDispatchTables()
-    else:
-        show_usage()
-
-
-    api = gl_XML.parse_GL_API( file_name, glX_XML.glx_item_factory() )
 
+if __name__ == '__main__':
+    args = _parser()
+    printer = PrintGlxDispatchTables()
+    api = gl_XML.parse_GL_API(args.filename, glX_XML.glx_item_factory())
 
-    printer.Print( api )
+    printer.Print(api)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -20,7 +20,7 @@
 
 MESA_DIR = $(top_builddir)/src/mesa
 MESA_GLAPI_DIR = $(top_builddir)/src/mapi/glapi
-MESA_MAPI_DIR = $(top_builddir)/src/mapi
+MESA_MAPI_DIR = $(top_srcdir)/src/mapi
 MESA_GLX_DIR = $(top_builddir)/src/glx
 
 MESA_GLAPI_OUTPUTS = \
@@ -131,9 +131,11 @@
 	ARB_draw_instanced.xml \
 	ARB_ES2_compatibility.xml \
 	ARB_ES3_compatibility.xml \
+	ARB_framebuffer_no_attachments.xml \
 	ARB_framebuffer_object.xml \
 	ARB_geometry_shader4.xml \
 	ARB_get_program_binary.xml \
+	ARB_get_texture_sub_image.xml \
 	ARB_gpu_shader_fp64.xml \
 	ARB_gpu_shader5.xml \
 	ARB_instanced_arrays.xml \
@@ -150,7 +152,9 @@
 	ARB_separate_shader_objects.xml \
 	ARB_shader_atomic_counters.xml \
 	ARB_shader_image_load_store.xml \
+	ARB_shader_subroutine.xml \
 	ARB_sync.xml \
+	ARB_tessellation_shader.xml \
 	ARB_texture_barrier.xml \
 	ARB_texture_buffer_object.xml \
 	ARB_texture_buffer_range.xml \
@@ -205,7 +209,7 @@
 
 COMMON_GLX = $(COMMON) glX_API.xml glX_XML.py glX_proto_common.py
 
-PYTHON_GEN = $(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 
 ######################################################################
 
@@ -237,65 +241,65 @@
 ######################################################################
 
 $(MESA_GLAPI_DIR)/glapi_mapi_tmp.h: $(MESA_MAPI_DIR)/mapi_abi.py $(COMMON)
-	$(PYTHON_GEN) $< \
+	$(PYTHON_GEN) $(MESA_MAPI_DIR)/mapi_abi.py \
 		--printer glapi --mode lib $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glprocs.h: gl_procs.py $(COMMON)
-	$(PYTHON_GEN) $< -c -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_procs.py -c -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapitemp.h: gl_apitemp.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_apitemp.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapitable.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_gentable.c: gl_gentable.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_gentable.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_GLAPI_DIR)/glapi_x86.S: gl_x86_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_x86-64.S: gl_x86-64_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86-64_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_SPARC_asm.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_enums.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/api_exec.c: gl_genexec.py apiexec.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_genexec.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
 
 $(MESA_DIR)/main/remap_helper.h: remap_helper.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/remap_helper.py -f $(srcdir)/gl_and_es_API.xml > $@
 
 ######################################################################
 
 $(MESA_GLX_DIR)/indirect.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m proto \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m proto \
 	  | $(INDENT) $(INDENT_FLAGS) > $@
 
 $(MESA_GLX_DIR)/indirect.h: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_h > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_h > $@
 
 $(MESA_GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_c > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_c > $@
 
 $(MESA_GLX_DIR)/indirect_size.h $(XORG_GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_h --only-set \
-	    -h _INDIRECT_SIZE_H_ \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_h --only-set \
+	    --header-tag _INDIRECT_SIZE_H_ \
 	  | $(INDENT) $(INDENT_FLAGS) > $@
 
 $(MESA_GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_c --only-set \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_c --only-set \
 	  | $(INDENT) $(INDENT_FLAGS) > $@
 
 ######################################################################
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/remap_helper.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/remap_helper.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/glapi/gen/remap_helper.py	2013-05-08 13:13:18.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/glapi/gen/remap_helper.py	2015-09-16 14:36:09.000000000 +0000
@@ -24,9 +24,11 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-import gl_XML
+import argparse
+
 import license
-import sys, getopt, string
+import gl_XML
+
 
 def get_function_spec(func):
     sig = ""
@@ -54,6 +56,7 @@
 
     return spec
 
+
 class PrintGlRemap(gl_XML.gl_print_base):
     def __init__(self):
         gl_XML.gl_print_base.__init__(self)
@@ -163,30 +166,26 @@
         return
 
 
-def show_usage():
-    print "Usage: %s [-f input_file_name] [-c ver]" % sys.argv[0]
-    print "    -c ver    Version can be 'es1' or 'es2'."
-    sys.exit(1)
+def _parser():
+    """Parse input options and return a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-f', '--filename',
+                        default="gl_API.xml",
+                        metavar="input_file_name",
+                        dest='file_name',
+                        help="An xml description file.")
+    return parser.parse_args()
+
+
+def main():
+    """Main function."""
+    args = _parser()
 
-if __name__ == '__main__':
-    file_name = "gl_API.xml"
-
-    try:
-        (args, trail) = getopt.getopt(sys.argv[1:], "f:c:")
-    except Exception,e:
-        show_usage()
-
-    es = None
-    for (arg,val) in args:
-        if arg == "-f":
-            file_name = val
-        elif arg == "-c":
-            es = val
+    api = gl_XML.parse_GL_API(args.file_name)
 
-    api = gl_XML.parse_GL_API( file_name )
+    printer = PrintGlRemap()
+    printer.Print(api)
 
-    if es is not None:
-        api.filter_functions_by_api(es)
 
-    printer = PrintGlRemap()
-    printer.Print( api )
+if __name__ == '__main__':
+    main()
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mapi/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mapi/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -19,8 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 SUBDIRS =
 TESTS =
 
@@ -52,19 +50,14 @@
 
 include Makefile.sources
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 glapi_gen_mapi_deps := \
 	mapi_abi.py \
 	$(wildcard glapi/gen/*.xml) \
 	$(wildcard glapi/gen/*.py)
 
-# $(1): path to an XML file
-# $(2): name of the printer
-define glapi_gen_mapi
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/mapi_abi.py \
-	--mode lib --printer $(2) $(1) > $@
-endef
-
 if HAVE_SHARED_GLAPI
 BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
 
@@ -95,7 +88,9 @@
 endif
 
 shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,shared-glapi)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer shared-glapi \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 if HAVE_OPENGL
 noinst_LTLIBRARIES = glapi/libglapi.la
@@ -187,7 +182,9 @@
 endif
 
 es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es1api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es1api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 if HAVE_OPENGL_ES2
 TESTS += es2api/ABI-check
@@ -231,6 +228,8 @@
 endif
 
 es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es2api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es2api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@
 
 include $(top_srcdir)/install-lib-links.mk
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/Android.gen.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/Android.gen.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/Android.gen.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/Android.gen.mk	2015-09-16 14:36:09.000000000 +0000
@@ -115,9 +115,11 @@
 
 GET_HASH_GEN := $(LOCAL_PATH)/main/get_hash_generator.py
 
+$(intermediates)/main/get_hash.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(GET_HASH_GEN)
+$(intermediates)/main/get_hash.h: PRIVATE_XML := -f $(glapi)/gl_and_es_API.xml
 $(intermediates)/main/get_hash.h: $(glapi)/gl_and_es_API.xml \
                $(LOCAL_PATH)/main/get_hash_params.py $(GET_HASH_GEN)
-	@$(MESA_PYTHON2) $(GET_HASH_GEN) -f $< > $@
+	$(call es-gen)
 
 FORMAT_INFO := $(LOCAL_PATH)/main/format_info.py
 format_info_deps := \
@@ -125,8 +127,10 @@
 	$(LOCAL_PATH)/main/format_parser.py \
 	$(FORMAT_INFO)
 
+$(intermediates)/main/format_info.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_INFO)
+$(intermediates)/main/format_info.h: PRIVATE_XML :=
 $(intermediates)/main/format_info.h: $(format_info_deps)
-	@$(MESA_PYTHON2) $(FORMAT_INFO) $< > $@
+	$(call es-gen, $<)
 
 FORMAT_PACK := $(LOCAL_PATH)/main/format_pack.py
 format_pack_deps := \
@@ -134,8 +138,10 @@
 	$(LOCAL_PATH)/main/format_parser.py \
 	$(FORMAT_PACK)
 
+$(intermediates)/main/format_pack.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_PACK)
+$(intermediates)/main/format_pack.c: PRIVATE_XML :=
 $(intermediates)/main/format_pack.c: $(format_pack_deps)
-	$(hide) $(MESA_PYTHON2) $(FORMAT_PACK) $< > $@
+	$(call es-gen, $<)
 
 FORMAT_UNPACK := $(LOCAL_PATH)/main/format_unpack.py
 format_unpack_deps := \
@@ -143,5 +149,7 @@
 	$(LOCAL_PATH)/main/format_parser.py \
 	$(FORMAT_UNPACK)
 
+$(intermediates)/main/format_unpack.c: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(FORMAT_UNPACK)
+$(intermediates)/main/format_unpack.c: PRIVATE_XML :=
 $(intermediates)/main/format_unpack.c: $(format_unpack_deps)
-	$(hide) $(MESA_PYTHON2) $(FORMAT_UNPACK) $< > $@
+	$(call es-gen, $<)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/driverfuncs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/driverfuncs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/driverfuncs.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/driverfuncs.c	2015-09-16 14:36:09.000000000 +0000
@@ -94,14 +94,14 @@
    driver->QuerySamplesForFormat = _mesa_query_samples_for_format;
    driver->TexImage = _mesa_store_teximage;
    driver->TexSubImage = _mesa_store_texsubimage;
-   driver->GetTexImage = _mesa_meta_GetTexImage;
+   driver->GetTexSubImage = _mesa_meta_GetTexSubImage;
    driver->ClearTexSubImage = _mesa_meta_ClearTexSubImage;
    driver->CopyTexSubImage = _mesa_meta_CopyTexSubImage;
    driver->GenerateMipmap = _mesa_meta_GenerateMipmap;
    driver->TestProxyTexImage = _mesa_test_proxy_teximage;
    driver->CompressedTexImage = _mesa_store_compressed_teximage;
    driver->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
-   driver->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw;
+   driver->GetCompressedTexSubImage = _mesa_GetCompressedTexSubImage_sw;
    driver->BindTexture = NULL;
    driver->NewTextureObject = _mesa_new_texture_object;
    driver->DeleteTexture = _mesa_delete_texture_object;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -82,7 +82,7 @@
    y_scale = samples * 0.5;
 
    /* We expect only power of 2 samples in source multisample buffer. */
-   assert(samples > 0 && (samples & (samples - 1)) == 0);
+   assert(samples > 0 && _mesa_is_pow_two(samples));
    while (samples >> (shader_offset + 1)) {
       shader_offset++;
    }
@@ -263,7 +263,7 @@
    }
 
    /* We expect only power of 2 samples in source multisample buffer. */
-   assert(samples > 0 && (samples & (samples - 1)) == 0);
+   assert(samples > 0 && _mesa_is_pow_two(samples));
    while (samples >> (shader_offset + 1)) {
       shader_offset++;
    }
@@ -312,7 +312,7 @@
       break;
    default:
       _mesa_problem(ctx, "Unkown texture target %s\n",
-                    _mesa_lookup_enum_by_nr(target));
+                    _mesa_enum_to_string(target));
       shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE;
    }
 
@@ -434,7 +434,7 @@
           * (so the floating point exponent just gets increased), rather than
           * doing a naive sum and dividing.
           */
-         assert((samples & (samples - 1)) == 0);
+         assert(_mesa_is_pow_two(samples));
          /* Fetch each individual sample. */
          sample_resolve = rzalloc_size(mem_ctx, 1);
          for (i = 0; i < samples; i++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta.c	2015-09-16 14:36:09.000000000 +0000
@@ -945,6 +945,8 @@
    if (state & MESA_META_SHADER) {
       static const GLenum targets[] = {
          GL_VERTEX_SHADER,
+         GL_TESS_CONTROL_SHADER,
+         GL_TESS_EVALUATION_SHADER,
          GL_GEOMETRY_SHADER,
          GL_FRAGMENT_SHADER,
       };
@@ -2449,30 +2451,53 @@
 
 /**
  * Compute the texture coordinates for the four vertices of a quad for
- * drawing a 2D texture image or slice of a cube/3D texture.
+ * drawing a 2D texture image or slice of a cube/3D texture.  The offset
+ * and width, height specify a sub-region of the 2D image.
+ *
  * \param faceTarget  GL_TEXTURE_1D/2D/3D or cube face name
  * \param slice  slice of a 1D/2D array texture or 3D texture
- * \param width  width of the texture image
- * \param height  height of the texture image
+ * \param xoffset  X position of sub texture
+ * \param yoffset  Y position of sub texture
+ * \param width  width of the sub texture image
+ * \param height  height of the sub texture image
+ * \param total_width  total width of the texture image
+ * \param total_height  total height of the texture image
+ * \param total_depth  total depth of the texture image
  * \param coords0/1/2/3  returns the computed texcoords
  */
 void
 _mesa_meta_setup_texture_coords(GLenum faceTarget,
                                 GLint slice,
+                                GLint xoffset,
+                                GLint yoffset,
                                 GLint width,
                                 GLint height,
-                                GLint depth,
+                                GLint total_width,
+                                GLint total_height,
+                                GLint total_depth,
                                 GLfloat coords0[4],
                                 GLfloat coords1[4],
                                 GLfloat coords2[4],
                                 GLfloat coords3[4])
 {
-   static const GLfloat st[4][2] = {
-      {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
-   };
+   float st[4][2];
    GLuint i;
+   const float s0 = (float) xoffset / (float) total_width;
+   const float s1 = (float) (xoffset + width) / (float) total_width;
+   const float t0 = (float) yoffset / (float) total_height;
+   const float t1 = (float) (yoffset + height) / (float) total_height;
    GLfloat r;
 
+   /* setup the reference texcoords */
+   st[0][0] = s0;
+   st[0][1] = t0;
+   st[1][0] = s1;
+   st[1][1] = t0;
+   st[2][0] = s1;
+   st[2][1] = t1;
+   st[3][0] = s0;
+   st[3][1] = t1;
+
    if (faceTarget == GL_TEXTURE_CUBE_MAP_ARRAY)
       faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + slice % 6;
 
@@ -2489,52 +2514,52 @@
    case GL_TEXTURE_3D:
    case GL_TEXTURE_2D_ARRAY:
       if (faceTarget == GL_TEXTURE_3D) {
-         assert(slice < depth);
-         assert(depth >= 1);
-         r = (slice + 0.5f) / depth;
+         assert(slice < total_depth);
+         assert(total_depth >= 1);
+         r = (slice + 0.5f) / total_depth;
       }
       else if (faceTarget == GL_TEXTURE_2D_ARRAY)
          r = (float) slice;
       else
          r = 0.0F;
-      coords0[0] = 0.0F; /* s */
-      coords0[1] = 0.0F; /* t */
+      coords0[0] = st[0][0]; /* s */
+      coords0[1] = st[0][1]; /* t */
       coords0[2] = r; /* r */
-      coords1[0] = 1.0F;
-      coords1[1] = 0.0F;
+      coords1[0] = st[1][0];
+      coords1[1] = st[1][1];
       coords1[2] = r;
-      coords2[0] = 1.0F;
-      coords2[1] = 1.0F;
+      coords2[0] = st[2][0];
+      coords2[1] = st[2][1];
       coords2[2] = r;
-      coords3[0] = 0.0F;
-      coords3[1] = 1.0F;
+      coords3[0] = st[3][0];
+      coords3[1] = st[3][1];
       coords3[2] = r;
       break;
    case GL_TEXTURE_RECTANGLE_ARB:
-      coords0[0] = 0.0F; /* s */
-      coords0[1] = 0.0F; /* t */
+      coords0[0] = (float) xoffset; /* s */
+      coords0[1] = (float) yoffset; /* t */
       coords0[2] = 0.0F; /* r */
-      coords1[0] = (float) width;
-      coords1[1] = 0.0F;
+      coords1[0] = (float) (xoffset + width);
+      coords1[1] = (float) yoffset;
       coords1[2] = 0.0F;
-      coords2[0] = (float) width;
-      coords2[1] = (float) height;
+      coords2[0] = (float) (xoffset + width);
+      coords2[1] = (float) (yoffset + height);
       coords2[2] = 0.0F;
-      coords3[0] = 0.0F;
-      coords3[1] = (float) height;
+      coords3[0] = (float) xoffset;
+      coords3[1] = (float) (yoffset + height);
       coords3[2] = 0.0F;
       break;
    case GL_TEXTURE_1D_ARRAY:
-      coords0[0] = 0.0F; /* s */
+      coords0[0] = st[0][0]; /* s */
       coords0[1] = (float) slice; /* t */
       coords0[2] = 0.0F; /* r */
-      coords1[0] = 1.0f;
+      coords1[0] = st[1][0];
       coords1[1] = (float) slice;
       coords1[2] = 0.0F;
-      coords2[0] = 1.0F;
+      coords2[0] = st[2][0];
       coords2[1] = (float) slice;
       coords2[2] = 0.0F;
-      coords3[0] = 0.0F;
+      coords3[0] = st[3][0];
       coords3[1] = (float) slice;
       coords3[2] = 0.0F;
       break;
@@ -2943,15 +2968,14 @@
 decompress_texture_image(struct gl_context *ctx,
                          struct gl_texture_image *texImage,
                          GLuint slice,
+                         GLint xoffset, GLint yoffset,
+                         GLsizei width, GLsizei height,
                          GLenum destFormat, GLenum destType,
                          GLvoid *dest)
 {
    struct decompress_state *decompress = &ctx->Meta->Decompress;
    struct decompress_fbo_state *decompress_fbo;
    struct gl_texture_object *texObj = texImage->TexObject;
-   const GLint width = texImage->Width;
-   const GLint height = texImage->Height;
-   const GLint depth = texImage->Height;
    const GLenum target = texObj->Target;
    GLenum rbFormat;
    GLenum faceTarget;
@@ -3069,7 +3093,10 @@
    /* Silence valgrind warnings about reading uninitialized stack. */
    memset(verts, 0, sizeof(verts));
 
-   _mesa_meta_setup_texture_coords(faceTarget, slice, width, height, depth,
+   _mesa_meta_setup_texture_coords(faceTarget, slice,
+                                   xoffset, yoffset, width, height,
+                                   texImage->Width, texImage->Height,
+                                   texImage->Depth,
                                    verts[0].tex,
                                    verts[1].tex,
                                    verts[2].tex,
@@ -3123,7 +3150,7 @@
    /* read pixels from renderbuffer */
    {
       GLenum baseTexFormat = texImage->_BaseFormat;
-      GLenum destBaseFormat = _mesa_base_tex_format(ctx, destFormat);
+      GLenum destBaseFormat = _mesa_unpack_format_to_base_format(destFormat);
 
       /* The pixel transfer state will be set to default values at this point
        * (see MESA_META_PIXEL_TRANSFER) so pixel transfer ops are effectively
@@ -3132,19 +3159,13 @@
        * returned as red and two-channel texture values are returned as
        * red/alpha.
        */
-      if ((baseTexFormat == GL_LUMINANCE ||
-           baseTexFormat == GL_LUMINANCE_ALPHA ||
-           baseTexFormat == GL_INTENSITY) ||
+      if (_mesa_need_luminance_to_rgb_conversion(baseTexFormat,
+                                                 destBaseFormat) ||
           /* If we're reading back an RGB(A) texture (using glGetTexImage) as
 	   * luminance then we need to return L=tex(R).
 	   */
-          ((baseTexFormat == GL_RGBA ||
-            baseTexFormat == GL_RGB  ||
-            baseTexFormat == GL_RG) &&
-          (destBaseFormat == GL_LUMINANCE ||
-           destBaseFormat == GL_LUMINANCE_ALPHA ||
-           destBaseFormat == GL_LUMINANCE_INTEGER_EXT ||
-           destBaseFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT))) {
+          _mesa_need_rgb_to_luminance_conversion(baseTexFormat,
+                                                 destBaseFormat)) {
          /* Green and blue must be zero */
          _mesa_PixelTransferf(GL_GREEN_SCALE, 0.0f);
          _mesa_PixelTransferf(GL_BLUE_SCALE, 0.0f);
@@ -3171,15 +3192,17 @@
  * from core Mesa.
  */
 void
-_mesa_meta_GetTexImage(struct gl_context *ctx,
-                       GLenum format, GLenum type, GLvoid *pixels,
-                       struct gl_texture_image *texImage)
+_mesa_meta_GetTexSubImage(struct gl_context *ctx,
+                          GLint xoffset, GLint yoffset, GLint zoffset,
+                          GLsizei width, GLsizei height, GLsizei depth,
+                          GLenum format, GLenum type, GLvoid *pixels,
+                          struct gl_texture_image *texImage)
 {
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
       GLuint slice;
       bool result = true;
 
-      for (slice = 0; slice < texImage->Depth; slice++) {
+      for (slice = 0; slice < depth; slice++) {
          void *dst;
          if (texImage->TexObject->Target == GL_TEXTURE_2D_ARRAY
              || texImage->TexObject->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
@@ -3191,14 +3214,14 @@
             struct gl_pixelstore_attrib packing = ctx->Pack;
             packing.SkipPixels = 0;
             packing.SkipRows = 0;
-            dst = _mesa_image_address3d(&packing, pixels, texImage->Width,
-                                        texImage->Height, format, type,
-                                        slice, 0, 0);
+            dst = _mesa_image_address3d(&packing, pixels, width, height,
+                                        format, type, slice, 0, 0);
          }
          else {
             dst = pixels;
          }
          result = decompress_texture_image(ctx, texImage, slice,
+                                           xoffset, yoffset, width, height,
                                            format, type, dst);
          if (!result)
             break;
@@ -3208,7 +3231,8 @@
          return;
    }
 
-   _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage);
+   _mesa_GetTexSubImage_sw(ctx, xoffset, yoffset, zoffset,
+                           width, height, depth, format, type, pixels, texImage);
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta_generate_mipmap.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta_generate_mipmap.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta_generate_mipmap.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta_generate_mipmap.c	2015-09-16 14:36:09.000000000 +0000
@@ -66,7 +66,7 @@
    if (target == GL_TEXTURE_3D) {
       _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH,
                        "glGenerateMipmap() to %s target\n",
-                       _mesa_lookup_enum_by_nr(target));
+                       _mesa_enum_to_string(target));
       return true;
    }
 
@@ -163,7 +163,6 @@
    const GLuint maxLevel = texObj->MaxLevel;
    const GLint maxLevelSave = texObj->MaxLevel;
    const GLboolean genMipmapSave = texObj->GenerateMipmap;
-   const GLuint currentTexUnitSave = ctx->Texture.CurrentUnit;
    const GLboolean use_glsl_version = ctx->Extensions.ARB_vertex_shader &&
                                       ctx->Extensions.ARB_fragment_shader;
    GLenum faceTarget;
@@ -202,8 +201,12 @@
    samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
       ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
 
-   if (currentTexUnitSave != 0)
-      _mesa_BindTexture(target, texObj->Name);
+   /* We may have been called from glGenerateTextureMipmap with CurrentUnit
+    * still set to 0, so we don't know when we can skip binding the texture.
+    * Assume that _mesa_BindTexture will be fast if we're rebinding the same
+    * texture.
+    */
+   _mesa_BindTexture(target, texObj->Name);
 
    if (!mipmap->Sampler) {
       _mesa_GenSamplers(1, &mipmap->Sampler);
@@ -317,7 +320,9 @@
          /* Setup texture coordinates */
          _mesa_meta_setup_texture_coords(faceTarget,
                                          layer,
-                                         0, 0, 1, /* width, height never used here */
+                                         0, 0, /* xoffset, yoffset */
+                                         srcWidth, srcHeight, /* img size */
+                                         srcWidth, srcHeight, srcDepth,
                                          verts[0].tex,
                                          verts[1].tex,
                                          verts[2].tex,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta.h	2015-09-16 14:36:09.000000000 +0000
@@ -560,9 +560,11 @@
                             const GLvoid *clearValue);
 
 extern void
-_mesa_meta_GetTexImage(struct gl_context *ctx,
-                       GLenum format, GLenum type, GLvoid *pixels,
-                       struct gl_texture_image *texImage);
+_mesa_meta_GetTexSubImage(struct gl_context *ctx,
+                          GLint xoffset, GLint yoffset, GLint zoffset,
+                          GLsizei width, GLsizei height, GLsizei depth,
+                          GLenum format, GLenum type, GLvoid *pixels,
+                          struct gl_texture_image *texImage);
 
 extern void
 _mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
@@ -594,9 +596,13 @@
 void
 _mesa_meta_setup_texture_coords(GLenum faceTarget,
                                 GLint slice,
+                                GLint xoffset,
+                                GLint yoffset,
                                 GLint width,
                                 GLint height,
-                                GLint depth,
+                                GLint total_width,
+                                GLint total_height,
+                                GLint total_depth,
                                 GLfloat coords0[4],
                                 GLfloat coords1[4],
                                 GLfloat coords2[4],
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta_tex_subimage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta_tex_subimage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/common/meta_tex_subimage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/common/meta_tex_subimage.c	2015-09-16 14:36:09.000000000 +0000
@@ -153,7 +153,8 @@
    bool success = false;
    int z;
 
-   if (!_mesa_is_bufferobj(packing->BufferObj) && !create_pbo)
+   if (!_mesa_is_bufferobj(packing->BufferObj) &&
+       (!create_pbo || pixels == NULL))
       return false;
 
    if (format == GL_DEPTH_COMPONENT ||
@@ -279,6 +280,7 @@
    int full_height, image_height;
    struct gl_texture_image *pbo_tex_image;
    struct gl_renderbuffer *rb = NULL;
+   GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
    GLenum status, src_base_format;
    bool success = false, clear_channels_to_zero = false;
    float save_clear_color[4];
@@ -304,7 +306,8 @@
                                             type, GL_FALSE))
          return false;
 
-      if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format))
+      if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
+                                                 dstBaseFormat))
          return false;
 
       /* This function rely on BlitFramebuffer to fill in the pixel data for
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/Android.mk mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/Android.mk
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/Android.mk	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/Android.mk	2015-09-16 14:36:09.000000000 +0000
@@ -39,14 +39,9 @@
 LOCAL_C_INCLUDES := \
     $(MESA_DRI_C_INCLUDES)
 
-LOCAL_EXPORT_C_INCLUDE_DIRS := $(intermediates)
-
-# swrast only
-ifeq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H
-else
-LOCAL_SHARED_LIBRARIES := libdrm
-endif
+LOCAL_EXPORT_C_INCLUDE_DIRS := \
+    $(LOCAL_PATH) \
+    $(intermediates)
 
 LOCAL_SRC_FILES := \
 	$(DRI_COMMON_FILES) \
@@ -108,13 +103,6 @@
 LOCAL_C_INCLUDES := \
     $(MESA_DRI_C_INCLUDES)
 
-# swrast only
-ifeq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H
-else
-LOCAL_SHARED_LIBRARIES := libdrm
-endif
-
 LOCAL_SRC_FILES := $(megadriver_stub_FILES)
 
 include $(MESA_COMMON_MK)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/drirc mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/drirc
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/drirc	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/drirc	2015-09-16 14:36:09.000000000 +0000
@@ -4,24 +4,15 @@
 Application bugs worked around in this file:
 ============================================
 
+* Unigine Heaven 3.0 and older contain too many bugs and can't be supported
+  by drivers that want to be compliant.
+
 * Various Unigine products don't use the #version and #extension GLSL
   directives, meaning they only get GLSL 1.10 and no extensions for their
   shaders.
   Enabling all extensions for Unigine fixes most issues, but the GLSL version
   is still 1.10.
 
-* Unigine Heaven 3.0 with ARB_texture_multisample uses a "ivec4 * vec4"
-  expression, which is illegal in GLSL 1.10.
-  Adding "#version 130" fixes this.
-
-* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses the uint keyword, which
-  is illegal in GLSL 1.10.
-  Adding "#version 130" fixes this.
-
-* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses a "uint & int"
-  expression, which is illegal in any GLSL version.
-  Disabling ARB_shader_bit_encoding fixes this.
-
 * If ARB_sample_shading is supported, Unigine Heaven 4.0 and Valley 1.0 uses
   an #extension directive in the middle of its shaders, which is illegal
   in GLSL.
@@ -45,18 +36,10 @@
 	</application>
 
         <application name="Unigine Heaven (32-bit)" executable="heaven_x86">
-            <option name="force_glsl_extensions_warn" value="true" />
-            <option name="disable_blend_func_extended" value="true" />
-            <option name="force_glsl_version" value="130" />
-            <option name="disable_shader_bit_encoding" value="true" />
             <option name="allow_glsl_extension_directive_midshader" value="true" />
 	</application>
 
         <application name="Unigine Heaven (64-bit)" executable="heaven_x64">
-            <option name="force_glsl_extensions_warn" value="true" />
-            <option name="disable_blend_func_extended" value="true" />
-            <option name="force_glsl_version" value="130" />
-            <option name="disable_shader_bit_encoding" value="true" />
             <option name="allow_glsl_extension_directive_midshader" value="true" />
 	</application>
 
@@ -70,10 +53,12 @@
 
         <application name="Unigine OilRush (32-bit)" executable="OilRush_x86">
             <option name="disable_blend_func_extended" value="true" />
+            <option name="allow_glsl_extension_directive_midshader" value="true" />
 	</application>
 
         <application name="Unigine OilRush (64-bit)" executable="OilRush_x64">
             <option name="disable_blend_func_extended" value="true" />
+            <option name="allow_glsl_extension_directive_midshader" value="true" />
 	</application>
 
         <application name="Savage 2" executable="savage2.bin">
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/dri_util.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/dri_util.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/dri_util.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/dri_util.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,13 +40,9 @@
 
 
 #include <stdbool.h>
-#ifndef __NOT_HAVE_DRM_H
-#include <xf86drm.h>
-#endif
 #include "dri_util.h"
 #include "utils.h"
 #include "xmlpool.h"
-#include "../glsl/glsl_parser_extras.h"
 #include "main/mtypes.h"
 #include "main/version.h"
 #include "main/errors.h"
@@ -138,18 +134,6 @@
 
     setupLoaderExtensions(psp, extensions);
 
-#ifndef __NOT_HAVE_DRM_H
-    if (fd != -1) {
-       drmVersionPtr version = drmGetVersion(fd);
-       if (version) {
-          psp->drm_version.major = version->version_major;
-          psp->drm_version.minor = version->version_minor;
-          psp->drm_version.patch = version->version_patchlevel;
-          drmFreeVersion(version);
-       }
-    }
-#endif
-
     psp->loaderPrivate = data;
 
     psp->extensions = emptyExtensionList;
@@ -162,16 +146,26 @@
 	return NULL;
     }
 
-    int gl_version_override = _mesa_get_gl_version_override();
-    if (gl_version_override >= 31) {
-       psp->max_gl_core_version = MAX2(psp->max_gl_core_version,
-                                       gl_version_override);
-    } else {
-       psp->max_gl_compat_version = MAX2(psp->max_gl_compat_version,
-                                         gl_version_override);
+    struct gl_constants consts = { 0 };
+    gl_api api;
+    unsigned version;
+
+    api = API_OPENGLES2;
+    if (_mesa_override_gl_version_contextless(&consts, &api, &version))
+       psp->max_gl_es2_version = version;
+
+    api = API_OPENGL_COMPAT;
+    if (_mesa_override_gl_version_contextless(&consts, &api, &version)) {
+       if (api == API_OPENGL_CORE) {
+          psp->max_gl_core_version = version;
+       } else {
+          psp->max_gl_compat_version = version;
+       }
     }
 
-    psp->api_mask = (1 << __DRI_API_OPENGL);
+    psp->api_mask = 0;
+    if (psp->max_gl_compat_version > 0)
+       psp->api_mask |= (1 << __DRI_API_OPENGL);
     if (psp->max_gl_core_version > 0)
        psp->api_mask |= (1 << __DRI_API_OPENGL_CORE);
     if (psp->max_gl_es1_version > 0)
@@ -230,8 +224,6 @@
 	 * stream open to the X-server anymore.
 	 */
 
-       _mesa_destroy_shader_compiler();
-
 	psp->driver->DestroyScreen(psp);
 
 	driDestroyOptionCache(&psp->optionCache);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/dri_util.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/dri_util.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/dri_util.h	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/dri_util.h	2015-09-16 14:36:09.000000000 +0000
@@ -149,11 +149,6 @@
     int fd;
 
     /**
-     * DRM (kernel module) version information.
-     */
-    __DRIversion drm_version;
-
-    /**
      * Device-dependent private information (not stored in the SAREA).
      * 
      * This pointer is never touched by the DRI layer.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,7 @@
 	-I$(top_srcdir)/src/mesa/ \
 	-I$(top_srcdir)/src/gallium/include \
 	-I$(top_srcdir)/src/gallium/auxiliary \
+	$(LIBDRM_CFLAGS) \
 	$(DEFINES) \
 	$(VISIBILITY_CFLAGS)
 
@@ -53,10 +54,3 @@
 libmegadriver_stub_la_SOURCES = $(megadriver_stub_FILES)
 
 sysconf_DATA = drirc
-
-if DRICOMMON_NEED_LIBDRM
-AM_CFLAGS += $(LIBDRM_CFLAGS)
-libdricommon_la_LIBADD = $(LIBDRM_LIBS)
-else
-AM_CFLAGS += -D__NOT_HAVE_DRM_H
-endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/SConscript	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/SConscript	2015-09-16 14:36:09.000000000 +0000
@@ -32,11 +32,6 @@
     'expat',
 ])
 
-# if HAVE_DRI2
-drienv.PkgUseModules('DRM')
-# else
-#env.Append(CPPDEFINES = ['__NOT_HAVE_DRM_H'])
-
 sources = drienv.ParseSourceList('Makefile.sources', ['DRI_COMMON_FILES', 'XMLCONFIG_FILES' ])
 
 dri_common = drienv.ConvenienceLibrary(
@@ -57,7 +52,6 @@
 ])
 
 env.Append(CPPDEFINES = [
-    '__NOT_HAVE_DRM_H',
     'HAVE_DLADDR',
 ])
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/utils.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/utils.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/utils.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/utils.c	2015-09-16 14:36:09.000000000 +0000
@@ -213,6 +213,7 @@
       masks = masks_table[0];
       break;
    case MESA_FORMAT_B8G8R8X8_UNORM:
+   case MESA_FORMAT_B8G8R8X8_SRGB:
       masks = masks_table[1];
       break;
    case MESA_FORMAT_B8G8R8A8_UNORM:
@@ -451,7 +452,7 @@
 driGetConfigAttrib(const __DRIconfig *config,
 		   unsigned int attrib, unsigned int *value)
 {
-    int i;
+    unsigned i;
 
     for (i = 0; i < ARRAY_SIZE(attribMap); i++)
 	if (attribMap[i].attrib == attrib)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/xmlpool/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/xmlpool/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/common/xmlpool/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/common/xmlpool/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -67,7 +67,7 @@
 	$(MOS)
 
 # Default target options.h
-options.h: LOCALEDIR := .
+LOCALEDIR := .
 options.h: t_options.h $(MOS)
 	$(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i830_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i830_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i830_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i830_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,7 @@
    mask = mask & 0xff;
 
    DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __func__,
-       _mesa_lookup_enum_by_nr(func), ref, mask);
+       _mesa_enum_to_string(func), ref, mask);
 
 
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
@@ -95,9 +95,9 @@
    int fop, dfop, dpop;
 
    DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __func__,
-       _mesa_lookup_enum_by_nr(fail),
-       _mesa_lookup_enum_by_nr(zfail), 
-       _mesa_lookup_enum_by_nr(zpass));
+       _mesa_enum_to_string(fail),
+       _mesa_enum_to_string(zfail), 
+       _mesa_enum_to_string(zpass));
 
    fop = 0;
    dfop = 0;
@@ -389,8 +389,8 @@
 i830BlendEquationSeparate(struct gl_context * ctx, GLenum modeRGB, GLenum modeA)
 {
    DBG("%s -> %s, %s\n", __func__,
-       _mesa_lookup_enum_by_nr(modeRGB),
-       _mesa_lookup_enum_by_nr(modeA));
+       _mesa_enum_to_string(modeRGB),
+       _mesa_enum_to_string(modeA));
 
    (void) modeRGB;
    (void) modeA;
@@ -403,10 +403,10 @@
                       GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA)
 {
    DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __func__,
-       _mesa_lookup_enum_by_nr(sfactorRGB),
-       _mesa_lookup_enum_by_nr(dfactorRGB),
-       _mesa_lookup_enum_by_nr(sfactorA),
-       _mesa_lookup_enum_by_nr(dfactorA));
+       _mesa_enum_to_string(sfactorRGB),
+       _mesa_enum_to_string(dfactorRGB),
+       _mesa_enum_to_string(sfactorA),
+       _mesa_enum_to_string(dfactorA));
 
    (void) sfactorRGB;
    (void) dfactorRGB;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i915_fragprog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i915_fragprog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i915_fragprog.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i915_fragprog.c	2015-09-16 14:36:09.000000000 +0000
@@ -220,7 +220,7 @@
 {
    GLuint flags = 0;
 
-   if (inst->SaturateMode == SATURATE_ZERO_ONE)
+   if (inst->Saturate)
       flags |= A0_DEST_SATURATE;
    if (inst->DstReg.WriteMask & WRITEMASK_X)
       flags |= A0_DEST_CHANNEL_X;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i915_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i915_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i915_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i915_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -402,7 +402,7 @@
 intelCalcViewport(struct gl_context * ctx)
 {
    struct intel_context *intel = intel_context(ctx);
-   double scale[3], translate[3];
+   float scale[3], translate[3];
 
    _mesa_get_viewport_xform(ctx, 0, scale, translate);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i915_texstate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i915_texstate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/i915_texstate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/i915_texstate.c	2015-09-16 14:36:09.000000000 +0000
@@ -342,7 +342,7 @@
        * Thus, I guess we need do this for other platforms as well.
        */
       if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
-          !is_power_of_two(firstImage->Height))
+          !_mesa_is_pow_two(firstImage->Height))
          return false;
 
       state[I915_TEXREG_SS3] = ss3;     /* SS3_NORMALIZED_COORDS */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -428,7 +428,6 @@
 
    driContextPriv->driverPrivate = intel;
    intel->driContext = driContextPriv;
-   intel->driFd = sPriv->fd;
 
    intel->gen = intelScreen->gen;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -273,8 +273,6 @@
 
    bool use_early_z;
 
-   int driFd;
-
    __DRIcontext *driContext;
    struct intel_screen *intelScreen;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_fbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_fbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_fbo.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_fbo.c	2015-09-16 14:36:09.000000000 +0000
@@ -216,7 +216,7 @@
    intel_miptree_release(&irb->mt);
 
    DBG("%s: %s: %s (%dx%d)\n", __func__,
-       _mesa_lookup_enum_by_nr(internalFormat),
+       _mesa_enum_to_string(internalFormat),
        _mesa_get_format_name(rb->Format), width, height);
 
    if (width == 0 || height == 0)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_mipmap_tree.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_mipmap_tree.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_mipmap_tree.c	2015-09-16 14:36:09.000000000 +0000
@@ -81,7 +81,7 @@
       return NULL;
 
    DBG("%s target %s format %s level %d..%d <-- %p\n", __func__,
-       _mesa_lookup_enum_by_nr(target),
+       _mesa_enum_to_string(target),
        _mesa_get_format_name(format),
        first_level, last_level, mt);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_render.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_render.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_render.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_render.c	2015-09-16 14:36:09.000000000 +0000
@@ -113,7 +113,7 @@
 intelDmaPrimitive(struct intel_context *intel, GLenum prim)
 {
    if (0)
-      fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim));
+      fprintf(stderr, "%s %s\n", __func__, _mesa_enum_to_string(prim));
    INTEL_FIREVERTICES(intel);
    intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]);
    intel_set_prim(intel, hw_prim[prim]);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_tex_image.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_tex_image.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_tex_image.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_tex_image.c	2015-09-16 14:36:09.000000000 +0000
@@ -189,7 +189,7 @@
               const struct gl_pixelstore_attrib *unpack)
 {
    DBG("%s target %s level %d %dx%dx%d\n", __func__,
-       _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
+       _mesa_enum_to_string(texImage->TexObject->Target),
        texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
 
    /* Attempt to use the blitter for PBO image uploads.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_tex_subimage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_tex_subimage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_tex_subimage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_tex_subimage.c	2015-09-16 14:36:09.000000000 +0000
@@ -72,7 +72,7 @@
 
    DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n",
        __func__,
-       _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
+       _mesa_enum_to_string(texImage->TexObject->Target),
        texImage->Level, xoffset, yoffset, width, height);
 
    pixels = _mesa_validate_pbo_teximage(ctx, 2, width, height, 1,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_tris.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_tris.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i915/intel_tris.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i915/intel_tris.c	2015-09-16 14:36:09.000000000 +0000
@@ -1134,7 +1134,7 @@
 
    if (0)
       fprintf(stderr, "%s %s %x\n", __func__,
-              _mesa_lookup_enum_by_nr(rprim), hwprim);
+              _mesa_enum_to_string(rprim), hwprim);
 
    intel->vtbl.reduced_primitive_state(intel, rprim);
 
@@ -1158,7 +1158,7 @@
                          ctx->Polygon.BackMode != GL_FILL);
 
    if (0)
-      fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim));
+      fprintf(stderr, "%s %s\n", __func__, _mesa_enum_to_string(prim));
 
    /* Let some clipping routines know which primitive they're dealing
     * with.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_binding_tables.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_binding_tables.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_binding_tables.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_binding_tables.c	2015-09-16 14:36:09.000000000 +0000
@@ -44,6 +44,41 @@
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 
+static const GLuint stage_to_bt_edit[] = { /* indexed by gl_shader_stage */
+   [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS,
+   [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS,
+   [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS,
+}; /* stages without an initializer read back as 0 */
+
+static uint32_t
+reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
+{
+   /* Reserve room for "bytes" bytes (rounded up to a multiple of 64) in the
+    * hardware binding table pool and return the offset where it starts.
+    *
+    * From the Broadwell PRM, Volume 16, "Workarounds",
+    * WaStateBindingTableOverfetch:
+    * "HW over-fetches two cache lines of binding table indices.  When
+    *  using the resource streamer, SW needs to pad binding table pointer
+    *  updates with an additional two cache lines."
+    *
+    * Cache lines are 64 bytes, so the last 128 bytes of the pool buffer
+    * are kept free; a request that would reach them resets the pool first.
+    */
+   if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128)
+      gen7_reset_hw_bt_pool_offsets(brw);
+
+   const uint32_t start = brw->hw_bt_pool.next_offset;
+
+   /* From the Haswell PRM, Volume 2b: Command Reference: Instructions,
+    * 3DSTATE_BINDING_TABLE_POINTERS_xS:
+    * "If HW Binding Table is enabled, the offset is relative to the
+    *  Binding Table Pool Base Address and the alignment is 64 bytes."
+    */
+   brw->hw_bt_pool.next_offset += ALIGN(bytes, 64);
+   return start;
+}
+
 /**
  * Upload a shader stage's binding table as indirect state.
  *
@@ -72,22 +107,41 @@
             brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
             brw->shader_time.bo->size, 1, true);
       }
-
-      uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
-                                       prog_data->binding_table.size_bytes, 32,
-                                       &stage_state->bind_bo_offset);
-
-      /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
-      memcpy(bind, stage_state->surf_offset,
-             prog_data->binding_table.size_bytes);
+      /* When RS is enabled use hw-binding table uploads, otherwise fall back
+       * to software-uploads.
+       */
+      if (brw->use_resource_streamer) {
+         gen7_update_binding_table_from_array(brw, stage_state->stage,
+                                              stage_state->surf_offset,
+                                              prog_data->binding_table
+                                              .size_bytes / 4);
+      } else {
+         uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                                          prog_data->binding_table.size_bytes,
+                                          32,
+                                          &stage_state->bind_bo_offset);
+
+         /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+         memcpy(bind, stage_state->surf_offset,
+                prog_data->binding_table.size_bytes);
+      }
    }
 
    brw->ctx.NewDriverState |= brw_new_binding_table;
 
    if (brw->gen >= 7) {
+      if (brw->use_resource_streamer) {
+         stage_state->bind_bo_offset =
+            reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes);
+      }
       BEGIN_BATCH(2);
       OUT_BATCH(packet_name << 16 | (2 - 2));
-      OUT_BATCH(stage_state->bind_bo_offset);
+      /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
+       * when hw-generated binding table is enabled.
+       */
+      OUT_BATCH(brw->use_resource_streamer ?
+                (stage_state->bind_bo_offset >> 1) :
+                stage_state->bind_bo_offset);
       ADVANCE_BATCH();
    }
 }
@@ -170,6 +224,158 @@
    .emit = brw_gs_upload_binding_table,
 };
 
+/**
+ * Rewrite one slot of a stage's hardware-generated binding table.
+ */
+void
+gen7_edit_hw_binding_table_entry(struct brw_context *brw,
+                                 gl_shader_stage stage,
+                                 uint32_t index,
+                                 uint32_t surf_offset)
+{
+   assert(stage < ARRAY_SIZE(stage_to_bt_edit));
+   assert(stage_to_bt_edit[stage]);
+
+   uint32_t entry = SET_FIELD(index, BRW_BINDING_TABLE_INDEX);
+   entry |= brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset)
+                          : HSW_SURFACE_STATE_EDIT(surf_offset);
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2));
+   OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
+   OUT_BATCH(entry);
+   ADVANCE_BATCH();
+}
+
+/**
+ * Program every entry of a stage's hardware binding table in one packet.
+ *
+ * binding_table points to num_surfaces surface offsets; entry i of the
+ * table is written from binding_table[i].
+ */
+void
+gen7_update_binding_table_from_array(struct brw_context *brw,
+                                     gl_shader_stage stage,
+                                     const uint32_t* binding_table,
+                                     int num_surfaces)
+{
+   const bool is_gen8_plus = brw->gen >= 8;
+
+   assert(stage < ARRAY_SIZE(stage_to_bt_edit));
+   assert(stage_to_bt_edit[stage]);
+
+   BEGIN_BATCH(num_surfaces + 2);
+   OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces);
+   OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
+   for (int slot = 0; slot < num_surfaces; slot++) {
+      const uint32_t surf = is_gen8_plus ?
+         GEN8_SURFACE_STATE_EDIT(binding_table[slot]) :
+         HSW_SURFACE_STATE_EDIT(binding_table[slot]);
+      OUT_BATCH(SET_FIELD(slot, BRW_BINDING_TABLE_INDEX) | surf);
+   }
+   ADVANCE_BATCH();
+}
+
+/**
+ * Switch hardware binding tables off again so that the older
+ * software-generated binding table mechanism is used.  Does nothing
+ * unless the resource streamer is in use.
+ */
+void
+gen7_disable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->use_resource_streamer)
+      return;
+
+   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+    * "When switching between HW and SW binding table generation, SW must
+    * issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+   const bool gen8_plus = brw->gen >= 8;
+   const int pkt_len = gen8_plus ? 4 : 3;
+
+   /* Gen8+ packets carry one more (zero) dword than the older layout. */
+   BEGIN_BATCH(pkt_len);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+   OUT_BATCH(gen8_plus ? 0 : HSW_BT_POOL_ALLOC_MUST_BE_ONE);
+   OUT_BATCH(0);
+   if (gen8_plus) {
+      OUT_BATCH(0);
+   }
+   ADVANCE_BATCH();
+}
+
+/**
+ * Enable hardware binding tables, allocating the pool buffer on first use.
+ */
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->use_resource_streamer)
+      return;
+
+   if (!brw->hw_bt_pool.bo) {
+      /* A single buffer object is allocated on first use and then reused
+       * (this branch only runs while hw_bt_pool.bo is NULL).
+       * NOTE(review): the allocation result is not checked here.
+       *
+       * Size, from the Haswell PRM, Volume 7: 3D Media GPGPU,
+       * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+       * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
+       */
+      static const int max_size = 16383 * 4;
+      brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+                                              max_size, 64);
+      brw->hw_bt_pool.next_offset = 0;
+   }
+
+   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+    * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+    *
+    * "When switching between HW and SW binding table generation, SW must
+    * issue a state cache invalidate."
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+   int pkt_len = brw->gen >= 8 ? 4 : 3;
+   uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
+   if (brw->is_haswell) {
+      dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
+             HSW_BT_POOL_ALLOC_MUST_BE_ONE;
+   } else if (brw->gen >= 8) {
+      dw1 |= BDW_MOCS_WB;
+   }
+
+   BEGIN_BATCH(pkt_len);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+   if (brw->gen >= 8) {
+      OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+      OUT_BATCH(brw->hw_bt_pool.bo->size);
+   } else {
+      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+      OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+             brw->hw_bt_pool.bo->size);
+   }
+   ADVANCE_BATCH();
+}
+
+void
+gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
+{
+   brw->hw_bt_pool.next_offset = 0; /* restart reservations at offset 0 */
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH, /* presumably: re-emit per new batch -- confirm */
+   },
+   .emit = gen7_enable_hw_binding_tables
+};
+
 /** @} */
 
 /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -215,6 +215,10 @@
    struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
    struct intel_texture_image *intel_image = intel_texture_image(dst_image);
 
+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
    /* Sync up the state of window system buffers.  We need to do this before
     * we go looking at the src renderbuffer's miptree.
     */
@@ -1285,8 +1289,8 @@
       /* Round the float coordinates down to nearest integer */
       emit_rndd(Xp_f, X_f);
       emit_rndd(Yp_f, Y_f);
-      emit_mul(X_f, Xp_f, brw_imm_f(1 / key->x_scale));
-      emit_mul(Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
+      emit_mul(X_f, Xp_f, brw_imm_f(1.0f / key->x_scale));
+      emit_mul(Y_f, Yp_f, brw_imm_f(1.0f / key->y_scale));
       SWAP_XY_AND_XPYP();
    } else if (!key->bilinear_filter) {
       /* Round the float coordinates down to nearest integer by moving to
@@ -1442,7 +1446,7 @@
       for (int j = 0; j < 4; ++j) {
          emit_mul(offset(texture_data[0], 2*j),
                  offset(vec8(texture_data[0]), 2*j),
-                 brw_imm_f(1.0/num_samples));
+                 brw_imm_f(1.0f / num_samples));
       }
    }
 
@@ -1475,9 +1479,9 @@
 
       /* Compute pixel coordinates */
       emit_add(vec16(x_sample_coords), Xp_f,
-              brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale)));
+              brw_imm_f((float)(i & 0x1) * (1.0f / key->x_scale)));
       emit_add(vec16(y_sample_coords), Yp_f,
-              brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale)));
+              brw_imm_f((float)((i >> 1) & 0x1) * (1.0f / key->y_scale)));
       emit_mov(vec16(X), x_sample_coords);
       emit_mov(vec16(Y), y_sample_coords);
 
@@ -1789,7 +1793,7 @@
        * so 0.5 provides the necessary correction.
        */
       multiplier = scale;
-      offset = src0 + (-dst0 + 0.5) * scale;
+      offset = src0 + (-dst0 + 0.5f) * scale;
    } else {
       /* When mirroring X we need:
        *   src_x - src_x0 = dst_x1 - dst_x - 0.5
@@ -1797,7 +1801,7 @@
        *   src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
        */
       multiplier = -scale;
-      offset = src0 + (dst1 - 0.5) * scale;
+      offset = src0 + (dst1 - 0.5f) * scale;
    }
 }
 
@@ -1952,8 +1956,8 @@
    /* Scaling factors used for bilinear filtering in multisample scaled
     * blits.
     */
-   wm_prog_key.x_scale = 2.0;
-   wm_prog_key.y_scale = src_mt->num_samples / 2.0;
+   wm_prog_key.x_scale = 2.0f;
+   wm_prog_key.y_scale = src_mt->num_samples / 2.0f;
 
    if (filter == GL_LINEAR && src.num_samples <= 1 && dst.num_samples <= 1)
       wm_prog_key.bilinear_filter = true;
@@ -2000,9 +2004,9 @@
    x1 = wm_push_consts.dst_x1 = roundf(dst_x1);
    y1 = wm_push_consts.dst_y1 = roundf(dst_y1);
    wm_push_consts.rect_grid_x1 = (minify(src_mt->logical_width0, src_level) *
-                                  wm_prog_key.x_scale - 1.0);
+                                  wm_prog_key.x_scale - 1.0f);
    wm_push_consts.rect_grid_y1 = (minify(src_mt->logical_height0, src_level) *
-                                  wm_prog_key.y_scale - 1.0);
+                                  wm_prog_key.y_scale - 1.0f);
 
    wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x);
    wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -29,7 +29,8 @@
 brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw,
                                            bool debug_flag)
    : mem_ctx(ralloc_context(NULL)),
-     generator(brw, mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
+     generator(brw->intelScreen->compiler, brw,
+               mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key),
                (struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data),
                NULL, 0, false, "BLORP")
 {
@@ -72,7 +73,7 @@
    emit_cmp(BRW_CONDITIONAL_L, x, dst_x1)->predicate = BRW_PREDICATE_NORMAL;
    emit_cmp(BRW_CONDITIONAL_L, y, dst_y1)->predicate = BRW_PREDICATE_NORMAL;
 
-   fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, g1, f0, g1);
+   fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, g1, f0, g1);
    inst->force_writemask_all = true;
    insts.push_tail(inst);
 }
@@ -83,7 +84,7 @@
                                           unsigned base_mrf,
                                           unsigned msg_length)
 {
-   fs_inst *inst = new (mem_ctx) fs_inst(op, dst, brw_message_reg(base_mrf),
+   fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf),
                                          fs_reg(0u));
 
    inst->base_mrf = base_mrf;
@@ -118,7 +119,8 @@
 {
    assert(combine_opcode == BRW_OPCODE_ADD || combine_opcode == BRW_OPCODE_AVG);
 
-   insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, dst, src_1, src_2));
+   insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, 16, dst,
+                                         src_1, src_2));
 }
 
 fs_inst *
@@ -126,7 +128,7 @@
                                const struct brw_reg &x,
                                const struct brw_reg &y)
 {
-   fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP,
+   fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP, 16,
                                         vec16(brw_null_reg()), x, y);
    cmp->conditional_mod = op;
    insts.push_tail(cmp);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_blorp.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_blorp.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_blorp.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_blorp.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -220,13 +220,13 @@
     * data with different formats, which blorp does for stencil and depth
     * data.
     */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
 retry:
    intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
    intel_batchbuffer_save_state(brw);
    drm_intel_bo *saved_bo = brw->batch.bo;
-   uint32_t saved_used = brw->batch.used;
+   uint32_t saved_used = USED_BATCH(brw->batch);
    uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
 
    switch (brw->gen) {
@@ -245,7 +245,7 @@
     * reserved enough space that a wrap will never happen.
     */
    assert(brw->batch.bo == saved_bo);
-   assert((brw->batch.used - saved_used) * 4 +
+   assert((USED_BATCH(brw->batch) - saved_used) * 4 +
           (saved_state_batch_offset - brw->batch.state_batch_offset) <
           estimated_max_batch_usage);
    /* Shut up compiler warnings on release build */
@@ -283,7 +283,7 @@
    /* Flush the sampler cache so any texturing from the destination is
     * coherent.
     */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 }
 
 brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_cfg.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_cfg.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_cfg.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_cfg.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -141,12 +141,12 @@
 }
 
 void
-bblock_t::dump(backend_visitor *v) const
+bblock_t::dump(backend_shader *s) const
 {
    int ip = this->start_ip;
    foreach_inst_in_block(backend_instruction, inst, this) {
       fprintf(stderr, "%5d: ", ip);
-      v->dump_instruction(inst);
+      s->dump_instruction(inst);
       ip++;
    }
 }
@@ -208,6 +208,7 @@
          cur_else = cur;
 
 	 next = new_block();
+         assert(cur_if != NULL);
 	 cur_if->add_successor(mem_ctx, next);
 
 	 set_next_block(&cur, next, ip);
@@ -231,6 +232,7 @@
          if (cur_else) {
             cur_else->add_successor(mem_ctx, cur_endif);
          } else {
+            assert(cur_if != NULL);
             cur_if->add_successor(mem_ctx, cur_endif);
          }
 
@@ -273,6 +275,7 @@
          inst->exec_node::remove();
          cur->instructions.push_tail(inst);
 
+         assert(cur_do != NULL);
 	 cur->add_successor(mem_ctx, cur_do);
 
 	 next = new_block();
@@ -286,6 +289,7 @@
          inst->exec_node::remove();
          cur->instructions.push_tail(inst);
 
+         assert(cur_while != NULL);
 	 cur->add_successor(mem_ctx, cur_while);
 
 	 next = new_block();
@@ -299,6 +303,7 @@
          inst->exec_node::remove();
          cur->instructions.push_tail(inst);
 
+         assert(cur_do != NULL && cur_while != NULL);
 	 cur->add_successor(mem_ctx, cur_do);
 	 set_next_block(&cur, cur_while, ip);
 
@@ -411,7 +416,7 @@
 }
 
 void
-cfg_t::dump(backend_visitor *v)
+cfg_t::dump(backend_shader *s)
 {
    if (idom_dirty)
       calculate_idom();
@@ -423,8 +428,8 @@
                  link->block->num);
       }
       fprintf(stderr, "\n");
-      if (v != NULL)
-         block->dump(v);
+      if (s != NULL)
+         block->dump(s);
       fprintf(stderr, "END B%d", block->num);
       foreach_list_typed(bblock_link, link, link, &block->children) {
          fprintf(stderr, " ->B%d",
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_cfg.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_cfg.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_cfg.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_cfg.h	2015-09-16 14:36:09.000000000 +0000
@@ -60,7 +60,7 @@
    bool is_successor_of(const bblock_t *block) const;
    bool can_combine_with(const bblock_t *that) const;
    void combine_with(bblock_t *that);
-   void dump(backend_visitor *v) const;
+   void dump(backend_shader *s) const;
 
    backend_instruction *start();
    const backend_instruction *start() const;
@@ -273,7 +273,7 @@
    void calculate_idom();
    static bblock_t *intersect(bblock_t *b1, bblock_t *b2);
 
-   void dump(backend_visitor *v);
+   void dump(backend_shader *s);
    void dump_cfg();
    void dump_domtree();
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_clear.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_clear.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_clear.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_clear.c	2015-09-16 14:36:09.000000000 +0000
@@ -121,8 +121,9 @@
     * first.
     */
    if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(ctx, fb)) {
-      perf_debug("Failed to fast clear depth due to scissor being enabled.  "
-                 "Possible 5%% performance win if avoided.\n");
+      perf_debug("Failed to fast clear %dx%d depth because of scissors.  "
+                 "Possible 5%% performance win if avoided.\n",
+                 mt->logical_width0, mt->logical_height0);
       return false;
    }
 
@@ -183,7 +184,7 @@
     *      must be issued before the rectangle primitive used for the depth
     *      buffer clear operation.
     */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    if (fb->MaxNumLayers > 0) {
       for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
@@ -203,7 +204,7 @@
        *      by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
        *      followed by Depth FLUSH'
       */
-      intel_batchbuffer_emit_mi_flush(brw);
+      brw_emit_mi_flush(brw);
    }
 
    /* Now, the HiZ buffer contains data that needs to be resolved to the depth
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_clip_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_clip_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_clip_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_clip_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "main/framebuffer.h"
 
 static void
 upload_clip_vp(struct brw_context *brw)
@@ -59,7 +60,9 @@
    struct brw_clip_unit_state *clip;
 
    /* _NEW_BUFFERS */
-   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const float fb_width = (float)_mesa_geometric_width(fb);
+   const float fb_height = (float)_mesa_geometric_height(fb);
 
    upload_clip_vp(brw);
 
@@ -127,8 +130,8 @@
    /* enable guardband clipping if we can */
    if (ctx->ViewportArray[0].X == 0 &&
        ctx->ViewportArray[0].Y == 0 &&
-       ctx->ViewportArray[0].Width == (float) fb->Width &&
-       ctx->ViewportArray[0].Height == (float) fb->Height)
+       ctx->ViewportArray[0].Width == fb_width &&
+       ctx->ViewportArray[0].Height == fb_height)
    {
       clip->clip5.guard_band_enable = 1;
       clip->clip6.clipper_viewport_state_ptr =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_compute.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_compute.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_compute.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_compute.c	2015-09-16 14:36:09.000000000 +0000
@@ -45,7 +45,7 @@
    unsigned thread_width_max =
       (group_size + simd_size - 1) / simd_size;
 
-   uint32_t right_mask = (1u << simd_size) - 1;
+   uint32_t right_mask = 0xffffffffu >> (32 - simd_size);
    const unsigned right_non_aligned = group_size & (simd_size - 1);
    if (right_non_aligned != 0)
       right_mask >>= (simd_size - right_non_aligned);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_conditional_render.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_conditional_render.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_conditional_render.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_conditional_render.c	2015-09-16 14:36:09.000000000 +0000
@@ -56,6 +56,12 @@
 
    assert(query->bo != NULL);
 
+   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
+    * command when loading the values into the predicate source registers for
+    * conditional rendering.
+    */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);
+
    brw_load_register_mem64(brw,
                            MI_PREDICATE_SRC0,
                            query->bo,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_context.c	2015-09-16 14:37:00.000000000 +0000
@@ -50,6 +50,7 @@
 
 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_shader.h"
 #include "brw_draw.h"
 #include "brw_state.h"
 
@@ -68,8 +69,6 @@
 #include "tnl/t_pipeline.h"
 #include "util/ralloc.h"
 
-#include "glsl/nir/nir.h"
-
 /***************************************
  * Mesa's Driver Functions
  ***************************************/
@@ -324,6 +323,15 @@
 
    ctx->Const.StripTextureBorder = true;
 
+   ctx->Const.MaxUniformBlockSize = 65536;
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_program_constants *prog = &ctx->Const.Program[i];
+      prog->MaxUniformBlocks = 12;
+      prog->MaxCombinedUniformComponents =
+         prog->MaxUniformComponents +
+         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+   }
+
    ctx->Const.MaxDualSourceDrawBuffers = 1;
    ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
@@ -428,11 +436,7 @@
 
    ctx->Const.MinLineWidth = 1.0;
    ctx->Const.MinLineWidthAA = 1.0;
-   if (brw->gen >= 9 || brw->is_cherryview) {
-      ctx->Const.MaxLineWidth = 40.0;
-      ctx->Const.MaxLineWidthAA = 40.0;
-      ctx->Const.LineWidthGranularity = 0.125;
-   } else if (brw->gen >= 6) {
+   if (brw->gen >= 6) {
       ctx->Const.MaxLineWidth = 7.375;
       ctx->Const.MaxLineWidthAA = 7.375;
       ctx->Const.LineWidthGranularity = 0.125;
@@ -510,6 +514,18 @@
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
       ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
       ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
+
+      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
+         BRW_MAX_IMAGES;
+      ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
+         (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
+      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
+         BRW_MAX_IMAGES;
+      ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
+      ctx->Const.MaxCombinedShaderOutputResources =
+         MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
+      ctx->Const.MaxImageSamples = 0;
+      ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
    }
 
    /* Gen6 converts quads to polygon in beginning of 3D pipeline,
@@ -552,6 +568,7 @@
     */
    ctx->Const.UniformBufferOffsetAlignment = 16;
    ctx->Const.TextureBufferOffsetAlignment = 16;
+   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
 
    if (brw->gen >= 6) {
       ctx->Const.MaxVarying = 32;
@@ -561,55 +578,12 @@
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
    }
 
-   static const nir_shader_compiler_options nir_options = {
-      .native_integers = true,
-      /* In order to help allow for better CSE at the NIR level we tell NIR
-       * to split all ffma instructions during opt_algebraic and we then
-       * re-combine them as a later step.
-       */
-      .lower_ffma = true,
-      .lower_sub = true,
-   };
-
    /* We want the GLSL compiler to emit code that uses condition codes */
    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-      ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
-      ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
-	 (i == MESA_SHADER_FRAGMENT);
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
-	 (i == MESA_SHADER_FRAGMENT);
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
-      ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
-
-      /* !ARB_gpu_shader5 */
-      if (brw->gen < 7)
-         ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
-   }
-
-   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
-   ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
-   if (brw->scalar_vs) {
-      /* If we're using the scalar backend for vertex shaders, we need to
-       * configure these accordingly.
-       */
-      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
-      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
-      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
-
-      if (brw_env_var_as_boolean("INTEL_USE_NIR", true))
-         ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options;
+      ctx->Const.ShaderCompilerOptions[i] =
+         brw->intelScreen->compiler->glsl_compiler_options[i];
    }
 
-   if (brw_env_var_as_boolean("INTEL_USE_NIR", true))
-      ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options;
-
-   ctx->Const.ShaderCompilerOptions[MESA_SHADER_COMPUTE].NirOptions = &nir_options;
-
    /* ARB_viewport_array */
    if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
@@ -624,6 +598,12 @@
    /* ARB_gpu_shader5 */
    if (brw->gen >= 7)
       ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
+
+   /* ARB_framebuffer_no_attachments */
+   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
+   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
+   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
+   ctx->Const.MaxFramebufferSamples = max_samples;
 }
 
 static void
@@ -756,6 +736,7 @@
    brw->is_baytrail = devinfo->is_baytrail;
    brw->is_haswell = devinfo->is_haswell;
    brw->is_cherryview = devinfo->is_cherryview;
+   brw->is_broxton = devinfo->is_broxton;
    brw->has_llc = devinfo->has_llc;
    brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
    brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
@@ -826,10 +807,9 @@
    _mesa_meta_init(ctx);
 
    brw_process_driconf_options(brw);
-   brw_process_intel_debug_variable(brw);
 
-   if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
-      brw->scalar_vs = true;
+   if (INTEL_DEBUG & DEBUG_PERF)
+      brw->perf_debug = true;
 
    brw_initialize_context_constants(brw);
 
@@ -860,6 +840,12 @@
       }
    }
 
+   if (brw_init_pipe_control(brw, devinfo)) {
+      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
+      intelDestroyContext(driContextPriv);
+      return false;
+   }
+
    brw_init_state(brw);
 
    intelInitExtensions(ctx);
@@ -906,6 +892,10 @@
 
    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
 
+   brw->use_resource_streamer = screen->has_resource_streamer &&
+      (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
+       brw_env_var_as_boolean("INTEL_USE_GATHER", false));
+
    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
 
@@ -972,6 +962,10 @@
    if (brw->wm.base.scratch_bo)
       drm_intel_bo_unreference(brw->wm.base.scratch_bo);
 
+   gen7_reset_hw_bt_pool_offsets(brw);
+   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
+   brw->hw_bt_pool.bo = NULL;
+
    drm_intel_gem_context_destroy(brw->hw_ctx);
 
    if (ctx->swrast_context) {
@@ -983,6 +977,7 @@
    if (ctx->swrast_context)
       _swrast_DestroyContext(&brw->ctx);
 
+   brw_fini_pipe_control(brw);
    intel_batchbuffer_free(brw);
 
    drm_intel_bo_unreference(brw->throttle_batch[1]);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -201,6 +201,7 @@
    BRW_STATE_STATS_WM,
    BRW_STATE_UNIFORM_BUFFER,
    BRW_STATE_ATOMIC_BUFFER,
+   BRW_STATE_IMAGE_UNITS,
    BRW_STATE_META_IN_PROGRESS,
    BRW_STATE_INTERPOLATION_MAP,
    BRW_STATE_PUSH_CONSTANT_ALLOCATION,
@@ -282,6 +283,7 @@
 #define BRW_NEW_STATS_WM                (1ull << BRW_STATE_STATS_WM)
 #define BRW_NEW_UNIFORM_BUFFER          (1ull << BRW_STATE_UNIFORM_BUFFER)
 #define BRW_NEW_ATOMIC_BUFFER           (1ull << BRW_STATE_ATOMIC_BUFFER)
+#define BRW_NEW_IMAGE_UNITS             (1ull << BRW_STATE_IMAGE_UNITS)
 #define BRW_NEW_META_IN_PROGRESS        (1ull << BRW_STATE_META_IN_PROGRESS)
 #define BRW_NEW_INTERPOLATION_MAP       (1ull << BRW_STATE_INTERPOLATION_MAP)
 #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
@@ -361,6 +363,7 @@
 
    GLuint nr_params;       /**< number of float params/constants */
    GLuint nr_pull_params;
+   unsigned nr_image_params;
 
    unsigned curb_read_length;
    unsigned total_scratch;
@@ -381,6 +384,59 @@
     */
    const gl_constant_value **param;
    const gl_constant_value **pull_param;
+
+   /**
+    * Image metadata passed to the shader as uniforms.  This is deliberately
+    * ignored by brw_stage_prog_data_compare() because its contents don't have
+    * any influence on program compilation.
+    */
+   struct brw_image_param *image_param;
+};
+
+/*
+ * Image metadata structure as laid out in the shader parameter
+ * buffer.  Entries have to be 16B-aligned for the vec4 back-end to be
+ * able to use them.  That's okay because the padding and any unused
+ * entries [most of them except when we're doing untyped surface
+ * access] will be removed by the uniform packing pass.
+ */
+#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET      0
+#define BRW_IMAGE_PARAM_OFFSET_OFFSET           4
+#define BRW_IMAGE_PARAM_SIZE_OFFSET             8
+#define BRW_IMAGE_PARAM_STRIDE_OFFSET           12
+#define BRW_IMAGE_PARAM_TILING_OFFSET           16
+#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET        20
+#define BRW_IMAGE_PARAM_SIZE                    24
+
+struct brw_image_param {
+   /** Surface binding table index. */
+   uint32_t surface_idx;
+
+   /** Offset applied to the X and Y surface coordinates. */
+   uint32_t offset[2];
+
+   /** Surface X, Y and Z dimensions. */
+   uint32_t size[3];
+
+   /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
+    * pixels, vertical slice stride in pixels.
+    */
+   uint32_t stride[4];
+
+   /** Log2 of the tiling modulus in the X, Y and Z dimension. */
+   uint32_t tiling[3];
+
+   /**
+    * Right shift to apply for bit 6 address swizzling.  Two different
+    * swizzles can be specified and will be applied one after the other.  The
+    * resulting address will be:
+    *
+    *  addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
+    *                              (addr >> swizzling[1])))
+    *
+    * Use \c 0xff if any of the swizzles is not required.
+    */
+   uint32_t swizzling[2];
 };
 
 /* Data about a particular attempt to compile a program.  Note that
@@ -410,6 +466,7 @@
 
    uint8_t computed_depth_mode;
 
+   bool early_fragment_tests;
    bool no_8;
    bool dual_src_blend;
    bool uses_pos_offset;
@@ -606,6 +663,12 @@
    unsigned svbi_postincrement_value;
 };
 
+enum shader_dispatch_mode {
+   DISPATCH_MODE_4X1_SINGLE = 0,
+   DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
+   DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
+   DISPATCH_MODE_SIMD8 = 3,
+};
 
 /* Note: brw_vue_prog_data_compare() must be updated when adding fields to
  * this struct!
@@ -623,7 +686,7 @@
     */
    GLuint urb_entry_size;
 
-   bool simd8;
+   enum shader_dispatch_mode dispatch_mode;
 };
 
 
@@ -721,14 +784,6 @@
    int invocations;
 
    /**
-    * Dispatch mode, can be any of:
-    * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
-    * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
-    * GEN7_GS_DISPATCH_MODE_SINGLE
-    */
-   int dispatch_mode;
-
-   /**
     * Gen6 transform feedback enabled flag.
     */
    bool gen6_xfb_enabled;
@@ -824,20 +879,10 @@
 enum shader_time_shader_type {
    ST_NONE,
    ST_VS,
-   ST_VS_WRITTEN,
-   ST_VS_RESET,
    ST_GS,
-   ST_GS_WRITTEN,
-   ST_GS_RESET,
    ST_FS8,
-   ST_FS8_WRITTEN,
-   ST_FS8_RESET,
    ST_FS16,
-   ST_FS16_WRITTEN,
-   ST_FS16_RESET,
    ST_CS,
-   ST_CS_WRITTEN,
-   ST_CS_RESET,
 };
 
 struct brw_vertex_buffer {
@@ -881,11 +926,12 @@
    drm_intel_bo *bo;
    /** Last BO submitted to the hardware.  Used for glFinish(). */
    drm_intel_bo *last_bo;
-   /** BO for post-sync nonzero writes for gen6 workaround. */
-   drm_intel_bo *workaround_bo;
 
+#ifdef DEBUG
    uint16_t emit, total;
-   uint16_t used, reserved_space;
+#endif
+   uint16_t reserved_space;
+   uint32_t *map_next;
    uint32_t *map;
    uint32_t *cpu_map;
 #define BATCH_SZ (8192*sizeof(uint32_t))
@@ -894,10 +940,8 @@
    enum brw_gpu_ring ring;
    bool needs_sol_reset;
 
-   uint8_t pipe_controls_since_last_cs_stall;
-
    struct {
-      uint16_t used;
+      uint32_t *map_next;
       int reloc_count;
    } saved;
 };
@@ -982,6 +1026,8 @@
    BRW_PREDICATE_STATE_USE_BIT
 };
 
+struct shader_times;
+
 /**
  * brw_context is derived from gl_context.
  */
@@ -1045,6 +1091,10 @@
 
    drm_intel_context *hw_ctx;
 
+   /** BO for post-sync nonzero writes for gen6 workaround. */
+   drm_intel_bo *workaround_bo;
+   uint8_t pipe_controls_since_last_cs_stall;
+
    /**
     * Set of drm_intel_bo * that have been rendered to within this batchbuffer
     * and would need flushing before being used from another cache domain that
@@ -1128,6 +1178,7 @@
    bool is_baytrail;
    bool is_haswell;
    bool is_cherryview;
+   bool is_broxton;
 
    bool has_hiz;
    bool has_separate_stencil;
@@ -1140,7 +1191,7 @@
    bool has_pln;
    bool no_simd8;
    bool use_rep_send;
-   bool scalar_vs;
+   bool use_resource_streamer;
 
    /**
     * Some versions of Gen hardware don't do centroid interpolation correctly
@@ -1247,12 +1298,12 @@
     * Platform specific constants containing the maximum number of threads
     * for each pipeline stage.
     */
-   int max_vs_threads;
-   int max_hs_threads;
-   int max_ds_threads;
-   int max_gs_threads;
-   int max_wm_threads;
-   int max_cs_threads;
+   unsigned max_vs_threads;
+   unsigned max_hs_threads;
+   unsigned max_ds_threads;
+   unsigned max_gs_threads;
+   unsigned max_wm_threads;
+   unsigned max_cs_threads;
 
    /* BRW_NEW_URB_ALLOCATIONS:
     */
@@ -1404,6 +1455,12 @@
       struct brw_cs_prog_data *prog_data;
    } cs;
 
+   /* RS hardware binding table */
+   struct {
+      drm_intel_bo *bo;
+      uint32_t next_offset;
+   } hw_bt_pool;
+
    struct {
       uint32_t state_offset;
       uint32_t blend_state_offset;
@@ -1459,8 +1516,8 @@
    } perfmon;
 
    int num_atoms[BRW_NUM_PIPELINES];
-   const struct brw_tracked_state render_atoms[57];
-   const struct brw_tracked_state compute_atoms[3];
+   const struct brw_tracked_state render_atoms[60];
+   const struct brw_tracked_state compute_atoms[4];
 
    /* If (INTEL_DEBUG & DEBUG_BATCH) */
    struct {
@@ -1507,7 +1564,7 @@
       const char **names;
       int *ids;
       enum shader_time_shader_type *types;
-      uint64_t *cumulative;
+      struct shader_times *cumulative;
       int num_entries;
       int max_entries;
       double report_time;
@@ -1738,11 +1795,17 @@
                              struct gl_shader_program *prog,
                              struct brw_stage_state *stage_state,
                              struct brw_stage_prog_data *prog_data);
+void brw_upload_image_surfaces(struct brw_context *brw,
+                               struct gl_shader *shader,
+                               struct brw_stage_state *stage_state,
+                               struct brw_stage_prog_data *prog_data);
 
 /* brw_surface_formats.c */
 bool brw_render_target_supported(struct brw_context *brw,
                                  struct gl_renderbuffer *rb);
 uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
+mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
+                                        mesa_format format);
 
 /* brw_performance_monitor.c */
 void brw_init_performance_monitors(struct brw_context *brw);
@@ -2006,6 +2069,25 @@
                            struct brw_stage_state *stage_state,
                            enum aub_state_struct_type type);
 
+bool
+gen9_use_linear_1d_layout(const struct brw_context *brw,
+                          const struct intel_mipmap_tree *mt);
+
+/* brw_pipe_control.c */
+int brw_init_pipe_control(struct brw_context *brw,
+			  const struct brw_device_info *info);
+void brw_fini_pipe_control(struct brw_context *brw);
+
+void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
+void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+                                 drm_intel_bo *bo, uint32_t offset,
+                                 uint32_t imm_lower, uint32_t imm_upper);
+void brw_emit_mi_flush(struct brw_context *brw);
+void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
+void brw_emit_depth_stall_flushes(struct brw_context *brw);
+void gen7_emit_vs_workaround_flush(struct brw_context *brw);
+void gen7_emit_cs_stall_flush(struct brw_context *brw);
+
 #ifdef __cplusplus
 }
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_cs.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_cs.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_cs.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_cs.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -82,16 +82,21 @@
    prog_data->local_size[0] = cp->LocalSize[0];
    prog_data->local_size[1] = cp->LocalSize[1];
    prog_data->local_size[2] = cp->LocalSize[2];
-   int local_workgroup_size =
+   unsigned local_workgroup_size =
       cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
 
    cfg_t *cfg = NULL;
    const char *fail_msg = NULL;
 
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
+
    /* Now the main event: Visit the shader IR and generate our CS IR for it.
     */
-   fs_visitor v8(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                 &cp->Base, 8);
+   fs_visitor v8(brw->intelScreen->compiler, brw,
+                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                 &cp->Base, 8, st_index);
    if (!v8.run_cs()) {
       fail_msg = v8.fail_msg;
    } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
@@ -99,8 +104,9 @@
       prog_data->simd_size = 8;
    }
 
-   fs_visitor v16(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                  &cp->Base, 16);
+   fs_visitor v16(brw->intelScreen->compiler, brw,
+                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+                  &cp->Base, 16, st_index);
    if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
        !fail_msg && !v8.simd16_unsupported &&
        local_workgroup_size <= 16 * brw->max_cs_threads) {
@@ -128,7 +134,8 @@
       return NULL;
    }
 
-   fs_generator g(brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+   fs_generator g(brw->intelScreen->compiler, brw,
+                  mem_ctx, (void*) key, &prog_data->base, &cp->Base,
                   v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
    if (INTEL_DEBUG & DEBUG_CS) {
       char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
@@ -175,7 +182,8 @@
     * prog_data associated with the compiled program, and which will be freed
     * by the state cache.
     */
-   int param_count = cs->num_uniform_components;
+   int param_count = cs->num_uniform_components +
+                     cs->NumImages * BRW_IMAGE_PARAM_SIZE;
 
    /* The backend also sometimes adds params for texture size. */
    param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@ -183,7 +191,10 @@
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    prog_data.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
    prog_data.base.nr_params = param_count;
+   prog_data.base.nr_image_params = cs->NumImages;
 
    program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
                          &cp->program, prog, &program_size);
@@ -284,6 +295,17 @@
 }
 
 
+static unsigned
+get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
+{
+   /* Invocations in one local work-group over the SIMD width, rounded up. */
+   const unsigned invocations = cs_prog_data->local_size[0] *
+      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+   const unsigned width = cs_prog_data->simd_size;
+   return (invocations + width - 1) / width;
+}
+
+
 static void
 brw_upload_cs_state(struct brw_context *brw)
 {
@@ -309,6 +331,8 @@
                                             prog_data->binding_table.size_bytes,
                                             32, &stage_state->bind_bo_offset);
 
+   unsigned threads = get_cs_thread_count(cs_prog_data);
+
    uint32_t dwords = brw->gen < 8 ? 8 : 9;
    BEGIN_BATCH(dwords);
    OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
@@ -358,6 +382,13 @@
    desc[dw++] = 0;
    desc[dw++] = 0;
    desc[dw++] = stage_state->bind_bo_offset;
+   desc[dw++] = 0;
+   const uint32_t media_threads =
+      brw->gen >= 8 ?
+      SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+      SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
+   assert(threads <= brw->max_cs_threads);
+   desc[dw++] = media_threads;
 
    BEGIN_BATCH(4);
    OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_curbe.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_curbe.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_curbe.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_curbe.c	2015-09-16 14:36:09.000000000 +0000
@@ -176,7 +176,7 @@
    ADVANCE_BATCH();
 }
 
-static GLfloat fixed_plane[6][4] = {
+static const GLfloat fixed_plane[6][4] = {
    { 0,    0,   -1, 1 },
    { 0,    0,    1, 1 },
    { 0,   -1,    0, 1 },
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -36,11 +36,11 @@
  *   - if/else/endif
  */
 bool
-dead_control_flow_eliminate(backend_visitor *v)
+dead_control_flow_eliminate(backend_shader *s)
 {
    bool progress = false;
 
-   foreach_block_safe (block, v->cfg) {
+   foreach_block_safe (block, s->cfg) {
       bblock_t *if_block = NULL, *else_block = NULL, *endif_block = block;
       bool found = false;
 
@@ -115,7 +115,7 @@
    }
 
    if (progress)
-      v->invalidate_live_intervals();
+      s->invalidate_live_intervals();
 
    return progress;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_dead_control_flow.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_dead_control_flow.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_dead_control_flow.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_dead_control_flow.h	2015-09-16 14:36:09.000000000 +0000
@@ -23,4 +23,4 @@
 
 #include "brw_shader.h"
 
-bool dead_control_flow_eliminate(backend_visitor *v);
+bool dead_control_flow_eliminate(backend_shader *s);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_defines.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_defines.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_defines.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_defines.h	2015-09-16 14:36:09.000000000 +0000
@@ -592,6 +592,15 @@
 #define GEN7_SURFACE_MOCS_SHIFT                 16
 #define GEN7_SURFACE_MOCS_MASK                  INTEL_MASK(19, 16)
 
+#define GEN9_SURFACE_TRMODE_SHIFT          18
+#define GEN9_SURFACE_TRMODE_MASK           INTEL_MASK(19, 18)
+#define GEN9_SURFACE_TRMODE_NONE           0
+#define GEN9_SURFACE_TRMODE_TILEYF         1
+#define GEN9_SURFACE_TRMODE_TILEYS         2
+
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_SHIFT      8
+#define GEN9_SURFACE_MIP_TAIL_START_LOD_MASK       INTEL_MASK(11, 8)
+
 /* Surface state DW6 */
 #define GEN7_SURFACE_MCS_ENABLE                 (1 << 0)
 #define GEN7_SURFACE_MCS_PITCH_SHIFT            3
@@ -866,6 +875,21 @@
     * instructions.
     */
    FS_OPCODE_FB_WRITE = 128,
+
+   /**
+    * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
+    * individual sources instead of as a single payload blob:
+    *
+    * Source 0: [required] Color 0.
+    * Source 1: [optional] Color 1 (for dual source blend messages).
+    * Source 2: [optional] Src0 Alpha.
+    * Source 3: [optional] Source Depth (passthrough from the thread payload).
+    * Source 4: [optional] Destination Depth (gl_FragDepth).
+    * Source 5: [optional] Sample Mask (gl_SampleMask).
+    * Source 6: [required] Number of color components (as a UD immediate).
+    */
+   FS_OPCODE_FB_WRITE_LOGICAL,
+
    FS_OPCODE_BLORP_FB_WRITE,
    FS_OPCODE_REP_FB_WRITE,
    SHADER_OPCODE_RCP,
@@ -879,18 +903,49 @@
    SHADER_OPCODE_SIN,
    SHADER_OPCODE_COS,
 
+   /**
+    * Texture sampling opcodes.
+    *
+    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
+    * opcode but instead of taking a single payload blob they expect their
+    * arguments separately as individual sources:
+    *
+    * Source 0: [optional] Texture coordinates.
+    * Source 1: [optional] Shadow comparator.
+    * Source 2: [optional] dPdx if the operation takes explicit derivatives,
+    *                      otherwise LOD value.
+    * Source 3: [optional] dPdy if the operation takes explicit derivatives.
+    * Source 4: [optional] Sample index.
+    * Source 5: [optional] MCS data.
+    * Source 6: [required] Texture sampler.
+    * Source 7: [optional] Texel offset.
+    * Source 8: [required] Number of coordinate components (as UD immediate).
+    * Source 9: [required] Number of derivative components (as UD immediate).
+    */
    SHADER_OPCODE_TEX,
+   SHADER_OPCODE_TEX_LOGICAL,
    SHADER_OPCODE_TXD,
+   SHADER_OPCODE_TXD_LOGICAL,
    SHADER_OPCODE_TXF,
+   SHADER_OPCODE_TXF_LOGICAL,
    SHADER_OPCODE_TXL,
+   SHADER_OPCODE_TXL_LOGICAL,
    SHADER_OPCODE_TXS,
+   SHADER_OPCODE_TXS_LOGICAL,
    FS_OPCODE_TXB,
+   FS_OPCODE_TXB_LOGICAL,
    SHADER_OPCODE_TXF_CMS,
+   SHADER_OPCODE_TXF_CMS_LOGICAL,
    SHADER_OPCODE_TXF_UMS,
+   SHADER_OPCODE_TXF_UMS_LOGICAL,
    SHADER_OPCODE_TXF_MCS,
+   SHADER_OPCODE_TXF_MCS_LOGICAL,
    SHADER_OPCODE_LOD,
+   SHADER_OPCODE_LOD_LOGICAL,
    SHADER_OPCODE_TG4,
+   SHADER_OPCODE_TG4_LOGICAL,
    SHADER_OPCODE_TG4_OFFSET,
+   SHADER_OPCODE_TG4_OFFSET_LOGICAL,
 
    /**
     * Combines multiple sources of size 1 into a larger virtual GRF.
@@ -908,13 +963,33 @@
 
    SHADER_OPCODE_SHADER_TIME_ADD,
 
+   /**
+    * Typed and untyped surface access opcodes.
+    *
+    * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
+    * opcode but instead of taking a single payload blob they expect their
+    * arguments separately as individual sources:
+    *
+    * Source 0: [required] Surface coordinates.
+    * Source 1: [optional] Operation source.
+    * Source 2: [required] Surface index.
+    * Source 3: [required] Number of coordinate components (as UD immediate).
+    * Source 4: [required] Opcode-specific control immediate, same as source 2
+    *                      of the matching non-LOGICAL opcode.
+    */
    SHADER_OPCODE_UNTYPED_ATOMIC,
+   SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
    SHADER_OPCODE_UNTYPED_SURFACE_READ,
+   SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
    SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+   SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
 
    SHADER_OPCODE_TYPED_ATOMIC,
+   SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
    SHADER_OPCODE_TYPED_SURFACE_READ,
+   SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
    SHADER_OPCODE_TYPED_SURFACE_WRITE,
+   SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
 
    SHADER_OPCODE_MEMORY_FENCE,
 
@@ -960,7 +1035,6 @@
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
-   FS_OPCODE_SET_OMASK,
    FS_OPCODE_SET_SAMPLE_ID,
    FS_OPCODE_SET_SIMD4X2_OFFSET,
    FS_OPCODE_PACK_HALF_2x16_SPLIT,
@@ -1135,6 +1209,16 @@
     * Terminate the compute shader.
     */
    CS_OPCODE_CS_TERMINATE,
+
+   /**
+    * GLSL barrier()
+    */
+   SHADER_OPCODE_BARRIER,
+
+   /**
+    * Calculate the high 32-bits of a 32x32 multiply.
+    */
+   SHADER_OPCODE_MULH,
 };
 
 enum brw_urb_write_flags {
@@ -1596,6 +1680,14 @@
 #define BRW_SCRATCH_SPACE_SIZE_1M     10
 #define BRW_SCRATCH_SPACE_SIZE_2M     11
 
+#define BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY         0
+#define BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY        1
+#define BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG          2
+#define BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP        3
+#define BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG          4
+#define BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE 5
+#define BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE      6
+
 
 #define CMD_URB_FENCE                 0x6000
 #define CMD_CS_URB_STATE              0x6001
@@ -1618,6 +1710,36 @@
 #define _3DSTATE_BINDING_TABLE_POINTERS_GS	0x7829 /* GEN7+ */
 #define _3DSTATE_BINDING_TABLE_POINTERS_PS	0x782A /* GEN7+ */
 
+#define _3DSTATE_BINDING_TABLE_POOL_ALLOC       0x7919 /* GEN7.5+ */
+#define BRW_HW_BINDING_TABLE_ENABLE             (1 << 11)
+#define GEN7_HW_BT_POOL_MOCS_SHIFT              7
+#define GEN7_HW_BT_POOL_MOCS_MASK               INTEL_MASK(10, 7)
+#define GEN8_HW_BT_POOL_MOCS_SHIFT              0
+#define GEN8_HW_BT_POOL_MOCS_MASK               INTEL_MASK(6, 0)
+/* Only required in HSW */
+#define HSW_BT_POOL_ALLOC_MUST_BE_ONE           (3 << 5)
+
+#define _3DSTATE_BINDING_TABLE_EDIT_VS          0x7843 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_GS          0x7844 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_HS          0x7845 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_DS          0x7846 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_PS          0x7847 /* GEN7.5 */
+#define BRW_BINDING_TABLE_INDEX_SHIFT           16
+#define BRW_BINDING_TABLE_INDEX_MASK            INTEL_MASK(23, 16)
+
+#define BRW_BINDING_TABLE_EDIT_TARGET_ALL       3
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE1     2
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE0     1
+/* In HSW, when editing binding table entries to surface state offsets,
+ * the surface state offset is a 16-bit value aligned to 32 bytes. But
+ * Surface State Pointer in dword 2 is [15:0]. Right shift surf_offset
+ * by 5 bits so it won't disturb bit 16 (which is used as the binding
+ * table index entry), otherwise it would hang the GPU.
+ */
+#define HSW_SURFACE_STATE_EDIT(value)           ((value) >> 5)
+/* Same as Haswell, but surface state offsets are now aligned to 64 bytes. */
+#define GEN8_SURFACE_STATE_EDIT(value)          ((value) >> 6)
+
 #define _3DSTATE_SAMPLER_STATE_POINTERS		0x7802 /* GEN6+ */
 # define PS_SAMPLER_STATE_CHANGE				(1 << 12)
 # define GS_SAMPLER_STATE_CHANGE				(1 << 9)
@@ -1733,6 +1855,7 @@
 # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
 # define GEN6_VS_FLOATING_POINT_MODE_IEEE_754		(0 << 16)
 # define GEN6_VS_FLOATING_POINT_MODE_ALT		(1 << 16)
+# define HSW_VS_UAV_ACCESS_ENABLE                       (1 << 12)
 /* DW4 */
 # define GEN6_VS_DISPATCH_START_GRF_SHIFT		20
 # define GEN6_VS_URB_READ_LENGTH_SHIFT			11
@@ -1758,6 +1881,7 @@
 # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754		(0 << 16)
 # define GEN6_GS_FLOATING_POINT_MODE_ALT		(1 << 16)
+# define HSW_GS_UAV_ACCESS_ENABLE       		(1 << 12)
 /* DW4 */
 # define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT		23
 # define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT			17
@@ -1773,9 +1897,8 @@
 # define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID		1
 # define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT		20
 # define GEN7_GS_INSTANCE_CONTROL_SHIFT			15
-# define GEN7_GS_DISPATCH_MODE_SINGLE			(0 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE		(1 << 11)
-# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT		(2 << 11)
+# define GEN7_GS_DISPATCH_MODE_SHIFT                    11
+# define GEN7_GS_DISPATCH_MODE_MASK                     INTEL_MASK(12, 11)
 # define GEN6_GS_STATISTICS_ENABLE			(1 << 10)
 # define GEN6_GS_SO_STATISTICS_ENABLE			(1 << 9)
 # define GEN6_GS_RENDERING_ENABLE			(1 << 8)
@@ -2261,6 +2384,9 @@
 # define GEN7_WM_KILL_ENABLE				(1 << 25)
 # define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT              23
 # define GEN7_WM_USES_SOURCE_DEPTH			(1 << 20)
+# define GEN7_WM_EARLY_DS_CONTROL_NORMAL                (0 << 21)
+# define GEN7_WM_EARLY_DS_CONTROL_PSEXEC                (1 << 21)
+# define GEN7_WM_EARLY_DS_CONTROL_PREPS                 (2 << 21)
 # define GEN7_WM_USES_SOURCE_W			        (1 << 19)
 # define GEN7_WM_POSITION_ZW_PIXEL			(0 << 17)
 # define GEN7_WM_POSITION_ZW_CENTROID			(2 << 17)
@@ -2285,6 +2411,7 @@
 /* DW2 */
 # define GEN7_WM_MSDISPMODE_PERSAMPLE			(0 << 31)
 # define GEN7_WM_MSDISPMODE_PERPIXEL			(1 << 31)
+# define HSW_WM_UAV_ONLY                                (1 << 30)
 
 #define _3DSTATE_PS				0x7820 /* GEN7+ */
 /* DW1: kernel pointer */
@@ -2308,6 +2435,7 @@
 # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE	(1 << 8)
 # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
 # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE		(1 << 6)
+# define HSW_PS_UAV_ACCESS_ENABLE			(1 << 5)
 # define GEN7_PS_POSOFFSET_NONE				(0 << 3)
 # define GEN7_PS_POSOFFSET_CENTROID			(2 << 3)
 # define GEN7_PS_POSOFFSET_SAMPLE			(3 << 3)
@@ -2498,6 +2626,11 @@
 # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK       INTEL_MASK(15, 0)
 
 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD         0x7002
+/* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
+# define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
 #define MEDIA_STATE_FLUSH                       0x7004
 #define GPGPU_WALKER                            0x7105
 /* GEN8+ DW2 */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_device_info.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_device_info.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_device_info.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_device_info.c	2015-09-16 14:36:09.000000000 +0000
@@ -170,7 +170,8 @@
 #define HSW_FEATURES             \
    GEN7_FEATURES,                \
    .is_haswell = true,           \
-   .supports_simd16_3src = true
+   .supports_simd16_3src = true, \
+   .has_resource_streamer = true
 
 static const struct brw_device_info brw_device_info_hsw_gt1 = {
    HSW_FEATURES, .gt = 1,
@@ -229,6 +230,7 @@
 #define GEN8_FEATURES                               \
    .gen = 8,                                        \
    .has_hiz_and_separate_stencil = true,            \
+   .has_resource_streamer = true,                   \
    .must_use_separate_stencil = true,               \
    .has_llc = true,                                 \
    .has_pln = true,                                 \
@@ -297,41 +299,62 @@
    }
 };
 
-/* Thread counts and URB limits are placeholders, and may not be accurate. */
 #define GEN9_FEATURES                               \
    .gen = 9,                                        \
    .has_hiz_and_separate_stencil = true,            \
+   .has_resource_streamer = true,                   \
    .must_use_separate_stencil = true,               \
    .has_llc = true,                                 \
    .has_pln = true,                                 \
+   .supports_simd16_3src = true,                    \
    .max_vs_threads = 336,                           \
    .max_gs_threads = 336,                           \
+   .max_hs_threads = 336,                           \
+   .max_ds_threads = 336,                           \
    .max_wm_threads = 64 * 6,                        \
+   .max_cs_threads = 56,                            \
    .urb = {                                         \
       .size = 192,                                  \
       .min_vs_entries = 64,                         \
       .max_vs_entries = 1856,                       \
+      .max_hs_entries = 672,                        \
+      .max_ds_entries = 1120,                       \
       .max_gs_entries = 640,                        \
    }
 
-static const struct brw_device_info brw_device_info_skl_early = {
-   GEN9_FEATURES, .gt = 1,
-   .supports_simd16_3src = false,
-};
-
 static const struct brw_device_info brw_device_info_skl_gt1 = {
    GEN9_FEATURES, .gt = 1,
-   .supports_simd16_3src = true,
 };
 
 static const struct brw_device_info brw_device_info_skl_gt2 = {
    GEN9_FEATURES, .gt = 2,
-   .supports_simd16_3src = true,
 };
 
 static const struct brw_device_info brw_device_info_skl_gt3 = {
    GEN9_FEATURES, .gt = 3,
-   .supports_simd16_3src = true,
+};
+
+static const struct brw_device_info brw_device_info_bxt = {
+   GEN9_FEATURES,
+   .is_broxton = 1,
+   .gt = 1,
+   .has_llc = false,
+
+   /* XXX: These are preliminary thread counts and URB sizes. */
+   .max_vs_threads = 56,
+   .max_hs_threads = 56,
+   .max_ds_threads = 56,
+   .max_gs_threads = 56,
+   .max_wm_threads = 32,
+   .max_cs_threads = 28,
+   .urb = {
+      .size = 64,
+      .min_vs_entries = 34,
+      .max_vs_entries = 640,
+      .max_hs_entries = 80,
+      .max_ds_entries = 80,
+      .max_gs_entries = 256,
+   }
 };
 
 const struct brw_device_info *
@@ -348,8 +371,5 @@
       return NULL;
    }
 
-   if (devinfo->gen == 9 && (revision == 2 || revision == 3 || revision == -1))
-      return &brw_device_info_skl_early;
-
    return devinfo;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_device_info.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_device_info.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_device_info.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_device_info.h	2015-09-16 14:36:09.000000000 +0000
@@ -35,6 +35,7 @@
    bool is_baytrail;
    bool is_haswell;
    bool is_cherryview;
+   bool is_broxton;
 
    bool has_hiz_and_separate_stencil;
    bool must_use_separate_stencil;
@@ -45,6 +46,7 @@
    bool has_compr4;
    bool has_surface_tile_offset;
    bool supports_simd16_3src;
+   bool has_resource_streamer;
 
    /**
     * Quirks:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_disasm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_disasm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_disasm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_disasm.c	2015-09-16 14:36:09.000000000 +0000
@@ -402,6 +402,16 @@
    [HSW_SFID_CRE]                      = "cre",
 };
 
+static const char *const gen7_gateway_subfuncid[8] = {
+   [BRW_MESSAGE_GATEWAY_SFID_OPEN_GATEWAY] = "open",
+   [BRW_MESSAGE_GATEWAY_SFID_CLOSE_GATEWAY] = "close",
+   [BRW_MESSAGE_GATEWAY_SFID_FORWARD_MSG] = "forward msg",
+   [BRW_MESSAGE_GATEWAY_SFID_GET_TIMESTAMP] = "get timestamp",
+   [BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG] = "barrier msg",
+   [BRW_MESSAGE_GATEWAY_SFID_UPDATE_GATEWAY_STATE] = "update state",
+   [BRW_MESSAGE_GATEWAY_SFID_MMIO_READ_WRITE] = "mmio read/write",
+};
+
 static const char *const dp_write_port_msg_type[8] = {
    [0b000] = "OWord block write",
    [0b001] = "OWord dual block write",
@@ -977,13 +987,14 @@
               brw_inst_3src_src0_reg_nr(devinfo, inst));
    if (err == -1)
       return 0;
-   if (src0_subreg_nr)
+   if (src0_subreg_nr || brw_inst_3src_src0_rep_ctrl(devinfo, inst))
       format(file, ".%d", src0_subreg_nr);
    if (brw_inst_3src_src0_rep_ctrl(devinfo, inst))
       string(file, "<0,1,0>");
-   else
+   else {
       string(file, "<4,4,1>");
-   err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst));
+      err |= src_swizzle(file, brw_inst_3src_src0_swizzle(devinfo, inst));
+   }
    err |= control(file, "src da16 reg type", three_source_reg_encoding,
                   brw_inst_3src_src_type(devinfo, inst), NULL);
    return err;
@@ -1003,13 +1014,14 @@
               brw_inst_3src_src1_reg_nr(devinfo, inst));
    if (err == -1)
       return 0;
-   if (src1_subreg_nr)
+   if (src1_subreg_nr || brw_inst_3src_src1_rep_ctrl(devinfo, inst))
       format(file, ".%d", src1_subreg_nr);
    if (brw_inst_3src_src1_rep_ctrl(devinfo, inst))
       string(file, "<0,1,0>");
-   else
+   else {
       string(file, "<4,4,1>");
-   err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst));
+      err |= src_swizzle(file, brw_inst_3src_src1_swizzle(devinfo, inst));
+   }
    err |= control(file, "src da16 reg type", three_source_reg_encoding,
                   brw_inst_3src_src_type(devinfo, inst), NULL);
    return err;
@@ -1030,13 +1042,14 @@
               brw_inst_3src_src2_reg_nr(devinfo, inst));
    if (err == -1)
       return 0;
-   if (src2_subreg_nr)
+   if (src2_subreg_nr || brw_inst_3src_src2_rep_ctrl(devinfo, inst))
       format(file, ".%d", src2_subreg_nr);
    if (brw_inst_3src_src2_rep_ctrl(devinfo, inst))
       string(file, "<0,1,0>");
-   else
+   else {
       string(file, "<4,4,1>");
-   err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst));
+      err |= src_swizzle(file, brw_inst_3src_src2_swizzle(devinfo, inst));
+   }
    err |= control(file, "src da16 reg type", three_source_reg_encoding,
                   brw_inst_3src_src_type(devinfo, inst), NULL);
    return err;
@@ -1495,6 +1508,12 @@
             break;
          case BRW_SFID_THREAD_SPAWNER:
             break;
+
+         case BRW_SFID_MESSAGE_GATEWAY:
+            format(file, " (%s)",
+                   gen7_gateway_subfuncid[brw_inst_gateway_subfuncid(devinfo, inst)]);
+            break;
+
          case GEN7_SFID_DATAPORT_DATA_CACHE:
             if (devinfo->gen >= 7) {
                format(file, " (");
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -92,8 +92,10 @@
 {
    if (mode >= BRW_PRIM_OFFSET)
       return mode - BRW_PRIM_OFFSET;
-   else
+   else {
+      assert(mode < ARRAY_SIZE(prim_to_hw_prim));
       return prim_to_hw_prim[mode];
+   }
 }
 
 
@@ -102,13 +104,13 @@
  * programs be immune to the active primitive (ie. cope with all
  * possibilities).  That may not be realistic however.
  */
-static void brw_set_prim(struct brw_context *brw,
-                         const struct _mesa_prim *prim)
+static void
+brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
 {
    struct gl_context *ctx = &brw->ctx;
    uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);
 
-   DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
+   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
 
    /* Slight optimization to avoid the GS program when not needed:
     */
@@ -136,15 +138,12 @@
    }
 }
 
-static void gen6_set_prim(struct brw_context *brw,
-                          const struct _mesa_prim *prim)
+static void
+gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
 {
-   uint32_t hw_prim;
-
-   DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
-
-   hw_prim = get_hw_prim_for_gl_prim(prim->mode);
+   DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
 
+   const uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);
    if (hw_prim != brw->primitive) {
       brw->primitive = hw_prim;
       brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
@@ -160,7 +159,8 @@
  * quads so that those dangling vertices won't get drawn when we convert to
  * trifans/tristrips.
  */
-static GLuint trim(GLenum prim, GLuint length)
+static GLuint
+trim(GLenum prim, GLuint length)
 {
    if (prim == GL_QUAD_STRIP)
       return length > 3 ? (length - length % 2) : 0;
@@ -171,16 +171,16 @@
 }
 
 
-static void brw_emit_prim(struct brw_context *brw,
-			  const struct _mesa_prim *prim,
-			  uint32_t hw_prim)
+static void
+brw_emit_prim(struct brw_context *brw,
+              const struct _mesa_prim *prim,
+              uint32_t hw_prim)
 {
    int verts_per_instance;
    int vertex_access_type;
    int indirect_flag;
-   int predicate_enable;
 
-   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+   DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
        prim->start, prim->count);
 
    int start_vertex_location = prim->start;
@@ -214,9 +214,8 @@
     * and missed flushes of the render cache as it heads to other parts of
     * the besides the draw code.
     */
-   if (brw->always_flush_cache) {
-      intel_batchbuffer_emit_mi_flush(brw);
-   }
+   if (brw->always_flush_cache)
+      brw_emit_mi_flush(brw);
 
    /* If indirect, emit a bunch of loads from the indirect BO. */
    if (prim->is_indirect) {
@@ -254,22 +253,20 @@
          OUT_BATCH(0);
          ADVANCE_BATCH();
       }
-   }
-   else {
+   } else {
       indirect_flag = 0;
    }
 
+   BEGIN_BATCH(brw->gen >= 7 ? 7 : 6);
+
    if (brw->gen >= 7) {
-      if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
-         predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE;
-      else
-         predicate_enable = 0;
+      const int predicate_enable =
+         (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
+         ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;
 
-      BEGIN_BATCH(7);
       OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
       OUT_BATCH(hw_prim | vertex_access_type);
    } else {
-      BEGIN_BATCH(6);
       OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
                 hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                 vertex_access_type);
@@ -281,14 +278,14 @@
    OUT_BATCH(base_vertex_location);
    ADVANCE_BATCH();
 
-   if (brw->always_flush_cache) {
-      intel_batchbuffer_emit_mi_flush(brw);
-   }
+   if (brw->always_flush_cache)
+      brw_emit_mi_flush(brw);
 }
 
 
-static void brw_merge_inputs( struct brw_context *brw,
-		       const struct gl_client_array *arrays[])
+static void
+brw_merge_inputs(struct brw_context *brw,
+                 const struct gl_client_array *arrays[])
 {
    const struct gl_context *ctx = &brw->ctx;
    GLuint i;
@@ -357,7 +354,8 @@
  * Also mark any render targets which will be textured as needing a render
  * cache flush.
  */
-static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
+static void
+brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
    struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -385,7 +383,7 @@
       brw_render_cache_set_add_bo(brw, stencil_irb->mt->bo);
    }
 
-   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
       struct intel_renderbuffer *irb =
          intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 
@@ -397,21 +395,22 @@
 /* May fail if out of video memory for texture or vbo upload, or on
  * fallback conditions.
  */
-static void brw_try_draw_prims( struct gl_context *ctx,
-				     const struct gl_client_array *arrays[],
-				     const struct _mesa_prim *prims,
-				     GLuint nr_prims,
-				     const struct _mesa_index_buffer *ib,
-				     GLuint min_index,
-				     GLuint max_index,
-				     struct gl_buffer_object *indirect)
+static void
+brw_try_draw_prims(struct gl_context *ctx,
+                   const struct gl_client_array *arrays[],
+                   const struct _mesa_prim *prims,
+                   GLuint nr_prims,
+                   const struct _mesa_index_buffer *ib,
+                   GLuint min_index,
+                   GLuint max_index,
+                   struct gl_buffer_object *indirect)
 {
    struct brw_context *brw = brw_context(ctx);
    GLuint i;
    bool fail_next = false;
 
    if (ctx->NewState)
-      _mesa_update_state( ctx );
+      _mesa_update_state(ctx);
 
    /* Find the highest sampler unit used by each shader program.  A bit-count
     * won't work since ARB programs use the texture unit number as the sampler
@@ -431,7 +430,7 @@
     * software fallback will segfault if it attempts to access any
     * texture level other than level 0.
     */
-   brw_validate_textures( brw );
+   brw_validate_textures(brw);
 
    intel_prepare_render(brw);
 
@@ -443,7 +442,7 @@
 
    /* Bind all inputs, derive varying and size information:
     */
-   brw_merge_inputs( brw, arrays );
+   brw_merge_inputs(brw, arrays);
 
    brw->ib.ib = ib;
    brw->ctx.NewDriverState |= BRW_NEW_INDICES;
@@ -551,15 +550,17 @@
    return;
 }
 
-void brw_draw_prims( struct gl_context *ctx,
-		     const struct _mesa_prim *prims,
-		     GLuint nr_prims,
-		     const struct _mesa_index_buffer *ib,
-		     GLboolean index_bounds_valid,
-		     GLuint min_index,
-		     GLuint max_index,
-		     struct gl_transform_feedback_object *unused_tfb_object,
-		     struct gl_buffer_object *indirect )
+void
+brw_draw_prims(struct gl_context *ctx,
+               const struct _mesa_prim *prims,
+               GLuint nr_prims,
+               const struct _mesa_index_buffer *ib,
+               GLboolean index_bounds_valid,
+               GLuint min_index,
+               GLuint max_index,
+               struct gl_transform_feedback_object *unused_tfb_object,
+               unsigned stream,
+               struct gl_buffer_object *indirect)
 {
    struct brw_context *brw = brw_context(ctx);
    const struct gl_client_array **arrays = ctx->Array._DrawArrays;
@@ -580,11 +581,11 @@
     */
    if (ctx->RenderMode != GL_RENDER) {
       perf_debug("%s render mode not supported in hardware\n",
-                 _mesa_lookup_enum_by_nr(ctx->RenderMode));
+                 _mesa_enum_to_string(ctx->RenderMode));
       _swsetup_Wakeup(ctx);
       _tnl_wakeup(ctx);
       _tnl_draw_prims(ctx, prims, nr_prims, ib,
-                      index_bounds_valid, min_index, max_index, NULL, NULL);
+                      index_bounds_valid, min_index, max_index, NULL, 0, NULL);
       return;
    }
 
@@ -602,28 +603,30 @@
     * manage it.  swrast doesn't support our featureset, so we can't fall back
     * to it.
     */
-   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index, indirect);
+   brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
+                      indirect);
 }
 
-void brw_draw_init( struct brw_context *brw )
+void
+brw_draw_init(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
    struct vbo_context *vbo = vbo_context(ctx);
-   int i;
 
    /* Register our drawing function:
     */
    vbo->draw_prims = brw_draw_prims;
 
-   for (i = 0; i < VERT_ATTRIB_MAX; i++)
+   for (int i = 0; i < VERT_ATTRIB_MAX; i++)
       brw->vb.inputs[i].buffer = -1;
    brw->vb.nr_buffers = 0;
    brw->vb.nr_enabled = 0;
 }
 
-void brw_draw_destroy( struct brw_context *brw )
+void
+brw_draw_destroy(struct brw_context *brw)
 {
-   int i;
+   unsigned i;
 
    for (i = 0; i < brw->vb.nr_buffers; i++) {
       drm_intel_bo_unreference(brw->vb.buffers[i].bo);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_draw.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_draw.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_draw.h	2014-09-25 15:15:35.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_draw.h	2015-09-16 14:36:09.000000000 +0000
@@ -34,7 +34,7 @@
 struct brw_context;
 
 
-void brw_draw_prims( struct gl_context *ctx,
+void brw_draw_prims(struct gl_context *ctx,
 		     const struct _mesa_prim *prims,
 		     GLuint nr_prims,
 		     const struct _mesa_index_buffer *ib,
@@ -42,6 +42,7 @@
 		     GLuint min_index,
 		     GLuint max_index,
 		     struct gl_transform_feedback_object *unused_tfb_object,
+                     unsigned stream,
 		     struct gl_buffer_object *indirect );
 
 void brw_draw_init( struct brw_context *brw );
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_draw_upload.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_draw_upload.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_draw_upload.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_draw_upload.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,7 +40,7 @@
 #include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
 
-static GLuint double_types[5] = {
+static const GLuint double_types[5] = {
    0,
    BRW_SURFACEFORMAT_R64_FLOAT,
    BRW_SURFACEFORMAT_R64G64_FLOAT,
@@ -48,7 +48,7 @@
    BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
 };
 
-static GLuint float_types[5] = {
+static const GLuint float_types[5] = {
    0,
    BRW_SURFACEFORMAT_R32_FLOAT,
    BRW_SURFACEFORMAT_R32G32_FLOAT,
@@ -56,7 +56,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
 };
 
-static GLuint half_float_types[5] = {
+static const GLuint half_float_types[5] = {
    0,
    BRW_SURFACEFORMAT_R16_FLOAT,
    BRW_SURFACEFORMAT_R16G16_FLOAT,
@@ -64,7 +64,7 @@
    BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
 };
 
-static GLuint fixed_point_types[5] = {
+static const GLuint fixed_point_types[5] = {
    0,
    BRW_SURFACEFORMAT_R32_SFIXED,
    BRW_SURFACEFORMAT_R32G32_SFIXED,
@@ -72,7 +72,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
 };
 
-static GLuint uint_types_direct[5] = {
+static const GLuint uint_types_direct[5] = {
    0,
    BRW_SURFACEFORMAT_R32_UINT,
    BRW_SURFACEFORMAT_R32G32_UINT,
@@ -80,7 +80,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_UINT
 };
 
-static GLuint uint_types_norm[5] = {
+static const GLuint uint_types_norm[5] = {
    0,
    BRW_SURFACEFORMAT_R32_UNORM,
    BRW_SURFACEFORMAT_R32G32_UNORM,
@@ -88,7 +88,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_UNORM
 };
 
-static GLuint uint_types_scale[5] = {
+static const GLuint uint_types_scale[5] = {
    0,
    BRW_SURFACEFORMAT_R32_USCALED,
    BRW_SURFACEFORMAT_R32G32_USCALED,
@@ -96,7 +96,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_USCALED
 };
 
-static GLuint int_types_direct[5] = {
+static const GLuint int_types_direct[5] = {
    0,
    BRW_SURFACEFORMAT_R32_SINT,
    BRW_SURFACEFORMAT_R32G32_SINT,
@@ -104,7 +104,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_SINT
 };
 
-static GLuint int_types_norm[5] = {
+static const GLuint int_types_norm[5] = {
    0,
    BRW_SURFACEFORMAT_R32_SNORM,
    BRW_SURFACEFORMAT_R32G32_SNORM,
@@ -112,7 +112,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_SNORM
 };
 
-static GLuint int_types_scale[5] = {
+static const GLuint int_types_scale[5] = {
    0,
    BRW_SURFACEFORMAT_R32_SSCALED,
    BRW_SURFACEFORMAT_R32G32_SSCALED,
@@ -120,7 +120,7 @@
    BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
 };
 
-static GLuint ushort_types_direct[5] = {
+static const GLuint ushort_types_direct[5] = {
    0,
    BRW_SURFACEFORMAT_R16_UINT,
    BRW_SURFACEFORMAT_R16G16_UINT,
@@ -128,7 +128,7 @@
    BRW_SURFACEFORMAT_R16G16B16A16_UINT
 };
 
-static GLuint ushort_types_norm[5] = {
+static const GLuint ushort_types_norm[5] = {
    0,
    BRW_SURFACEFORMAT_R16_UNORM,
    BRW_SURFACEFORMAT_R16G16_UNORM,
@@ -136,7 +136,7 @@
    BRW_SURFACEFORMAT_R16G16B16A16_UNORM
 };
 
-static GLuint ushort_types_scale[5] = {
+static const GLuint ushort_types_scale[5] = {
    0,
    BRW_SURFACEFORMAT_R16_USCALED,
    BRW_SURFACEFORMAT_R16G16_USCALED,
@@ -144,7 +144,7 @@
    BRW_SURFACEFORMAT_R16G16B16A16_USCALED
 };
 
-static GLuint short_types_direct[5] = {
+static const GLuint short_types_direct[5] = {
    0,
    BRW_SURFACEFORMAT_R16_SINT,
    BRW_SURFACEFORMAT_R16G16_SINT,
@@ -152,7 +152,7 @@
    BRW_SURFACEFORMAT_R16G16B16A16_SINT
 };
 
-static GLuint short_types_norm[5] = {
+static const GLuint short_types_norm[5] = {
    0,
    BRW_SURFACEFORMAT_R16_SNORM,
    BRW_SURFACEFORMAT_R16G16_SNORM,
@@ -160,7 +160,7 @@
    BRW_SURFACEFORMAT_R16G16B16A16_SNORM
 };
 
-static GLuint short_types_scale[5] = {
+static const GLuint short_types_scale[5] = {
    0,
    BRW_SURFACEFORMAT_R16_SSCALED,
    BRW_SURFACEFORMAT_R16G16_SSCALED,
@@ -168,7 +168,7 @@
    BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
 };
 
-static GLuint ubyte_types_direct[5] = {
+static const GLuint ubyte_types_direct[5] = {
    0,
    BRW_SURFACEFORMAT_R8_UINT,
    BRW_SURFACEFORMAT_R8G8_UINT,
@@ -176,7 +176,7 @@
    BRW_SURFACEFORMAT_R8G8B8A8_UINT
 };
 
-static GLuint ubyte_types_norm[5] = {
+static const GLuint ubyte_types_norm[5] = {
    0,
    BRW_SURFACEFORMAT_R8_UNORM,
    BRW_SURFACEFORMAT_R8G8_UNORM,
@@ -184,7 +184,7 @@
    BRW_SURFACEFORMAT_R8G8B8A8_UNORM
 };
 
-static GLuint ubyte_types_scale[5] = {
+static const GLuint ubyte_types_scale[5] = {
    0,
    BRW_SURFACEFORMAT_R8_USCALED,
    BRW_SURFACEFORMAT_R8G8_USCALED,
@@ -192,7 +192,7 @@
    BRW_SURFACEFORMAT_R8G8B8A8_USCALED
 };
 
-static GLuint byte_types_direct[5] = {
+static const GLuint byte_types_direct[5] = {
    0,
    BRW_SURFACEFORMAT_R8_SINT,
    BRW_SURFACEFORMAT_R8G8_SINT,
@@ -200,7 +200,7 @@
    BRW_SURFACEFORMAT_R8G8B8A8_SINT
 };
 
-static GLuint byte_types_norm[5] = {
+static const GLuint byte_types_norm[5] = {
    0,
    BRW_SURFACEFORMAT_R8_SNORM,
    BRW_SURFACEFORMAT_R8G8_SNORM,
@@ -208,7 +208,7 @@
    BRW_SURFACEFORMAT_R8G8B8A8_SNORM
 };
 
-static GLuint byte_types_scale[5] = {
+static const GLuint byte_types_scale[5] = {
    0,
    BRW_SURFACEFORMAT_R8_SSCALED,
    BRW_SURFACEFORMAT_R8G8_SSCALED,
@@ -230,7 +230,7 @@
 
    if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
       fprintf(stderr, "type %s size %d normalized %d\n",
-              _mesa_lookup_enum_by_nr(glarray->Type),
+              _mesa_enum_to_string(glarray->Type),
               glarray->Size, glarray->Normalized);
 
    if (glarray->Integer) {
@@ -395,7 +395,8 @@
    GLuint interleaved = 0;
    unsigned int min_index = brw->vb.min_index + brw->basevertex;
    unsigned int max_index = brw->vb.max_index + brw->basevertex;
-   int delta, i, j;
+   unsigned i;
+   int delta, j;
 
    struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
    GLuint nr_uploads = 0;
@@ -418,10 +419,10 @@
    /* Accumulate the list of enabled arrays. */
    brw->vb.nr_enabled = 0;
    while (vs_inputs) {
-      GLuint i = ffsll(vs_inputs) - 1;
-      struct brw_vertex_element *input = &brw->vb.inputs[i];
+      GLuint index = ffsll(vs_inputs) - 1;
+      struct brw_vertex_element *input = &brw->vb.inputs[index];
 
-      vs_inputs &= ~BITFIELD64_BIT(i);
+      vs_inputs &= ~BITFIELD64_BIT(index);
       brw->vb.enabled[brw->vb.nr_enabled++] = input;
    }
 
@@ -438,7 +439,7 @@
       if (_mesa_is_bufferobj(glarray->BufferObj)) {
 	 struct intel_buffer_object *intel_buffer =
 	    intel_buffer_object(glarray->BufferObj);
-	 int k;
+	 unsigned k;
 
 	 /* If we have a VB set to be uploaded for this buffer object
 	  * already, reuse that VB state so that we emit fewer
@@ -604,14 +605,15 @@
 /**
  * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
  */
-static void
+static uint32_t *
 emit_vertex_buffer_state(struct brw_context *brw,
                          unsigned buffer_nr,
                          drm_intel_bo *bo,
                          unsigned bo_ending_address,
                          unsigned bo_offset,
                          unsigned stride,
-                         unsigned step_rate)
+                         unsigned step_rate,
+                         uint32_t *__map)
 {
    struct gl_context *ctx = &brw->ctx;
    uint32_t dw0;
@@ -643,9 +645,13 @@
       OUT_BATCH(0);
    }
    OUT_BATCH(step_rate);
+
+   return __map;
 }
+#define EMIT_VERTEX_BUFFER_STATE(...) __map = emit_vertex_buffer_state(__VA_ARGS__, __map)
 
-static void brw_emit_vertices(struct brw_context *brw)
+static void
+brw_emit_vertices(struct brw_context *brw)
 {
    GLuint i;
 
@@ -704,14 +710,14 @@
       OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
       for (i = 0; i < brw->vb.nr_buffers; i++) {
 	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
-         emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1,
+         EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1,
                                   buffer->offset, buffer->stride,
                                   buffer->step_rate);
 
       }
 
       if (brw->vs.prog_data->uses_vertexid) {
-         emit_vertex_buffer_state(brw, brw->vb.nr_buffers,
+         EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
                                   brw->draw.draw_params_bo,
                                   brw->draw.draw_params_bo->size - 1,
                                   brw->draw.draw_params_offset,
@@ -787,21 +793,6 @@
                     ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
    }
 
-   if (brw->gen >= 6 && gen6_edgeflag_input) {
-      uint32_t format =
-         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
-
-      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
-                GEN6_VE0_VALID |
-                GEN6_VE0_EDGE_FLAG_ENABLE |
-                (format << BRW_VE0_FORMAT_SHIFT) |
-                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
-
    if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
       uint32_t dw0 = 0, dw1 = 0;
       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
@@ -842,6 +833,21 @@
       OUT_BATCH(dw1);
    }
 
+   if (brw->gen >= 6 && gen6_edgeflag_input) {
+      uint32_t format =
+         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+
+      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
+                GEN6_VE0_VALID |
+                GEN6_VE0_EDGE_FLAG_ENABLE |
+                (format << BRW_VE0_FORMAT_SHIFT) |
+                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
+
    ADVANCE_BATCH();
 }
 
@@ -855,7 +861,8 @@
    .emit = brw_emit_vertices,
 };
 
-static void brw_upload_indices(struct brw_context *brw)
+static void
+brw_upload_indices(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
@@ -935,7 +942,8 @@
    .emit = brw_upload_indices,
 };
 
-static void brw_emit_index_buffer(struct brw_context *brw)
+static void
+brw_emit_index_buffer(struct brw_context *brw)
 {
    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
    GLuint cut_index_setting;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_eu_emit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_eu_emit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_eu_emit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_eu_emit.c	2015-09-16 14:36:09.000000000 +0000
@@ -914,6 +914,8 @@
          brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD);
          brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD);
          break;
+      default:
+         unreachable("not reached");
       }
    }
 
@@ -1582,8 +1584,8 @@
    }
 
    if (devinfo->gen < 6) {
-      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
-      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
       brw_set_src1(p, insn, brw_imm_d(0x0));
    } else if (devinfo->gen == 6) {
       brw_set_dest(p, insn, brw_imm_w(0));
@@ -3404,3 +3406,54 @@
 
    brw_pop_insn_state(p);
 }
+
+
+/**
+ * Emit the SEND message for a barrier
+ */
+void
+brw_barrier(struct brw_codegen *p, struct brw_reg src)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   struct brw_inst *inst;
+
+   assert(devinfo->gen >= 7);
+
+   inst = next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, inst, brw_null_reg());
+   brw_set_src0(p, inst, src);
+   brw_set_src1(p, inst, brw_null_reg());
+
+   brw_set_message_descriptor(p, inst, BRW_SFID_MESSAGE_GATEWAY,
+                              1 /* msg_length */,
+                              0 /* response_length */,
+                              false /* header_present */,
+                              false /* end_of_thread */);
+
+   brw_inst_set_gateway_notify(devinfo, inst, 1);
+   brw_inst_set_gateway_subfuncid(devinfo, inst,
+                                  BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
+
+   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
+}
+
+
+/**
+ * Emit the wait instruction for a barrier
+ */
+void
+brw_WAIT(struct brw_codegen *p)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   struct brw_inst *insn;
+
+   struct brw_reg src = brw_notification_reg();
+
+   insn = next_insn(p, BRW_OPCODE_WAIT);
+   brw_set_dest(p, insn, src);
+   brw_set_src0(p, insn, src);
+   brw_set_src1(p, insn, brw_null_reg());
+
+   brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
+   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_eu.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_eu.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_eu.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_eu.h	2015-09-16 14:36:09.000000000 +0000
@@ -361,6 +361,8 @@
    return 1;
 }
 
+void brw_barrier(struct brw_codegen *p, struct brw_reg src);
+
 /* If/else/endif.  Works by manipulating the execution flags on each
  * channel.
  */
@@ -390,6 +392,8 @@
 
 void brw_NOP(struct brw_codegen *p);
 
+void brw_WAIT(struct brw_codegen *p);
+
 /* Special case: there is never a destination, execution size will be
  * taken from src0:
  */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_builder.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_builder.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_builder.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_builder.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,678 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_FS_BUILDER_H
+#define BRW_FS_BUILDER_H
+
+#include "brw_ir_fs.h"
+#include "brw_shader.h"
+#include "brw_context.h"
+
+namespace brw {
+   /**
+    * Toolbox to assemble an FS IR program out of individual instructions.
+    *
+    * This object is meant to have an interface consistent with
+    * brw::vec4_builder.  They cannot be fully interchangeable because
+    * brw::fs_builder generates scalar code while brw::vec4_builder generates
+    * vector code.
+    */
+   class fs_builder {
+   public:
+      /** Type used in this IR to represent a source of an instruction. */
+      typedef fs_reg src_reg;
+
+      /** Type used in this IR to represent the destination of an instruction. */
+      typedef fs_reg dst_reg;
+
+      /** Type used in this IR to represent an instruction. */
+      typedef fs_inst instruction;
+
+      /**
+       * Construct an fs_builder that inserts instructions into \p shader.
+       * \p dispatch_width gives the native execution width of the program.
+       */
+      fs_builder(backend_shader *shader,
+                 unsigned dispatch_width) :
+         shader(shader), block(NULL), cursor(NULL),
+         _dispatch_width(dispatch_width),
+         _group(0),
+         force_writemask_all(false),
+         annotation()
+      {
+      }
+
+      /**
+       * Construct an fs_builder that inserts instructions into \p shader
+       * before instruction \p inst in basic block \p block.  The default
+       * execution controls and debug annotation are initialized from the
+       * instruction passed as argument.
+       */
+      fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
+         shader(shader), block(block), cursor(inst),
+         _dispatch_width(inst->exec_size),
+         _group(inst->force_sechalf ? 8 : 0),
+         force_writemask_all(inst->force_writemask_all)
+      {
+         annotation.str = inst->annotation;
+         annotation.ir = inst->ir;
+      }
+
+      /**
+       * Construct an fs_builder that inserts instructions before \p cursor in
+       * basic block \p block, inheriting other code generation parameters
+       * from this.
+       */
+      fs_builder
+      at(bblock_t *block, exec_node *cursor) const
+      {
+         fs_builder bld = *this;
+         bld.block = block;
+         bld.cursor = cursor;
+         return bld;
+      }
+
+      /**
+       * Construct an fs_builder appending instructions at the end of the
+       * instruction list of the shader, inheriting other code generation
+       * parameters from this.
+       */
+      fs_builder
+      at_end() const
+      {
+         return at(NULL, (exec_node *)&shader->instructions.tail);
+      }
+
+      /**
+       * Construct a builder specifying the default SIMD width and group of
+       * channel enable signals, inheriting other code generation parameters
+       * from this.
+       *
+       * \p n gives the default SIMD width, \p i gives the slot group used for
+       * predication and control flow masking in multiples of \p n channels.
+       */
+      fs_builder
+      group(unsigned n, unsigned i) const
+      {
+         assert(force_writemask_all ||
+                (n <= dispatch_width() && i < dispatch_width() / n));
+         fs_builder bld = *this;
+         bld._dispatch_width = n;
+         bld._group += i * n;
+         return bld;
+      }
+
+      /**
+       * Alias for group() with width equal to eight.
+       */
+      fs_builder
+      half(unsigned i) const
+      {
+         return group(8, i);
+      }
+
+      /**
+       * Construct a builder with per-channel control flow execution masking
+       * disabled if \p b is true.  If control flow execution masking is
+       * already disabled this has no effect.
+       */
+      fs_builder
+      exec_all(bool b = true) const
+      {
+         fs_builder bld = *this;
+         if (b)
+            bld.force_writemask_all = true;
+         return bld;
+      }
+
+      /**
+       * Construct a builder with the given debug annotation info.
+       */
+      fs_builder
+      annotate(const char *str, const void *ir = NULL) const
+      {
+         fs_builder bld = *this;
+         bld.annotation.str = str;
+         bld.annotation.ir = ir;
+         return bld;
+      }
+
+      /**
+       * Get the SIMD width in use.
+       */
+      unsigned
+      dispatch_width() const
+      {
+         return _dispatch_width;
+      }
+
+      /**
+       * Allocate a virtual register of natural vector size (one for this IR)
+       * and SIMD width.  \p n gives the amount of space to allocate in
+       * dispatch_width units (which is just enough space for one logical
+       * component in this IR).
+       */
+      dst_reg
+      vgrf(enum brw_reg_type type, unsigned n = 1) const
+      {
+         assert(dispatch_width() <= 32);
+
+         if (n > 0)
+            return dst_reg(GRF, shader->alloc.allocate(
+                              DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
+                                           REG_SIZE)),
+                           type);
+         else
+            return retype(null_reg_ud(), type);
+      }
+
+      /**
+       * Create a null register of floating type.
+       */
+      dst_reg
+      null_reg_f() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_F));
+      }
+
+      /**
+       * Create a null register of signed integer type.
+       */
+      dst_reg
+      null_reg_d() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_D));
+      }
+
+      /**
+       * Create a null register of unsigned integer type.
+       */
+      dst_reg
+      null_reg_ud() const
+      {
+         return dst_reg(retype(brw_null_vec(dispatch_width()),
+                               BRW_REGISTER_TYPE_UD));
+      }
+
+      /**
+       * Get the mask of SIMD channels enabled by dispatch and not yet
+       * disabled by discard.
+       */
+      src_reg
+      sample_mask_reg() const
+      {
+         const bool uses_kill =
+            (shader->stage == MESA_SHADER_FRAGMENT &&
+             ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill);
+         return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) :
+                 uses_kill ? brw_flag_reg(0, 1) :
+                 retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
+      }
+
+      /**
+       * Insert an instruction into the program.
+       */
+      instruction *
+      emit(const instruction &inst) const
+      {
+         return emit(new(shader->mem_ctx) instruction(inst));
+      }
+
+      /**
+       * Create and insert a nullary control instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode) const
+      {
+         return emit(instruction(opcode, dispatch_width()));
+      }
+
+      /**
+       * Create and insert a nullary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst) const
+      {
+         return emit(instruction(opcode, dispatch_width(), dst));
+      }
+
+      /**
+       * Create and insert a unary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
+      {
+         switch (opcode) {
+         case SHADER_OPCODE_RCP:
+         case SHADER_OPCODE_RSQ:
+         case SHADER_OPCODE_SQRT:
+         case SHADER_OPCODE_EXP2:
+         case SHADER_OPCODE_LOG2:
+         case SHADER_OPCODE_SIN:
+         case SHADER_OPCODE_COS:
+            return fix_math_instruction(
+               emit(instruction(opcode, dispatch_width(), dst,
+                                fix_math_operand(src0))));
+
+         default:
+            return emit(instruction(opcode, dispatch_width(), dst, src0));
+         }
+      }
+
+      /**
+       * Create and insert a binary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1) const
+      {
+         switch (opcode) {
+         case SHADER_OPCODE_POW:
+         case SHADER_OPCODE_INT_QUOTIENT:
+         case SHADER_OPCODE_INT_REMAINDER:
+            return fix_math_instruction(
+               emit(instruction(opcode, dispatch_width(), dst,
+                                fix_math_operand(src0),
+                                fix_math_operand(src1))));
+
+         default:
+            return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
+
+         }
+      }
+
+      /**
+       * Create and insert a ternary instruction into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+           const src_reg &src1, const src_reg &src2) const
+      {
+         switch (opcode) {
+         case BRW_OPCODE_BFE:
+         case BRW_OPCODE_BFI2:
+         case BRW_OPCODE_MAD:
+         case BRW_OPCODE_LRP:
+            return emit(instruction(opcode, dispatch_width(), dst,
+                                    fix_3src_operand(src0),
+                                    fix_3src_operand(src1),
+                                    fix_3src_operand(src2)));
+
+         default:
+            return emit(instruction(opcode, dispatch_width(), dst,
+                                    src0, src1, src2));
+         }
+      }
+
+      /**
+       * Create and insert an instruction with a variable number of sources
+       * into the program.
+       */
+      instruction *
+      emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[],
+           unsigned n) const
+      {
+         return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
+      }
+
+      /**
+       * Insert a preallocated instruction into the program.
+       */
+      instruction *
+      emit(instruction *inst) const
+      {
+         assert(inst->exec_size <= 32);
+         assert(inst->exec_size == dispatch_width() ||
+                force_writemask_all);
+         assert(_group == 0 || _group == 8);
+
+         inst->force_sechalf = (_group == 8);
+         inst->force_writemask_all = force_writemask_all;
+         inst->annotation = annotation.str;
+         inst->ir = annotation.ir;
+
+         if (block)
+            static_cast<instruction *>(cursor)->insert_before(block, inst);
+         else
+            cursor->insert_before(inst);
+
+         return inst;
+      }
+
+      /**
+       * Select \p src0 if the comparison of both sources with the given
+       * conditional mod evaluates to true, otherwise select \p src1.
+       *
+       * Generally useful to get the minimum or maximum of two values.
+       */
+      void
+      emit_minmax(const dst_reg &dst, const src_reg &src0,
+                  const src_reg &src1, brw_conditional_mod mod) const
+      {
+         if (shader->devinfo->gen >= 6) {
+            set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+         } else {
+            CMP(null_reg_d(), src0, src1, mod);
+            set_predicate(BRW_PREDICATE_NORMAL,
+                          SEL(dst, src0, src1));
+         }
+      }
+
+      /**
+       * Copy any live channel from \p src to the first channel of the result.
+       */
+      src_reg
+      emit_uniformize(const src_reg &src) const
+      {
+         const fs_builder ubld = exec_all();
+         const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
+         const dst_reg dst = component(vgrf(src.type), 0);
+
+         ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+         ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
+
+         return src_reg(dst);
+      }
+
+      /**
+       * Assorted arithmetic ops.
+       * @{
+       */
+#define ALU1(op)                                        \
+      instruction *                                     \
+      op(const dst_reg &dst, const src_reg &src0) const \
+      {                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0);       \
+      }
+
+#define ALU2(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
+      }
+
+#define ALU2_ACC(op)                                                    \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
+      {                                                                 \
+         instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
+         inst->writes_accumulator = true;                               \
+         return inst;                                                   \
+      }
+
+#define ALU3(op)                                                        \
+      instruction *                                                     \
+      op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
+         const src_reg &src2) const                                     \
+      {                                                                 \
+         return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
+      }
+
+      ALU2(ADD)
+      ALU2_ACC(ADDC)
+      ALU2(AND)
+      ALU2(ASR)
+      ALU2(AVG)
+      ALU3(BFE)
+      ALU2(BFI1)
+      ALU3(BFI2)
+      ALU1(BFREV)
+      ALU1(CBIT)
+      ALU2(CMPN)
+      ALU3(CSEL)
+      ALU2(DP2)
+      ALU2(DP3)
+      ALU2(DP4)
+      ALU2(DPH)
+      ALU1(F16TO32)
+      ALU1(F32TO16)
+      ALU1(FBH)
+      ALU1(FBL)
+      ALU1(FRC)
+      ALU2(LINE)
+      ALU1(LZD)
+      ALU2(MAC)
+      ALU2_ACC(MACH)
+      ALU3(MAD)
+      ALU1(MOV)
+      ALU2(MUL)
+      ALU1(NOT)
+      ALU2(OR)
+      ALU2(PLN)
+      ALU1(RNDD)
+      ALU1(RNDE)
+      ALU1(RNDU)
+      ALU1(RNDZ)
+      ALU2(SAD2)
+      ALU2_ACC(SADA2)
+      ALU2(SEL)
+      ALU2(SHL)
+      ALU2(SHR)
+      ALU2_ACC(SUBB)
+      ALU2(XOR)
+
+#undef ALU3
+#undef ALU2_ACC
+#undef ALU2
+#undef ALU1
+      /** @} */
+
+      /**
+       * CMP: Sets the low bit of the destination channels with the result
+       * of the comparison, while the upper bits are undefined, and updates
+       * the flag register with the packed 16 bits of the result.
+       */
+      instruction *
+      CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
+          brw_conditional_mod condition) const
+      {
+         /* Take the instruction:
+          *
+          * CMP null<d> src0<f> src1<f>
+          *
+          * Original gen4 does type conversion to the destination type
+          * before comparison, producing garbage results for floating
+          * point comparisons.
+          *
+          * The destination type doesn't matter on newer generations,
+          * so we set the type to match src0 so we can compact the
+          * instruction.
+          */
+         return set_condmod(condition,
+                            emit(BRW_OPCODE_CMP, retype(dst, src0.type),
+                                 fix_unsigned_negate(src0),
+                                 fix_unsigned_negate(src1)));
+      }
+
+      /**
+       * Gen4 predicated IF.
+       */
+      instruction *
+      IF(brw_predicate predicate) const
+      {
+         return set_predicate(predicate, emit(BRW_OPCODE_IF));
+      }
+
+      /**
+       * Emit a linear interpolation instruction.
+       */
+      instruction *
+      LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
+          const src_reg &a) const
+      {
+         if (shader->devinfo->gen >= 6) {
+            /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
+             * we need to reorder the operands.
+             */
+            return emit(BRW_OPCODE_LRP, dst, a, y, x);
+
+         } else {
+            /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
+            const dst_reg y_times_a = vgrf(dst.type);
+            const dst_reg one_minus_a = vgrf(dst.type);
+            const dst_reg x_times_one_minus_a = vgrf(dst.type);
+
+            MUL(y_times_a, y, a);
+            ADD(one_minus_a, negate(a), src_reg(1.0f));
+            MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
+            return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
+         }
+      }
+
+      /**
+       * Collect a number of registers in a contiguous range of registers.
+       */
+      instruction *
+      LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
+                   unsigned sources, unsigned header_size) const
+      {
+         instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
+         inst->header_size = header_size;
+         inst->regs_written = header_size +
+                              (sources - header_size) * (dispatch_width() / 8);
+
+         return inst;
+      }
+
+      backend_shader *shader;
+
+   private:
+      /**
+       * Workaround for negation of UD registers.  See comment in
+       * fs_generator::generate_code() for more details.
+       */
+      src_reg
+      fix_unsigned_negate(const src_reg &src) const
+      {
+         if (src.type == BRW_REGISTER_TYPE_UD &&
+             src.negate) {
+            dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
+            MOV(temp, src);
+            return src_reg(temp);
+         } else {
+            return src;
+         }
+      }
+
+      /**
+       * Workaround for source register modes not supported by the ternary
+       * instruction encoding.
+       */
+      src_reg
+      fix_3src_operand(const src_reg &src) const
+      {
+         if (src.file == GRF || src.file == UNIFORM || src.stride > 1) {
+            return src;
+         } else {
+            dst_reg expanded = vgrf(src.type);
+            MOV(expanded, src);
+            return expanded;
+         }
+      }
+
+      /**
+       * Workaround for source register modes not supported by the math
+       * instruction.
+       */
+      src_reg
+      fix_math_operand(const src_reg &src) const
+      {
+         /* Can't do hstride == 0 args on gen6 math, so expand it out. We
+          * might be able to do better by doing execsize = 1 math and then
+          * expanding that result out, but we would need to be careful with
+          * masking.
+          *
+          * Gen6 hardware ignores source modifiers (negate and abs) on math
+          * instructions, so we also move to a temp to set those up.
+          *
+          * Gen7 relaxes most of the above restrictions, but still can't use IMM
+          * operands to math.
+          */
+         if ((shader->devinfo->gen == 6 &&
+              (src.file == IMM || src.file == UNIFORM ||
+               src.abs || src.negate)) ||
+             (shader->devinfo->gen == 7 && src.file == IMM)) {
+            const dst_reg tmp = vgrf(src.type);
+            MOV(tmp, src);
+            return tmp;
+         } else {
+            return src;
+         }
+      }
+
+      /**
+       * Workaround other weirdness of the math instruction.
+       */
+      instruction *
+      fix_math_instruction(instruction *inst) const
+      {
+         if (shader->devinfo->gen < 6) {
+            inst->base_mrf = 2;
+            inst->mlen = inst->sources * dispatch_width() / 8;
+
+            if (inst->sources > 1) {
+               /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
+                * "Message Payload":
+                *
+                * "Operand0[7].  For the INT DIV functions, this operand is the
+                *  denominator."
+                *  ...
+                * "Operand1[7].  For the INT DIV functions, this operand is the
+                *  numerator."
+                */
+               const bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
+               const fs_reg src0 = is_int_div ? inst->src[1] : inst->src[0];
+               const fs_reg src1 = is_int_div ? inst->src[0] : inst->src[1];
+
+               inst->resize_sources(1);
+               inst->src[0] = src0;
+
+               at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type),
+                                   src1);
+            }
+         }
+
+         return inst;
+      }
+
+      bblock_t *block;
+      exec_node *cursor;
+
+      unsigned _dispatch_width;
+      unsigned _group;
+      bool force_writemask_all;
+
+      /** Debug annotation info. */
+      struct {
+         const char *str;
+         const void *ir;
+      } annotation;
+   };
+}
+
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -243,6 +243,7 @@
    case ir_unop_find_msb:
    case ir_unop_find_lsb:
    case ir_unop_saturate:
+   case ir_unop_subroutine_to_int:
       for (i = 0; i < vector_elements; i++) {
 	 ir_rvalue *op0 = get_element(op_var[0], i);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -38,6 +38,8 @@
 #include "brw_fs_live_variables.h"
 #include "brw_cfg.h"
 
+using namespace brw;
+
 /* Returns whether an instruction could co-issue if its immediate source were
  * replaced with a GRF source.
  */
@@ -270,15 +272,14 @@
    reg.stride = 0;
    for (int i = 0; i < table.len; i++) {
       struct imm *imm = &table.imm[i];
+      /* Insert it either before the instruction that generated the immediate
+       * or after the last non-control flow instruction of the common ancestor.
+       */
+      exec_node *n = (imm->inst ? imm->inst :
+                      imm->block->last_non_control_flow_inst()->next);
+      const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
 
-      fs_inst *mov = MOV(reg, fs_reg(imm->val));
-      mov->force_writemask_all = true;
-      if (imm->inst) {
-         imm->inst->insert_before(imm->block, mov);
-      } else {
-         backend_instruction *inst = imm->block->last_non_control_flow_inst();
-         inst->insert_after(imm->block, mov);
-      }
+      ibld.MOV(reg, fs_reg(imm->val));
       imm->reg = reg.reg;
       imm->subreg_offset = reg.subreg_offset;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -279,6 +279,7 @@
 can_change_source_types(fs_inst *inst)
 {
    return !inst->src[0].abs && !inst->src[0].negate &&
+          inst->dst.type == inst->src[0].type &&
           (inst->opcode == BRW_OPCODE_MOV ||
            (inst->opcode == BRW_OPCODE_SEL &&
             inst->predicate != BRW_PREDICATE_NONE &&
@@ -339,6 +340,14 @@
    if (entry->src.stride * inst->src[arg].stride > 4)
       return false;
 
+   /* Bail if the instruction type is larger than the execution type of the
+    * copy, which implies that each channel is reading multiple channels of the
+    * destination of the copy, and simply replacing the sources would give a
+    * program with different semantics.
+    */
+   if (type_sz(entry->dst.type) < type_sz(inst->src[arg].type))
+      return false;
+
    /* Bail if the result of composing both strides cannot be expressed
     * as another stride. This avoids, for example, trying to transform
     * this:
@@ -388,17 +397,14 @@
 
    switch (entry->src.file) {
    case UNIFORM:
-      assert(entry->src.width == 1);
    case BAD_FILE:
    case HW_REG:
-      inst->src[arg].width = entry->src.width;
       inst->src[arg].reg_offset = entry->src.reg_offset;
       inst->src[arg].subreg_offset = entry->src.subreg_offset;
       break;
    case ATTR:
    case GRF:
       {
-         assert(entry->src.width % inst->src[arg].width == 0);
          /* In this case, we'll just leave the width alone.  The source
           * register could have different widths depending on how it is
           * being used.  For instance, if only half of the register was
@@ -529,6 +535,7 @@
 
       case BRW_OPCODE_MACH:
       case BRW_OPCODE_MUL:
+      case SHADER_OPCODE_MULH:
       case BRW_OPCODE_ADD:
       case BRW_OPCODE_OR:
       case BRW_OPCODE_AND:
@@ -715,7 +722,6 @@
                acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
                entry->dst = inst->dst;
                entry->dst.reg_offset = offset;
-               entry->dst.width = effective_width;
                entry->src = inst->src[i];
                entry->regs_written = regs_written;
                entry->opcode = inst->opcode;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -49,6 +49,8 @@
 #include "glsl/glsl_types.h"
 #include "program/sampler.h"
 
+using namespace brw;
+
 void
 fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
               const fs_reg *src, unsigned sources)
@@ -66,28 +68,6 @@
 
    assert(dst.file != IMM && dst.file != UNIFORM);
 
-   /* If exec_size == 0, try to guess it from the registers.  Since all
-    * manner of things may use hardware registers, we first try to guess
-    * based on GRF registers.  If this fails, we will go ahead and take the
-    * width from the destination register.
-    */
-   if (this->exec_size == 0) {
-      if (dst.file == GRF) {
-         this->exec_size = dst.width;
-      } else {
-         for (unsigned i = 0; i < sources; ++i) {
-            if (src[i].file != GRF && src[i].file != ATTR)
-               continue;
-
-            if (this->exec_size <= 1)
-               this->exec_size = src[i].width;
-            assert(src[i].width == 1 || src[i].width == this->exec_size);
-         }
-      }
-
-      if (this->exec_size == 0 && dst.file != BAD_FILE)
-         this->exec_size = dst.width;
-   }
    assert(this->exec_size != 0);
 
    this->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -98,8 +78,8 @@
    case HW_REG:
    case MRF:
    case ATTR:
-      this->regs_written =
-         DIV_ROUND_UP(MAX2(dst.width * dst.stride, 1) * type_sz(dst.type), 32);
+      this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
+                                        REG_SIZE);
       break;
    case BAD_FILE:
       this->regs_written = 0;
@@ -124,9 +104,9 @@
    init(opcode, exec_size, reg_undef, NULL, 0);
 }
 
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst)
+fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst)
 {
-   init(opcode, 0, dst, NULL, 0);
+   init(opcode, exec_size, dst, NULL, 0);
 }
 
 fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
@@ -136,12 +116,6 @@
    init(opcode, exec_size, dst, src, 1);
 }
 
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
-   const fs_reg src[1] = { src0 };
-   init(opcode, 0, dst, src, 1);
-}
-
 fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
                  const fs_reg &src0, const fs_reg &src1)
 {
@@ -149,13 +123,6 @@
    init(opcode, exec_size, dst, src, 2);
 }
 
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1)
-{
-   const fs_reg src[2] = { src0, src1 };
-   init(opcode, 0, dst, src, 2);
-}
-
 fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
                  const fs_reg &src0, const fs_reg &src1, const fs_reg &src2)
 {
@@ -163,19 +130,6 @@
    init(opcode, exec_size, dst, src, 3);
 }
 
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1, const fs_reg &src2)
-{
-   const fs_reg src[3] = { src0, src1, src2 };
-   init(opcode, 0, dst, src, 3);
-}
-
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst,
-                 const fs_reg src[], unsigned sources)
-{
-   init(opcode, 0, dst, src, sources);
-}
-
 fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
                  const fs_reg src[], unsigned sources)
 {
@@ -212,152 +166,13 @@
    }
 }
 
-#define ALU1(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0)                \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0);          \
-   }
-
-#define ALU2(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1)                                   \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);    \
-   }
-
-#define ALU2_ACC(op)                                                    \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1)                                   \
-   {                                                                    \
-      fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
-      inst->writes_accumulator = true;                                  \
-      return inst;                                                      \
-   }
-
-#define ALU3(op)                                                        \
-   fs_inst *                                                            \
-   fs_visitor::op(const fs_reg &dst, const fs_reg &src0,                \
-                  const fs_reg &src1, const fs_reg &src2)               \
-   {                                                                    \
-      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
-   }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(SEL)
-ALU2(MAC)
-
-/** Gen4 predicated IF. */
-fs_inst *
-fs_visitor::IF(enum brw_predicate predicate)
-{
-   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
-   inst->predicate = predicate;
-   return inst;
-}
-
-/** Gen6 IF with embedded comparison. */
-fs_inst *
-fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
-               enum brw_conditional_mod condition)
-{
-   assert(devinfo->gen == 6);
-   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
-                                        reg_null_d, src0, src1);
-   inst->conditional_mod = condition;
-   return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-fs_inst *
-fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1,
-                enum brw_conditional_mod condition)
-{
-   fs_inst *inst;
-
-   /* Take the instruction:
-    *
-    * CMP null<d> src0<f> src1<f>
-    *
-    * Original gen4 does type conversion to the destination type before
-    * comparison, producing garbage results for floating point comparisons.
-    *
-    * The destination type doesn't matter on newer generations, so we set the
-    * type to match src0 so we can compact the instruction.
-    */
-   dst.type = src0.type;
-   if (dst.file == HW_REG)
-      dst.fixed_hw_reg.type = dst.type;
-
-   resolve_ud_negate(&src0);
-   resolve_ud_negate(&src1);
-
-   inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
-   inst->conditional_mod = condition;
-
-   return inst;
-}
-
-fs_inst *
-fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources,
-                         int header_size)
-{
-   assert(dst.width % 8 == 0);
-   fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, dst.width,
-                                        dst, src, sources);
-   inst->header_size = header_size;
-
-   for (int i = 0; i < header_size; i++)
-      assert(src[i].file != GRF || src[i].width * type_sz(src[i].type) == 32);
-   inst->regs_written = header_size;
-
-   for (int i = header_size; i < sources; ++i)
-      assert(src[i].file != GRF || src[i].width == dst.width);
-   inst->regs_written += (sources - header_size) * (dst.width / 8);
-
-   return inst;
-}
-
-exec_list
-fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
+void
+fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
+                                       const fs_reg &dst,
                                        const fs_reg &surf_index,
                                        const fs_reg &varying_offset,
                                        uint32_t const_offset)
 {
-   exec_list instructions;
-   fs_inst *inst;
-
    /* We have our constant surface use a pitch of 4 bytes, so our index can
     * be any component of a vector, and then we load 4 contiguous
     * components starting from that.
@@ -370,11 +185,10 @@
     * the redundant ones.
     */
    fs_reg vec4_offset = vgrf(glsl_type::int_type);
-   instructions.push_tail(ADD(vec4_offset,
-                              varying_offset, fs_reg(const_offset & ~3)));
+   bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3));
 
    int scale = 1;
-   if (devinfo->gen == 4 && dst.width == 8) {
+   if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
       /* Pre-gen5, we can either use a SIMD8 message that requires (header,
        * u, v, r) as parameters, or we can just use the SIMD16 message
        * consisting of (header, u).  We choose the second, at the cost of a
@@ -389,13 +203,10 @@
    else
       op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
 
-   assert(dst.width % 8 == 0);
-   int regs_written = 4 * (dst.width / 8) * scale;
-   fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
-                               dst.type, dst.width);
-   inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
+   int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
+   fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type);
+   fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
    inst->regs_written = regs_written;
-   instructions.push_tail(inst);
 
    if (devinfo->gen < 7) {
       inst->base_mrf = 13;
@@ -403,33 +214,26 @@
       if (devinfo->gen == 4)
          inst->mlen = 3;
       else
-         inst->mlen = 1 + dispatch_width / 8;
+         inst->mlen = 1 + bld.dispatch_width() / 8;
    }
 
-   fs_reg result = offset(vec4_result, (const_offset & 3) * scale);
-   instructions.push_tail(MOV(dst, result));
-
-   return instructions;
+   bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
 }
 
 /**
  * A helper for MOV generation for fixing up broken hardware SEND dependency
  * handling.
  */
-fs_inst *
-fs_visitor::DEP_RESOLVE_MOV(int grf)
+void
+fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf)
 {
-   fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
-
-   inst->ir = NULL;
-   inst->annotation = "send dependency resolve";
-
    /* The caller always wants uncompressed to emit the minimal extra
     * dependencies, and to avoid having to deal with aligning its regs to 2.
     */
-   inst->exec_size = 8;
+   const fs_builder ubld = bld.annotate("send dependency resolve")
+                              .half(0);
 
-   return inst;
+   ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
 }
 
 bool
@@ -504,10 +308,14 @@
 
    for (int i = 0; i < this->sources; i++) {
       reg.type = this->src[i].type;
-      reg.width = this->src[i].width;
       if (!this->src[i].equals(reg))
          return false;
-      reg = ::offset(reg, 1);
+
+      if (i < this->header_size) {
+         reg.reg_offset += 1;
+      } else {
+         reg.reg_offset += this->exec_size / 8;
+      }
    }
 
    return true;
@@ -554,8 +362,8 @@
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_F;
+   this->stride = 0;
    this->fixed_hw_reg.dw1.f = f;
-   this->width = 1;
 }
 
 /** Immediate value constructor. */
@@ -564,8 +372,8 @@
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_D;
+   this->stride = 0;
    this->fixed_hw_reg.dw1.d = i;
-   this->width = 1;
 }
 
 /** Immediate value constructor. */
@@ -574,8 +382,8 @@
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_UD;
+   this->stride = 0;
    this->fixed_hw_reg.dw1.ud = u;
-   this->width = 1;
 }
 
 /** Vector float immediate value constructor. */
@@ -606,7 +414,6 @@
    this->file = HW_REG;
    this->fixed_hw_reg = fixed_hw_reg;
    this->type = fixed_hw_reg.type;
-   this->width = 1 << fixed_hw_reg.width;
 }
 
 bool
@@ -621,7 +428,6 @@
            abs == r.abs &&
            !reladdr && !r.reladdr &&
            memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 &&
-           width == r.width &&
            stride == r.stride);
 }
 
@@ -640,6 +446,15 @@
    return stride == 1;
 }
 
+unsigned
+fs_reg::component_size(unsigned width) const
+{
+   const unsigned stride = (file != HW_REG ? this->stride :
+                            fixed_hw_reg.hstride == 0 ? 0 :
+                            1 << (fixed_hw_reg.hstride - 1));
+   return MAX2(width * stride, 1) * type_sz(type);
+}
+
 int
 fs_visitor::type_size(const struct glsl_type *type)
 {
@@ -666,7 +481,10 @@
       return 0;
    case GLSL_TYPE_ATOMIC_UINT:
       return 0;
+   case GLSL_TYPE_SUBROUTINE:
+      return 1;
    case GLSL_TYPE_IMAGE:
+      return BRW_IMAGE_PARAM_SIZE;
    case GLSL_TYPE_VOID:
    case GLSL_TYPE_ERROR:
    case GLSL_TYPE_INTERFACE:
@@ -684,7 +502,7 @@
  * the destination of the MOV, with extra parameters set.
  */
 fs_reg
-fs_visitor::get_timestamp(fs_inst **out_mov)
+fs_visitor::get_timestamp(const fs_builder &bld)
 {
    assert(devinfo->gen >= 7);
 
@@ -693,13 +511,12 @@
                                           0),
                              BRW_REGISTER_TYPE_UD));
 
-   fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
+   fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
 
-   fs_inst *mov = MOV(dst, ts);
    /* We want to read the 3 fields we care about even if it's not enabled in
     * the dispatch.
     */
-   mov->force_writemask_all = true;
+   bld.group(4, 0).exec_all().MOV(dst, ts);
 
    /* The caller wants the low 32 bits of the timestamp.  Since it's running
     * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
@@ -713,105 +530,62 @@
     */
    dst.set_smear(0);
 
-   *out_mov = mov;
    return dst;
 }
 
 void
 fs_visitor::emit_shader_time_begin()
 {
-   current_annotation = "shader time start";
-   fs_inst *mov;
-   shader_start_time = get_timestamp(&mov);
-   emit(mov);
+   shader_start_time = get_timestamp(bld.annotate("shader time start"));
 }
 
 void
 fs_visitor::emit_shader_time_end()
 {
-   current_annotation = "shader time end";
-
-   enum shader_time_shader_type type, written_type, reset_type;
-   switch (stage) {
-   case MESA_SHADER_VERTEX:
-      type = ST_VS;
-      written_type = ST_VS_WRITTEN;
-      reset_type = ST_VS_RESET;
-      break;
-   case MESA_SHADER_GEOMETRY:
-      type = ST_GS;
-      written_type = ST_GS_WRITTEN;
-      reset_type = ST_GS_RESET;
-      break;
-   case MESA_SHADER_FRAGMENT:
-      if (dispatch_width == 8) {
-         type = ST_FS8;
-         written_type = ST_FS8_WRITTEN;
-         reset_type = ST_FS8_RESET;
-      } else {
-         assert(dispatch_width == 16);
-         type = ST_FS16;
-         written_type = ST_FS16_WRITTEN;
-         reset_type = ST_FS16_RESET;
-      }
-      break;
-   case MESA_SHADER_COMPUTE:
-      type = ST_CS;
-      written_type = ST_CS_WRITTEN;
-      reset_type = ST_CS_RESET;
-      break;
-   default:
-      unreachable("fs_visitor::emit_shader_time_end missing code");
-   }
-
    /* Insert our code just before the final SEND with EOT. */
    exec_node *end = this->instructions.get_tail();
    assert(end && ((fs_inst *) end)->eot);
+   const fs_builder ibld = bld.annotate("shader time end")
+                              .exec_all().at(NULL, end);
 
-   fs_inst *tm_read;
-   fs_reg shader_end_time = get_timestamp(&tm_read);
-   end->insert_before(tm_read);
+   fs_reg shader_end_time = get_timestamp(ibld);
 
    /* Check that there weren't any timestamp reset events (assuming these
     * were the only two timestamp reads that happened).
     */
    fs_reg reset = shader_end_time;
    reset.set_smear(2);
-   fs_inst *test = AND(reg_null_d, reset, fs_reg(1u));
-   test->conditional_mod = BRW_CONDITIONAL_Z;
-   test->force_writemask_all = true;
-   end->insert_before(test);
-   end->insert_before(IF(BRW_PREDICATE_NORMAL));
+   set_condmod(BRW_CONDITIONAL_Z,
+               ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u)));
+   ibld.IF(BRW_PREDICATE_NORMAL);
 
    fs_reg start = shader_start_time;
    start.negate = true;
-   fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
+   fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
    diff.set_smear(0);
-   fs_inst *add = ADD(diff, start, shader_end_time);
-   add->force_writemask_all = true;
-   end->insert_before(add);
+
+   const fs_builder cbld = ibld.group(1, 0);
+   cbld.group(1, 0).ADD(diff, start, shader_end_time);
 
    /* If there were no instructions between the two timestamp gets, the diff
     * is 2 cycles.  Remove that overhead, so I can forget about that when
     * trying to determine the time taken for single instructions.
     */
-   add = ADD(diff, diff, fs_reg(-2u));
-   add->force_writemask_all = true;
-   end->insert_before(add);
-
-   end->insert_before(SHADER_TIME_ADD(type, diff));
-   end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u)));
-   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width));
-   end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
-   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width));
+   cbld.ADD(diff, diff, fs_reg(-2u));
+   SHADER_TIME_ADD(cbld, 0, diff);
+   SHADER_TIME_ADD(cbld, 1, fs_reg(1u));
+   ibld.emit(BRW_OPCODE_ELSE);
+   SHADER_TIME_ADD(cbld, 2, fs_reg(1u));
+   ibld.emit(BRW_OPCODE_ENDIF);
 }
 
-fs_inst *
-fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value)
+void
+fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
+                            int shader_time_subindex,
+                            fs_reg value)
 {
-   int shader_time_index =
-      brw_get_shader_time_index(brw, shader_prog, prog, type);
-   fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
+   int index = shader_time_index * 3 + shader_time_subindex;
+   fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
 
    fs_reg payload;
    if (dispatch_width == 8)
@@ -819,8 +593,7 @@
    else
       payload = vgrf(glsl_type::uint_type);
 
-   return new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
-                               fs_reg(), payload, offset, value);
+   bld.emit(SHADER_OPCODE_SHADER_TIME_ADD, fs_reg(), payload, offset, value);
 }
 
 void
@@ -863,65 +636,16 @@
  * During a SIMD16 compile (if one happens anyway), this just calls fail().
  */
 void
-fs_visitor::no16(const char *format, ...)
+fs_visitor::no16(const char *msg)
 {
-   va_list va;
-
-   va_start(va, format);
-
    if (dispatch_width == 16) {
-      vfail(format, va);
+      fail("%s", msg);
    } else {
       simd16_unsupported = true;
 
-      if (brw->perf_debug) {
-         if (no16_msg)
-            ralloc_vasprintf_append(&no16_msg, format, va);
-         else
-            no16_msg = ralloc_vasprintf(mem_ctx, format, va);
-      }
+      compiler->shader_perf_log(log_data,
+                                "SIMD16 shader failed to compile: %s", msg);
    }
-
-   va_end(va);
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1, const fs_reg &src2)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst,
-                 fs_reg src[], int sources)
-{
-   return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources));
 }
 
 /**
@@ -936,51 +660,160 @@
 fs_inst::is_partial_write() const
 {
    return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
-           (this->dst.width * type_sz(this->dst.type)) < 32 ||
+           (this->exec_size * type_sz(this->dst.type)) < 32 ||
            !this->dst.is_contiguous());
 }
 
+unsigned
+fs_inst::components_read(unsigned i) const
+{
+   switch (opcode) {
+   case FS_OPCODE_LINTERP:
+      if (i == 0)
+         return 2;
+      else
+         return 1;
+
+   case FS_OPCODE_PIXEL_X:
+   case FS_OPCODE_PIXEL_Y:
+      assert(i == 0);
+      return 2;
+
+   case FS_OPCODE_FB_WRITE_LOGICAL:
+      assert(src[6].file == IMM);
+      /* First/second FB write color. */
+      if (i < 2)
+         return src[6].fixed_hw_reg.dw1.ud;
+      else
+         return 1;
+
+   case SHADER_OPCODE_TEX_LOGICAL:
+   case SHADER_OPCODE_TXD_LOGICAL:
+   case SHADER_OPCODE_TXF_LOGICAL:
+   case SHADER_OPCODE_TXL_LOGICAL:
+   case SHADER_OPCODE_TXS_LOGICAL:
+   case FS_OPCODE_TXB_LOGICAL:
+   case SHADER_OPCODE_TXF_CMS_LOGICAL:
+   case SHADER_OPCODE_TXF_UMS_LOGICAL:
+   case SHADER_OPCODE_TXF_MCS_LOGICAL:
+   case SHADER_OPCODE_LOD_LOGICAL:
+   case SHADER_OPCODE_TG4_LOGICAL:
+   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+      assert(src[8].file == IMM && src[9].file == IMM);
+      /* Texture coordinates. */
+      if (i == 0)
+         return src[8].fixed_hw_reg.dw1.ud;
+      /* Texture derivatives. */
+      else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL)
+         return src[9].fixed_hw_reg.dw1.ud;
+      /* Texture offset. */
+      else if (i == 7)
+         return 2;
+      else
+         return 1;
+
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+      assert(src[3].file == IMM);
+      /* Surface coordinates. */
+      if (i == 0)
+         return src[3].fixed_hw_reg.dw1.ud;
+      /* Surface operation source (ignored for reads). */
+      else if (i == 1)
+         return 0;
+      else
+         return 1;
+
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+      assert(src[3].file == IMM &&
+             src[4].file == IMM);
+      /* Surface coordinates. */
+      if (i == 0)
+         return src[3].fixed_hw_reg.dw1.ud;
+      /* Surface operation source. */
+      else if (i == 1)
+         return src[4].fixed_hw_reg.dw1.ud;
+      else
+         return 1;
+
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
+      assert(src[3].file == IMM &&
+             src[4].file == IMM);
+      const unsigned op = src[4].fixed_hw_reg.dw1.ud;
+      /* Surface coordinates. */
+      if (i == 0)
+         return src[3].fixed_hw_reg.dw1.ud;
+      /* Surface operation source. */
+      else if (i == 1 && op == BRW_AOP_CMPWR)
+         return 2;
+      else if (i == 1 && (op == BRW_AOP_INC || op == BRW_AOP_DEC ||
+                          op == BRW_AOP_PREDEC))
+         return 0;
+      else
+         return 1;
+   }
+
+   default:
+      return 1;
+   }
+}
+
 int
 fs_inst::regs_read(int arg) const
 {
-   if (is_tex() && arg == 0 && src[0].file == GRF) {
-      return mlen;
-   } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
-      return mlen;
-   } else if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8 && arg == 0) {
-      return mlen;
-   } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
-      return mlen;
-   } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
-      return mlen;
-   } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_WRITE && arg == 0) {
-      return mlen;
-   } else if (opcode == SHADER_OPCODE_TYPED_ATOMIC && arg == 0) {
-      return mlen;
-   } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_READ && arg == 0) {
-      return mlen;
-   } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE && arg == 0) {
-      return mlen;
-   } else if (opcode == FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET && arg == 0) {
-      return mlen;
-   } else if (opcode == FS_OPCODE_LINTERP && arg == 0) {
-      return exec_size / 4;
+   switch (opcode) {
+   case FS_OPCODE_FB_WRITE:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
+   case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+   case SHADER_OPCODE_TYPED_ATOMIC:
+   case SHADER_OPCODE_TYPED_SURFACE_READ:
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+      if (arg == 0)
+         return mlen;
+      break;
+
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+      /* The payload is actually stored in src1 */
+      if (arg == 1)
+         return mlen;
+      break;
+
+   case FS_OPCODE_LINTERP:
+      if (arg == 1)
+         return 1;
+      break;
+
+   case SHADER_OPCODE_LOAD_PAYLOAD:
+      if (arg < this->header_size)
+         return 1;
+      break;
+
+   case CS_OPCODE_CS_TERMINATE:
+      return 1;
+
+   default:
+      if (is_tex() && arg == 0 && src[0].file == GRF)
+         return mlen;
+      break;
    }
 
    switch (src[arg].file) {
    case BAD_FILE:
+      return 0;
    case UNIFORM:
    case IMM:
       return 1;
    case GRF:
    case ATTR:
    case HW_REG:
-      if (src[arg].stride == 0) {
-         return 1;
-      } else {
-         int size = src[arg].width * src[arg].stride * type_sz(src[arg].type);
-         return (size + 31) / 32;
-      }
+      return DIV_ROUND_UP(components_read(arg) *
+                          src[arg].component_size(exec_size),
+                          REG_SIZE);
    case MRF:
       unreachable("MRF registers are not allowed as sources");
    default:
@@ -1074,15 +907,7 @@
 {
    int reg_width = dispatch_width / 8;
    return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width),
-                 brw_type_for_base_type(type), dispatch_width);
-}
-
-fs_reg
-fs_visitor::vgrf(int num_components)
-{
-   int reg_width = dispatch_width / 8;
-   return fs_reg(GRF, alloc.allocate(num_components * reg_width),
-                 BRW_REGISTER_TYPE_F, dispatch_width);
+                 brw_type_for_base_type(type));
 }
 
 /** Fixed HW reg constructor. */
@@ -1092,14 +917,7 @@
    this->file = file;
    this->reg = reg;
    this->type = BRW_REGISTER_TYPE_F;
-
-   switch (file) {
-   case UNIFORM:
-      this->width = 1;
-      break;
-   default:
-      this->width = 8;
-   }
+   this->stride = (file == UNIFORM ? 0 : 1);
 }
 
 /** Fixed HW reg constructor. */
@@ -1109,45 +927,7 @@
    this->file = file;
    this->reg = reg;
    this->type = type;
-
-   switch (file) {
-   case UNIFORM:
-      this->width = 1;
-      break;
-   default:
-      this->width = 8;
-   }
-}
-
-/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type,
-               uint8_t width)
-{
-   init();
-   this->file = file;
-   this->reg = reg;
-   this->type = type;
-   this->width = width;
-}
-
-fs_reg *
-fs_visitor::variable_storage(ir_variable *var)
-{
-   return (fs_reg *)hash_table_find(this->variable_ht, var);
-}
-
-void
-import_uniforms_callback(const void *key,
-			 void *data,
-			 void *closure)
-{
-   struct hash_table *dst_ht = (struct hash_table *)closure;
-   const fs_reg *reg = (const fs_reg *)data;
-
-   if (reg->file != UNIFORM)
-      return;
-
-   hash_table_insert(dst_ht, data, key);
+   this->stride = (file == UNIFORM ? 0 : 1);
 }
 
 /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
@@ -1156,89 +936,22 @@
 void
 fs_visitor::import_uniforms(fs_visitor *v)
 {
-   hash_table_call_foreach(v->variable_ht,
-			   import_uniforms_callback,
-			   variable_ht);
    this->push_constant_loc = v->push_constant_loc;
    this->pull_constant_loc = v->pull_constant_loc;
    this->uniforms = v->uniforms;
    this->param_size = v->param_size;
 }
 
-/* Our support for uniforms is piggy-backed on the struct
- * gl_fragment_program, because that's where the values actually
- * get stored, rather than in some global gl_shader_program uniform
- * store.
- */
-void
-fs_visitor::setup_uniform_values(ir_variable *ir)
-{
-   int namelen = strlen(ir->name);
-
-   /* The data for our (non-builtin) uniforms is stored in a series of
-    * gl_uniform_driver_storage structs for each subcomponent that
-    * glGetUniformLocation() could name.  We know it's been set up in the same
-    * order we'd walk the type, so walk the list of storage and find anything
-    * with our name, or the prefix of a component that starts with our name.
-    */
-   unsigned params_before = uniforms;
-   for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
-      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
-      if (strncmp(ir->name, storage->name, namelen) != 0 ||
-          (storage->name[namelen] != 0 &&
-           storage->name[namelen] != '.' &&
-           storage->name[namelen] != '[')) {
-         continue;
-      }
-
-      unsigned slots = storage->type->component_slots();
-      if (storage->array_elements)
-         slots *= storage->array_elements;
-
-      for (unsigned i = 0; i < slots; i++) {
-         stage_prog_data->param[uniforms++] = &storage->storage[i];
-      }
-   }
-
-   /* Make sure we actually initialized the right amount of stuff here. */
-   assert(params_before + ir->type->component_slots() == uniforms);
-   (void)params_before;
-}
-
-
-/* Our support for builtin uniforms is even scarier than non-builtin.
- * It sits on top of the PROG_STATE_VAR parameters that are
- * automatically updated from GL context state.
- */
 void
-fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
+fs_visitor::setup_vector_uniform_values(const gl_constant_value *values, unsigned n)
 {
-   const ir_state_slot *const slots = ir->get_state_slots();
-   assert(slots != NULL);
+   static const gl_constant_value zero = { 0 };
 
-   for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
-      /* This state reference has already been setup by ir_to_mesa, but we'll
-       * get the same index back here.
-       */
-      int index = _mesa_add_state_reference(this->prog->Parameters,
-					    (gl_state_index *)slots[i].tokens);
-
-      /* Add each of the unique swizzles of the element as a parameter.
-       * This'll end up matching the expected layout of the
-       * array/matrix/structure we're trying to fill in.
-       */
-      int last_swiz = -1;
-      for (unsigned int j = 0; j < 4; j++) {
-	 int swiz = GET_SWZ(slots[i].swizzle, j);
-	 if (swiz == last_swiz)
-	    break;
-	 last_swiz = swiz;
+   for (unsigned i = 0; i < n; ++i)
+      stage_prog_data->param[uniforms++] = &values[i];
 
-         stage_prog_data->param[uniforms++] =
-            &prog->Parameters->ParameterValues[index][swiz];
-      }
-   }
+   for (unsigned i = n; i < 4; ++i)
+      stage_prog_data->param[uniforms++] = &zero;
 }
 
 fs_reg *
@@ -1253,40 +966,40 @@
 
    /* gl_FragCoord.x */
    if (pixel_center_integer) {
-      emit(MOV(wpos, this->pixel_x));
+      bld.MOV(wpos, this->pixel_x);
    } else {
-      emit(ADD(wpos, this->pixel_x, fs_reg(0.5f)));
+      bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
    }
-   wpos = offset(wpos, 1);
+   wpos = offset(wpos, bld, 1);
 
    /* gl_FragCoord.y */
    if (!flip && pixel_center_integer) {
-      emit(MOV(wpos, this->pixel_y));
+      bld.MOV(wpos, this->pixel_y);
    } else {
       fs_reg pixel_y = this->pixel_y;
-      float offset = (pixel_center_integer ? 0.0 : 0.5);
+      float offset = (pixel_center_integer ? 0.0f : 0.5f);
 
       if (flip) {
 	 pixel_y.negate = true;
-	 offset += key->drawable_height - 1.0;
+	 offset += key->drawable_height - 1.0f;
       }
 
-      emit(ADD(wpos, pixel_y, fs_reg(offset)));
+      bld.ADD(wpos, pixel_y, fs_reg(offset));
    }
-   wpos = offset(wpos, 1);
+   wpos = offset(wpos, bld, 1);
 
    /* gl_FragCoord.z */
    if (devinfo->gen >= 6) {
-      emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+      bld.MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)));
    } else {
-      emit(FS_OPCODE_LINTERP, wpos,
+      bld.emit(FS_OPCODE_LINTERP, wpos,
            this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
            interp_reg(VARYING_SLOT_POS, 2));
    }
-   wpos = offset(wpos, 1);
+   wpos = offset(wpos, bld, 1);
 
    /* gl_FragCoord.w: Already set up in emit_interpolation */
-   emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
+   bld.MOV(wpos, this->wpos_w);
 
    return reg;
 }
@@ -1321,8 +1034,8 @@
        */
       barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
    }
-   return emit(FS_OPCODE_LINTERP, attr,
-               this->delta_xy[barycoord_mode], interp);
+   return bld.emit(FS_OPCODE_LINTERP, attr,
+                   this->delta_xy[barycoord_mode], interp);
 }
 
 void
@@ -1366,7 +1079,7 @@
 	    /* If there's no incoming setup data for this slot, don't
 	     * emit interpolation for it.
 	     */
-	    attr = offset(attr, type->vector_elements);
+	    attr = offset(attr, bld, type->vector_elements);
 	    location++;
 	    continue;
 	 }
@@ -1380,8 +1093,8 @@
 	       struct brw_reg interp = interp_reg(location, k);
 	       interp = suboffset(interp, 3);
                interp.type = attr.type;
-	       emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
-	       attr = offset(attr, 1);
+               bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
+	       attr = offset(attr, bld, 1);
 	    }
 	 } else {
 	    /* Smooth/noperspective interpolation case. */
@@ -1393,7 +1106,7 @@
                    * unlit, replace the centroid data with non-centroid
                    * data.
                    */
-                  emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+                  bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
 
                   fs_inst *inst;
                   inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
@@ -1417,9 +1130,9 @@
                                mod_sample || key->persample_shading);
                }
                if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
-                  emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
+                  bld.MUL(attr, attr, this->pixel_w);
                }
-	       attr = offset(attr, 1);
+	       attr = offset(attr, bld, 1);
 	    }
 
 	 }
@@ -1448,7 +1161,7 @@
       fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
       g0.negate = true;
 
-      emit(ASR(*reg, g0, fs_reg(15)));
+      bld.ASR(*reg, g0, fs_reg(15));
    } else {
       /* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
        * a boolean result from this (1/true or 0/false).
@@ -1463,7 +1176,7 @@
       fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
       g1_6.negate = true;
 
-      emit(ASR(*reg, g1_6, fs_reg(31)));
+      bld.ASR(*reg, g1_6, fs_reg(31));
    }
 
    return reg;
@@ -1478,9 +1191,9 @@
 
    if (key->compute_pos_offset) {
       /* Convert int_sample_pos to floating point */
-      emit(MOV(dst, int_sample_pos));
+      bld.MOV(dst, int_sample_pos);
       /* Scale to the range [0, 1] */
-      emit(MUL(dst, dst, fs_reg(1 / 16.0f)));
+      bld.MUL(dst, dst, fs_reg(1 / 16.0f));
    }
    else {
       /* From ARB_sample_shading specification:
@@ -1488,7 +1201,7 @@
        *  rasterization is disabled, gl_SamplePosition will always be
        *  (0.5, 0.5).
        */
-      emit(MOV(dst, fs_reg(0.5f)));
+      bld.MOV(dst, fs_reg(0.5f));
    }
 }
 
@@ -1497,7 +1210,7 @@
 {
    assert(devinfo->gen >= 6);
 
-   this->current_annotation = "compute sample position";
+   const fs_builder abld = bld.annotate("compute sample position");
    fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::vec2_type));
    fs_reg pos = *reg;
    fs_reg int_sample_x = vgrf(glsl_type::int_type);
@@ -1519,22 +1232,22 @@
                     BRW_REGISTER_TYPE_B), 16, 8, 2);
 
    if (dispatch_width == 8) {
-      emit(MOV(int_sample_x, fs_reg(sample_pos_reg)));
+      abld.MOV(int_sample_x, fs_reg(sample_pos_reg));
    } else {
-      emit(MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg)));
-      emit(MOV(half(int_sample_x, 1), fs_reg(suboffset(sample_pos_reg, 16))))
-         ->force_sechalf = true;
+      abld.half(0).MOV(half(int_sample_x, 0), fs_reg(sample_pos_reg));
+      abld.half(1).MOV(half(int_sample_x, 1),
+                       fs_reg(suboffset(sample_pos_reg, 16)));
    }
    /* Compute gl_SamplePosition.x */
    compute_sample_position(pos, int_sample_x);
-   pos = offset(pos, 1);
+   pos = offset(pos, abld, 1);
    if (dispatch_width == 8) {
-      emit(MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))));
+      abld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)));
    } else {
-      emit(MOV(half(int_sample_y, 0),
-               fs_reg(suboffset(sample_pos_reg, 1))));
-      emit(MOV(half(int_sample_y, 1), fs_reg(suboffset(sample_pos_reg, 17))))
-         ->force_sechalf = true;
+      abld.half(0).MOV(half(int_sample_y, 0),
+                       fs_reg(suboffset(sample_pos_reg, 1)));
+      abld.half(1).MOV(half(int_sample_y, 1),
+                       fs_reg(suboffset(sample_pos_reg, 17)));
    }
    /* Compute gl_SamplePosition.y */
    compute_sample_position(pos, int_sample_y);
@@ -1548,7 +1261,7 @@
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
    assert(devinfo->gen >= 6);
 
-   this->current_annotation = "compute sample id";
+   const fs_builder abld = bld.annotate("compute sample id");
    fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
 
    if (key->compute_sample_id) {
@@ -1575,161 +1288,61 @@
        * are sample 1 of subspan 0; the third group is sample 0 of
        * subspan 1, and finally sample 1 of subspan 1.
        */
-      fs_inst *inst;
-      inst = emit(BRW_OPCODE_AND, t1,
-                  fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
-                  fs_reg(0xc0));
-      inst->force_writemask_all = true;
-      inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
-      inst->force_writemask_all = true;
+      abld.exec_all()
+          .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+               fs_reg(0xc0));
+      abld.exec_all().SHR(t1, t1, fs_reg(5));
+
       /* This works for both SIMD8 and SIMD16 */
-      inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)));
-      inst->force_writemask_all = true;
+      abld.exec_all()
+          .MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
+
       /* This special instruction takes care of setting vstride=1,
        * width=4, hstride=0 of t2 during an ADD instruction.
        */
-      emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
+      abld.emit(FS_OPCODE_SET_SAMPLE_ID, *reg, t1, t2);
    } else {
       /* As per GL_ARB_sample_shading specification:
        * "When rendering to a non-multisample buffer, or if multisample
        *  rasterization is disabled, gl_SampleID will always be zero."
        */
-      emit(BRW_OPCODE_MOV, *reg, fs_reg(0));
+      abld.MOV(*reg, fs_reg(0));
    }
 
    return reg;
 }
 
-void
-fs_visitor::resolve_source_modifiers(fs_reg *src)
+fs_reg
+fs_visitor::resolve_source_modifiers(const fs_reg &src)
 {
-   if (!src->abs && !src->negate)
-      return;
+   if (!src.abs && !src.negate)
+      return src;
 
-   fs_reg temp = retype(vgrf(1), src->type);
-   emit(MOV(temp, *src));
-   *src = temp;
+   fs_reg temp = bld.vgrf(src.type);
+   bld.MOV(temp, src);
+
+   return temp;
 }
 
-fs_reg
-fs_visitor::fix_math_operand(fs_reg src)
+void
+fs_visitor::emit_discard_jump()
 {
-   /* Can't do hstride == 0 args on gen6 math, so expand it out. We
-    * might be able to do better by doing execsize = 1 math and then
-    * expanding that result out, but we would need to be careful with
-    * masking.
-    *
-    * The hardware ignores source modifiers (negate and abs) on math
-    * instructions, so we also move to a temp to set those up.
-    */
-   if (devinfo->gen == 6 && src.file != UNIFORM && src.file != IMM &&
-       !src.abs && !src.negate)
-      return src;
+   assert(((brw_wm_prog_data*) this->prog_data)->uses_kill);
 
-   /* Gen7 relaxes most of the above restrictions, but still can't use IMM
-    * operands to math
+   /* For performance, after a discard, jump to the end of the
+    * shader if all relevant channels have been discarded.
     */
-   if (devinfo->gen >= 7 && src.file != IMM)
-      return src;
+   fs_inst *discard_jump = bld.emit(FS_OPCODE_DISCARD_JUMP);
+   discard_jump->flag_subreg = 1;
 
-   fs_reg expanded = vgrf(glsl_type::float_type);
-   expanded.type = src.type;
-   emit(BRW_OPCODE_MOV, expanded, src);
-   return expanded;
+   discard_jump->predicate = (dispatch_width == 8)
+                             ? BRW_PREDICATE_ALIGN1_ANY8H
+                             : BRW_PREDICATE_ALIGN1_ANY16H;
+   discard_jump->predicate_inverse = true;
 }
 
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
-{
-   switch (opcode) {
-   case SHADER_OPCODE_RCP:
-   case SHADER_OPCODE_RSQ:
-   case SHADER_OPCODE_SQRT:
-   case SHADER_OPCODE_EXP2:
-   case SHADER_OPCODE_LOG2:
-   case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
-      break;
-   default:
-      unreachable("not reached: bad math opcode");
-   }
-
-   /* Can't do hstride == 0 args to gen6 math, so expand it out.  We
-    * might be able to do better by doing execsize = 1 math and then
-    * expanding that result out, but we would need to be careful with
-    * masking.
-    *
-    * Gen 6 hardware ignores source modifiers (negate and abs) on math
-    * instructions, so we also move to a temp to set those up.
-    */
-   if (devinfo->gen == 6 || devinfo->gen == 7)
-      src = fix_math_operand(src);
-
-   fs_inst *inst = emit(opcode, dst, src);
-
-   if (devinfo->gen < 6) {
-      inst->base_mrf = 2;
-      inst->mlen = dispatch_width / 8;
-   }
-
-   return inst;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
-{
-   int base_mrf = 2;
-   fs_inst *inst;
-
-   if (devinfo->gen >= 8) {
-      inst = emit(opcode, dst, src0, src1);
-   } else if (devinfo->gen >= 6) {
-      src0 = fix_math_operand(src0);
-      src1 = fix_math_operand(src1);
-
-      inst = emit(opcode, dst, src0, src1);
-   } else {
-      /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
-       * "Message Payload":
-       *
-       * "Operand0[7].  For the INT DIV functions, this operand is the
-       *  denominator."
-       *  ...
-       * "Operand1[7].  For the INT DIV functions, this operand is the
-       *  numerator."
-       */
-      bool is_int_div = opcode != SHADER_OPCODE_POW;
-      fs_reg &op0 = is_int_div ? src1 : src0;
-      fs_reg &op1 = is_int_div ? src0 : src1;
-
-      emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1));
-      inst = emit(opcode, dst, op0, reg_null_f);
-
-      inst->base_mrf = base_mrf;
-      inst->mlen = 2 * dispatch_width / 8;
-   }
-   return inst;
-}
-
-void
-fs_visitor::emit_discard_jump()
-{
-   assert(((brw_wm_prog_data*) this->prog_data)->uses_kill);
-
-   /* For performance, after a discard, jump to the end of the
-    * shader if all relevant channels have been discarded.
-    */
-   fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
-   discard_jump->flag_subreg = 1;
-
-   discard_jump->predicate = (dispatch_width == 8)
-                             ? BRW_PREDICATE_ALIGN1_ANY8H
-                             : BRW_PREDICATE_ALIGN1_ANY16H;
-   discard_jump->predicate_inverse = true;
-}
-
-void
-fs_visitor::assign_curb_setup()
+void
+fs_visitor::assign_curb_setup()
 {
    if (dispatch_width == 8) {
       prog_data->dispatch_grf_start_reg = payload.num_regs;
@@ -1768,6 +1381,7 @@
 						  constant_nr / 8,
 						  constant_nr % 8);
 
+            assert(inst->src[i].stride == 0);
 	    inst->src[i].file = HW_REG;
 	    inst->src[i].fixed_hw_reg = byte_offset(
                retype(brw_reg, inst->src[i].type),
@@ -2313,26 +1927,25 @@
 	    continue;
 
          /* Set up the annotation tracking for new generated instructions. */
-         base_ir = inst->ir;
-         current_annotation = inst->annotation;
-
+         const fs_builder ibld(this, block, inst);
          fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
          fs_reg dst = vgrf(glsl_type::float_type);
 
+         assert(inst->src[i].stride == 0);
+
          /* Generate a pull load into dst. */
          if (inst->src[i].reladdr) {
-            exec_list list = VARYING_PULL_CONSTANT_LOAD(dst,
-                                                        surf_index,
-                                                        *inst->src[i].reladdr,
-                                                        pull_index);
-            inst->insert_before(block, &list);
+            VARYING_PULL_CONSTANT_LOAD(ibld, dst,
+                                       surf_index,
+                                       *inst->src[i].reladdr,
+                                       pull_index);
             inst->src[i].reladdr = NULL;
+            inst->src[i].stride = 1;
          } else {
+            const fs_builder ubld = ibld.exec_all().group(8, 0);
             fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
-            fs_inst *pull =
-               new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
-                                    dst, surf_index, offset);
-            inst->insert_before(block, pull);
+            ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                      dst, surf_index, offset);
             inst->src[i].set_smear(pull_index & 3);
          }
 
@@ -2340,7 +1953,6 @@
          inst->src[i].file = GRF;
          inst->src[i].reg = dst.reg;
          inst->src[i].reg_offset = 0;
-         inst->src[i].width = dispatch_width;
       }
    }
    invalidate_live_intervals();
@@ -2608,11 +2220,11 @@
        *     "Parameter 0 is required except for the sampleinfo message, which
        *      has no parameter 0"
        */
-      while (inst->mlen > inst->header_size + dispatch_width / 8 &&
+      while (inst->mlen > inst->header_size + inst->exec_size / 8 &&
              load_payload->src[(inst->mlen - inst->header_size) /
-                               (dispatch_width / 8) +
+                               (inst->exec_size / 8) +
                                inst->header_size - 1].is_zero()) {
-         inst->mlen -= dispatch_width / 8;
+         inst->mlen -= inst->exec_size / 8;
          progress = true;
       }
    }
@@ -2649,7 +2261,8 @@
       return false;
 
    /* Look for a texturing instruction immediately before the final FB_WRITE. */
-   fs_inst *fb_write = (fs_inst *) cfg->blocks[cfg->num_blocks - 1]->end();
+   bblock_t *block = cfg->blocks[cfg->num_blocks - 1];
+   fs_inst *fb_write = (fs_inst *)block->end();
    assert(fb_write->eot);
    assert(fb_write->opcode == FS_OPCODE_FB_WRITE);
 
@@ -2680,9 +2293,11 @@
    assert(!tex_inst->eot); /* We can't get here twice */
    assert((tex_inst->offset & (0xff << 24)) == 0);
 
+   const fs_builder ibld(this, block, tex_inst);
+
    tex_inst->offset |= fb_write->target << 24;
    tex_inst->eot = true;
-   tex_inst->dst = reg_null_ud;
+   tex_inst->dst = ibld.null_reg_ud();
    fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
 
    /* If a header is present, marking the eot is sufficient. Otherwise, we need
@@ -2694,7 +2309,8 @@
    if (tex_inst->header_size != 0)
       return true;
 
-   fs_reg send_header = vgrf(load_payload->sources + 1);
+   fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,
+                                  load_payload->sources + 1);
    fs_reg *new_sources =
       ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1);
 
@@ -2756,12 +2372,12 @@
 
       if (depth == 0 &&
           inst->dst.file == GRF &&
-          alloc.sizes[inst->dst.reg] == inst->dst.width / 8 &&
+          alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
           !inst->is_partial_write()) {
          if (remap[dst] == -1) {
             remap[dst] = dst;
          } else {
-            remap[dst] = alloc.allocate(inst->dst.width / 8);
+            remap[dst] = alloc.allocate(inst->exec_size / 8);
             inst->dst.reg = remap[dst];
             progress = true;
          }
@@ -2892,7 +2508,7 @@
             /* Things returning more than one register would need us to
              * understand coalescing out more than one MOV at a time.
              */
-            if (scan_inst->regs_written > scan_inst->dst.width / 8)
+            if (scan_inst->regs_written > scan_inst->exec_size / 8)
                break;
 
 	    /* SEND instructions can't have MRF as a destination. */
@@ -3045,13 +2661,12 @@
    int base_mrf = 1;
    int color_mrf = base_mrf + 2;
 
-   fs_inst *mov = emit(MOV(vec4(brw_message_reg(color_mrf)),
-                           fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)));
-   mov->force_writemask_all = true;
+   fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+                                     fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
 
    fs_inst *write;
    if (key->nr_color_regions == 1) {
-      write = emit(FS_OPCODE_REP_FB_WRITE);
+      write = bld.emit(FS_OPCODE_REP_FB_WRITE);
       write->saturate = key->clamp_fragment_color;
       write->base_mrf = color_mrf;
       write->target = 0;
@@ -3060,7 +2675,7 @@
    } else {
       assume(key->nr_color_regions > 0);
       for (int i = 0; i < key->nr_color_regions; ++i) {
-         write = emit(FS_OPCODE_REP_FB_WRITE);
+         write = bld.emit(FS_OPCODE_REP_FB_WRITE);
          write->saturate = key->clamp_fragment_color;
          write->base_mrf = base_mrf;
          write->target = i;
@@ -3214,9 +2829,9 @@
        */
       if (block->start() == scan_inst) {
          for (int i = 0; i < write_len; i++) {
-            if (needs_dep[i]) {
-               inst->insert_before(block, DEP_RESOLVE_MOV(first_write_grf + i));
-            }
+            if (needs_dep[i])
+               DEP_RESOLVE_MOV(fs_builder(this, block, inst),
+                               first_write_grf + i);
          }
          return;
       }
@@ -3232,7 +2847,7 @@
             if (reg >= first_write_grf &&
                 reg < first_write_grf + write_len &&
                 needs_dep[reg - first_write_grf]) {
-               inst->insert_before(block, DEP_RESOLVE_MOV(reg));
+               DEP_RESOLVE_MOV(fs_builder(this, block, inst), reg);
                needs_dep[reg - first_write_grf] = false;
                if (scan_inst->exec_size == 16)
                   needs_dep[reg - first_write_grf + 1] = false;
@@ -3279,8 +2894,8 @@
       if (block->end() == scan_inst) {
          for (int i = 0; i < write_len; i++) {
             if (needs_dep[i])
-               scan_inst->insert_before(block,
-                                        DEP_RESOLVE_MOV(first_write_grf + i));
+               DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+                               first_write_grf + i);
          }
          return;
       }
@@ -3295,7 +2910,8 @@
           scan_inst->dst.reg >= first_write_grf &&
           scan_inst->dst.reg < first_write_grf + write_len &&
           needs_dep[scan_inst->dst.reg - first_write_grf]) {
-         scan_inst->insert_before(block, DEP_RESOLVE_MOV(scan_inst->dst.reg));
+         DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+                         scan_inst->dst.reg);
          needs_dep[scan_inst->dst.reg - first_write_grf] = false;
       }
 
@@ -3365,14 +2981,18 @@
          assert(const_offset_reg.file == IMM &&
                 const_offset_reg.type == BRW_REGISTER_TYPE_UD);
          const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
-         fs_reg payload = fs_reg(GRF, alloc.allocate(1));
 
-         /* We have to use a message header on Skylake to get SIMD4x2 mode.
-          * Reserve space for the register.
-          */
+         fs_reg payload, offset;
          if (devinfo->gen >= 9) {
-            payload.reg_offset++;
-            alloc.sizes[payload.reg] = 2;
+            /* We have to use a message header on Skylake to get SIMD4x2
+             * mode.  Reserve space for the register.
+             */
+            offset = payload = fs_reg(GRF, alloc.allocate(2));
+            offset.reg_offset++;
+            inst->mlen = 2;
+         } else {
+            offset = payload = fs_reg(GRF, alloc.allocate(1));
+            inst->mlen = 1;
          }
 
          /* This is actually going to be a MOV, but since only the first dword
@@ -3381,7 +3001,7 @@
           * by live variable analysis, or register allocation will explode.
           */
          fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET,
-                                               8, payload, const_offset_reg);
+                                               8, offset, const_offset_reg);
          setup->force_writemask_all = true;
 
          setup->ir = inst->ir;
@@ -3394,6 +3014,7 @@
           */
          inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
          inst->src[1] = payload;
+         inst->base_mrf = -1;
 
          invalidate_live_intervals();
       } else {
@@ -3419,27 +3040,24 @@
 
       assert(inst->dst.file == MRF || inst->dst.file == GRF);
       assert(inst->saturate == false);
-
       fs_reg dst = inst->dst;
 
       /* Get rid of COMPR4.  We'll add it back in if we need it */
       if (dst.file == MRF)
          dst.reg = dst.reg & ~BRW_MRF_COMPR4;
 
-      dst.width = 8;
+      const fs_builder ibld(this, block, inst);
+      const fs_builder hbld = ibld.exec_all().group(8, 0);
+
       for (uint8_t i = 0; i < inst->header_size; i++) {
          if (inst->src[i].file != BAD_FILE) {
             fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD);
             fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD);
-            mov_src.width = 8;
-            fs_inst *mov = MOV(mov_dst, mov_src);
-            mov->force_writemask_all = true;
-            inst->insert_before(block, mov);
+            hbld.MOV(mov_dst, mov_src);
          }
-         dst = offset(dst, 1);
+         dst = offset(dst, hbld, 1);
       }
 
-      dst.width = inst->exec_size;
       if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
           inst->exec_size > 8) {
          /* In this case, the payload portion of the LOAD_PAYLOAD isn't
@@ -3465,23 +3083,13 @@
                if (devinfo->has_compr4) {
                   fs_reg compr4_dst = retype(dst, inst->src[i].type);
                   compr4_dst.reg |= BRW_MRF_COMPR4;
-
-                  fs_inst *mov = MOV(compr4_dst, inst->src[i]);
-                  mov->force_writemask_all = inst->force_writemask_all;
-                  inst->insert_before(block, mov);
+                  ibld.MOV(compr4_dst, inst->src[i]);
                } else {
                   /* Platform doesn't have COMPR4.  We have to fake it */
                   fs_reg mov_dst = retype(dst, inst->src[i].type);
-                  mov_dst.width = 8;
-
-                  fs_inst *mov = MOV(mov_dst, half(inst->src[i], 0));
-                  mov->force_writemask_all = inst->force_writemask_all;
-                  inst->insert_before(block, mov);
-
-                  mov = MOV(offset(mov_dst, 4), half(inst->src[i], 1));
-                  mov->force_writemask_all = inst->force_writemask_all;
-                  mov->force_sechalf = true;
-                  inst->insert_before(block, mov);
+                  ibld.half(0).MOV(mov_dst, half(inst->src[i], 0));
+                  mov_dst.reg += 4;
+                  ibld.half(1).MOV(mov_dst, half(inst->src[i], 1));
                }
             }
 
@@ -3504,14 +3112,9 @@
       }
 
       for (uint8_t i = inst->header_size; i < inst->sources; i++) {
-         if (inst->src[i].file != BAD_FILE) {
-            fs_inst *mov = MOV(retype(dst, inst->src[i].type),
-                               inst->src[i]);
-            mov->force_writemask_all = inst->force_writemask_all;
-            mov->force_sechalf = inst->force_sechalf;
-            inst->insert_before(block, mov);
-         }
-         dst = offset(dst, 1);
+         if (inst->src[i].file != BAD_FILE)
+            ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]);
+         dst = offset(dst, ibld, 1);
       }
 
       inst->remove(block);
@@ -3529,160 +3132,989 @@
 {
    bool progress = false;
 
-   /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation
-    * directly, but Cherryview cannot.
-    */
-   if (devinfo->gen >= 8 && !devinfo->is_cherryview)
-      return false;
-
    foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
-      if (inst->opcode != BRW_OPCODE_MUL ||
-          inst->dst.is_accumulator() ||
-          (inst->dst.type != BRW_REGISTER_TYPE_D &&
-           inst->dst.type != BRW_REGISTER_TYPE_UD))
-         continue;
+      const fs_builder ibld(this, block, inst);
 
-#define insert(instr) inst->insert_before(block, instr)
+      if (inst->opcode == BRW_OPCODE_MUL) {
+         if (inst->dst.is_accumulator() ||
+             (inst->dst.type != BRW_REGISTER_TYPE_D &&
+              inst->dst.type != BRW_REGISTER_TYPE_UD))
+            continue;
 
-      /* The MUL instruction isn't commutative. On Gen <= 6, only the low
-       * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
-       * src1 are used.
-       *
-       * If multiplying by an immediate value that fits in 16-bits, do a
-       * single MUL instruction with that value in the proper location.
-       */
-      if (inst->src[1].file == IMM &&
-          inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
-         if (devinfo->gen < 7) {
-            fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
-                       inst->dst.type, dispatch_width);
-            insert(MOV(imm, inst->src[1]));
-            insert(MUL(inst->dst, imm, inst->src[0]));
-         } else {
-            insert(MUL(inst->dst, inst->src[0], inst->src[1]));
-         }
-      } else {
-         /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
-          * do 32-bit integer multiplication in one instruction, but instead
-          * must do a sequence (which actually calculates a 64-bit result):
-          *
-          *    mul(8)  acc0<1>D   g3<8,8,1>D      g4<8,8,1>D
-          *    mach(8) null       g3<8,8,1>D      g4<8,8,1>D
-          *    mov(8)  g2<1>D     acc0<8,8,1>D
-          *
-          * But on Gen > 6, the ability to use second accumulator register
-          * (acc1) for non-float data types was removed, preventing a simple
-          * implementation in SIMD16. A 16-channel result can be calculated by
-          * executing the three instructions twice in SIMD8, once with quarter
-          * control of 1Q for the first eight channels and again with 2Q for
-          * the second eight channels.
-          *
-          * Which accumulator register is implicitly accessed (by AccWrEnable
-          * for instance) is determined by the quarter control. Unfortunately
-          * Ivybridge (and presumably Baytrail) has a hardware bug in which an
-          * implicit accumulator access by an instruction with 2Q will access
-          * acc1 regardless of whether the data type is usable in acc1.
-          *
-          * Specifically, the 2Q mach(8) writes acc1 which does not exist for
-          * integer data types.
-          *
-          * Since we only want the low 32-bits of the result, we can do two
-          * 32-bit x 16-bit multiplies (like the mul and mach are doing), and
-          * adjust the high result and add them (like the mach is doing):
-          *
-          *    mul(8)  g7<1>D     g3<8,8,1>D      g4.0<8,8,1>UW
-          *    mul(8)  g8<1>D     g3<8,8,1>D      g4.1<8,8,1>UW
-          *    shl(8)  g9<1>D     g8<8,8,1>D      16D
-          *    add(8)  g2<1>D     g7<8,8,1>D      g8<8,8,1>D
-          *
-          * We avoid the shl instruction by realizing that we only want to add
-          * the low 16-bits of the "high" result to the high 16-bits of the
-          * "low" result and using proper regioning on the add:
-          *
-          *    mul(8)  g7<1>D     g3<8,8,1>D      g4.0<16,8,2>UW
-          *    mul(8)  g8<1>D     g3<8,8,1>D      g4.1<16,8,2>UW
-          *    add(8)  g7.1<2>UW  g7.1<16,8,2>UW  g8<16,8,2>UW
-          *
-          * Since it does not use the (single) accumulator register, we can
-          * schedule multi-component multiplications much better.
+         /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit
+          * operation directly, but CHV/BXT cannot.
           */
+         if (devinfo->gen >= 8 &&
+             !devinfo->is_cherryview && !devinfo->is_broxton)
+            continue;
+
+         if (inst->src[1].file == IMM &&
+             inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+            /* The MUL instruction isn't commutative. On Gen <= 6, only the low
+             * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
+             * src1 are used.
+             *
+             * If multiplying by an immediate value that fits in 16-bits, do a
+             * single MUL instruction with that value in the proper location.
+             */
+            if (devinfo->gen < 7) {
+               fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+                          inst->dst.type);
+               ibld.MOV(imm, inst->src[1]);
+               ibld.MUL(inst->dst, imm, inst->src[0]);
+            } else {
+               ibld.MUL(inst->dst, inst->src[0], inst->src[1]);
+            }
+         } else {
+            /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
+             * do 32-bit integer multiplication in one instruction, but instead
+             * must do a sequence (which actually calculates a 64-bit result):
+             *
+             *    mul(8)  acc0<1>D   g3<8,8,1>D      g4<8,8,1>D
+             *    mach(8) null       g3<8,8,1>D      g4<8,8,1>D
+             *    mov(8)  g2<1>D     acc0<8,8,1>D
+             *
+             * But on Gen > 6, the ability to use second accumulator register
+             * (acc1) for non-float data types was removed, preventing a simple
+             * implementation in SIMD16. A 16-channel result can be calculated by
+             * executing the three instructions twice in SIMD8, once with quarter
+             * control of 1Q for the first eight channels and again with 2Q for
+             * the second eight channels.
+             *
+             * Which accumulator register is implicitly accessed (by AccWrEnable
+             * for instance) is determined by the quarter control. Unfortunately
+             * Ivybridge (and presumably Baytrail) has a hardware bug in which an
+             * implicit accumulator access by an instruction with 2Q will access
+             * acc1 regardless of whether the data type is usable in acc1.
+             *
+             * Specifically, the 2Q mach(8) writes acc1 which does not exist for
+             * integer data types.
+             *
+             * Since we only want the low 32-bits of the result, we can do two
+             * 32-bit x 16-bit multiplies (like the mul and mach are doing), and
+             * adjust the high result and add them (like the mach is doing):
+             *
+             *    mul(8)  g7<1>D     g3<8,8,1>D      g4.0<8,8,1>UW
+             *    mul(8)  g8<1>D     g3<8,8,1>D      g4.1<8,8,1>UW
+             *    shl(8)  g9<1>D     g8<8,8,1>D      16D
+             *    add(8)  g2<1>D     g7<8,8,1>D      g9<8,8,1>D
+             *
+             * We avoid the shl instruction by realizing that we only want to add
+             * the low 16-bits of the "high" result to the high 16-bits of the
+             * "low" result and using proper regioning on the add:
+             *
+             *    mul(8)  g7<1>D     g3<8,8,1>D      g4.0<16,8,2>UW
+             *    mul(8)  g8<1>D     g3<8,8,1>D      g4.1<16,8,2>UW
+             *    add(8)  g7.1<2>UW  g7.1<16,8,2>UW  g8<16,8,2>UW
+             *
+             * Since it does not use the (single) accumulator register, we can
+             * schedule multi-component multiplications much better.
+             */
+
+            fs_reg orig_dst = inst->dst;
+            if (orig_dst.is_null() || orig_dst.file == MRF) {
+               inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+                                  inst->dst.type);
+            }
+            fs_reg low = inst->dst;
+            fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
+                        inst->dst.type);
+
+            if (devinfo->gen >= 7) {
+               fs_reg src1_0_w = inst->src[1];
+               fs_reg src1_1_w = inst->src[1];
+
+               if (inst->src[1].file == IMM) {
+                  src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
+                  src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+               } else {
+                  src1_0_w.type = BRW_REGISTER_TYPE_UW;
+                  if (src1_0_w.stride != 0) {
+                     assert(src1_0_w.stride == 1);
+                     src1_0_w.stride = 2;
+                  }
 
-         if (inst->conditional_mod && inst->dst.is_null()) {
-            inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
-                               inst->dst.type, dispatch_width);
-         }
-         fs_reg low = inst->dst;
-         fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
-                     inst->dst.type, dispatch_width);
-
-         if (brw->gen >= 7) {
-            fs_reg src1_0_w = inst->src[1];
-            fs_reg src1_1_w = inst->src[1];
-
-            if (inst->src[1].file == IMM) {
-               src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
-               src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+                  src1_1_w.type = BRW_REGISTER_TYPE_UW;
+                  if (src1_1_w.stride != 0) {
+                     assert(src1_1_w.stride == 1);
+                     src1_1_w.stride = 2;
+                  }
+                  src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
+               }
+               ibld.MUL(low, inst->src[0], src1_0_w);
+               ibld.MUL(high, inst->src[0], src1_1_w);
             } else {
-               src1_0_w.type = BRW_REGISTER_TYPE_UW;
-               if (src1_0_w.stride != 0) {
-                  assert(src1_0_w.stride == 1);
-                  src1_0_w.stride = 2;
+               fs_reg src0_0_w = inst->src[0];
+               fs_reg src0_1_w = inst->src[0];
+
+               src0_0_w.type = BRW_REGISTER_TYPE_UW;
+               if (src0_0_w.stride != 0) {
+                  assert(src0_0_w.stride == 1);
+                  src0_0_w.stride = 2;
                }
 
-               src1_1_w.type = BRW_REGISTER_TYPE_UW;
-               if (src1_1_w.stride != 0) {
-                  assert(src1_1_w.stride == 1);
-                  src1_1_w.stride = 2;
+               src0_1_w.type = BRW_REGISTER_TYPE_UW;
+               if (src0_1_w.stride != 0) {
+                  assert(src0_1_w.stride == 1);
+                  src0_1_w.stride = 2;
                }
-               src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
-            }
-            insert(MUL(low, inst->src[0], src1_0_w));
-            insert(MUL(high, inst->src[0], src1_1_w));
-         } else {
-            fs_reg src0_0_w = inst->src[0];
-            fs_reg src0_1_w = inst->src[0];
+               src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
 
-            src0_0_w.type = BRW_REGISTER_TYPE_UW;
-            if (src0_0_w.stride != 0) {
-               assert(src0_0_w.stride == 1);
-               src0_0_w.stride = 2;
+               ibld.MUL(low, src0_0_w, inst->src[1]);
+               ibld.MUL(high, src0_1_w, inst->src[1]);
             }
 
-            src0_1_w.type = BRW_REGISTER_TYPE_UW;
-            if (src0_1_w.stride != 0) {
-               assert(src0_1_w.stride == 1);
-               src0_1_w.stride = 2;
+            fs_reg dst = inst->dst;
+            dst.type = BRW_REGISTER_TYPE_UW;
+            dst.subreg_offset = 2;
+            dst.stride = 2;
+
+            high.type = BRW_REGISTER_TYPE_UW;
+            high.stride = 2;
+
+            low.type = BRW_REGISTER_TYPE_UW;
+            low.subreg_offset = 2;
+            low.stride = 2;
+
+            ibld.ADD(dst, low, high);
+
+            if (inst->conditional_mod || orig_dst.file == MRF) {
+               set_condmod(inst->conditional_mod,
+                           ibld.MOV(orig_dst, inst->dst));
             }
-            src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
+         }
 
-            insert(MUL(low, src0_0_w, inst->src[1]));
-            insert(MUL(high, src0_1_w, inst->src[1]));
+      } else if (inst->opcode == SHADER_OPCODE_MULH) {
+         /* Should have been lowered to 8-wide. */
+         assert(inst->exec_size <= 8);
+         const fs_reg acc = retype(brw_acc_reg(inst->exec_size),
+                                   inst->dst.type);
+         fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
+         fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
+
+         if (devinfo->gen >= 8) {
+            /* Until Gen8, integer multiplies read 32-bits from one source,
+             * and 16-bits from the other, relying on the MACH instruction
+             * to generate the high bits of the result.
+             *
+             * On Gen8, the multiply instruction does a full 32x32-bit
+             * multiply, but in order to do a 64-bit multiply we can simulate
+             * the previous behavior and then use a MACH instruction.
+             *
+             * FINISHME: Don't use source modifiers on src1.
+             */
+            assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
+                   mul->src[1].type == BRW_REGISTER_TYPE_UD);
+            mul->src[1].type = (type_is_signed(mul->src[1].type) ?
+                                BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
+            mul->src[1].stride *= 2;
+
+         } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+                    inst->force_sechalf) {
+            /* Among other things the quarter control bits influence which
+             * accumulator register is used by the hardware for instructions
+             * that access the accumulator implicitly (e.g. MACH).  A
+             * second-half instruction would normally map to acc1, which
+             * doesn't exist on Gen7 and up (the hardware does emulate it for
+             * floating-point instructions *only* by taking advantage of the
+             * extra precision of acc0 not normally used for floating point
+             * arithmetic).
+             *
+             * HSW and up are careful enough not to try to access an
+             * accumulator register that doesn't exist, but on earlier Gen7
+             * hardware we need to make sure that the quarter control bits are
+             * zero to avoid non-deterministic behaviour and emit an extra MOV
+             * to get the result masked correctly according to the current
+             * channel enables.
+             */
+            mach->force_sechalf = false;
+            mach->force_writemask_all = true;
+            mach->dst = ibld.vgrf(inst->dst.type);
+            ibld.MOV(inst->dst, mach->dst);
          }
+      } else {
+         continue;
+      }
+
+      inst->remove(block);
+      progress = true;
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
+/* Fill dst[0..components) with the (optionally saturated) color channels. */
+static void
+setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
+                    fs_reg *dst, fs_reg color, unsigned components)
+{
+   if (key->clamp_fragment_color) {
+      /* Clamp by copying each channel into a temporary with saturate set. */
+      const fs_reg clamped = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
+      assert(color.type == BRW_REGISTER_TYPE_F);
+      for (unsigned c = 0; c < components; c++) {
+         fs_inst *mov = bld.MOV(offset(clamped, bld, c), offset(color, bld, c));
+         set_saturate(true, mov);
+      }
+      color = clamped;
+   }
+   for (unsigned c = 0; c < components; c++)
+      dst[c] = offset(color, bld, c);
+}
+
+static void
+lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
+                            const brw_wm_prog_data *prog_data,
+                            const brw_wm_prog_key *key,
+                            const fs_visitor::thread_payload &payload)
+{
+   assert(inst->src[6].file == IMM);
+   const brw_device_info *devinfo = bld.shader->devinfo;
+   const fs_reg &color0 = inst->src[0];
+   const fs_reg &color1 = inst->src[1];
+   const fs_reg &src0_alpha = inst->src[2];
+   const fs_reg &src_depth = inst->src[3];
+   const fs_reg &dst_depth = inst->src[4];
+   fs_reg sample_mask = inst->src[5];
+   const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud;
+
+   /* The message can be up to 15 registers long (hence sources[15]), so
+    * base_mrf must be 0 or 1 to fit in m0..m15; the MRF path below uses 1.
+    */
+   fs_reg sources[15];
+   int header_size = 2, payload_header_size;
+   unsigned length = 0;
+
+   /* From the Sandy Bridge PRM, volume 4, page 198:
+    *
+    *     "Dispatched Pixel Enables. One bit per pixel indicating
+    *      which pixels were originally enabled when the thread was
+    *      dispatched. This field is only required for the end-of-
+    *      thread message and on all dual-source messages."
+    */
+   if (devinfo->gen >= 6 &&
+       (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) &&
+       color1.file == BAD_FILE &&
+       key->nr_color_regions == 1) {
+      header_size = 0;
+   }
+
+   if (header_size != 0) {
+      assert(header_size == 2);
+      /* Reserve two payload slots for the header; they stay unset here. */
+      length += 2;
+   }
+
+   if (payload.aa_dest_stencil_reg) {
+      sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1));
+      bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
+         .MOV(sources[length],
+              fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
+      length++;
+   }
+
+   if (prog_data->uses_omask) {
+      sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1),
+                               BRW_REGISTER_TYPE_UD);
+
+      /* Hand over gl_SampleMask.  Only the lower 16 bits of each channel are
+       * relevant.  Since it's unsigned single words one vgrf is always
+       * 16-wide, but only the lower or higher 8 channels will be used by the
+       * hardware when doing a SIMD8 write depending on whether we have
+       * selected the subspans for the first or second half respectively.
+       */
+      assert(sample_mask.file != BAD_FILE && type_sz(sample_mask.type) == 4);
+      sample_mask.type = BRW_REGISTER_TYPE_UW;
+      sample_mask.stride *= 2;
+
+      bld.exec_all().annotate("FB write oMask")
+         .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
+                   inst->force_sechalf),
+              sample_mask);
+      length++;
+   }
+
+   payload_header_size = length;
+
+   if (src0_alpha.file != BAD_FILE) {
+      /* FIXME: This is being passed at the wrong location in the payload and
+       * doesn't work when gl_SampleMask and MRTs are used simultaneously.
+       * It's supposed to be immediately before oMask but there seems to be no
+       * reasonable way to pass them in the correct order because LOAD_PAYLOAD
+       * requires header sources to form a contiguous segment at the beginning
+       * of the message and src0_alpha has per-channel semantics.
+       */
+      setup_color_payload(bld, key, &sources[length], src0_alpha, 1);
+      length++;
+   }
+
+   setup_color_payload(bld, key, &sources[length], color0, components);
+   length += 4;
+
+   if (color1.file != BAD_FILE) {
+      setup_color_payload(bld, key, &sources[length], color1, components);
+      length += 4;
+   }
+
+   if (src_depth.file != BAD_FILE) {
+      sources[length] = src_depth;
+      length++;
+   }
 
-         fs_reg dst = inst->dst;
-         dst.type = BRW_REGISTER_TYPE_UW;
-         dst.subreg_offset = 2;
-         dst.stride = 2;
+   if (dst_depth.file != BAD_FILE) {
+      sources[length] = dst_depth;
+      length++;
+   }
 
-         high.type = BRW_REGISTER_TYPE_UW;
-         high.stride = 2;
+   fs_inst *load;
+   if (devinfo->gen >= 7) {
+      /* Send from the GRF: size the payload vgrf from regs_written. */
+      fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+      load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
+      payload.reg = bld.shader->alloc.allocate(load->regs_written);
+      load->dst = payload;
 
-         low.type = BRW_REGISTER_TYPE_UW;
-         low.subreg_offset = 2;
-         low.stride = 2;
+      inst->src[0] = payload;
+      inst->resize_sources(1);
+      inst->base_mrf = -1;
+   } else {
+      /* Send from the MRF, starting at m1 */
+      load = bld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
+                              sources, length, payload_header_size);
+
+      /* On pre-SNB, we have to interlace the color values.  LOAD_PAYLOAD
+       * will do this for us if we just give it a COMPR4 destination.
+       */
+      if (devinfo->gen < 6 && bld.dispatch_width() == 16)
+         load->dst.reg |= BRW_MRF_COMPR4;
+
+      inst->resize_sources(0);
+      inst->base_mrf = 1;
+   }
+
+   inst->opcode = FS_OPCODE_FB_WRITE;
+   inst->mlen = load->regs_written;
+   inst->header_size = header_size;
+}
+
+static void
+lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
+                                const fs_reg &coordinate,
+                                const fs_reg &shadow_c,
+                                const fs_reg &lod, const fs_reg &lod2,
+                                const fs_reg &sampler,
+                                unsigned coord_components,
+                                unsigned grad_components)
+{
+   const bool has_lod = (op == SHADER_OPCODE_TXL || op == FS_OPCODE_TXB ||
+                         op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS);
+   fs_reg msg_begin(MRF, 1, BRW_REGISTER_TYPE_F);
+   fs_reg msg_end = msg_begin;
+
+   /* Skip one register for the g0 header. */
+   msg_end = offset(msg_end, bld.group(8, 0), 1);
+
+   for (unsigned i = 0; i < coord_components; i++)
+      bld.MOV(retype(offset(msg_end, bld, i), coordinate.type),
+              offset(coordinate, bld, i));
+
+   msg_end = offset(msg_end, bld, coord_components);
+
+   /* Messages other than SAMPLE and RESINFO in SIMD16 and TXD in SIMD8 need
+    * all three coordinate slots: zero the unused ones, which begin at msg_end.
+    */
+   if (coord_components > 0 &&
+       (has_lod || shadow_c.file != BAD_FILE ||
+        (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
+      for (unsigned i = coord_components; i < 3; i++)
+         bld.MOV(offset(msg_end, bld, i - coord_components), fs_reg(0.0f));
+
+      msg_end = offset(msg_end, bld, 3 - coord_components);
+   }
+
+   if (op == SHADER_OPCODE_TXD) {
+      /* TXD unsupported in SIMD16 mode. */
+      assert(bld.dispatch_width() == 8);
+
+      /* the slots for u and v are always present, but r is optional */
+      if (coord_components < 2)
+         msg_end = offset(msg_end, bld, 2 - coord_components);
+
+      /*  P   = u, v, r
+       * dPdx = dudx, dvdx, drdx
+       * dPdy = dudy, dvdy, drdy
+       *
+       * 1-arg: Does not exist.
+       *
+       * 2-arg: dudx   dvdx   dudy   dvdy
+       *        dPdx.x dPdx.y dPdy.x dPdy.y
+       *        m4     m5     m6     m7
+       *
+       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
+       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
+       *        m5     m6     m7     m8     m9     m10
+       */
+      for (unsigned i = 0; i < grad_components; i++)
+         bld.MOV(offset(msg_end, bld, i), offset(lod, bld, i));
+
+      msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
+
+      for (unsigned i = 0; i < grad_components; i++)
+         bld.MOV(offset(msg_end, bld, i), offset(lod2, bld, i));
+
+      msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
+   }
+
+   if (has_lod) {
+      /* Bias/LOD with shadow comparator is unsupported in SIMD16 -- *Without*
+       * shadow comparator (including RESINFO) it's unsupported in SIMD8 mode.
+       */
+      assert(shadow_c.file != BAD_FILE ? bld.dispatch_width() == 8 :
+             bld.dispatch_width() == 16);
+
+      const brw_reg_type type =
+         (op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS ?
+          BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
+      bld.MOV(retype(msg_end, type), lod);
+      msg_end = offset(msg_end, bld, 1);
+   }
+
+   if (shadow_c.file != BAD_FILE) {
+      if (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8) {
+         /* There's no plain shadow compare message, so we use shadow
+          * compare with a bias of 0.0.
+          */
+         bld.MOV(msg_end, fs_reg(0.0f));
+         msg_end = offset(msg_end, bld, 1);
+      }
+
+      bld.MOV(msg_end, shadow_c);
+      msg_end = offset(msg_end, bld, 1);
+   }
 
-         insert(ADD(dst, low, high));
+   inst->opcode = op;
+   inst->src[0] = reg_undef;
+   inst->src[1] = sampler;
+   inst->resize_sources(2);
+   inst->base_mrf = msg_begin.reg;
+   inst->mlen = msg_end.reg - msg_begin.reg;
+   inst->header_size = 1;
+}
+
+static void
+lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
+                                fs_reg coordinate,
+                                const fs_reg &shadow_c,
+                                fs_reg lod, fs_reg lod2,
+                                const fs_reg &sample_index,
+                                const fs_reg &sampler,
+                                const fs_reg &offset_value,
+                                unsigned coord_components,
+                                unsigned grad_components)
+{
+   fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F);
+   fs_reg msg_coords = message;
+   unsigned header_size = 0;
+
+   if (offset_value.file != BAD_FILE) {
+      /* The offsets set up by the visitor are in the m1 header, so we can't
+       * go headerless; start the message one register earlier to cover it.
+       */
+      header_size = 1;
+      message.reg--;
+   }
+
+   for (unsigned i = 0; i < coord_components; i++) {
+      bld.MOV(retype(offset(msg_coords, bld, i), coordinate.type), coordinate);
+      coordinate = offset(coordinate, bld, 1);
+   }
+   fs_reg msg_end = offset(msg_coords, bld, coord_components);
+   fs_reg msg_lod = offset(msg_coords, bld, 4);
+
+   if (shadow_c.file != BAD_FILE) {
+      fs_reg msg_shadow = msg_lod;
+      bld.MOV(msg_shadow, shadow_c);
+      msg_lod = offset(msg_shadow, bld, 1);
+      msg_end = msg_lod;
+   }
+
+   switch (op) {
+   case SHADER_OPCODE_TXL:
+   case FS_OPCODE_TXB:
+      bld.MOV(msg_lod, lod);
+      msg_end = offset(msg_lod, bld, 1);
+      break;
+   case SHADER_OPCODE_TXD:
+      /**
+       *  P   =  u,    v,    r
+       * dPdx = dudx, dvdx, drdx
+       * dPdy = dudy, dvdy, drdy
+       *
+       * Load up these values, interleaving lod (dPdx) and lod2 (dPdy):
+       * - dudx   dudy   dvdx   dvdy   drdx   drdy
+       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
+       */
+      msg_end = msg_lod;
+      for (unsigned i = 0; i < grad_components; i++) {
+         bld.MOV(msg_end, lod);
+         lod = offset(lod, bld, 1);
+         msg_end = offset(msg_end, bld, 1);
+
+         bld.MOV(msg_end, lod2);
+         lod2 = offset(lod2, bld, 1);
+         msg_end = offset(msg_end, bld, 1);
+      }
+      break;
+   case SHADER_OPCODE_TXS:
+      msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
+      bld.MOV(msg_lod, lod);
+      msg_end = offset(msg_lod, bld, 1);
+      break;
+   case SHADER_OPCODE_TXF:
+      msg_lod = offset(msg_coords, bld, 3);
+      bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
+      msg_end = offset(msg_lod, bld, 1);
+      break;
+   case SHADER_OPCODE_TXF_CMS:
+      msg_lod = offset(msg_coords, bld, 3);
+      /* lod (always written as zero here) */
+      bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
+      /* sample index, in the register right after the lod */
+      bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
+      msg_end = offset(msg_lod, bld, 2);
+      break;
+   default:
+      break;
+   }
+
+   inst->opcode = op;
+   inst->src[0] = reg_undef;
+   inst->src[1] = sampler;
+   inst->resize_sources(2);
+   inst->base_mrf = message.reg;
+   inst->mlen = msg_end.reg - message.reg;
+   inst->header_size = header_size;
+
+   /* Message length > MAX_SAMPLER_MESSAGE_SIZE is disallowed by hardware. */
+   assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
+}
+
+/* HSW/Gen8+ only: true for a non-immediate sampler or an index of 16+. */
+static bool
+is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler)
+{
+   const bool hsw_plus = devinfo->is_haswell || devinfo->gen >= 8;
+   return hsw_plus &&
+          (sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16);
+}
+
+static void
+lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
+                                fs_reg coordinate,
+                                const fs_reg &shadow_c,
+                                fs_reg lod, fs_reg lod2,
+                                const fs_reg &sample_index,
+                                const fs_reg &mcs, const fs_reg &sampler,
+                                fs_reg offset_value,
+                                unsigned coord_components,
+                                unsigned grad_components)
+{
+   const brw_device_info *devinfo = bld.shader->devinfo;
+   int reg_width = bld.dispatch_width() / 8;
+   unsigned header_size = 0, length = 0;
+   fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
+   for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
+      sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);
+
+   if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||
+       offset_value.file != BAD_FILE ||
+       is_high_sampler(devinfo, sampler)) {
+      /* For general texture offsets (no txf workaround), we need a header to
+       * put them in.  Note that we're only reserving space for it in the
+       * message payload as it will be initialized implicitly by the
+       * generator.
+       *
+       * TG4 needs to place its channel select in the header, for interaction
+       * with ARB_texture_swizzle.  The sampler index is only 4-bits, so for
+       * larger sampler numbers we need to offset the Sampler State Pointer in
+       * the header.
+       */
+      header_size = 1;
+      sources[0] = fs_reg();
+      length++;
+   }
+
+   if (shadow_c.file != BAD_FILE) {
+      bld.MOV(sources[length], shadow_c);
+      length++;
+   }
+
+   bool coordinate_done = false;
+
+   /* The sampler can only meaningfully compute LOD for fragment shader
+    * messages. For all other stages, we change the opcode to TXL and
+    * hardcode the LOD to 0.
+    */
+   if (bld.shader->stage != MESA_SHADER_FRAGMENT &&
+       op == SHADER_OPCODE_TEX) {
+      op = SHADER_OPCODE_TXL;
+      lod = fs_reg(0.0f);
+   }
+
+   /* Set up the LOD info */
+   switch (op) {
+   case FS_OPCODE_TXB:
+   case SHADER_OPCODE_TXL:
+      bld.MOV(sources[length], lod);
+      length++;
+      break;
+   case SHADER_OPCODE_TXD:
+      /* TXD should have been lowered in SIMD16 mode. */
+      assert(bld.dispatch_width() == 8);
+
+      /* Load dPdx and the coordinate together:
+       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
+       */
+      for (unsigned i = 0; i < coord_components; i++) {
+         bld.MOV(sources[length], coordinate);
+         coordinate = offset(coordinate, bld, 1);
+         length++;
 
-         if (inst->conditional_mod) {
-            fs_reg null(retype(brw_null_reg(), inst->dst.type));
-            fs_inst *mov = MOV(null, inst->dst);
-            mov->conditional_mod = inst->conditional_mod;
-            insert(mov);
+         /* For cube map array, the coordinate is (u,v,r,ai) but there are
+          * only derivatives for (u, v, r).
+          */
+         if (i < grad_components) {
+            bld.MOV(sources[length], lod);
+            lod = offset(lod, bld, 1);
+            length++;
+
+            bld.MOV(sources[length], lod2);
+            lod2 = offset(lod2, bld, 1);
+            length++;
          }
       }
-#undef insert
 
-      inst->remove(block);
+      coordinate_done = true;
+      break;
+   case SHADER_OPCODE_TXS:
+      bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
+      length++;
+      break;
+   case SHADER_OPCODE_TXF:
+      /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
+       * On Gen9 they are u, v, lod, r
+       */
+      bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+      coordinate = offset(coordinate, bld, 1);
+      length++;
+
+      if (devinfo->gen >= 9) {
+         if (coord_components >= 2) {
+            bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+            coordinate = offset(coordinate, bld, 1);
+         }
+         length++;
+      }
+
+      bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
+      length++;
+
+      for (unsigned i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
+         bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+         coordinate = offset(coordinate, bld, 1);
+         length++;
+      }
+
+      coordinate_done = true;
+      break;
+   case SHADER_OPCODE_TXF_CMS:
+   case SHADER_OPCODE_TXF_UMS:
+   case SHADER_OPCODE_TXF_MCS:
+      if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) {
+         bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
+         length++;
+      }
+
+      if (op == SHADER_OPCODE_TXF_CMS) {
+         /* Data from the multisample control surface. */
+         bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
+         length++;
+      }
+
+      /* There is no offsetting for this message; just copy in the integer
+       * texture coordinates.
+       */
+      for (unsigned i = 0; i < coord_components; i++) {
+         bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+         coordinate = offset(coordinate, bld, 1);
+         length++;
+      }
+
+      coordinate_done = true;
+      break;
+   case SHADER_OPCODE_TG4_OFFSET:
+      /* gather4_po_c should have been lowered in SIMD16 mode. */
+      assert(bld.dispatch_width() == 8 || shadow_c.file == BAD_FILE);
+
+      /* More crazy intermixing */
+      for (unsigned i = 0; i < 2; i++) { /* u, v */
+         bld.MOV(sources[length], coordinate);
+         coordinate = offset(coordinate, bld, 1);
+         length++;
+      }
+
+      for (unsigned i = 0; i < 2; i++) { /* offu, offv */
+         bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value);
+         offset_value = offset(offset_value, bld, 1);
+         length++;
+      }
+
+      if (coord_components == 3) { /* r if present */
+         bld.MOV(sources[length], coordinate);
+         coordinate = offset(coordinate, bld, 1);
+         length++;
+      }
+
+      coordinate_done = true;
+      break;
+   default:
+      break;
+   }
+
+   /* Set up the coordinate (except for cases where it was done above) */
+   if (!coordinate_done) {
+      for (unsigned i = 0; i < coord_components; i++) {
+         bld.MOV(sources[length], coordinate);
+         coordinate = offset(coordinate, bld, 1);
+         length++;
+      }
+   }
+
+   int mlen;
+   if (reg_width == 2)
+      mlen = length * reg_width - header_size;
+   else
+      mlen = length * reg_width;
+
+   const fs_reg src_payload = fs_reg(GRF, bld.shader->alloc.allocate(mlen),
+                                     BRW_REGISTER_TYPE_F);
+   bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
+
+   /* Generate the SEND. */
+   inst->opcode = op;
+   inst->src[0] = src_payload;
+   inst->src[1] = sampler;
+   inst->resize_sources(2);
+   inst->base_mrf = -1;
+   inst->mlen = mlen;
+   inst->header_size = header_size;
+
+   /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
+   assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
+}
+
+static void
+lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
+{ /* Unpack the logical sampler sources and call the per-generation lowering. */
+   const brw_device_info *devinfo = bld.shader->devinfo;
+   const fs_reg &coordinate = inst->src[0];
+   const fs_reg &shadow_c = inst->src[1];
+   const fs_reg &lod = inst->src[2];
+   const fs_reg &lod2 = inst->src[3];
+   const fs_reg &sample_index = inst->src[4];
+   const fs_reg &mcs = inst->src[5];
+   const fs_reg &sampler = inst->src[6];
+   const fs_reg &offset_value = inst->src[7];
+   assert(inst->src[8].file == IMM && inst->src[9].file == IMM); /* both component counts must be immediates */
+   const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
+   const unsigned grad_components = inst->src[9].fixed_hw_reg.dw1.ud;
+
+   if (devinfo->gen >= 7) {
+      lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
+                                      shadow_c, lod, lod2, sample_index,
+                                      mcs, sampler, offset_value,
+                                      coord_components, grad_components);
+   } else if (devinfo->gen >= 5) { /* Gen5-6: same arguments as above minus mcs */
+      lower_sampler_logical_send_gen5(bld, inst, op, coordinate,
+                                      shadow_c, lod, lod2, sample_index,
+                                      sampler, offset_value,
+                                      coord_components, grad_components);
+   } else { /* Below Gen5: sample_index, mcs and offset_value are not passed */
+      lower_sampler_logical_send_gen4(bld, inst, op, coordinate,
+                                      shadow_c, lod, lod2, sampler,
+                                      coord_components, grad_components);
+   }
+}
+
+/**
+ * Initialize the header present in some typed and untyped surface messages:
+ * a zeroed UD VGRF with \p sample_mask in component 7.  Returns that VGRF.
+ */
+static fs_reg
+emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
+{
+   fs_builder ubld = bld.exec_all().group(8, 0); /* header MOVs go through this exec_all builder */
+   const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+   ubld.MOV(dst, fs_reg(0)); /* clear the register before filling component 7 */
+   ubld.MOV(component(dst, 7), sample_mask);
+   return dst;
+}
+
+static void
+lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
+                           const fs_reg &sample_mask)
+{ /* Rewrite \p inst in place as opcode \p op reading a LOAD_PAYLOAD-built payload. */
+   /* Get the logical send arguments. */
+   const fs_reg &addr = inst->src[0];
+   const fs_reg &src = inst->src[1];
+   const fs_reg &surface = inst->src[2];
+   const UNUSED fs_reg &dims = inst->src[3]; /* bound but not referenced below */
+   const fs_reg &arg = inst->src[4];
+
+   /* Calculate the total number of components of the payload. */
+   const unsigned addr_sz = inst->components_read(0);
+   const unsigned src_sz = inst->components_read(1);
+   const unsigned header_sz = (sample_mask.file == BAD_FILE ? 0 : 1); /* header only when a sample mask was passed */
+   const unsigned sz = header_sz + addr_sz + src_sz;
+
+   /* Allocate space for the payload. */
+   fs_reg *const components = new fs_reg[sz]; /* scratch source list, freed at the end */
+   const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+   unsigned n = 0; /* next free slot in components[] */
+
+   /* Construct the payload. */
+   if (header_sz)
+      components[n++] = emit_surface_header(bld, sample_mask);
+
+   for (unsigned i = 0; i < addr_sz; i++)
+      components[n++] = offset(addr, bld, i);
+
+   for (unsigned i = 0; i < src_sz; i++)
+      components[n++] = offset(src, bld, i);
+
+   bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+
+   /* Update the original instruction. */
+   inst->opcode = op;
+   inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; /* header plus exec_size / 8 per component */
+   inst->header_size = header_sz;
+
+   inst->src[0] = payload; /* the sources shrink to: payload, surface, arg */
+   inst->src[1] = surface;
+   inst->src[2] = arg;
+   inst->resize_sources(3);
+
+   delete[] components;
+}
+
+bool
+fs_visitor::lower_logical_sends()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+      const fs_builder ibld(this, block, inst);
+
+      switch (inst->opcode) {
+      case FS_OPCODE_FB_WRITE_LOGICAL:
+         assert(stage == MESA_SHADER_FRAGMENT);
+         lower_fb_write_logical_send(ibld, inst,
+                                     (const brw_wm_prog_data *)prog_data,
+                                     (const brw_wm_prog_key *)key,
+                                     payload);
+         break;
+
+      case SHADER_OPCODE_TEX_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TEX);
+         break;
+
+      case SHADER_OPCODE_TXD_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXD);
+         break;
+
+      case SHADER_OPCODE_TXF_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF);
+         break;
+
+      case SHADER_OPCODE_TXL_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXL);
+         break;
+
+      case SHADER_OPCODE_TXS_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXS);
+         break;
+
+      case FS_OPCODE_TXB_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, FS_OPCODE_TXB);
+         break;
+
+      case SHADER_OPCODE_TXF_CMS_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
+         break;
+
+      case SHADER_OPCODE_TXF_UMS_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
+         break;
+
+      case SHADER_OPCODE_TXF_MCS_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_MCS);
+         break;
+
+      case SHADER_OPCODE_LOD_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_LOD);
+         break;
+
+      case SHADER_OPCODE_TG4_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4);
+         break;
+
+      case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET);
+         break;
+
+      case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+         lower_surface_logical_send(ibld, inst,
+                                    SHADER_OPCODE_UNTYPED_SURFACE_READ,
+                                    fs_reg(0xffff));
+         break;
+
+      case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+         lower_surface_logical_send(ibld, inst,
+                                    SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+                                    ibld.sample_mask_reg());
+         break;
+
+      case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+         lower_surface_logical_send(ibld, inst,
+                                    SHADER_OPCODE_UNTYPED_ATOMIC,
+                                    ibld.sample_mask_reg());
+         break;
+
+      case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+         lower_surface_logical_send(ibld, inst,
+                                    SHADER_OPCODE_TYPED_SURFACE_READ,
+                                    fs_reg(0xffff));
+         break;
+
+      case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+         lower_surface_logical_send(ibld, inst,
+                                    SHADER_OPCODE_TYPED_SURFACE_WRITE,
+                                    ibld.sample_mask_reg());
+         break;
+
+      case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+         lower_surface_logical_send(ibld, inst,
+                                    SHADER_OPCODE_TYPED_ATOMIC,
+                                    ibld.sample_mask_reg());
+         break;
+
+      default:
+         continue;
+      }
+
       progress = true;
    }
 
@@ -3692,6 +4124,265 @@
    return progress;
 }
 
+/**
+ * Get the closest native SIMD width supported by the hardware for instruction
+ * \p inst.  The instruction will be left untouched by
+ * fs_visitor::lower_simd_width() if the returned value is equal to the
+ * original execution size.
+ */
+static unsigned
+get_lowered_simd_width(const struct brw_device_info *devinfo,
+                       const fs_inst *inst)
+{
+   switch (inst->opcode) {
+   case BRW_OPCODE_MOV:
+   case BRW_OPCODE_SEL:
+   case BRW_OPCODE_NOT:
+   case BRW_OPCODE_AND:
+   case BRW_OPCODE_OR:
+   case BRW_OPCODE_XOR:
+   case BRW_OPCODE_SHR:
+   case BRW_OPCODE_SHL:
+   case BRW_OPCODE_ASR:
+   case BRW_OPCODE_CMP:
+   case BRW_OPCODE_CMPN:
+   case BRW_OPCODE_CSEL:
+   case BRW_OPCODE_F32TO16:
+   case BRW_OPCODE_F16TO32:
+   case BRW_OPCODE_BFREV:
+   case BRW_OPCODE_BFE:
+   case BRW_OPCODE_BFI1:
+   case BRW_OPCODE_BFI2:
+   case BRW_OPCODE_ADD:
+   case BRW_OPCODE_MUL:
+   case BRW_OPCODE_AVG:
+   case BRW_OPCODE_FRC:
+   case BRW_OPCODE_RNDU:
+   case BRW_OPCODE_RNDD:
+   case BRW_OPCODE_RNDE:
+   case BRW_OPCODE_RNDZ:
+   case BRW_OPCODE_LZD:
+   case BRW_OPCODE_FBH:
+   case BRW_OPCODE_FBL:
+   case BRW_OPCODE_CBIT:
+   case BRW_OPCODE_SAD2:
+   case BRW_OPCODE_MAD:
+   case BRW_OPCODE_LRP:
+   case SHADER_OPCODE_RCP:
+   case SHADER_OPCODE_RSQ:
+   case SHADER_OPCODE_SQRT:
+   case SHADER_OPCODE_EXP2:
+   case SHADER_OPCODE_LOG2:
+   case SHADER_OPCODE_POW:
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
+   case SHADER_OPCODE_SIN:
+   case SHADER_OPCODE_COS: {
+      /* According to the PRMs:
+       *  "A. In Direct Addressing mode, a source cannot span more than 2
+       *      adjacent GRF registers.
+       *   B. A destination cannot span more than 2 adjacent GRF registers."
+       *
+       * Look for the source or destination with the largest register region
+       * which is the one that is going to limit the overall execution size of
+       * the instruction due to this rule.
+       */
+      unsigned reg_count = inst->regs_written;
+
+      for (unsigned i = 0; i < inst->sources; i++)
+         reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
+
+      /* Calculate the maximum execution size of the instruction based on the
+       * factor by which it goes over the hardware limit of 2 GRFs.
+       */
+      return inst->exec_size / DIV_ROUND_UP(reg_count, 2); /* NOTE(review): assumes reg_count > 0, else the divisor is 0 -- confirm */
+   }
+   case SHADER_OPCODE_MULH:
+      /* MULH is lowered to the MUL/MACH sequence using the accumulator, which
+       * is 8-wide on Gen7+.
+       */
+      return (devinfo->gen >= 7 ? 8 : inst->exec_size);
+
+   case FS_OPCODE_FB_WRITE_LOGICAL:
+      /* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
+       * here.
+       */
+      assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE ||
+             inst->exec_size == 8);
+      /* Dual-source FB writes are unsupported in SIMD16 mode. */
+      return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size);
+
+   case SHADER_OPCODE_TXD_LOGICAL:
+      /* TXD is unsupported in SIMD16 mode. */
+      return 8;
+
+   case SHADER_OPCODE_TG4_OFFSET_LOGICAL: {
+      /* gather4_po_c is unsupported in SIMD16 mode. */
+      const fs_reg &shadow_c = inst->src[1];
+      return (shadow_c.file != BAD_FILE ? 8 : inst->exec_size);
+   }
+   case SHADER_OPCODE_TXL_LOGICAL:
+   case FS_OPCODE_TXB_LOGICAL: {
+      /* Gen4 doesn't have SIMD8 non-shadow-compare bias/LOD instructions, and
+       * Gen4-6 can't support TXL and TXB with shadow comparison in SIMD16
+       * mode because the message exceeds the maximum length of 11.
+       */
+      const fs_reg &shadow_c = inst->src[1];
+      if (devinfo->gen == 4 && shadow_c.file == BAD_FILE)
+         return 16; /* returned regardless of exec_size, so this may widen */
+      else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE)
+         return 8;
+      else
+         return inst->exec_size;
+   }
+   case SHADER_OPCODE_TXF_LOGICAL:
+   case SHADER_OPCODE_TXS_LOGICAL:
+      /* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
+       * messages.  Use SIMD16 instead.
+       */
+      if (devinfo->gen == 4)
+         return 16; /* returned regardless of exec_size, so this may widen */
+      else
+         return inst->exec_size;
+
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+      return 8; /* typed surface opcodes are always limited to 8 here */
+
+   default:
+      return inst->exec_size; /* no restriction listed above: keep the width */
+   }
+}
+
+/**
+ * The \p rows array of registers represents a \p num_rows by \p num_columns
+ * matrix in row-major order, write it in column-major order into the register
+ * passed as destination.  \p stride gives the separation between matrix
+ * elements in the input in fs_builder::dispatch_width() units.
+ */
+static void
+emit_transpose(const fs_builder &bld,
+               const fs_reg &dst, const fs_reg *rows,
+               unsigned num_rows, unsigned num_columns, unsigned stride)
+{
+   fs_reg *const components = new fs_reg[num_rows * num_columns]; /* scratch source list, freed below */
+
+   for (unsigned i = 0; i < num_columns; ++i) { /* i walks columns, j walks rows */
+      for (unsigned j = 0; j < num_rows; ++j)
+         components[num_rows * i + j] = offset(rows[j], bld, stride * i); /* slot (column i, row j) <- element i of row j */
+   }
+
+   bld.LOAD_PAYLOAD(dst, components, num_rows * num_columns, 0); /* last argument: header size of 0 */
+
+   delete[] components;
+}
+
+bool
+fs_visitor::lower_simd_width()
+{ /* Replace each instruction whose width differs from get_lowered_simd_width() by lowered copies. */
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+      const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
+
+      if (lower_width != inst->exec_size) {
+         /* Builder matching the original instruction.  We may also need to
+          * emit an instruction of width larger than the original, set the
+          * execution size of the builder to the highest of both for now so
+          * we're sure that both cases can be handled.
+          */
+         const fs_builder ibld = bld.at(block, inst)
+                                    .exec_all(inst->force_writemask_all)
+                                    .group(MAX2(inst->exec_size, lower_width),
+                                           inst->force_sechalf);
+
+         /* Split the copies in chunks of the execution width of either the
+          * original or the lowered instruction, whichever is lower.
+          */
+         const unsigned copy_width = MIN2(lower_width, inst->exec_size);
+         const unsigned n = inst->exec_size / copy_width; /* number of lowered instructions emitted below */
+         const unsigned dst_size = inst->regs_written * REG_SIZE /
+            inst->dst.component_size(inst->exec_size);
+         fs_reg dsts[4]; /* one temporary per lowered copy; the assert below bounds n by this size */
+
+         assert(n > 0 && n <= ARRAY_SIZE(dsts) &&
+                !inst->writes_accumulator && !inst->mlen);
+
+         for (unsigned i = 0; i < n; i++) {
+            /* Emit a copy of the original instruction with the lowered width.
+             * If the EOT flag was set throw it away except for the last
+             * instruction to avoid killing the thread prematurely.
+             */
+            fs_inst split_inst = *inst;
+            split_inst.exec_size = lower_width;
+            split_inst.eot = inst->eot && i == n - 1;
+
+            /* Select the correct channel enables for the i-th group, then
+             * transform the sources and destination and emit the lowered
+             * instruction.
+             */
+            const fs_builder lbld = ibld.group(lower_width, i);
+
+            for (unsigned j = 0; j < inst->sources; j++) {
+               if (inst->src[j].file != BAD_FILE &&
+                   !is_uniform(inst->src[j])) { /* other sources stay as copied from *inst */
+                  /* Get the i-th copy_width-wide chunk of the source. */
+                  const fs_reg src = horiz_offset(inst->src[j], copy_width * i);
+                  const unsigned src_size = inst->components_read(j);
+
+                  /* Use a trivial transposition to copy one every n
+                   * copy_width-wide components of the register into a
+                   * temporary passed as source to the lowered instruction.
+                   */
+                  split_inst.src[j] = lbld.vgrf(inst->src[j].type, src_size);
+                  emit_transpose(lbld.group(copy_width, 0),
+                                 split_inst.src[j], &src, 1, src_size, n);
+               }
+            }
+
+            if (inst->regs_written) {
+               /* Allocate enough space to hold the result of the lowered
+                * instruction and fix up the number of registers written.
+                */
+               split_inst.dst = dsts[i] =
+                  lbld.vgrf(inst->dst.type, dst_size);
+               split_inst.regs_written =
+                  DIV_ROUND_UP(inst->regs_written * lower_width,
+                               inst->exec_size);
+            }
+
+            lbld.emit(split_inst);
+         }
+
+         if (inst->regs_written) {
+            /* Distance between useful channels in the temporaries, skipping
+             * garbage if the lowered instruction is wider than the original.
+             */
+            const unsigned m = lower_width / copy_width;
+
+            /* Interleave the components of the result from the lowered
+             * instructions.  We need to set exec_all() when copying more than
+             * one half per component, because LOAD_PAYLOAD (in terms of which
+             * emit_transpose is implemented) can only use the same channel
+             * enable signals for all of its non-header sources.
+             */
+            emit_transpose(ibld.exec_all(inst->exec_size > copy_width)
+                               .group(copy_width, 0),
+                           inst->dst, dsts, n, dst_size, m);
+         }
+
+         inst->remove(block); /* the copies emitted above take the place of the original */
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
 void
 fs_visitor::dump_instructions()
 {
@@ -3762,13 +4453,14 @@
    }
    fprintf(file, "(%d) ", inst->exec_size);
 
+   if (inst->mlen) {
+      fprintf(file, "(mlen: %d) ", inst->mlen);
+   }
 
    switch (inst->dst.file) {
    case GRF:
       fprintf(file, "vgrf%d", inst->dst.reg);
-      if (inst->dst.width != dispatch_width)
-         fprintf(file, "@%d", inst->dst.width);
-      if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 ||
+      if (alloc.sizes[inst->dst.reg] != inst->regs_written ||
           inst->dst.subreg_offset)
          fprintf(file, "+%d.%d",
                  inst->dst.reg_offset, inst->dst.subreg_offset);
@@ -3826,9 +4518,7 @@
       switch (inst->src[i].file) {
       case GRF:
          fprintf(file, "vgrf%d", inst->src[i].reg);
-         if (inst->src[i].width != dispatch_width)
-            fprintf(file, "@%d", inst->src[i].width);
-         if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
+         if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) ||
              inst->src[i].subreg_offset)
             fprintf(file, "+%d.%d", inst->src[i].reg_offset,
                     inst->src[i].subreg_offset);
@@ -4055,7 +4745,7 @@
 void
 fs_visitor::setup_cs_payload()
 {
-   assert(brw->gen >= 7);
+   assert(devinfo->gen >= 7);
 
    payload.num_regs = 1;
 }
@@ -4098,12 +4788,25 @@
 void
 fs_visitor::optimize()
 {
-   split_virtual_grfs();
+   /* bld is the common builder object pointing at the end of the program we
+    * used to translate it into i965 IR.  For the optimization and lowering
+    * passes coming next, any code added after the end of the program without
+    * having explicitly called fs_builder::at() clearly points at a mistake.
+    * Ideally optimization passes wouldn't be part of the visitor so they
+    * wouldn't have access to bld at all, but they do, so just in case some
+    * pass forgets to ask for a location explicitly set it to NULL here to
+    * make it trip.  The dispatch width is initialized to a bogus value to
+    * make sure that optimizations set the execution controls explicitly to
+    * match the code they are manipulating instead of relying on the defaults.
+    */
+   bld = fs_builder(this, 64);
 
    move_uniform_array_access_to_pull_constants();
    assign_constant_locations();
    demote_pull_constants();
 
+   split_virtual_grfs();
+
 #define OPT(pass, args...) ({                                           \
       pass_num++;                                                       \
       bool this_progress = pass(args);                                  \
@@ -4113,7 +4816,7 @@
          snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass,              \
                   stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
                                                                         \
-         backend_visitor::dump_instructions(filename);                  \
+         backend_shader::dump_instructions(filename);                   \
       }                                                                 \
                                                                         \
       progress = progress || this_progress;                             \
@@ -4126,12 +4829,16 @@
                stage_abbrev, dispatch_width,
                shader_prog ? shader_prog->Name : 0);
 
-      backend_visitor::dump_instructions(filename);
+      backend_shader::dump_instructions(filename);
    }
 
-   bool progress;
+   bool progress = false;
    int iteration = 0;
    int pass_num = 0;
+
+   OPT(lower_simd_width);
+   OPT(lower_logical_sends);
+
    do {
       progress = false;
       pass_num = 0;
@@ -4227,9 +4934,11 @@
          fail("Failure to register allocate.  Reduce number of "
               "live scalar values to avoid this.");
       } else {
-         perf_debug("%s shader triggered register spilling.  "
-                    "Try reducing the number of live scalar values to "
-                    "improve performance.\n", stage_name);
+         compiler->shader_perf_log(log_data,
+                                   "%s shader triggered register spilling.  "
+                                   "Try reducing the number of live scalar "
+                                   "values to improve performance.\n",
+                                   stage_name);
       }
 
       /* Since we're out of heuristics, just go spill registers until we
@@ -4258,33 +4967,26 @@
 }
 
 bool
-fs_visitor::run_vs()
+fs_visitor::run_vs(gl_clip_plane *clip_planes)
 {
    assert(stage == MESA_SHADER_VERTEX);
 
    assign_common_binding_table_offsets(0);
    setup_vs_payload();
 
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
       emit_shader_time_begin();
 
-   if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
-      emit_nir_code();
-   } else {
-      foreach_in_list(ir_instruction, ir, shader->base.ir) {
-         base_ir = ir;
-         this->result = reg_undef;
-         ir->accept(this);
-      }
-      base_ir = NULL;
-   }
+   emit_nir_code();
 
    if (failed)
       return false;
 
+   compute_clip_distance(clip_planes);
+
    emit_urb_writes();
 
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
       emit_shader_time_end();
 
    calculate_cfg();
@@ -4301,7 +5003,7 @@
 }
 
 bool
-fs_visitor::run_fs()
+fs_visitor::run_fs(bool do_rep_send)
 {
    brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
    brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
@@ -4319,10 +5021,11 @@
 
    if (0) {
       emit_dummy_fs();
-   } else if (brw->use_rep_send && dispatch_width == 16) {
+   } else if (do_rep_send) {
+      assert(dispatch_width == 16);
       emit_repclear_shader();
    } else {
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      if (shader_time_index >= 0)
          emit_shader_time_begin();
 
       calculate_urb_setup();
@@ -4337,37 +5040,27 @@
        * Initialize it with the dispatched pixels.
        */
       if (wm_prog_data->uses_kill) {
-         fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+         fs_inst *discard_init = bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
          discard_init->flag_subreg = 1;
       }
 
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
-      if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
-         emit_nir_code();
-      } else if (shader) {
-         foreach_in_list(ir_instruction, ir, shader->base.ir) {
-            base_ir = ir;
-            this->result = reg_undef;
-            ir->accept(this);
-         }
-      } else {
-         emit_fragment_program_code();
-      }
-      base_ir = NULL;
+      emit_nir_code();
+
       if (failed)
 	 return false;
 
       if (wm_prog_data->uses_kill)
-         emit(FS_OPCODE_PLACEHOLDER_HALT);
+         bld.emit(FS_OPCODE_PLACEHOLDER_HALT);
 
       if (wm_key->alpha_test_func)
          emit_alpha_test();
 
       emit_fb_writes();
 
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      if (shader_time_index >= 0)
          emit_shader_time_end();
 
       calculate_cfg();
@@ -4411,7 +5104,7 @@
 
    setup_cs_payload();
 
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
       emit_shader_time_begin();
 
    emit_nir_code();
@@ -4421,7 +5114,7 @@
 
    emit_cs_terminate();
 
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
       emit_shader_time_end();
 
    calculate_cfg();
@@ -4471,11 +5164,18 @@
    if (unlikely(INTEL_DEBUG & DEBUG_WM))
       brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
 
+   int st_index8 = -1, st_index16 = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
+      st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
+   }
+
    /* Now the main event: Visit the shader IR and generate our FS IR for it.
     */
-   fs_visitor v(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                prog, &fp->Base, 8);
-   if (!v.run_fs()) {
+   fs_visitor v(brw->intelScreen->compiler, brw,
+                mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+                prog, &fp->Base, 8, st_index8);
+   if (!v.run_fs(false /* do_rep_send */)) {
       if (prog) {
          prog->LinkStatus = false;
          ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -4488,21 +5188,18 @@
    }
 
    cfg_t *simd16_cfg = NULL;
-   fs_visitor v2(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                 prog, &fp->Base, 16);
+   fs_visitor v2(brw->intelScreen->compiler, brw,
+                 mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+                 prog, &fp->Base, 16, st_index16);
    if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
       if (!v.simd16_unsupported) {
          /* Try a SIMD16 compile */
          v2.import_uniforms(&v);
-         if (!v2.run_fs()) {
-            perf_debug("SIMD16 shader failed to compile, falling back to "
-                       "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+         if (!v2.run_fs(brw->use_rep_send)) {
+            perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
          } else {
             simd16_cfg = v2.cfg;
          }
-      } else {
-         perf_debug("SIMD16 shader unsupported, falling back to "
-                    "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
       }
    }
 
@@ -4516,7 +5213,8 @@
       prog_data->no_8 = false;
    }
 
-   fs_generator g(brw, mem_ctx, (void *) key, &prog_data->base,
+   fs_generator g(brw->intelScreen->compiler, brw,
+                  mem_ctx, (void *) key, &prog_data->base,
                   &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_cse.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_cse.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_cse.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,8 @@
  * 13.1 (p378).
  */
 
+using namespace brw;
+
 namespace {
 struct aeb_entry : public exec_node {
    /** The instruction that generates the expression value. */
@@ -59,6 +61,7 @@
    case BRW_OPCODE_CMPN:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
+   case SHADER_OPCODE_MULH:
    case BRW_OPCODE_FRC:
    case BRW_OPCODE_RNDU:
    case BRW_OPCODE_RNDD:
@@ -152,28 +155,32 @@
 instructions_match(fs_inst *a, fs_inst *b, bool *negate)
 {
    return a->opcode == b->opcode &&
+          a->force_writemask_all == b->force_writemask_all &&
+          a->exec_size == b->exec_size &&
+          a->force_sechalf == b->force_sechalf &&
           a->saturate == b->saturate &&
           a->predicate == b->predicate &&
           a->predicate_inverse == b->predicate_inverse &&
           a->conditional_mod == b->conditional_mod &&
+          a->flag_subreg == b->flag_subreg &&
           a->dst.type == b->dst.type &&
+          a->offset == b->offset &&
+          a->mlen == b->mlen &&
+          a->regs_written == b->regs_written &&
+          a->base_mrf == b->base_mrf &&
+          a->eot == b->eot &&
+          a->header_size == b->header_size &&
+          a->shadow_compare == b->shadow_compare &&
+          a->pi_noperspective == b->pi_noperspective &&
           a->sources == b->sources &&
-          (a->is_tex() ? (a->offset == b->offset &&
-                          a->mlen == b->mlen &&
-                          a->regs_written == b->regs_written &&
-                          a->base_mrf == b->base_mrf &&
-                          a->eot == b->eot &&
-                          a->header_size == b->header_size &&
-                          a->shadow_compare == b->shadow_compare)
-                       : true) &&
           operands_match(a, b, negate);
 }
 
-static fs_inst *
-create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate)
+static void
+create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
 {
    int written = inst->regs_written;
-   int dst_width = inst->dst.width / 8;
+   int dst_width = inst->exec_size / 8;
    fs_inst *copy;
 
    if (written > dst_width) {
@@ -189,25 +196,21 @@
       }
 
       assert(src.file == GRF);
-      payload = ralloc_array(v->mem_ctx, fs_reg, sources);
+      payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources);
       for (int i = 0; i < header_size; i++) {
          payload[i] = src;
-         payload[i].width = 8;
          src.reg_offset++;
       }
       for (int i = header_size; i < sources; i++) {
          payload[i] = src;
-         src = offset(src, 1);
+         src = offset(src, bld, 1);
       }
-      copy = v->LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
+      copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
    } else {
-      copy = v->MOV(inst->dst, src);
-      copy->force_writemask_all = inst->force_writemask_all;
+      copy = bld.MOV(inst->dst, src);
       copy->src[0].negate = negate;
    }
    assert(copy->regs_written == written);
-
-   return copy;
 }
 
 bool
@@ -254,16 +257,14 @@
              */
             bool no_existing_temp = entry->tmp.file == BAD_FILE;
             if (no_existing_temp && !entry->generator->dst.is_null()) {
+               const fs_builder ibld = fs_builder(this, block, entry->generator)
+                                       .at(block, entry->generator->next);
                int written = entry->generator->regs_written;
-               assert((written * 8) % entry->generator->dst.width == 0);
 
                entry->tmp = fs_reg(GRF, alloc.allocate(written),
-                                   entry->generator->dst.type,
-                                   entry->generator->dst.width);
+                                   entry->generator->dst.type);
 
-               fs_inst *copy = create_copy_instr(this, entry->generator,
-                                                 entry->tmp, false);
-               entry->generator->insert_after(block, copy);
+               create_copy_instr(ibld, entry->generator, entry->tmp, false);
 
                entry->generator->dst = entry->tmp;
             }
@@ -271,12 +272,10 @@
             /* dest <- temp */
             if (!inst->dst.is_null()) {
                assert(inst->regs_written == entry->generator->regs_written);
-               assert(inst->dst.width == entry->generator->dst.width);
                assert(inst->dst.type == entry->tmp.type);
+               const fs_builder ibld(this, block, inst);
 
-               fs_inst *copy = create_copy_instr(this, inst,
-                                                 entry->tmp, negate);
-               inst->insert_before(block, copy);
+               create_copy_instr(ibld, inst, entry->tmp, negate);
             }
 
             /* Set our iterator so that next time through the loop inst->next
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_fp.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_fp.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_fp.cpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,742 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/** @file brw_fs_fp.cpp
- *
- * Implementation of the compiler for GL_ARB_fragment_program shaders on top
- * of the GLSL compiler backend.
- */
-
-#include "brw_context.h"
-#include "brw_fs.h"
-
-void
-fs_visitor::emit_fp_alu1(enum opcode opcode,
-                         const struct prog_instruction *fpi,
-                         fs_reg dst, fs_reg src)
-{
-   for (int i = 0; i < 4; i++) {
-      if (fpi->DstReg.WriteMask & (1 << i))
-         emit(opcode, offset(dst, i), offset(src, i));
-   }
-}
-
-void
-fs_visitor::emit_fp_alu2(enum opcode opcode,
-                         const struct prog_instruction *fpi,
-                         fs_reg dst, fs_reg src0, fs_reg src1)
-{
-   for (int i = 0; i < 4; i++) {
-      if (fpi->DstReg.WriteMask & (1 << i))
-         emit(opcode, offset(dst, i),
-              offset(src0, i), offset(src1, i));
-   }
-}
-
-void
-fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
-                           fs_reg dst, fs_reg src0, fs_reg src1)
-{
-   enum brw_conditional_mod conditionalmod;
-   if (fpi->Opcode == OPCODE_MIN)
-      conditionalmod = BRW_CONDITIONAL_L;
-   else
-      conditionalmod = BRW_CONDITIONAL_GE;
-
-   for (int i = 0; i < 4; i++) {
-      if (fpi->DstReg.WriteMask & (1 << i)) {
-         emit_minmax(conditionalmod, offset(dst, i),
-                     offset(src0, i), offset(src1, i));
-      }
-   }
-}
-
-void
-fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod,
-                        const struct prog_instruction *fpi,
-                        fs_reg dst, fs_reg src0, fs_reg src1,
-                        fs_reg one)
-{
-   for (int i = 0; i < 4; i++) {
-      if (fpi->DstReg.WriteMask & (1 << i)) {
-         fs_inst *inst;
-
-         emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
-                  conditional_mod));
-
-         inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
-   }
-}
-
-void
-fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
-                                 fs_reg dst, fs_reg src)
-{
-   for (int i = 0; i < 4; i++) {
-      if (fpi->DstReg.WriteMask & (1 << i))
-         emit(MOV(offset(dst, i), src));
-   }
-}
-
-void
-fs_visitor::emit_fp_scalar_math(enum opcode opcode,
-                                const struct prog_instruction *fpi,
-                                fs_reg dst, fs_reg src)
-{
-   fs_reg temp = vgrf(glsl_type::float_type);
-   emit_math(opcode, temp, src);
-   emit_fp_scalar_write(fpi, dst, temp);
-}
-
-void
-fs_visitor::emit_fragment_program_code()
-{
-   setup_fp_regs();
-
-   /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
-    * be:
-    *
-    * sel.f0 dst 1.0 0.0
-    *
-    * instead of
-    *
-    * mov    dst 0.0
-    * mov.f0 dst 1.0
-    */
-   fs_reg one = vgrf(glsl_type::float_type);
-   emit(MOV(one, fs_reg(1.0f)));
-
-   for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
-      const struct prog_instruction *fpi = &prog->Instructions[insn];
-      base_ir = fpi;
-
-      fs_reg dst;
-      fs_reg src[3];
-
-      /* We always emit into a temporary destination register to avoid
-       * aliasing issues.
-       */
-      dst = vgrf(glsl_type::vec4_type);
-
-      for (int i = 0; i < 3; i++)
-         src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
-
-      switch (fpi->Opcode) {
-      case OPCODE_ABS:
-         src[0].abs = true;
-         src[0].negate = false;
-         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_ADD:
-         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
-         break;
-
-      case OPCODE_CMP:
-         for (int i = 0; i < 4; i++) {
-            if (fpi->DstReg.WriteMask & (1 << i)) {
-               fs_inst *inst;
-
-               emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f),
-                        BRW_CONDITIONAL_L));
-
-               inst = emit(BRW_OPCODE_SEL, offset(dst, i),
-                           offset(src[1], i), offset(src[2], i));
-               inst->predicate = BRW_PREDICATE_NORMAL;
-            }
-         }
-         break;
-
-      case OPCODE_COS:
-         emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_DP2:
-      case OPCODE_DP3:
-      case OPCODE_DP4:
-      case OPCODE_DPH: {
-         fs_reg mul = vgrf(glsl_type::float_type);
-         fs_reg acc = vgrf(glsl_type::float_type);
-         int count;
-
-         switch (fpi->Opcode) {
-         case OPCODE_DP2: count = 2; break;
-         case OPCODE_DP3: count = 3; break;
-         case OPCODE_DP4: count = 4; break;
-         case OPCODE_DPH: count = 3; break;
-         default: unreachable("not reached");
-         }
-
-         emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
-         for (int i = 1; i < count; i++) {
-            emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
-            emit(ADD(acc, acc, mul));
-         }
-
-         if (fpi->Opcode == OPCODE_DPH)
-            emit(ADD(acc, acc, offset(src[1], 3)));
-
-         emit_fp_scalar_write(fpi, dst, acc);
-         break;
-      }
-
-      case OPCODE_DST:
-         if (fpi->DstReg.WriteMask & WRITEMASK_X)
-            emit(MOV(dst, fs_reg(1.0f)));
-         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
-            emit(MUL(offset(dst, 1),
-                     offset(src[0], 1), offset(src[1], 1)));
-         }
-         if (fpi->DstReg.WriteMask & WRITEMASK_Z)
-            emit(MOV(offset(dst, 2), offset(src[0], 2)));
-         if (fpi->DstReg.WriteMask & WRITEMASK_W)
-            emit(MOV(offset(dst, 3), offset(src[1], 3)));
-         break;
-
-      case OPCODE_EX2:
-         emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_FLR:
-         emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_FRC:
-         emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_KIL: {
-         for (int i = 0; i < 4; i++) {
-            /* In most cases the argument to a KIL will be something like
-             * TEMP[0].wwww, so there's no point in checking whether .w is < 0
-             * 4 times in a row.
-             */
-            if (i > 0 &&
-                GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
-                GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
-                ((fpi->SrcReg[0].Negate >> i) & 1) ==
-                ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
-               continue;
-            }
-
-
-            /* Emit an instruction that's predicated on the current
-             * undiscarded pixels, and updates just those pixels to be
-             * turned off.
-             */
-            fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i),
-                                    fs_reg(0.0f), BRW_CONDITIONAL_GE));
-            cmp->predicate = BRW_PREDICATE_NORMAL;
-            cmp->flag_subreg = 1;
-
-            if (devinfo->gen >= 6)
-               emit_discard_jump();
-         }
-         break;
-      }
-
-      case OPCODE_LG2:
-         emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_LIT:
-         /* From the ARB_fragment_program spec:
-          *
-          *      tmp = VectorLoad(op0);
-          *      if (tmp.x < 0) tmp.x = 0;
-          *      if (tmp.y < 0) tmp.y = 0;
-          *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
-          *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
-          *      result.x = 1.0;
-          *      result.y = tmp.x;
-          *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
-          *      result.w = 1.0;
-          *
-          * Note that we don't do the clamping to +/- 128.  We didn't in
-          * brw_wm_emit.c either.
-          */
-         if (fpi->DstReg.WriteMask & WRITEMASK_X)
-            emit(MOV(offset(dst, 0), fs_reg(1.0f)));
-
-         if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
-            fs_inst *inst;
-            emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f),
-                     BRW_CONDITIONAL_LE));
-
-            if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
-               emit(MOV(offset(dst, 1), offset(src[0], 0)));
-               inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
-               inst->predicate = BRW_PREDICATE_NORMAL;
-            }
-
-            if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
-               emit_math(SHADER_OPCODE_POW, offset(dst, 2),
-                         offset(src[0], 1), offset(src[0], 3));
-
-               inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
-               inst->predicate = BRW_PREDICATE_NORMAL;
-            }
-         }
-
-         if (fpi->DstReg.WriteMask & WRITEMASK_W)
-            emit(MOV(offset(dst, 3), fs_reg(1.0f)));
-
-         break;
-
-      case OPCODE_LRP:
-         for (int i = 0; i < 4; i++) {
-            if (fpi->DstReg.WriteMask & (1 << i)) {
-               fs_reg a = offset(src[0], i);
-               fs_reg y = offset(src[1], i);
-               fs_reg x = offset(src[2], i);
-               emit_lrp(offset(dst, i), x, y, a);
-            }
-         }
-         break;
-
-      case OPCODE_MAD:
-         for (int i = 0; i < 4; i++) {
-            if (fpi->DstReg.WriteMask & (1 << i)) {
-               if (devinfo->gen >= 6) {
-                  emit(MAD(offset(dst, i), offset(src[2], i),
-                           offset(src[1], i), offset(src[0], i)));
-               } else {
-                  fs_reg temp = vgrf(glsl_type::float_type);
-                  emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
-                  emit(ADD(offset(dst, i), temp, offset(src[2], i)));
-               }
-            }
-         }
-         break;
-
-      case OPCODE_MAX:
-         emit_fp_minmax(fpi, dst, src[0], src[1]);
-         break;
-
-      case OPCODE_MOV:
-         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_MIN:
-         emit_fp_minmax(fpi, dst, src[0], src[1]);
-         break;
-
-      case OPCODE_MUL:
-         emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
-         break;
-
-      case OPCODE_POW: {
-         fs_reg temp = vgrf(glsl_type::float_type);
-         emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
-         emit_fp_scalar_write(fpi, dst, temp);
-         break;
-      }
-
-      case OPCODE_RCP:
-         emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_RSQ:
-         emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_SCS:
-         if (fpi->DstReg.WriteMask & WRITEMASK_X) {
-            emit_math(SHADER_OPCODE_COS, offset(dst, 0),
-                      offset(src[0], 0));
-         }
-
-         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
-            emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
-                      offset(src[0], 1));
-         }
-         break;
-
-      case OPCODE_SGE:
-         emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
-         break;
-
-      case OPCODE_SIN:
-         emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_SLT:
-         emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
-         break;
-
-      case OPCODE_SUB: {
-         fs_reg neg_src1 = src[1];
-         neg_src1.negate = !src[1].negate;
-
-         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
-         break;
-      }
-
-      case OPCODE_TEX:
-      case OPCODE_TXB:
-      case OPCODE_TXP: {
-         ir_texture_opcode op;
-         fs_reg lod;
-         fs_reg dpdy;
-         fs_reg coordinate = src[0];
-         fs_reg shadow_c;
-         fs_reg sample_index;
-         fs_reg texel_offset; /* No offsets; leave as BAD_FILE. */
-
-         switch (fpi->Opcode) {
-         case OPCODE_TEX:
-            op = ir_tex;
-            break;
-         case OPCODE_TXP: {
-            op = ir_tex;
-
-            coordinate = vgrf(glsl_type::vec3_type);
-            fs_reg invproj = vgrf(glsl_type::float_type);
-            emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
-            for (int i = 0; i < 3; i++) {
-               emit(MUL(offset(coordinate, i),
-                        offset(src[0], i), invproj));
-            }
-            break;
-         }
-         case OPCODE_TXB:
-            op = ir_txb;
-            lod = offset(src[0], 3);
-            break;
-         default:
-            unreachable("not reached");
-         }
-
-         int coord_components;
-         switch (fpi->TexSrcTarget) {
-         case TEXTURE_1D_INDEX:
-            coord_components = 1;
-            break;
-
-         case TEXTURE_2D_INDEX:
-         case TEXTURE_1D_ARRAY_INDEX:
-         case TEXTURE_RECT_INDEX:
-         case TEXTURE_EXTERNAL_INDEX:
-            coord_components = 2;
-            break;
-
-         case TEXTURE_3D_INDEX:
-         case TEXTURE_2D_ARRAY_INDEX:
-            coord_components = 3;
-            break;
-
-         case TEXTURE_CUBE_INDEX: {
-            coord_components = 3;
-
-            fs_reg temp = vgrf(glsl_type::float_type);
-            fs_reg cubecoord = vgrf(glsl_type::vec3_type);
-            fs_reg abscoord = coordinate;
-            abscoord.negate = false;
-            abscoord.abs = true;
-            emit_minmax(BRW_CONDITIONAL_GE, temp,
-                        offset(abscoord, 0), offset(abscoord, 1));
-            emit_minmax(BRW_CONDITIONAL_GE, temp,
-                        temp, offset(abscoord, 2));
-            emit_math(SHADER_OPCODE_RCP, temp, temp);
-            for (int i = 0; i < 3; i++) {
-               emit(MUL(offset(cubecoord, i),
-                        offset(coordinate, i), temp));
-            }
-
-            coordinate = cubecoord;
-            break;
-         }
-
-         default:
-            unreachable("not reached");
-         }
-
-         if (fpi->TexShadow)
-            shadow_c = offset(coordinate, 2);
-
-         emit_texture(op, glsl_type::vec4_type, coordinate, coord_components,
-                      shadow_c, lod, dpdy, 0, sample_index,
-                      reg_undef, /* offset */
-                      reg_undef, /* mcs */
-                      0, /* gather component */
-                      false, /* is cube array */
-                      fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
-                      fpi->TexSrcUnit, fs_reg(fpi->TexSrcUnit),
-                      fpi->TexSrcUnit);
-         dst = this->result;
-
-         break;
-      }
-
-      case OPCODE_SWZ:
-         /* Note that SWZ's extended swizzles are handled in the general
-          * get_src_reg() code.
-          */
-         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
-         break;
-
-      case OPCODE_XPD:
-         for (int i = 0; i < 3; i++) {
-            if (fpi->DstReg.WriteMask & (1 << i)) {
-               int i1 = (i + 1) % 3;
-               int i2 = (i + 2) % 3;
-
-               fs_reg temp = vgrf(glsl_type::float_type);
-               fs_reg neg_src1_1 = offset(src[1], i1);
-               neg_src1_1.negate = !neg_src1_1.negate;
-               emit(MUL(temp, offset(src[0], i2), neg_src1_1));
-               emit(MUL(offset(dst, i),
-                        offset(src[0], i1), offset(src[1], i2)));
-               emit(ADD(offset(dst, i), offset(dst, i), temp));
-            }
-         }
-         break;
-
-      case OPCODE_END:
-         break;
-
-      default:
-         _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
-                       _mesa_opcode_string(fpi->Opcode));
-      }
-
-      /* To handle saturates, we emit a MOV with a saturate bit, which
-       * optimization should fold into the preceding instructions when safe.
-       */
-      if (_mesa_num_inst_dst_regs(fpi->Opcode) != 0) {
-         fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
-
-         for (int i = 0; i < 4; i++) {
-            if (fpi->DstReg.WriteMask & (1 << i)) {
-               fs_inst *inst = emit(MOV(offset(real_dst, i),
-                                        offset(dst, i)));
-               inst->saturate = fpi->SaturateMode;
-            }
-         }
-      }
-   }
-
-   /* Epilogue:
-    *
-    * Fragment depth has this strange convention of being the .z component of
-    * a vec4.  emit_fb_write() wants to see a float value, instead.
-    */
-   this->current_annotation = "result.depth write";
-   if (frag_depth.file != BAD_FILE) {
-      fs_reg temp = vgrf(glsl_type::float_type);
-      emit(MOV(temp, offset(frag_depth, 2)));
-      frag_depth = temp;
-   }
-}
-
-void
-fs_visitor::setup_fp_regs()
-{
-   /* PROGRAM_TEMPORARY */
-   int num_temp = prog->NumTemporaries;
-   fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
-   for (int i = 0; i < num_temp; i++)
-      fp_temp_regs[i] = vgrf(glsl_type::vec4_type);
-
-   /* PROGRAM_STATE_VAR etc. */
-   if (dispatch_width == 8) {
-      for (unsigned p = 0;
-           p < prog->Parameters->NumParameters; p++) {
-         for (unsigned int i = 0; i < 4; i++) {
-            stage_prog_data->param[uniforms++] =
-               &prog->Parameters->ParameterValues[p][i];
-         }
-      }
-   }
-
-   fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
-   for (int i = 0; i < VARYING_SLOT_MAX; i++) {
-      if (prog->InputsRead & BITFIELD64_BIT(i)) {
-         this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
-                                                    i);
-
-         switch (i) {
-         case VARYING_SLOT_POS:
-            {
-               assert(stage == MESA_SHADER_FRAGMENT);
-               gl_fragment_program *fp = (gl_fragment_program*) prog;
-               fp_input_regs[i] =
-                  *emit_fragcoord_interpolation(fp->PixelCenterInteger,
-                                                fp->OriginUpperLeft);
-            }
-            break;
-         case VARYING_SLOT_FACE:
-            fp_input_regs[i] = *emit_frontfacing_interpolation();
-            break;
-         default:
-            fp_input_regs[i] = vgrf(glsl_type::vec4_type);
-            emit_general_interpolation(fp_input_regs[i], "fp_input",
-                                       glsl_type::vec4_type,
-                                       INTERP_QUALIFIER_NONE,
-                                       i, false, false);
-
-            if (i == VARYING_SLOT_FOGC) {
-               emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
-               emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
-               emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
-            }
-
-            break;
-         }
-
-         this->current_annotation = NULL;
-      }
-   }
-}
-
-fs_reg
-fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
-{
-   assert(stage == MESA_SHADER_FRAGMENT);
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
-   switch (dst->File) {
-   case PROGRAM_TEMPORARY:
-      return fp_temp_regs[dst->Index];
-
-   case PROGRAM_OUTPUT:
-      if (dst->Index == FRAG_RESULT_DEPTH) {
-         if (frag_depth.file == BAD_FILE)
-            frag_depth = vgrf(glsl_type::vec4_type);
-         return frag_depth;
-      } else if (dst->Index == FRAG_RESULT_COLOR) {
-         if (outputs[0].file == BAD_FILE) {
-            outputs[0] = vgrf(glsl_type::vec4_type);
-            output_components[0] = 4;
-
-            /* Tell emit_fb_writes() to smear fragment.color across all the
-             * color attachments.
-             */
-            for (int i = 1; i < key->nr_color_regions; i++) {
-               outputs[i] = outputs[0];
-               output_components[i] = output_components[0];
-            }
-         }
-         return outputs[0];
-      } else {
-         int output_index = dst->Index - FRAG_RESULT_DATA0;
-         if (outputs[output_index].file == BAD_FILE) {
-            outputs[output_index] = vgrf(glsl_type::vec4_type);
-         }
-         output_components[output_index] = 4;
-         return outputs[output_index];
-      }
-
-   case PROGRAM_UNDEFINED:
-      return fs_reg();
-
-   default:
-      _mesa_problem(ctx, "bad dst register file: %s\n",
-                    _mesa_register_file_name((gl_register_file)dst->File));
-      return vgrf(glsl_type::vec4_type);
-   }
-}
-
-fs_reg
-fs_visitor::get_fp_src_reg(const prog_src_register *src)
-{
-   struct gl_program_parameter_list *plist = prog->Parameters;
-
-   fs_reg result;
-
-   assert(!src->Abs);
-
-   switch (src->File) {
-   case PROGRAM_UNDEFINED:
-      return fs_reg();
-   case PROGRAM_TEMPORARY:
-      result = fp_temp_regs[src->Index];
-      break;
-
-   case PROGRAM_INPUT:
-      result = fp_input_regs[src->Index];
-      break;
-
-   case PROGRAM_STATE_VAR:
-   case PROGRAM_UNIFORM:
-   case PROGRAM_CONSTANT:
-      /* We actually want to look at the type in the Parameters list for this,
-       * because this lets us upload constant builtin uniforms, as actual
-       * constants.
-       */
-      switch (plist->Parameters[src->Index].Type) {
-      case PROGRAM_CONSTANT: {
-         result = vgrf(glsl_type::vec4_type);
-
-         for (int i = 0; i < 4; i++) {
-            emit(MOV(offset(result, i),
-                     fs_reg(plist->ParameterValues[src->Index][i].f)));
-         }
-         break;
-      }
-
-      case PROGRAM_STATE_VAR:
-      case PROGRAM_UNIFORM:
-         result = fs_reg(UNIFORM, src->Index * 4);
-         break;
-
-      default:
-         _mesa_problem(ctx, "bad uniform src register file: %s\n",
-                       _mesa_register_file_name((gl_register_file)src->File));
-         return vgrf(glsl_type::vec4_type);
-      }
-      break;
-
-   default:
-      _mesa_problem(ctx, "bad src register file: %s\n",
-                    _mesa_register_file_name((gl_register_file)src->File));
-      return vgrf(glsl_type::vec4_type);
-   }
-
-   if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
-      fs_reg unswizzled = result;
-      result = vgrf(glsl_type::vec4_type);
-      for (int i = 0; i < 4; i++) {
-         bool negate = src->Negate & (1 << i);
-         /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
-          * but it costs us nothing to support it.
-          */
-         int src_swiz = GET_SWZ(src->Swizzle, i);
-         if (src_swiz == SWIZZLE_ZERO) {
-            emit(MOV(offset(result, i), fs_reg(0.0f)));
-         } else if (src_swiz == SWIZZLE_ONE) {
-            emit(MOV(offset(result, i),
-                     negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
-         } else {
-            fs_reg src = offset(unswizzled, src_swiz);
-            if (negate)
-               src.negate = !src.negate;
-            emit(MOV(offset(result, i), src));
-         }
-      }
-   }
-
-   return result;
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_generator.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_generator.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_generator.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -48,7 +48,7 @@
 }
 
 static struct brw_reg
-brw_reg_from_fs_reg(fs_reg *reg)
+brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
 {
    struct brw_reg brw_reg;
 
@@ -57,10 +57,10 @@
    case MRF:
       if (reg->stride == 0) {
          brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
-      } else if (reg->width < 8) {
+      } else if (inst->exec_size < 8) {
          brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
-         brw_reg = stride(brw_reg, reg->width * reg->stride,
-                          reg->width, reg->stride);
+         brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
+                          inst->exec_size, reg->stride);
       } else {
          /* From the Haswell PRM:
           *
@@ -79,6 +79,10 @@
       brw_reg = byte_offset(brw_reg, reg->subreg_offset);
       break;
    case IMM:
+      assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V ||
+                              reg->type == BRW_REGISTER_TYPE_UV ||
+                              reg->type == BRW_REGISTER_TYPE_VF) ? 1 : 0));
+
       switch (reg->type) {
       case BRW_REGISTER_TYPE_F:
 	 brw_reg = brw_imm_f(reg->fixed_hw_reg.dw1.f);
@@ -121,7 +125,7 @@
    return brw_reg;
 }
 
-fs_generator::fs_generator(struct brw_context *brw,
+fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data,
                            void *mem_ctx,
                            const void *key,
                            struct brw_stage_prog_data *prog_data,
@@ -130,7 +134,8 @@
                            bool runtime_check_aads_emit,
                            const char *stage_abbrev)
 
-   : brw(brw), devinfo(brw->intelScreen->devinfo), key(key),
+   : compiler(compiler), log_data(log_data),
+     devinfo(compiler->devinfo), key(key),
      prog_data(prog_data),
      prog(prog), promoted_constants(promoted_constants),
      runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
@@ -216,11 +221,11 @@
    if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
    else if (prog_data->dual_src_blend) {
-      if (dispatch_width == 8 || !inst->eot)
+      if (!inst->force_sechalf)
          msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
       else
          msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
-   } else if (dispatch_width == 16)
+   } else if (inst->exec_size == 16)
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
    else
       msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
@@ -401,12 +406,19 @@
 }
 
 void
+fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
+{  /* Code generation for SHADER_OPCODE_BARRIER; inst itself is not read here. */
+   brw_barrier(p, src);  /* hand src to the barrier emitter on codegen state p */
+   brw_WAIT(p);          /* NOTE(review): presumably stalls until the barrier is signalled -- confirm in brw_eu_emit.c */
+}
+
+void
 fs_generator::generate_blorp_fb_write(fs_inst *inst)
 {
    brw_fb_WRITE(p,
                 16 /* dispatch_width */,
                 brw_message_reg(inst->base_mrf),
-                brw_reg_from_fs_reg(&inst->src[0]),
+                brw_reg_from_fs_reg(inst, &inst->src[0]),
                 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
                 inst->target,
                 inst->mlen,
@@ -643,7 +655,7 @@
 	 /* Note that G45 and older determines shadow compare and dispatch width
 	  * from message length for most messages.
 	  */
-         if (dispatch_width == 8) {
+         if (inst->exec_size == 8) {
             msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
             if (inst->shadow_compare) {
                assert(inst->mlen == 6);
@@ -662,7 +674,7 @@
 	 break;
       case FS_OPCODE_TXB:
 	 if (inst->shadow_compare) {
-            assert(dispatch_width == 8);
+            assert(inst->exec_size == 8);
 	    assert(inst->mlen == 6);
 	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
 	 } else {
@@ -673,7 +685,7 @@
 	 break;
       case SHADER_OPCODE_TXL:
 	 if (inst->shadow_compare) {
-            assert(dispatch_width == 8);
+            assert(inst->exec_size == 8);
 	    assert(inst->mlen == 6);
 	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
 	 } else {
@@ -684,7 +696,7 @@
 	 break;
       case SHADER_OPCODE_TXD:
 	 /* There is no sample_d_c message; comparisons are done manually */
-         assert(dispatch_width == 8);
+         assert(inst->exec_size == 8);
 	 assert(inst->mlen == 7 || inst->mlen == 10);
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
 	 break;
@@ -1046,7 +1058,6 @@
                                                        struct brw_reg index,
                                                        struct brw_reg offset)
 {
-   assert(inst->mlen == 0);
    assert(index.type == BRW_REGISTER_TYPE_UD);
 
    assert(offset.file == BRW_GENERAL_REGISTER_FILE);
@@ -1061,12 +1072,10 @@
 
    struct brw_reg src = offset;
    bool header_present = false;
-   int mlen = 1;
 
    if (devinfo->gen >= 9) {
       /* Skylake requires a message header in order to use SIMD4x2 mode. */
-      src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
-      mlen = 2;
+      src = retype(brw_vec4_grf(offset.nr, 0), BRW_REGISTER_TYPE_UD);
       header_present = true;
 
       brw_push_insn_state(p);
@@ -1097,7 +1106,7 @@
                               0, /* LD message ignores sampler unit */
                               GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                               1, /* rlen */
-                              mlen,
+                              inst->mlen,
                               header_present,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
@@ -1127,7 +1136,7 @@
                               0, /* LD message ignores sampler unit */
                               GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                               1, /* rlen */
-                              mlen,
+                              inst->mlen,
                               header_present,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
@@ -1355,37 +1364,6 @@
    brw_pop_insn_state(p);
 }
 
-/* Sets vstride=16, width=8, hstride=2 or vstride=0, width=1, hstride=0
- * (when mask is passed as a uniform) of register mask before moving it
- * to register dst.
- */
-void
-fs_generator::generate_set_omask(fs_inst *inst,
-                                 struct brw_reg dst,
-                                 struct brw_reg mask)
-{
-   bool stride_8_8_1 =
-    (mask.vstride == BRW_VERTICAL_STRIDE_8 &&
-     mask.width == BRW_WIDTH_8 &&
-     mask.hstride == BRW_HORIZONTAL_STRIDE_1);
-
-   bool stride_0_1_0 = has_scalar_region(mask);
-
-   assert(stride_8_8_1 || stride_0_1_0);
-   assert(dst.type == BRW_REGISTER_TYPE_UW);
-
-   brw_push_insn_state(p);
-   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-   if (stride_8_8_1) {
-      brw_MOV(p, dst, retype(stride(mask, 16, 8, 2), dst.type));
-   } else if (stride_0_1_0) {
-      brw_MOV(p, dst, retype(mask, dst.type));
-   }
-   brw_pop_insn_state(p);
-}
-
 /* Sets vstride=1, width=4, hstride=0 of register src1 during
  * the ADD instruction.
  */
@@ -1555,7 +1533,7 @@
          annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
 
       for (unsigned int i = 0; i < inst->sources; i++) {
-	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
+	 src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]);
 
 	 /* The accumulator result appears to get used for the
 	  * conditional modifier generation.  When negating a UD
@@ -1567,7 +1545,7 @@
 		inst->src[i].type != BRW_REGISTER_TYPE_UD ||
 		!inst->src[i].negate);
       }
-      dst = brw_reg_from_fs_reg(&inst->dst);
+      dst = brw_reg_from_fs_reg(inst, &inst->dst);
 
       brw_set_default_predicate_control(p, inst->predicate);
       brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1596,7 +1574,7 @@
          /* If the instruction writes to more than one register, it needs to
           * be a "compressed" instruction on Gen <= 5.
           */
-         if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32)
+         if (inst->dst.component_size(inst->exec_size) > REG_SIZE)
             brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
          else
             brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
@@ -1864,7 +1842,7 @@
 	 break;
 
       case BRW_OPCODE_DO:
-	 brw_DO(p, BRW_EXECUTE_8);
+	 brw_DO(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
 	 break;
 
       case BRW_OPCODE_BREAK:
@@ -2011,19 +1989,15 @@
          break;
 
       case SHADER_OPCODE_UNTYPED_ATOMIC:
-         assert(src[1].file == BRW_IMMEDIATE_VALUE &&
-                src[2].file == BRW_IMMEDIATE_VALUE);
+         assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud,
                             inst->mlen, !inst->dst.is_null());
-         brw_mark_surface_used(prog_data, src[1].dw1.ud);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
-         assert(src[1].file == BRW_IMMEDIATE_VALUE &&
-                src[2].file == BRW_IMMEDIATE_VALUE);
+         assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_read(p, dst, src[0], src[1],
                                   inst->mlen, src[2].dw1.ud);
-         brw_mark_surface_used(prog_data, src[1].dw1.ud);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -2065,10 +2039,6 @@
          brw_broadcast(p, dst, src[0], src[1]);
          break;
 
-      case FS_OPCODE_SET_OMASK:
-         generate_set_omask(inst, dst, src[0]);
-         break;
-
       case FS_OPCODE_SET_SAMPLE_ID:
          generate_set_sample_id(inst, dst, src[0], src[1]);
          break;
@@ -2117,6 +2087,10 @@
          generate_cs_terminate(inst, src[0]);
          break;
 
+      case SHADER_OPCODE_BARRIER:
+	 generate_barrier(inst, src[0]);
+	 break;
+
       default:
          unreachable("Unsupported opcode");
 
@@ -2162,15 +2136,13 @@
       ralloc_free(annotation.ann);
    }
 
-   static GLuint msg_id = 0;
-   _mesa_gl_debug(&brw->ctx, &msg_id,
-                  MESA_DEBUG_SOURCE_SHADER_COMPILER,
-                  MESA_DEBUG_TYPE_OTHER,
-                  MESA_DEBUG_SEVERITY_NOTIFICATION,
-                  "%s SIMD%d shader: %d inst, %d loops, %d:%d spills:fills, "
-                  "Promoted %u constants, compacted %d to %d bytes.\n",
-                  stage_abbrev, dispatch_width, before_size / 16, loop_count,
-                  spill_count, fill_count, promoted_constants, before_size, after_size);
+   compiler->shader_debug_log(log_data,
+                              "%s SIMD%d shader: %d inst, %d loops, "
+                              "%d:%d spills:fills, Promoted %u constants, "
+                              "compacted %d to %d bytes.\n",
+                              stage_abbrev, dispatch_width, before_size / 16,
+                              loop_count, spill_count, fill_count,
+                              promoted_constants, before_size, after_size);
 
    return start_offset;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs.h	2015-09-16 14:36:09.000000000 +0000
@@ -29,6 +29,7 @@
 
 #include "brw_shader.h"
 #include "brw_ir_fs.h"
+#include "brw_fs_builder.h"
 
 extern "C" {
 
@@ -61,127 +62,70 @@
    class fs_live_variables;
 }
 
+static inline fs_reg
+offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
+{  /* Returns reg (taken by value) advanced by delta; how delta is applied depends on reg.file. */
+   switch (reg.file) {
+   case BAD_FILE:
+      break;  /* nothing to advance: returned unchanged */
+   case GRF:
+   case MRF:
+   case HW_REG:
+   case ATTR:  /* these files step by delta components sized for the builder's dispatch width */
+      return byte_offset(reg,
+                         delta * reg.component_size(bld.dispatch_width()));
+   case UNIFORM:
+      reg.reg_offset += delta;  /* uniforms step reg_offset by delta; dispatch width is not consulted */
+      break;
+   case IMM:
+      assert(delta == 0);  /* only a zero delta is accepted for immediates; control then leaves the switch */
+   }
+   return reg;
+}
+
 /**
  * The fragment shader front-end.
  *
  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
  */
-class fs_visitor : public backend_visitor
+class fs_visitor : public backend_shader
 {
 public:
-   const fs_reg reg_null_f;
-   const fs_reg reg_null_d;
-   const fs_reg reg_null_ud;
-
-   fs_visitor(struct brw_context *brw,
+   fs_visitor(const struct brw_compiler *compiler, void *log_data,
               void *mem_ctx,
               gl_shader_stage stage,
               const void *key,
               struct brw_stage_prog_data *prog_data,
               struct gl_shader_program *shader_prog,
               struct gl_program *prog,
-              unsigned dispatch_width);
+              unsigned dispatch_width,
+              int shader_time_index);
 
    ~fs_visitor();
 
-   fs_reg *variable_storage(ir_variable *var);
    fs_reg vgrf(const glsl_type *const type);
-   fs_reg vgrf(int num_components);
    void import_uniforms(fs_visitor *v);
-   void setup_uniform_clipplane_values();
-   void compute_clip_distance();
-
-   void visit(ir_variable *ir);
-   void visit(ir_assignment *ir);
-   void visit(ir_dereference_variable *ir);
-   void visit(ir_dereference_record *ir);
-   void visit(ir_dereference_array *ir);
-   void visit(ir_expression *ir);
-   void visit(ir_texture *ir);
-   void visit(ir_if *ir);
-   void visit(ir_constant *ir);
-   void visit(ir_swizzle *ir);
-   void visit(ir_return *ir);
-   void visit(ir_loop *ir);
-   void visit(ir_loop_jump *ir);
-   void visit(ir_discard *ir);
-   void visit(ir_call *ir);
-   void visit(ir_function *ir);
-   void visit(ir_function_signature *ir);
-   void visit(ir_emit_vertex *);
-   void visit(ir_end_primitive *);
+   void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
+   void compute_clip_distance(gl_clip_plane *clip_planes);
 
    uint32_t gather_channel(int orig_chan, uint32_t sampler);
    void swizzle_result(ir_texture_opcode op, int dest_components,
                        fs_reg orig_val, uint32_t sampler);
 
-   fs_inst *emit(fs_inst *inst);
-   void emit(exec_list list);
-
-   fs_inst *emit(enum opcode opcode);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-                 const fs_reg &src1);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst,
-                 const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
-   fs_inst *emit(enum opcode opcode, const fs_reg &dst,
-                 fs_reg src[], int sources);
-
-   fs_inst *MOV(const fs_reg &dst, const fs_reg &src);
-   fs_inst *NOT(const fs_reg &dst, const fs_reg &src);
-   fs_inst *RNDD(const fs_reg &dst, const fs_reg &src);
-   fs_inst *RNDE(const fs_reg &dst, const fs_reg &src);
-   fs_inst *RNDZ(const fs_reg &dst, const fs_reg &src);
-   fs_inst *FRC(const fs_reg &dst, const fs_reg &src);
-   fs_inst *ADD(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *MUL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *MACH(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *MAC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SHL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SHR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *ASR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *AND(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *OR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *XOR(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *IF(enum brw_predicate predicate);
-   fs_inst *IF(const fs_reg &src0, const fs_reg &src1,
-               enum brw_conditional_mod condition);
-   fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
-                enum brw_conditional_mod condition);
-   fs_inst *LRP(const fs_reg &dst, const fs_reg &a, const fs_reg &y,
-                const fs_reg &x);
-   fs_inst *DEP_RESOLVE_MOV(int grf);
-   fs_inst *BFREV(const fs_reg &dst, const fs_reg &value);
-   fs_inst *BFE(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset,
-                const fs_reg &value);
-   fs_inst *BFI1(const fs_reg &dst, const fs_reg &bits, const fs_reg &offset);
-   fs_inst *BFI2(const fs_reg &dst, const fs_reg &bfi1_dst,
-                 const fs_reg &insert, const fs_reg &base);
-   fs_inst *FBH(const fs_reg &dst, const fs_reg &value);
-   fs_inst *FBL(const fs_reg &dst, const fs_reg &value);
-   fs_inst *CBIT(const fs_reg &dst, const fs_reg &value);
-   fs_inst *MAD(const fs_reg &dst, const fs_reg &c, const fs_reg &b,
-                const fs_reg &a);
-   fs_inst *ADDC(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SUBB(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-   fs_inst *SEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1);
-
    int type_size(const struct glsl_type *type);
    fs_inst *get_instruction_generating_reg(fs_inst *start,
 					   fs_inst *end,
 					   const fs_reg &reg);
 
-   fs_inst *LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources,
-                         int header_size);
-
-   exec_list VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
-                                        const fs_reg &surf_index,
-                                        const fs_reg &varying_offset,
-                                        uint32_t const_offset);
+   void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
+                                   const fs_reg &dst,
+                                   const fs_reg &surf_index,
+                                   const fs_reg &varying_offset,
+                                   uint32_t const_offset);
+   void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
 
-   bool run_fs();
-   bool run_vs();
+   bool run_fs(bool do_rep_send);
+   bool run_vs(gl_clip_plane *clip_planes);
    bool run_cs();
    void optimize();
    void allocate_registers();
@@ -197,12 +141,8 @@
    void assign_vs_urb_setup();
    bool assign_regs(bool allow_spilling);
    void assign_regs_trivial();
-   void get_used_mrfs(bool *mrf_used);
    void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
                                    int first_payload_node);
-   void setup_mrf_hack_interference(struct ra_graph *g,
-                                    int first_mrf_hack_node,
-                                    int *first_used_mrf);
    int choose_spill_reg(struct ra_graph *g);
    void spill_reg(int spill_reg);
    void split_virtual_grfs();
@@ -239,10 +179,12 @@
                                                      fs_inst *inst);
    void vfail(const char *msg, va_list args);
    void fail(const char *msg, ...);
-   void no16(const char *msg, ...);
+   void no16(const char *msg);
    void lower_uniform_pull_constant_loads();
    bool lower_load_payload();
+   bool lower_logical_sends();
    bool lower_integer_multiplication();
+   bool lower_simd_width();
    bool opt_combine_constants();
 
    void emit_dummy_fs();
@@ -266,27 +208,6 @@
    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
    fs_reg rescale_texcoord(fs_reg coordinate, int coord_components,
                            bool is_rect, uint32_t sampler, int texunit);
-   fs_inst *emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
-                              fs_reg coordinate, int coord_components,
-                              fs_reg shadow_comp,
-                              fs_reg lod, fs_reg lod2, int grad_components,
-                              uint32_t sampler);
-   fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
-                                     fs_reg coordinate, int vector_elements,
-                                     fs_reg shadow_c, fs_reg lod,
-                                     uint32_t sampler);
-   fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
-                              fs_reg coordinate, int coord_components,
-                              fs_reg shadow_comp,
-                              fs_reg lod, fs_reg lod2, int grad_components,
-                              fs_reg sample_index, uint32_t sampler,
-                              bool has_offset);
-   fs_inst *emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
-                              fs_reg coordinate, int coord_components,
-                              fs_reg shadow_comp,
-                              fs_reg lod, fs_reg lod2, int grad_components,
-                              fs_reg sample_index, fs_reg mcs, fs_reg sampler,
-                              fs_reg offset_value);
    void emit_texture(ir_texture_opcode op,
                      const glsl_type *dest_type,
                      fs_reg coordinate, int components,
@@ -301,61 +222,22 @@
                      uint32_t sampler,
                      fs_reg sampler_reg,
                      int texunit);
-   fs_reg emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler);
+   fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
+                         const fs_reg &sampler);
    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
-   void resolve_source_modifiers(fs_reg *src);
-   fs_reg fix_math_operand(fs_reg src);
-   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
-   fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
-   fs_inst *emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
-                     const fs_reg &a);
-   void emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
-                    const fs_reg &src0, const fs_reg &src1);
+   fs_reg resolve_source_modifiers(const fs_reg &src);
    void emit_discard_jump();
-   /** Copy any live channel from \p src to the first channel of \p dst. */
-   void emit_uniformize(const fs_reg &dst, const fs_reg &src);
-   bool try_emit_b2f_of_comparison(ir_expression *ir);
-   bool try_emit_saturate(ir_expression *ir);
-   bool try_emit_line(ir_expression *ir);
-   bool try_emit_mad(ir_expression *ir);
    bool try_replace_with_sel();
-   bool try_opt_frontfacing_ternary(ir_if *ir);
    bool opt_peephole_sel();
    bool opt_peephole_predicated_break();
    bool opt_saturate_propagation();
    bool opt_cmod_propagation();
    bool opt_zero_samples();
-   void emit_bool_to_cond_code(ir_rvalue *condition);
-   void emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3]);
-   void emit_if_gen6(ir_if *ir);
    void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
                      uint32_t spill_offset, int count);
    void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
                    uint32_t spill_offset, int count);
 
-   void emit_fragment_program_code();
-   void setup_fp_regs();
-   fs_reg get_fp_src_reg(const prog_src_register *src);
-   fs_reg get_fp_dst_reg(const prog_dst_register *dst);
-   void emit_fp_alu1(enum opcode opcode,
-                     const struct prog_instruction *fpi,
-                     fs_reg dst, fs_reg src);
-   void emit_fp_alu2(enum opcode opcode,
-                     const struct prog_instruction *fpi,
-                     fs_reg dst, fs_reg src0, fs_reg src1);
-   void emit_fp_scalar_write(const struct prog_instruction *fpi,
-                             fs_reg dst, fs_reg src);
-   void emit_fp_scalar_math(enum opcode opcode,
-                            const struct prog_instruction *fpi,
-                            fs_reg dst, fs_reg src);
-
-   void emit_fp_minmax(const struct prog_instruction *fpi,
-                       fs_reg dst, fs_reg src0, fs_reg src1);
-
-   void emit_fp_sop(enum brw_conditional_mod conditional_mod,
-                    const struct prog_instruction *fpi,
-                    fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
-
    void emit_nir_code();
    void nir_setup_inputs(nir_shader *shader);
    void nir_setup_outputs(nir_shader *shader);
@@ -369,55 +251,49 @@
    void nir_emit_loop(nir_loop *loop);
    void nir_emit_block(nir_block *block);
    void nir_emit_instr(nir_instr *instr);
-   void nir_emit_alu(nir_alu_instr *instr);
-   void nir_emit_intrinsic(nir_intrinsic_instr *instr);
-   void nir_emit_texture(nir_tex_instr *instr);
-   void nir_emit_jump(nir_jump_instr *instr);
+   void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
+   void nir_emit_load_const(const brw::fs_builder &bld,
+                            nir_load_const_instr *instr);
+   void nir_emit_undef(const brw::fs_builder &bld,
+                       nir_ssa_undef_instr *instr);
+   void nir_emit_intrinsic(const brw::fs_builder &bld,
+                           nir_intrinsic_instr *instr);
+   void nir_emit_texture(const brw::fs_builder &bld,
+                         nir_tex_instr *instr);
+   void nir_emit_jump(const brw::fs_builder &bld,
+                      nir_jump_instr *instr);
    fs_reg get_nir_src(nir_src src);
    fs_reg get_nir_dest(nir_dest dest);
-   void emit_percomp(fs_inst *inst, unsigned wr_mask);
+   fs_reg get_nir_image_deref(const nir_deref_var *deref);
+   void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
+                     unsigned wr_mask);
 
    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
                                      const fs_reg &result);
 
-   void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-                            unsigned exec_size, bool use_2nd_half);
    void emit_alpha_test();
-   fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
-                                 fs_reg src0_alpha, unsigned components,
-                                 unsigned exec_size, bool use_2nd_half = false);
+   fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
+                                 fs_reg color1, fs_reg color2,
+                                 fs_reg src0_alpha, unsigned components);
    void emit_fb_writes();
    void emit_urb_writes();
    void emit_cs_terminate();
 
+   void emit_barrier();
+
    void emit_shader_time_begin();
    void emit_shader_time_end();
-   fs_inst *SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value);
+   void SHADER_TIME_ADD(const brw::fs_builder &bld,
+                        int shader_time_subindex,
+                        fs_reg value);
 
-   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-                            fs_reg dst, fs_reg offset, fs_reg src0,
-                            fs_reg src1);
-
-   void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
-                                  fs_reg offset);
-
-   void emit_interpolate_expression(ir_expression *ir);
-
-   bool try_rewrite_rhs_to_dst(ir_assignment *ir,
-			       fs_reg dst,
-			       fs_reg src,
-			       fs_inst *pre_rhs_inst,
-			       fs_inst *last_rhs_inst);
-   void emit_assignment_writes(fs_reg &l, fs_reg &r,
-			       const glsl_type *type, bool predicated);
-   void resolve_ud_negate(fs_reg *reg);
-   void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
-
-   fs_reg get_timestamp(fs_inst **out_mov);
+   fs_reg get_timestamp(const brw::fs_builder &bld);
 
    struct brw_reg interp_reg(int location, int channel);
-   void setup_uniform_values(ir_variable *ir);
-   void setup_builtin_uniform_values(ir_variable *ir);
+
+   virtual void setup_vector_uniform_values(const gl_constant_value *values,
+                                            unsigned n);
+
    int implied_mrf_writes(fs_inst *inst);
 
    virtual void dump_instructions();
@@ -425,8 +301,6 @@
    void dump_instruction(backend_instruction *inst);
    void dump_instruction(backend_instruction *inst, FILE *file);
 
-   void visit_atomic_counter_intrinsic(ir_call *ir);
-
    const void *const key;
    const struct brw_sampler_prog_key_data *key_tex;
 
@@ -462,7 +336,6 @@
     */
    int *push_constant_loc;
 
-   struct hash_table *variable_ht;
    fs_reg frag_depth;
    fs_reg sample_mask;
    fs_reg outputs[VARYING_SLOT_MAX];
@@ -473,30 +346,22 @@
    /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
    unsigned max_grf;
 
-   fs_reg *fp_temp_regs;
-   fs_reg *fp_input_regs;
-
    fs_reg *nir_locals;
-   fs_reg *nir_globals;
+   fs_reg *nir_ssa_values;
    fs_reg nir_inputs;
    fs_reg nir_outputs;
    fs_reg *nir_system_values;
 
-   /** @{ debug annotation info */
-   const char *current_annotation;
-   const void *base_ir;
-   /** @} */
-
    bool failed;
    char *fail_msg;
    bool simd16_unsupported;
    char *no16_msg;
 
-   /* Result of last visit() method. */
+   /* Result of last visit() method. Still used by emit_texture() */
    fs_reg result;
 
    /** Register numbers for thread payload fields. */
-   struct {
+   struct thread_payload {
       uint8_t source_depth_reg;
       uint8_t source_w_reg;
       uint8_t aa_dest_stencil_reg;
@@ -525,7 +390,10 @@
 
    const unsigned dispatch_width; /**< 8 or 16 */
 
+   int shader_time_index;
+
    unsigned promoted_constants;
+   brw::fs_builder bld;
 };
 
 /**
@@ -536,7 +404,7 @@
 class fs_generator
 {
 public:
-   fs_generator(struct brw_context *brw,
+   fs_generator(const struct brw_compiler *compiler, void *log_data,
                 void *mem_ctx,
                 const void *key,
                 struct brw_stage_prog_data *prog_data,
@@ -558,6 +426,7 @@
    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
+   void generate_barrier(fs_inst *inst, struct brw_reg src);
    void generate_blorp_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
 			 struct brw_reg *src);
@@ -601,10 +470,6 @@
                                           struct brw_reg msg_data,
                                           unsigned msg_type);
 
-   void generate_set_omask(fs_inst *inst,
-                           struct brw_reg dst,
-                           struct brw_reg sample_mask);
-
    void generate_set_sample_id(fs_inst *inst,
                                struct brw_reg dst,
                                struct brw_reg src0,
@@ -630,7 +495,9 @@
 
    bool patch_discard_jumps_to_fb_writes();
 
-   struct brw_context *brw;
+   const struct brw_compiler *compiler;
+   void *log_data; /* Passed to compiler->*_log functions */
+
    const struct brw_device_info *devinfo;
 
    struct brw_codegen *p;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -204,27 +204,9 @@
    while (cont) {
       cont = false;
 
-      foreach_block (block, cfg) {
+      foreach_block_reverse (block, cfg) {
          struct block_data *bd = &block_data[block->num];
 
-	 /* Update livein */
-	 for (int i = 0; i < bitset_words; i++) {
-            BITSET_WORD new_livein = (bd->use[i] |
-                                      (bd->liveout[i] &
-                                       ~bd->def[i]));
-	    if (new_livein & ~bd->livein[i]) {
-               bd->livein[i] |= new_livein;
-               cont = true;
-	    }
-	 }
-         BITSET_WORD new_livein = (bd->flag_use[0] |
-                                   (bd->flag_liveout[0] &
-                                    ~bd->flag_def[0]));
-         if (new_livein & ~bd->flag_livein[0]) {
-            bd->flag_livein[0] |= new_livein;
-            cont = true;
-         }
-
 	 /* Update liveout */
 	 foreach_list_typed(bblock_link, child_link, link, &block->children) {
             struct block_data *child_bd = &block_data[child_link->block->num];
@@ -244,6 +226,24 @@
                cont = true;
             }
 	 }
+
+         /* Update livein */
+         for (int i = 0; i < bitset_words; i++) {
+            BITSET_WORD new_livein = (bd->use[i] |
+                                      (bd->liveout[i] &
+                                       ~bd->def[i]));
+            if (new_livein & ~bd->livein[i]) {
+               bd->livein[i] |= new_livein;
+               cont = true;
+            }
+         }
+         BITSET_WORD new_livein = (bd->flag_use[0] |
+                                   (bd->flag_liveout[0] &
+                                    ~bd->flag_def[0]));
+         if (new_livein & ~bd->flag_livein[0]) {
+            bd->flag_livein[0] |= new_livein;
+            cont = true;
+         }
       }
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_nir.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_nir.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_nir.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -24,10 +24,14 @@
 #include "glsl/ir.h"
 #include "glsl/ir_optimization.h"
 #include "glsl/nir/glsl_to_nir.h"
+#include "main/shaderimage.h"
 #include "program/prog_to_nir.h"
 #include "brw_fs.h"
+#include "brw_fs_surface_builder.h"
 #include "brw_nir.h"
 
+using namespace brw;
+
 void
 fs_visitor::emit_nir_code()
 {
@@ -36,31 +40,11 @@
    /* emit the arrays used for inputs and outputs - load/store intrinsics will
     * be converted to reads/writes of these arrays
     */
-
-   if (nir->num_inputs > 0) {
-      nir_inputs = vgrf(nir->num_inputs);
-      nir_setup_inputs(nir);
-   }
-
-   if (nir->num_outputs > 0) {
-      nir_outputs = vgrf(nir->num_outputs);
-      nir_setup_outputs(nir);
-   }
-
-   if (nir->num_uniforms > 0) {
-      nir_setup_uniforms(nir);
-   }
-
+   nir_setup_inputs(nir);
+   nir_setup_outputs(nir);
+   nir_setup_uniforms(nir);
    nir_emit_system_values(nir);
 
-   nir_globals = ralloc_array(mem_ctx, fs_reg, nir->reg_alloc);
-   foreach_list_typed(nir_register, reg, node, &nir->registers) {
-      unsigned array_elems =
-         reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
-      unsigned size = array_elems * reg->num_components;
-      nir_globals[reg->index] = vgrf(size);
-   }
-
    /* get the main function and emit it */
    nir_foreach_overload(nir, overload) {
       assert(strcmp(overload->function->name, "main") == 0);
@@ -72,9 +56,11 @@
 void
 fs_visitor::nir_setup_inputs(nir_shader *shader)
 {
+   nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs);
+
    foreach_list_typed(nir_variable, var, node, &shader->inputs) {
       enum brw_reg_type type = brw_type_for_base_type(var->type);
-      fs_reg input = offset(nir_inputs, var->data.driver_location);
+      fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
 
       fs_reg reg;
       switch (stage) {
@@ -97,10 +83,10 @@
          for (unsigned i = 0; i < array_length; i++) {
             for (unsigned j = 0; j < cols; j++) {
                for (unsigned k = 0; k < elts; k++) {
-                  emit(MOV(offset(retype(input, type),
-                                  components * i + elts * j + k),
-                           offset(fs_reg(ATTR, var->data.location + i, type),
-                                  4 * j + k)));
+                  bld.MOV(offset(retype(input, type), bld,
+                                 components * i + elts * j + k),
+                          offset(fs_reg(ATTR, var->data.location + i, type),
+                                 bld, 4 * j + k));
                }
             }
          }
@@ -108,13 +94,16 @@
       }
       case MESA_SHADER_GEOMETRY:
       case MESA_SHADER_COMPUTE:
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
          unreachable("fs_visitor not used for these stages yet.");
          break;
       case MESA_SHADER_FRAGMENT:
          if (var->data.location == VARYING_SLOT_POS) {
             reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
                                                 var->data.origin_upper_left);
-            emit_percomp(MOV(input, reg), 0xF);
+            emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+                                      input, reg), 0xF);
          } else {
             emit_general_interpolation(input, var->name, var->type,
                                        (glsl_interp_qualifier) var->data.interpolation,
@@ -131,45 +120,54 @@
 {
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
 
+   nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_outputs);
+
    foreach_list_typed(nir_variable, var, node, &shader->outputs) {
-      fs_reg reg = offset(nir_outputs, var->data.driver_location);
+      fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
 
       int vector_elements =
          var->type->is_array() ? var->type->fields.array->vector_elements
                                : var->type->vector_elements;
 
-      if (stage == MESA_SHADER_VERTEX) {
+      switch (stage) {
+      case MESA_SHADER_VERTEX:
          for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) {
             int output = var->data.location + i;
-            this->outputs[output] = offset(reg, 4 * i);
+            this->outputs[output] = offset(reg, bld, 4 * i);
             this->output_components[output] = vector_elements;
          }
-      } else if (var->data.index > 0) {
-         assert(var->data.location == FRAG_RESULT_DATA0);
-         assert(var->data.index == 1);
-         this->dual_src_output = reg;
-         this->do_dual_src = true;
-      } else if (var->data.location == FRAG_RESULT_COLOR) {
-         /* Writing gl_FragColor outputs to all color regions. */
-         for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
-            this->outputs[i] = reg;
-            this->output_components[i] = 4;
-         }
-      } else if (var->data.location == FRAG_RESULT_DEPTH) {
-         this->frag_depth = reg;
-      } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
-         this->sample_mask = reg;
-      } else {
-         /* gl_FragData or a user-defined FS output */
-         assert(var->data.location >= FRAG_RESULT_DATA0 &&
-                var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
-
-         /* General color output. */
-         for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
-            int output = var->data.location - FRAG_RESULT_DATA0 + i;
-            this->outputs[output] = offset(reg, vector_elements * i);
-            this->output_components[output] = vector_elements;
+         break;
+      case MESA_SHADER_FRAGMENT:
+         if (var->data.index > 0) {
+            assert(var->data.location == FRAG_RESULT_DATA0);
+            assert(var->data.index == 1);
+            this->dual_src_output = reg;
+            this->do_dual_src = true;
+         } else if (var->data.location == FRAG_RESULT_COLOR) {
+            /* Writing gl_FragColor outputs to all color regions. */
+            for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
+               this->outputs[i] = reg;
+               this->output_components[i] = 4;
+            }
+         } else if (var->data.location == FRAG_RESULT_DEPTH) {
+            this->frag_depth = reg;
+         } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
+            this->sample_mask = reg;
+         } else {
+            /* gl_FragData or a user-defined FS output */
+            assert(var->data.location >= FRAG_RESULT_DATA0 &&
+                   var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
+
+            /* General color output. */
+            for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
+               int output = var->data.location - FRAG_RESULT_DATA0 + i;
+               this->outputs[output] = offset(reg, bld, vector_elements * i);
+               this->output_components[output] = vector_elements;
+            }
          }
+         break;
+      default:
+         unreachable("unhandled shader stage");
       }
    }
 }
@@ -177,18 +175,20 @@
 void
 fs_visitor::nir_setup_uniforms(nir_shader *shader)
 {
-   uniforms = shader->num_uniforms;
    num_direct_uniforms = shader->num_direct_uniforms;
 
+   if (dispatch_width != 8)
+      return;
+
    /* We split the uniform register file in half.  The first half is
     * entirely direct uniforms.  The second half is indirect.
     */
-   param_size[0] = num_direct_uniforms;
+   if (num_direct_uniforms > 0)
+      param_size[0] = num_direct_uniforms;
    if (shader->num_uniforms > num_direct_uniforms)
       param_size[num_direct_uniforms] = shader->num_uniforms - num_direct_uniforms;
 
-   if (dispatch_width != 8)
-      return;
+   uniforms = shader->num_uniforms;
 
    if (shader_prog) {
       foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
@@ -225,9 +225,12 @@
       * our name.
       */
    unsigned index = var->data.driver_location;
-   for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
+   for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
       struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 
+      if (storage->builtin)
+              continue;
+
       if (strncmp(var->name, storage->name, namelen) != 0 ||
          (storage->name[namelen] != 0 &&
          storage->name[namelen] != '.' &&
@@ -235,17 +238,26 @@
          continue;
       }
 
-      unsigned slots = storage->type->component_slots();
-      if (storage->array_elements)
-         slots *= storage->array_elements;
+      if (storage->type->is_image()) {
+         /* Images don't get a valid location assigned by nir_lower_io()
+          * because their size is driver-specific, so we need to allocate
+          * space for them here at the end of the parameter array.
+          */
+         var->data.driver_location = uniforms;
+         param_size[uniforms] =
+            BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1);
+
+         setup_image_uniform_values(storage);
+      } else {
+         unsigned slots = storage->type->component_slots();
+         if (storage->array_elements)
+            slots *= storage->array_elements;
 
-      for (unsigned i = 0; i < slots; i++) {
-         stage_prog_data->param[index++] = &storage->storage[i];
+         for (unsigned i = 0; i < slots; i++) {
+            stage_prog_data->param[index++] = &storage->storage[i];
+         }
       }
    }
-
-   /* Make sure we actually initialized the right amount of stuff here. */
-   assert(var->data.driver_location + var->type->component_slots() == index);
 }
 
 void
@@ -365,9 +377,12 @@
       unsigned array_elems =
          reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
       unsigned size = array_elems * reg->num_components;
-      nir_locals[reg->index] = vgrf(size);
+      nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
    }
 
+   nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg,
+                             impl->ssa_alloc);
+
    nir_emit_cf_list(&impl->body);
 }
 
@@ -399,39 +414,33 @@
 fs_visitor::nir_emit_if(nir_if *if_stmt)
 {
    /* first, put the condition into f0 */
-   fs_inst *inst = emit(MOV(reg_null_d,
+   fs_inst *inst = bld.MOV(bld.null_reg_d(),
                             retype(get_nir_src(if_stmt->condition),
-                                   BRW_REGISTER_TYPE_D)));
+                                   BRW_REGISTER_TYPE_D));
    inst->conditional_mod = BRW_CONDITIONAL_NZ;
 
-   emit(IF(BRW_PREDICATE_NORMAL));
+   bld.IF(BRW_PREDICATE_NORMAL);
 
    nir_emit_cf_list(&if_stmt->then_list);
 
    /* note: if the else is empty, dead CF elimination will remove it */
-   emit(BRW_OPCODE_ELSE);
+   bld.emit(BRW_OPCODE_ELSE);
 
    nir_emit_cf_list(&if_stmt->else_list);
 
-   emit(BRW_OPCODE_ENDIF);
+   bld.emit(BRW_OPCODE_ENDIF);
 
-   if (!try_replace_with_sel() && devinfo->gen < 6) {
-      no16("Can't support (non-uniform) control flow on SIMD16\n");
-   }
+   try_replace_with_sel();
 }
 
 void
 fs_visitor::nir_emit_loop(nir_loop *loop)
 {
-   if (devinfo->gen < 6) {
-      no16("Can't support (non-uniform) control flow on SIMD16\n");
-   }
-
-   emit(BRW_OPCODE_DO);
+   bld.emit(BRW_OPCODE_DO);
 
    nir_emit_cf_list(&loop->body);
 
-   emit(BRW_OPCODE_WHILE);
+   bld.emit(BRW_OPCODE_WHILE);
 }
 
 void
@@ -445,71 +454,48 @@
 void
 fs_visitor::nir_emit_instr(nir_instr *instr)
 {
-   this->base_ir = instr;
+   const fs_builder abld = bld.annotate(NULL, instr);
 
    switch (instr->type) {
    case nir_instr_type_alu:
-      nir_emit_alu(nir_instr_as_alu(instr));
+      nir_emit_alu(abld, nir_instr_as_alu(instr));
       break;
 
    case nir_instr_type_intrinsic:
-      nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
+      nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr));
       break;
 
    case nir_instr_type_tex:
-      nir_emit_texture(nir_instr_as_tex(instr));
+      nir_emit_texture(abld, nir_instr_as_tex(instr));
       break;
 
    case nir_instr_type_load_const:
-      /* We can hit these, but we do nothing now and use them as
-       * immediates later.
-       */
+      nir_emit_load_const(abld, nir_instr_as_load_const(instr));
+      break;
+
+   case nir_instr_type_ssa_undef:
+      nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
       break;
 
    case nir_instr_type_jump:
-      nir_emit_jump(nir_instr_as_jump(instr));
+      nir_emit_jump(abld, nir_instr_as_jump(instr));
       break;
 
    default:
       unreachable("unknown instruction type");
    }
-
-   this->base_ir = NULL;
-}
-
-static brw_reg_type
-brw_type_for_nir_type(nir_alu_type type)
-{
-   switch (type) {
-   case nir_type_unsigned:
-      return BRW_REGISTER_TYPE_UD;
-   case nir_type_bool:
-   case nir_type_int:
-      return BRW_REGISTER_TYPE_D;
-   case nir_type_float:
-      return BRW_REGISTER_TYPE_F;
-   default:
-      unreachable("unknown type");
-   }
-
-   return BRW_REGISTER_TYPE_F;
 }
 
 bool
 fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
                                          const fs_reg &result)
 {
-   if (instr->src[0].src.is_ssa ||
-       !instr->src[0].src.reg.reg ||
-       !instr->src[0].src.reg.reg->parent_instr)
-      return false;
-
-   if (instr->src[0].src.reg.reg->parent_instr->type !=
-       nir_instr_type_intrinsic)
+   if (!instr->src[0].src.is_ssa ||
+       instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic)
       return false;
 
    nir_intrinsic_instr *src0 =
-      nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr);
+      nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr);
 
    if (src0->intrinsic != nir_intrinsic_load_front_face)
       return false;
@@ -547,7 +533,7 @@
       tmp.subreg_offset = 2;
       tmp.stride = 2;
 
-      fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80)));
+      fs_inst *or_inst = bld.OR(tmp, g0, fs_reg(0x3f80));
       or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
 
       tmp.type = BRW_REGISTER_TYPE_D;
@@ -572,15 +558,15 @@
          g1_6.negate = true;
       }
 
-      emit(OR(tmp, g1_6, fs_reg(0x3f800000)));
+      bld.OR(tmp, g1_6, fs_reg(0x3f800000));
    }
-   emit(AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)));
+   bld.AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000));
 
    return true;
 }
 
 void
-fs_visitor::nir_emit_alu(nir_alu_instr *instr)
+fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
 {
    struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
    fs_inst *inst;
@@ -612,7 +598,7 @@
          if (!instr->src[i].src.is_ssa &&
              instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
             need_extra_copy = true;
-            temp = retype(vgrf(4), result.type);
+            temp = bld.vgrf(result.type, 4);
             break;
          }
       }
@@ -622,11 +608,11 @@
             continue;
 
          if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
-            inst = emit(MOV(offset(temp, i),
-                        offset(op[0], instr->src[0].swizzle[i])));
+            inst = bld.MOV(offset(temp, bld, i),
+                           offset(op[0], bld, instr->src[0].swizzle[i]));
          } else {
-            inst = emit(MOV(offset(temp, i),
-                        offset(op[i], instr->src[i].swizzle[0])));
+            inst = bld.MOV(offset(temp, bld, i),
+                           offset(op[i], bld, instr->src[i].swizzle[0]));
          }
          inst->saturate = instr->dest.saturate;
       }
@@ -640,7 +626,7 @@
             if (!(instr->dest.write_mask & (1 << i)))
                continue;
 
-            emit(MOV(offset(result, i), offset(temp, i)));
+            bld.MOV(offset(result, bld, i), offset(temp, bld, i));
          }
       }
       return;
@@ -661,24 +647,24 @@
       assert(_mesa_bitcount(instr->dest.write_mask) == 1);
       channel = ffs(instr->dest.write_mask) - 1;
 
-      result = offset(result, channel);
+      result = offset(result, bld, channel);
    }
 
    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
       assert(nir_op_infos[instr->op].input_sizes[i] < 2);
-      op[i] = offset(op[i], instr->src[i].swizzle[channel]);
+      op[i] = offset(op[i], bld, instr->src[i].swizzle[channel]);
    }
 
    switch (instr->op) {
    case nir_op_i2f:
    case nir_op_u2f:
-      inst = emit(MOV(result, op[0]));
+      inst = bld.MOV(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_f2i:
    case nir_op_f2u:
-      emit(MOV(result, op[0]));
+      bld.MOV(result, op[0]);
       break;
 
    case nir_op_fsign: {
@@ -687,17 +673,17 @@
          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
          * zero.
          */
-      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
+      bld.CMP(bld.null_reg_f(), op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
 
       fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
       op[0].type = BRW_REGISTER_TYPE_UD;
       result.type = BRW_REGISTER_TYPE_UD;
-      emit(AND(result_int, op[0], fs_reg(0x80000000u)));
+      bld.AND(result_int, op[0], fs_reg(0x80000000u));
 
-      inst = emit(OR(result_int, result_int, fs_reg(0x3f800000u)));
+      inst = bld.OR(result_int, result_int, fs_reg(0x3f800000u));
       inst->predicate = BRW_PREDICATE_NORMAL;
       if (instr->dest.saturate) {
-         inst = emit(MOV(result, result));
+         inst = bld.MOV(result, result);
          inst->saturate = true;
       }
       break;
@@ -708,204 +694,157 @@
        *               -> non-negative val generates 0x00000000.
        *  Predicated OR sets 1 if val is positive.
        */
-      emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
-      emit(ASR(result, op[0], fs_reg(31)));
-      inst = emit(OR(result, result, fs_reg(1)));
+      bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_G);
+      bld.ASR(result, op[0], fs_reg(31));
+      inst = bld.OR(result, result, fs_reg(1));
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
 
    case nir_op_frcp:
-      inst = emit_math(SHADER_OPCODE_RCP, result, op[0]);
+      inst = bld.emit(SHADER_OPCODE_RCP, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fexp2:
-      inst = emit_math(SHADER_OPCODE_EXP2, result, op[0]);
+      inst = bld.emit(SHADER_OPCODE_EXP2, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_flog2:
-      inst = emit_math(SHADER_OPCODE_LOG2, result, op[0]);
+      inst = bld.emit(SHADER_OPCODE_LOG2, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fsin:
-      inst = emit_math(SHADER_OPCODE_SIN, result, op[0]);
+      inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fcos:
-      inst = emit_math(SHADER_OPCODE_COS, result, op[0]);
+      inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fddx:
       if (fs_key->high_quality_derivatives) {
-         inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
+         inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]);
       } else {
-         inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
+         inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]);
       }
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fddx_fine:
-      inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
+      inst = bld.emit(FS_OPCODE_DDX_FINE, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fddx_coarse:
-      inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
+      inst = bld.emit(FS_OPCODE_DDX_COARSE, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fddy:
       if (fs_key->high_quality_derivatives) {
-         inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
-                     fs_reg(fs_key->render_to_fbo));
+         inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
+                         fs_reg(fs_key->render_to_fbo));
       } else {
-         inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
-                     fs_reg(fs_key->render_to_fbo));
+         inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
+                         fs_reg(fs_key->render_to_fbo));
       }
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fddy_fine:
-      inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
-                  fs_reg(fs_key->render_to_fbo));
+      inst = bld.emit(FS_OPCODE_DDY_FINE, result, op[0],
+                      fs_reg(fs_key->render_to_fbo));
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fddy_coarse:
-      inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
-                  fs_reg(fs_key->render_to_fbo));
+      inst = bld.emit(FS_OPCODE_DDY_COARSE, result, op[0],
+                      fs_reg(fs_key->render_to_fbo));
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fadd:
    case nir_op_iadd:
-      inst = emit(ADD(result, op[0], op[1]));
+      inst = bld.ADD(result, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fmul:
-      inst = emit(MUL(result, op[0], op[1]));
+      inst = bld.MUL(result, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_imul:
-      emit(MUL(result, op[0], op[1]));
+      bld.MUL(result, op[0], op[1]);
       break;
 
    case nir_op_imul_high:
-   case nir_op_umul_high: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
-
-      fs_inst *mul = emit(MUL(acc, op[0], op[1]));
-      emit(MACH(result, op[0], op[1]));
-
-      /* Until Gen8, integer multiplies read 32-bits from one source, and
-       * 16-bits from the other, and relying on the MACH instruction to
-       * generate the high bits of the result.
-       *
-       * On Gen8, the multiply instruction does a full 32x32-bit multiply,
-       * but in order to do a 64x64-bit multiply we have to simulate the
-       * previous behavior and then use a MACH instruction.
-       *
-       * FINISHME: Don't use source modifiers on src1.
-       */
-      if (devinfo->gen >= 8) {
-         assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
-                mul->src[1].type == BRW_REGISTER_TYPE_UD);
-         if (mul->src[1].type == BRW_REGISTER_TYPE_D) {
-            mul->src[1].type = BRW_REGISTER_TYPE_W;
-            mul->src[1].stride = 2;
-         } else {
-            mul->src[1].type = BRW_REGISTER_TYPE_UW;
-            mul->src[1].stride = 2;
-         }
-      }
+   case nir_op_umul_high:
+      bld.emit(SHADER_OPCODE_MULH, result, op[0], op[1]);
       break;
-   }
 
    case nir_op_idiv:
    case nir_op_udiv:
-      emit_math(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
-      break;
-
-   case nir_op_uadd_carry: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                  BRW_REGISTER_TYPE_UD);
-
-      emit(ADDC(reg_null_ud, op[0], op[1]));
-      emit(MOV(result, fs_reg(acc)));
+      bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
       break;
-   }
 
-   case nir_op_usub_borrow: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
+   case nir_op_uadd_carry:
+      unreachable("Should have been lowered by carry_to_arith().");
 
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                  BRW_REGISTER_TYPE_UD);
-
-      emit(SUBB(reg_null_ud, op[0], op[1]));
-      emit(MOV(result, fs_reg(acc)));
-      break;
-   }
+   case nir_op_usub_borrow:
+      unreachable("Should have been lowered by borrow_to_arith().");
 
    case nir_op_umod:
-      emit_math(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
+      bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
       break;
 
    case nir_op_flt:
    case nir_op_ilt:
    case nir_op_ult:
-      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_L));
+      bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_L);
       break;
 
    case nir_op_fge:
    case nir_op_ige:
    case nir_op_uge:
-      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE));
+      bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_GE);
       break;
 
    case nir_op_feq:
    case nir_op_ieq:
-      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z));
+      bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_Z);
       break;
 
    case nir_op_fne:
    case nir_op_ine:
-      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ));
+      bld.CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ);
       break;
 
    case nir_op_inot:
       if (devinfo->gen >= 8) {
-         resolve_source_modifiers(&op[0]);
+         op[0] = resolve_source_modifiers(op[0]);
       }
-      emit(NOT(result, op[0]));
+      bld.NOT(result, op[0]);
       break;
    case nir_op_ixor:
       if (devinfo->gen >= 8) {
-         resolve_source_modifiers(&op[0]);
-         resolve_source_modifiers(&op[1]);
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
       }
-      emit(XOR(result, op[0], op[1]));
+      bld.XOR(result, op[0], op[1]);
       break;
    case nir_op_ior:
       if (devinfo->gen >= 8) {
-         resolve_source_modifiers(&op[0]);
-         resolve_source_modifiers(&op[1]);
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
       }
-      emit(OR(result, op[0], op[1]));
+      bld.OR(result, op[0], op[1]);
       break;
    case nir_op_iand:
       if (devinfo->gen >= 8) {
-         resolve_source_modifiers(&op[0]);
-         resolve_source_modifiers(&op[1]);
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
       }
-      emit(AND(result, op[0], op[1]));
+      bld.AND(result, op[0], op[1]);
       break;
 
    case nir_op_fdot2:
@@ -953,53 +892,51 @@
       unreachable("not reached: should be handled by ldexp_to_arith()");
 
    case nir_op_fsqrt:
-      inst = emit_math(SHADER_OPCODE_SQRT, result, op[0]);
+      inst = bld.emit(SHADER_OPCODE_SQRT, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_frsq:
-      inst = emit_math(SHADER_OPCODE_RSQ, result, op[0]);
+      inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_b2i:
-      emit(AND(result, op[0], fs_reg(1)));
-      break;
    case nir_op_b2f:
-      emit(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u)));
+      bld.MOV(result, negate(op[0]));
       break;
 
    case nir_op_f2b:
-      emit(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
+      bld.CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ);
       break;
    case nir_op_i2b:
-      emit(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
+      bld.CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
       break;
 
    case nir_op_ftrunc:
-      inst = emit(RNDZ(result, op[0]));
+      inst = bld.RNDZ(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fceil: {
       op[0].negate = !op[0].negate;
       fs_reg temp = vgrf(glsl_type::float_type);
-      emit(RNDD(temp, op[0]));
+      bld.RNDD(temp, op[0]);
       temp.negate = true;
-      inst = emit(MOV(result, temp));
+      inst = bld.MOV(result, temp);
       inst->saturate = instr->dest.saturate;
       break;
    }
    case nir_op_ffloor:
-      inst = emit(RNDD(result, op[0]));
+      inst = bld.RNDD(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_ffract:
-      inst = emit(FRC(result, op[0]));
+      inst = bld.FRC(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_fround_even:
-      inst = emit(RNDE(result, op[0]));
+      inst = bld.RNDE(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
@@ -1007,11 +944,11 @@
    case nir_op_imin:
    case nir_op_umin:
       if (devinfo->gen >= 6) {
-         inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
+         inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
          inst->conditional_mod = BRW_CONDITIONAL_L;
       } else {
-         emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L));
-         inst = emit(SEL(result, op[0], op[1]));
+         bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
+         inst = bld.SEL(result, op[0], op[1]);
          inst->predicate = BRW_PREDICATE_NORMAL;
       }
       inst->saturate = instr->dest.saturate;
@@ -1021,11 +958,11 @@
    case nir_op_imax:
    case nir_op_umax:
       if (devinfo->gen >= 6) {
-         inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
+         inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
          inst->conditional_mod = BRW_CONDITIONAL_GE;
       } else {
-         emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE));
-         inst = emit(SEL(result, op[0], op[1]));
+         bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
+         inst = bld.SEL(result, op[0], op[1]);
          inst->predicate = BRW_PREDICATE_NORMAL;
       }
       inst->saturate = instr->dest.saturate;
@@ -1044,57 +981,57 @@
       unreachable("not reached: should be handled by lower_packing_builtins");
 
    case nir_op_unpack_half_2x16_split_x:
-      inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]);
+      inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
    case nir_op_unpack_half_2x16_split_y:
-      inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]);
+      inst = bld.emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fpow:
-      inst = emit_math(SHADER_OPCODE_POW, result, op[0], op[1]);
+      inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_bitfield_reverse:
-      emit(BFREV(result, op[0]));
+      bld.BFREV(result, op[0]);
       break;
 
    case nir_op_bit_count:
-      emit(CBIT(result, op[0]));
+      bld.CBIT(result, op[0]);
       break;
 
    case nir_op_ufind_msb:
    case nir_op_ifind_msb: {
-      emit(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]));
+      bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
 
       /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
        * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
        * subtract the result from 31 to convert the MSB count into an LSB count.
        */
 
-      emit(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ));
+      bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ);
       fs_reg neg_result(result);
       neg_result.negate = true;
-      inst = emit(ADD(result, neg_result, fs_reg(31)));
+      inst = bld.ADD(result, neg_result, fs_reg(31));
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
    }
 
    case nir_op_find_lsb:
-      emit(FBL(result, op[0]));
+      bld.FBL(result, op[0]);
       break;
 
    case nir_op_ubitfield_extract:
    case nir_op_ibitfield_extract:
-      emit(BFE(result, op[2], op[1], op[0]));
+      bld.BFE(result, op[2], op[1], op[0]);
       break;
    case nir_op_bfm:
-      emit(BFI1(result, op[0], op[1]));
+      bld.BFI1(result, op[0], op[1]);
       break;
    case nir_op_bfi:
-      emit(BFI2(result, op[0], op[1], op[2]));
+      bld.BFI2(result, op[0], op[1], op[2]);
       break;
 
    case nir_op_bitfield_insert:
@@ -1102,26 +1039,26 @@
                   "lower_instructions::bitfield_insert_to_bfm_bfi");
 
    case nir_op_ishl:
-      emit(SHL(result, op[0], op[1]));
+      bld.SHL(result, op[0], op[1]);
       break;
    case nir_op_ishr:
-      emit(ASR(result, op[0], op[1]));
+      bld.ASR(result, op[0], op[1]);
       break;
    case nir_op_ushr:
-      emit(SHR(result, op[0], op[1]));
+      bld.SHR(result, op[0], op[1]);
       break;
 
    case nir_op_pack_half_2x16_split:
-      emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
+      bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
       break;
 
    case nir_op_ffma:
-      inst = emit(MAD(result, op[2], op[1], op[0]));
+      inst = bld.MAD(result, op[2], op[1], op[0]);
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_flrp:
-      inst = emit_lrp(result, op[0], op[1], op[2]);
+      inst = bld.LRP(result, op[0], op[1], op[2]);
       inst->saturate = instr->dest.saturate;
       break;
 
@@ -1129,8 +1066,8 @@
       if (optimize_frontfacing_ternary(instr, result))
          return;
 
-      emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
-      inst = emit(SEL(result, op[1], op[2]));
+      bld.CMP(bld.null_reg_d(), op[0], fs_reg(0), BRW_CONDITIONAL_NZ);
+      inst = bld.SEL(result, op[1], op[2]);
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
 
@@ -1144,29 +1081,48 @@
    if (devinfo->gen <= 5 &&
        (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
       fs_reg masked = vgrf(glsl_type::int_type);
-      emit(AND(masked, result, fs_reg(1)));
+      bld.AND(masked, result, fs_reg(1));
       masked.negate = true;
-      emit(MOV(retype(result, BRW_REGISTER_TYPE_D), masked));
+      bld.MOV(retype(result, BRW_REGISTER_TYPE_D), masked);
    }
 }
 
+void
+fs_visitor::nir_emit_load_const(const fs_builder &bld,
+                                nir_load_const_instr *instr)
+{  /* Emit a NIR load_const: copy its components into a new D-typed VGRF. */
+   fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
+   /* One MOV per component, sourced from the value.i[] member. */
+   for (unsigned i = 0; i < instr->def.num_components; i++)
+      bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i]));
+   /* Record the register under the SSA def's index (read by get_nir_src). */
+   nir_ssa_values[instr->def.index] = reg;
+}
+
+void
+fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
+{  /* Back an undef SSA value with a new D-typed VGRF; nothing is written to it here. */
+   nir_ssa_values[instr->def.index] = bld.vgrf(BRW_REGISTER_TYPE_D,
+                                               instr->def.num_components);
+}
+
 static fs_reg
 fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
                    unsigned base_offset, nir_src *indirect)
 {
    fs_reg reg;
-   if (nir_reg->is_global)
-      reg = v->nir_globals[nir_reg->index];
-   else
-      reg = v->nir_locals[nir_reg->index];
 
-   reg = offset(reg, base_offset * nir_reg->num_components);
+   assert(!nir_reg->is_global);
+
+   reg = v->nir_locals[nir_reg->index];
+
+   reg = offset(reg, v->bld, base_offset * nir_reg->num_components);
    if (indirect) {
       int multiplier = nir_reg->num_components * (v->dispatch_width / 8);
 
       reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type));
-      v->emit(v->MUL(*reg.reladdr, v->get_nir_src(*indirect),
-                     fs_reg(multiplier)));
+      v->bld.MUL(*reg.reladdr, v->get_nir_src(*indirect),
+                 fs_reg(multiplier));
    }
 
    return reg;
@@ -1175,54 +1131,146 @@
 fs_reg
 fs_visitor::get_nir_src(nir_src src)
 {
+   fs_reg reg;  /* set by whichever branch applies, then retyped below */
    if (src.is_ssa) {
-      assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
-      nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
-      fs_reg reg = vgrf(src.ssa->num_components);
-      reg.type = BRW_REGISTER_TYPE_D;
-
-      for (unsigned i = 0; i < src.ssa->num_components; ++i)
-         emit(MOV(offset(reg, i), fs_reg(load->value.i[i])));
-
-      return reg;
+      reg = nir_ssa_values[src.ssa->index];  /* looked up by SSA index */
    } else {
-      fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
-                                      src.reg.indirect);
-
-      /* to avoid floating-point denorm flushing problems, set the type by
-       * default to D - instructions that need floating point semantics will set
-       * this to F if they need to
-       */
-      return retype(reg, BRW_REGISTER_TYPE_D);
+      reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
+                               src.reg.indirect);
    }
+
+   /* To avoid floating-point denorm flushing problems, set the type by
+    * default to D - instructions that need floating point semantics will set
+    * this to F if they need to.
+    */
+   return retype(reg, BRW_REGISTER_TYPE_D);
 }
 
 fs_reg
 fs_visitor::get_nir_dest(nir_dest dest)
 {
+   if (dest.is_ssa) {  /* SSA dest: allocate a new F-typed VGRF and record it by index */
+      nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F,
+                                                dest.ssa.num_components);
+      return nir_ssa_values[dest.ssa.index];
+   }
+   /* Non-SSA dest: resolve the NIR register it names. */
    return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
                              dest.reg.indirect);
 }
 
+fs_reg
+fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
+{  /* Build the UNIFORM-file register that addresses the image named by deref. */
+   fs_reg image(UNIFORM, deref->var->data.driver_location,
+                BRW_REGISTER_TYPE_UD);
+   /* A child deref, if present, must be a single array deref (asserted). */
+   if (deref->deref.child) {
+      const nir_deref_array *deref_array =
+         nir_deref_as_array(deref->deref.child);
+      assert(deref->deref.child->deref_type == nir_deref_type_array &&
+             deref_array->deref.child == NULL);
+      const unsigned size = glsl_get_length(deref->var->type);
+      const unsigned base = MIN2(deref_array->base_offset, size - 1);
+      /* The constant index is limited to size - 1 before being scaled. */
+      image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE);
+      /* An indirect index is computed into tmp and attached as reladdr. */
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type));
+         /* Only gen7 non-Haswell clamps the dynamic index; others copy it. */
+         if (devinfo->gen == 7 && !devinfo->is_haswell) {
+            /* IVB hangs when trying to access an invalid surface index with
+             * the dataport.  According to the spec "if the index used to
+             * select an individual element is negative or greater than or
+             * equal to the size of the array, the results of the operation
+             * are undefined but may not lead to termination" -- which is one
+             * of the possible outcomes of the hang.  Clamp the index to
+             * prevent access outside of the array bounds.
+             */
+            bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect),
+                                         BRW_REGISTER_TYPE_UD),
+                            fs_reg(size - base - 1), BRW_CONDITIONAL_L);
+         } else {
+            bld.MOV(*tmp, get_nir_src(deref_array->indirect));
+         }
+         /* Scale the element index by BRW_IMAGE_PARAM_SIZE, as done for base. */
+         bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE));
+         image.reladdr = tmp;
+      }
+   }
+
+   return image;
+}
+
 void
-fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
+fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
+                         unsigned wr_mask)  /* bit i set => emit component i */
 {
    for (unsigned i = 0; i < 4; i++) {
       if (!((wr_mask >> i) & 1))
          continue;
 
-      fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
-      new_inst->dst = offset(new_inst->dst, i);
+      fs_inst *new_inst = new(mem_ctx) fs_inst(inst);  /* copy of the template */
+      new_inst->dst = offset(new_inst->dst, bld, i);
       for (unsigned j = 0; j < new_inst->sources; j++)
-         if (inst->src[j].file == GRF)
-            new_inst->src[j] = offset(new_inst->src[j], i);
+         if (new_inst->src[j].file == GRF)  /* other source files are left as-is */
+            new_inst->src[j] = offset(new_inst->src[j], bld, i);
+      /* Hand the per-component copy to the builder. */
+      bld.emit(new_inst);
+   }
+}
 
-      emit(new_inst);
+/**
+ * Map the sampler_type of a GLSL image type to the register type of its
+ * channels: UINT -> UD, INT -> D, FLOAT -> F.  Other types are unreachable.
+ */
+static brw_reg_type
+get_image_base_type(const glsl_type *type)
+{
+   switch ((glsl_base_type)type->sampler_type) {
+   case GLSL_TYPE_UINT:
+      return BRW_REGISTER_TYPE_UD;
+   case GLSL_TYPE_INT:
+      return BRW_REGISTER_TYPE_D;
+   case GLSL_TYPE_FLOAT:
+      return BRW_REGISTER_TYPE_F;
+   default:
+      unreachable("Not reached.");
+   }
+}
+
+/**
+ * Map an image atomic intrinsic to the BRW_AOP_* opcode that implements it.
+ */
+static unsigned
+get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
+{
+   switch (op) {
+   case nir_intrinsic_image_atomic_add:
+      return BRW_AOP_ADD;
+   case nir_intrinsic_image_atomic_min:  /* signed op only for D-typed images */
+      return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
+              BRW_AOP_IMIN : BRW_AOP_UMIN);
+   case nir_intrinsic_image_atomic_max:  /* same signedness rule as min */
+      return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
+              BRW_AOP_IMAX : BRW_AOP_UMAX);
+   case nir_intrinsic_image_atomic_and:
+      return BRW_AOP_AND;
+   case nir_intrinsic_image_atomic_or:
+      return BRW_AOP_OR;
+   case nir_intrinsic_image_atomic_xor:
+      return BRW_AOP_XOR;
+   case nir_intrinsic_image_atomic_exchange:
+      return BRW_AOP_MOV;
+   case nir_intrinsic_image_atomic_comp_swap:
+      return BRW_AOP_CMPWR;
+   default:  /* any intrinsic not listed above is not expected here */
+      unreachable("Not reachable.");
    }
 }
 
 void
-fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
    fs_reg dest;
    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -1240,12 +1288,12 @@
        */
       fs_inst *cmp;
       if (instr->intrinsic == nir_intrinsic_discard_if) {
-         cmp = emit(CMP(reg_null_f, get_nir_src(instr->src[0]),
-                        fs_reg(0), BRW_CONDITIONAL_Z));
+         cmp = bld.CMP(bld.null_reg_f(), get_nir_src(instr->src[0]),
+                       fs_reg(0), BRW_CONDITIONAL_Z);
       } else {
          fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                        BRW_REGISTER_TYPE_UW));
-         cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ));
+         cmp = bld.CMP(bld.null_reg_f(), some_reg, some_reg, BRW_CONDITIONAL_NZ);
       }
       cmp->predicate = BRW_PREDICATE_NORMAL;
       cmp->flag_subreg = 1;
@@ -1259,31 +1307,153 @@
    case nir_intrinsic_atomic_counter_inc:
    case nir_intrinsic_atomic_counter_dec:
    case nir_intrinsic_atomic_counter_read: {
-      unsigned surf_index = prog_data->binding_table.abo_start +
-                            (unsigned) instr->const_index[0];
-      fs_reg offset = fs_reg(get_nir_src(instr->src[0]));
+      using namespace surface_access;
 
+      /* Get the arguments of the atomic intrinsic. */
+      const fs_reg offset = get_nir_src(instr->src[0]);
+      const unsigned surface = (stage_prog_data->binding_table.abo_start +
+                                instr->const_index[0]);
+      fs_reg tmp;
+
+      /* Emit a surface read or atomic op. */
       switch (instr->intrinsic) {
-         case nir_intrinsic_atomic_counter_inc:
-            emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
-                                fs_reg(), fs_reg());
-            break;
-         case nir_intrinsic_atomic_counter_dec:
-            emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
-                                fs_reg(), fs_reg());
-            break;
-         case nir_intrinsic_atomic_counter_read:
-            emit_untyped_surface_read(surf_index, dest, offset);
-            break;
-         default:
-            unreachable("Unreachable");
+      case nir_intrinsic_atomic_counter_read:
+         tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1);
+         break;
+
+      case nir_intrinsic_atomic_counter_inc:
+         tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+                                   fs_reg(), 1, 1, BRW_AOP_INC);
+         break;
+
+      case nir_intrinsic_atomic_counter_dec:
+         tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+                                   fs_reg(), 1, 1, BRW_AOP_PREDEC);
+         break;
+
+      default:
+         unreachable("Unreachable");
       }
+
+      /* Assign the result. */
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), tmp);
+
+      /* Mark the surface as used. */
+      brw_mark_surface_used(stage_prog_data, surface);
+      break;
+   }
+
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_store:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_min:
+   case nir_intrinsic_image_atomic_max:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap: {
+      using namespace image_access;
+
+      /* Get the referenced image variable and type. */
+      const nir_variable *var = instr->variables[0]->var;
+      const glsl_type *type = var->type->without_array();
+      const brw_reg_type base_type = get_image_base_type(type);
+
+      /* Get some metadata from the image intrinsic. */
+      const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+      const unsigned arr_dims = type->sampler_array ? 1 : 0;
+      const unsigned surf_dims = type->coordinate_components() - arr_dims;
+      const mesa_format format =
+         (var->data.image.write_only ? MESA_FORMAT_NONE :
+          _mesa_get_shader_image_format(var->data.image.format));
+
+      /* Get the arguments of the image intrinsic. */
+      const fs_reg image = get_nir_image_deref(instr->variables[0]);
+      const fs_reg addr = retype(get_nir_src(instr->src[0]),
+                                 BRW_REGISTER_TYPE_UD);
+      const fs_reg src0 = (info->num_srcs >= 3 ?
+                           retype(get_nir_src(instr->src[2]), base_type) :
+                           fs_reg());
+      const fs_reg src1 = (info->num_srcs >= 4 ?
+                           retype(get_nir_src(instr->src[3]), base_type) :
+                           fs_reg());
+      fs_reg tmp;
+
+      /* Emit an image load, store or atomic op. */
+      if (instr->intrinsic == nir_intrinsic_image_load)
+         tmp = emit_image_load(bld, image, addr, surf_dims, arr_dims, format);
+
+      else if (instr->intrinsic == nir_intrinsic_image_store)
+         emit_image_store(bld, image, addr, src0, surf_dims, arr_dims, format);
+
+      else
+         tmp = emit_image_atomic(bld, image, addr, src0, src1,
+                                 surf_dims, arr_dims, info->dest_components,
+                                 get_image_atomic_op(instr->intrinsic, type));
+
+      /* Assign the result. */
+      for (unsigned c = 0; c < info->dest_components; ++c)
+         bld.MOV(offset(retype(dest, base_type), bld, c),
+                 offset(tmp, bld, c));
+      break;
+   }
+
+   case nir_intrinsic_memory_barrier: {
+      const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 16 / dispatch_width);
+      bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+         ->regs_written = 2;
+      break;
+   }
+
+   case nir_intrinsic_image_size: {
+      /* Get the referenced image variable and type. */
+      const nir_variable *var = instr->variables[0]->var;
+      const glsl_type *type = var->type->without_array();
+
+      /* Get the size of the image. */
+      const fs_reg image = get_nir_image_deref(instr->variables[0]);
+      const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
+
+      /* For 1DArray image types, the array length is stored in the Z component.
+       * Fix this by swizzling the Z component to the Y component.
+       */
+      const bool is_1d_array_image =
+                  type->sampler_dimensionality == GLSL_SAMPLER_DIM_1D &&
+                  type->sampler_array;
+
+      /* For CubeArray images, we should count the number of cubes instead
+       * of the number of faces. Fix it by dividing the (Z component) by 6.
+       */
+      const bool is_cube_array_image =
+                  type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
+                  type->sampler_array;
+
+      /* Copy all the components. */
+      const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+      for (unsigned c = 0; c < info->dest_components; ++c) {
+         if ((int)c >= type->coordinate_components()) {
+             bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
+                     fs_reg(1));
+         } else if (c == 1 && is_1d_array_image) {
+            bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
+                    offset(size, bld, 2));
+         } else if (c == 2 && is_cube_array_image) {
+            bld.emit(SHADER_OPCODE_INT_QUOTIENT,
+                     offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
+                     offset(size, bld, c), fs_reg(6));
+         } else {
+            bld.MOV(offset(retype(dest, BRW_REGISTER_TYPE_D), bld, c),
+                    offset(size, bld, c));
+         }
+       }
+
       break;
    }
 
    case nir_intrinsic_load_front_face:
-      emit(MOV(retype(dest, BRW_REGISTER_TYPE_D),
-               *emit_frontfacing_interpolation()));
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+              *emit_frontfacing_interpolation());
       break;
 
    case nir_intrinsic_load_vertex_id:
@@ -1293,7 +1463,7 @@
       fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
       assert(vertex_id.file != BAD_FILE);
       dest.type = vertex_id.type;
-      emit(MOV(dest, vertex_id));
+      bld.MOV(dest, vertex_id);
       break;
    }
 
@@ -1301,7 +1471,7 @@
       fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
       assert(base_vertex.file != BAD_FILE);
       dest.type = base_vertex.type;
-      emit(MOV(dest, base_vertex));
+      bld.MOV(dest, base_vertex);
       break;
    }
 
@@ -1309,7 +1479,7 @@
       fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
       assert(instance_id.file != BAD_FILE);
       dest.type = instance_id.type;
-      emit(MOV(dest, instance_id));
+      bld.MOV(dest, instance_id);
       break;
    }
 
@@ -1317,7 +1487,7 @@
       fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
       assert(sample_mask_in.file != BAD_FILE);
       dest.type = sample_mask_in.type;
-      emit(MOV(dest, sample_mask_in));
+      bld.MOV(dest, sample_mask_in);
       break;
    }
 
@@ -1325,8 +1495,8 @@
       fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
       assert(sample_pos.file != BAD_FILE);
       dest.type = sample_pos.type;
-      emit(MOV(dest, sample_pos));
-      emit(MOV(offset(dest, 1), offset(sample_pos, 1)));
+      bld.MOV(dest, sample_pos);
+      bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
       break;
    }
 
@@ -1334,7 +1504,7 @@
       fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
       assert(sample_id.file != BAD_FILE);
       dest.type = sample_id.type;
-      emit(MOV(dest, sample_id));
+      bld.MOV(dest, sample_id);
       break;
    }
 
@@ -1352,16 +1522,14 @@
          index -= num_direct_uniforms;
       }
 
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg src = offset(retype(uniform_reg, dest.type), index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
-            index++;
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg src = offset(retype(uniform_reg, dest.type), bld, index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+         index++;
 
-            emit(MOV(dest, src));
-            dest = offset(dest, 1);
-         }
+         bld.MOV(dest, src);
+         dest = offset(dest, bld, 1);
       }
       break;
    }
@@ -1382,9 +1550,9 @@
           * from any live channel.
           */
          surf_index = vgrf(glsl_type::uint_type);
-         emit(ADD(surf_index, get_nir_src(instr->src[0]),
-                  fs_reg(stage_prog_data->binding_table.ubo_start)));
-         emit_uniformize(surf_index, surf_index);
+         bld.ADD(surf_index, get_nir_src(instr->src[0]),
+                 fs_reg(stage_prog_data->binding_table.ubo_start));
+         surf_index = bld.emit_uniformize(surf_index);
 
          /* Assume this may touch any UBO. It would be nice to provide
           * a tighter bound, but the array information is already lowered away.
@@ -1397,21 +1565,21 @@
       if (has_indirect) {
          /* Turn the byte offset into a dword offset. */
          fs_reg base_offset = vgrf(glsl_type::int_type);
-         emit(SHR(base_offset, retype(get_nir_src(instr->src[1]),
-                                 BRW_REGISTER_TYPE_D),
-                  fs_reg(2)));
+         bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
+                                     BRW_REGISTER_TYPE_D),
+                 fs_reg(2));
 
          unsigned vec4_offset = instr->const_index[0] / 4;
          for (int i = 0; i < instr->num_components; i++)
-            emit(VARYING_PULL_CONSTANT_LOAD(offset(dest, i), surf_index,
-                                            base_offset, vec4_offset + i));
+            VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
+                                       base_offset, vec4_offset + i);
       } else {
          fs_reg packed_consts = vgrf(glsl_type::float_type);
          packed_consts.type = dest.type;
 
          fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
-         emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
-              surf_index, const_offset_reg);
+         bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
+                  surf_index, const_offset_reg);
 
          for (unsigned i = 0; i < instr->num_components; i++) {
             packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
@@ -1421,8 +1589,8 @@
              */
             assert(packed_consts.subreg_offset < 32);
 
-            emit(MOV(dest, packed_consts));
-            dest = offset(dest, 1);
+            bld.MOV(dest, packed_consts);
+            dest = offset(dest, bld, 1);
          }
       }
       break;
@@ -1433,17 +1601,15 @@
       /* fallthrough */
    case nir_intrinsic_load_input: {
       unsigned index = 0;
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg src = offset(retype(nir_inputs, dest.type),
-                                instr->const_index[0] + index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
-            index++;
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg src = offset(retype(nir_inputs, dest.type), bld,
+                             instr->const_index[0] + index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
+         index++;
 
-            emit(MOV(dest, src));
-            dest = offset(dest, 1);
-         }
+         bld.MOV(dest, src);
+         dest = offset(dest, bld, 1);
       }
       break;
    }
@@ -1473,13 +1639,7 @@
 
       ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
 
-      /* in SIMD16 mode, the pixel interpolator returns coords interleaved
-       * 8 channels at a time, same as the barycentric coords presented in
-       * the FS payload. this requires a bit of extra work to support.
-       */
-      no16("interpolate_at_* not yet supported in SIMD16 mode.");
-
-      fs_reg dst_xy = vgrf(2);
+      fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
 
       /* For most messages, we need one reg of ignored data; the hardware
        * requires mlen==1 even when there is no payload. in the per-slot
@@ -1491,7 +1651,8 @@
 
       switch (instr->intrinsic) {
       case nir_intrinsic_interp_var_at_centroid:
-         inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
+         inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
+                         dst_xy, src, fs_reg(0u));
          break;
 
       case nir_intrinsic_interp_var_at_sample: {
@@ -1499,8 +1660,8 @@
          nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
          assert(const_sample);
          unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
-         inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
-                     fs_reg(msg_data));
+         inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+                         fs_reg(msg_data));
          break;
       }
 
@@ -1511,17 +1672,17 @@
             unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
             unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
 
-            inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
-                        fs_reg(off_x | (off_y << 4)));
+            inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
+                            fs_reg(off_x | (off_y << 4)));
          } else {
             src = vgrf(glsl_type::ivec2_type);
             fs_reg offset_src = retype(get_nir_src(instr->src[0]),
                                        BRW_REGISTER_TYPE_F);
             for (int i = 0; i < 2; i++) {
                fs_reg temp = vgrf(glsl_type::float_type);
-               emit(MUL(temp, offset(offset_src, i), fs_reg(16.0f)));
+               bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
                fs_reg itemp = vgrf(glsl_type::int_type);
-               emit(MOV(itemp, temp));  /* float to int */
+               bld.MOV(itemp, temp);  /* float to int */
 
                /* Clamp the upper end of the range to +7/16.
                 * ARB_gpu_shader5 requires that we support a maximum offset
@@ -1538,14 +1699,13 @@
                 * implementation-dependent constant
                 * FRAGMENT_INTERPOLATION_OFFSET_BITS"
                 */
-
-               emit(BRW_OPCODE_SEL, offset(src, i), itemp, fs_reg(7))
-                   ->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
+               set_condmod(BRW_CONDITIONAL_L,
+                           bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
             }
 
-            mlen = 2;
-            inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
-                        fs_reg(0u));
+            mlen = 2 * dispatch_width / 8;
+            inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
+                            fs_reg(0u));
          }
          break;
       }
@@ -1555,7 +1715,8 @@
       }
 
       inst->mlen = mlen;
-      inst->regs_written = 2; /* 2 floats per slot returned */
+      /* 2 floats per slot returned */
+      inst->regs_written = 2 * dispatch_width / 8;
       inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
                                INTERP_QUALIFIER_NOPERSPECTIVE;
 
@@ -1563,8 +1724,8 @@
          fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
          src.type = dest.type;
 
-         emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
-         dest = offset(dest, 1);
+         bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
+         dest = offset(dest, bld, 1);
       }
       break;
    }
@@ -1575,27 +1736,29 @@
    case nir_intrinsic_store_output: {
       fs_reg src = get_nir_src(instr->src[0]);
       unsigned index = 0;
-      for (int i = 0; i < instr->const_index[1]; i++) {
-         for (unsigned j = 0; j < instr->num_components; j++) {
-            fs_reg new_dest = offset(retype(nir_outputs, src.type),
-                                     instr->const_index[0] + index);
-            if (has_indirect)
-               src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
-            index++;
-            emit(MOV(new_dest, src));
-            src = offset(src, 1);
-         }
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
+                                  instr->const_index[0] + index);
+         if (has_indirect)
+            src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
+         index++;
+         bld.MOV(new_dest, src);
+         src = offset(src, bld, 1);
       }
       break;
    }
 
+   case nir_intrinsic_barrier:
+      emit_barrier();
+      break;
+
    default:
       unreachable("unknown intrinsic");
    }
 }
 
 void
-fs_visitor::nir_emit_texture(nir_tex_instr *instr)
+fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
 {
    unsigned sampler = instr->sampler_index;
    fs_reg sampler_reg(sampler);
@@ -1613,7 +1776,8 @@
    bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
                         instr->is_array;
 
-   int lod_components = 0, offset_components = 0;
+   int lod_components = 0;
+   int UNUSED offset_components = 0;
 
    fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
 
@@ -1682,8 +1846,8 @@
 
          /* Emit code to evaluate the actual indexing expression */
          sampler_reg = vgrf(glsl_type::uint_type);
-         emit(ADD(sampler_reg, src, fs_reg(sampler)));
-         emit_uniformize(sampler_reg, sampler_reg);
+         bld.ADD(sampler_reg, src, fs_reg(sampler));
+         sampler_reg = bld.emit_uniformize(sampler_reg);
          break;
       }
 
@@ -1709,20 +1873,8 @@
       }
    }
 
-   enum glsl_base_type dest_base_type;
-   switch (instr->dest_type) {
-   case nir_type_float:
-      dest_base_type = GLSL_TYPE_FLOAT;
-      break;
-   case nir_type_int:
-      dest_base_type = GLSL_TYPE_INT;
-      break;
-   case nir_type_unsigned:
-      dest_base_type = GLSL_TYPE_UINT;
-      break;
-   default:
-      unreachable("bad type");
-   }
+   enum glsl_base_type dest_base_type =
+     brw_glsl_base_type_for_nir_type (instr->dest_type);
 
    const glsl_type *dest_type =
       glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
@@ -1752,18 +1904,20 @@
    fs_reg dest = get_nir_dest(instr->dest);
    dest.type = this->result.type;
    unsigned num_components = nir_tex_instr_dest_size(instr);
-   emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
+   emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+                             dest, this->result),
+                (1 << num_components) - 1);
 }
 
 void
-fs_visitor::nir_emit_jump(nir_jump_instr *instr)
+fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr)
 {
    switch (instr->type) {
    case nir_jump_break:
-      emit(BRW_OPCODE_BREAK);
+      bld.emit(BRW_OPCODE_BREAK);
       break;
    case nir_jump_continue:
-      emit(BRW_OPCODE_CONTINUE);
+      bld.emit(BRW_OPCODE_CONTINUE);
       break;
    case nir_jump_return:
    default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -24,6 +24,8 @@
 #include "brw_fs.h"
 #include "brw_cfg.h"
 
+using namespace brw;
+
 /** @file brw_fs_peephole_predicated_break.cpp
  *
  * Loops are often structured as
@@ -85,9 +87,9 @@
        * instruction to set the flag register.
        */
       if (devinfo->gen == 6 && if_inst->conditional_mod) {
-         fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
-                                 if_inst->conditional_mod);
-         if_inst->insert_before(if_block, cmp_inst);
+         const fs_builder ibld(this, if_block, if_inst);
+         ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
+                  if_inst->conditional_mod);
          jump_inst->predicate = BRW_PREDICATE_NORMAL;
       } else {
          jump_inst->predicate = if_inst->predicate;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,8 @@
 #include "glsl/glsl_types.h"
 #include "glsl/ir_optimization.h"
 
+using namespace brw;
+
 static void
 assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
 {
@@ -71,11 +73,20 @@
 }
 
 static void
-brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
+brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width)
 {
    const struct brw_device_info *devinfo = compiler->devinfo;
    int base_reg_count = BRW_MAX_GRF;
-   int index = reg_width - 1;
+   int index = (dispatch_width / 8) - 1;
+
+   if (dispatch_width > 8 && devinfo->gen >= 7) {
+      /* For IVB+, we don't need the PLN hacks or the even-reg alignment in
+       * SIMD16.  Therefore, we can use the exact same register sets for
+       * SIMD16 as we do for SIMD8 and we don't need to recalculate them.
+       */
+      compiler->fs_reg_sets[index] = compiler->fs_reg_sets[0];
+      return;
+   }
 
    /* The registers used to make up almost all values handled in the compiler
     * are a scalar value occupying a single register (or 2 registers in the
@@ -119,7 +130,7 @@
    /* Compute the total number of registers across all classes. */
    int ra_reg_count = 0;
    for (int i = 0; i < class_count; i++) {
-      if (devinfo->gen <= 5 && reg_width == 2) {
+      if (devinfo->gen <= 5 && dispatch_width == 16) {
          /* From the G45 PRM:
           *
           * In order to reduce the hardware complexity, the following
@@ -145,7 +156,7 @@
    }
 
    uint8_t *ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count);
-   struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count);
+   struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count, false);
    if (devinfo->gen >= 6)
       ra_set_allocate_round_robin(regs);
    int *classes = ralloc_array(compiler, int, class_count);
@@ -166,7 +177,7 @@
    int pairs_reg_count = 0;
    for (int i = 0; i < class_count; i++) {
       int class_reg_count;
-      if (devinfo->gen <= 5 && reg_width == 2) {
+      if (devinfo->gen <= 5 && dispatch_width == 16) {
          class_reg_count = (base_reg_count - (class_sizes[i] - 1)) / 2;
 
          /* See comment below.  The only difference here is that we are
@@ -212,7 +223,7 @@
          pairs_reg_count = class_reg_count;
       }
 
-      if (devinfo->gen <= 5 && reg_width == 2) {
+      if (devinfo->gen <= 5 && dispatch_width == 16) {
          for (int j = 0; j < class_reg_count; j++) {
             ra_class_add_reg(regs, classes[i], reg);
 
@@ -221,7 +232,7 @@
             for (int base_reg = j;
                  base_reg < j + (class_sizes[i] + 1) / 2;
                  base_reg++) {
-               ra_add_transitive_reg_conflict(regs, base_reg, reg);
+               ra_add_reg_conflict(regs, base_reg, reg);
             }
 
             reg++;
@@ -235,7 +246,7 @@
             for (int base_reg = j;
                  base_reg < j + class_sizes[i];
                  base_reg++) {
-               ra_add_transitive_reg_conflict(regs, base_reg, reg);
+               ra_add_reg_conflict(regs, base_reg, reg);
             }
 
             reg++;
@@ -244,10 +255,16 @@
    }
    assert(reg == ra_reg_count);
 
+   /* Applying transitivity to all of the base registers gives us the
+    * appropriate register conflict relationships everywhere.
+    */
+   for (int reg = 0; reg < base_reg_count; reg++)
+      ra_make_reg_conflicts_transitive(regs, reg);
+
    /* Add a special class for aligned pairs, which we'll put delta_xy
     * in on Gen <= 6 so that we can do PLN.
     */
-   if (devinfo->has_pln && reg_width == 1 && devinfo->gen <= 6) {
+   if (devinfo->has_pln && dispatch_width == 8 && devinfo->gen <= 6) {
       aligned_pairs_class = ra_alloc_reg_class(regs);
 
       for (int i = 0; i < pairs_reg_count; i++) {
@@ -285,8 +302,8 @@
 void
 brw_fs_alloc_reg_sets(struct brw_compiler *compiler)
 {
-   brw_alloc_reg_set(compiler, 1);
-   brw_alloc_reg_set(compiler, 2);
+   brw_alloc_reg_set(compiler, 8);
+   brw_alloc_reg_set(compiler, 16);
 }
 
 static int
@@ -339,7 +356,9 @@
    int loop_end_ip = 0;
 
    int payload_last_use_ip[payload_node_count];
-   memset(payload_last_use_ip, 0, sizeof(payload_last_use_ip));
+   for (int i = 0; i < payload_node_count; i++)
+      payload_last_use_ip[i] = -1;
+
    int ip = 0;
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       switch (inst->opcode) {
@@ -378,32 +397,15 @@
             if (node_nr >= payload_node_count)
                continue;
 
-            payload_last_use_ip[node_nr] = use_ip;
+            for (int j = 0; j < inst->regs_read(i); j++) {
+               assert(node_nr + j < payload_node_count);
+               payload_last_use_ip[node_nr + j] = use_ip;
+            }
          }
       }
 
       /* Special case instructions which have extra implied registers used. */
       switch (inst->opcode) {
-      case FS_OPCODE_LINTERP:
-         /* On gen6+ in SIMD16, there are 4 adjacent registers used by
-          * PLN's sourcing of the deltas, while we list only the first one
-          * in the arguments.  Pre-gen6, the deltas are computed in normal
-          * VGRFs.
-          */
-         if (devinfo->gen >= 6) {
-            int delta_x_arg = 0;
-            if (inst->src[delta_x_arg].file == HW_REG &&
-                inst->src[delta_x_arg].fixed_hw_reg.file ==
-                BRW_GENERAL_REGISTER_FILE) {
-               for (int i = 1; i < 4; ++i) {
-                  int node = inst->src[delta_x_arg].fixed_hw_reg.nr + i;
-                  assert(node < payload_node_count);
-                  payload_last_use_ip[node] = use_ip;
-               }
-            }
-         }
-         break;
-
       case CS_OPCODE_CS_TERMINATE:
          payload_last_use_ip[0] = use_ip;
          break;
@@ -426,6 +428,9 @@
    }
 
    for (int i = 0; i < payload_node_count; i++) {
+      if (payload_last_use_ip[i] == -1)
+         continue;
+
       /* Mark the payload node as interfering with any virtual grf that is
        * live between the start of the program and our last use of the payload
        * node.
@@ -468,14 +473,14 @@
  * see if we can actually use MRFs to do spills without overwriting normal MRF
  * contents.
  */
-void
-fs_visitor::get_used_mrfs(bool *mrf_used)
+static void
+get_used_mrfs(fs_visitor *v, bool *mrf_used)
 {
-   int reg_width = dispatch_width / 8;
+   int reg_width = v->dispatch_width / 8;
 
    memset(mrf_used, 0, BRW_MAX_MRF * sizeof(bool));
 
-   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+   foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
       if (inst->dst.file == MRF) {
          int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
          mrf_used[reg] = true;
@@ -489,7 +494,7 @@
       }
 
       if (inst->mlen > 0) {
-	 for (int i = 0; i < implied_mrf_writes(inst); i++) {
+	 for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
             mrf_used[inst->base_mrf + i] = true;
          }
       }
@@ -500,12 +505,12 @@
  * Sets interference between virtual GRFs and usage of the high GRFs for SEND
  * messages (treated as MRFs in code generation).
  */
-void
-fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node,
-                                        int *first_used_mrf)
+static void
+setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
+                            int first_mrf_node, int *first_used_mrf)
 {
    bool mrf_used[BRW_MAX_MRF];
-   get_used_mrfs(mrf_used);
+   get_used_mrfs(v, mrf_used);
 
    *first_used_mrf = BRW_MAX_MRF;
    for (int i = 0; i < BRW_MAX_MRF; i++) {
@@ -523,7 +528,7 @@
          if (i < *first_used_mrf)
             *first_used_mrf = i;
 
-         for (unsigned j = 0; j < this->alloc.count; j++) {
+         for (unsigned j = 0; j < v->alloc.count; j++) {
             ra_add_node_interference(g, first_mrf_node + i, j);
          }
       }
@@ -533,7 +538,6 @@
 bool
 fs_visitor::assign_regs(bool allow_spilling)
 {
-   struct brw_compiler *compiler = brw->intelScreen->compiler;
    /* Most of this allocation was written for a reg_width of 1
     * (dispatch_width == 8).  In extending to SIMD16, the code was
     * left in place and it was converted to have the hardware
@@ -590,7 +594,8 @@
    setup_payload_interference(g, payload_node_count, first_payload_node);
    if (devinfo->gen >= 7) {
       int first_used_mrf = BRW_MAX_MRF;
-      setup_mrf_hack_interference(g, first_mrf_hack_node, &first_used_mrf);
+      setup_mrf_hack_interference(this, g, first_mrf_hack_node,
+                                  &first_used_mrf);
 
       foreach_block_and_inst(block, fs_inst, inst, cfg) {
          /* When we do send-from-GRF for FB writes, we need to ensure that
@@ -704,30 +709,27 @@
                          uint32_t spill_offset, int count)
 {
    int reg_size = 1;
-   if (dispatch_width == 16 && count % 2 == 0) {
+   if (dispatch_width == 16 && count % 2 == 0)
       reg_size = 2;
-      dst.width = 16;
-   }
+
+   const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
+                              .group(reg_size * 8, 0)
+                              .at(block, inst);
 
    for (int i = 0; i < count / reg_size; i++) {
       /* The gen7 descriptor-based offset is 12 bits of HWORD units. */
       bool gen7_read = devinfo->gen >= 7 && spill_offset < (1 << 12) * REG_SIZE;
-
-      fs_inst *unspill_inst =
-         new(mem_ctx) fs_inst(gen7_read ?
-                              SHADER_OPCODE_GEN7_SCRATCH_READ :
-                              SHADER_OPCODE_GEN4_SCRATCH_READ,
-                              dst);
+      fs_inst *unspill_inst = ibld.emit(gen7_read ?
+                                        SHADER_OPCODE_GEN7_SCRATCH_READ :
+                                        SHADER_OPCODE_GEN4_SCRATCH_READ,
+                                        dst);
       unspill_inst->offset = spill_offset;
-      unspill_inst->ir = inst->ir;
-      unspill_inst->annotation = inst->annotation;
       unspill_inst->regs_written = reg_size;
 
       if (!gen7_read) {
          unspill_inst->base_mrf = 14;
          unspill_inst->mlen = 1; /* header contains offset */
       }
-      inst->insert_before(block, unspill_inst);
 
       dst.reg_offset += reg_size;
       spill_offset += reg_size * REG_SIZE;
@@ -745,17 +747,17 @@
       reg_size = 2;
    }
 
+   const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
+                              .group(reg_size * 8, 0)
+                              .at(block, inst->next);
+
    for (int i = 0; i < count / reg_size; i++) {
       fs_inst *spill_inst =
-         new(mem_ctx) fs_inst(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
-                              reg_size * 8, reg_null_f, src);
+         ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, ibld.null_reg_f(), src);
       src.reg_offset += reg_size;
       spill_inst->offset = spill_offset + i * reg_size * REG_SIZE;
-      spill_inst->ir = inst->ir;
-      spill_inst->annotation = inst->annotation;
       spill_inst->mlen = 1 + reg_size; /* header, value */
       spill_inst->base_mrf = spill_base_mrf;
-      inst->insert_after(block, spill_inst);
    }
 }
 
@@ -852,7 +854,7 @@
     */
    if (!spilled_any_registers) {
       bool mrf_used[BRW_MAX_MRF];
-      get_used_mrfs(mrf_used);
+      get_used_mrfs(this, mrf_used);
 
       for (int i = spill_base_mrf; i < BRW_MAX_MRF; i++) {
          if (mrf_used[i]) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -167,7 +167,6 @@
          src_size = alloc.sizes[inst->src[0].reg];
          assert(src_size <= MAX_VGRF_SIZE);
 
-         assert(inst->src[0].width % 8 == 0);
          channels_remaining = src_size;
          memset(mov, 0, sizeof(mov));
 
@@ -196,7 +195,7 @@
             continue;
          }
          reg_to_offset[offset] = inst->dst.reg_offset;
-         if (inst->src[0].width == 16)
+         if (inst->regs_written > 1)
             reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
          mov[offset] = inst;
          channels_remaining -= inst->regs_written;
@@ -229,7 +228,6 @@
          continue;
 
       progress = true;
-      bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD;
 
       for (int i = 0; i < src_size; i++) {
          if (mov[i]) {
@@ -243,22 +241,19 @@
       }
 
       foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
-         for (int i = 0; i < src_size; i++) {
-            if (mov[i] || was_load_payload) {
-               if (scan_inst->dst.file == GRF &&
-                   scan_inst->dst.reg == reg_from &&
-                   scan_inst->dst.reg_offset == i) {
-                  scan_inst->dst.reg = reg_to;
-                  scan_inst->dst.reg_offset = reg_to_offset[i];
-               }
-               for (int j = 0; j < scan_inst->sources; j++) {
-                  if (scan_inst->src[j].file == GRF &&
-                      scan_inst->src[j].reg == reg_from &&
-                      scan_inst->src[j].reg_offset == i) {
-                     scan_inst->src[j].reg = reg_to;
-                     scan_inst->src[j].reg_offset = reg_to_offset[i];
-                  }
-               }
+         if (scan_inst->dst.file == GRF &&
+             scan_inst->dst.reg == reg_from) {
+            scan_inst->dst.reg = reg_to;
+            scan_inst->dst.reg_offset =
+               reg_to_offset[scan_inst->dst.reg_offset];
+         }
+
+         for (int j = 0; j < scan_inst->sources; j++) {
+            if (scan_inst->src[j].file == GRF &&
+                scan_inst->src[j].reg == reg_from) {
+               scan_inst->src[j].reg = reg_to;
+               scan_inst->src[j].reg_offset =
+                  reg_to_offset[scan_inst->src[j].reg_offset];
             }
          }
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -37,6 +37,8 @@
  */
 #define MAX_MOVS 8 /**< The maximum number of MOVs to attempt to match. */
 
+using namespace brw;
+
 /**
  * Scans forwards from an IF counting consecutive MOV instructions in the
  * "then" and "else" blocks of the if statement.
@@ -153,9 +155,6 @@
       if (movs == 0)
          continue;
 
-      fs_inst *sel_inst[MAX_MOVS] = { NULL };
-      fs_inst *mov_imm_inst[MAX_MOVS] = { NULL };
-
       enum brw_predicate predicate;
       bool predicate_inverse;
       if (devinfo->gen == 6 && if_inst->conditional_mod) {
@@ -175,6 +174,9 @@
 
          /* Check that the MOVs are the right form. */
          if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
+             then_mov[i]->exec_size != else_mov[i]->exec_size ||
+             then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
+             then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||
              then_mov[i]->is_partial_write() ||
              else_mov[i]->is_partial_write() ||
              then_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE ||
@@ -188,9 +190,24 @@
             movs = i;
             break;
          }
+      }
+
+      if (movs == 0)
+         continue;
+
+      /* Emit a CMP if our IF used the embedded comparison */
+      if (devinfo->gen == 6 && if_inst->conditional_mod) {
+         const fs_builder ibld(this, block, if_inst);
+         ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
+                  if_inst->conditional_mod);
+      }
+
+      for (int i = 0; i < movs; i++) {
+         const fs_builder ibld = fs_builder(this, then_block, then_mov[i])
+                                 .at(block, if_inst);
 
          if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) {
-            sel_inst[i] = MOV(then_mov[i]->dst, then_mov[i]->src[0]);
+            ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]);
          } else {
             /* Only the last source register can be a constant, so if the MOV
              * in the "then" clause uses a constant, we need to put it in a
@@ -200,29 +217,13 @@
             if (src0.file == IMM) {
                src0 = vgrf(glsl_type::float_type);
                src0.type = then_mov[i]->src[0].type;
-               mov_imm_inst[i] = MOV(src0, then_mov[i]->src[0]);
+               ibld.MOV(src0, then_mov[i]->src[0]);
             }
 
-            sel_inst[i] = SEL(then_mov[i]->dst, src0, else_mov[i]->src[0]);
-            sel_inst[i]->predicate = predicate;
-            sel_inst[i]->predicate_inverse = predicate_inverse;
+            set_predicate_inv(predicate, predicate_inverse,
+                              ibld.SEL(then_mov[i]->dst, src0,
+                                       else_mov[i]->src[0]));
          }
-      }
-
-      if (movs == 0)
-         continue;
-
-      /* Emit a CMP if our IF used the embedded comparison */
-      if (devinfo->gen == 6 && if_inst->conditional_mod) {
-         fs_inst *cmp_inst = CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
-                                 if_inst->conditional_mod);
-         if_inst->insert_before(block, cmp_inst);
-      }
-
-      for (int i = 0; i < movs; i++) {
-         if (mov_imm_inst[i])
-            if_inst->insert_before(block, mov_imm_inst[i]);
-         if_inst->insert_before(block, sel_inst[i]);
 
          then_mov[i]->remove(then_block);
          else_mov[i]->remove(else_block);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1096 @@
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_fs_surface_builder.h"
+#include "brw_fs.h"
+
+using namespace brw;
+
+namespace brw {
+   namespace surface_access {
+      namespace {
+         /**
+          * Generate a logical send opcode for a surface message and return
+          * the result.
+          */
+         fs_reg
+         emit_send(const fs_builder &bld, enum opcode opcode,
+                   const fs_reg &addr, const fs_reg &src, const fs_reg &surface,
+                   unsigned dims, unsigned arg, unsigned rsize,
+                   brw_predicate pred = BRW_PREDICATE_NONE)
+         {
+            /* Reduce the dynamically uniform surface index to a single
+             * scalar.
+             */
+            const fs_reg usurface = bld.emit_uniformize(surface);
+            const fs_reg srcs[] = {
+               addr, src, usurface, fs_reg(dims), fs_reg(arg)
+            };
+            const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
+            fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
+
+            inst->regs_written = rsize * bld.dispatch_width() / 8;
+            inst->predicate = pred;
+            return dst;
+         }
+      }
+
+      /**
+       * Emit an untyped surface read opcode.  \p dims determines the number
+       * of components of the address, \p size the number of components of
+       * the returned value and \p pred the predicate set on the instruction.
+       */
+      fs_reg
+      emit_untyped_read(const fs_builder &bld,
+                        const fs_reg &surface, const fs_reg &addr,
+                        unsigned dims, unsigned size,
+                        brw_predicate pred)
+      {
+         return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
+                          addr, fs_reg(), surface, dims, size, size, pred);
+      }
+
+      /**
+       * Emit an untyped surface write opcode.  \p dims determines the number
+       * of components of the address, \p size the number of components of
+       * the argument and \p pred the predicate set on the instruction.
+       */
+      void
+      emit_untyped_write(const fs_builder &bld, const fs_reg &surface,
+                         const fs_reg &addr, const fs_reg &src,
+                         unsigned dims, unsigned size,
+                         brw_predicate pred)
+      {
+         emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
+                   addr, src, surface, dims, size, 0, pred);
+      }
+
+      /**
+       * Emit an untyped surface atomic opcode.  \p dims determines the number
+       * of components of the address, \p rsize the number of components of
+       * the returned value (either zero or one) and \p pred the predicate.
+       */
+      fs_reg
+      emit_untyped_atomic(const fs_builder &bld,
+                          const fs_reg &surface, const fs_reg &addr,
+                          const fs_reg &src0, const fs_reg &src1,
+                          unsigned dims, unsigned rsize, unsigned op,
+                          brw_predicate pred)
+      {
+         /* FINISHME: Factor out this frequently recurring pattern into a
+          * helper function.
+          */
+         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const fs_reg srcs[] = { src0, src1 };
+         const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
+         bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
+
+         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
+                          addr, tmp, surface, dims, op, rsize, pred);
+      }
+
+      /**
+       * Emit a typed surface read opcode.  \p dims determines the number of
+       * components of the address and \p size the number of components of the
+       * returned value.
+       */
+      fs_reg
+      emit_typed_read(const fs_builder &bld, const fs_reg &surface,
+                      const fs_reg &addr, unsigned dims, unsigned size)
+      {
+         return emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
+                          addr, fs_reg(), surface, dims, size, size);
+      }
+
+      /**
+       * Emit a typed surface write opcode.  \p dims determines the number of
+       * components of the address and \p size the number of components of the
+       * argument.
+       */
+      void
+      emit_typed_write(const fs_builder &bld, const fs_reg &surface,
+                       const fs_reg &addr, const fs_reg &src,
+                       unsigned dims, unsigned size)
+      {
+         emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
+                   addr, src, surface, dims, size, 0);
+      }
+
+      /**
+       * Emit a typed surface atomic opcode.  \p dims determines the number of
+       * components of the address, \p rsize the number of components of the
+       * returned value (either zero or one) and \p pred the predication mode.
+       */
+      fs_reg
+      emit_typed_atomic(const fs_builder &bld, const fs_reg &surface,
+                        const fs_reg &addr,
+                        const fs_reg &src0, const fs_reg &src1,
+                        unsigned dims, unsigned rsize, unsigned op,
+                        brw_predicate pred)
+      {
+         /* FINISHME: Factor out this frequently recurring pattern into a
+          * helper function.
+          */
+         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         const fs_reg srcs[] = { src0, src1 };
+         const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
+         bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
+
+         return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
+                          addr, tmp, surface, dims, op, rsize, pred);
+      }
+   }
+}
+
+namespace {
+   namespace image_format_info {
+      /**
+       * Simple 4-tuple of scalars used to pass around per-color component
+       * values.
+       */
+      struct color_u {
+         color_u(unsigned x = 0) : r(x), g(x), b(x), a(x)
+         {
+         }
+
+         color_u(unsigned r, unsigned g, unsigned b, unsigned a) :
+            r(r), g(g), b(b), a(a)
+         {
+         }
+
+         unsigned
+         operator[](unsigned i) const
+         {
+            const unsigned xs[] = { r, g, b, a };
+            return xs[i];
+         }
+
+         unsigned r, g, b, a;
+      };
+
+      /**
+       * Return the per-channel bitfield widths for a given image format.
+       */
+      inline color_u
+      get_bit_widths(mesa_format format)
+      {
+         return color_u(_mesa_get_format_bits(format, GL_RED_BITS),
+                        _mesa_get_format_bits(format, GL_GREEN_BITS),
+                        _mesa_get_format_bits(format, GL_BLUE_BITS),
+                        _mesa_get_format_bits(format, GL_ALPHA_BITS));
+      }
+
+      /**
+       * Return the per-channel bitfield shifts for a given image format.
+       */
+      inline color_u
+      get_bit_shifts(mesa_format format)
+      {
+         const color_u widths = get_bit_widths(format);
+         return color_u(0, widths.r, widths.r + widths.g,
+                        widths.r + widths.g + widths.b);
+      }
+
+      /**
+       * Return true if all present components have the same bit width.
+       */
+      inline bool
+      is_homogeneous(mesa_format format)
+      {
+         const color_u widths = get_bit_widths(format);
+         return ((widths.g == 0 || widths.g == widths.r) &&
+                 (widths.b == 0 || widths.b == widths.r) &&
+                 (widths.a == 0 || widths.a == widths.r));
+      }
+
+      /**
+       * Return true if the format conversion boils down to a trivial copy.
+       */
+      inline bool
+      is_conversion_trivial(const brw_device_info *devinfo, mesa_format format)
+      {
+         return (get_bit_widths(format).r == 32 && is_homogeneous(format)) ||
+                 format == brw_lower_mesa_image_format(devinfo, format);
+      }
+
+      /**
+       * Return true if the hardware natively supports some format with
+       * compatible bitfield layout, but possibly different data types.
+       */
+      inline bool
+      has_supported_bit_layout(const brw_device_info *devinfo,
+                               mesa_format format)
+      {
+         const color_u widths = get_bit_widths(format);
+         const color_u lower_widths = get_bit_widths(
+            brw_lower_mesa_image_format(devinfo, format));
+
+         return (widths.r == lower_widths.r &&
+                 widths.g == lower_widths.g &&
+                 widths.b == lower_widths.b &&
+                 widths.a == lower_widths.a);
+      }
+
+      /**
+       * Return true if we are required to spread individual components over
+       * several components of the format used by the hardware (RG32 and
+       * friends implemented as RGBA16UI).
+       */
+      inline bool
+      has_split_bit_layout(const brw_device_info *devinfo, mesa_format format)
+      {
+         const mesa_format lower_format =
+            brw_lower_mesa_image_format(devinfo, format);
+
+         return (_mesa_format_num_components(format) <
+                 _mesa_format_num_components(lower_format));
+      }
+
+      /**
+       * Return true unless we have to fall back to untyped surface access,
+       * judging by the format size in bytes and the hardware generation.
+       */
+      inline bool
+      has_matching_typed_format(const brw_device_info *devinfo,
+                                mesa_format format)
+      {
+         return (_mesa_get_format_bytes(format) <= 4 ||
+                 (_mesa_get_format_bytes(format) <= 8 &&
+                  (devinfo->gen >= 8 || devinfo->is_haswell)) ||
+                 devinfo->gen >= 9);
+      }
+
+      /**
+       * Return true if the hardware returns garbage in the unused high bits
+       * of each component.  This may happen on IVB because we rely on the
+       * undocumented behavior that typed reads from surfaces of the
+       * unsupported R8 and R16 formats return useful data in their least
+       * significant bits.
+       */
+      inline bool
+      has_undefined_high_bits(const brw_device_info *devinfo,
+                              mesa_format format)
+      {
+         const mesa_format lower_format =
+            brw_lower_mesa_image_format(devinfo, format);
+
+         return (devinfo->gen == 7 && !devinfo->is_haswell &&
+                 (lower_format == MESA_FORMAT_R_UINT16 ||
+                  lower_format == MESA_FORMAT_R_UINT8));
+      }
+
+      /**
+       * Return true if the format represents values as signed integers
+       * requiring sign extension when unpacking.
+       */
+      inline bool
+      needs_sign_extension(mesa_format format)
+      {
+         return (_mesa_get_format_datatype(format) == GL_SIGNED_NORMALIZED ||
+                 _mesa_get_format_datatype(format) == GL_INT);
+      }
+   }
+
+   namespace image_validity {
+      /**
+       * Check whether there is an image bound at the given index and write
+       * the comparison result to f0.0.  Returns an appropriate predication
+       * mode to use on subsequent image operations.
+       */
+      brw_predicate
+      emit_surface_check(const fs_builder &bld, const fs_reg &image)
+      {
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
+
+         if (devinfo->gen == 7 && !devinfo->is_haswell) {
+            /* Check the first component of the size field to find out if the
+             * image is bound.  Necessary on IVB for typed atomics because
+             * they don't seem to respect null surfaces and will happily
+             * corrupt or read random memory when no image is bound.
+             */
+            bld.CMP(bld.null_reg_ud(),
+                    retype(size, BRW_REGISTER_TYPE_UD),
+                    fs_reg(0), BRW_CONDITIONAL_NZ);
+
+            return BRW_PREDICATE_NORMAL;
+         } else {
+            /* More recent platforms implement compliant behavior when a null
+             * surface is bound.
+             */
+            return BRW_PREDICATE_NONE;
+         }
+      }
+
+      /**
+       * Emit comparisons of the provided coordinates against the image
+       * bounds, leaving the result in f0.0.  Returns the predication mode
+       * that subsequent image operations should use.
+       */
+      brw_predicate
+      emit_bounds_check(const fs_builder &bld, const fs_reg &image,
+                        const fs_reg &addr, unsigned dims)
+      {
+         const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
+         const fs_reg uaddr = retype(addr, BRW_REGISTER_TYPE_UD);
+
+         for (unsigned i = 0; i < dims; ++i) {
+            /* Only the comparison of the first coordinate is unpredicated. */
+            set_predicate(i ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE,
+                          bld.CMP(bld.null_reg_ud(), offset(uaddr, bld, i),
+                                  offset(size, bld, i), BRW_CONDITIONAL_L));
+         }
+         return BRW_PREDICATE_NORMAL;
+      }
+   }
+
+   namespace image_coordinates {
+      /**
+       * Number of coordinates required to address a texel of the surface.
+       * This can exceed the sum of \p surf_dims and \p arr_dims if padding
+       * is required.
+       */
+      unsigned
+      num_image_coordinates(const fs_builder &bld,
+                            unsigned surf_dims, unsigned arr_dims,
+                            mesa_format format)
+      {
+         const bool typed = image_format_info::has_matching_typed_format(
+            bld.shader->devinfo, format);
+         unsigned total = surf_dims + arr_dims;
+
+         /* HSW in vec4 mode and our software coordinate handling for untyped
+          * reads want the array index at the Z component: pad 1D arrays.
+          */
+         if (!typed && surf_dims == 1 && arr_dims == 1)
+            ++total;
+         return total;
+      }
+
+      /**
+       * Rearrange image coordinates into the layout the implementation
+       * expects, inserting padding where num_image_coordinates() asks for it.
+       */
+      fs_reg
+      emit_image_coordinates(const fs_builder &bld, const fs_reg &addr,
+                             unsigned surf_dims, unsigned arr_dims,
+                             mesa_format format)
+      {
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+         /* No padding needed, the coordinates can be used as they are. */
+         if (dims <= surf_dims + arr_dims)
+            return addr;
+
+         assert(surf_dims == 1 && arr_dims == 1 && dims == 3);
+
+         /* The array index is required to be passed in as the Z component,
+          * insert a zero at the Y component to shift it to the right position.
+          *
+          * FINISHME: Factor out this frequently recurring pattern into a helper
+          * function.
+          */
+         const fs_reg srcs[] = { addr, fs_reg(0), offset(addr, bld, 1) };
+         const fs_reg padded = bld.vgrf(addr.type, dims);
+         bld.LOAD_PAYLOAD(padded, srcs, dims, 0);
+         return padded;
+      }
+
+      /**
+       * Calculate the offset in memory of the texel given by \p coord.
+       *
+       * This is meant to be used with untyped surface messages to access a
+       * tiled surface, which involves taking into account the tiling and
+       * swizzling modes of the surface manually so it will hopefully not
+       * happen very often.
+       *
+       * The tiling algorithm implemented here matches either the X or Y
+       * tiling layouts supported by the hardware depending on the tiling
+       * coefficients passed to the program as uniforms.  See Volume 1 Part 2
+       * Section 4.5 "Address Tiling Function" of the IVB PRM for an in-depth
+       * explanation of the hardware tiling format.
+       */
+      fs_reg
+      emit_address_calculation(const fs_builder &bld, const fs_reg &image,
+                               const fs_reg &coord, unsigned dims)
+      {
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const fs_reg off = offset(image, bld, BRW_IMAGE_PARAM_OFFSET_OFFSET);
+         const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
+         const fs_reg tile = offset(image, bld, BRW_IMAGE_PARAM_TILING_OFFSET);
+         const fs_reg swz = offset(image, bld, BRW_IMAGE_PARAM_SWIZZLING_OFFSET);
+         const fs_reg addr = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+         const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+         const fs_reg minor = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+         const fs_reg major = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+         /* Shift the coordinates by the fixed surface offset.  It may be
+          * non-zero if the image is a single slice of a higher-dimensional
+          * surface, or if a non-zero mipmap level of the surface is bound to
+          * the pipeline.  The offset needs to be applied here rather than at
+          * surface state set-up time because the desired slice-level may
+          * start mid-tile, so simply shifting the surface base address
+          * wouldn't give a well-formed tiled surface in the general case.
+          */
+         for (unsigned c = 0; c < 2; ++c)
+            bld.ADD(offset(addr, bld, c), offset(off, bld, c),
+                    (c < dims ?
+                     offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) :
+                     fs_reg(0)));
+
+         /* The layout of 3-D textures in memory is sort-of like a tiling
+          * format.  At each miplevel, the slices are arranged in rows of
+          * 2^level slices per row.  The slice row is stored in tmp.y and
+          * the slice within the row is stored in tmp.x.
+          *
+          * The layout of 2-D array textures and cubemaps is much simpler:
+          * Depending on whether the ARYSPC_LOD0 layout is in use it will be
+          * stored in memory as an array of slices, each one being a 2-D
+          * arrangement of miplevels, or as a 2D arrangement of miplevels,
+          * each one being an array of slices.  In either case the separation
+          * between slices of the same LOD is equal to the qpitch value
+          * provided as stride.w.
+          *
+          * This code can be made to handle both 2D arrays and 3D textures
+          * by passing in the miplevel as tile.z for 3-D textures and 0 in
+          * tile.z for 2-D array textures.
+          *
+          * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
+          * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
+          * of the hardware 3D texture and 2D array layouts.
+          */
+         if (dims > 2) {
+            /* Decompose z into a major (tmp.y) and a minor (tmp.x)
+             * index.
+             */
+            bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0),
+                    offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2));
+            bld.SHR(offset(tmp, bld, 1),
+                    offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2),
+                    offset(tile, bld, 2));
+
+            /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
+             * slice offset.
+             */
+            for (unsigned c = 0; c < 2; ++c) {
+               bld.MUL(offset(tmp, bld, c),
+                       offset(stride, bld, 2 + c), offset(tmp, bld, c));
+               bld.ADD(offset(addr, bld, c),
+                       offset(addr, bld, c), offset(tmp, bld, c));
+            }
+         }
+
+         if (dims > 1) {
+            /* Calculate the major/minor x and y indices.  In order to
+             * accommodate both X and Y tiling, the Y-major tiling format is
+             * treated as being a bunch of narrow X-tiles placed next to each
+             * other.  This means that the tile width for Y-tiling is actually
+             * the width of one sub-column of the Y-major tile where each 4K
+             * tile has 8 512B sub-columns.
+             *
+             * The major Y value is the row of tiles in which the pixel lives.
+             * The major X value is the tile sub-column in which the pixel
+             * lives; for X tiling, this is the same as the tile column, for Y
+             * tiling, each tile has 8 sub-columns.  The minor X and Y indices
+             * are the position within the sub-column.
+             */
+            for (unsigned c = 0; c < 2; ++c) {
+               /* Calculate the minor x and y indices. */
+               bld.BFE(offset(minor, bld, c), offset(tile, bld, c),
+                       fs_reg(0), offset(addr, bld, c));
+
+               /* Calculate the major x and y indices. */
+               bld.SHR(offset(major, bld, c),
+                       offset(addr, bld, c), offset(tile, bld, c));
+            }
+
+            /* Calculate the texel index from the start of the tile row and
+             * the vertical coordinate of the row.
+             * Equivalent to:
+             *   tmp.x = (major.x << tile.y << tile.x) +
+             *           (minor.y << tile.x) + minor.x
+             *   tmp.y = major.y << tile.y
+             */
+            bld.SHL(tmp, major, offset(tile, bld, 1));
+            bld.ADD(tmp, tmp, offset(minor, bld, 1));
+            bld.SHL(tmp, tmp, offset(tile, bld, 0));
+            bld.ADD(tmp, tmp, minor);
+            bld.SHL(offset(tmp, bld, 1),
+                    offset(major, bld, 1), offset(tile, bld, 1));
+
+            /* Add it to the start of the tile row. */
+            bld.MUL(offset(tmp, bld, 1),
+                    offset(tmp, bld, 1), offset(stride, bld, 1));
+            bld.ADD(tmp, tmp, offset(tmp, bld, 1));
+
+            /* Multiply by the Bpp value. */
+            bld.MUL(dst, tmp, stride);
+
+            if (devinfo->gen < 8 && !devinfo->is_baytrail) {
+               /* Take into account the two dynamically specified shifts.
+                * Both are needed to implement swizzling of X-tiled
+                * surfaces.  For Y-tiled surfaces only one bit needs to be
+                * XOR-ed with bit 6 of the memory address, so a swz value of
+                * 0xff (actually interpreted as 31 by the hardware) will be
+                * provided to cause the relevant bit of tmp.y to be zero and
+                * turn the first XOR into the identity.  For linear surfaces
+                * or platforms lacking address swizzling both shifts will be
+                * 0xff causing the relevant bits of both tmp.x and .y to be
+                * zero, which effectively disables swizzling.
+                */
+               for (unsigned c = 0; c < 2; ++c)
+                  bld.SHR(offset(tmp, bld, c), dst, offset(swz, bld, c));
+
+               /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
+               bld.XOR(tmp, tmp, offset(tmp, bld, 1));
+               bld.AND(tmp, tmp, fs_reg(1 << 6));
+               bld.XOR(dst, dst, tmp);
+            }
+
+         } else {
+            /* Multiply by the Bpp/stride value.  Note that the addr.y may be
+             * non-zero even if the image is one-dimensional because a
+             * vertical offset may have been applied above to select a
+             * non-zero slice or level of a higher-dimensional texture.
+             */
+            bld.MUL(offset(addr, bld, 1),
+                    offset(addr, bld, 1), offset(stride, bld, 1));
+            bld.ADD(addr, addr, offset(addr, bld, 1));
+            bld.MUL(dst, addr, stride);
+         }
+
+         return dst;
+      }
+   }
+
+   namespace image_format_conversion {
+      using image_format_info::color_u;
+
+      namespace {
+         /**
+          * Maximum representable value in an unsigned integer with the given
+          * number of bits; saturates to all-ones for full-width \p n.
+          */
+         inline unsigned
+         scale(unsigned n)
+         {
+            return n >= 8 * sizeof(unsigned) ? ~0u : (1u << n) - 1;
+         }
+      }
+
+      /**
+       * Pack the components of \p src into a bitfield according to the
+       * per-component bit shifts and widths.  Note that bitfield components
+       * are not allowed to cross 32-bit boundaries.
+       */
+      fs_reg
+      emit_pack(const fs_builder &bld, const fs_reg &src,
+                const color_u &shifts, const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         bool written[4] = {};
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (!widths[c])
+               continue;
+            /* Index of the destination dword this component lands in. */
+            const unsigned word = shifts[c] / 32;
+            const fs_reg field = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+            /* Move the component to its position within the dword. */
+            bld.SHL(field, offset(src, bld, c), fs_reg(shifts[c] % 32));
+
+            /* Merge it with the components already stored there. */
+            if (written[word]) {
+               bld.OR(offset(dst, bld, word), offset(dst, bld, word), field);
+            } else {
+               bld.MOV(offset(dst, bld, word), field);
+               written[word] = true;
+            }
+         }
+         return dst;
+      }
+
+      /**
+       * Extract a vector from the bitfield \p src according to the
+       * per-component bit shifts and widths.  Note that bitfield components
+       * are not allowed to cross 32-bit boundaries.
+       */
+      fs_reg
+      emit_unpack(const fs_builder &bld, const fs_reg &src,
+                  const color_u &shifts, const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(src.type, 4);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (!widths[c])
+               continue;
+            const fs_reg comp = offset(dst, bld, c);
+
+            /* Shift left to discard the most significant bits. */
+            bld.SHL(comp, offset(src, bld, shifts[c] / 32),
+                    fs_reg(32 - shifts[c] % 32 - widths[c]));
+
+            /* Shift back down to the least significant bits.  The shift is
+             * arithmetic in order to get sign extension on signed types.
+             */
+            bld.ASR(comp, comp, fs_reg(32 - widths[c]));
+         }
+
+         return dst;
+      }
+
+      /**
+       * Clamp the components of an integer vector to the range representable
+       * with the specified bit widths and signedness.
+       */
+      fs_reg
+      emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
+                              const color_u &widths, bool is_signed)
+      {
+         const fs_reg dst = bld.vgrf(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+         const unsigned sign_bits = (is_signed ? 1 : 0);
+         assert(src.type == dst.type);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (!widths[c])
+               continue;
+            const int max_value = (int)scale(widths[c] - sign_bits);
+
+            /* Clamp to the maximum value. */
+            bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
+                            fs_reg(max_value), BRW_CONDITIONAL_L);
+
+            /* Clamp to the minimum value, which is only needed if signed. */
+            if (is_signed)
+               bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
+                               fs_reg(-max_value - 1), BRW_CONDITIONAL_G);
+         }
+
+         return dst;
+      }
+
+      /**
+       * Turn a normalized fixed-point vector of the given signedness and bit
+       * widths into a floating point vector.
+       */
+      fs_reg
+      emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
+                               const color_u &widths, bool is_signed)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
+         const unsigned sign_bits = (is_signed ? 1 : 0);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (!widths[c])
+               continue;
+            const fs_reg comp = offset(dst, bld, c);
+            const float norm = 1.0f / scale(widths[c] - sign_bits);
+
+            /* Convert to float. */
+            bld.MOV(comp, offset(src, bld, c));
+
+            /* Scale by the reciprocal of the normalization constant. */
+            bld.MUL(comp, comp, fs_reg(norm));
+
+            /* Signed values are not allowed to end up below -1.0. */
+            if (is_signed)
+               bld.emit_minmax(comp, comp, fs_reg(-1.0f), BRW_CONDITIONAL_G);
+         }
+         return dst;
+      }
+
+      /**
+       * Turn a floating-point vector into a normalized fixed-point vector of
+       * the given signedness and bit widths.
+       */
+      fs_reg
+      emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
+                             const color_u &widths, bool is_signed)
+      {
+         const fs_reg dst = bld.vgrf(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+         const unsigned sign_bits = (is_signed ? 1 : 0);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (!widths[c])
+               continue;
+
+            const fs_reg fcomp = offset(fdst, bld, c);
+            const fs_reg icomp = offset(dst, bld, c);
+
+            /* Clamp the normalized floating-point argument. */
+            if (!is_signed) {
+               set_saturate(true, bld.MOV(fcomp, offset(src, bld, c)));
+            } else {
+               bld.emit_minmax(fcomp, offset(src, bld, c),
+                               fs_reg(-1.0f), BRW_CONDITIONAL_G);
+               bld.emit_minmax(fcomp, fcomp, fs_reg(1.0f), BRW_CONDITIONAL_L);
+            }
+
+            /* Scale up by the normalization constant. */
+            bld.MUL(fcomp, fcomp, fs_reg((float)scale(widths[c] - sign_bits)));
+
+            /* Convert to integer. */
+            bld.RNDE(fcomp, fcomp);
+            bld.MOV(icomp, fcomp);
+         }
+
+         return dst;
+      }
+
+      /**
+       * Expand a floating point vector of the given bit widths into a 32-bit
+       * floating point vector.
+       */
+      fs_reg
+      emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
+                              const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (!widths[c])
+               continue;
+            const fs_reg bits = offset(dst, bld, c);
+            bld.MOV(bits, offset(src, bld, c));
+
+            /* 10-bit and 11-bit floats have a 5-bit exponent just like the
+             * 16-bit format and no sign bit, so a left shift is enough to
+             * extend them to 15 bits.
+             */
+            if (widths[c] < 16)
+               bld.SHL(bits, bits, fs_reg(15 - widths[c]));
+
+            /* Convert to 32-bit floating point. */
+            bld.F16TO32(offset(fdst, bld, c), bits);
+         }
+
+         return fdst;
+      }
+
+      /**
+       * Narrow a vector down to a floating point vector of the given bit
+       * widths.
+       */
+      fs_reg
+      emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
+                            const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (!widths[c])
+               continue;
+            /* The 10-bit and 11-bit formats have no sign bit. */
+            const bool is_small = (widths[c] < 16);
+            const fs_reg bits = offset(dst, bld, c);
+            const fs_reg value = offset(fdst, bld, c);
+            bld.MOV(value, offset(src, bld, c));
+
+            /* Clamp to the minimum value. */
+            if (is_small)
+               bld.emit_minmax(value, value, fs_reg(0.0f), BRW_CONDITIONAL_G);
+
+            /* Convert to 16-bit floating-point. */
+            bld.F32TO16(bits, value);
+
+            /* The small formats have a 5-bit exponent just like the 16-bit
+             * format, so discarding the least significant bits gives a
+             * floating point number of the requested width.
+             */
+            if (is_small)
+               bld.SHR(bits, bits, fs_reg(15 - widths[c]));
+         }
+
+         return dst;
+      }
+
+      /**
+       * Replace the components missing from a vector with 0, 0, 0, 1.
+       */
+      fs_reg
+      emit_pad(const fs_builder &bld, const fs_reg &src,
+               const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(src.type, 4);
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c])
+               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
+            else
+               bld.MOV(offset(dst, bld, c), fs_reg(c == 3 ? 1u : 0u));
+         }
+         return dst;
+      }
+   }
+}
+
+namespace brw {
+   namespace image_access {
+      /**
+       * Load a vector from a surface of the given format and dimensionality
+       * at the given coordinates.  \p surf_dims and \p arr_dims give the
+       * number of non-array and array coordinates of the image respectively.
+       */
+      fs_reg
+      emit_image_load(const fs_builder &bld,
+                      const fs_reg &image, const fs_reg &addr,
+                      unsigned surf_dims, unsigned arr_dims,
+                      mesa_format format)
+      {
+         using namespace image_format_info;
+         using namespace image_format_conversion;
+         using namespace image_validity;
+         using namespace image_coordinates;
+         using namespace surface_access;
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const mesa_format lower_format =
+            brw_lower_mesa_image_format(devinfo, format);
+         fs_reg tmp;
+
+         /* Transform the image coordinates into actual surface coordinates. */
+         const fs_reg saddr =
+            emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+         if (has_matching_typed_format(devinfo, format)) {
+            /* Hopefully we get here most of the time... */
+            tmp = emit_typed_read(bld, image, saddr, dims,
+                                  _mesa_format_num_components(lower_format));
+         } else {
+            /* Untyped surface reads return 32 bits of the surface per
+             * component, without any sort of unpacking or type conversion,
+             */
+            const unsigned size = _mesa_get_format_bytes(format) / 4;
+
+            /* they don't properly handle out of bounds access, so we have to
+             * check manually if the coordinates are valid and predicate the
+             * surface read on the result,
+             */
+            const brw_predicate pred =
+               emit_bounds_check(bld, image, saddr, dims);
+
+            /* and they don't know about surface coordinates, we need to
+             * convert them to a raw memory offset.
+             */
+            const fs_reg laddr = emit_address_calculation(bld, image, saddr, dims);
+
+            tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);
+
+            /* Out of bounds access gives zero; only size components exist. */
+            for (unsigned c = 0; c < size; ++c)
+               set_predicate(pred, bld.SEL(offset(tmp, bld, c),
+                                           offset(tmp, bld, c), fs_reg(0)));
+         }
+
+         /* Set the register type to D instead of UD if the data type is
+          * represented as a signed integer in memory so that sign extension
+          * is handled correctly by unpack.
+          */
+         if (needs_sign_extension(format))
+            tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+
+         if (!has_supported_bit_layout(devinfo, format)) {
+            /* Unpack individual vector components from the bitfield if the
+             * hardware is unable to do it for us.
+             */
+            if (has_split_bit_layout(devinfo, format))
+               tmp = emit_pack(bld, tmp, get_bit_shifts(lower_format),
+                               get_bit_widths(lower_format));
+            else
+               tmp = emit_unpack(bld, tmp, get_bit_shifts(format),
+                                 get_bit_widths(format));
+
+         } else if ((needs_sign_extension(format) &&
+                     !is_conversion_trivial(devinfo, format)) ||
+                    has_undefined_high_bits(devinfo, format)) {
+            /* Perform a trivial unpack even though the bit layout matches in
+             * order to get the most significant bits of each component
+             * initialized properly.
+             */
+            tmp = emit_unpack(bld, tmp, color_u(0, 32, 64, 96),
+                              get_bit_widths(format));
+         }
+
+         if (!_mesa_is_format_integer(format)) {
+            if (is_conversion_trivial(devinfo, format)) {
+               /* Just need to cast the vector to the target type. */
+               tmp = retype(tmp, BRW_REGISTER_TYPE_F);
+            } else {
+               /* Do the right sort of type conversion to float. */
+               if (_mesa_get_format_datatype(format) == GL_FLOAT)
+                  tmp = emit_convert_from_float(
+                     bld, tmp, get_bit_widths(format));
+               else
+                  tmp = emit_convert_from_scaled(
+                     bld, tmp, get_bit_widths(format),
+                     _mesa_is_format_signed(format));
+            }
+         }
+
+         /* Initialize missing components of the result. */
+         return emit_pad(bld, tmp, get_bit_widths(format));
+      }
+
+      /**
+       * Store \p src in a surface of the given dimensionality at coordinates
+       * \p addr.  \p format may be MESA_FORMAT_NONE if unknown.  \p surf_dims
+       * and \p arr_dims are the numbers of non-array and array coordinates.
+       */
+      void
+      emit_image_store(const fs_builder &bld, const fs_reg &image,
+                       const fs_reg &addr, const fs_reg &src,
+                       unsigned surf_dims, unsigned arr_dims,
+                       mesa_format format)
+      {
+         using namespace image_format_info;
+         using namespace image_format_conversion;
+         using namespace image_validity;
+         using namespace image_coordinates;
+         using namespace surface_access;
+         const brw_device_info *devinfo = bld.shader->devinfo;
+
+         /* Transform the image coordinates into actual surface coordinates. */
+         const fs_reg saddr =
+            emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+         if (format == MESA_FORMAT_NONE) {
+            /* We don't know what the format is, but that's fine because it
+             * implies write-only access, and typed surface writes are always
+             * able to take care of type conversion and packing for us.
+             */
+            emit_typed_write(bld, image, saddr, src, dims, 4);
+
+         } else {
+            const mesa_format lower_format =
+               brw_lower_mesa_image_format(devinfo, format);
+            fs_reg tmp = src;
+
+            if (!is_conversion_trivial(devinfo, format)) {
+               /* Do the right sort of type conversion. */
+               if (_mesa_get_format_datatype(format) == GL_FLOAT)
+                  tmp = emit_convert_to_float(bld, tmp, get_bit_widths(format));
+
+               else if (_mesa_is_format_integer(format))
+                  tmp = emit_convert_to_integer(bld, tmp, get_bit_widths(format),
+                                                _mesa_is_format_signed(format));
+
+               else
+                  tmp = emit_convert_to_scaled(bld, tmp, get_bit_widths(format),
+                                               _mesa_is_format_signed(format));
+            }
+
+            /* We're down to bit manipulation at this point. */
+            tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+
+            if (!has_supported_bit_layout(devinfo, format)) {
+               /* Pack the vector components into a bitfield if the hardware
+                * is unable to do it for us.
+                */
+               if (has_split_bit_layout(devinfo, format))
+                  tmp = emit_unpack(bld, tmp, get_bit_shifts(lower_format),
+                                    get_bit_widths(lower_format));
+
+               else
+                  tmp = emit_pack(bld, tmp, get_bit_shifts(format),
+                                  get_bit_widths(format));
+            }
+
+            if (has_matching_typed_format(devinfo, format)) {
+               /* Hopefully we get here most of the time... */
+               emit_typed_write(bld, image, saddr, tmp, dims,
+                                _mesa_format_num_components(lower_format));
+
+            } else {
+               /* Untyped surface writes store 32 bits of the surface per
+                * component, without any sort of packing or type conversion,
+                */
+               const unsigned size = _mesa_get_format_bytes(format) / 4;
+
+               /* they don't properly handle out of bounds access, so we have
+                * to check manually if the coordinates are valid and predicate
+                * the surface write on the result,
+                */
+               const brw_predicate pred =
+                  emit_bounds_check(bld, image, saddr, dims);
+
+               /* and, phew, they don't know about surface coordinates, so we
+                * need to convert them to a raw memory offset.
+                */
+               const fs_reg laddr = emit_address_calculation(
+                  bld, image, saddr, dims);
+
+               emit_untyped_write(bld, image, laddr, tmp, 1, size, pred);
+            }
+         }
+      }
+
+      /**
+       * Main building block of the imageAtomic GLSL built-ins: emit an atomic
+       * read-modify-write operation on a surface of the given dimensionality
+       * at the given coordinates.  \p surf_dims and \p arr_dims give the
+       * number of non-array and array coordinates of the image respectively.
+       */
+      fs_reg
+      emit_image_atomic(const fs_builder &bld,
+                        const fs_reg &image, const fs_reg &addr,
+                        const fs_reg &src0, const fs_reg &src1,
+                        unsigned surf_dims, unsigned arr_dims,
+                        unsigned rsize, unsigned op)
+      {
+         using namespace image_validity;
+         using namespace image_coordinates;
+         using namespace surface_access;
+         /* Coordinates are always set up as for an R_UINT32 image. */
+         const mesa_format format = MESA_FORMAT_R_UINT32;
+
+         /* Avoid performing an atomic operation on an unbound surface. */
+         const brw_predicate pred = emit_surface_check(bld, image);
+
+         /* Transform the image coordinates into actual surface coordinates. */
+         const fs_reg saddr =
+            emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
+         const unsigned dims =
+            num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+         /* Thankfully we can do without untyped atomics here. */
+         const fs_reg result = emit_typed_atomic(bld, image, saddr, src0, src1,
+                                                 dims, rsize, op, pred);
+
+         /* An unbound surface access should give zero as result. */
+         if (rsize != 0)
+            set_predicate(pred, bld.SEL(result, result, fs_reg(0)));
+
+         return result;
+      }
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,89 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_FS_SURFACE_BUILDER_H
+#define BRW_FS_SURFACE_BUILDER_H
+
+#include "brw_fs_builder.h"
+#include "brw_context.h"
+
+namespace brw {
+   namespace surface_access {
+      fs_reg
+      emit_untyped_read(const fs_builder &bld,
+                        const fs_reg &surface, const fs_reg &addr,
+                        unsigned dims, unsigned size,
+                        brw_predicate pred = BRW_PREDICATE_NONE);
+
+      void
+      emit_untyped_write(const fs_builder &bld, const fs_reg &surface,
+                         const fs_reg &addr, const fs_reg &src,
+                         unsigned dims, unsigned size,
+                         brw_predicate pred = BRW_PREDICATE_NONE);
+
+      fs_reg
+      emit_untyped_atomic(const fs_builder &bld,
+                          const fs_reg &surface, const fs_reg &addr,
+                          const fs_reg &src0, const fs_reg &src1,
+                          unsigned dims, unsigned rsize, unsigned op,
+                          brw_predicate pred = BRW_PREDICATE_NONE);
+
+      fs_reg
+      emit_typed_read(const fs_builder &bld, const fs_reg &surface,
+                      const fs_reg &addr, unsigned dims, unsigned size);
+
+      void
+      emit_typed_write(const fs_builder &bld, const fs_reg &surface,
+                       const fs_reg &addr, const fs_reg &src,
+                       unsigned dims, unsigned size);
+
+      fs_reg
+      emit_typed_atomic(const fs_builder &bld, const fs_reg &surface,
+                        const fs_reg &addr,
+                        const fs_reg &src0, const fs_reg &src1,
+                        unsigned dims, unsigned rsize, unsigned op,
+                        brw_predicate pred = BRW_PREDICATE_NONE);
+   }
+
+   namespace image_access {
+      fs_reg
+      emit_image_load(const fs_builder &bld,
+                      const fs_reg &image, const fs_reg &addr,
+                      unsigned surf_dims, unsigned arr_dims,
+                      mesa_format format);
+
+      void
+      emit_image_store(const fs_builder &bld, const fs_reg &image,
+                       const fs_reg &addr, const fs_reg &src,
+                       unsigned surf_dims, unsigned arr_dims,
+                       mesa_format format);
+      fs_reg
+      emit_image_atomic(const fs_builder &bld,
+                        const fs_reg &image, const fs_reg &addr,
+                        const fs_reg &src0, const fs_reg &src1,
+                        unsigned surf_dims, unsigned arr_dims,
+                        unsigned rsize, unsigned op);
+   }
+}
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -43,6 +43,7 @@
 #include "glsl/ir_visitor.h"
 #include "glsl/ir_rvalue_visitor.h"
 #include "glsl/glsl_types.h"
+#include "util/hash_table.h"
 
 static bool debug = false;
 
@@ -72,7 +73,8 @@
    ir_vector_reference_visitor(void)
    {
       this->mem_ctx = ralloc_context(NULL);
-      this->variable_list.make_empty();
+      this->ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
    }
 
    ~ir_vector_reference_visitor(void)
@@ -89,7 +91,7 @@
    variable_entry *get_variable_entry(ir_variable *var);
 
    /* List of variable_entry */
-   exec_list variable_list;
+   struct hash_table *ht;
 
    void *mem_ctx;
 };
@@ -119,13 +121,12 @@
       break;
    }
 
-   foreach_in_list(variable_entry, entry, &variable_list) {
-      if (entry->var == var)
-	 return entry;
-   }
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   if (hte)
+      return (struct variable_entry *) hte->data;
 
    variable_entry *entry = new(mem_ctx) variable_entry(var);
-   this->variable_list.push_tail(entry);
+   _mesa_hash_table_insert(ht, var, entry);
    return entry;
 }
 
@@ -173,7 +174,7 @@
       return visit_continue_with_parent;
    }
    if (ir->lhs->as_dereference_variable() &&
-       is_power_of_two(ir->write_mask) &&
+       _mesa_is_pow_two(ir->write_mask) &&
        !ir->condition) {
       /* If we're writing just a channel, then channel-splitting the LHS is OK.
        */
@@ -195,9 +196,9 @@
 
 class ir_vector_splitting_visitor : public ir_rvalue_visitor {
 public:
-   ir_vector_splitting_visitor(exec_list *vars)
+   ir_vector_splitting_visitor(struct hash_table *vars)
    {
-      this->variable_list = vars;
+      this->ht = vars;
    }
 
    virtual ir_visitor_status visit_leave(ir_assignment *);
@@ -205,7 +206,7 @@
    void handle_rvalue(ir_rvalue **rvalue);
    variable_entry *get_splitting_entry(ir_variable *var);
 
-   exec_list *variable_list;
+   struct hash_table *ht;
 };
 
 variable_entry *
@@ -216,13 +217,8 @@
    if (!var->type->is_vector())
       return NULL;
 
-   foreach_in_list(variable_entry, entry, variable_list) {
-      if (entry->var == var) {
-	 return entry;
-      }
-   }
-
-   return NULL;
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   return hte ? (struct variable_entry *) hte->data : NULL;
 }
 
 void
@@ -329,12 +325,15 @@
 bool
 brw_do_vector_splitting(exec_list *instructions)
 {
+   struct hash_entry *hte;
+
    ir_vector_reference_visitor refs;
 
    visit_list_elements(&refs, instructions);
 
    /* Trim out variables we can't split. */
-   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
       if (debug) {
 	 fprintf(stderr, "vector %s@%p: whole_access %d\n",
                  entry->var->name, (void *) entry->var,
@@ -342,11 +341,11 @@
       }
 
       if (entry->whole_vector_access) {
-	 entry->remove();
+         _mesa_hash_table_remove(refs.ht, hte);
       }
    }
 
-   if (refs.variable_list.is_empty())
+   if (refs.ht->entries == 0)
       return false;
 
    void *mem_ctx = ralloc_context(NULL);
@@ -354,7 +353,8 @@
    /* Replace the decls of the vectors to be split with their split
     * components.
     */
-   foreach_in_list(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
       const struct glsl_type *type;
       type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
 
@@ -378,7 +378,7 @@
       entry->var->remove();
    }
 
-   ir_vector_splitting_visitor split(&refs.variable_list);
+   ir_vector_splitting_visitor split(refs.ht);
    visit_list_elements(&split, instructions);
 
    ralloc_free(mem_ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -47,6 +47,7 @@
 #include "glsl/ir_optimization.h"
 #include "program/sampler.h"
 
+using namespace brw;
 
 fs_reg *
 fs_visitor::emit_vs_system_value(int location)
@@ -76,1947 +77,10 @@
    return reg;
 }
 
-void
-fs_visitor::visit(ir_variable *ir)
-{
-   fs_reg *reg = NULL;
-
-   if (variable_storage(ir))
-      return;
-
-   if (ir->data.mode == ir_var_shader_in) {
-      assert(ir->data.location != -1);
-      if (stage == MESA_SHADER_VERTEX) {
-         reg = new(this->mem_ctx)
-            fs_reg(ATTR, ir->data.location,
-                   brw_type_for_base_type(ir->type->get_scalar_type()));
-      } else if (ir->data.location == VARYING_SLOT_POS) {
-         reg = emit_fragcoord_interpolation(ir->data.pixel_center_integer,
-                                            ir->data.origin_upper_left);
-      } else if (ir->data.location == VARYING_SLOT_FACE) {
-	 reg = emit_frontfacing_interpolation();
-      } else {
-         reg = new(this->mem_ctx) fs_reg(vgrf(ir->type));
-         emit_general_interpolation(*reg, ir->name, ir->type,
-                                    (glsl_interp_qualifier) ir->data.interpolation,
-                                    ir->data.location, ir->data.centroid,
-                                    ir->data.sample);
-      }
-      assert(reg);
-      hash_table_insert(this->variable_ht, reg, ir);
-      return;
-   } else if (ir->data.mode == ir_var_shader_out) {
-      reg = new(this->mem_ctx) fs_reg(vgrf(ir->type));
-
-      if (stage == MESA_SHADER_VERTEX) {
-	 int vector_elements =
-	    ir->type->is_array() ? ir->type->fields.array->vector_elements
-				 : ir->type->vector_elements;
-
-	 for (int i = 0; i < (type_size(ir->type) + 3) / 4; i++) {
-	    int output = ir->data.location + i;
-	    this->outputs[output] = *reg;
-	    this->outputs[output].reg_offset = i * 4;
-	    this->output_components[output] = vector_elements;
-	 }
-
-      } else if (ir->data.index > 0) {
-	 assert(ir->data.location == FRAG_RESULT_DATA0);
-	 assert(ir->data.index == 1);
-	 this->dual_src_output = *reg;
-         this->do_dual_src = true;
-      } else if (ir->data.location == FRAG_RESULT_COLOR) {
-	 /* Writing gl_FragColor outputs to all color regions. */
-         assert(stage == MESA_SHADER_FRAGMENT);
-         brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-	 for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
-	    this->outputs[i] = *reg;
-	    this->output_components[i] = 4;
-	 }
-      } else if (ir->data.location == FRAG_RESULT_DEPTH) {
-	 this->frag_depth = *reg;
-      } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) {
-         this->sample_mask = *reg;
-      } else {
-	 /* gl_FragData or a user-defined FS output */
-	 assert(ir->data.location >= FRAG_RESULT_DATA0 &&
-		ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
-
-	 int vector_elements =
-	    ir->type->is_array() ? ir->type->fields.array->vector_elements
-				 : ir->type->vector_elements;
-
-	 /* General color output. */
-	 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
-	    int output = ir->data.location - FRAG_RESULT_DATA0 + i;
-	    this->outputs[output] = offset(*reg, vector_elements * i);
-	    this->output_components[output] = vector_elements;
-	 }
-      }
-   } else if (ir->data.mode == ir_var_uniform) {
-      int param_index = uniforms;
-
-      /* Thanks to the lower_ubo_reference pass, we will see only
-       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-       * variables, so no need for them to be in variable_ht.
-       *
-       * Some uniforms, such as samplers and atomic counters, have no actual
-       * storage, so we should ignore them.
-       */
-      if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
-         return;
-
-      if (dispatch_width == 16) {
-	 if (!variable_storage(ir)) {
-	    fail("Failed to find uniform '%s' in SIMD16\n", ir->name);
-	 }
-	 return;
-      }
-
-      param_size[param_index] = type_size(ir->type);
-      if (!strncmp(ir->name, "gl_", 3)) {
-	 setup_builtin_uniform_values(ir);
-      } else {
-	 setup_uniform_values(ir);
-      }
-
-      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
-      reg->type = brw_type_for_base_type(ir->type);
-
-   } else if (ir->data.mode == ir_var_system_value) {
-      switch (ir->data.location) {
-      case SYSTEM_VALUE_BASE_VERTEX:
-      case SYSTEM_VALUE_VERTEX_ID:
-      case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
-      case SYSTEM_VALUE_INSTANCE_ID:
-         reg = emit_vs_system_value(ir->data.location);
-         break;
-      case SYSTEM_VALUE_SAMPLE_POS:
-	 reg = emit_samplepos_setup();
-         break;
-      case SYSTEM_VALUE_SAMPLE_ID:
-	 reg = emit_sampleid_setup();
-         break;
-      case SYSTEM_VALUE_SAMPLE_MASK_IN:
-         assert(devinfo->gen >= 7);
-         reg = new(mem_ctx)
-            fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
-                          BRW_REGISTER_TYPE_D));
-         break;
-      }
-   }
-
-   if (!reg)
-      reg = new(this->mem_ctx) fs_reg(vgrf(ir->type));
-
-   hash_table_insert(this->variable_ht, reg, ir);
-}
-
-void
-fs_visitor::visit(ir_dereference_variable *ir)
-{
-   fs_reg *reg = variable_storage(ir->var);
-
-   if (!reg) {
-      fail("Failed to find variable storage for %s\n", ir->var->name);
-      this->result = fs_reg(reg_null_d);
-      return;
-   }
-   this->result = *reg;
-}
-
-void
-fs_visitor::visit(ir_dereference_record *ir)
-{
-   const glsl_type *struct_type = ir->record->type;
-
-   ir->record->accept(this);
-
-   unsigned int off = 0;
-   for (unsigned int i = 0; i < struct_type->length; i++) {
-      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
-	 break;
-      off += type_size(struct_type->fields.structure[i].type);
-   }
-   this->result = offset(this->result, off);
-   this->result.type = brw_type_for_base_type(ir->type);
-}
-
-void
-fs_visitor::visit(ir_dereference_array *ir)
-{
-   ir_constant *constant_index;
-   fs_reg src;
-   int element_size = type_size(ir->type);
-
-   constant_index = ir->array_index->as_constant();
-
-   ir->array->accept(this);
-   src = this->result;
-   src.type = brw_type_for_base_type(ir->type);
-
-   if (constant_index) {
-      if (src.file == ATTR) {
-         /* Attribute arrays get loaded as one vec4 per element.  In that case
-          * offset the source register.
-          */
-         src.reg += constant_index->value.i[0];
-      } else {
-         assert(src.file == UNIFORM || src.file == GRF || src.file == HW_REG);
-         src = offset(src, constant_index->value.i[0] * element_size);
-      }
-   } else {
-      /* Variable index array dereference.  We attach the variable index
-       * component to the reg as a pointer to a register containing the
-       * offset.  Currently only uniform arrays are supported in this patch,
-       * and that reladdr pointer is resolved by
-       * move_uniform_array_access_to_pull_constants().  All other array types
-       * are lowered by lower_variable_index_to_cond_assign().
-       */
-      ir->array_index->accept(this);
-
-      fs_reg index_reg;
-      index_reg = vgrf(glsl_type::int_type);
-      emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));
-
-      if (src.reladdr) {
-         emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
-      }
-
-      src.reladdr = ralloc(mem_ctx, fs_reg);
-      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
-   }
-   this->result = src;
-}
-
-fs_inst *
-fs_visitor::emit_lrp(const fs_reg &dst, const fs_reg &x, const fs_reg &y,
-                     const fs_reg &a)
-{
-   if (devinfo->gen < 6) {
-      /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
-      fs_reg y_times_a           = vgrf(glsl_type::float_type);
-      fs_reg one_minus_a         = vgrf(glsl_type::float_type);
-      fs_reg x_times_one_minus_a = vgrf(glsl_type::float_type);
-
-      emit(MUL(y_times_a, y, a));
-
-      fs_reg negative_a = a;
-      negative_a.negate = !a.negate;
-      emit(ADD(one_minus_a, negative_a, fs_reg(1.0f)));
-      emit(MUL(x_times_one_minus_a, x, one_minus_a));
-
-      return emit(ADD(dst, x_times_one_minus_a, y_times_a));
-   } else {
-      /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
-       * we need to reorder the operands.
-       */
-      return emit(LRP(dst, a, y, x));
-   }
-}
-
-void
-fs_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, const fs_reg &dst,
-                        const fs_reg &src0, const fs_reg &src1)
-{
-   assert(conditionalmod == BRW_CONDITIONAL_GE ||
-          conditionalmod == BRW_CONDITIONAL_L);
-
-   fs_inst *inst;
-
-   if (devinfo->gen >= 6) {
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->conditional_mod = conditionalmod;
-   } else {
-      emit(CMP(reg_null_d, src0, src1, conditionalmod));
-
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->predicate = BRW_PREDICATE_NORMAL;
-   }
-}
-
-void
-fs_visitor::emit_uniformize(const fs_reg &dst, const fs_reg &src)
-{
-   const fs_reg chan_index = vgrf(glsl_type::uint_type);
-
-   emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0))
-      ->force_writemask_all = true;
-   emit(SHADER_OPCODE_BROADCAST, component(dst, 0),
-        src, component(chan_index, 0))
-      ->force_writemask_all = true;
-}
-
-bool
-fs_visitor::try_emit_saturate(ir_expression *ir)
-{
-   if (ir->operation != ir_unop_saturate)
-      return false;
-
-   ir_rvalue *sat_val = ir->operands[0];
-
-   fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
-
-   sat_val->accept(this);
-   fs_reg src = this->result;
-
-   fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
-
-   /* If the last instruction from our accept() generated our
-    * src, just set the saturate flag instead of emmitting a separate mov.
-    */
-   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (modify && modify->regs_written == modify->dst.width / 8 &&
-       modify->can_do_saturate()) {
-      modify->saturate = true;
-      this->result = src;
-      return true;
-   }
-
-   return false;
-}
-
-bool
-fs_visitor::try_emit_line(ir_expression *ir)
-{
-   /* LINE's src0 must be of type float. */
-   if (ir->type != glsl_type::float_type)
-      return false;
-
-   ir_rvalue *nonmul = ir->operands[1];
-   ir_expression *mul = ir->operands[0]->as_expression();
-
-   if (!mul || mul->operation != ir_binop_mul) {
-      nonmul = ir->operands[0];
-      mul = ir->operands[1]->as_expression();
-
-      if (!mul || mul->operation != ir_binop_mul)
-         return false;
-   }
-
-   ir_constant *const_add = nonmul->as_constant();
-   if (!const_add)
-      return false;
-
-   int add_operand_vf = brw_float_to_vf(const_add->value.f[0]);
-   if (add_operand_vf == -1)
-      return false;
-
-   ir_rvalue *non_const_mul = mul->operands[1];
-   ir_constant *const_mul = mul->operands[0]->as_constant();
-   if (!const_mul) {
-      const_mul = mul->operands[1]->as_constant();
-
-      if (!const_mul)
-         return false;
-
-      non_const_mul = mul->operands[0];
-   }
-
-   int mul_operand_vf = brw_float_to_vf(const_mul->value.f[0]);
-   if (mul_operand_vf == -1)
-      return false;
-
-   non_const_mul->accept(this);
-   fs_reg src1 = this->result;
-
-   fs_reg src0 = vgrf(ir->type);
-   emit(BRW_OPCODE_MOV, src0,
-        fs_reg((uint8_t)mul_operand_vf, 0, 0, (uint8_t)add_operand_vf));
-
-   this->result = vgrf(ir->type);
-   emit(BRW_OPCODE_LINE, this->result, src0, src1);
-   return true;
-}
-
-bool
-fs_visitor::try_emit_mad(ir_expression *ir)
-{
-   /* 3-src instructions were introduced in gen6. */
-   if (devinfo->gen < 6)
-      return false;
-
-   /* MAD can only handle floating-point data. */
-   if (ir->type != glsl_type::float_type)
-      return false;
-
-   ir_rvalue *nonmul;
-   ir_expression *mul;
-   bool mul_negate, mul_abs;
-
-   for (int i = 0; i < 2; i++) {
-      mul_negate = false;
-      mul_abs = false;
-
-      mul = ir->operands[i]->as_expression();
-      nonmul = ir->operands[1 - i];
-
-      if (mul && mul->operation == ir_unop_abs) {
-         mul = mul->operands[0]->as_expression();
-         mul_abs = true;
-      } else if (mul && mul->operation == ir_unop_neg) {
-         mul = mul->operands[0]->as_expression();
-         mul_negate = true;
-      }
-
-      if (mul && mul->operation == ir_binop_mul)
-         break;
-   }
-
-   if (!mul || mul->operation != ir_binop_mul)
-      return false;
-
-   nonmul->accept(this);
-   fs_reg src0 = this->result;
-
-   mul->operands[0]->accept(this);
-   fs_reg src1 = this->result;
-   src1.negate ^= mul_negate;
-   src1.abs = mul_abs;
-   if (mul_abs)
-      src1.negate = false;
-
-   mul->operands[1]->accept(this);
-   fs_reg src2 = this->result;
-   src2.abs = mul_abs;
-   if (mul_abs)
-      src2.negate = false;
-
-   this->result = vgrf(ir->type);
-   emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);
-
-   return true;
-}
-
-bool
-fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir)
-{
-   /* On platforms that do not natively generate 0u and ~0u for Boolean
-    * results, b2f expressions that look like
-    *
-    *     f = b2f(expr cmp 0)
-    *
-    * will generate better code by pretending the expression is
-    *
-    *     f = ir_triop_csel(0.0, 1.0, expr cmp 0)
-    *
-    * This is because the last instruction of "expr" can generate the
-    * condition code for the "cmp 0".  This avoids having to do the "-(b & 1)"
-    * trick to generate 0u or ~0u for the Boolean result.  This means code like
-    *
-    *     mov(16)         g16<1>F         1F
-    *     mul.ge.f0(16)   null            g6<8,8,1>F      g14<8,8,1>F
-    *     (+f0) sel(16)   m6<1>F          g16<8,8,1>F     0F
-    *
-    * will be generated instead of
-    *
-    *     mul(16)         g2<1>F          g12<8,8,1>F     g4<8,8,1>F
-    *     cmp.ge.f0(16)   g2<1>D          g4<8,8,1>F      0F
-    *     and(16)         g4<1>D          g2<8,8,1>D      1D
-    *     and(16)         m6<1>D          -g4<8,8,1>D     0x3f800000UD
-    *
-    * When the comparison is != 0.0 using the knowledge that the false case
-    * already results in zero would allow better code generation by possibly
-    * avoiding a load-immediate instruction.
-    */
-   ir_expression *cmp = ir->operands[0]->as_expression();
-   if (cmp == NULL)
-      return false;
-
-   if (cmp->operation == ir_binop_nequal) {
-      for (unsigned i = 0; i < 2; i++) {
-         ir_constant *c = cmp->operands[i]->as_constant();
-         if (c == NULL || !c->is_zero())
-            continue;
-
-         ir_expression *expr = cmp->operands[i ^ 1]->as_expression();
-         if (expr != NULL) {
-            fs_reg op[2];
-
-            for (unsigned j = 0; j < 2; j++) {
-               cmp->operands[j]->accept(this);
-               op[j] = this->result;
-
-               resolve_ud_negate(&op[j]);
-            }
-
-            emit_bool_to_cond_code_of_reg(cmp, op);
-
-            /* In this case we know when the condition is true, op[i ^ 1]
-             * contains zero.  Invert the predicate, use op[i ^ 1] as src0,
-             * and immediate 1.0f as src1.
-             */
-            this->result = vgrf(ir->type);
-            op[i ^ 1].type = BRW_REGISTER_TYPE_F;
-
-            fs_inst *inst = emit(SEL(this->result, op[i ^ 1], fs_reg(1.0f)));
-            inst->predicate = BRW_PREDICATE_NORMAL;
-            inst->predicate_inverse = true;
-            return true;
-         }
-      }
-   }
-
-   emit_bool_to_cond_code(cmp);
-
-   fs_reg temp = vgrf(ir->type);
-   emit(MOV(temp, fs_reg(1.0f)));
-
-   this->result = vgrf(ir->type);
-   fs_inst *inst = emit(SEL(this->result, temp, fs_reg(0.0f)));
-   inst->predicate = BRW_PREDICATE_NORMAL;
-
-   return true;
-}
-
-static int
-pack_pixel_offset(float x)
-{
-   /* Clamp upper end of the range to +7/16. See explanation in non-constant
-    * offset case below. */
-   int n = MIN2((int)(x * 16), 7);
-   return n & 0xf;
-}
-
-void
-fs_visitor::emit_interpolate_expression(ir_expression *ir)
-{
-   /* in SIMD16 mode, the pixel interpolator returns coords interleaved
-    * 8 channels at a time, same as the barycentric coords presented in
-    * the FS payload. this requires a bit of extra work to support.
-    */
-   no16("interpolate_at_* not yet supported in SIMD16 mode.");
-
-   assert(stage == MESA_SHADER_FRAGMENT);
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
-   ir_dereference * deref = ir->operands[0]->as_dereference();
-   ir_swizzle * swiz = NULL;
-   if (!deref) {
-      /* the api does not allow a swizzle here, but the varying packing code
-       * may have pushed one into here.
-       */
-      swiz = ir->operands[0]->as_swizzle();
-      assert(swiz);
-      deref = swiz->val->as_dereference();
-   }
-   assert(deref);
-   ir_variable * var = deref->variable_referenced();
-   assert(var);
-
-   /* 1. collect interpolation factors */
-
-   fs_reg dst_xy = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1));
-
-   /* for most messages, we need one reg of ignored data; the hardware requires mlen==1
-    * even when there is no payload. in the per-slot offset case, we'll replace this with
-    * the proper source data. */
-   fs_reg src = vgrf(glsl_type::float_type);
-   int mlen = 1;     /* one reg unless overriden */
-   int reg_width = dispatch_width / 8;
-   fs_inst *inst;
-
-   switch (ir->operation) {
-   case ir_unop_interpolate_at_centroid:
-      inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
-      break;
-
-   case ir_binop_interpolate_at_sample: {
-      ir_constant *sample_num = ir->operands[1]->as_constant();
-      assert(sample_num || !"nonconstant sample number should have been lowered.");
-
-      unsigned msg_data = sample_num->value.i[0] << 4;
-      inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data));
-      break;
-   }
-
-   case ir_binop_interpolate_at_offset: {
-      ir_constant *const_offset = ir->operands[1]->as_constant();
-      if (const_offset) {
-         unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
-                            (pack_pixel_offset(const_offset->value.f[1]) << 4);
-         inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
-                     fs_reg(msg_data));
-      } else {
-         /* pack the operands: hw wants offsets as 4 bit signed ints */
-         ir->operands[1]->accept(this);
-         src = vgrf(glsl_type::ivec2_type);
-         fs_reg src2 = src;
-         for (int i = 0; i < 2; i++) {
-            fs_reg temp = vgrf(glsl_type::float_type);
-            emit(MUL(temp, this->result, fs_reg(16.0f)));
-            emit(MOV(src2, temp));  /* float to int */
-
-            /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires
-             * that we support a maximum offset of +0.5, which isn't representable
-             * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16,
-             * which is the opposite of what the shader author wanted.
-             *
-             * This is legal due to ARB_gpu_shader5's quantization rules:
-             *
-             * "Not all values of <offset> may be supported; x and y offsets may
-             * be rounded to fixed-point values with the number of fraction bits
-             * given by the implementation-dependent constant
-             * FRAGMENT_INTERPOLATION_OFFSET_BITS"
-             */
-
-            fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
-            inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
-
-            src2 = offset(src2, 1);
-            this->result = offset(this->result, 1);
-         }
-
-         mlen = 2 * reg_width;
-         inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
-                     fs_reg(0u));
-      }
-      break;
-   }
-
-   default:
-      unreachable("not reached");
-   }
-
-   inst->mlen = mlen;
-   inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */
-   inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) ==
-         INTERP_QUALIFIER_NOPERSPECTIVE;
-
-   /* 2. emit linterp */
-
-   fs_reg res = vgrf(ir->type);
-   this->result = res;
-
-   for (int i = 0; i < ir->type->vector_elements; i++) {
-      int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
-      emit(FS_OPCODE_LINTERP, res, dst_xy,
-           fs_reg(interp_reg(var->data.location, ch)));
-      res = offset(res, 1);
-   }
-}
-
-void
-fs_visitor::visit(ir_expression *ir)
-{
-   unsigned int operand;
-   fs_reg op[3], temp;
-   fs_inst *inst;
-   struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
-
-   assert(ir->get_num_operands() <= 3);
-
-   if (try_emit_saturate(ir))
-      return;
-
-   /* Deal with the real oddball stuff first */
-   switch (ir->operation) {
-   case ir_binop_add:
-      if (devinfo->gen <= 5 && try_emit_line(ir))
-         return;
-      if (try_emit_mad(ir))
-         return;
-      break;
-
-   case ir_triop_csel:
-      ir->operands[1]->accept(this);
-      op[1] = this->result;
-      ir->operands[2]->accept(this);
-      op[2] = this->result;
-
-      emit_bool_to_cond_code(ir->operands[0]);
-
-      this->result = vgrf(ir->type);
-      inst = emit(SEL(this->result, op[1], op[2]));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      return;
-
-   case ir_unop_b2f:
-      if (devinfo->gen <= 5 && try_emit_b2f_of_comparison(ir))
-         return;
-      break;
-
-   case ir_unop_interpolate_at_centroid:
-   case ir_binop_interpolate_at_offset:
-   case ir_binop_interpolate_at_sample:
-      emit_interpolate_expression(ir);
-      return;
-
-   default:
-      break;
-   }
-
-   for (operand = 0; operand < ir->get_num_operands(); operand++) {
-      ir->operands[operand]->accept(this);
-      if (this->result.file == BAD_FILE) {
-	 fail("Failed to get tree for expression operand:\n");
-	 ir->operands[operand]->fprint(stderr);
-         fprintf(stderr, "\n");
-      }
-      assert(this->result.file == GRF ||
-             this->result.file == UNIFORM || this->result.file == ATTR);
-      op[operand] = this->result;
-
-      /* Matrix expression operands should have been broken down to vector
-       * operations already.
-       */
-      assert(!ir->operands[operand]->type->is_matrix());
-      /* And then those vector operands should have been broken down to scalar.
-       */
-      assert(!ir->operands[operand]->type->is_vector());
-   }
-
-   /* Storage for our result.  If our result goes into an assignment, it will
-    * just get copy-propagated out, so no worries.
-    */
-   this->result = vgrf(ir->type);
-
-   switch (ir->operation) {
-   case ir_unop_logic_not:
-      emit(NOT(this->result, op[0]));
-      break;
-   case ir_unop_neg:
-      op[0].negate = !op[0].negate;
-      emit(MOV(this->result, op[0]));
-      break;
-   case ir_unop_abs:
-      op[0].abs = true;
-      op[0].negate = false;
-      emit(MOV(this->result, op[0]));
-      break;
-   case ir_unop_sign:
-      if (ir->type->is_float()) {
-         /* AND(val, 0x80000000) gives the sign bit.
-          *
-          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
-          * zero.
-          */
-         emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
-
-         op[0].type = BRW_REGISTER_TYPE_UD;
-         this->result.type = BRW_REGISTER_TYPE_UD;
-         emit(AND(this->result, op[0], fs_reg(0x80000000u)));
-
-         inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-
-         this->result.type = BRW_REGISTER_TYPE_F;
-      } else {
-         /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
-          *               -> non-negative val generates 0x00000000.
-          *  Predicated OR sets 1 if val is positive.
-          */
-         emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
-
-         emit(ASR(this->result, op[0], fs_reg(31)));
-
-         inst = emit(OR(this->result, this->result, fs_reg(1)));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
-      break;
-   case ir_unop_rcp:
-      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
-      break;
-
-   case ir_unop_exp2:
-      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
-      break;
-   case ir_unop_log2:
-      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
-      break;
-   case ir_unop_exp:
-   case ir_unop_log:
-      unreachable("not reached: should be handled by ir_explog_to_explog2");
-   case ir_unop_sin:
-      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
-      break;
-   case ir_unop_cos:
-      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
-      break;
-
-   case ir_unop_dFdx:
-      /* Select one of the two opcodes based on the glHint value. */
-      if (fs_key->high_quality_derivatives)
-         emit(FS_OPCODE_DDX_FINE, this->result, op[0]);
-      else
-         emit(FS_OPCODE_DDX_COARSE, this->result, op[0]);
-      break;
-
-   case ir_unop_dFdx_coarse:
-      emit(FS_OPCODE_DDX_COARSE, this->result, op[0]);
-      break;
-
-   case ir_unop_dFdx_fine:
-      emit(FS_OPCODE_DDX_FINE, this->result, op[0]);
-      break;
-
-   case ir_unop_dFdy:
-      /* Select one of the two opcodes based on the glHint value. */
-      if (fs_key->high_quality_derivatives)
-         emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo));
-      else
-         emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo));
-      break;
-
-   case ir_unop_dFdy_coarse:
-      emit(FS_OPCODE_DDY_COARSE, result, op[0], fs_reg(fs_key->render_to_fbo));
-      break;
-
-   case ir_unop_dFdy_fine:
-      emit(FS_OPCODE_DDY_FINE, result, op[0], fs_reg(fs_key->render_to_fbo));
-      break;
-
-   case ir_binop_add:
-      emit(ADD(this->result, op[0], op[1]));
-      break;
-   case ir_binop_sub:
-      unreachable("not reached: should be handled by ir_sub_to_add_neg");
-
-   case ir_binop_mul:
-      emit(MUL(this->result, op[0], op[1]));
-      break;
-   case ir_binop_imul_high: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                  this->result.type);
-
-      fs_inst *mul = emit(MUL(acc, op[0], op[1]));
-      emit(MACH(this->result, op[0], op[1]));
-
-      /* Until Gen8, integer multiplies read 32-bits from one source, and
-       * 16-bits from the other, and relying on the MACH instruction to
-       * generate the high bits of the result.
-       *
-       * On Gen8, the multiply instruction does a full 32x32-bit multiply,
-       * but in order to do a 64x64-bit multiply we have to simulate the
-       * previous behavior and then use a MACH instruction.
-       *
-       * FINISHME: Don't use source modifiers on src1.
-       */
-      if (devinfo->gen >= 8) {
-         assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
-                mul->src[1].type == BRW_REGISTER_TYPE_UD);
-         if (mul->src[1].type == BRW_REGISTER_TYPE_D) {
-            mul->src[1].type = BRW_REGISTER_TYPE_W;
-            mul->src[1].stride = 2;
-         } else {
-            mul->src[1].type = BRW_REGISTER_TYPE_UW;
-            mul->src[1].stride = 2;
-         }
-      }
-
-      break;
-   }
-   case ir_binop_div:
-      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
-      assert(ir->type->is_integer());
-      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
-      break;
-   case ir_binop_carry: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                  BRW_REGISTER_TYPE_UD);
-
-      emit(ADDC(reg_null_ud, op[0], op[1]));
-      emit(MOV(this->result, fs_reg(acc)));
-      break;
-   }
-   case ir_binop_borrow: {
-      if (devinfo->gen >= 7)
-         no16("SIMD16 explicit accumulator operands unsupported\n");
-
-      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
-                                  BRW_REGISTER_TYPE_UD);
-
-      emit(SUBB(reg_null_ud, op[0], op[1]));
-      emit(MOV(this->result, fs_reg(acc)));
-      break;
-   }
-   case ir_binop_mod:
-      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
-      assert(ir->type->is_integer());
-      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
-      break;
-
-   case ir_binop_less:
-   case ir_binop_greater:
-   case ir_binop_lequal:
-   case ir_binop_gequal:
-   case ir_binop_equal:
-   case ir_binop_all_equal:
-   case ir_binop_nequal:
-   case ir_binop_any_nequal:
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(ir->operands[0], &op[0]);
-         resolve_bool_comparison(ir->operands[1], &op[1]);
-      }
-
-      emit(CMP(this->result, op[0], op[1],
-               brw_conditional_for_comparison(ir->operation)));
-      break;
-
-   case ir_binop_logic_xor:
-      emit(XOR(this->result, op[0], op[1]));
-      break;
-
-   case ir_binop_logic_or:
-      emit(OR(this->result, op[0], op[1]));
-      break;
-
-   case ir_binop_logic_and:
-      emit(AND(this->result, op[0], op[1]));
-      break;
-
-   case ir_binop_dot:
-   case ir_unop_any:
-      unreachable("not reached: should be handled by brw_fs_channel_expressions");
-
-   case ir_unop_noise:
-      unreachable("not reached: should be handled by lower_noise");
-
-   case ir_quadop_vector:
-      unreachable("not reached: should be handled by lower_quadop_vector");
-
-   case ir_binop_vector_extract:
-      unreachable("not reached: should be handled by lower_vec_index_to_cond_assign()");
-
-   case ir_triop_vector_insert:
-      unreachable("not reached: should be handled by lower_vector_insert()");
-
-   case ir_binop_ldexp:
-      unreachable("not reached: should be handled by ldexp_to_arith()");
-
-   case ir_unop_sqrt:
-      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
-      break;
-
-   case ir_unop_rsq:
-      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
-      break;
-
-   case ir_unop_bitcast_i2f:
-   case ir_unop_bitcast_u2f:
-      op[0].type = BRW_REGISTER_TYPE_F;
-      this->result = op[0];
-      break;
-   case ir_unop_i2u:
-   case ir_unop_bitcast_f2u:
-      op[0].type = BRW_REGISTER_TYPE_UD;
-      this->result = op[0];
-      break;
-   case ir_unop_u2i:
-   case ir_unop_bitcast_f2i:
-      op[0].type = BRW_REGISTER_TYPE_D;
-      this->result = op[0];
-      break;
-   case ir_unop_i2f:
-   case ir_unop_u2f:
-   case ir_unop_f2i:
-   case ir_unop_f2u:
-      emit(MOV(this->result, op[0]));
-      break;
-
-   case ir_unop_b2i:
-      emit(AND(this->result, op[0], fs_reg(1)));
-      break;
-   case ir_unop_b2f:
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(ir->operands[0], &op[0]);
-      }
-      op[0].type = BRW_REGISTER_TYPE_D;
-      this->result.type = BRW_REGISTER_TYPE_D;
-      emit(AND(this->result, op[0], fs_reg(0x3f800000u)));
-      this->result.type = BRW_REGISTER_TYPE_F;
-      break;
-
-   case ir_unop_f2b:
-      emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
-      break;
-   case ir_unop_i2b:
-      emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
-      break;
-
-   case ir_unop_trunc:
-      emit(RNDZ(this->result, op[0]));
-      break;
-   case ir_unop_ceil: {
-         fs_reg tmp = vgrf(ir->type);
-         op[0].negate = !op[0].negate;
-         emit(RNDD(tmp, op[0]));
-         tmp.negate = true;
-         emit(MOV(this->result, tmp));
-      }
-      break;
-   case ir_unop_floor:
-      emit(RNDD(this->result, op[0]));
-      break;
-   case ir_unop_fract:
-      emit(FRC(this->result, op[0]));
-      break;
-   case ir_unop_round_even:
-      emit(RNDE(this->result, op[0]));
-      break;
-
-   case ir_binop_min:
-   case ir_binop_max:
-      resolve_ud_negate(&op[0]);
-      resolve_ud_negate(&op[1]);
-      emit_minmax(ir->operation == ir_binop_min ?
-                  BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
-                  this->result, op[0], op[1]);
-      break;
-   case ir_unop_pack_snorm_2x16:
-   case ir_unop_pack_snorm_4x8:
-   case ir_unop_pack_unorm_2x16:
-   case ir_unop_pack_unorm_4x8:
-   case ir_unop_unpack_snorm_2x16:
-   case ir_unop_unpack_snorm_4x8:
-   case ir_unop_unpack_unorm_2x16:
-   case ir_unop_unpack_unorm_4x8:
-   case ir_unop_unpack_half_2x16:
-   case ir_unop_pack_half_2x16:
-      unreachable("not reached: should be handled by lower_packing_builtins");
-   case ir_unop_unpack_half_2x16_split_x:
-      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
-      break;
-   case ir_unop_unpack_half_2x16_split_y:
-      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
-      break;
-   case ir_binop_pow:
-      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
-      break;
-
-   case ir_unop_bitfield_reverse:
-      emit(BFREV(this->result, op[0]));
-      break;
-   case ir_unop_bit_count:
-      emit(CBIT(this->result, op[0]));
-      break;
-   case ir_unop_find_msb:
-      temp = vgrf(glsl_type::uint_type);
-      emit(FBH(temp, op[0]));
-
-      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
-       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
-       * subtract the result from 31 to convert the MSB count into an LSB count.
-       */
-
-      /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
-      emit(MOV(this->result, temp));
-      emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
-
-      temp.negate = true;
-      inst = emit(ADD(this->result, temp, fs_reg(31)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      break;
-   case ir_unop_find_lsb:
-      emit(FBL(this->result, op[0]));
-      break;
-   case ir_unop_saturate:
-      inst = emit(MOV(this->result, op[0]));
-      inst->saturate = true;
-      break;
-   case ir_triop_bitfield_extract:
-      /* Note that the instruction's argument order is reversed from GLSL
-       * and the IR.
-       */
-      emit(BFE(this->result, op[2], op[1], op[0]));
-      break;
-   case ir_binop_bfm:
-      emit(BFI1(this->result, op[0], op[1]));
-      break;
-   case ir_triop_bfi:
-      emit(BFI2(this->result, op[0], op[1], op[2]));
-      break;
-   case ir_quadop_bitfield_insert:
-      unreachable("not reached: should be handled by "
-              "lower_instructions::bitfield_insert_to_bfm_bfi");
-
-   case ir_unop_bit_not:
-      emit(NOT(this->result, op[0]));
-      break;
-   case ir_binop_bit_and:
-      emit(AND(this->result, op[0], op[1]));
-      break;
-   case ir_binop_bit_xor:
-      emit(XOR(this->result, op[0], op[1]));
-      break;
-   case ir_binop_bit_or:
-      emit(OR(this->result, op[0], op[1]));
-      break;
-
-   case ir_binop_lshift:
-      emit(SHL(this->result, op[0], op[1]));
-      break;
-
-   case ir_binop_rshift:
-      if (ir->type->base_type == GLSL_TYPE_INT)
-	 emit(ASR(this->result, op[0], op[1]));
-      else
-	 emit(SHR(this->result, op[0], op[1]));
-      break;
-   case ir_binop_pack_half_2x16_split:
-      emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
-      break;
-   case ir_binop_ubo_load: {
-      /* This IR node takes a constant uniform block and a constant or
-       * variable byte offset within the block and loads a vector from that.
-       */
-      ir_constant *const_uniform_block = ir->operands[0]->as_constant();
-      ir_constant *const_offset = ir->operands[1]->as_constant();
-      fs_reg surf_index;
-
-      if (const_uniform_block) {
-         /* The block index is a constant, so just emit the binding table entry
-          * as an immediate.
-          */
-         surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
-                                 const_uniform_block->value.u[0]);
-      } else {
-         /* The block index is not a constant. Evaluate the index expression
-          * per-channel and add the base UBO index; we have to select a value
-          * from any live channel.
-          */
-         surf_index = vgrf(glsl_type::uint_type);
-         emit(ADD(surf_index, op[0],
-                  fs_reg(stage_prog_data->binding_table.ubo_start)));
-         emit_uniformize(surf_index, surf_index);
-
-         /* Assume this may touch any UBO. It would be nice to provide
-          * a tighter bound, but the array information is already lowered away.
-          */
-         brw_mark_surface_used(prog_data,
-                               stage_prog_data->binding_table.ubo_start +
-                               shader_prog->NumUniformBlocks - 1);
-      }
-
-      if (const_offset) {
-         fs_reg packed_consts = vgrf(glsl_type::float_type);
-         packed_consts.type = result.type;
-
-         fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
-         emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
-                                   packed_consts, surf_index, const_offset_reg));
-
-         for (int i = 0; i < ir->type->vector_elements; i++) {
-            packed_consts.set_smear(const_offset->value.u[0] % 16 / 4 + i);
-
-            /* The std140 packing rules don't allow vectors to cross 16-byte
-             * boundaries, and a reg is 32 bytes.
-             */
-            assert(packed_consts.subreg_offset < 32);
-
-            /* UBO bools are any nonzero value.  We consider bools to be
-             * values with the low bit set to 1.  Convert them using CMP.
-             */
-            if (ir->type->base_type == GLSL_TYPE_BOOL) {
-               emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
-            } else {
-               emit(MOV(result, packed_consts));
-            }
-
-            result = offset(result, 1);
-         }
-      } else {
-         /* Turn the byte offset into a dword offset. */
-         fs_reg base_offset = vgrf(glsl_type::int_type);
-         emit(SHR(base_offset, op[1], fs_reg(2)));
-
-         for (int i = 0; i < ir->type->vector_elements; i++) {
-            emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
-                                            base_offset, i));
-
-            if (ir->type->base_type == GLSL_TYPE_BOOL)
-               emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
-
-            result = offset(result, 1);
-         }
-      }
-
-      result.reg_offset = 0;
-      break;
-   }
-
-   case ir_triop_fma:
-      /* Note that the instruction's argument order is reversed from GLSL
-       * and the IR.
-       */
-      emit(MAD(this->result, op[2], op[1], op[0]));
-      break;
-
-   case ir_triop_lrp:
-      emit_lrp(this->result, op[0], op[1], op[2]);
-      break;
-
-   case ir_triop_csel:
-   case ir_unop_interpolate_at_centroid:
-   case ir_binop_interpolate_at_offset:
-   case ir_binop_interpolate_at_sample:
-      unreachable("already handled above");
-      break;
-
-   case ir_unop_d2f:
-   case ir_unop_f2d:
-   case ir_unop_d2i:
-   case ir_unop_i2d:
-   case ir_unop_d2u:
-   case ir_unop_u2d:
-   case ir_unop_d2b:
-   case ir_unop_pack_double_2x32:
-   case ir_unop_unpack_double_2x32:
-   case ir_unop_frexp_sig:
-   case ir_unop_frexp_exp:
-      unreachable("fp64 todo");
-      break;
-   }
-}
-
-void
-fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
-				   const glsl_type *type, bool predicated)
-{
-   switch (type->base_type) {
-   case GLSL_TYPE_FLOAT:
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_BOOL:
-      for (unsigned int i = 0; i < type->components(); i++) {
-	 l.type = brw_type_for_base_type(type);
-	 r.type = brw_type_for_base_type(type);
-
-	 if (predicated || !l.equals(r)) {
-	    fs_inst *inst = emit(MOV(l, r));
-	    inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
-	 }
-
-	 l = offset(l, 1);
-	 r = offset(r, 1);
-      }
-      break;
-   case GLSL_TYPE_ARRAY:
-      for (unsigned int i = 0; i < type->length; i++) {
-	 emit_assignment_writes(l, r, type->fields.array, predicated);
-      }
-      break;
-
-   case GLSL_TYPE_STRUCT:
-      for (unsigned int i = 0; i < type->length; i++) {
-	 emit_assignment_writes(l, r, type->fields.structure[i].type,
-				predicated);
-      }
-      break;
-
-   case GLSL_TYPE_SAMPLER:
-   case GLSL_TYPE_IMAGE:
-   case GLSL_TYPE_ATOMIC_UINT:
-      break;
-
-   case GLSL_TYPE_DOUBLE:
-   case GLSL_TYPE_VOID:
-   case GLSL_TYPE_ERROR:
-   case GLSL_TYPE_INTERFACE:
-      unreachable("not reached");
-   }
-}
-
-/* If the RHS processing resulted in an instruction generating a
- * temporary value, and it would be easy to rewrite the instruction to
- * generate its result right into the LHS instead, do so.  This ends
- * up reliably removing instructions where it can be tricky to do so
- * later without real UD chain information.
- */
-bool
-fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
-                                   fs_reg dst,
-                                   fs_reg src,
-                                   fs_inst *pre_rhs_inst,
-                                   fs_inst *last_rhs_inst)
-{
-   /* Only attempt if we're doing a direct assignment. */
-   if (ir->condition ||
-       !(ir->lhs->type->is_scalar() ||
-        (ir->lhs->type->is_vector() &&
-         ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
-      return false;
-
-   /* Make sure the last instruction generated our source reg. */
-   fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
-						    last_rhs_inst,
-						    src);
-   if (!modify)
-      return false;
-
-   /* If last_rhs_inst wrote a different number of components than our LHS,
-    * we can't safely rewrite it.
-    */
-   if (alloc.sizes[dst.reg] != modify->regs_written)
-      return false;
-
-   /* Success!  Rewrite the instruction. */
-   modify->dst = dst;
-
-   return true;
-}
-
-void
-fs_visitor::visit(ir_assignment *ir)
-{
-   fs_reg l, r;
-   fs_inst *inst;
-
-   /* FINISHME: arrays on the lhs */
-   ir->lhs->accept(this);
-   l = this->result;
-
-   fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
-
-   ir->rhs->accept(this);
-   r = this->result;
-
-   fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
-
-   assert(l.file != BAD_FILE);
-   assert(r.file != BAD_FILE);
-
-   if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
-      return;
-
-   if (ir->condition) {
-      emit_bool_to_cond_code(ir->condition);
-   }
-
-   if (ir->lhs->type->is_scalar() ||
-       ir->lhs->type->is_vector()) {
-      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
-	 if (ir->write_mask & (1 << i)) {
-	    inst = emit(MOV(l, r));
-	    if (ir->condition)
-	       inst->predicate = BRW_PREDICATE_NORMAL;
-	    r = offset(r, 1);
-	 }
-	 l = offset(l, 1);
-      }
-   } else {
-      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
-   }
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
-                              fs_reg coordinate, int coord_components,
-                              fs_reg shadow_c,
-                              fs_reg lod, fs_reg dPdy, int grad_components,
-                              uint32_t sampler)
-{
-   int mlen;
-   int base_mrf = 1;
-   bool simd16 = false;
-   fs_reg orig_dst;
-
-   /* g0 header. */
-   mlen = 1;
-
-   if (shadow_c.file != BAD_FILE) {
-      for (int i = 0; i < coord_components; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
-	 coordinate = offset(coordinate, 1);
-      }
-
-      /* gen4's SIMD8 sampler always has the slots for u,v,r present.
-       * the unused slots must be zeroed.
-       */
-      for (int i = coord_components; i < 3; i++) {
-         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
-      }
-      mlen += 3;
-
-      if (op == ir_tex) {
-	 /* There's no plain shadow compare message, so we use shadow
-	  * compare with a bias of 0.0.
-	  */
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
-	 mlen++;
-      } else if (op == ir_txb || op == ir_txl) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
-	 mlen++;
-      } else {
-         unreachable("Should not get here.");
-      }
-
-      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
-      mlen++;
-   } else if (op == ir_tex) {
-      for (int i = 0; i < coord_components; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
-	 coordinate = offset(coordinate, 1);
-      }
-      /* zero the others. */
-      for (int i = coord_components; i<3; i++) {
-         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
-      }
-      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
-      mlen += 3;
-   } else if (op == ir_txd) {
-      fs_reg &dPdx = lod;
-
-      for (int i = 0; i < coord_components; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
-	 coordinate = offset(coordinate, 1);
-      }
-      /* the slots for u and v are always present, but r is optional */
-      mlen += MAX2(coord_components, 2);
-
-      /*  P   = u, v, r
-       * dPdx = dudx, dvdx, drdx
-       * dPdy = dudy, dvdy, drdy
-       *
-       * 1-arg: Does not exist.
-       *
-       * 2-arg: dudx   dvdx   dudy   dvdy
-       *        dPdx.x dPdx.y dPdy.x dPdy.y
-       *        m4     m5     m6     m7
-       *
-       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
-       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
-       *        m5     m6     m7     m8     m9     m10
-       */
-      for (int i = 0; i < grad_components; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
-	 dPdx = offset(dPdx, 1);
-      }
-      mlen += MAX2(grad_components, 2);
-
-      for (int i = 0; i < grad_components; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
-	 dPdy = offset(dPdy, 1);
-      }
-      mlen += MAX2(grad_components, 2);
-   } else if (op == ir_txs) {
-      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
-      simd16 = true;
-      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
-      mlen += 2;
-   } else {
-      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
-       * instructions.  We'll need to do SIMD16 here.
-       */
-      simd16 = true;
-      assert(op == ir_txb || op == ir_txl || op == ir_txf);
-
-      for (int i = 0; i < coord_components; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
-                  coordinate));
-	 coordinate = offset(coordinate, 1);
-      }
-
-      /* Initialize the rest of u/v/r with 0.0.  Empirically, this seems to
-       * be necessary for TXF (ld), but seems wise to do for all messages.
-       */
-      for (int i = coord_components; i < 3; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
-      }
-
-      /* lod/bias appears after u/v/r. */
-      mlen += 6;
-
-      emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
-      mlen++;
-
-      /* The unused upper half. */
-      mlen++;
-   }
-
-   if (simd16) {
-      /* Now, since we're doing simd16, the return is 2 interleaved
-       * vec4s where the odd-indexed ones are junk. We'll need to move
-       * this weirdness around to the expected layout.
-       */
-      orig_dst = dst;
-      dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type);
-   }
-
-   enum opcode opcode;
-   switch (op) {
-   case ir_tex: opcode = SHADER_OPCODE_TEX; break;
-   case ir_txb: opcode = FS_OPCODE_TXB; break;
-   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
-   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
-   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
-   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   default:
-      unreachable("not reached");
-   }
-
-   fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
-   inst->base_mrf = base_mrf;
-   inst->mlen = mlen;
-   inst->header_size = 1;
-   inst->regs_written = simd16 ? 8 : 4;
-
-   if (simd16) {
-      for (int i = 0; i < 4; i++) {
-	 emit(MOV(orig_dst, dst));
-	 orig_dst = offset(orig_dst, 1);
-	 dst = offset(dst, 2);
-      }
-   }
-
-   return inst;
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
-                                     fs_reg coordinate, int vector_elements,
-                                     fs_reg shadow_c, fs_reg lod,
-                                     uint32_t sampler)
-{
-   fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
-   bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf || op == ir_txs;
-
-   if (has_lod && shadow_c.file != BAD_FILE)
-      no16("TXB and TXL with shadow comparison unsupported in SIMD16.");
-
-   if (op == ir_txd)
-      no16("textureGrad unsupported in SIMD16.");
-
-   /* Copy the coordinates. */
-   for (int i = 0; i < vector_elements; i++) {
-      emit(MOV(retype(offset(message, i), coordinate.type), coordinate));
-      coordinate = offset(coordinate, 1);
-   }
-
-   fs_reg msg_end = offset(message, vector_elements);
-
-   /* Messages other than sample and ld require all three components */
-   if (vector_elements > 0 && (has_lod || shadow_c.file != BAD_FILE)) {
-      for (int i = vector_elements; i < 3; i++) {
-         emit(MOV(offset(message, i), fs_reg(0.0f)));
-      }
-      msg_end = offset(message, 3);
-   }
-
-   if (has_lod) {
-      fs_reg msg_lod = retype(msg_end, op == ir_txf ?
-                              BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
-      emit(MOV(msg_lod, lod));
-      msg_end = offset(msg_lod, 1);
-   }
-
-   if (shadow_c.file != BAD_FILE) {
-      fs_reg msg_ref = offset(message, 3 + has_lod);
-      emit(MOV(msg_ref, shadow_c));
-      msg_end = offset(msg_ref, 1);
-   }
-
-   enum opcode opcode;
-   switch (op) {
-   case ir_tex: opcode = SHADER_OPCODE_TEX; break;
-   case ir_txb: opcode = FS_OPCODE_TXB;     break;
-   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
-   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
-   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
-   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   default: unreachable("not reached");
-   }
-
-   fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
-   inst->base_mrf = message.reg - 1;
-   inst->mlen = msg_end.reg - inst->base_mrf;
-   inst->header_size = 1;
-   inst->regs_written = 8;
-
-   return inst;
-}
-
-/* gen5's sampler has slots for u, v, r, array index, then optional
- * parameters like shadow comparitor or LOD bias.  If optional
- * parameters aren't present, those base slots are optional and don't
- * need to be included in the message.
- *
- * We don't fill in the unnecessary slots regardless, which may look
- * surprising in the disassembly.
- */
-fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
-                              fs_reg coordinate, int vector_elements,
-                              fs_reg shadow_c,
-                              fs_reg lod, fs_reg lod2, int grad_components,
-                              fs_reg sample_index, uint32_t sampler,
-                              bool has_offset)
-{
-   int reg_width = dispatch_width / 8;
-   unsigned header_size = 0;
-
-   fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
-   fs_reg msg_coords = message;
-
-   if (has_offset) {
-      /* The offsets set up by the ir_texture visitor are in the
-       * m1 header, so we can't go headerless.
-       */
-      header_size = 1;
-      message.reg--;
-   }
-
-   for (int i = 0; i < vector_elements; i++) {
-      emit(MOV(retype(offset(msg_coords, i), coordinate.type), coordinate));
-      coordinate = offset(coordinate, 1);
-   }
-   fs_reg msg_end = offset(msg_coords, vector_elements);
-   fs_reg msg_lod = offset(msg_coords, 4);
-
-   if (shadow_c.file != BAD_FILE) {
-      fs_reg msg_shadow = msg_lod;
-      emit(MOV(msg_shadow, shadow_c));
-      msg_lod = offset(msg_shadow, 1);
-      msg_end = msg_lod;
-   }
-
-   enum opcode opcode;
-   switch (op) {
-   case ir_tex:
-      opcode = SHADER_OPCODE_TEX;
-      break;
-   case ir_txb:
-      emit(MOV(msg_lod, lod));
-      msg_end = offset(msg_lod, 1);
-
-      opcode = FS_OPCODE_TXB;
-      break;
-   case ir_txl:
-      emit(MOV(msg_lod, lod));
-      msg_end = offset(msg_lod, 1);
-
-      opcode = SHADER_OPCODE_TXL;
-      break;
-   case ir_txd: {
-      /**
-       *  P   =  u,    v,    r
-       * dPdx = dudx, dvdx, drdx
-       * dPdy = dudy, dvdy, drdy
-       *
-       * Load up these values:
-       * - dudx   dudy   dvdx   dvdy   drdx   drdy
-       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
-       */
-      msg_end = msg_lod;
-      for (int i = 0; i < grad_components; i++) {
-         emit(MOV(msg_end, lod));
-         lod = offset(lod, 1);
-         msg_end = offset(msg_end, 1);
-
-         emit(MOV(msg_end, lod2));
-         lod2 = offset(lod2, 1);
-         msg_end = offset(msg_end, 1);
-      }
-
-      opcode = SHADER_OPCODE_TXD;
-      break;
-   }
-   case ir_txs:
-      msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
-      emit(MOV(msg_lod, lod));
-      msg_end = offset(msg_lod, 1);
-
-      opcode = SHADER_OPCODE_TXS;
-      break;
-   case ir_query_levels:
-      msg_lod = msg_end;
-      emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
-      msg_end = offset(msg_lod, 1);
-
-      opcode = SHADER_OPCODE_TXS;
-      break;
-   case ir_txf:
-      msg_lod = offset(msg_coords, 3);
-      emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod));
-      msg_end = offset(msg_lod, 1);
-
-      opcode = SHADER_OPCODE_TXF;
-      break;
-   case ir_txf_ms:
-      msg_lod = offset(msg_coords, 3);
-      /* lod */
-      emit(MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
-      /* sample index */
-      emit(MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index));
-      msg_end = offset(msg_lod, 2);
-
-      opcode = SHADER_OPCODE_TXF_CMS;
-      break;
-   case ir_lod:
-      opcode = SHADER_OPCODE_LOD;
-      break;
-   case ir_tg4:
-      opcode = SHADER_OPCODE_TG4;
-      break;
-   default:
-      unreachable("not reached");
-   }
-
-   fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
-   inst->base_mrf = message.reg;
-   inst->mlen = msg_end.reg - message.reg;
-   inst->header_size = header_size;
-   inst->regs_written = 4 * reg_width;
-
-   if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
-      fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
-           " disallowed by hardware\n");
-   }
-
-   return inst;
-}
-
-static bool
-is_high_sampler(const struct brw_device_info *devinfo, fs_reg sampler)
-{
-   if (devinfo->gen < 8 && !devinfo->is_haswell)
-      return false;
-
-   return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
-                              fs_reg coordinate, int coord_components,
-                              fs_reg shadow_c,
-                              fs_reg lod, fs_reg lod2, int grad_components,
-                              fs_reg sample_index, fs_reg mcs, fs_reg sampler,
-                              fs_reg offset_value)
-{
-   int reg_width = dispatch_width / 8;
-   unsigned header_size = 0;
-
-   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE);
-   for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) {
-      sources[i] = vgrf(glsl_type::float_type);
-   }
-   int length = 0;
-
-   if (op == ir_tg4 || offset_value.file != BAD_FILE ||
-       is_high_sampler(devinfo, sampler)) {
-      /* For general texture offsets (no txf workaround), we need a header to
-       * put them in.  Note that for SIMD16 we're making space for two actual
-       * hardware registers here, so the emit will have to fix up for this.
-       *
-       * * ir4_tg4 needs to place its channel select in the header,
-       * for interaction with ARB_texture_swizzle
-       *
-       * The sampler index is only 4-bits, so for larger sampler numbers we
-       * need to offset the Sampler State Pointer in the header.
-       */
-      header_size = 1;
-      sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-      length++;
-   }
-
-   if (shadow_c.file != BAD_FILE) {
-      emit(MOV(sources[length], shadow_c));
-      length++;
-   }
-
-   bool has_nonconstant_offset =
-      offset_value.file != BAD_FILE && offset_value.file != IMM;
-   bool coordinate_done = false;
-
-   /* The sampler can only meaningfully compute LOD for fragment shader
-    * messages. For all other stages, we change the opcode to ir_txl and
-    * hardcode the LOD to 0.
-    */
-   if (stage != MESA_SHADER_FRAGMENT && op == ir_tex) {
-      op = ir_txl;
-      lod = fs_reg(0.0f);
-   }
-
-   /* Set up the LOD info */
-   switch (op) {
-   case ir_tex:
-   case ir_lod:
-      break;
-   case ir_txb:
-      emit(MOV(sources[length], lod));
-      length++;
-      break;
-   case ir_txl:
-      emit(MOV(sources[length], lod));
-      length++;
-      break;
-   case ir_txd: {
-      no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
-
-      /* Load dPdx and the coordinate together:
-       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
-       */
-      for (int i = 0; i < coord_components; i++) {
-	 emit(MOV(sources[length], coordinate));
-	 coordinate = offset(coordinate, 1);
-	 length++;
-
-         /* For cube map array, the coordinate is (u,v,r,ai) but there are
-          * only derivatives for (u, v, r).
-          */
-         if (i < grad_components) {
-            emit(MOV(sources[length], lod));
-            lod = offset(lod, 1);
-            length++;
-
-            emit(MOV(sources[length], lod2));
-            lod2 = offset(lod2, 1);
-            length++;
-         }
-      }
-
-      coordinate_done = true;
-      break;
-   }
-   case ir_txs:
-      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod));
-      length++;
-      break;
-   case ir_query_levels:
-      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)));
-      length++;
-      break;
-   case ir_txf:
-      /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
-       * On Gen9 they are u, v, lod, r
-       */
-
-      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
-      coordinate = offset(coordinate, 1);
-      length++;
-
-      if (devinfo->gen >= 9) {
-         if (coord_components >= 2) {
-            emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
-            coordinate = offset(coordinate, 1);
-         }
-         length++;
-      }
-
-      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod));
-      length++;
-
-      for (int i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
-	 emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
-	 coordinate = offset(coordinate, 1);
-	 length++;
-      }
-
-      coordinate_done = true;
-      break;
-   case ir_txf_ms:
-      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index));
-      length++;
-
-      /* data from the multisample control surface */
-      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs));
-      length++;
-
-      /* there is no offsetting for this message; just copy in the integer
-       * texture coordinates
-       */
-      for (int i = 0; i < coord_components; i++) {
-         emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
-         coordinate = offset(coordinate, 1);
-         length++;
-      }
-
-      coordinate_done = true;
-      break;
-   case ir_tg4:
-      if (has_nonconstant_offset) {
-         if (shadow_c.file != BAD_FILE)
-            no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
-
-         /* More crazy intermixing */
-         for (int i = 0; i < 2; i++) { /* u, v */
-            emit(MOV(sources[length], coordinate));
-            coordinate = offset(coordinate, 1);
-            length++;
-         }
-
-         for (int i = 0; i < 2; i++) { /* offu, offv */
-            emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value));
-            offset_value = offset(offset_value, 1);
-            length++;
-         }
-
-         if (coord_components == 3) { /* r if present */
-            emit(MOV(sources[length], coordinate));
-            coordinate = offset(coordinate, 1);
-            length++;
-         }
-
-         coordinate_done = true;
-      }
-      break;
-   }
-
-   /* Set up the coordinate (except for cases where it was done above) */
-   if (!coordinate_done) {
-      for (int i = 0; i < coord_components; i++) {
-         emit(MOV(sources[length], coordinate));
-         coordinate = offset(coordinate, 1);
-         length++;
-      }
-   }
-
-   int mlen;
-   if (reg_width == 2)
-      mlen = length * reg_width - header_size;
-   else
-      mlen = length * reg_width;
-
-   fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
-                               BRW_REGISTER_TYPE_F, dispatch_width);
-   emit(LOAD_PAYLOAD(src_payload, sources, length, header_size));
-
-   /* Generate the SEND */
-   enum opcode opcode;
-   switch (op) {
-   case ir_tex: opcode = SHADER_OPCODE_TEX; break;
-   case ir_txb: opcode = FS_OPCODE_TXB; break;
-   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
-   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
-   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
-   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
-   case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
-   case ir_lod: opcode = SHADER_OPCODE_LOD; break;
-   case ir_tg4:
-      if (has_nonconstant_offset)
-         opcode = SHADER_OPCODE_TG4_OFFSET;
-      else
-         opcode = SHADER_OPCODE_TG4;
-      break;
-   default:
-      unreachable("not reached");
-   }
-   fs_inst *inst = emit(opcode, dst, src_payload, sampler);
-   inst->base_mrf = -1;
-   inst->mlen = mlen;
-   inst->header_size = header_size;
-   inst->regs_written = 4 * reg_width;
-
-   if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
-      fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
-           " disallowed by hardware\n");
-   }
-
-   return inst;
-}
-
 fs_reg
 fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
                              bool is_rect, uint32_t sampler, int texunit)
 {
-   fs_inst *inst = NULL;
    bool needs_gl_clamp = true;
    fs_reg scale_x, scale_y;
 
@@ -2075,10 +139,10 @@
       fs_reg src = coordinate;
       coordinate = dst;
 
-      emit(MUL(dst, src, scale_x));
-      dst = offset(dst, 1);
-      src = offset(src, 1);
-      emit(MUL(dst, src, scale_y));
+      bld.MUL(dst, src, scale_x);
+      dst = offset(dst, bld, 1);
+      src = offset(src, bld, 1);
+      bld.MUL(dst, src, scale_y);
    } else if (is_rect) {
       /* On gen6+, the sampler handles the rectangle coordinates
        * natively, without needing rescaling.  But that means we have
@@ -2090,10 +154,10 @@
       for (int i = 0; i < 2; i++) {
 	 if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
 	    fs_reg chan = coordinate;
-	    chan = offset(chan, i);
+	    chan = offset(chan, bld, i);
 
-	    inst = emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f));
-	    inst->conditional_mod = BRW_CONDITIONAL_GE;
+            set_condmod(BRW_CONDITIONAL_GE,
+                        bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)));
 
 	    /* Our parameter comes in as 1.0/width or 1.0/height,
 	     * because that's what people normally want for doing
@@ -2102,11 +166,11 @@
 	     * parameter type, so just invert back.
 	     */
 	    fs_reg limit = vgrf(glsl_type::float_type);
-	    emit(MOV(limit, i == 0 ? scale_x : scale_y));
-	    emit(SHADER_OPCODE_RCP, limit, limit);
+            bld.MOV(limit, i == 0 ? scale_x : scale_y);
+            bld.emit(SHADER_OPCODE_RCP, limit, limit);
 
-	    inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
-	    inst->conditional_mod = BRW_CONDITIONAL_L;
+            set_condmod(BRW_CONDITIONAL_L,
+                        bld.emit(BRW_OPCODE_SEL, chan, chan, limit));
 	 }
       }
    }
@@ -2115,10 +179,8 @@
       for (int i = 0; i < MIN2(coord_components, 3); i++) {
 	 if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
 	    fs_reg chan = coordinate;
-	    chan = offset(chan, i);
-
-	    fs_inst *inst = emit(MOV(chan, chan));
-	    inst->saturate = true;
+	    chan = offset(chan, bld, i);
+            set_saturate(true, bld.MOV(chan, chan));
 	 }
       }
    }
@@ -2127,31 +189,21 @@
 
 /* Sample from the MCS surface attached to this multisample texture. */
 fs_reg
-fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler)
+fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
+                           const fs_reg &sampler)
 {
-   int reg_width = dispatch_width / 8;
-   fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width),
-                           BRW_REGISTER_TYPE_F, dispatch_width);
-   fs_reg dest = vgrf(glsl_type::uvec4_type);
-   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components);
-
-   /* parameters are: u, v, r; missing parameters are treated as zero */
-   for (int i = 0; i < components; i++) {
-      sources[i] = vgrf(glsl_type::float_type);
-      emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate));
-      coordinate = offset(coordinate, 1);
-   }
-
-   emit(LOAD_PAYLOAD(payload, sources, components, 0));
+   const fs_reg dest = vgrf(glsl_type::uvec4_type);
+   const fs_reg srcs[] = {
+      coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(),
+      sampler, fs_reg(), fs_reg(components), fs_reg(0)
+   };
+   fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
+                            ARRAY_SIZE(srcs));
 
-   fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler);
-   inst->base_mrf = -1;
-   inst->mlen = components * reg_width;
-   inst->header_size = 0;
-   inst->regs_written = 4 * reg_width; /* we only care about one reg of
-                                        * response, but the sampler always
-                                        * writes 4/8
-                                        */
+   /* We only care about one reg of response, but the sampler always writes
+    * 4/8.
+    */
+   inst->regs_written = 4 * dispatch_width / 8;
 
    return dest;
 }
@@ -2184,13 +236,21 @@
          this->result = res;
 
          for (int i=0; i<4; i++) {
-            emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)));
-            res = offset(res, 1);
+            bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
+            res = offset(res, bld, 1);
          }
          return;
       }
    }
 
+   if (op == ir_query_levels) {
+      /* textureQueryLevels() is implemented in terms of TXS so we need to
+       * pass a valid LOD argument.
+       */
+      assert(lod.file == BAD_FILE);
+      lod = fs_reg(0u);
+   }
+
    if (coordinate.file != BAD_FILE) {
       /* FINISHME: Texture coordinate rescaling doesn't work with non-constant
        * samplers.  This should only be a problem with GL_CLAMP on Gen7.
@@ -2203,26 +263,50 @@
     * samples, so don't worry about them.
     */
    fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 4, 1));
+   const fs_reg srcs[] = {
+      coordinate, shadow_c, lod, lod2,
+      sample_index, mcs, sampler_reg, offset_value,
+      fs_reg(coord_components), fs_reg(grad_components)
+   };
+   enum opcode opcode;
 
-   if (devinfo->gen >= 7) {
-      inst = emit_texture_gen7(op, dst, coordinate, coord_components,
-                               shadow_c, lod, lod2, grad_components,
-                               sample_index, mcs, sampler_reg,
-                               offset_value);
-   } else if (devinfo->gen >= 5) {
-      inst = emit_texture_gen5(op, dst, coordinate, coord_components,
-                               shadow_c, lod, lod2, grad_components,
-                               sample_index, sampler,
-                               offset_value.file != BAD_FILE);
-   } else if (dispatch_width == 16) {
-      inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components,
-                                      shadow_c, lod, sampler);
-   } else {
-      inst = emit_texture_gen4(op, dst, coordinate, coord_components,
-                               shadow_c, lod, lod2, grad_components,
-                               sampler);
+   switch (op) {
+   case ir_tex:
+      opcode = SHADER_OPCODE_TEX_LOGICAL;
+      break;
+   case ir_txb:
+      opcode = FS_OPCODE_TXB_LOGICAL;
+      break;
+   case ir_txl:
+      opcode = SHADER_OPCODE_TXL_LOGICAL;
+      break;
+   case ir_txd:
+      opcode = SHADER_OPCODE_TXD_LOGICAL;
+      break;
+   case ir_txf:
+      opcode = SHADER_OPCODE_TXF_LOGICAL;
+      break;
+   case ir_txf_ms:
+      opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
+      break;
+   case ir_txs:
+   case ir_query_levels:
+      opcode = SHADER_OPCODE_TXS_LOGICAL;
+      break;
+   case ir_lod:
+      opcode = SHADER_OPCODE_LOD_LOGICAL;
+      break;
+   case ir_tg4:
+      opcode = (offset_value.file != BAD_FILE && offset_value.file != IMM ?
+                SHADER_OPCODE_TG4_OFFSET_LOGICAL : SHADER_OPCODE_TG4_LOGICAL);
+      break;
+   default:
+      unreachable("Invalid texture opcode.");
    }
 
+   inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
+   inst->regs_written = 4 * dispatch_width / 8;
+
    if (shadow_c.file != BAD_FILE)
       inst->shadow_compare = true;
 
@@ -2239,175 +323,25 @@
 
    /* fixup #layers for cube map arrays */
    if (op == ir_txs && is_cube_array) {
-      fs_reg depth = offset(dst, 2);
+      fs_reg depth = offset(dst, bld, 2);
       fs_reg fixed_depth = vgrf(glsl_type::int_type);
-      emit_math(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
+      bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
 
       fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
-      int components = inst->regs_written / (dst.width / 8);
+      int components = inst->regs_written / (inst->exec_size / 8);
       for (int i = 0; i < components; i++) {
          if (i == 2) {
             fixed_payload[i] = fixed_depth;
          } else {
-            fixed_payload[i] = offset(dst, i);
+            fixed_payload[i] = offset(dst, bld, i);
          }
       }
-      emit(LOAD_PAYLOAD(dst, fixed_payload, components, 0));
+      bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
    }
 
    swizzle_result(op, dest_type->vector_elements, dst, sampler);
 }
 
-void
-fs_visitor::visit(ir_texture *ir)
-{
-   uint32_t sampler =
-      _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
-
-   ir_rvalue *nonconst_sampler_index =
-      _mesa_get_sampler_array_nonconst_index(ir->sampler);
-
-   /* Handle non-constant sampler array indexing */
-   fs_reg sampler_reg;
-   if (nonconst_sampler_index) {
-      /* The highest sampler which may be used by this operation is
-       * the last element of the array. Mark it here, because the generator
-       * doesn't have enough information to determine the bound.
-       */
-      uint32_t array_size = ir->sampler->as_dereference_array()
-         ->array->type->array_size();
-
-      uint32_t max_used = sampler + array_size - 1;
-      if (ir->op == ir_tg4 && devinfo->gen < 8) {
-         max_used += stage_prog_data->binding_table.gather_texture_start;
-      } else {
-         max_used += stage_prog_data->binding_table.texture_start;
-      }
-
-      brw_mark_surface_used(prog_data, max_used);
-
-      /* Emit code to evaluate the actual indexing expression */
-      nonconst_sampler_index->accept(this);
-      fs_reg temp = vgrf(glsl_type::uint_type);
-      emit(ADD(temp, this->result, fs_reg(sampler)));
-      emit_uniformize(temp, temp);
-
-      sampler_reg = temp;
-   } else {
-      /* Single sampler, or constant array index; the indexing expression
-       * is just an immediate.
-       */
-      sampler_reg = fs_reg(sampler);
-   }
-
-   /* FINISHME: We're failing to recompile our programs when the sampler is
-    * updated.  This only matters for the texture rectangle scale parameters
-    * (pre-gen6, or gen6+ with GL_CLAMP).
-    */
-   int texunit = prog->SamplerUnits[sampler];
-
-   /* Should be lowered by do_lower_texture_projection */
-   assert(!ir->projector);
-
-   /* Should be lowered */
-   assert(!ir->offset || !ir->offset->type->is_array());
-
-   /* Generate code to compute all the subexpression trees.  This has to be
-    * done before loading any values into MRFs for the sampler message since
-    * generating these values may involve SEND messages that need the MRFs.
-    */
-   fs_reg coordinate;
-   int coord_components = 0;
-   if (ir->coordinate) {
-      coord_components = ir->coordinate->type->vector_elements;
-      ir->coordinate->accept(this);
-      coordinate = this->result;
-   }
-
-   fs_reg shadow_comparitor;
-   if (ir->shadow_comparitor) {
-      ir->shadow_comparitor->accept(this);
-      shadow_comparitor = this->result;
-   }
-
-   fs_reg offset_value;
-   if (ir->offset) {
-      ir_constant *const_offset = ir->offset->as_constant();
-      if (const_offset) {
-         /* Store the header bitfield in an IMM register.  This allows us to
-          * use offset_value.file to distinguish between no offset, a constant
-          * offset, and a non-constant offset.
-          */
-         offset_value =
-            fs_reg(brw_texture_offset(const_offset->value.i,
-                                      const_offset->type->vector_elements));
-      } else {
-         ir->offset->accept(this);
-         offset_value = this->result;
-      }
-   }
-
-   fs_reg lod, lod2, sample_index, mcs;
-   int grad_components = 0;
-   switch (ir->op) {
-   case ir_tex:
-   case ir_lod:
-   case ir_tg4:
-   case ir_query_levels:
-      break;
-   case ir_txb:
-      ir->lod_info.bias->accept(this);
-      lod = this->result;
-      break;
-   case ir_txd:
-      ir->lod_info.grad.dPdx->accept(this);
-      lod = this->result;
-
-      ir->lod_info.grad.dPdy->accept(this);
-      lod2 = this->result;
-
-      grad_components = ir->lod_info.grad.dPdx->type->vector_elements;
-      break;
-   case ir_txf:
-   case ir_txl:
-   case ir_txs:
-      ir->lod_info.lod->accept(this);
-      lod = this->result;
-      break;
-   case ir_txf_ms:
-      ir->lod_info.sample_index->accept(this);
-      sample_index = this->result;
-
-      if (devinfo->gen >= 7 &&
-          key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
-         mcs = emit_mcs_fetch(coordinate, ir->coordinate->type->vector_elements,
-                              sampler_reg);
-      } else {
-         mcs = fs_reg(0u);
-      }
-      break;
-   default:
-      unreachable("Unrecognized texture opcode");
-   };
-
-   int gather_component = 0;
-   if (ir->op == ir_tg4)
-      gather_component = ir->lod_info.component->as_constant()->value.i[0];
-
-   bool is_rect =
-      ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT;
-
-   bool is_cube_array =
-      ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
-      ir->sampler->type->sampler_array;
-
-   emit_texture(ir->op, ir->type, coordinate, coord_components,
-                shadow_comparitor, lod, lod2, grad_components,
-                sample_index, offset_value, mcs,
-                gather_component, is_cube_array, is_rect, sampler,
-                sampler_reg, texunit);
-}
-
 /**
  * Apply workarounds for Gen6 gather with UINT/SINT
  */
@@ -2422,19 +356,19 @@
    for (int i = 0; i < 4; i++) {
       fs_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F);
       /* Convert from UNORM to UINT */
-      emit(MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1))));
-      emit(MOV(dst, dst_f));
+      bld.MUL(dst_f, dst_f, fs_reg((float)((1 << width) - 1)));
+      bld.MOV(dst, dst_f);
 
       if (wa & WA_SIGN) {
          /* Reinterpret the UINT value as a signed INT value by
           * shifting the sign bit into place, then shifting back
           * preserving sign.
           */
-         emit(SHL(dst, dst, fs_reg(32 - width)));
-         emit(ASR(dst, dst, fs_reg(32 - width)));
+         bld.SHL(dst, dst, fs_reg(32 - width));
+         bld.ASR(dst, dst, fs_reg(32 - width));
       }
 
-      dst = offset(dst, 1);
+      dst = offset(dst, bld, 1);
    }
 }
 
@@ -2461,493 +395,50 @@
    }
 }
 
-/**
- * Swizzle the result of a texture result.  This is necessary for
- * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
- */
-void
-fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
-                           fs_reg orig_val, uint32_t sampler)
-{
-   if (op == ir_query_levels) {
-      /* # levels is in .w */
-      this->result = offset(orig_val, 3);
-      return;
-   }
-
-   this->result = orig_val;
-
-   /* txs,lod don't actually sample the texture, so swizzling the result
-    * makes no sense.
-    */
-   if (op == ir_txs || op == ir_lod || op == ir_tg4)
-      return;
-
-   if (dest_components == 1) {
-      /* Ignore DEPTH_TEXTURE_MODE swizzling. */
-   } else if (key_tex->swizzles[sampler] != SWIZZLE_NOOP) {
-      fs_reg swizzled_result = vgrf(glsl_type::vec4_type);
-      swizzled_result.type = orig_val.type;
-
-      for (int i = 0; i < 4; i++) {
-	 int swiz = GET_SWZ(key_tex->swizzles[sampler], i);
-	 fs_reg l = swizzled_result;
-	 l = offset(l, i);
-
-	 if (swiz == SWIZZLE_ZERO) {
-	    emit(MOV(l, fs_reg(0.0f)));
-	 } else if (swiz == SWIZZLE_ONE) {
-	    emit(MOV(l, fs_reg(1.0f)));
-	 } else {
-            emit(MOV(l, offset(orig_val,
-                               GET_SWZ(key_tex->swizzles[sampler], i))));
-	 }
-      }
-      this->result = swizzled_result;
-   }
-}
-
-void
-fs_visitor::visit(ir_swizzle *ir)
-{
-   ir->val->accept(this);
-   fs_reg val = this->result;
-
-   if (ir->type->vector_elements == 1) {
-      this->result = offset(this->result, ir->mask.x);
-      return;
-   }
-
-   fs_reg result = vgrf(ir->type);
-   this->result = result;
-
-   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
-      fs_reg channel = val;
-      int swiz = 0;
-
-      switch (i) {
-      case 0:
-	 swiz = ir->mask.x;
-	 break;
-      case 1:
-	 swiz = ir->mask.y;
-	 break;
-      case 2:
-	 swiz = ir->mask.z;
-	 break;
-      case 3:
-	 swiz = ir->mask.w;
-	 break;
-      }
-
-      emit(MOV(result, offset(channel, swiz)));
-      result = offset(result, 1);
-   }
-}
-
-void
-fs_visitor::visit(ir_discard *ir)
-{
-   /* We track our discarded pixels in f0.1.  By predicating on it, we can
-    * update just the flag bits that aren't yet discarded.  If there's no
-    * condition, we emit a CMP of g0 != g0, so all currently executing
-    * channels will get turned off.
-    */
-   fs_inst *cmp;
-   if (ir->condition) {
-      emit_bool_to_cond_code(ir->condition);
-      cmp = (fs_inst *) this->instructions.get_tail();
-      cmp->conditional_mod = brw_negate_cmod(cmp->conditional_mod);
-   } else {
-      fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
-                                      BRW_REGISTER_TYPE_UW));
-      cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ));
-   }
-   cmp->predicate = BRW_PREDICATE_NORMAL;
-   cmp->flag_subreg = 1;
-
-   if (devinfo->gen >= 6) {
-      emit_discard_jump();
-   }
-}
-
-void
-fs_visitor::visit(ir_constant *ir)
-{
-   /* Set this->result to reg at the bottom of the function because some code
-    * paths will cause this visitor to be applied to other fields.  This will
-    * cause the value stored in this->result to be modified.
-    *
-    * Make reg constant so that it doesn't get accidentally modified along the
-    * way.  Yes, I actually had this problem. :(
-    */
-   const fs_reg reg = vgrf(ir->type);
-   fs_reg dst_reg = reg;
-
-   if (ir->type->is_array()) {
-      const unsigned size = type_size(ir->type->fields.array);
-
-      for (unsigned i = 0; i < ir->type->length; i++) {
-	 ir->array_elements[i]->accept(this);
-	 fs_reg src_reg = this->result;
-
-	 dst_reg.type = src_reg.type;
-	 for (unsigned j = 0; j < size; j++) {
-	    emit(MOV(dst_reg, src_reg));
-	    src_reg = offset(src_reg, 1);
-	    dst_reg = offset(dst_reg, 1);
-	 }
-      }
-   } else if (ir->type->is_record()) {
-      foreach_in_list(ir_constant, field, &ir->components) {
-	 const unsigned size = type_size(field->type);
-
-	 field->accept(this);
-	 fs_reg src_reg = this->result;
-
-	 dst_reg.type = src_reg.type;
-	 for (unsigned j = 0; j < size; j++) {
-	    emit(MOV(dst_reg, src_reg));
-	    src_reg = offset(src_reg, 1);
-	    dst_reg = offset(dst_reg, 1);
-	 }
-      }
-   } else {
-      const unsigned size = type_size(ir->type);
-
-      for (unsigned i = 0; i < size; i++) {
-	 switch (ir->type->base_type) {
-	 case GLSL_TYPE_FLOAT:
-	    emit(MOV(dst_reg, fs_reg(ir->value.f[i])));
-	    break;
-	 case GLSL_TYPE_UINT:
-	    emit(MOV(dst_reg, fs_reg(ir->value.u[i])));
-	    break;
-	 case GLSL_TYPE_INT:
-	    emit(MOV(dst_reg, fs_reg(ir->value.i[i])));
-	    break;
-	 case GLSL_TYPE_BOOL:
-            emit(MOV(dst_reg, fs_reg(ir->value.b[i] != 0 ? ~0 : 0)));
-	    break;
-	 default:
-	    unreachable("Non-float/uint/int/bool constant");
-	 }
-	 dst_reg = offset(dst_reg, 1);
-      }
-   }
-
-   this->result = reg;
-}
-
-void
-fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
-{
-   ir_expression *expr = ir->as_expression();
-
-   if (!expr || expr->operation == ir_binop_ubo_load) {
-      ir->accept(this);
-
-      fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      return;
-   }
-
-   fs_reg op[3];
-
-   assert(expr->get_num_operands() <= 3);
-   for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-      assert(expr->operands[i]->type->is_scalar());
-
-      expr->operands[i]->accept(this);
-      op[i] = this->result;
-
-      resolve_ud_negate(&op[i]);
-   }
-
-   emit_bool_to_cond_code_of_reg(expr, op);
-}
-
-void
-fs_visitor::emit_bool_to_cond_code_of_reg(ir_expression *expr, fs_reg op[3])
-{
-   fs_inst *inst;
-
-   switch (expr->operation) {
-   case ir_unop_logic_not:
-      inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
-      inst->conditional_mod = BRW_CONDITIONAL_Z;
-      break;
-
-   case ir_binop_logic_xor:
-      if (devinfo->gen <= 5) {
-         fs_reg temp = vgrf(expr->type);
-         emit(XOR(temp, op[0], op[1]));
-         inst = emit(AND(reg_null_d, temp, fs_reg(1)));
-      } else {
-         inst = emit(XOR(reg_null_d, op[0], op[1]));
-      }
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      break;
-
-   case ir_binop_logic_or:
-      if (devinfo->gen <= 5) {
-         fs_reg temp = vgrf(expr->type);
-         emit(OR(temp, op[0], op[1]));
-         inst = emit(AND(reg_null_d, temp, fs_reg(1)));
-      } else {
-         inst = emit(OR(reg_null_d, op[0], op[1]));
-      }
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      break;
-
-   case ir_binop_logic_and:
-      if (devinfo->gen <= 5) {
-         fs_reg temp = vgrf(expr->type);
-         emit(AND(temp, op[0], op[1]));
-         inst = emit(AND(reg_null_d, temp, fs_reg(1)));
-      } else {
-         inst = emit(AND(reg_null_d, op[0], op[1]));
-      }
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      break;
-
-   case ir_unop_f2b:
-      if (devinfo->gen >= 6) {
-         emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
-      } else {
-         inst = emit(MOV(reg_null_f, op[0]));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      }
-      break;
-
-   case ir_unop_i2b:
-      if (devinfo->gen >= 6) {
-         emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
-      } else {
-         inst = emit(MOV(reg_null_d, op[0]));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      }
-      break;
-
-   case ir_binop_greater:
-   case ir_binop_gequal:
-   case ir_binop_less:
-   case ir_binop_lequal:
-   case ir_binop_equal:
-   case ir_binop_all_equal:
-   case ir_binop_nequal:
-   case ir_binop_any_nequal:
-      if (devinfo->gen <= 5) {
-         resolve_bool_comparison(expr->operands[0], &op[0]);
-         resolve_bool_comparison(expr->operands[1], &op[1]);
-      }
-
-      emit(CMP(reg_null_d, op[0], op[1],
-               brw_conditional_for_comparison(expr->operation)));
-      break;
-
-   case ir_triop_csel: {
-      /* Expand the boolean condition into the flag register. */
-      inst = emit(MOV(reg_null_d, op[0]));
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-      /* Select which boolean to return. */
-      fs_reg temp = vgrf(expr->operands[1]->type);
-      inst = emit(SEL(temp, op[1], op[2]));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-
-      /* Expand the result to a condition code. */
-      inst = emit(MOV(reg_null_d, temp));
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
-      break;
-   }
-
-   default:
-      unreachable("not reached");
-   }
-}
-
-/**
- * Emit a gen6 IF statement with the comparison folded into the IF
- * instruction.
- */
-void
-fs_visitor::emit_if_gen6(ir_if *ir)
-{
-   ir_expression *expr = ir->condition->as_expression();
-
-   if (expr && expr->operation != ir_binop_ubo_load) {
-      fs_reg op[3];
-      fs_inst *inst;
-      fs_reg temp;
-
-      assert(expr->get_num_operands() <= 3);
-      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
-	 assert(expr->operands[i]->type->is_scalar());
-
-	 expr->operands[i]->accept(this);
-	 op[i] = this->result;
-      }
-
-      switch (expr->operation) {
-      case ir_unop_logic_not:
-         emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_Z));
-         return;
-
-      case ir_binop_logic_xor:
-         emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
-         return;
-
-      case ir_binop_logic_or:
-         temp = vgrf(glsl_type::bool_type);
-         emit(OR(temp, op[0], op[1]));
-         emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
-         return;
-
-      case ir_binop_logic_and:
-         temp = vgrf(glsl_type::bool_type);
-         emit(AND(temp, op[0], op[1]));
-         emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
-         return;
-
-      case ir_unop_f2b:
-	 inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
-	 return;
-
-      case ir_unop_i2b:
-	 emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
-	 return;
-
-      case ir_binop_greater:
-      case ir_binop_gequal:
-      case ir_binop_less:
-      case ir_binop_lequal:
-      case ir_binop_equal:
-      case ir_binop_all_equal:
-      case ir_binop_nequal:
-      case ir_binop_any_nequal:
-         if (devinfo->gen <= 5) {
-            resolve_bool_comparison(expr->operands[0], &op[0]);
-            resolve_bool_comparison(expr->operands[1], &op[1]);
-         }
-
-	 emit(IF(op[0], op[1],
-                 brw_conditional_for_comparison(expr->operation)));
-	 return;
-
-      case ir_triop_csel: {
-         /* Expand the boolean condition into the flag register. */
-         fs_inst *inst = emit(MOV(reg_null_d, op[0]));
-         inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
-         /* Select which boolean to use as the result. */
-         fs_reg temp = vgrf(expr->operands[1]->type);
-         inst = emit(SEL(temp, op[1], op[2]));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-
-	 emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
-	 return;
-      }
-
-      default:
-	 unreachable("not reached");
-      }
-   }
-
-   ir->condition->accept(this);
-   emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ));
-}
-
-bool
-fs_visitor::try_opt_frontfacing_ternary(ir_if *ir)
+/**
+ * Swizzle the result of a texture result.  This is necessary for
+ * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
+ */
+void
+fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
+                           fs_reg orig_val, uint32_t sampler)
 {
-   ir_dereference_variable *deref = ir->condition->as_dereference_variable();
-   if (!deref || strcmp(deref->var->name, "gl_FrontFacing") != 0)
-      return false;
-
-   if (ir->then_instructions.length() != 1 ||
-       ir->else_instructions.length() != 1)
-      return false;
-
-   ir_assignment *then_assign =
-         ((ir_instruction *)ir->then_instructions.head)->as_assignment();
-   ir_assignment *else_assign =
-         ((ir_instruction *)ir->else_instructions.head)->as_assignment();
-
-   if (!then_assign || then_assign->condition ||
-       !else_assign || else_assign->condition ||
-       then_assign->write_mask != else_assign->write_mask ||
-       !then_assign->lhs->equals(else_assign->lhs))
-      return false;
-
-   ir_constant *then_rhs = then_assign->rhs->as_constant();
-   ir_constant *else_rhs = else_assign->rhs->as_constant();
-
-   if (!then_rhs || !else_rhs)
-      return false;
-
-   if (then_rhs->type->base_type != GLSL_TYPE_FLOAT)
-      return false;
-
-   if ((then_rhs->is_one() && else_rhs->is_negative_one()) ||
-       (else_rhs->is_one() && then_rhs->is_negative_one())) {
-      then_assign->lhs->accept(this);
-      fs_reg dst = this->result;
-      dst.type = BRW_REGISTER_TYPE_D;
-      fs_reg tmp = vgrf(glsl_type::int_type);
-
-      if (devinfo->gen >= 6) {
-         /* Bit 15 of g0.0 is 0 if the polygon is front facing. */
-         fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
-
-         /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
-          *
-          *    or(8)  tmp.1<2>W  g0.0<0,1,0>W  0x00003f80W
-          *    and(8) dst<1>D    tmp<8,8,1>D   0xbf800000D
-          *
-          * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0).
-          */
+   if (op == ir_query_levels) {
+      /* # levels is in .w */
+      this->result = offset(orig_val, bld, 3);
+      return;
+   }
 
-         if (then_rhs->is_negative_one()) {
-            assert(else_rhs->is_one());
-            g0.negate = true;
-         }
+   this->result = orig_val;
 
-         tmp.type = BRW_REGISTER_TYPE_W;
-         tmp.subreg_offset = 2;
-         tmp.stride = 2;
-
-         fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80)));
-         or_inst->src[1].type = BRW_REGISTER_TYPE_UW;
-
-         tmp.type = BRW_REGISTER_TYPE_D;
-         tmp.subreg_offset = 0;
-         tmp.stride = 1;
-      } else {
-         /* Bit 31 of g1.6 is 0 if the polygon is front facing. */
-         fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
+   /* txs,lod don't actually sample the texture, so swizzling the result
+    * makes no sense.
+    */
+   if (op == ir_txs || op == ir_lod || op == ir_tg4)
+      return;
 
-         /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
-          *
-          *    or(8)  tmp<1>D  g1.6<0,1,0>D  0x3f800000D
-          *    and(8) dst<1>D  tmp<8,8,1>D   0xbf800000D
-          *
-          * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0).
-          */
+   if (dest_components == 1) {
+      /* Ignore DEPTH_TEXTURE_MODE swizzling. */
+   } else if (key_tex->swizzles[sampler] != SWIZZLE_NOOP) {
+      fs_reg swizzled_result = vgrf(glsl_type::vec4_type);
+      swizzled_result.type = orig_val.type;
 
-         if (then_rhs->is_negative_one()) {
-            assert(else_rhs->is_one());
-            g1_6.negate = true;
-         }
+      for (int i = 0; i < 4; i++) {
+	 int swiz = GET_SWZ(key_tex->swizzles[sampler], i);
+	 fs_reg l = swizzled_result;
+	 l = offset(l, bld, i);
 
-         emit(OR(tmp, g1_6, fs_reg(0x3f800000)));
+	 if (swiz == SWIZZLE_ZERO) {
+            bld.MOV(l, fs_reg(0.0f));
+	 } else if (swiz == SWIZZLE_ONE) {
+            bld.MOV(l, fs_reg(1.0f));
+	 } else {
+            bld.MOV(l, offset(orig_val, bld,
+                                  GET_SWZ(key_tex->swizzles[sampler], i)));
+	 }
       }
-      emit(AND(dst, tmp, fs_reg(0xbf800000)));
-      return true;
+      this->result = swizzled_result;
    }
-
-   return false;
 }
 
 /**
@@ -3016,21 +507,21 @@
       if (src0.file == IMM) {
          src0 = vgrf(glsl_type::float_type);
          src0.type = then_mov->src[0].type;
-         emit(MOV(src0, then_mov->src[0]));
+         bld.MOV(src0, then_mov->src[0]);
       }
 
-      fs_inst *sel;
       if (if_inst->conditional_mod) {
          /* Sandybridge-specific IF with embedded comparison */
-         emit(CMP(reg_null_d, if_inst->src[0], if_inst->src[1],
-                  if_inst->conditional_mod));
-         sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
-         sel->predicate = BRW_PREDICATE_NORMAL;
+         bld.CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1],
+                 if_inst->conditional_mod);
+         set_predicate(BRW_PREDICATE_NORMAL,
+                       bld.emit(BRW_OPCODE_SEL, then_mov->dst,
+                                src0, else_mov->src[0]));
       } else {
          /* Separate CMP and IF instructions */
-         sel = emit(BRW_OPCODE_SEL, then_mov->dst, src0, else_mov->src[0]);
-         sel->predicate = if_inst->predicate;
-         sel->predicate_inverse = if_inst->predicate_inverse;
+         set_predicate_inv(if_inst->predicate, if_inst->predicate_inverse,
+                           bld.emit(BRW_OPCODE_SEL, then_mov->dst,
+                                    src0, else_mov->src[0]));
       }
 
       return true;
@@ -3039,314 +530,6 @@
    return false;
 }
 
-void
-fs_visitor::visit(ir_if *ir)
-{
-   if (try_opt_frontfacing_ternary(ir))
-      return;
-
-   /* Don't point the annotation at the if statement, because then it plus
-    * the then and else blocks get printed.
-    */
-   this->base_ir = ir->condition;
-
-   if (devinfo->gen == 6) {
-      emit_if_gen6(ir);
-   } else {
-      emit_bool_to_cond_code(ir->condition);
-
-      emit(IF(BRW_PREDICATE_NORMAL));
-   }
-
-   foreach_in_list(ir_instruction, ir_, &ir->then_instructions) {
-      this->base_ir = ir_;
-      ir_->accept(this);
-   }
-
-   if (!ir->else_instructions.is_empty()) {
-      emit(BRW_OPCODE_ELSE);
-
-      foreach_in_list(ir_instruction, ir_, &ir->else_instructions) {
-	 this->base_ir = ir_;
-	 ir_->accept(this);
-      }
-   }
-
-   emit(BRW_OPCODE_ENDIF);
-
-   if (!try_replace_with_sel() && devinfo->gen < 6) {
-      no16("Can't support (non-uniform) control flow on SIMD16\n");
-   }
-}
-
-void
-fs_visitor::visit(ir_loop *ir)
-{
-   if (devinfo->gen < 6) {
-      no16("Can't support (non-uniform) control flow on SIMD16\n");
-   }
-
-   this->base_ir = NULL;
-   emit(BRW_OPCODE_DO);
-
-   foreach_in_list(ir_instruction, ir_, &ir->body_instructions) {
-      this->base_ir = ir_;
-      ir_->accept(this);
-   }
-
-   this->base_ir = NULL;
-   emit(BRW_OPCODE_WHILE);
-}
-
-void
-fs_visitor::visit(ir_loop_jump *ir)
-{
-   switch (ir->mode) {
-   case ir_loop_jump::jump_break:
-      emit(BRW_OPCODE_BREAK);
-      break;
-   case ir_loop_jump::jump_continue:
-      emit(BRW_OPCODE_CONTINUE);
-      break;
-   }
-}
-
-void
-fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
-   ir_dereference *deref = static_cast<ir_dereference *>(
-      ir->actual_parameters.get_head());
-   ir_variable *location = deref->variable_referenced();
-   unsigned surf_index = (stage_prog_data->binding_table.abo_start +
-                          location->data.binding);
-
-   /* Calculate the surface offset */
-   fs_reg offset = vgrf(glsl_type::uint_type);
-   ir_dereference_array *deref_array = deref->as_dereference_array();
-
-   if (deref_array) {
-      deref_array->array_index->accept(this);
-
-      fs_reg tmp = vgrf(glsl_type::uint_type);
-      emit(MUL(tmp, this->result, fs_reg(ATOMIC_COUNTER_SIZE)));
-      emit(ADD(offset, tmp, fs_reg(location->data.atomic.offset)));
-   } else {
-      offset = fs_reg(location->data.atomic.offset);
-   }
-
-   /* Emit the appropriate machine instruction */
-   const char *callee = ir->callee->function_name();
-   ir->return_deref->accept(this);
-   fs_reg dst = this->result;
-
-   if (!strcmp("__intrinsic_atomic_read", callee)) {
-      emit_untyped_surface_read(surf_index, dst, offset);
-
-   } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
-      emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
-                          fs_reg(), fs_reg());
-
-   } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
-      emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
-                          fs_reg(), fs_reg());
-   }
-}
-
-void
-fs_visitor::visit(ir_call *ir)
-{
-   const char *callee = ir->callee->function_name();
-
-   if (!strcmp("__intrinsic_atomic_read", callee) ||
-       !strcmp("__intrinsic_atomic_increment", callee) ||
-       !strcmp("__intrinsic_atomic_predecrement", callee)) {
-      visit_atomic_counter_intrinsic(ir);
-   } else {
-      unreachable("Unsupported intrinsic.");
-   }
-}
-
-void
-fs_visitor::visit(ir_return *)
-{
-   unreachable("FINISHME");
-}
-
-void
-fs_visitor::visit(ir_function *ir)
-{
-   /* Ignore function bodies other than main() -- we shouldn't see calls to
-    * them since they should all be inlined before we get to ir_to_mesa.
-    */
-   if (strcmp(ir->name, "main") == 0) {
-      const ir_function_signature *sig;
-      exec_list empty;
-
-      sig = ir->matching_signature(NULL, &empty, false);
-
-      assert(sig);
-
-      foreach_in_list(ir_instruction, ir_, &sig->body) {
-	 this->base_ir = ir_;
-	 ir_->accept(this);
-      }
-   }
-}
-
-void
-fs_visitor::visit(ir_function_signature *)
-{
-   unreachable("not reached");
-}
-
-void
-fs_visitor::visit(ir_emit_vertex *)
-{
-   unreachable("not reached");
-}
-
-void
-fs_visitor::visit(ir_end_primitive *)
-{
-   unreachable("not reached");
-}
-
-void
-fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-                                fs_reg dst, fs_reg offset, fs_reg src0,
-                                fs_reg src1)
-{
-   int reg_width = dispatch_width / 8;
-   int length = 0;
-
-   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4);
-
-   sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-   /* Initialize the sample mask in the message header. */
-   emit(MOV(sources[0], fs_reg(0u)))
-      ->force_writemask_all = true;
-
-   if (stage == MESA_SHADER_FRAGMENT) {
-      if (((brw_wm_prog_data*)this->prog_data)->uses_kill) {
-         emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1)))
-            ->force_writemask_all = true;
-      } else {
-         emit(MOV(component(sources[0], 7),
-                  retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
-            ->force_writemask_all = true;
-      }
-   } else {
-      /* The execution mask is part of the side-band information sent together with
-       * the message payload to the data port. It's implicitly ANDed with the sample
-       * mask sent in the header to compute the actual set of channels that execute
-       * the atomic operation.
-       */
-      assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE);
-      emit(MOV(component(sources[0], 7),
-               fs_reg(0xffffu)))->force_writemask_all = true;
-   }
-   length++;
-
-   /* Set the atomic operation offset. */
-   sources[1] = vgrf(glsl_type::uint_type);
-   emit(MOV(sources[1], offset));
-   length++;
-
-   /* Set the atomic operation arguments. */
-   if (src0.file != BAD_FILE) {
-      sources[length] = vgrf(glsl_type::uint_type);
-      emit(MOV(sources[length], src0));
-      length++;
-   }
-
-   if (src1.file != BAD_FILE) {
-      sources[length] = vgrf(glsl_type::uint_type);
-      emit(MOV(sources[length], src1));
-      length++;
-   }
-
-   int mlen = 1 + (length - 1) * reg_width;
-   fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
-                               BRW_REGISTER_TYPE_UD, dispatch_width);
-   emit(LOAD_PAYLOAD(src_payload, sources, length, 1));
-
-   /* Emit the instruction. */
-   fs_inst *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload,
-                        fs_reg(surf_index), fs_reg(atomic_op));
-   inst->mlen = mlen;
-}
-
-void
-fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
-                                      fs_reg offset)
-{
-   int reg_width = dispatch_width / 8;
-
-   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
-
-   sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-   /* Initialize the sample mask in the message header. */
-   emit(MOV(sources[0], fs_reg(0u)))
-      ->force_writemask_all = true;
-
-   if (stage == MESA_SHADER_FRAGMENT) {
-      if (((brw_wm_prog_data*)this->prog_data)->uses_kill) {
-         emit(MOV(component(sources[0], 7), brw_flag_reg(0, 1)))
-            ->force_writemask_all = true;
-      } else {
-         emit(MOV(component(sources[0], 7),
-                  retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)))
-            ->force_writemask_all = true;
-      }
-   } else {
-      /* The execution mask is part of the side-band information sent together with
-       * the message payload to the data port. It's implicitly ANDed with the sample
-       * mask sent in the header to compute the actual set of channels that execute
-       * the atomic operation.
-       */
-      assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE);
-      emit(MOV(component(sources[0], 7),
-               fs_reg(0xffffu)))->force_writemask_all = true;
-   }
-
-   /* Set the surface read offset. */
-   sources[1] = vgrf(glsl_type::uint_type);
-   emit(MOV(sources[1], offset));
-
-   int mlen = 1 + reg_width;
-   fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
-                               BRW_REGISTER_TYPE_UD, dispatch_width);
-   fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2, 1));
-
-   /* Emit the instruction. */
-   inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload,
-               fs_reg(surf_index), fs_reg(1));
-   inst->mlen = mlen;
-}
-
-fs_inst *
-fs_visitor::emit(fs_inst *inst)
-{
-   if (dispatch_width == 16 && inst->exec_size == 8)
-      inst->force_uncompressed = true;
-
-   inst->annotation = this->current_annotation;
-   inst->ir = this->base_ir;
-
-   this->instructions.push_tail(inst);
-
-   return inst;
-}
-
-void
-fs_visitor::emit(exec_list list)
-{
-   foreach_in_list_safe(fs_inst, inst, &list) {
-      inst->exec_node::remove();
-      emit(inst);
-   }
-}
-
 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
 void
 fs_visitor::emit_dummy_fs()
@@ -3356,12 +539,12 @@
    /* Everyone's favorite color. */
    const float color[4] = { 1.0, 0.0, 1.0, 0.0 };
    for (int i = 0; i < 4; i++) {
-      emit(MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F,
-                      dispatch_width), fs_reg(color[i])));
+      bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F),
+              fs_reg(color[i]));
    }
 
    fs_inst *write;
-   write = emit(FS_OPCODE_FB_WRITE);
+   write = bld.emit(FS_OPCODE_FB_WRITE);
    write->eot = true;
    if (devinfo->gen >= 6) {
       write->base_mrf = 2;
@@ -3414,19 +597,19 @@
 {
    struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
 
-   this->current_annotation = "compute pixel centers";
+   fs_builder abld = bld.annotate("compute pixel centers");
    this->pixel_x = vgrf(glsl_type::uint_type);
    this->pixel_y = vgrf(glsl_type::uint_type);
    this->pixel_x.type = BRW_REGISTER_TYPE_UW;
    this->pixel_y.type = BRW_REGISTER_TYPE_UW;
-   emit(ADD(this->pixel_x,
+   abld.ADD(this->pixel_x,
             fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
-            fs_reg(brw_imm_v(0x10101010))));
-   emit(ADD(this->pixel_y,
+            fs_reg(brw_imm_v(0x10101010)));
+   abld.ADD(this->pixel_y,
             fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
-            fs_reg(brw_imm_v(0x11001100))));
+            fs_reg(brw_imm_v(0x11001100)));
 
-   this->current_annotation = "compute pixel deltas from v0";
+   abld = bld.annotate("compute pixel deltas from v0");
 
    this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
       vgrf(glsl_type::vec2_type);
@@ -3435,27 +618,27 @@
    const fs_reg ystart(negate(brw_vec1_grf(1, 1)));
 
    if (devinfo->has_pln && dispatch_width == 16) {
-      emit(ADD(half(offset(delta_xy, 0), 0), half(this->pixel_x, 0), xstart));
-      emit(ADD(half(offset(delta_xy, 0), 1), half(this->pixel_y, 0), ystart));
-      emit(ADD(half(offset(delta_xy, 1), 0), half(this->pixel_x, 1), xstart))
-         ->force_sechalf = true;
-      emit(ADD(half(offset(delta_xy, 1), 1), half(this->pixel_y, 1), ystart))
-         ->force_sechalf = true;
+      for (unsigned i = 0; i < 2; i++) {
+         abld.half(i).ADD(half(offset(delta_xy, abld, i), 0),
+                          half(this->pixel_x, i), xstart);
+         abld.half(i).ADD(half(offset(delta_xy, abld, i), 1),
+                          half(this->pixel_y, i), ystart);
+      }
    } else {
-      emit(ADD(offset(delta_xy, 0), this->pixel_x, xstart));
-      emit(ADD(offset(delta_xy, 1), this->pixel_y, ystart));
+      abld.ADD(offset(delta_xy, abld, 0), this->pixel_x, xstart);
+      abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart);
    }
 
-   this->current_annotation = "compute pos.w and 1/pos.w";
+   abld = bld.annotate("compute pos.w and 1/pos.w");
    /* Compute wpos.w.  It's always in our setup, since it's needed to
     * interpolate the other attributes.
     */
    this->wpos_w = vgrf(glsl_type::float_type);
-   emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, interp_reg(VARYING_SLOT_POS, 3));
+   abld.emit(FS_OPCODE_LINTERP, wpos_w, delta_xy,
+             interp_reg(VARYING_SLOT_POS, 3));
    /* Compute the pixel 1/W value from wpos.w. */
    this->pixel_w = vgrf(glsl_type::float_type);
-   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
-   this->current_annotation = NULL;
+   abld.emit(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
 }
 
 /** Emits the interpolation for the varying inputs. */
@@ -3464,8 +647,8 @@
 {
    struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
 
-   this->current_annotation = "compute pixel centers";
-   if (brw->gen >= 8 || dispatch_width == 8) {
+   fs_builder abld = bld.annotate("compute pixel centers");
+   if (devinfo->gen >= 8 || dispatch_width == 8) {
       /* The "Register Region Restrictions" page says for BDW (and newer,
        * presumably):
        *
@@ -3477,16 +660,17 @@
        * compute our pixel centers.
        */
       fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8),
-                          BRW_REGISTER_TYPE_UW, dispatch_width * 2);
-      emit(ADD(int_pixel_xy,
+                          BRW_REGISTER_TYPE_UW);
+
+      const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0);
+      dbld.ADD(int_pixel_xy,
                fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
-               fs_reg(brw_imm_v(0x11001010))))
-         ->force_writemask_all = true;
+               fs_reg(brw_imm_v(0x11001010)));
 
       this->pixel_x = vgrf(glsl_type::float_type);
       this->pixel_y = vgrf(glsl_type::float_type);
-      emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy);
-      emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy);
+      abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy);
+      abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy);
    } else {
       /* The "Register Region Restrictions" page says for SNB, IVB, HSW:
        *
@@ -3500,12 +684,12 @@
       fs_reg int_pixel_y = vgrf(glsl_type::uint_type);
       int_pixel_x.type = BRW_REGISTER_TYPE_UW;
       int_pixel_y.type = BRW_REGISTER_TYPE_UW;
-      emit(ADD(int_pixel_x,
+      abld.ADD(int_pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
-               fs_reg(brw_imm_v(0x10101010))));
-      emit(ADD(int_pixel_y,
+               fs_reg(brw_imm_v(0x10101010)));
+      abld.ADD(int_pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
-               fs_reg(brw_imm_v(0x11001100))));
+               fs_reg(brw_imm_v(0x11001100)));
 
       /* As of gen6, we can no longer mix float and int sources.  We have
        * to turn the integer pixel centers into floats for their actual
@@ -3513,48 +697,19 @@
        */
       this->pixel_x = vgrf(glsl_type::float_type);
       this->pixel_y = vgrf(glsl_type::float_type);
-      emit(MOV(this->pixel_x, int_pixel_x));
-      emit(MOV(this->pixel_y, int_pixel_y));
+      abld.MOV(this->pixel_x, int_pixel_x);
+      abld.MOV(this->pixel_y, int_pixel_y);
    }
 
-   this->current_annotation = "compute pos.w";
+   abld = bld.annotate("compute pos.w");
    this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0));
    this->wpos_w = vgrf(glsl_type::float_type);
-   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
+   abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
 
    for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
       uint8_t reg = payload.barycentric_coord_reg[i];
       this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0));
    }
-
-   this->current_annotation = NULL;
-}
-
-void
-fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-                                unsigned exec_size, bool use_2nd_half)
-{
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-   fs_inst *inst;
-
-   if (key->clamp_fragment_color) {
-      fs_reg tmp = vgrf(glsl_type::vec4_type);
-      assert(color.type == BRW_REGISTER_TYPE_F);
-      for (unsigned i = 0; i < components; i++) {
-         inst = emit(MOV(offset(tmp, i), offset(color, i)));
-         inst->saturate = true;
-      }
-      color = tmp;
-   }
-
-   if (exec_size < dispatch_width) {
-      unsigned half_idx = use_2nd_half ? 1 : 0;
-      for (unsigned i = 0; i < components; i++)
-         dst[i] = half(offset(color, i), half_idx);
-   } else {
-      for (unsigned i = 0; i < components; i++)
-         dst[i] = offset(color, i);
-   }
 }
 
 static enum brw_conditional_mod
@@ -3587,7 +742,7 @@
 {
    assert(stage == MESA_SHADER_FRAGMENT);
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-   this->current_annotation = "Alpha test";
+   const fs_builder abld = bld.annotate("Alpha test");
 
    fs_inst *cmp;
    if (key->alpha_test_func == GL_ALWAYS)
@@ -3597,168 +752,53 @@
       /* f0.1 = 0 */
       fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                       BRW_REGISTER_TYPE_UW));
-      cmp = emit(CMP(reg_null_f, some_reg, some_reg,
-                     BRW_CONDITIONAL_NEQ));
+      cmp = abld.CMP(bld.null_reg_f(), some_reg, some_reg,
+                     BRW_CONDITIONAL_NEQ);
    } else {
       /* RT0 alpha */
-      fs_reg color = offset(outputs[0], 3);
+      fs_reg color = offset(outputs[0], bld, 3);
 
       /* f0.1 &= func(color, ref) */
-      cmp = emit(CMP(reg_null_f, color, fs_reg(key->alpha_test_ref),
-                     cond_for_alpha_func(key->alpha_test_func)));
+      cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref),
+                     cond_for_alpha_func(key->alpha_test_func));
    }
    cmp->predicate = BRW_PREDICATE_NORMAL;
    cmp->flag_subreg = 1;
 }
 
 fs_inst *
-fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
-                                 fs_reg src0_alpha, unsigned components,
-                                 unsigned exec_size, bool use_2nd_half)
+fs_visitor::emit_single_fb_write(const fs_builder &bld,
+                                 fs_reg color0, fs_reg color1,
+                                 fs_reg src0_alpha, unsigned components)
 {
    assert(stage == MESA_SHADER_FRAGMENT);
    brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
-   this->current_annotation = "FB write header";
-   int header_size = 2, payload_header_size;
-
-   /* We can potentially have a message length of up to 15, so we have to set
-    * base_mrf to either 0 or 1 in order to fit in m0..m15.
-    */
-   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15);
-   int length = 0;
-
-   /* From the Sandy Bridge PRM, volume 4, page 198:
-    *
-    *     "Dispatched Pixel Enables. One bit per pixel indicating
-    *      which pixels were originally enabled when the thread was
-    *      dispatched. This field is only required for the end-of-
-    *      thread message and on all dual-source messages."
-    */
-   if (devinfo->gen >= 6 &&
-       (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) &&
-       color1.file == BAD_FILE &&
-       key->nr_color_regions == 1) {
-      header_size = 0;
-   }
-
-   if (header_size != 0) {
-      assert(header_size == 2);
-      /* Allocate 2 registers for a header */
-      length += 2;
-   }
-
-   if (payload.aa_dest_stencil_reg) {
-      sources[length] = fs_reg(GRF, alloc.allocate(1));
-      emit(MOV(sources[length],
-               fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
-      length++;
-   }
-
-   prog_data->uses_omask =
-      prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
-   if (prog_data->uses_omask) {
-      this->current_annotation = "FB write oMask";
-      assert(this->sample_mask.file != BAD_FILE);
-      /* Hand over gl_SampleMask. Only lower 16 bits are relevant.  Since
-       * it's unsinged single words, one vgrf is always 16-wide.
-       */
-      sources[length] = fs_reg(GRF, alloc.allocate(1),
-                               BRW_REGISTER_TYPE_UW, 16);
-      emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
-      length++;
-   }
-
-   payload_header_size = length;
-
-   if (color0.file == BAD_FILE) {
-      /* Even if there's no color buffers enabled, we still need to send
-       * alpha out the pipeline to our null renderbuffer to support
-       * alpha-testing, alpha-to-coverage, and so on.
-       */
-      if (this->outputs[0].file != BAD_FILE)
-         setup_color_payload(&sources[length + 3], offset(this->outputs[0], 3),
-                             1, exec_size, false);
-      length += 4;
-   } else if (color1.file == BAD_FILE) {
-      if (src0_alpha.file != BAD_FILE) {
-         setup_color_payload(&sources[length], src0_alpha, 1, exec_size, false);
-         length++;
-      }
 
-      setup_color_payload(&sources[length], color0, components,
-                          exec_size, use_2nd_half);
-      length += 4;
-   } else {
-      setup_color_payload(&sources[length], color0, components,
-                          exec_size, use_2nd_half);
-      length += 4;
-      setup_color_payload(&sources[length], color1, components,
-                          exec_size, use_2nd_half);
-      length += 4;
-   }
+   /* Hand over gl_FragDepth or the payload depth. */
+   const fs_reg dst_depth = (payload.dest_depth_reg ?
+                             fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
+                             fs_reg());
+   fs_reg src_depth;
 
    if (source_depth_to_render_target) {
-      if (devinfo->gen == 6) {
-	 /* For outputting oDepth on gen6, SIMD8 writes have to be
-	  * used.  This would require SIMD8 moves of each half to
-	  * message regs, kind of like pre-gen5 SIMD16 FB writes.
-	  * Just bail on doing so for now.
-	  */
-	 no16("Missing support for simd16 depth writes on gen6\n");
-      }
-
-      if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-	 /* Hand over gl_FragDepth. */
-	 assert(this->frag_depth.file != BAD_FILE);
-         if (exec_size < dispatch_width) {
-            sources[length] = half(this->frag_depth, use_2nd_half);
-         } else {
-            sources[length] = this->frag_depth;
-         }
-      } else {
-	 /* Pass through the payload depth. */
-         sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
-      }
-      length++;
+      if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+         src_depth = frag_depth;
+      else
+         src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
    }
 
-   if (payload.dest_depth_reg)
-      sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0));
-
-   fs_inst *load;
-   fs_inst *write;
-   if (devinfo->gen >= 7) {
-      /* Send from the GRF */
-      fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F, exec_size);
-      load = emit(LOAD_PAYLOAD(payload, sources, length, payload_header_size));
-      payload.reg = alloc.allocate(load->regs_written);
-      load->dst = payload;
-      write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
-      write->base_mrf = -1;
-   } else {
-      /* Send from the MRF */
-      load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size),
-                               sources, length, payload_header_size));
-
-      /* On pre-SNB, we have to interlace the color values.  LOAD_PAYLOAD
-       * will do this for us if we just give it a COMPR4 destination.
-       */
-      if (brw->gen < 6 && exec_size == 16)
-         load->dst.reg |= BRW_MRF_COMPR4;
-
-      write = emit(FS_OPCODE_FB_WRITE);
-      write->exec_size = exec_size;
-      write->base_mrf = 1;
-   }
+   const fs_reg sources[] = {
+      color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
+      fs_reg(components)
+   };
+   fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
+                             sources, ARRAY_SIZE(sources));
 
-   write->mlen = load->regs_written;
-   write->header_size = header_size;
    if (prog_data->uses_kill) {
       write->predicate = BRW_PREDICATE_NORMAL;
       write->flag_subreg = 1;
    }
+
    return write;
 }
 
@@ -3770,35 +810,23 @@
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
 
    fs_inst *inst = NULL;
-   if (do_dual_src) {
-      this->current_annotation = ralloc_asprintf(this->mem_ctx,
-						 "FB dual-source write");
-      inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
-                                  reg_undef, 4, 8);
-      inst->target = 0;
 
-      /* SIMD16 dual source blending requires to send two SIMD8 dual source
-       * messages, where each message contains color data for 8 pixels. Color
-       * data for the first group of pixels is stored in the "lower" half of
-       * the color registers, so in SIMD16, the previous message did:
-       * m + 0: r0
-       * m + 1: g0
-       * m + 2: b0
-       * m + 3: a0
-       *
-       * Here goes the second message, which packs color data for the
-       * remaining 8 pixels. Color data for these pixels is stored in the
-       * "upper" half of the color registers, so we need to do:
-       * m + 0: r1
-       * m + 1: g1
-       * m + 2: b1
-       * m + 3: a1
+   if (source_depth_to_render_target && devinfo->gen == 6) {
+      /* For outputting oDepth on gen6, SIMD8 writes have to be used.  This
+       * would require SIMD8 moves of each half to message regs, e.g. by using
+       * the SIMD lowering pass.  Unfortunately this is more difficult than it
+       * sounds because the SIMD8 single-source message lacks channel selects
+       * for the second and third subspans.
        */
-      if (dispatch_width == 16) {
-         inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
-                                     reg_undef, 4, 8, true);
-         inst->target = 0;
-      }
+      no16("Missing support for simd16 depth writes on gen6\n");
+   }
+
+   if (do_dual_src) {
+      const fs_builder abld = bld.annotate("FB dual-source write");
+
+      inst = emit_single_fb_write(abld, this->outputs[0],
+                                  this->dual_src_output, reg_undef, 4);
+      inst->target = 0;
 
       prog_data->dual_src_blend = true;
    } else {
@@ -3807,17 +835,16 @@
          if (this->outputs[target].file == BAD_FILE)
             continue;
 
-         this->current_annotation = ralloc_asprintf(this->mem_ctx,
-                                                    "FB write target %d",
-                                                    target);
+         const fs_builder abld = bld.annotate(
+            ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
+
          fs_reg src0_alpha;
          if (devinfo->gen >= 6 && key->replicate_alpha && target != 0)
-            src0_alpha = offset(outputs[0], 3);
+            src0_alpha = offset(outputs[0], bld, 3);
 
-         inst = emit_single_fb_write(this->outputs[target], reg_undef,
+         inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
                                      src0_alpha,
-                                     this->output_components[target],
-                                     dispatch_width);
+                                     this->output_components[target]);
          inst->target = target;
       }
    }
@@ -3827,19 +854,24 @@
        * alpha out the pipeline to our null renderbuffer to support
        * alpha-testing, alpha-to-coverage, and so on.
        */
-      inst = emit_single_fb_write(reg_undef, reg_undef, reg_undef, 0,
-                                  dispatch_width);
+      /* FINISHME: Factor out this frequently recurring pattern into a
+       * helper function.
+       */
+      const fs_reg srcs[] = { reg_undef, reg_undef,
+                              reg_undef, offset(this->outputs[0], bld, 3) };
+      const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+      bld.LOAD_PAYLOAD(tmp, srcs, 4, 0);
+
+      inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4);
       inst->target = 0;
    }
 
    inst->eot = true;
-   this->current_annotation = NULL;
 }
 
 void
-fs_visitor::setup_uniform_clipplane_values()
+fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
 {
-   gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
    const struct brw_vue_prog_key *key =
       (const struct brw_vue_prog_key *) this->key;
 
@@ -3853,13 +885,23 @@
    }
 }
 
-void fs_visitor::compute_clip_distance()
+/**
+ * Lower legacy fixed-function and gl_ClipVertex clipping to clip distances.
+ *
+ * This does nothing if the shader uses gl_ClipDistance or user clipping is
+ * disabled altogether.
+ */
+void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
 {
    struct brw_vue_prog_data *vue_prog_data =
       (struct brw_vue_prog_data *) prog_data;
    const struct brw_vue_prog_key *key =
       (const struct brw_vue_prog_key *) this->key;
 
+   /* Bail unless some sort of legacy clipping is enabled */
+   if (!key->userclip_active || prog->UsesClipDistanceOut)
+      return;
+
    /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
     *
     *     "If a linked set of shaders forming the vertex stage contains no
@@ -3882,9 +924,9 @@
    if (outputs[clip_vertex].file == BAD_FILE)
       return;
 
-   setup_uniform_clipplane_values();
+   setup_uniform_clipplane_values(clip_planes);
 
-   current_annotation = "user clip distances";
+   const fs_builder abld = bld.annotate("user clip distances");
 
    this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
    this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
@@ -3894,10 +936,10 @@
       fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4];
       output.reg_offset = i & 3;
 
-      emit(MUL(output, outputs[clip_vertex], u));
+      abld.MUL(output, outputs[clip_vertex], u);
       for (int j = 1; j < 4; j++) {
          u.reg = userplane[i].reg + j;
-         emit(MAD(output, output, offset(outputs[clip_vertex], j), u));
+         abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u);
       }
    }
 }
@@ -3916,22 +958,24 @@
    bool flush;
    fs_reg sources[8];
 
-   /* Lower legacy ff and ClipVertex clipping to clip distances */
-   if (key->base.userclip_active && !prog->UsesClipDistanceOut)
-      compute_clip_distance();
-
    /* If we don't have any valid slots to write, just do a minimal urb write
-    * send to terminate the shader. */
+    * send to terminate the shader.  This includes 1 slot of undefined data,
+    * because it's invalid to write 0 data:
+    *
+    * From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions -
+    * Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read >
+    * Write Data Payload:
+    *
+    *    "The write data payload can be between 1 and 8 message phases long."
+    */
    if (vue_map->slots_valid == 0) {
+      fs_reg payload = fs_reg(GRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
+      bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
+                                                BRW_REGISTER_TYPE_UD)));
 
-      fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-      fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
-                                                      BRW_REGISTER_TYPE_UD))));
-      inst->force_writemask_all = true;
-
-      inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+      fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
       inst->eot = true;
-      inst->mlen = 1;
+      inst->mlen = 2;
       inst->offset = 1;
       return;
    }
@@ -3958,7 +1002,7 @@
          }
 
          zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-         emit(MOV(zero, fs_reg(0u)));
+         bld.MOV(zero, fs_reg(0u));
 
          sources[length++] = zero;
          if (vue_map->slots_valid & VARYING_BIT_LAYER)
@@ -4012,19 +1056,18 @@
              */
             for (int i = 0; i < 4; i++) {
                reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
-               src = offset(this->outputs[varying], i);
-               fs_inst *inst = emit(MOV(reg, src));
-               inst->saturate = true;
+               src = offset(this->outputs[varying], bld, i);
+               set_saturate(true, bld.MOV(reg, src));
                sources[length++] = reg;
             }
          } else {
             for (int i = 0; i < 4; i++)
-               sources[length++] = offset(this->outputs[varying], i);
+               sources[length++] = offset(this->outputs[varying], bld, i);
          }
          break;
       }
 
-      current_annotation = "URB write";
+      const fs_builder abld = bld.annotate("URB write");
 
       /* If we've queued up 8 registers of payload (2 VUE slots), if this is
        * the last slot or if we need to flush (see BAD_FILE varying case
@@ -4036,23 +1079,15 @@
       if (flush) {
          fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
          fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
-                                 BRW_REGISTER_TYPE_F, dispatch_width);
-
-         /* We need WE_all on the MOV for the message header (the URB handles)
-          * so do a MOV to a dummy register and set force_writemask_all on the
-          * MOV.  LOAD_PAYLOAD will preserve that.
-          */
-         fs_reg dummy = fs_reg(GRF, alloc.allocate(1),
-                               BRW_REGISTER_TYPE_UD);
-         fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0),
-                                                       BRW_REGISTER_TYPE_UD))));
-         inst->force_writemask_all = true;
-         payload_sources[0] = dummy;
+                                 BRW_REGISTER_TYPE_F);
+         payload_sources[0] =
+            fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
 
          memcpy(&payload_sources[1], sources, length * sizeof sources[0]);
-         emit(LOAD_PAYLOAD(payload, payload_sources, length + 1, 1));
+         abld.LOAD_PAYLOAD(payload, payload_sources, length + 1, 1);
 
-         inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+         fs_inst *inst =
+            abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
          inst->eot = last;
          inst->mlen = length + 1;
          inst->offset = urb_offset;
@@ -4064,21 +1099,9 @@
 }
 
 void
-fs_visitor::resolve_ud_negate(fs_reg *reg)
-{
-   if (reg->type != BRW_REGISTER_TYPE_UD ||
-       !reg->negate)
-      return;
-
-   fs_reg temp = vgrf(glsl_type::uint_type);
-   emit(MOV(temp, *reg));
-   *reg = temp;
-}
-
-void
 fs_visitor::emit_cs_terminate()
 {
-   assert(brw->gen >= 7);
+   assert(devinfo->gen >= 7);
 
    /* We are getting the thread ID from the compute shader header */
    assert(stage == MESA_SHADER_COMPUTE);
@@ -4089,52 +1112,54 @@
     */
    struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
    fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-   fs_inst *inst = emit(MOV(payload, g0));
-   inst->force_writemask_all = true;
+   bld.group(8, 0).exec_all().MOV(payload, g0);
 
    /* Send a message to the thread spawner to terminate the thread. */
-   inst = emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
+   fs_inst *inst = bld.exec_all()
+                      .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
    inst->eot = true;
 }
 
-/**
- * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
- *
- * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined.
- * If we need a proper boolean value, we have to fix it up to be 0 or ~0.
- */
 void
-fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
+fs_visitor::emit_barrier()
 {
-   assert(devinfo->gen <= 5);
+   assert(devinfo->gen >= 7);
 
-   if (rvalue->type != glsl_type::bool_type)
-      return;
+   /* We are getting the barrier ID from the compute shader header */
+   assert(stage == MESA_SHADER_COMPUTE);
+
+   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+
+   /* Clear the message payload */
+   bld.exec_all().MOV(payload, fs_reg(0u));
 
-   fs_reg and_result = vgrf(glsl_type::bool_type);
-   fs_reg neg_result = vgrf(glsl_type::bool_type);
-   emit(AND(and_result, *reg, fs_reg(1)));
-   emit(MOV(neg_result, negate(and_result)));
-   *reg = neg_result;
+   /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
+   fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
+   bld.exec_all().AND(component(payload, 2), r0_2, fs_reg(0x0f000000u));
+
+   /* Emit a gateway "barrier" message using the payload we set up, followed
+    * by a wait instruction.
+    */
+   bld.exec_all().emit(SHADER_OPCODE_BARRIER, reg_undef, payload);
 }
 
-fs_visitor::fs_visitor(struct brw_context *brw,
+fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
                        void *mem_ctx,
                        gl_shader_stage stage,
                        const void *key,
                        struct brw_stage_prog_data *prog_data,
                        struct gl_shader_program *shader_prog,
                        struct gl_program *prog,
-                       unsigned dispatch_width)
-   : backend_visitor(brw, shader_prog, prog, prog_data, stage),
-     reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)),
-     reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)),
-     reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)),
+                       unsigned dispatch_width,
+                       int shader_time_index)
+   : backend_shader(compiler, log_data, mem_ctx,
+                    shader_prog, prog, prog_data, stage),
      key(key), prog_data(prog_data),
-     dispatch_width(dispatch_width), promoted_constants(0)
+     dispatch_width(dispatch_width),
+     shader_time_index(shader_time_index),
+     promoted_constants(0),
+     bld(fs_builder(this, dispatch_width).at_end())
 {
-   this->mem_ctx = mem_ctx;
-
    switch (stage) {
    case MESA_SHADER_FRAGMENT:
       key_tex = &((const brw_wm_prog_key *) key)->tex;
@@ -4153,12 +1178,9 @@
    this->failed = false;
    this->simd16_unsupported = false;
    this->no16_msg = NULL;
-   this->variable_ht = hash_table_ctor(0,
-                                       hash_table_pointer_hash,
-                                       hash_table_pointer_compare);
 
    this->nir_locals = NULL;
-   this->nir_globals = NULL;
+   this->nir_ssa_values = NULL;
 
    memset(&this->payload, 0, sizeof(this->payload));
    memset(this->outputs, 0, sizeof(this->outputs));
@@ -4168,9 +1190,6 @@
    this->first_non_payload_grf = 0;
    this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
-   this->current_annotation = NULL;
-   this->base_ir = NULL;
-
    this->virtual_grf_start = NULL;
    this->virtual_grf_end = NULL;
    this->live_intervals = NULL;
@@ -4190,5 +1209,4 @@
 
 fs_visitor::~fs_visitor()
 {
-   hash_table_dtor(this->variable_ht);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_gs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_gs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_gs.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_gs.c	2015-09-16 14:36:09.000000000 +0000
@@ -64,12 +64,16 @@
 
    /* We also upload clip plane data as uniforms */
    param_count += MAX_CLIP_PLANES * 4;
+   param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
 
    c.prog_data.base.base.param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    c.prog_data.base.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   c.prog_data.base.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
    c.prog_data.base.base.nr_params = param_count;
+   c.prog_data.base.base.nr_image_params = gs->NumImages;
 
    if (brw->gen >= 7) {
       if (gp->program.OutputType == GL_POINTS) {
@@ -267,14 +271,7 @@
    }
 
    /* Scratch space is used for register spilling */
-   if (c.base.last_scratch) {
-      perf_debug("Geometry shader triggered register spilling.  "
-                 "Try reducing the number of live vec4 values to "
-                 "improve performance.\n");
-
-      c.prog_data.base.base.total_scratch
-         = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
-
+   if (c.prog_data.base.base.total_scratch) {
       brw_get_scratch_bo(brw, &stage_state->scratch_bo,
 			 c.prog_data.base.base.total_scratch *
                          brw->max_gs_threads);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_gs_surface_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_gs_surface_state.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_gs_surface_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -47,11 +47,12 @@
       return;
 
    /* BRW_NEW_GS_PROG_DATA */
-   const struct brw_stage_prog_data *prog_data = &brw->gs.prog_data->base.base;
+   const struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base;
+   const bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
 
    /* _NEW_PROGRAM_CONSTANTS */
    brw_upload_pull_constants(brw, BRW_NEW_GS_CONSTBUF, &gp->program.Base,
-                             stage_state, prog_data, false);
+                             stage_state, &prog_data->base, dword_pitch);
 }
 
 const struct brw_tracked_state brw_gs_pull_constants = {
@@ -77,8 +78,11 @@
       return;
 
    /* BRW_NEW_GS_PROG_DATA */
+   struct brw_vue_prog_data *prog_data = &brw->gs.prog_data->base;
+   bool dword_pitch = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
+
    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
-			   &brw->gs.base, &brw->gs.prog_data->base.base, false);
+			   &brw->gs.base, &prog_data->base, dword_pitch);
 }
 
 const struct brw_tracked_state brw_gs_ubo_surfaces = {
@@ -115,3 +119,28 @@
    },
    .emit = brw_upload_gs_abo_surfaces,
 };
+
+/* Upload GS image surfaces; a no-op without a current geometry program. */
+static void
+brw_upload_gs_image_surfaces(struct brw_context *brw)
+{
+   /* BRW_NEW_GEOMETRY_PROGRAM */
+   struct gl_shader_program *gs_prog =
+      brw->ctx._Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+
+   if (!gs_prog)
+      return;
+   /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+   brw_upload_image_surfaces(brw, gs_prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+                             &brw->gs.base, &brw->gs.prog_data->base.base);
+}
+
+const struct brw_tracked_state brw_gs_image_surfaces = {
+   .dirty = {
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_GEOMETRY_PROGRAM |
+             BRW_NEW_GS_PROG_DATA |
+             BRW_NEW_IMAGE_UNITS,
+   },
+   .emit = brw_upload_gs_image_surfaces,
+};
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_inst.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_inst.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_inst.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_inst.h	2015-09-16 14:36:09.000000000 +0000
@@ -322,6 +322,9 @@
 FC(gen4_pop_count,  115, 112, devinfo->gen < 6)
 /** @} */
 
+/* Message descriptor bits: descriptor bit x is instruction bit x + 96 */
+#define MD(x) ((x) + 96)
+
 /**
  * Fields for SEND messages:
  *  @{
@@ -347,6 +350,7 @@
    /* 6:   */ 115, 115,
    /* 7:   */ 115, 115,
    /* 8:   */ 115, 115)
+F(gateway_notify, MD(16), MD(15))
 FF(function_control,
    /* 4:   */ 111,  96,
    /* 4.5: */ 111,  96,
@@ -354,6 +358,13 @@
    /* 6:   */ 114,  96,
    /* 7:   */ 114,  96,
    /* 8:   */ 114,  96)
+FF(gateway_subfuncid,
+   /* 4:   */ MD(1), MD(0),
+   /* 4.5: */ MD(1), MD(0),
+   /* 5:   */ MD(1), MD(0), /* 2:0, but bit 2 is reserved MBZ */
+   /* 6:   */ MD(2), MD(0),
+   /* 7:   */ MD(2), MD(0),
+   /* 8:   */ MD(2), MD(0))
 FF(sfid,
    /* 4:   */ 123, 120, /* called msg_target */
    /* 4.5  */ 123, 120,
@@ -364,9 +375,6 @@
 FC(base_mrf,   27,  24, devinfo->gen < 6);
 /** @} */
 
-/* Message descriptor bits */
-#define MD(x) (x + 96)
-
 /**
  * URB message function control bits:
  *  @{
@@ -675,9 +683,9 @@
    high %= 64;
    low %= 64;
 
-   const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+   const uint64_t mask = (1ull << (high - low + 1)) - 1;
 
-   return (inst->data[word] & mask) >> low;
+   return (inst->data[word] >> low) & mask;
 }
 
 /**
@@ -694,12 +702,12 @@
    high %= 64;
    low %= 64;
 
-   const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+   const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
 
    /* Make sure the supplied value actually fits in the given bitfield. */
    assert((value & (mask >> low)) == value);
 
-   inst->data[word] = (inst->data[word] & ~mask) | ((value << low) & mask);
+   inst->data[word] = (inst->data[word] & ~mask) | (value << low);
 }
 
 #undef BRW_IA16_ADDR_IMM
@@ -723,9 +731,9 @@
 static inline unsigned
 brw_compact_inst_bits(brw_compact_inst *inst, unsigned high, unsigned low)
 {
-   const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+   const uint64_t mask = (1ull << (high - low + 1)) - 1;
 
-   return (inst->data & mask) >> low;
+   return (inst->data >> low) & mask;
 }
 
 /**
@@ -737,12 +745,12 @@
 brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low,
                           uint64_t value)
 {
-   const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+   const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
 
    /* Make sure the supplied value actually fits in the given bitfield. */
    assert((value & (mask >> low)) == value);
 
-   inst->data = (inst->data & ~mask) | ((value << low) & mask);
+   inst->data = (inst->data & ~mask) | (value << low);
 }
 
 #define F(name, high, low)                                      \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_ir_fs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_ir_fs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_ir_fs.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_ir_fs.h	2015-09-16 14:36:09.000000000 +0000
@@ -44,11 +44,16 @@
    fs_reg(struct brw_reg fixed_hw_reg);
    fs_reg(enum register_file file, int reg);
    fs_reg(enum register_file file, int reg, enum brw_reg_type type);
-   fs_reg(enum register_file file, int reg, enum brw_reg_type type, uint8_t width);
 
    bool equals(const fs_reg &r) const;
    bool is_contiguous() const;
 
+   /**
+    * Return the size in bytes of a single logical component of the
+    * register assuming the given execution width.
+    */
+   unsigned component_size(unsigned width) const;
+
    /** Smear a channel of the reg to all channels. */
    fs_reg &set_smear(unsigned subreg);
 
@@ -60,14 +65,6 @@
 
    fs_reg *reladdr;
 
-   /**
-    * The register width.  This indicates how many hardware values are
-    * represented by each virtual value.  Valid values are 1, 8, or 16.
-    * For immediate values, this is 1.  Most of the rest of the time, it
-    * will be equal to the dispatch width.
-    */
-   uint8_t width;
-
    /** Register region horizontal stride */
    uint8_t stride;
 };
@@ -129,33 +126,10 @@
 }
 
 static inline fs_reg
-offset(fs_reg reg, unsigned delta)
-{
-   switch (reg.file) {
-   case BAD_FILE:
-      break;
-   case GRF:
-   case MRF:
-   case ATTR:
-      return byte_offset(reg,
-                         delta * MAX2(reg.width * reg.stride, 1) *
-                         type_sz(reg.type));
-   case UNIFORM:
-      reg.reg_offset += delta;
-      break;
-   default:
-      assert(delta == 0);
-   }
-   return reg;
-}
-
-static inline fs_reg
 component(fs_reg reg, unsigned idx)
 {
    assert(reg.subreg_offset == 0);
-   assert(idx < reg.width);
    reg.subreg_offset = idx * type_sz(reg.type);
-   reg.width = 1;
    reg.stride = 0;
    return reg;
 }
@@ -163,7 +137,7 @@
 static inline bool
 is_uniform(const fs_reg &reg)
 {
-   return (reg.width == 1 || reg.stride == 0 || reg.is_null()) &&
+   return (reg.stride == 0 || reg.is_null()) &&
           (!reg.reladdr || is_uniform(*reg.reladdr));
 }
 
@@ -185,8 +159,6 @@
 
    case GRF:
    case MRF:
-      assert(reg.width == 16);
-      reg.width = 8;
       return horiz_offset(reg, 8 * idx);
 
    case ATTR:
@@ -210,20 +182,13 @@
 
    fs_inst();
    fs_inst(enum opcode opcode, uint8_t exec_size);
-   fs_inst(enum opcode opcode, const fs_reg &dst);
+   fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
            const fs_reg &src0);
-   fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
            const fs_reg &src0, const fs_reg &src1);
-   fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-           const fs_reg &src1);
    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
            const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
-   fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
-           const fs_reg &src1, const fs_reg &src2);
-   fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg src[],
-           unsigned sources);
    fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
            const fs_reg src[], unsigned sources);
    fs_inst(const fs_inst &that);
@@ -236,6 +201,7 @@
    bool is_send_from_grf() const;
    bool is_partial_write() const;
    bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
+   unsigned components_read(unsigned i) const;
    int regs_read(int arg) const;
    bool can_do_source_mods(const struct brw_device_info *devinfo);
    bool has_side_effects() const;
@@ -256,7 +222,6 @@
    uint8_t exec_size;
 
    bool eot:1;
-   bool force_uncompressed:1;
    bool force_sechalf:1;
    bool pi_noperspective:1;   /**< Pixel interpolator noperspective flag */
 };
@@ -271,4 +236,48 @@
    return inst;
 }
 
+/**
+ * Make the execution of \p inst dependent on the evaluation of predicate
+ * \p pred, inverted when \p inverse is true.  Returns \p inst.
+ */
+static inline fs_inst *
+set_predicate_inv(enum brw_predicate pred, bool inverse,
+                  fs_inst *inst)
+{
+   inst->predicate = pred;
+   inst->predicate_inverse = inverse;
+   return inst;
+}
+
+/**
+ * Make execution of \p inst depend on the non-inverted predicate \p pred.
+ */
+static inline fs_inst *
+set_predicate(enum brw_predicate pred, fs_inst *inst)
+{
+   return set_predicate_inv(pred, false, inst);
+}
+
+/**
+ * Write the result of evaluating the condition given by \p mod to a flag
+ * register.  Returns \p inst.
+ */
+static inline fs_inst *
+set_condmod(enum brw_conditional_mod mod, fs_inst *inst)
+{
+   inst->conditional_mod = mod;
+   return inst;
+}
+
+/**
+ * Clamp the result of \p inst to the saturation range of its destination
+ * datatype when \p saturate is true.  Returns \p inst.
+ */
+static inline fs_inst *
+set_saturate(bool saturate, fs_inst *inst)
+{
+   inst->saturate = saturate;
+   return inst;
+}
+
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_ir_vec4.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_ir_vec4.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_ir_vec4.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_ir_vec4.h	2015-09-16 14:36:09.000000000 +0000
@@ -113,6 +113,8 @@
    dst_reg(register_file file, int reg);
    dst_reg(register_file file, int reg, const glsl_type *type,
            unsigned writemask);
+   dst_reg(register_file file, int reg, brw_reg_type type,
+           unsigned writemask);
    dst_reg(struct brw_reg reg);
    dst_reg(class vec4_visitor *v, const struct glsl_type *type);
 
@@ -190,6 +192,50 @@
    }
 };
 
+/**
+ * Make the execution of \p inst dependent on the evaluation of predicate
+ * \p pred, inverted when \p inverse is true.  Returns \p inst.
+ */
+inline vec4_instruction *
+set_predicate_inv(enum brw_predicate pred, bool inverse,
+                  vec4_instruction *inst)
+{
+   inst->predicate = pred;
+   inst->predicate_inverse = inverse;
+   return inst;
+}
+
+/**
+ * Make execution of \p inst depend on the non-inverted predicate \p pred.
+ */
+inline vec4_instruction *
+set_predicate(enum brw_predicate pred, vec4_instruction *inst)
+{
+   return set_predicate_inv(pred, false, inst);
+}
+
+/**
+ * Write the result of evaluating the condition given by \p mod to a flag
+ * register.  Returns \p inst.
+ */
+inline vec4_instruction *
+set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst)
+{
+   inst->conditional_mod = mod;
+   return inst;
+}
+
+/**
+ * Clamp the result of \p inst to the saturation range of its destination
+ * datatype when \p saturate is true.  Returns \p inst.
+ */
+inline vec4_instruction *
+set_saturate(bool saturate, vec4_instruction *inst)
+{
+   inst->saturate = saturate;
+   return inst;
+}
+
 } /* namespace brw */
 
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -89,19 +89,18 @@
 ir_visitor_status
 lower_texture_grad_visitor::visit_leave(ir_texture *ir)
 {
-   /* Only lower textureGrad with shadow samplers */
-   if (ir->op != ir_txd || !ir->shadow_comparitor)
+   /* Only lower textureGrad with cube maps or shadow samplers */
+   if (ir->op != ir_txd ||
+      (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE &&
+       !ir->shadow_comparitor))
       return visit_continue;
 
-   /* Lower textureGrad() with samplerCubeShadow even if we have the sample_d_c
+   /* Lower textureGrad() with samplerCube* even if we have the sample_d_c
     * message.  GLSL provides gradients for the 'r' coordinate.  Unfortunately:
     *
     * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description:
     * "The r coordinate contains the faceid, and the r gradients are ignored
     *  by hardware."
-    *
-    * We likely need to do a similar treatment for samplerCube and
-    * samplerCubeArray, but we have insufficient testing for that at the moment.
     */
    bool need_lowering = !has_sample_d_c ||
       ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
@@ -155,9 +154,20 @@
 			       expr(ir_unop_sqrt, dot(dPdy, dPdy)));
    }
 
-   /* lambda_base = log2(rho).  We're ignoring GL state biases for now. */
+   /* lambda_base = log2(rho).  We're ignoring GL state biases for now.
+    *
+    * For cube maps the result of these formulas is giving us a value of rho
+    * that is twice the value we should use, so divide it by 2 or,
+    * alternatively, remove one unit from the result of the log2 computation.
+    */
    ir->op = ir_txl;
-   ir->lod_info.lod = expr(ir_unop_log2, rho);
+   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
+      ir->lod_info.lod = expr(ir_binop_add,
+                              expr(ir_unop_log2, rho),
+                              new(mem_ctx) ir_constant(-1.0f));
+   } else {
+      ir->lod_info.lod = expr(ir_unop_log2, rho);
+   }
 
    progress = true;
    return visit_continue;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c	2015-09-16 14:36:09.000000000 +0000
@@ -128,7 +128,7 @@
    _mesa_AttachShader(clear->shader_prog, vs);
    _mesa_DeleteShader(vs);
    _mesa_BindAttribLocation(clear->shader_prog, 0, "position");
-   _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta clear");
+   _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta repclear");
    _mesa_LinkProgram(clear->shader_prog);
 
    clear->color_location =
@@ -200,7 +200,7 @@
 
    brw_draw_prims(ctx, &prim, 1, NULL,
                   GL_TRUE, start, start + count - 1,
-                  NULL, NULL);
+                  NULL, 0, NULL);
 }
 
 static void
@@ -339,11 +339,16 @@
                                mesa_format format,
                                const union gl_color_union *color)
 {
-   if (_mesa_is_format_integer_color(format))
+   if (_mesa_is_format_integer_color(format)) {
+      if (brw->gen >= 8) {
+         perf_debug("Integer fast clear not enabled for (%s)\n",
+                    _mesa_get_format_name(format));
+      }
       return false;
+   }
 
    for (int i = 0; i < 4; i++) {
-      if (color->f[i] != 0.0 && color->f[i] != 1.0 &&
+      if (color->f[i] != 0.0f && color->f[i] != 1.0f &&
           _mesa_format_has_color_component(format, i)) {
          return false;
       }
@@ -361,7 +366,7 @@
    uint32_t bits = 0;
    for (int i = 0; i < 4; i++) {
       /* Testing for non-0 works for integer and float colors */
-      if (color->f[i] != 0.0)
+      if (color->f[i] != 0.0f)
          bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
    }
    return bits;
@@ -466,7 +471,8 @@
        *      linear (untiled) memory is UNDEFINED."
        */
       if (irb->mt->tiling == I915_TILING_NONE) {
-         perf_debug("falling back to plain clear because buffers are untiled\n");
+         perf_debug("Falling back to plain clear because %dx%d buffer is untiled\n",
+                    irb->mt->logical_width0, irb->mt->logical_height0);
          clear_type = PLAIN_CLEAR;
       }
 
@@ -477,7 +483,8 @@
       for (int i = 0; i < 4; i++) {
          if (_mesa_format_has_color_component(irb->mt->format, i) &&
              !color_mask[i]) {
-            perf_debug("falling back to plain clear because of color mask\n");
+            perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n",
+                       irb->mt->logical_width0, irb->mt->logical_height0);
             clear_type = PLAIN_CLEAR;
          }
       }
@@ -616,7 +623,7 @@
     *     write-flush must be issued before sending any DRAW commands on that
     *     render target.
     */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    /* If we had to fall back to plain clear for any buffers, clear those now
     * by calling into meta.
@@ -670,7 +677,7 @@
    GLuint fbo, rbo;
    struct rect rect;
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    _mesa_meta_begin(ctx, MESA_META_ALL);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -239,10 +239,10 @@
 
    if (mirror) {
       _mesa_Uniform1f(multiplier, -scale);
-      _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5) * scale);
+      _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5f) * scale);
    } else {
       _mesa_Uniform1f(multiplier, scale);
-      _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5) * scale);
+      _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5f) * scale);
    }
 }
 
@@ -414,6 +414,12 @@
    GLenum target;
 
    _mesa_meta_fb_tex_blit_begin(ctx, &blit);
+   /* XXX: Pretend to support stencil textures so _mesa_base_tex_format()
+    * returns a valid format.  When we properly support the extension, we
+    * should remove this.
+    */
+   assert(ctx->Extensions.ARB_texture_stencil8 == false);
+   ctx->Extensions.ARB_texture_stencil8 = true;
 
    _mesa_GenFramebuffers(1, &fbo);
    /* Force the surface to be configured for level zero. */
@@ -451,6 +457,7 @@
    _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
 
 error:
+   ctx->Extensions.ARB_texture_stencil8 = false;
    _mesa_meta_fb_tex_blit_end(ctx, target, &blit);
    _mesa_meta_end(ctx);
 
@@ -493,11 +500,11 @@
                              .mirror_x = mirror_x, .mirror_y = mirror_y };
    adjust_mip_level(dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
    _mesa_meta_begin(ctx, MESA_META_ALL);
    brw_meta_stencil_blit(brw,
                          dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 }
 
 void
@@ -517,7 +524,7 @@
    if (dst->stencil_mt)
       dst = dst->stencil_mt;
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
    _mesa_meta_begin(ctx, MESA_META_ALL);
 
    _mesa_GenFramebuffers(1, &fbo);
@@ -528,7 +535,7 @@
                                  GL_RENDERBUFFER, rbo);
 
    brw_meta_stencil_blit(brw, dst, 0, 0, &dims);
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    _mesa_DeleteRenderbuffers(1, &rbo);
    _mesa_DeleteFramebuffers(1, &fbo);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_meta_updownsample.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_meta_updownsample.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_meta_updownsample.c	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_meta_updownsample.c	2015-09-16 14:36:09.000000000 +0000
@@ -116,7 +116,7 @@
       blit_bit = GL_COLOR_BUFFER_BIT;
    }
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    _mesa_meta_begin(ctx, MESA_META_ALL);
    _mesa_GenFramebuffers(2, fbos);
@@ -147,5 +147,5 @@
 
    _mesa_meta_end(ctx);
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_misc_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_misc_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_misc_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_misc_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -39,19 +39,23 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 
+#include "main/framebuffer.h"
 #include "main/fbobject.h"
 #include "main/glformats.h"
 
 /* Constant single cliprect for framebuffer object or DRI2 drawing */
-static void upload_drawing_rect(struct brw_context *brw)
+static void
+upload_drawing_rect(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
+   const struct gl_framebuffer *fb = ctx->DrawBuffer;
+   const unsigned int fb_width = _mesa_geometric_width(fb);
+   const unsigned int fb_height = _mesa_geometric_height(fb);
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
    OUT_BATCH(0); /* xmin, ymin */
-   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
-	    ((ctx->DrawBuffer->Height - 1) << 16));
+   OUT_BATCH(((fb_width - 1) & 0xffff) | ((fb_height - 1) << 16));
    OUT_BATCH(0);
    ADVANCE_BATCH();
 }
@@ -70,7 +74,8 @@
  * The state pointers in this packet are all relative to the general state
  * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
  */
-static void upload_pipelined_state_pointers(struct brw_context *brw )
+static void
+upload_pipelined_state_pointers(struct brw_context *brw)
 {
    if (brw->gen == 5) {
       /* Need to flush before changing clip max threads for errata. */
@@ -101,7 +106,8 @@
    brw->ctx.NewDriverState |= BRW_NEW_PSP;
 }
 
-static void upload_psp_urb_cbs(struct brw_context *brw )
+static void
+upload_psp_urb_cbs(struct brw_context *brw)
 {
    upload_pipelined_state_pointers(brw);
    brw_upload_urb_fence(brw);
@@ -577,7 +583,7 @@
     * non-pipelined state that will need the PIPE_CONTROL workaround.
     */
    if (brw->gen == 6) {
-      intel_emit_depth_stall_flushes(brw);
+      brw_emit_depth_stall_flushes(brw);
    }
 
    unsigned int len;
@@ -697,13 +703,11 @@
    .emit = brw_emit_depthbuffer,
 };
 
-
-
-/***********************************************************************
+/**
  * Polygon stipple packet
  */
-
-static void upload_polygon_stipple(struct brw_context *brw)
+static void
+upload_polygon_stipple(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
    GLuint i;
@@ -725,8 +729,7 @@
    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
       for (i = 0; i < 32; i++)
 	  OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
-   }
-   else {
+   } else {
       for (i = 0; i < 32; i++)
 	 OUT_BATCH(ctx->PolygonStipple[i]);
    }
@@ -742,12 +745,11 @@
    .emit = upload_polygon_stipple
 };
 
-
-/***********************************************************************
+/**
  * Polygon stipple offset packet
  */
-
-static void upload_polygon_stipple_offset(struct brw_context *brw)
+static void
+upload_polygon_stipple_offset(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
 
@@ -767,7 +769,7 @@
     * works just fine, and there's no window system to worry about.
     */
    if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
-      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+      OUT_BATCH((32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31);
    else
       OUT_BATCH(0);
    ADVANCE_BATCH();
@@ -782,10 +784,11 @@
    .emit = upload_polygon_stipple_offset
 };
 
-/**********************************************************************
+/**
  * AA Line parameters
  */
-static void upload_aa_line_parameters(struct brw_context *brw)
+static void
+upload_aa_line_parameters(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
 
@@ -812,11 +815,11 @@
    .emit = upload_aa_line_parameters
 };
 
-/***********************************************************************
+/**
  * Line stipple packet
  */
-
-static void upload_line_stipple(struct brw_context *brw)
+static void
+upload_line_stipple(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
    GLfloat tmp;
@@ -831,13 +834,12 @@
 
    if (brw->gen >= 7) {
       /* in U1.16 */
-      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+      tmp = 1.0f / ctx->Line.StippleFactor;
       tmpi = tmp * (1<<16);
       OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
-   }
-   else {
+   } else {
       /* in U1.13 */
-      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+      tmp = 1.0f / ctx->Line.StippleFactor;
       tmpi = tmp * (1<<13);
       OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
    }
@@ -853,7 +855,6 @@
    .emit = upload_line_stipple
 };
 
-
 void
 brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
 {
@@ -869,17 +870,16 @@
    ADVANCE_BATCH();
 }
 
-
-/***********************************************************************
+/**
  * Misc invariant state packets
  */
-
 void
 brw_upload_invariant_state(struct brw_context *brw)
 {
    const bool is_965 = brw->gen == 4 && !brw->is_g4x;
 
-   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw->last_pipeline = BRW_RENDER_PIPELINE;
 
    if (brw->gen < 6) {
       /* Disable depth offset clamping. */
@@ -927,7 +927,8 @@
  * surface state objects, but not the surfaces that the surface state
  * objects point to.
  */
-static void upload_state_base_address( struct brw_context *brw )
+static void
+upload_state_base_address(struct brw_context *brw)
 {
    /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
     * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c	2015-09-16 14:36:09.000000000 +0000
@@ -43,8 +43,8 @@
 static uint8_t
 get_resolve_status_for_src(nir_src *src)
 {
-   nir_instr *src_instr = nir_src_get_parent_instr(src);
-   if (src_instr) {
+   if (src->is_ssa) {
+      nir_instr *src_instr = src->ssa->parent_instr;
       uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
 
       /* If the source instruction needs resolve, then from the perspective
@@ -66,8 +66,8 @@
 static bool
 src_mark_needs_resolve(nir_src *src, void *void_state)
 {
-   nir_instr *src_instr = nir_src_get_parent_instr(src);
-   if (src_instr) {
+   if (src->is_ssa) {
+      nir_instr *src_instr = src->ssa->parent_instr;
       uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
 
       /* If the source instruction is unresolved, then mark it as needing
@@ -109,28 +109,27 @@
          uint8_t resolve_status;
          nir_alu_instr *alu = nir_instr_as_alu(instr);
          switch (alu->op) {
-         case nir_op_flt:
-         case nir_op_ilt:
-         case nir_op_ult:
-         case nir_op_fge:
-         case nir_op_ige:
-         case nir_op_uge:
-         case nir_op_feq:
-         case nir_op_ieq:
-         case nir_op_fne:
-         case nir_op_ine:
-         case nir_op_f2b:
-         case nir_op_i2b:
-            /* This instruction will turn into a CMP when we actually emit
-             * so the result will have to be resolved before it can be used.
+         case nir_op_bany2:
+         case nir_op_bany3:
+         case nir_op_bany4:
+         case nir_op_ball_fequal2:
+         case nir_op_ball_iequal2:
+         case nir_op_ball_fequal3:
+         case nir_op_ball_iequal3:
+         case nir_op_ball_fequal4:
+         case nir_op_ball_iequal4:
+         case nir_op_bany_fnequal2:
+         case nir_op_bany_inequal2:
+         case nir_op_bany_fnequal3:
+         case nir_op_bany_inequal3:
+         case nir_op_bany_fnequal4:
+         case nir_op_bany_inequal4:
+            /* These are only implemented by the vec4 backend and its
+             * implementation emits resolved booleans.  At some point in the
+             * future, this may change and we'll have to remove some of the
+             * above cases.
              */
-            resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED;
-
-            /* Even though the destination is allowed to be left unresolved,
-             * the sources are treated as regular integers or floats so
-             * they need to be resolved.
-             */
-            nir_foreach_src(instr, src_mark_needs_resolve, NULL);
+            resolve_status = BRW_NIR_BOOLEAN_NO_RESOLVE;
             break;
 
          case nir_op_imov:
@@ -169,14 +168,28 @@
          }
 
          default:
-            resolve_status = BRW_NIR_NON_BOOLEAN;
+            if (nir_op_infos[alu->op].output_type == nir_type_bool) {
+               /* These instructions will turn into a CMP when we actually emit
+                * them so the result will have to be resolved before it can be
+                * used.
+                */
+               resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED;
+
+               /* Even though the destination is allowed to be left
+                * unresolved, the sources are treated as regular integers or
+                * floats so they need to be resolved.
+                */
+               nir_foreach_src(instr, src_mark_needs_resolve, NULL);
+            } else {
+               resolve_status = BRW_NIR_NON_BOOLEAN;
+            }
          }
 
-         /* If the destination is SSA-like, go ahead allow unresolved booleans.
+         /* If the destination is SSA, go ahead and allow unresolved booleans.
           * If the destination register doesn't have a well-defined parent_instr
           * we need to resolve immediately.
           */
-         if (alu->dest.dest.reg.reg->parent_instr == NULL &&
+         if (!alu->dest.dest.is_ssa &&
              resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) {
             resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE;
          }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_nir.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_nir.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_nir.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_nir.c	2015-09-16 14:36:09.000000000 +0000
@@ -27,19 +27,27 @@
 #include "program/prog_to_nir.h"
 
 static void
-nir_optimize(nir_shader *nir)
+nir_optimize(nir_shader *nir, bool is_scalar)
 {
    bool progress;
    do {
       progress = false;
       nir_lower_vars_to_ssa(nir);
       nir_validate_shader(nir);
-      nir_lower_alu_to_scalar(nir);
-      nir_validate_shader(nir);
+
+      if (is_scalar) {
+         nir_lower_alu_to_scalar(nir);
+         nir_validate_shader(nir);
+      }
+
       progress |= nir_copy_prop(nir);
       nir_validate_shader(nir);
-      nir_lower_phis_to_scalar(nir);
-      nir_validate_shader(nir);
+
+      if (is_scalar) {
+         nir_lower_phis_to_scalar(nir);
+         nir_validate_shader(nir);
+      }
+
       progress |= nir_copy_prop(nir);
       nir_validate_shader(nir);
       progress |= nir_opt_dce(nir);
@@ -57,33 +65,12 @@
    } while (progress);
 }
 
-static bool
-count_nir_instrs_in_block(nir_block *block, void *state)
-{
-   int *count = (int *) state;
-   nir_foreach_instr(block, instr) {
-      *count = *count + 1;
-   }
-   return true;
-}
-
-static int
-count_nir_instrs(nir_shader *nir)
-{
-   int count = 0;
-   nir_foreach_overload(nir, overload) {
-      if (!overload->impl)
-         continue;
-      nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
-   }
-   return count;
-}
-
 nir_shader *
 brw_create_nir(struct brw_context *brw,
                const struct gl_shader_program *shader_prog,
                const struct gl_program *prog,
-               gl_shader_stage stage)
+               gl_shader_stage stage,
+               bool is_scalar)
 {
    struct gl_context *ctx = &brw->ctx;
    const nir_shader_compiler_options *options =
@@ -113,31 +100,33 @@
    nir_split_var_copies(nir);
    nir_validate_shader(nir);
 
-   nir_optimize(nir);
+   nir_optimize(nir, is_scalar);
 
    /* Lower a bunch of stuff */
    nir_lower_var_copies(nir);
    nir_validate_shader(nir);
 
    /* Get rid of split copies */
-   nir_optimize(nir);
+   nir_optimize(nir, is_scalar);
 
-   if (shader_prog) {
-      nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
-                                                   &nir->num_direct_uniforms,
-                                                   &nir->num_uniforms);
+   if (is_scalar) {
+      nir_assign_var_locations_direct_first(nir, &nir->uniforms,
+                                            &nir->num_direct_uniforms,
+                                            &nir->num_uniforms,
+                                            is_scalar);
+      nir_assign_var_locations(&nir->outputs, &nir->num_outputs, is_scalar);
    } else {
-      /* ARB programs generally create a giant array of "uniform" data, and allow
-       * indirect addressing without any boundaries.  In the absence of bounds
-       * analysis, it's all or nothing.  num_direct_uniforms is only useful when
-       * we have some direct and some indirect access; it doesn't matter here.
-       */
-      nir->num_direct_uniforms = 0;
+      nir_assign_var_locations(&nir->uniforms,
+                               &nir->num_uniforms,
+                               is_scalar);
+
+      foreach_list_typed(nir_variable, var, node, &nir->outputs)
+         var->data.driver_location = var->data.location;
    }
-   nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
-   nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
+   nir_assign_var_locations(&nir->inputs, &nir->num_inputs, is_scalar);
+
+   nir_lower_io(nir, is_scalar);
 
-   nir_lower_io(nir);
    nir_validate_shader(nir);
 
    nir_remove_dead_variables(nir);
@@ -154,7 +143,7 @@
    nir_lower_atomics(nir);
    nir_validate_shader(nir);
 
-   nir_optimize(nir);
+   nir_optimize(nir, is_scalar);
 
    if (brw->gen >= 6) {
       /* Try and fuse multiply-adds */
@@ -176,23 +165,25 @@
    nir_validate_shader(nir);
 
    if (unlikely(debug_enabled)) {
+      /* Re-index SSA defs so we print more sensible numbers. */
+      nir_foreach_overload(nir, overload) {
+         if (overload->impl)
+            nir_index_ssa_defs(overload->impl);
+      }
+
       fprintf(stderr, "NIR (SSA form) for %s shader:\n",
               _mesa_shader_stage_to_string(stage));
       nir_print_shader(nir, stderr);
    }
 
-   static GLuint msg_id = 0;
-   _mesa_gl_debug(&brw->ctx, &msg_id,
-                  MESA_DEBUG_SOURCE_SHADER_COMPILER,
-                  MESA_DEBUG_TYPE_OTHER,
-                  MESA_DEBUG_SEVERITY_NOTIFICATION,
-                  "%s NIR shader: %d inst\n",
-                  _mesa_shader_stage_to_abbrev(stage),
-                  count_nir_instrs(nir));
-
-   nir_convert_from_ssa(nir);
+   nir_convert_from_ssa(nir, is_scalar);
    nir_validate_shader(nir);
 
+   if (!is_scalar) {
+      nir_lower_vec_to_movs(nir);
+      nir_validate_shader(nir);
+   }
+
    /* This is the last pass we run before we start emitting stuff.  It
     * determines when we need to insert boolean resolves on Gen <= 5.  We
     * run it last because it stashes data in instr->pass_flags and we don't
@@ -211,3 +202,42 @@
 
    return nir;
 }
+
+enum brw_reg_type
+brw_type_for_nir_type(nir_alu_type type)
+{
+   /* Pick the BRW register type that corresponds to a NIR ALU type. */
+   switch (type) {
+   case nir_type_float:
+      return BRW_REGISTER_TYPE_F;
+   case nir_type_int:
+   case nir_type_bool:
+      return BRW_REGISTER_TYPE_D;
+   case nir_type_unsigned:
+      return BRW_REGISTER_TYPE_UD;
+   default:
+      unreachable("unknown type");
+   }
+   return BRW_REGISTER_TYPE_F; /* only reached if unreachable() returns */
+}
+
+/* Translate a nir_alu_type (float, int or unsigned only) into the matching
+ * glsl_base_type.  Shared by the brw_vec4_nir and brw_fs_nir code.
+ */
+enum glsl_base_type
+brw_glsl_base_type_for_nir_type(nir_alu_type type)
+{
+   switch (type) {
+   case nir_type_unsigned:
+      return GLSL_TYPE_UINT;
+
+   case nir_type_int:
+      return GLSL_TYPE_INT;
+
+   case nir_type_float:
+      return GLSL_TYPE_FLOAT;
+
+   default:
+      unreachable("bad type");
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_nir.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_nir.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_nir.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_nir.h	2015-09-16 14:36:09.000000000 +0000
@@ -24,6 +24,7 @@
 #pragma once
 
 #include "brw_context.h"
+#include "brw_reg.h"
 #include "glsl/nir/nir.h"
 
 #ifdef __cplusplus
@@ -77,7 +78,12 @@
 nir_shader *brw_create_nir(struct brw_context *brw,
                            const struct gl_shader_program *shader_prog,
                            const struct gl_program *prog,
-                           gl_shader_stage stage);
+                           gl_shader_stage stage,
+                           bool is_scalar);
+
+enum brw_reg_type brw_type_for_nir_type(nir_alu_type type);
+
+enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);
 
 #ifdef __cplusplus
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_performance_monitor.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_performance_monitor.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_performance_monitor.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_performance_monitor.c	2015-09-16 14:36:09.000000000 +0000
@@ -581,7 +581,7 @@
    const int group = PIPELINE_STATS_COUNTERS;
    const int num_counters = ctx->PerfMonitor.Groups[group].NumCounters;
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    for (int i = 0; i < num_counters; i++) {
       if (BITSET_TEST(monitor->base.ActiveCounters[group], i)) {
@@ -687,7 +687,7 @@
  * The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot,
  * including the required PIPE_CONTROL flushes.
  *
- * Sandybridge is the worst case scenario: intel_batchbuffer_emit_mi_flush
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush
  * expands to three PIPE_CONTROLs which are 4 DWords each.  We have to flush
  * before and after MI_REPORT_PERF_COUNT, so multiply by two.  Finally, add
  * the 3 DWords for MI_REPORT_PERF_COUNT itself.
@@ -710,10 +710,10 @@
    /* Make sure the commands to take a snapshot fits in a single batch. */
    intel_batchbuffer_require_space(brw, MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4,
                                    RENDER_RING);
-   int batch_used = brw->batch.used;
+   int batch_used = USED_BATCH(brw->batch);
 
    /* Reports apparently don't always get written unless we flush first. */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    if (brw->gen == 5) {
       /* Ironlake requires two MI_REPORT_PERF_COUNT commands to write all
@@ -751,10 +751,10 @@
    }
 
    /* Reports apparently don't always get written unless we flush after. */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    (void) batch_used;
-   assert(brw->batch.used - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4);
+   assert(USED_BATCH(brw->batch) - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4);
 }
 
 /**
@@ -1386,7 +1386,7 @@
 brw_perf_monitor_new_batch(struct brw_context *brw)
 {
    assert(brw->batch.ring == RENDER_RING);
-   assert(brw->gen < 6 || brw->batch.used == 0);
+   assert(brw->gen < 6 || USED_BATCH(brw->batch) == 0);
 
    if (brw->perfmon.oa_users == 0)
       return;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_pipe_control.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_pipe_control.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_pipe_control.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_pipe_control.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_reg.h"
+
+/**
+ * Per the documentation, a PIPE_CONTROL that sets "Command Streamer
+ * Stall" has to set at least one companion bit as well.  The five
+ * acceptable companions are:
+ *
+ *  - Render Target Cache Flush
+ *  - Depth Cache Flush
+ *  - Stall at Pixel Scoreboard
+ *  - Post-Sync Operation
+ *  - Depth Stall
+ *
+ * When none is present, \p flags gains "Stall at Pixel Scoreboard"; it
+ * has worked well before, though the choice is fairly arbitrary.
+ */
+static void
+gen8_add_cs_stall_workaround_bits(uint32_t *flags)
+{
+   const uint32_t companion_bits = PIPE_CONTROL_DEPTH_STALL |
+                                   PIPE_CONTROL_STALL_AT_SCOREBOARD |
+                                   PIPE_CONTROL_WRITE_TIMESTAMP |
+                                   PIPE_CONTROL_WRITE_DEPTH_COUNT |
+                                   PIPE_CONTROL_WRITE_IMMEDIATE |
+                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                   PIPE_CONTROL_RENDER_TARGET_FLUSH;
+
+   /* Nothing to do unless a CS stall is requested without any of the
+    * companion bits; in that case add "Stall at Pixel Scoreboard."
+    */
+   if ((*flags & PIPE_CONTROL_CS_STALL) && !(*flags & companion_bits))
+      *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+}
+
+/* WaCsStallAtEveryFourthPipecontrol workaround for IVB and BYT:
+ *
+ * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
+ *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
+ *
+ * The kernel does CS stalls between batches, so counting only has to
+ * happen within a batch.  Returns the extra flag to OR in, or 0.
+ */
+static uint32_t
+gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
+{
+   if (brw->gen != 7 || brw->is_haswell)
+      return 0;
+
+   /* This one already stalls the CS, so the count starts over. */
+   if (flags & PIPE_CONTROL_CS_STALL) {
+      brw->pipe_controls_since_last_cs_stall = 0;
+      return 0;
+   }
+
+   /* Ask for a CS stall on the fourth consecutive one that lacks it. */
+   if (++brw->pipe_controls_since_last_cs_stall != 4)
+      return 0;
+   brw->pipe_controls_since_last_cs_stall = 0;
+   return PIPE_CONTROL_CS_STALL;
+}
+
+/**
+ * Emit a PIPE_CONTROL carrying \p flags and no address or immediate data.
+ * Gen8+ uses a 6-dword packet, Gen6-7 a 5-dword one (both after workaround
+ * fix-ups to \p flags); older parts OR the flags into the 4-dword header.
+ * The caller decides which flags are appropriate for the given generation.
+ */
+void
+brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
+{
+   if (brw->gen >= 8) {
+      gen8_add_cs_stall_workaround_bits(&flags);
+
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+      OUT_BATCH(flags);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else if (brw->gen >= 6) {
+      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+      OUT_BATCH(flags);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+/**
+ * Emit a PIPE_CONTROL that writes to \p bo at \p offset; \p imm_lower and
+ * \p imm_upper are emitted as the two dwords following the address.
+ * \p flags should contain one of the following items:
+ *  - PIPE_CONTROL_WRITE_IMMEDIATE
+ *  - PIPE_CONTROL_WRITE_TIMESTAMP
+ *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
+ */
+void
+brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+                            drm_intel_bo *bo, uint32_t offset,
+                            uint32_t imm_lower, uint32_t imm_upper)
+{
+   if (brw->gen >= 8) {
+      gen8_add_cs_stall_workaround_bits(&flags);
+
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+      OUT_BATCH(flags);
+      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  offset);
+      OUT_BATCH(imm_lower);
+      OUT_BATCH(imm_upper);
+      ADVANCE_BATCH();
+   } else if (brw->gen >= 6) {
+      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+
+      /* Sandybridge selects PPGTT/GGTT through DW2 bit 2, whereas later
+       * platforms use DW1 bit 24.  We always use PPGTT on Gen7+.
+       */
+      unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+      OUT_BATCH(flags);
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                gen6_gtt | offset);
+      OUT_BATCH(imm_lower);
+      OUT_BATCH(imm_upper);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
+      OUT_BATCH(imm_lower);
+      OUT_BATCH(imm_upper);
+      ADVANCE_BATCH();
+   }
+}
+
+/**
+ * Restriction [DevSNB, DevIVB], implemented by the three flushes below:
+ *
+ * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
+ * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
+ * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
+ * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
+ * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
+ * unless SW can otherwise guarantee that the pipeline from WM onwards is
+ * already flushed (e.g., via a preceding MI_FLUSH).
+ */
+void
+brw_emit_depth_stall_flushes(struct brw_context *brw)
+{
+   const uint32_t stall = PIPE_CONTROL_DEPTH_STALL;
+   assert(brw->gen >= 6 && brw->gen <= 9);
+   brw_emit_pipe_control_flush(brw, stall);
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+   brw_emit_pipe_control_flush(brw, stall);
+}
+
+/**
+ * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
+ * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
+ *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
+ *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
+ *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
+ *  to be sent before any combination of VS associated 3DSTATE."
+ */
+void
+gen7_emit_vs_workaround_flush(struct brw_context *brw)
+{
+   /* Post-sync immediate write of 0 to workaround_bo, plus a depth stall. */
+   const unsigned wa_flags = PIPE_CONTROL_DEPTH_STALL |
+                             PIPE_CONTROL_WRITE_IMMEDIATE;
+
+   assert(brw->gen == 7);
+   brw_emit_pipe_control_write(brw, wa_flags, brw->workaround_bo, 0, 0, 0);
+}
+
+
+/**
+ * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
+ */
+void
+gen7_emit_cs_stall_flush(struct brw_context *brw)
+{
+   /* The CS stall is paired with an immediate write of 0 to workaround_bo. */
+   const unsigned stall_flags = PIPE_CONTROL_WRITE_IMMEDIATE |
+                                PIPE_CONTROL_CS_STALL;
+
+   brw_emit_pipe_control_write(brw, stall_flags, brw->workaround_bo, 0, 0, 0);
+}
+
+
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+void
+brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
+{
+   /* Step 1: CS stall; stall-at-scoreboard is its required companion bit. */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD |
+                                    PIPE_CONTROL_CS_STALL);
+   /* Step 2: the non-zero post-sync op, an immediate write to workaround_bo. */
+   brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
+                               brw->workaround_bo, 0, 0, 0);
+}
+
+/* Emit a pipelined flush to either flush render and texture cache for
+ * reading from a FBO-drawn texture, or flush so that frontbuffer
+ * render appears on the screen in DRI1.
+ *
+ * This is also used for the always_flush_cache driconf debug option.
+ */
+void
+brw_emit_mi_flush(struct brw_context *brw)
+{
+   if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
+      BEGIN_BATCH_BLT(4);
+      OUT_BATCH(MI_FLUSH_DW);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
+      if (brw->gen >= 6) {
+         if (brw->gen == 9) {
+            /* Hardware workaround: SKL
+             *
+             * Emit Pipe Control with all bits set to zero before emitting
+             * a Pipe Control with VF Cache Invalidate set.
+             */
+            brw_emit_pipe_control_flush(brw, 0);
+         }
+         /* Gen6+ adds further cache flushes/invalidates and a CS stall. */
+         flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                  PIPE_CONTROL_VF_CACHE_INVALIDATE |
+                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+                  PIPE_CONTROL_CS_STALL;
+
+         if (brw->gen == 6) {
+            /* Hardware workaround: SNB B-Spec says:
+             *
+             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
+             * Flush Enable =1, a PIPE_CONTROL with any non-zero
+             * post-sync-op is required.
+             */
+            brw_emit_post_sync_nonzero_flush(brw);
+         }
+      }
+      brw_emit_pipe_control_flush(brw, flags);
+   }
+
+   brw_render_cache_set_clear(brw);
+}
+
+int
+brw_init_pipe_control(struct brw_context *brw,
+                      const struct brw_device_info *devinfo)
+{
+   /* Nothing is allocated before gen6. */
+   if (devinfo->gen >= 6) {
+      /* The gen6 workaround actually writes to its target buffer and the
+       * kernel won't let us write to the batch, so brw_state_batch space
+       * is no use here; a dedicated 4096-byte BO is allocated instead.
+       */
+      brw->workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
+                                              "pipe_control workaround",
+                                              4096, 4096);
+      if (!brw->workaround_bo)
+         return -ENOMEM;
+      brw->pipe_controls_since_last_cs_stall = 0;
+   }
+
+   return 0;
+}
+
+/* Drops the reference on the workaround BO taken by brw_init_pipe_control(). */
+void
+brw_fini_pipe_control(struct brw_context *brw)
+{
+   drm_intel_bo_unreference(brw->workaround_bo);
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_primitive_restart.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_primitive_restart.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_primitive_restart.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_primitive_restart.c	2015-09-16 14:36:09.000000000 +0000
@@ -91,7 +91,7 @@
       return false;
    }
 
-   for (int i = 0; i < nr_prims; i++) {
+   for (unsigned i = 0; i < nr_prims; i++) {
       switch (prim[i].mode) {
       case GL_POINTS:
       case GL_LINES:
@@ -161,7 +161,8 @@
       /* Cut index should work for primitive restart, so use it
        */
       brw->prim_restart.enable_cut_index = true;
-      brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, indirect);
+      brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, 0,
+                     indirect);
       brw->prim_restart.enable_cut_index = false;
    } else {
       /* Not all the primitive draw modes are supported by the cut index,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_program.c	2015-09-16 14:36:09.000000000 +0000
@@ -88,7 +88,7 @@
 	 return NULL;
    }
 
-   case MESA_GEOMETRY_PROGRAM: {
+   case GL_GEOMETRY_PROGRAM_NV: {
       struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
       if (prog) {
          prog->id = get_new_program_id(brw->intelScreen);
@@ -143,7 +143,7 @@
       brw_add_texrect_params(prog);
 
       if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
-         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT);
+         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
       }
 
       brw_fs_precompile(ctx, NULL, prog);
@@ -169,7 +169,8 @@
       brw_add_texrect_params(prog);
 
       if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
-         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX);
+         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
+                                    brw->intelScreen->compiler->scalar_vs);
       }
 
       brw_vs_precompile(ctx, NULL, prog);
@@ -196,7 +197,7 @@
    unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
                     PIPE_CONTROL_NO_WRITE |
                     PIPE_CONTROL_CS_STALL);
-   assert(brw->gen >= 7 && brw->gen <= 8);
+   assert(brw->gen >= 7 && brw->gen <= 9);
 
    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                    GL_ELEMENT_ARRAY_BARRIER_BIT |
@@ -287,18 +288,24 @@
    functions->MemoryBarrier = brw_memory_barrier;
 }
 
+struct shader_times {
+   uint64_t time;     /* accumulated from slot 0 of each entry in the BO */
+   uint64_t written;  /* accumulated from slot 1 of each entry in the BO */
+   uint64_t reset;    /* accumulated from slot 2 of each entry in the BO */
+};
+
 void
 brw_init_shader_time(struct brw_context *brw)
 {
-   const int max_entries = 4096;
-   brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
-                                            max_entries * SHADER_TIME_STRIDE,
-                                            4096);
+   const int max_entries = 2048;
+   brw->shader_time.bo =
+      drm_intel_bo_alloc(brw->bufmgr, "shader time",
+                         max_entries * SHADER_TIME_STRIDE * 3, 4096);
    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                           max_entries);
-   brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
+   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                                max_entries);
    brw->shader_time.max_entries = max_entries;
 }
@@ -319,27 +326,6 @@
 }
 
 static void
-get_written_and_reset(struct brw_context *brw, int i,
-                      uint64_t *written, uint64_t *reset)
-{
-   enum shader_time_shader_type type = brw->shader_time.types[i];
-   assert(type == ST_VS || type == ST_GS || type == ST_FS8 ||
-          type == ST_FS16 || type == ST_CS);
-
-   /* Find where we recorded written and reset. */
-   int wi, ri;
-
-   for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
-      ;
-
-   for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
-      ;
-
-   *written = brw->shader_time.cumulative[wi];
-   *reset = brw->shader_time.cumulative[ri];
-}
-
-static void
 print_shader_time_line(const char *stage, const char *name,
                        int shader_num, uint64_t time, uint64_t total)
 {
@@ -374,26 +360,13 @@
       sorted[i] = &scaled[i];
 
       switch (type) {
-      case ST_VS_WRITTEN:
-      case ST_VS_RESET:
-      case ST_GS_WRITTEN:
-      case ST_GS_RESET:
-      case ST_FS8_WRITTEN:
-      case ST_FS8_RESET:
-      case ST_FS16_WRITTEN:
-      case ST_FS16_RESET:
-      case ST_CS_WRITTEN:
-      case ST_CS_RESET:
-         /* We'll handle these when along with the time. */
-         scaled[i] = 0;
-         continue;
-
       case ST_VS:
       case ST_GS:
       case ST_FS8:
       case ST_FS16:
       case ST_CS:
-         get_written_and_reset(brw, i, &written, &reset);
+         written = brw->shader_time.cumulative[i].written;
+         reset = brw->shader_time.cumulative[i].reset;
          break;
 
       default:
@@ -405,7 +378,7 @@
          break;
       }
 
-      uint64_t time = brw->shader_time.cumulative[i];
+      uint64_t time = brw->shader_time.cumulative[i].time;
       if (written) {
          scaled[i] = time / written * (written + reset);
       } else {
@@ -491,16 +464,19 @@
     * overhead compared to the cost of tracking the time in the first place.
     */
    drm_intel_bo_map(brw->shader_time.bo, true);
-
-   uint32_t *times = brw->shader_time.bo->virtual;
+   void *bo_map = brw->shader_time.bo->virtual;
 
    for (int i = 0; i < brw->shader_time.num_entries; i++) {
-      brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
+      uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
+
+      brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
+      brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
+      brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
    }
 
    /* Zero the BO out to clear it out for our next collection.
     */
-   memset(times, 0, brw->shader_time.bo->size);
+   memset(bo_map, 0, brw->shader_time.bo->size);
    drm_intel_bo_unmap(brw->shader_time.bo);
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_queryobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_queryobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_queryobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_queryobj.c	2015-09-16 14:36:09.000000000 +0000
@@ -66,20 +66,11 @@
 void
 brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
 {
-   uint32_t flags;
-
-   flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT |
-            PIPE_CONTROL_DEPTH_STALL);
-
-   /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
-    * command when loading the values into the predicate source registers for
-    * conditional rendering.
-    */
-   if (brw->predicate.supported)
-      flags |= PIPE_CONTROL_FLUSH_ENABLE;
-
-   brw_emit_pipe_control_write(brw, flags, query_bo,
-                               idx * sizeof(uint64_t), 0, 0);
+   brw_emit_pipe_control_write(brw,
+                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
+                               PIPE_CONTROL_DEPTH_STALL,
+                               query_bo, idx * sizeof(uint64_t),
+                               0, 0);
 }
 
 /**
@@ -497,13 +488,22 @@
    struct brw_context *brw = brw_context(ctx);
    uint64_t result = 0;
 
-   drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result);
+   switch (brw->intelScreen->hw_has_timestamp) {
+   case 3: /* New kernel, always full 36bit accuracy */
+      drm_intel_reg_read(brw->bufmgr, TIMESTAMP | 1, &result);
+      break;
+   case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */
+      drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result);
+      result = result >> 32;
+      break;
+   case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */
+      drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result);
+      break;
+   }
 
    /* See logic in brw_queryobj_get_results() */
-   result = result >> 32;
    result *= 80;
    result &= (1ull << 36) - 1;
-
    return result;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_reg.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_reg.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_reg.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_reg.h	2015-09-16 14:36:09.000000000 +0000
@@ -765,6 +765,22 @@
 }
 
 static inline struct brw_reg
+brw_notification_reg(void)
+{
+   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                  BRW_ARF_NOTIFICATION_COUNT,
+                  0, /* NOTE(review): the three zeros are presumably */
+                  0, /* subnr, negate and abs -- confirm against the */
+                  0, /* brw_reg() prototype.                          */
+                  BRW_REGISTER_TYPE_UD,
+                  BRW_VERTICAL_STRIDE_0,
+                  BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0,
+                  BRW_SWIZZLE_XXXX,
+                  WRITEMASK_X);
+}
+
+static inline struct brw_reg
 brw_acc_reg(unsigned width)
 {
    return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE,
@@ -837,7 +853,7 @@
 spread(struct brw_reg reg, unsigned s)
 {
    if (s) {
-      assert(is_power_of_two(s));
+      assert(_mesa_is_pow_two(s));
 
       if (reg.hstride)
          reg.hstride += cvt(s) - 1;
@@ -934,6 +950,12 @@
    return reg;
 }
 
+static inline unsigned
+brw_writemask_for_size(unsigned n)
+{
+   return (1u << n) - 1; /* low n bits set; 1u keeps n == 31 defined */
+}
+
 static inline struct brw_reg
 negate(struct brw_reg reg)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_sampler_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_sampler_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_sampler_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_sampler_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -425,11 +425,11 @@
 
    /* Enable anisotropic filtering if desired. */
    unsigned max_anisotropy = BRW_ANISORATIO_2;
-   if (sampler->MaxAnisotropy > 1.0) {
+   if (sampler->MaxAnisotropy > 1.0f) {
       min_filter = BRW_MAPFILTER_ANISOTROPIC;
       mag_filter = BRW_MAPFILTER_ANISOTROPIC;
 
-      if (sampler->MaxAnisotropy > 2.0) {
+      if (sampler->MaxAnisotropy > 2.0f) {
 	 max_anisotropy =
             MIN2((sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16);
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -399,10 +399,10 @@
 
 class instruction_scheduler {
 public:
-   instruction_scheduler(backend_visitor *v, int grf_count,
+   instruction_scheduler(backend_shader *s, int grf_count,
                          instruction_scheduler_mode mode)
    {
-      this->bv = v;
+      this->bs = s;
       this->mem_ctx = ralloc_context(NULL);
       this->grf_count = grf_count;
       this->instructions.make_empty();
@@ -455,7 +455,7 @@
    int grf_count;
    int time;
    exec_list instructions;
-   backend_visitor *bv;
+   backend_shader *bs;
 
    instruction_scheduler_mode mode;
 
@@ -606,7 +606,7 @@
 schedule_node::schedule_node(backend_instruction *inst,
                              instruction_scheduler *sched)
 {
-   const struct brw_device_info *devinfo = sched->bv->devinfo;
+   const struct brw_device_info *devinfo = sched->bs->devinfo;
 
    this->inst = inst;
    this->child_array_size = 0;
@@ -1314,8 +1314,8 @@
                 * single-result send is probably actually reducing register
                 * pressure.
                 */
-               if (inst->regs_written <= inst->dst.width / 8 &&
-                   chosen_inst->regs_written > chosen_inst->dst.width / 8) {
+               if (inst->regs_written <= inst->exec_size / 8 &&
+                   chosen_inst->regs_written > chosen_inst->exec_size / 8) {
                   chosen = n;
                   continue;
                } else if (inst->regs_written > chosen_inst->regs_written) {
@@ -1384,7 +1384,7 @@
 void
 instruction_scheduler::schedule_instructions(bblock_t *block)
 {
-   const struct brw_device_info *devinfo = bv->devinfo;
+   const struct brw_device_info *devinfo = bs->devinfo;
    backend_instruction *inst = block->end();
    time = 0;
 
@@ -1419,7 +1419,7 @@
 
       if (debug) {
          fprintf(stderr, "clock %4d, scheduled: ", time);
-         bv->dump_instruction(chosen->inst);
+         bs->dump_instruction(chosen->inst);
       }
 
       /* Now that we've scheduled a new instruction, some of its
@@ -1435,7 +1435,7 @@
 
          if (debug) {
             fprintf(stderr, "\tchild %d, %d parents: ", i, child->parent_count);
-            bv->dump_instruction(child->inst);
+            bs->dump_instruction(child->inst);
          }
 
          child->cand_generation = cand_generation;
@@ -1474,7 +1474,7 @@
    if (debug) {
       fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
               post_reg_alloc);
-      bv->dump_instructions();
+      bs->dump_instructions();
    }
 
    /* Populate the remaining GRF uses array to improve the pre-regalloc
@@ -1504,7 +1504,7 @@
    if (debug) {
       fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n",
               post_reg_alloc);
-      bv->dump_instructions();
+      bs->dump_instructions();
    }
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_sf_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_sf_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_sf_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_sf_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -45,13 +45,19 @@
    struct gl_context *ctx = &brw->ctx;
    struct brw_sf_viewport *sfv;
    GLfloat y_scale, y_bias;
-   double scale[3], translate[3];
+   float scale[3], translate[3];
    const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 
    sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
 			 sizeof(*sfv), 32, &brw->sf.vp_offset);
    memset(sfv, 0, sizeof(*sfv));
 
+   /* Accessing the fields Width and Height of gl_framebuffer to produce the
+    * values to program the viewport and scissor is fine as long as the
+    * gl_framebuffer has at least one attachment.
+    */
+   assert(ctx->DrawBuffer->_HasAttachments);
+
    if (render_to_fbo) {
       y_scale = 1.0;
       y_bias = 0;
@@ -214,7 +220,7 @@
 
    /* _NEW_LINE */
    sf->sf6.line_width =
-      CLAMP(ctx->Line.Width, 1.0, ctx->Const.MaxLineWidth) * (1<<1);
+      CLAMP(ctx->Line.Width, 1.0f, ctx->Const.MaxLineWidth) * (1<<1);
 
    sf->sf6.line_endcap_aa_region_width = 1;
    if (ctx->Line.SmoothFlag)
@@ -253,9 +259,10 @@
 
    /* _NEW_POINT */
    sf->sf7.sprite_point = ctx->Point.PointSprite;
-   sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size,
-					 ctx->Point.MinSize,
-					 ctx->Point.MaxSize)), 1, 255) * (1<<3);
+   sf->sf7.point_size = CLAMP(rintf(CLAMP(ctx->Point.Size,
+                                          ctx->Point.MinSize,
+                                          ctx->Point.MaxSize)), 1.0f, 255.0f) *
+                        (1<<3);
    /* _NEW_PROGRAM | _NEW_POINT */
    sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
 				    ctx->Point._Attenuated);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_shader.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_shader.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_shader.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_shader.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -32,16 +32,116 @@
 #include "glsl/glsl_parser_extras.h"
 #include "main/shaderapi.h"
 
+static void
+shader_debug_log_mesa(void *data, const char *fmt, ...)
+{
+   /* Forward the message to GL debug output as a compiler notification. */
+   struct brw_context *brw = static_cast<struct brw_context *>(data);
+   GLuint msg_id = 0;
+   va_list ap;
+
+   va_start(ap, fmt);
+   _mesa_gl_vdebug(&brw->ctx, &msg_id, MESA_DEBUG_SOURCE_SHADER_COMPILER,
+                   MESA_DEBUG_TYPE_OTHER, MESA_DEBUG_SEVERITY_NOTIFICATION,
+                   fmt, ap);
+   va_end(ap);
+}
+
+static void
+shader_perf_log_mesa(void *data, const char *fmt, ...)
+{
+   struct brw_context *brw = static_cast<struct brw_context *>(data);
+   va_list ap;
+
+   va_start(ap, fmt);
+   /* stderr gets its own copy: a va_list can't be reused once consumed. */
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+      va_list ap_stderr;
+      va_copy(ap_stderr, ap);
+      vfprintf(stderr, fmt, ap_stderr);
+      va_end(ap_stderr);
+   }
+
+   /* GL debug output, when enabled, consumes the original list. */
+   if (brw->perf_debug) {
+      GLuint msg_id = 0;
+      _mesa_gl_vdebug(&brw->ctx, &msg_id, MESA_DEBUG_SOURCE_SHADER_COMPILER,
+                      MESA_DEBUG_TYPE_PERFORMANCE, MESA_DEBUG_SEVERITY_MEDIUM,
+                      fmt, ap);
+   }
+   va_end(ap);
+}
+
 struct brw_compiler *
 brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
 {
    struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
 
    compiler->devinfo = devinfo;
+   compiler->shader_debug_log = shader_debug_log_mesa;
+   compiler->shader_perf_log = shader_perf_log_mesa;
 
    brw_fs_alloc_reg_sets(compiler);
    brw_vec4_alloc_reg_set(compiler);
 
+   if (devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
+      compiler->scalar_vs = true;
+
+   nir_shader_compiler_options *nir_options =
+      rzalloc(compiler, nir_shader_compiler_options);
+   nir_options->native_integers = true;
+   /* In order to help allow for better CSE at the NIR level we tell NIR
+    * to split all ffma instructions during opt_algebraic and we then
+    * re-combine them as a later step.
+    */
+   nir_options->lower_ffma = true;
+   nir_options->lower_sub = true;
+
+   /* Configure the GLSL compiler per stage, e.g. to emit condition codes. */
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      compiler->glsl_compiler_options[i].MaxUnrollIterations = 32;
+      compiler->glsl_compiler_options[i].MaxIfDepth =
+         devinfo->gen < 6 ? 16 : UINT_MAX;
+
+      compiler->glsl_compiler_options[i].EmitCondCodes = true;
+      compiler->glsl_compiler_options[i].EmitNoNoise = true;
+      compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
+      compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
+      compiler->glsl_compiler_options[i].EmitNoIndirectOutput =
+	 (i == MESA_SHADER_FRAGMENT);
+      compiler->glsl_compiler_options[i].EmitNoIndirectTemp =
+	 (i == MESA_SHADER_FRAGMENT);
+      compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
+      compiler->glsl_compiler_options[i].LowerClipDistance = true;
+
+      /* !ARB_gpu_shader5 */
+      if (devinfo->gen < 7)
+         compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
+   }
+
+   compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
+   compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
+   /* VS gets NirOptions if it is scalar or if INTEL_USE_NIR is enabled. */
+   if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
+      if (compiler->scalar_vs) {
+         /* If we're using the scalar backend for vertex shaders, we need to
+          * configure these accordingly.
+          */
+         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
+         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+         compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+      }
+
+      compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
+   }
+   /* GS gets NirOptions only when INTEL_USE_NIR is enabled. */
+   if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) {
+      compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+   }
+   /* FS and CS get NirOptions unconditionally. */
+   compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
+   compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
+
    return compiler;
 }
 
@@ -97,7 +197,7 @@
    case MESA_SHADER_FRAGMENT:
       return true;
    case MESA_SHADER_VERTEX:
-      return brw->scalar_vs;
+      return brw->intelScreen->compiler->scalar_vs;
    default:
       return false;
    }
@@ -139,7 +239,8 @@
 }
 
 static void
-process_glsl_ir(struct brw_context *brw,
+process_glsl_ir(gl_shader_stage stage,
+                struct brw_context *brw,
                 struct gl_shader_program *shader_prog,
                 struct gl_shader *shader)
 {
@@ -165,7 +266,9 @@
                       EXP_TO_EXP2 |
                       LOG_TO_LOG2 |
                       bitfield_insert |
-                      LDEXP_TO_ARITH);
+                      LDEXP_TO_ARITH |
+                      CARRY_TO_ARITH |
+                      BORROW_TO_ARITH);
 
    /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
     * if-statements need to be flattened.
@@ -185,15 +288,17 @@
    lower_quadop_vector(shader->ir, false);
 
    bool lowered_variable_indexing =
-      lower_variable_index_to_cond_assign(shader->ir,
+      lower_variable_index_to_cond_assign((gl_shader_stage)stage,
+                                          shader->ir,
                                           options->EmitNoIndirectInput,
                                           options->EmitNoIndirectOutput,
                                           options->EmitNoIndirectTemp,
                                           options->EmitNoIndirectUniform);
 
    if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
-      perf_debug("Unsupported form of variable indexing in FS; falling "
-                 "back to very inefficient code generation\n");
+      perf_debug("Unsupported form of variable indexing in %s; falling "
+                 "back to very inefficient code generation\n",
+                 _mesa_shader_stage_to_abbrev(shader->Stage));
    }
 
    lower_ubo_reference(shader, shader->ir);
@@ -218,7 +323,7 @@
    } while (progress);
 
    if (options->NirOptions != NULL)
-      lower_output_reads(shader->ir);
+      lower_output_reads(stage, shader->ir);
 
    validate_ir_tree(shader->ir);
 
@@ -262,7 +367,7 @@
 
       _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
 
-      process_glsl_ir(brw, shProg, shader);
+      process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
 
       /* Make a pass over the IR to add state references for any built-in
        * uniforms that are used.  This has to be done now (during linking).
@@ -297,8 +402,10 @@
 
       brw_add_texrect_params(prog);
 
-      if (options->NirOptions)
-         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);
+      if (options->NirOptions) {
+         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
+                                    is_scalar_shader_stage(brw, stage));
+      }
 
       _mesa_reference_program(ctx, &prog, NULL);
    }
@@ -332,6 +439,7 @@
       return BRW_REGISTER_TYPE_F;
    case GLSL_TYPE_INT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_SUBROUTINE:
       return BRW_REGISTER_TYPE_D;
    case GLSL_TYPE_UINT:
       return BRW_REGISTER_TYPE_UD;
@@ -437,6 +545,8 @@
       return opcode_descs[op].name;
    case FS_OPCODE_FB_WRITE:
       return "fb_write";
+   case FS_OPCODE_FB_WRITE_LOGICAL:
+      return "fb_write_logical";
    case FS_OPCODE_BLORP_FB_WRITE:
       return "blorp_fb_write";
    case FS_OPCODE_REP_FB_WRITE:
@@ -465,43 +575,80 @@
 
    case SHADER_OPCODE_TEX:
       return "tex";
+   case SHADER_OPCODE_TEX_LOGICAL:
+      return "tex_logical";
    case SHADER_OPCODE_TXD:
       return "txd";
+   case SHADER_OPCODE_TXD_LOGICAL:
+      return "txd_logical";
    case SHADER_OPCODE_TXF:
       return "txf";
+   case SHADER_OPCODE_TXF_LOGICAL:
+      return "txf_logical";
    case SHADER_OPCODE_TXL:
       return "txl";
+   case SHADER_OPCODE_TXL_LOGICAL:
+      return "txl_logical";
    case SHADER_OPCODE_TXS:
       return "txs";
+   case SHADER_OPCODE_TXS_LOGICAL:
+      return "txs_logical";
    case FS_OPCODE_TXB:
       return "txb";
+   case FS_OPCODE_TXB_LOGICAL:
+      return "txb_logical";
    case SHADER_OPCODE_TXF_CMS:
       return "txf_cms";
+   case SHADER_OPCODE_TXF_CMS_LOGICAL:
+      return "txf_cms_logical";
    case SHADER_OPCODE_TXF_UMS:
       return "txf_ums";
+   case SHADER_OPCODE_TXF_UMS_LOGICAL:
+      return "txf_ums_logical";
    case SHADER_OPCODE_TXF_MCS:
       return "txf_mcs";
+   case SHADER_OPCODE_TXF_MCS_LOGICAL:
+      return "txf_mcs_logical";
    case SHADER_OPCODE_LOD:
       return "lod";
+   case SHADER_OPCODE_LOD_LOGICAL:
+      return "lod_logical";
    case SHADER_OPCODE_TG4:
       return "tg4";
+   case SHADER_OPCODE_TG4_LOGICAL:
+      return "tg4_logical";
    case SHADER_OPCODE_TG4_OFFSET:
       return "tg4_offset";
+   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+      return "tg4_offset_logical";
+
    case SHADER_OPCODE_SHADER_TIME_ADD:
       return "shader_time_add";
 
    case SHADER_OPCODE_UNTYPED_ATOMIC:
       return "untyped_atomic";
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+      return "untyped_atomic_logical";
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
       return "untyped_surface_read";
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+      return "untyped_surface_read_logical";
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
       return "untyped_surface_write";
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+      return "untyped_surface_write_logical";
    case SHADER_OPCODE_TYPED_ATOMIC:
       return "typed_atomic";
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+      return "typed_atomic_logical";
    case SHADER_OPCODE_TYPED_SURFACE_READ:
       return "typed_surface_read";
+   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+      return "typed_surface_read_logical";
    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
       return "typed_surface_write";
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+      return "typed_surface_write_logical";
    case SHADER_OPCODE_MEMORY_FENCE:
       return "memory_fence";
 
@@ -562,8 +709,6 @@
    case FS_OPCODE_DISCARD_JUMP:
       return "discard_jump";
 
-   case FS_OPCODE_SET_OMASK:
-      return "set_omask";
    case FS_OPCODE_SET_SAMPLE_ID:
       return "set_sample_id";
    case FS_OPCODE_SET_SIMD4X2_OFFSET:
@@ -631,6 +776,10 @@
       return "gs_ff_sync_set_primitives";
    case CS_OPCODE_CS_TERMINATE:
       return "cs_terminate";
+   case SHADER_OPCODE_BARRIER:
+      return "barrier";
+   case SHADER_OPCODE_MULH:
+      return "mulh";
    }
 
    unreachable("not reached");
@@ -754,19 +903,22 @@
    return false;
 }
 
-backend_visitor::backend_visitor(struct brw_context *brw,
-                                 struct gl_shader_program *shader_prog,
-                                 struct gl_program *prog,
-                                 struct brw_stage_prog_data *stage_prog_data,
-                                 gl_shader_stage stage)
-   : brw(brw),
-     devinfo(brw->intelScreen->devinfo),
-     ctx(&brw->ctx),
+backend_shader::backend_shader(const struct brw_compiler *compiler,
+                               void *log_data,
+                               void *mem_ctx,
+                               struct gl_shader_program *shader_prog,
+                               struct gl_program *prog,
+                               struct brw_stage_prog_data *stage_prog_data,
+                               gl_shader_stage stage)
+   : compiler(compiler),
+     log_data(log_data),
+     devinfo(compiler->devinfo),
      shader(shader_prog ?
         (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
      shader_prog(shader_prog),
      prog(prog),
      stage_prog_data(stage_prog_data),
+     mem_ctx(mem_ctx),
      cfg(NULL),
      stage(stage)
 {
@@ -846,6 +998,7 @@
    case BRW_OPCODE_XOR:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
+   case SHADER_OPCODE_MULH:
       return true;
    case BRW_OPCODE_SEL:
       /* MIN and MAX are commutative. */
@@ -949,11 +1102,11 @@
    case BRW_OPCODE_LINE:
    case BRW_OPCODE_LRP:
    case BRW_OPCODE_MAC:
-   case BRW_OPCODE_MACH:
    case BRW_OPCODE_MAD:
    case BRW_OPCODE_MATH:
    case BRW_OPCODE_MOV:
    case BRW_OPCODE_MUL:
+   case SHADER_OPCODE_MULH:
    case BRW_OPCODE_PLN:
    case BRW_OPCODE_RNDD:
    case BRW_OPCODE_RNDE:
@@ -1052,13 +1205,18 @@
 {
    switch (opcode) {
    case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
    case SHADER_OPCODE_TYPED_ATOMIC:
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
    case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
    case SHADER_OPCODE_MEMORY_FENCE:
    case SHADER_OPCODE_URB_WRITE_SIMD8:
    case FS_OPCODE_FB_WRITE:
+   case SHADER_OPCODE_BARRIER:
       return true;
    default:
       return false;
@@ -1147,13 +1305,13 @@
 }
 
 void
-backend_visitor::dump_instructions()
+backend_shader::dump_instructions()
 {
    dump_instructions(NULL);
 }
 
 void
-backend_visitor::dump_instructions(const char *name)
+backend_shader::dump_instructions(const char *name)
 {
    FILE *file = stderr;
    if (name && geteuid() != 0) {
@@ -1182,7 +1340,7 @@
 }
 
 void
-backend_visitor::calculate_cfg()
+backend_shader::calculate_cfg()
 {
    if (this->cfg)
       return;
@@ -1190,7 +1348,7 @@
 }
 
 void
-backend_visitor::invalidate_cfg()
+backend_shader::invalidate_cfg()
 {
    ralloc_free(this->cfg);
    this->cfg = NULL;
@@ -1205,7 +1363,7 @@
  * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
  */
 void
-backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
+backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
 {
    int num_textures = _mesa_fls(prog->SamplersUsed);
 
@@ -1260,3 +1418,34 @@
 
    /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
 }
+
+void
+backend_shader::setup_image_uniform_values(const gl_uniform_storage *storage)
+{
+   const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target);
+   /* Upload a brw_image_param for each image in 'storage' (at least one). */
+   for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
+      const unsigned image_idx = storage->image[stage].index + i;
+      const brw_image_param *param = &stage_prog_data->image_param[image_idx];
+
+      /* Upload the brw_image_param structure one field at a time.  The order
+       * is expected to match the BRW_IMAGE_PARAM_*_OFFSET defines.
+       */
+      setup_vector_uniform_values(
+         (const gl_constant_value *)&param->surface_idx, 1);
+      setup_vector_uniform_values(
+         (const gl_constant_value *)param->offset, 2);
+      setup_vector_uniform_values(
+         (const gl_constant_value *)param->size, 3);
+      setup_vector_uniform_values(
+         (const gl_constant_value *)param->stride, 4);
+      setup_vector_uniform_values(
+         (const gl_constant_value *)param->tiling, 3);
+      setup_vector_uniform_values(
+         (const gl_constant_value *)param->swizzling, 2);
+
+      brw_mark_surface_used(
+         stage_prog_data,
+         stage_prog_data->binding_table.image_start + image_idx);
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_shader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_shader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_shader.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_shader.h	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,7 @@
 #include "brw_defines.h"
 #include "main/compiler.h"
 #include "glsl/ir.h"
+#include "program/prog_parameter.h"
 
 #ifdef __cplusplus
 #include "brw_ir_allocator.h"
@@ -86,6 +87,12 @@
        */
       int aligned_pairs_class;
    } fs_reg_sets[2];
+
+   void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+   void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+
+   bool scalar_vs;
+   struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
 };
 
 enum PACKED register_file {
@@ -211,20 +218,23 @@
    SCHEDULE_POST,
 };
 
-class backend_visitor : public ir_visitor {
+class backend_shader {
 protected:
 
-   backend_visitor(struct brw_context *brw,
-                   struct gl_shader_program *shader_prog,
-                   struct gl_program *prog,
-                   struct brw_stage_prog_data *stage_prog_data,
-                   gl_shader_stage stage);
+   backend_shader(const struct brw_compiler *compiler,
+                  void *log_data,
+                  void *mem_ctx,
+                  struct gl_shader_program *shader_prog,
+                  struct gl_program *prog,
+                  struct brw_stage_prog_data *stage_prog_data,
+                  gl_shader_stage stage);
 
 public:
 
-   struct brw_context * const brw;
+   const struct brw_compiler *compiler;
+   void *log_data; /* Passed to compiler->*_log functions */
+
    const struct brw_device_info * const devinfo;
-   struct gl_context * const ctx;
    struct brw_shader * const shader;
    struct gl_shader_program * const shader_prog;
    struct gl_program * const prog;
@@ -259,6 +269,10 @@
    void assign_common_binding_table_offsets(uint32_t next_binding_table_offset);
 
    virtual void invalidate_live_intervals() = 0;
+
+   virtual void setup_vector_uniform_values(const gl_constant_value *values,
+                                            unsigned n) = 0;
+   void setup_image_uniform_values(const gl_uniform_storage *storage);
 };
 
 uint32_t brw_texture_offset(int *offsets, unsigned num_components);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state_batch.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state_batch.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state_batch.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state_batch.c	2015-09-16 14:36:09.000000000 +0000
@@ -87,7 +87,7 @@
    drm_intel_aub_annotation annotations[annotation_count];
    int a = 0;
    make_annotation(&annotations[a++], AUB_TRACE_TYPE_BATCH, 0,
-                   4*brw->batch.used);
+                   4 * USED_BATCH(brw->batch));
    for (int i = brw->state_batch_count; i-- > 0; ) {
       uint32_t type = brw->state_batch_list[i].type;
       uint32_t start_offset = brw->state_batch_list[i].offset;
@@ -136,7 +136,7 @@
     * space, then flush and try again.
     */
    if (batch->state_batch_offset < size ||
-       offset < 4*batch->used + batch->reserved_space) {
+       offset < 4 * USED_BATCH(*batch) + batch->reserved_space) {
       intel_batchbuffer_flush(brw);
       offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state_cache.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state_cache.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state_cache.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state_cache.c	2015-09-16 14:36:09.000000000 +0000
@@ -200,36 +200,23 @@
 }
 
 /**
- * Attempts to find an item in the cache with identical data and aux
- * data to use
+ * Attempts to find an item in the cache with identical data.
  */
-static bool
-brw_try_upload_using_copy(struct brw_cache *cache,
-			  struct brw_cache_item *result_item,
-			  const void *data,
-			  const void *aux)
+static const struct brw_cache_item *
+brw_lookup_prog(const struct brw_cache *cache,
+                enum brw_cache_id cache_id,
+                const void *data, unsigned data_size)
 {
-   struct brw_context *brw = cache->brw;
-   int i;
-   struct brw_cache_item *item;
+   const struct brw_context *brw = cache->brw;
+   unsigned i;
+   const struct brw_cache_item *item;
 
    for (i = 0; i < cache->size; i++) {
       for (item = cache->items[i]; item; item = item->next) {
-	 const void *item_aux = item->key + item->key_size;
 	 int ret;
 
-	 if (item->cache_id != result_item->cache_id ||
-	     item->size != result_item->size ||
-	     item->aux_size != result_item->aux_size) {
-	    continue;
-	 }
-
-         if (cache->aux_compare[result_item->cache_id]) {
-            if (!cache->aux_compare[result_item->cache_id](item_aux, aux))
-               continue;
-         } else if (memcmp(item_aux, aux, item->aux_size) != 0) {
+	 if (item->cache_id != cache_id || item->size != data_size)
 	    continue;
-	 }
 
          if (!brw->has_llc)
             drm_intel_bo_map(cache->bo, false);
@@ -239,27 +226,24 @@
 	 if (ret)
 	    continue;
 
-	 result_item->offset = item->offset;
-
-	 return true;
+	 return item;
       }
    }
 
-   return false;
+   return NULL;
 }
 
-static void
-brw_upload_item_data(struct brw_cache *cache,
-		     struct brw_cache_item *item,
-		     const void *data)
+static uint32_t
+brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
 {
+   uint32_t offset;
    struct brw_context *brw = cache->brw;
 
    /* Allocate space in the cache BO for our new program. */
-   if (cache->next_offset + item->size > cache->bo->size) {
+   if (cache->next_offset + size > cache->bo->size) {
       uint32_t new_size = cache->bo->size * 2;
 
-      while (cache->next_offset + item->size > new_size)
+      while (cache->next_offset + size > new_size)
 	 new_size *= 2;
 
       brw_cache_new_bo(cache, new_size);
@@ -273,10 +257,12 @@
       brw_cache_new_bo(cache, cache->bo->size);
    }
 
-   item->offset = cache->next_offset;
+   offset = cache->next_offset;
 
    /* Programs are always 64-byte aligned, so set up the next one now */
-   cache->next_offset = ALIGN(item->offset + item->size, 64);
+   cache->next_offset = ALIGN(offset + size, 64);
+
+   return offset;
 }
 
 void
@@ -293,6 +279,8 @@
 {
    struct brw_context *brw = cache->brw;
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   const struct brw_cache_item *matching_data =
+      brw_lookup_prog(cache, cache_id, data, data_size);
    GLuint hash;
    void *tmp;
 
@@ -304,15 +292,23 @@
    hash = hash_key(item);
    item->hash = hash;
 
-   /* If we can find a matching prog/prog_data combo in the cache
-    * already, then reuse the existing stuff.  This will mean not
-    * flagging CACHE_NEW_* when transitioning between the two
-    * equivalent hash keys.  This is notably useful for programs
-    * generating shaders at runtime, where multiple shaders may
-    * compile to the thing in our backend.
+   /* If we can find a matching prog in the cache already, then reuse the
+    * existing data without creating a new copy in the underlying buffer
+    * object. This is notably useful for programs generating shaders at
+    * runtime, where multiple shaders may compile to the same thing in our
+    * backend.
     */
-   if (!brw_try_upload_using_copy(cache, item, data, aux)) {
-      brw_upload_item_data(cache, item, data);
+   if (matching_data) {
+      item->offset = matching_data->offset;
+   } else {
+      item->offset = brw_alloc_item_data(cache, data_size);
+
+      /* Copy data to the buffer */
+      if (brw->has_llc) {
+         memcpy((char *)cache->bo->virtual + item->offset, data, data_size);
+      } else {
+         drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
+      }
    }
 
    /* Set up the memory containing the key and aux_data */
@@ -323,7 +319,7 @@
 
    item->key = tmp;
 
-   if (cache->n_items > cache->size * 1.5)
+   if (cache->n_items > cache->size * 1.5f)
       rehash(cache);
 
    hash %= cache->size;
@@ -331,13 +327,6 @@
    cache->items[hash] = item;
    cache->n_items++;
 
-   /* Copy data to the buffer */
-   if (brw->has_llc) {
-      memcpy((char *) cache->bo->virtual + item->offset, data, data_size);
-   } else {
-      drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
-   }
-
    *out_offset = item->offset;
    *(void **)out_aux = (void *)((char *)item->key + item->key_size);
    cache->brw->ctx.NewDriverState |= 1 << cache_id;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state.h	2015-09-16 14:36:09.000000000 +0000
@@ -72,8 +72,10 @@
 extern const struct brw_tracked_state brw_gs_samplers;
 extern const struct brw_tracked_state brw_vs_ubo_surfaces;
 extern const struct brw_tracked_state brw_vs_abo_surfaces;
+extern const struct brw_tracked_state brw_vs_image_surfaces;
 extern const struct brw_tracked_state brw_gs_ubo_surfaces;
 extern const struct brw_tracked_state brw_gs_abo_surfaces;
+extern const struct brw_tracked_state brw_gs_image_surfaces;
 extern const struct brw_tracked_state brw_vs_unit;
 extern const struct brw_tracked_state brw_gs_prog;
 extern const struct brw_tracked_state brw_wm_prog;
@@ -84,7 +86,9 @@
 extern const struct brw_tracked_state brw_vs_binding_table;
 extern const struct brw_tracked_state brw_wm_ubo_surfaces;
 extern const struct brw_tracked_state brw_wm_abo_surfaces;
+extern const struct brw_tracked_state brw_wm_image_surfaces;
 extern const struct brw_tracked_state brw_cs_abo_surfaces;
+extern const struct brw_tracked_state brw_cs_image_surfaces;
 extern const struct brw_tracked_state brw_wm_unit;
 extern const struct brw_tracked_state brw_interpolation_map;
 
@@ -121,7 +125,6 @@
 extern const struct brw_tracked_state gen7_depthbuffer;
 extern const struct brw_tracked_state gen7_clip_state;
 extern const struct brw_tracked_state gen7_disable_stages;
-extern const struct brw_tracked_state gen7_gs_push_constants;
 extern const struct brw_tracked_state gen7_gs_state;
 extern const struct brw_tracked_state gen7_ps_state;
 extern const struct brw_tracked_state gen7_push_constant_space;
@@ -132,6 +135,7 @@
 extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen7_vs_state;
 extern const struct brw_tracked_state gen7_wm_state;
+extern const struct brw_tracked_state gen7_hw_binding_tables;
 extern const struct brw_tracked_state haswell_cut_index;
 extern const struct brw_tracked_state gen8_blend_state;
 extern const struct brw_tracked_state gen8_disable_stages;
@@ -266,15 +270,6 @@
                                       uint32_t render_target_start,
                                       uint32_t *surf_offset);
 
-/* gen7_wm_state.c */
-void
-gen7_upload_ps_state(struct brw_context *brw,
-                     const struct gl_fragment_program *fp,
-                     const struct brw_stage_state *stage_state,
-                     const struct brw_wm_prog_data *prog_data,
-                     bool enable_dual_src_blend, unsigned sample_mask,
-                     unsigned fast_clear_op);
-
 /* gen7_wm_surface_state.c */
 uint32_t gen7_surface_tiling_mode(uint32_t tiling);
 uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l);
@@ -372,6 +367,20 @@
                            const struct brw_stage_state *stage_state,
                            bool active, unsigned opcode);
 
+void gen7_rs_control(struct brw_context *brw, int enable);
+
+void gen7_edit_hw_binding_table_entry(struct brw_context *brw,
+                                      gl_shader_stage stage,
+                                      uint32_t index,
+                                      uint32_t surf_offset);
+void gen7_update_binding_table_from_array(struct brw_context *brw,
+                                          gl_shader_stage stage,
+                                          const uint32_t* binding_table,
+                                          int num_surfaces);
+void gen7_enable_hw_binding_tables(struct brw_context *brw);
+void gen7_disable_hw_binding_tables(struct brw_context *brw);
+void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
+
 #ifdef __cplusplus
 }
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state_upload.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state_upload.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_state_upload.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_state_upload.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,6 +41,7 @@
 #include "brw_gs.h"
 #include "brw_wm.h"
 #include "brw_cs.h"
+#include "main/framebuffer.h"
 
 static const struct brw_tracked_state *gen4_atoms[] =
 {
@@ -191,6 +192,12 @@
    &gen6_color_calc_state,	/* must do before cc unit */
    &gen6_depth_stencil_state,	/* must do before cc unit */
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+
+   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
+   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
+   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -250,6 +257,7 @@
 static const struct brw_tracked_state *gen7_compute_atoms[] =
 {
    &brw_state_base_address,
+   &brw_cs_image_surfaces,
    &brw_cs_abo_surfaces,
    &brw_cs_state,
 };
@@ -267,6 +275,12 @@
    &gen8_blend_state,
    &gen6_color_calc_state,
 
+   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
+
+   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
+   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
+   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
+
    &gen6_vs_push_constants, /* Before vs_state */
    &gen6_gs_push_constants, /* Before gs_state */
    &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -333,6 +347,7 @@
 static const struct brw_tracked_state *gen8_compute_atoms[] =
 {
    &gen8_state_base_address,
+   &brw_cs_image_surfaces,
    &brw_cs_abo_surfaces,
    &brw_cs_state,
 };
@@ -348,7 +363,7 @@
       return;
 
    if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+      brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_invariant_state(brw);
 
@@ -467,6 +482,7 @@
    ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
    ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
    ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
+   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
 }
 
 
@@ -580,6 +596,7 @@
    DEFINE_BIT(BRW_NEW_STATS_WM),
    DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
    DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
+   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
    DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
    DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
    DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
@@ -660,6 +677,7 @@
    int i;
    static int dirty_count = 0;
    struct brw_state_flags state = brw->state.pipelines[pipeline];
+   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
 
    brw_select_pipeline(brw, pipeline);
 
@@ -696,8 +714,8 @@
       brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
    }
 
-   if (brw->num_samples != ctx->DrawBuffer->Visual.samples) {
-      brw->num_samples = ctx->DrawBuffer->Visual.samples;
+   if (brw->num_samples != fb_samples) {
+      brw->num_samples = fb_samples;
       brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
    }
 
@@ -708,7 +726,7 @@
 
    /* Emit Sandybridge workaround flushes on every primitive, for safety. */
    if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
+      brw_emit_post_sync_nonzero_flush(brw);
 
    brw_upload_programs(brw, pipeline);
    merge_ctx_state(brw, &state);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_surface_formats.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_surface_formats.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_surface_formats.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_surface_formats.c	2015-09-16 14:36:09.000000000 +0000
@@ -813,3 +813,112 @@
       unreachable("Unexpected depth format.");
    }
 }
+
+mesa_format
+brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
+                            mesa_format format)
+{
+   switch (format) {
+   /* The function returns the format 'format' is lowered to on 'devinfo'.
+    * These are returned as-is; up to BDW 128bpp falls back to untyped access.
+    */
+   case MESA_FORMAT_RGBA_UINT32:
+   case MESA_FORMAT_RGBA_SINT32:
+   case MESA_FORMAT_RGBA_FLOAT32:
+   case MESA_FORMAT_R_UINT32:
+   case MESA_FORMAT_R_SINT32:
+   case MESA_FORMAT_R_FLOAT32:
+      return format;
+
+   /* From HSW to BDW the only 64bpp format supported for typed access is
+    * RGBA_UINT16.  IVB falls back to untyped.
+    */
+   case MESA_FORMAT_RGBA_UINT16:
+   case MESA_FORMAT_RGBA_SINT16:
+   case MESA_FORMAT_RGBA_FLOAT16:
+   case MESA_FORMAT_RG_UINT32:
+   case MESA_FORMAT_RG_SINT32:
+   case MESA_FORMAT_RG_FLOAT32:
+      return (devinfo->gen >= 9 ? format :
+              devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RGBA_UINT16 : MESA_FORMAT_RG_UINT32);
+
+   /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component
+    * are supported.  IVB doesn't support formats with more than one component
+    * for typed access.  For 8 and 16 bpp formats IVB relies on the
+    * undocumented behavior that typed reads from R_UINT8 and R_UINT16
+    * surfaces actually do a 32-bit misaligned read.  The alternative would be
+    * to use two surface state entries with different formats for each image,
+    * one for reading (using R_UINT32) and another one for writing (using
+    * R_UINT8 or R_UINT16), but that would complicate the shaders we generate
+    * even more.
+    */
+   case MESA_FORMAT_RGBA_UINT8:
+   case MESA_FORMAT_RGBA_SINT8:
+      return (devinfo->gen >= 9 ? format :
+              devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RGBA_UINT8 : MESA_FORMAT_R_UINT32);
+
+   case MESA_FORMAT_RG_UINT16:
+   case MESA_FORMAT_RG_SINT16:
+   case MESA_FORMAT_RG_FLOAT16:
+      return (devinfo->gen >= 9 ? format :
+              devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RG_UINT16 : MESA_FORMAT_R_UINT32);
+
+   case MESA_FORMAT_RG_UINT8:
+   case MESA_FORMAT_RG_SINT8:
+      return (devinfo->gen >= 9 ? format :
+              devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RG_UINT8 : MESA_FORMAT_R_UINT16);
+
+   case MESA_FORMAT_R_UINT16:
+   case MESA_FORMAT_R_FLOAT16:
+   case MESA_FORMAT_R_SINT16:
+      return (devinfo->gen >= 9 ? format : MESA_FORMAT_R_UINT16);
+
+   case MESA_FORMAT_R_UINT8:
+   case MESA_FORMAT_R_SINT8:
+      return (devinfo->gen >= 9 ? format : MESA_FORMAT_R_UINT8);
+
+   /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported
+    * by the hardware.
+    */
+   case MESA_FORMAT_R10G10B10A2_UINT:
+   case MESA_FORMAT_R10G10B10A2_UNORM:
+   case MESA_FORMAT_R11G11B10_FLOAT:
+      return MESA_FORMAT_R_UINT32;
+
+   /* No normalized fixed-point formats are supported by the hardware. */
+   case MESA_FORMAT_RGBA_UNORM16:
+   case MESA_FORMAT_RGBA_SNORM16:
+      return (devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RGBA_UINT16 : MESA_FORMAT_RG_UINT32);
+
+   case MESA_FORMAT_R8G8B8A8_UNORM:
+   case MESA_FORMAT_R8G8B8A8_SNORM:
+      return (devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RGBA_UINT8 : MESA_FORMAT_R_UINT32);
+
+   case MESA_FORMAT_R16G16_UNORM:
+   case MESA_FORMAT_R16G16_SNORM:
+      return (devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RG_UINT16 : MESA_FORMAT_R_UINT32);
+
+   case MESA_FORMAT_R8G8_UNORM:
+   case MESA_FORMAT_R8G8_SNORM:
+      return (devinfo->gen >= 8 || devinfo->is_haswell ?
+              MESA_FORMAT_RG_UINT8 : MESA_FORMAT_R_UINT16);
+
+   case MESA_FORMAT_R_UNORM16:
+   case MESA_FORMAT_R_SNORM16:
+      return MESA_FORMAT_R_UINT16;
+
+   case MESA_FORMAT_R_UNORM8:
+   case MESA_FORMAT_R_SNORM8:
+      return MESA_FORMAT_R_UINT8;
+
+   default:
+      unreachable("Unknown image format");
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_tex_layout.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_tex_layout.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_tex_layout.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_tex_layout.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,9 +40,88 @@
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
 static unsigned int
+tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
+                                     const struct intel_mipmap_tree *mt)
+{
+   const unsigned *align_yf, *align_ys;
+   const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
+   unsigned ret_align, divisor;
+   /* Returns the horizontal alignment, in elements, of a miptree that uses
+    * TRMODE_YF or TRMODE_YS.
+    *
+    * The tables below are indexed by log2(bytes per element): 8bpp first,
+    * 128bpp last. An element is a pixel in uncompressed surface formats
+    * and a compression block in compressed surface formats. For
+    * MSFMT_DEPTH_STENCIL type multisampled surfaces, an element is a sample.
+    */
+   const unsigned align_1d_yf[] = {4096, 2048, 1024, 512, 256};
+   const unsigned align_1d_ys[] = {65536, 32768, 16384, 8192, 4096};
+   const unsigned align_2d_yf[] = {64, 64, 32, 32, 16};
+   const unsigned align_2d_ys[] = {256, 256, 128, 128, 64};
+   const unsigned align_3d_yf[] = {16, 8, 8, 8, 4};
+   const unsigned align_3d_ys[] = {64, 32, 32, 32, 16};
+   int i = 0;
+
+   /* Alignment computations below assume bpp >= 8 and a power of 2. */
+   assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp));
+
+   switch(mt->target) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_1D_ARRAY:
+      align_yf = align_1d_yf;
+      align_ys = align_1d_ys;
+      break;
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_RECTANGLE:
+   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+      align_yf = align_2d_yf;
+      align_ys = align_2d_ys;
+      break;
+   case GL_TEXTURE_3D:
+      align_yf = align_3d_yf;
+      align_ys = align_3d_ys;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   /* Table index is log2(bpp / 8): 0 for 8bpp up to 4 for 128bpp. */
+   i = ffs(bpp/8) - 1;
+
+   ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+               align_yf[i] : align_ys[i];
+
+   assert(_mesa_is_pow_two(mt->num_samples));
+   /* 2 or 4 samples halve the alignment, 8 or 16 quarter it; else as-is. */
+   switch (mt->num_samples) {
+   case 2:
+   case 4:
+      divisor = 2;
+      break;
+   case 8:
+   case 16:
+      divisor = 4;
+      break;
+   default:
+      divisor = 1;
+      break;
+   }
+   return ret_align / divisor;
+}
+
+
+static unsigned int
 intel_horizontal_texture_alignment_unit(struct brw_context *brw,
-                                        struct intel_mipmap_tree *mt)
+                                        struct intel_mipmap_tree *mt,
+                                        uint32_t layout_flags)
 {
+   if (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16)
+      return 16;
+
    /**
     * From the "Alignment Unit Size" section of various specs, namely:
     * - Gen3 Spec: "Memory Data Formats" Volume,         Section 1.20.1.4
@@ -88,18 +167,85 @@
    if (mt->format == MESA_FORMAT_S_UINT8)
       return 8;
 
+   if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
+      uint32_t align = tr_mode_horizontal_texture_alignment(brw, mt);
+      /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32. */
+      return align < 32 ? 32 : align;
+   }
+
    if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
       return 8;
 
-   if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1)
-      return 16;
-
    return 4;
 }
 
 static unsigned int
+tr_mode_vertical_texture_alignment(const struct brw_context *brw,
+                                   const struct intel_mipmap_tree *mt)
+{
+   const unsigned *align_yf, *align_ys;
+   const unsigned bpp = _mesa_get_format_bytes(mt->format) * 8;
+   unsigned ret_align, divisor;
+   /* Returns the vertical alignment of a TRMODE_YF/TRMODE_YS miptree. The
+    * tables below are indexed by log2(bytes per element): 8bpp to 128bpp. */
+   const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
+   const unsigned align_2d_ys[] = {256, 128, 128, 64, 64};
+   const unsigned align_3d_yf[] = {16, 16, 16, 8, 8};
+   const unsigned align_3d_ys[] = {32, 32, 32, 16, 16};
+   int i = 0;
+   /* There are no 1D tables here, so 1D targets must not reach this point. */
+   assert(brw->gen >= 9 &&
+          mt->target != GL_TEXTURE_1D &&
+          mt->target != GL_TEXTURE_1D_ARRAY);
+
+   /* Alignment computations below assume bpp >= 8 and a power of 2. */
+   assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)) ;
+
+   switch(mt->target) {
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_RECTANGLE:
+   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+      align_yf = align_2d_yf;
+      align_ys = align_2d_ys;
+      break;
+   case GL_TEXTURE_3D:
+      align_yf = align_3d_yf;
+      align_ys = align_3d_ys;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   /* Table index is log2(bpp / 8): 0 for 8bpp up to 4 for 128bpp. */
+   i = ffs(bpp / 8) - 1;
+
+   ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+               align_yf[i] : align_ys[i];
+
+   assert(_mesa_is_pow_two(mt->num_samples));
+   /* 4 or 8 samples halve the alignment, 16 quarter it; 2 leaves it as-is. */
+   switch (mt->num_samples) {
+   case 4:
+   case 8:
+      divisor = 2;
+      break;
+   case 16:
+      divisor = 4;
+      break;
+   default:
+      divisor = 1;
+      break;
+   }
+   return ret_align / divisor;
+}
+
+static unsigned int
 intel_vertical_texture_alignment_unit(struct brw_context *brw,
-                                      mesa_format format, bool multisampled)
+                                      const struct intel_mipmap_tree *mt)
 {
    /**
     * From the "Alignment Unit Size" section of various specs, namely:
@@ -124,23 +270,29 @@
     * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
     * the SURFACE_STATE "Surface Vertical Alignment" field.
     */
-   if (_mesa_is_format_compressed(format))
+   if (_mesa_is_format_compressed(mt->format))
       /* See comment above for the horizontal alignment */
       return brw->gen >= 9 ? 16 : 4;
 
-   if (format == MESA_FORMAT_S_UINT8)
+   if (mt->format == MESA_FORMAT_S_UINT8)
       return brw->gen >= 7 ? 8 : 4;
 
+   if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
+      uint32_t align = tr_mode_vertical_texture_alignment(brw, mt);
+      /* XY_FAST_COPY_BLT doesn't support vertical alignment < 64 */
+      return align < 64 ? 64 : align;
+   }
+
    /* Broadwell only supports VALIGN of 4, 8, and 16.  The BSpec says 4
     * should always be used, except for stencil buffers, which should be 8.
     */
    if (brw->gen >= 8)
       return 4;
 
-   if (multisampled)
+   if (mt->num_samples > 1)
       return 4;
 
-   GLenum base_format = _mesa_get_format_base_format(format);
+   GLenum base_format = _mesa_get_format_base_format(mt->format);
 
    if (brw->gen >= 6 &&
        (base_format == GL_DEPTH_COMPONENT ||
@@ -161,7 +313,7 @@
        *
        *     VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
        */
-      if (base_format == GL_YCBCR_MESA || format == MESA_FORMAT_RGB_FLOAT32)
+      if (base_format == GL_YCBCR_MESA || mt->format == MESA_FORMAT_RGB_FLOAT32)
          return 2;
 
       return 4;
@@ -214,9 +366,8 @@
 
    mt->total_width = mt->physical_width0;
 
-   if (mt->compressed) {
-       mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
-   }
+   if (mt->compressed)
+       mt->total_width = ALIGN(mt->total_width, bw);
 
    /* May need to adjust width to accommodate the placement of
     * the 2nd mipmap.  This occurs when the alignment
@@ -281,9 +432,7 @@
                                        const struct intel_mipmap_tree *mt,
                                        unsigned level)
 {
-   assert(brw->gen < 9);
-
-   if (mt->target == GL_TEXTURE_3D ||
+   if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
        (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
       return ALIGN(minify(mt->physical_width0, level), mt->align_w);
    } else {
@@ -348,9 +497,9 @@
       mt->total_height += 2;
 }
 
-static bool
-use_linear_1d_layout(struct brw_context *brw,
-                     struct intel_mipmap_tree *mt)
+bool
+gen9_use_linear_1d_layout(const struct brw_context *brw,
+                          const struct intel_mipmap_tree *mt)
 {
    /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a
     * horizontal line. This isn't done for depth/stencil buffers however
@@ -375,7 +524,7 @@
 				 struct intel_mipmap_tree *mt)
 {
    unsigned height = mt->physical_height0;
-   bool layout_1d = use_linear_1d_layout(brw, mt);
+   bool layout_1d = gen9_use_linear_1d_layout(brw, mt);
    int physical_qpitch;
 
    if (layout_1d)
@@ -406,7 +555,7 @@
       if (mt->compressed)
          img_height /= mt->align_h;
 
-      for (int q = 0; q < mt->level[level].depth; q++) {
+      for (unsigned q = 0; q < mt->level[level].depth; q++) {
          if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
             intel_miptree_set_image_offset(mt, level, q, 0, q * img_height);
          } else {
@@ -458,46 +607,114 @@
    align_cube(mt);
 }
 
-void
-brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
-{
-   bool multisampled = mt->num_samples > 1;
-   bool gen6_hiz_or_stencil = false;
+/**
+ * \brief Choose the tiling mode for a miptree; helper for brw_miptree_layout().
+ */
+static uint32_t
+brw_miptree_choose_tiling(struct brw_context *brw,
+                          const struct intel_mipmap_tree *mt,
+                          uint32_t layout_flags)
+{
+   if (mt->format == MESA_FORMAT_S_UINT8) {
+      /* The stencil buffer is W tiled. However, we request from the kernel a
+       * non-tiled buffer because the GTT is incapable of W fencing.
+       */
+      return I915_TILING_NONE;
+   }
 
-   if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
-      const GLenum base_format = _mesa_get_format_base_format(mt->format);
-      gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
+   /* Do not support changing the tiling for miptrees with pre-allocated BOs. */
+   assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
+
+   /* Some usages may want only one type of tiling, like depth miptrees (Y
+    * tiled), or temporary BOs for uploading data once (linear).
+    */
+   switch (layout_flags & MIPTREE_LAYOUT_TILING_ANY) {
+   case MIPTREE_LAYOUT_TILING_ANY:
+      break;
+   case MIPTREE_LAYOUT_TILING_Y:
+      return I915_TILING_Y;
+   case MIPTREE_LAYOUT_TILING_NONE:
+      return I915_TILING_NONE;
    }
 
-   if (gen6_hiz_or_stencil) {
-      /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
-       * hardware doesn't support multiple mip levels on stencil/hiz.
+   if (mt->num_samples > 1) {
+      /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
+       * Surface"):
        *
-       * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
-       * "The hierarchical depth buffer does not support the LOD field"
+       *   [DevSNB+]: For multi-sample render targets, this field must be
+       *   1. MSRTs can only be tiled.
        *
-       * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
-       * "The stencil depth buffer does not support the LOD field"
-       */
-      if (mt->format == MESA_FORMAT_S_UINT8) {
-         /* Stencil uses W tiling, so we force W tiling alignment for the
-          * ALL_SLICES_AT_EACH_LOD miptree layout.
-          */
-         mt->align_w = 64;
-         mt->align_h = 64;
-      } else {
-         /* Depth uses Y tiling, so we force need Y tiling alignment for the
-          * ALL_SLICES_AT_EACH_LOD miptree layout.
-          */
-         mt->align_w = 128 / mt->cpp;
-         mt->align_h = 32;
-      }
-   } else {
-      mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt);
-      mt->align_h =
-         intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
+       * Our usual reason for preferring X tiling (fast blits using the
+       * blitting engine) doesn't apply to MSAA, since we'll generally be
+       * downsampling or upsampling when blitting between the MSAA buffer
+       * and another buffer, and the blitting engine doesn't support that.
+       * So use Y tiling, since it makes better use of the cache.
+       */
+      return I915_TILING_Y;
+   }
+
+   GLenum base_format = _mesa_get_format_base_format(mt->format);
+   if (base_format == GL_DEPTH_COMPONENT ||
+       base_format == GL_DEPTH_STENCIL_EXT)
+      return I915_TILING_Y;
+
+   /* 1D textures (and 1D array textures) don't get any benefit from tiling,
+    * in fact it leads to a less efficient use of memory space and bandwidth
+    * due to tile alignment.
+    */
+   if (mt->logical_height0 == 1)
+      return I915_TILING_NONE;
+
+   int minimum_pitch = mt->total_width * mt->cpp;
+
+   /* If the width is much smaller than a tile, don't bother tiling. */
+   if (minimum_pitch < 64)
+      return I915_TILING_NONE;
+
+   if (ALIGN(minimum_pitch, 512) >= 32768 ||
+       mt->total_width >= 32768 || mt->total_height >= 32768) {
+      perf_debug("%dx%d miptree too large to blit, falling back to untiled",
+                 mt->total_width, mt->total_height);
+      return I915_TILING_NONE;
+   }
+
+   /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
+   if (brw->gen < 6)
+      return I915_TILING_X;
+
+   /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
+    * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
+    *  or Linear."
+    * 128 bits per pixel translates to 16 bytes per pixel. The restriction holds
+    * all the way back to 965; Y tiling is permitted for these on Gen7+.
+    */
+   if (brw->gen < 7 && mt->cpp >= 16)
+      return I915_TILING_X;
+
+   /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
+    * messages), on p64, under the heading "Surface Vertical Alignment":
+    *
+    *     This field must be set to VALIGN_4 for all tiled Y Render Target
+    *     surfaces.
+    *
+    * So if the surface is renderable and uses a vertical alignment of 2,
+    * force it to be X tiled.  This is somewhat conservative (it's possible
+    * that the client won't ever render to this surface), but it's difficult
+    * to know that ahead of time.  And besides, since we use a vertical
+    * alignment of 4 as often as we can, this shouldn't happen very often.
+    */
+   if (brw->gen == 7 && mt->align_h == 2 &&
+       brw->format_supported_as_render_target[mt->format]) {
+      return I915_TILING_X;
    }
 
+   return I915_TILING_Y | I915_TILING_X;
+}
+
+static void
+intel_miptree_set_total_width_height(struct brw_context *brw,
+                                     struct intel_mipmap_tree *mt)
+{
    switch (mt->target) {
    case GL_TEXTURE_CUBE_MAP:
       if (brw->gen == 4) {
@@ -532,7 +749,7 @@
          break;
       case INTEL_MSAA_LAYOUT_NONE:
       case INTEL_MSAA_LAYOUT_IMS:
-         if (use_linear_1d_layout(brw, mt))
+         if (gen9_use_linear_1d_layout(brw, mt))
             gen9_miptree_layout_1d(mt);
          else
             brw_miptree_layout_2d(mt);
@@ -540,8 +757,68 @@
       }
       break;
    }
+
    DBG("%s: %dx%dx%d\n", __func__,
        mt->total_width, mt->total_height, mt->cpp);
+}
+
+static void
+intel_miptree_set_alignment(struct brw_context *brw,
+                            struct intel_mipmap_tree *mt,
+                            uint32_t layout_flags)
+{
+   bool gen6_hiz_or_stencil = false;
+
+   if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
+      const GLenum base_format = _mesa_get_format_base_format(mt->format);
+      gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
+   }
+
+   if (gen6_hiz_or_stencil) {
+      /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
+       * hardware doesn't support multiple mip levels on stencil/hiz.
+       *
+       * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
+       * "The hierarchical depth buffer does not support the LOD field"
+       *
+       * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
+       * "The stencil depth buffer does not support the LOD field"
+       */
+      if (mt->format == MESA_FORMAT_S_UINT8) {
+         /* Stencil uses W tiling, so we force W tiling alignment for the
+          * ALL_SLICES_AT_EACH_LOD miptree layout.
+          */
+         mt->align_w = 64;
+         mt->align_h = 64;
+         assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
+      } else {
+         /* Depth uses Y tiling, so we need Y tiling alignment for the
+          * ALL_SLICES_AT_EACH_LOD miptree layout.
+          */
+         mt->align_w = 128 / mt->cpp;
+         mt->align_h = 32;
+      }
+   } else {
+      mt->align_w =
+         intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
+      mt->align_h = intel_vertical_texture_alignment_unit(brw, mt);
+   }
+}
+
+void
+brw_miptree_layout(struct brw_context *brw,
+                   struct intel_mipmap_tree *mt,
+                   uint32_t layout_flags)
+{
+   mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
+
+   intel_miptree_set_alignment(brw, mt, layout_flags);
+   intel_miptree_set_total_width_height(brw, mt);
+
+   if (!mt->total_width || !mt->total_height) {
+      intel_miptree_release(&mt);
+      return;
+   }
 
    /* On Gen9+ the alignment values are expressed in multiples of the block
     * size
@@ -552,5 +829,8 @@
       mt->align_w /= i;
       mt->align_h /= j;
    }
+
+   if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
+      mt->tiling = brw_miptree_choose_tiling(brw, mt, layout_flags);
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_urb.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_urb.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_urb.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_urb.c	2015-09-16 14:36:09.000000000 +0000
@@ -249,10 +249,10 @@
    uf.bits1.cs_fence  = brw->urb.size;
 
    /* erratum: URB_FENCE must not cross a 64byte cacheline */
-   if ((brw->batch.used & 15) > 12) {
-      int pad = 16 - (brw->batch.used & 15);
+   if ((USED_BATCH(brw->batch) & 15) > 12) {
+      int pad = 16 - (USED_BATCH(brw->batch) & 15);
       do
-	 brw->batch.map[brw->batch.used++] = MI_NOOP;
+         *brw->batch.map_next++ = MI_NOOP;
       while (--pad);
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_util.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_util.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_util.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_util.h	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,7 @@
 extern GLuint brw_translate_blend_equation( GLenum mode );
 extern GLenum brw_fix_xRGB_alpha(GLenum function);
 
-static inline float
+static inline uint32_t
 brw_get_line_width(struct brw_context *brw)
 {
    /* From the OpenGL 4.4 spec:
@@ -50,9 +50,32 @@
     * the supplied width to the nearest integer, then clamping it to the
     * implementation-dependent maximum non-antialiased line width."
     */
-   return CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag
-                ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
-                0.0, brw->ctx.Const.MaxLineWidth);
+   float line_width =
+      CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag
+            ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
+            0.0f, brw->ctx.Const.MaxLineWidth);
+   uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
+
+   /* Line width of 0 is not allowed when MSAA is enabled */
+   if (brw->ctx.Multisample._Enabled) {
+      if (line_width_u3_7 == 0)
+         line_width_u3_7 = 1;
+   } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
+      /* For 1 pixel line thickness or less, the general
+       * anti-aliasing algorithm gives up, and a garbage line is
+       * generated.  Setting a Line Width of 0.0 specifies the
+       * rasterization of the "thinnest" (one-pixel-wide),
+       * non-antialiased lines.
+       *
+       * Lines rendered with zero Line Width are rasterized using
+       * Grid Intersection Quantization rules as specified by
+       * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
+       * Rasterization.
+       */
+      line_width_u3_7 = 0;
+   }
+
+   return line_width_u3_7;
 }
 
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -179,6 +179,7 @@
 
    case BRW_OPCODE_MACH:
    case BRW_OPCODE_MUL:
+   case SHADER_OPCODE_MULH:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_OR:
    case BRW_OPCODE_AND:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -35,6 +35,7 @@
 #include "program/prog_print.h"
 #include "program/prog_parameter.h"
 }
+#include "main/context.h"
 
 #define MAX_INSTRUCTION (1 << 30)
 
@@ -170,6 +171,17 @@
    this->writemask = writemask;
 }
 
+dst_reg::dst_reg(register_file file, int reg, brw_reg_type type,
+                 unsigned writemask)
+{
+   init();
+
+   this->file = file;
+   this->reg = reg;
+   this->type = type;
+   this->writemask = writemask;
+}
+
 dst_reg::dst_reg(struct brw_reg reg)
 {
    init();
@@ -1676,20 +1688,16 @@
     */
    emit(ADD(diff, src_reg(diff), src_reg(-2u)));
 
-   emit_shader_time_write(st_base, src_reg(diff));
-   emit_shader_time_write(st_written, src_reg(1u));
+   emit_shader_time_write(0, src_reg(diff));
+   emit_shader_time_write(1, src_reg(1u));
    emit(BRW_OPCODE_ELSE);
-   emit_shader_time_write(st_reset, src_reg(1u));
+   emit_shader_time_write(2, src_reg(1u));
    emit(BRW_OPCODE_ENDIF);
 }
 
 void
-vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
-                                     src_reg value)
+vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
 {
-   int shader_time_index =
-      brw_get_shader_time_index(brw, shader_prog, prog, type);
-
    dst_reg dst =
       dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
 
@@ -1698,7 +1706,8 @@
    time.reg_offset++;
 
    offset.type = BRW_REGISTER_TYPE_UD;
-   emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
+   int index = shader_time_index * 3 + shader_time_subindex;
+   emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
 
    time.type = BRW_REGISTER_TYPE_UD;
    emit(MOV(time, src_reg(value)));
@@ -1709,21 +1718,29 @@
 }
 
 bool
-vec4_visitor::run()
+vec4_visitor::run(gl_clip_plane *clip_planes)
 {
+   bool use_vec4_nir =
+      compiler->glsl_compiler_options[stage].NirOptions != NULL;
+
    sanity_param_count = prog->Parameters->NumParameters;
 
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+   if (shader_time_index >= 0)
       emit_shader_time_begin();
 
    assign_binding_table_offsets();
 
    emit_prolog();
 
-   /* Generate VS IR for main().  (the visitor only descends into
-    * functions called "main").
-    */
-   if (shader) {
+   if (use_vec4_nir) {
+      assert(prog->nir != NULL);
+      emit_nir_code();
+      if (failed)
+         return false;
+   } else if (shader) {
+      /* Generate VS IR for main().  (the visitor only descends into
+       * functions called "main").
+       */
       visit_instructions(shader->base.ir);
    } else {
       emit_program_code();
@@ -1731,7 +1748,7 @@
    base_ir = NULL;
 
    if (key->userclip_active && !prog->UsesClipDistanceOut)
-      setup_uniform_clipplane_values();
+      setup_uniform_clipplane_values(clip_planes);
 
    emit_thread_end();
 
@@ -1743,7 +1760,7 @@
     * that we have reladdr computations available for CSE, since we'll
     * often do repeated subexpressions for those.
     */
-   if (shader) {
+   if (shader || use_vec4_nir) {
       move_grf_array_access_to_scratch();
       move_uniform_array_access_to_pull_constants();
    } else {
@@ -1768,7 +1785,7 @@
          snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass,            \
                   stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
                                                                        \
-         backend_visitor::dump_instructions(filename);                 \
+         backend_shader::dump_instructions(filename);                  \
       }                                                                \
                                                                        \
       progress = progress || this_progress;                            \
@@ -1781,7 +1798,7 @@
       snprintf(filename, 64, "%s-%04d-00-start",
                stage_abbrev, shader_prog ? shader_prog->Name : 0);
 
-      backend_visitor::dump_instructions(filename);
+      backend_shader::dump_instructions(filename);
    }
 
    bool progress;
@@ -1829,15 +1846,30 @@
       }
    }
 
-   while (!reg_allocate()) {
-      if (failed)
-         return false;
+   bool allocated_without_spills = reg_allocate();
+
+   if (!allocated_without_spills) {
+      compiler->shader_perf_log(log_data,
+                                "%s shader triggered register spilling.  "
+                                "Try reducing the number of live vec4 values "
+                                "to improve performance.\n",
+                                stage_name);
+
+      while (!reg_allocate()) {
+         if (failed)
+            return false;
+      }
    }
 
    opt_schedule_instructions();
 
    opt_set_dependency_control();
 
+   if (last_scratch > 0) {
+      prog_data->base.total_scratch =
+         brw_get_scratch_size(last_scratch * REG_SIZE);
+   }
+
    /* If any state parameters were appended, then ParameterValues could have
     * been realloced, in which case the driver uniform storage set up by
     * _mesa_associate_uniform_storage() would point to freed memory.  Make
@@ -1859,17 +1891,16 @@
  */
 const unsigned *
 brw_vs_emit(struct brw_context *brw,
-            struct gl_shader_program *prog,
-            struct brw_vs_compile *c,
-            struct brw_vs_prog_data *prog_data,
             void *mem_ctx,
+            const struct brw_vs_prog_key *key,
+            struct brw_vs_prog_data *prog_data,
+            struct gl_vertex_program *vp,
+            struct gl_shader_program *prog,
             unsigned *final_assembly_size)
 {
    bool start_busy = false;
    double start_time = 0;
    const unsigned *assembly = NULL;
-   bool use_nir =
-      brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions != NULL;
 
    if (unlikely(brw->perf_debug)) {
       start_busy = (brw->batch.last_bo &&
@@ -1881,23 +1912,35 @@
    if (prog)
       shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
 
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
+
    if (unlikely(INTEL_DEBUG & DEBUG_VS))
-      brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
+      brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
 
-   if (use_nir && !c->vp->program.Base.nir) {
-      /* Normally we generate NIR in LinkShader() or ProgramStringNotify(), but
-       * Mesa's fixed-function vertex program handling doesn't notify the driver
-       * at all.  Just do it here, at the last minute, even though it's lame.
+   if (!vp->Base.nir &&
+       (brw->intelScreen->compiler->scalar_vs ||
+        brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) {
+      /* Normally we generate NIR in LinkShader() or
+       * ProgramStringNotify(), but Mesa's fixed-function vertex program
+       * handling doesn't notify the driver at all.  Just do it here, at
+       * the last minute, even though it's lame.
        */
-      assert(c->vp->program.Base.Id == 0 && prog == NULL);
-      c->vp->program.Base.nir =
-         brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
+      assert(vp->Base.Id == 0 && prog == NULL);
+      vp->Base.nir =
+         brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX,
+                        brw->intelScreen->compiler->scalar_vs);
    }
 
-   if (brw->scalar_vs && (prog || use_nir)) {
-      fs_visitor v(brw, mem_ctx, MESA_SHADER_VERTEX, &c->key,
-                   &prog_data->base.base, prog, &c->vp->program.Base, 8);
-      if (!v.run_vs()) {
+   if (brw->intelScreen->compiler->scalar_vs) {
+      prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+
+      fs_visitor v(brw->intelScreen->compiler, brw,
+                   mem_ctx, MESA_SHADER_VERTEX, key,
+                   &prog_data->base.base, prog, &vp->Base,
+                   8, st_index);
+      if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
          if (prog) {
             prog->LinkStatus = false;
             ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -1909,8 +1952,9 @@
          return NULL;
       }
 
-      fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base,
-                     &c->vp->program.Base, v.promoted_constants,
+      fs_generator g(brw->intelScreen->compiler, brw,
+                     mem_ctx, (void *) key, &prog_data->base.base,
+                     &vp->Base, v.promoted_constants,
                      v.runtime_check_aads_emit, "VS");
       if (INTEL_DEBUG & DEBUG_VS) {
          char *name;
@@ -1920,20 +1964,21 @@
                                    prog->Name);
          } else {
             name = ralloc_asprintf(mem_ctx, "vertex program %d",
-                                   c->vp->program.Base.Id);
+                                   vp->Base.Id);
          }
          g.enable_debug(name);
       }
       g.generate_code(v.cfg, 8);
       assembly = g.get_assembly(final_assembly_size);
-
-      prog_data->base.simd8 = true;
-      c->base.last_scratch = v.last_scratch;
    }
 
    if (!assembly) {
-      vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
-      if (!v.run()) {
+      prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+
+      vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
+                        vp, prog, mem_ctx, st_index,
+                        !_mesa_is_gles3(&brw->ctx));
+      if (!v.run(brw_select_clip_planes(&brw->ctx))) {
          if (prog) {
             prog->LinkStatus = false;
             ralloc_strcat(&prog->InfoLog, v.fail_msg);
@@ -1945,14 +1990,15 @@
          return NULL;
       }
 
-      vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
+      vec4_generator g(brw->intelScreen->compiler, brw,
+                       prog, &vp->Base, &prog_data->base,
                        mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
       assembly = g.generate_assembly(v.cfg, final_assembly_size);
    }
 
    if (unlikely(brw->perf_debug) && shader) {
       if (shader->compiled_once) {
-         brw_vs_debug_recompile(brw, prog, &c->key);
+         brw_vs_debug_recompile(brw, prog, key);
       }
       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
          perf_debug("VS compile took %.03f ms and stalled the GPU\n",
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -62,6 +62,7 @@
    case BRW_OPCODE_CMPN:
    case BRW_OPCODE_ADD:
    case BRW_OPCODE_MUL:
+   case SHADER_OPCODE_MULH:
    case BRW_OPCODE_FRC:
    case BRW_OPCODE_RNDU:
    case BRW_OPCODE_RNDD:
@@ -114,8 +115,16 @@
 {
    return a->opcode == b->opcode &&
           a->saturate == b->saturate &&
+          a->predicate == b->predicate &&
+          a->predicate_inverse == b->predicate_inverse &&
           a->conditional_mod == b->conditional_mod &&
+          a->flag_subreg == b->flag_subreg &&
           a->dst.type == b->dst.type &&
+          a->offset == b->offset &&
+          a->mlen == b->mlen &&
+          a->base_mrf == b->base_mrf &&
+          a->header_size == b->header_size &&
+          a->shadow_compare == b->shadow_compare &&
           a->dst.writemask == b->dst.writemask &&
           a->force_writemask_all == b->force_writemask_all &&
           a->regs_written == b->regs_written &&
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -134,7 +134,8 @@
    return brw_reg;
 }
 
-vec4_generator::vec4_generator(struct brw_context *brw,
+vec4_generator::vec4_generator(const struct brw_compiler *compiler,
+                               void *log_data,
                                struct gl_shader_program *shader_prog,
                                struct gl_program *prog,
                                struct brw_vue_prog_data *prog_data,
@@ -142,13 +143,13 @@
                                bool debug_flag,
                                const char *stage_name,
                                const char *stage_abbrev)
-   : brw(brw), devinfo(brw->intelScreen->devinfo),
+   : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
      shader_prog(shader_prog), prog(prog), prog_data(prog_data),
      mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
      debug_flag(debug_flag)
 {
    p = rzalloc(mem_ctx, struct brw_codegen);
-   brw_init_codegen(brw->intelScreen->devinfo, p, mem_ctx);
+   brw_init_codegen(devinfo, p, mem_ctx);
 }
 
 vec4_generator::~vec4_generator()
@@ -414,6 +415,9 @@
 
       brw_pop_insn_state(p);
 
+      if (inst->base_mrf != -1)
+         gen6_resolve_implied_move(p, &src, inst->base_mrf);
+
       /* dst = send(offset, a0.0 | <descriptor>) */
       brw_inst *insn = brw_send_indirect_message(
          p, BRW_SFID_SAMPLER, dst, src, addr);
@@ -1461,19 +1465,15 @@
          break;
 
       case SHADER_OPCODE_UNTYPED_ATOMIC:
-         assert(src[1].file == BRW_IMMEDIATE_VALUE &&
-                src[2].file == BRW_IMMEDIATE_VALUE);
+         assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
                             !inst->dst.is_null());
-         brw_mark_surface_used(&prog_data->base, src[1].dw1.ud);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
-         assert(src[1].file == BRW_IMMEDIATE_VALUE &&
-                src[2].file == BRW_IMMEDIATE_VALUE);
+         assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
                                   src[2].dw1.ud);
-         brw_mark_surface_used(&prog_data->base, src[1].dw1.ud);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -1545,7 +1545,7 @@
           *
           * where they pack the four bytes from the low and high four DW.
           */
-         assert(is_power_of_two(dst.dw1.bits.writemask) &&
+         assert(_mesa_is_pow_two(dst.dw1.bits.writemask) &&
                 dst.dw1.bits.writemask != 0);
          unsigned offset = __builtin_ctz(dst.dw1.bits.writemask);
 
@@ -1623,16 +1623,11 @@
       ralloc_free(annotation.ann);
    }
 
-   static GLuint msg_id = 0;
-   _mesa_gl_debug(&brw->ctx, &msg_id,
-                  MESA_DEBUG_SOURCE_SHADER_COMPILER,
-                  MESA_DEBUG_TYPE_OTHER,
-                  MESA_DEBUG_SEVERITY_NOTIFICATION,
-                  "%s vec4 shader: %d inst, %d loops, "
-                  "compacted %d to %d bytes.\n",
-                  stage_abbrev,
-                  before_size / 16, loop_count,
-                  before_size, after_size);
+   compiler->shader_debug_log(log_data,
+                              "%s vec4 shader: %d inst, %d loops, "
+                              "compacted %d to %d bytes.\n",
+                              stage_abbrev, before_size / 16, loop_count,
+                              before_size, after_size);
 }
 
 const unsigned *
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "brw_vec4_gs_visitor.h"
+
+namespace brw {
+/* Fill nir_inputs[] with ATTR src_regs for every geometry shader input. */
+void
+vec4_gs_visitor::nir_setup_inputs(nir_shader *shader)
+{
+   nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+
+   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+      int offset = var->data.driver_location; /* next nir_inputs index */
+      if (var->type->base_type == GLSL_TYPE_ARRAY) {
+         /* Geometry shader inputs are arrays, but they use an unusual array
+          * layout: instead of all array elements for a given geometry shader
+          * input being stored consecutively, all geometry shader inputs are
+          * interleaved into one giant array. At this stage of compilation, we
+          * assume that the stride of the array is BRW_VARYING_SLOT_COUNT.
+          * Later, setup_attributes() will remap our accesses to the actual
+          * input array.
+          */
+         assert(var->type->length > 0);
+         int length = var->type->length;
+         int size = type_size(var->type) / length; /* per array element */
+         for (int i = 0; i < length; i++) {
+            int location = var->data.location + i * BRW_VARYING_SLOT_COUNT;
+            for (int j = 0; j < size; j++) {
+               src_reg src = src_reg(ATTR, location + j, var->type);
+               src = retype(src, brw_type_for_base_type(var->type));
+               nir_inputs[offset] = src;
+               offset++;
+            }
+         }
+      } else { /* non-array: locations are consecutive */
+         int size = type_size(var->type);
+         for (int i = 0; i < size; i++) {
+            src_reg src = src_reg(ATTR, var->data.location + i, var->type);
+            src = retype(src, brw_type_for_base_type(var->type));
+            nir_inputs[offset] = src;
+            offset++;
+         }
+      }
+   }
+}
+/* Set up nir_system_values[] for a system-value intrinsic (GS override). */
+void
+vec4_gs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+   dst_reg *reg;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_invocation_id:
+      reg = &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
+      if (reg->file == BAD_FILE) /* presumably: entry not set up yet */
+         *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INVOCATION_ID,
+                                                 glsl_type::int_type);
+      break;
+
+   default: /* every other intrinsic is left to the base class */
+      vec4_visitor::nir_setup_system_value_intrinsic(instr);
+   }
+
+}
+/* Emit vec4 IR for GS-specific intrinsics; others go to vec4_visitor. */
+void
+vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+   dst_reg dest;
+   src_reg src; /* NOTE(review): not referenced anywhere in this function */
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_emit_vertex: { /* const_index[0] is the stream id */
+      int stream_id = instr->const_index[0];
+      gs_emit_vertex(stream_id);
+      break;
+   }
+
+   case nir_intrinsic_end_primitive:
+      gs_end_primitive();
+      break;
+
+   case nir_intrinsic_load_invocation_id: {
+      src_reg invocation_id =
+         src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
+      assert(invocation_id.file != BAD_FILE); /* must already be set up */
+      dest = get_nir_dest(instr->dest, invocation_id.type);
+      emit(MOV(dest, invocation_id)); /* copy it into the NIR destination */
+      break;
+   }
+
+   default: /* not handled here: fall back to the base class */
+      vec4_visitor::nir_emit_intrinsic(instr);
+   }
+}
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -34,26 +34,29 @@
 
 namespace brw {
 
-vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw,
+vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
+                                 void *log_data,
                                  struct brw_gs_compile *c,
                                  struct gl_shader_program *prog,
                                  void *mem_ctx,
-                                 bool no_spills)
-   : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base,
+                                 bool no_spills,
+                                 int shader_time_index)
+   : vec4_visitor(compiler, log_data,
+                  &c->gp->program.Base, &c->key.base,
                   &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
-                  no_spills,
-                  ST_GS, ST_GS_WRITTEN, ST_GS_RESET),
+                  no_spills, shader_time_index),
      c(c)
 {
 }
 
 
 dst_reg *
-vec4_gs_visitor::make_reg_for_system_value(ir_variable *ir)
+vec4_gs_visitor::make_reg_for_system_value(int location,
+                                           const glsl_type *type)
 {
-   dst_reg *reg = new(mem_ctx) dst_reg(this, ir->type);
+   dst_reg *reg = new(mem_ctx) dst_reg(this, type);
 
-   switch (ir->data.location) {
+   switch (location) {
    case SYSTEM_VALUE_INVOCATION_ID:
       this->current_annotation = "initialize gl_InvocationID";
       emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
@@ -106,7 +109,7 @@
     * to be interleaved, so one register contains two attribute slots.
     */
    int attributes_per_reg =
-      c->prog_data.dispatch_mode == GEN7_GS_DISPATCH_MODE_DUAL_OBJECT ? 1 : 2;
+      c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
 
    /* If a geometry shader tries to read from an input that wasn't written by
     * the vertex shader, that produces undefined results, but it shouldn't
@@ -346,90 +349,82 @@
    if (c->control_data_header_size_bits > 128)
       urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;
 
-   /* If vertex_count is 0, then no control data bits have been accumulated
-    * yet, so we should do nothing.
+   /* If we are using either channel masks or a per-slot offset, then we
+    * need to figure out which DWORD we are trying to write to, using the
+    * formula:
+    *
+    *     dword_index = (vertex_count - 1) * bits_per_vertex / 32
+    *
+    * Since bits_per_vertex is a power of two, and is known at compile
+    * time, this can be optimized to (the code gets 5 - log2 as 6 - fls):
+    *
+    *     dword_index = (vertex_count - 1) >> (5 - log2(bits_per_vertex))
+    */
+   src_reg dword_index(this, glsl_type::uint_type);
+   if (urb_write_flags) {
+      src_reg prev_count(this, glsl_type::uint_type);
+      emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+      unsigned log2_bits_per_vertex =
+         _mesa_fls(c->control_data_bits_per_vertex);
+      emit(SHR(dst_reg(dword_index), prev_count,
+               (uint32_t) (6 - log2_bits_per_vertex)));
+   }
+
+   /* Start building the URB write message.  The first MRF gets a copy of
+    * R0.
     */
-   emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ));
-   emit(IF(BRW_PREDICATE_NORMAL));
-   {
-      /* If we are using either channel masks or a per-slot offset, then we
-       * need to figure out which DWORD we are trying to write to, using the
-       * formula:
-       *
-       *     dword_index = (vertex_count - 1) * bits_per_vertex / 32
-       *
-       * Since bits_per_vertex is a power of two, and is known at compile
-       * time, this can be optimized to:
-       *
-       *     dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
+   int base_mrf = 1;
+   dst_reg mrf_reg(MRF, base_mrf);
+   src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+   vec4_instruction *inst = emit(MOV(mrf_reg, r0));
+   inst->force_writemask_all = true;
+
+   if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
+      /* Set the per-slot offset to dword_index / 4, so that we'll write to
+       * the appropriate OWORD within the control data header.
        */
-      src_reg dword_index(this, glsl_type::uint_type);
-      if (urb_write_flags) {
-         src_reg prev_count(this, glsl_type::uint_type);
-         emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
-         unsigned log2_bits_per_vertex =
-            _mesa_fls(c->control_data_bits_per_vertex);
-         emit(SHR(dst_reg(dword_index), prev_count,
-                  (uint32_t) (6 - log2_bits_per_vertex)));
-      }
+      src_reg per_slot_offset(this, glsl_type::uint_type);
+      emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
+      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
+   }
 
-      /* Start building the URB write message.  The first MRF gets a copy of
-       * R0.
+   if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
+      /* Set the channel masks to 1 << (dword_index % 4), so that we'll
+       * write to the appropriate DWORD within the OWORD.  We need to do
+       * this computation with force_writemask_all, otherwise garbage data
+       * from invocation 0 might clobber the mask for invocation 1 when
+       * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
+       * together.
        */
-      int base_mrf = 1;
-      dst_reg mrf_reg(MRF, base_mrf);
-      src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-      vec4_instruction *inst = emit(MOV(mrf_reg, r0));
+      src_reg channel(this, glsl_type::uint_type);
+      inst = emit(AND(dst_reg(channel), dword_index, 3u));
       inst->force_writemask_all = true;
-
-      if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
-         /* Set the per-slot offset to dword_index / 4, to that we'll write to
-          * the appropriate OWORD within the control data header.
-          */
-         src_reg per_slot_offset(this, glsl_type::uint_type);
-         emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
-         emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
-      }
-
-      if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
-         /* Set the channel masks to 1 << (dword_index % 4), so that we'll
-          * write to the appropriate DWORD within the OWORD.  We need to do
-          * this computation with force_writemask_all, otherwise garbage data
-          * from invocation 0 might clobber the mask for invocation 1 when
-          * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
-          * together.
-          */
-         src_reg channel(this, glsl_type::uint_type);
-         inst = emit(AND(dst_reg(channel), dword_index, 3u));
-         inst->force_writemask_all = true;
-         src_reg one(this, glsl_type::uint_type);
-         inst = emit(MOV(dst_reg(one), 1u));
-         inst->force_writemask_all = true;
-         src_reg channel_mask(this, glsl_type::uint_type);
-         inst = emit(SHL(dst_reg(channel_mask), one, channel));
-         inst->force_writemask_all = true;
-         emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
-                                               channel_mask);
-         emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
-      }
-
-      /* Store the control data bits in the message payload and send it. */
-      dst_reg mrf_reg2(MRF, base_mrf + 1);
-      inst = emit(MOV(mrf_reg2, this->control_data_bits));
+      src_reg one(this, glsl_type::uint_type);
+      inst = emit(MOV(dst_reg(one), 1u));
       inst->force_writemask_all = true;
-      inst = emit(GS_OPCODE_URB_WRITE);
-      inst->urb_write_flags = urb_write_flags;
-      /* We need to increment Global Offset by 256-bits to make room for
-       * Broadwell's extra "Vertex Count" payload at the beginning of the
-       * URB entry.  Since this is an OWord message, Global Offset is counted
-       * in 128-bit units, so we must set it to 2.
-       */
-      if (devinfo->gen >= 8)
-         inst->offset = 2;
-      inst->base_mrf = base_mrf;
-      inst->mlen = 2;
+      src_reg channel_mask(this, glsl_type::uint_type);
+      inst = emit(SHL(dst_reg(channel_mask), one, channel));
+      inst->force_writemask_all = true;
+      emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
+                                            channel_mask);
+      emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
    }
-   emit(BRW_OPCODE_ENDIF);
+
+   /* Store the control data bits in the message payload and send it. */
+   dst_reg mrf_reg2(MRF, base_mrf + 1);
+   inst = emit(MOV(mrf_reg2, this->control_data_bits));
+   inst->force_writemask_all = true;
+   inst = emit(GS_OPCODE_URB_WRITE);
+   inst->urb_write_flags = urb_write_flags;
+   /* We need to increment Global Offset by 256-bits to make room for
+    * Broadwell's extra "Vertex Count" payload at the beginning of the
+    * URB entry.  Since this is an OWord message, Global Offset is counted
+    * in 128-bit units, so we must set it to 2.
+    */
+   if (devinfo->gen >= 8)
+      inst->offset = 2;
+   inst->base_mrf = base_mrf;
+   inst->mlen = 2;
 }
 
 void
@@ -472,7 +467,7 @@
 }
 
 void
-vec4_gs_visitor::visit(ir_emit_vertex *ir)
+vec4_gs_visitor::gs_emit_vertex(int stream_id)
 {
    this->current_annotation = "emit vertex: safety check";
 
@@ -486,7 +481,7 @@
     * be recorded by transform feedback, we can simply discard all geometry
     * bound to these streams when transform feedback is disabled.
     */
-   if (ir->stream_id() > 0 && shader_prog->TransformFeedback.NumVarying == 0)
+   if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
       return;
 
    /* To ensure that we don't output more vertices than the shader specified
@@ -529,9 +524,17 @@
             emit(AND(dst_null_d(), this->vertex_count,
                      (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
          inst->conditional_mod = BRW_CONDITIONAL_Z;
+
          emit(IF(BRW_PREDICATE_NORMAL));
          {
+            /* If vertex_count is 0, then no control data bits have been
+             * accumulated yet, so we skip emitting them.
+             */
+            emit(CMP(dst_null_d(), this->vertex_count, 0u,
+                     BRW_CONDITIONAL_NEQ));
+            emit(IF(BRW_PREDICATE_NORMAL));
             emit_control_data_bits();
+            emit(BRW_OPCODE_ENDIF);
 
             /* Reset control_data_bits to 0 so we can start accumulating a new
              * batch.
@@ -557,7 +560,7 @@
           c->prog_data.control_data_format ==
              GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
           this->current_annotation = "emit vertex: Stream control data bits";
-          set_stream_control_data_bits(ir->stream_id());
+          set_stream_control_data_bits(stream_id);
       }
 
       this->current_annotation = "emit vertex: increment vertex count";
@@ -570,7 +573,13 @@
 }
 
 void
-vec4_gs_visitor::visit(ir_end_primitive *)
+vec4_gs_visitor::visit(ir_emit_vertex *ir)
+{
+   gs_emit_vertex(ir->stream_id());
+}
+
+void
+vec4_gs_visitor::gs_end_primitive()
 {
    /* We can only do EndPrimitive() functionality when the control data
     * consists of cut bits.  Fortunately, the only time it isn't is when the
@@ -620,6 +629,12 @@
    emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
 }
 
+void
+vec4_gs_visitor::visit(ir_end_primitive *)
+{
+   gs_end_primitive(); /* same helper the NIR intrinsic path calls */
+}
+
 static const unsigned *
 generate_assembly(struct brw_context *brw,
                   struct gl_shader_program *shader_prog,
@@ -629,7 +644,8 @@
                   const cfg_t *cfg,
                   unsigned *final_assembly_size)
 {
-   vec4_generator g(brw, shader_prog, prog, prog_data, mem_ctx,
+   vec4_generator g(brw->intelScreen->compiler, brw,
+                    shader_prog, prog, prog_data, mem_ctx,
                     INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
    return g.generate_assembly(cfg, final_assembly_size);
 }
@@ -648,6 +664,10 @@
       brw_dump_ir("geometry", prog, &shader->base, NULL);
    }
 
+   int st_index = -1;
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+      st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS);
+
    if (brw->gen >= 7) {
       /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
        * so without spilling. If the GS invocations count > 1, then we can't use
@@ -655,10 +675,11 @@
        */
       if (c->prog_data.invocations <= 1 &&
           likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
-         c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_OBJECT;
+         c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
 
-         vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */);
-         if (v.run()) {
+         vec4_gs_visitor v(brw->intelScreen->compiler, brw,
+                           c, prog, mem_ctx, true /* no_spills */, st_index);
+         if (v.run(NULL /* clip planes */)) {
             return generate_assembly(brw, prog, &c->gp->program.Base,
                                      &c->prog_data.base, mem_ctx, v.cfg,
                                      final_assembly_size);
@@ -690,19 +711,23 @@
     * SINGLE mode.
     */
    if (c->prog_data.invocations <= 1 || brw->gen < 7)
-      c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_SINGLE;
+      c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
    else
-      c->prog_data.dispatch_mode = GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE;
+      c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
 
    vec4_gs_visitor *gs = NULL;
    const unsigned *ret = NULL;
 
    if (brw->gen >= 7)
-      gs = new vec4_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */);
+      gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
+                               c, prog, mem_ctx, false /* no_spills */,
+                               st_index);
    else
-      gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */);
+      gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
+                               c, prog, mem_ctx, false /* no_spills */,
+                               st_index);
 
-   if (!gs->run()) {
+   if (!gs->run(NULL /* clip planes */)) {
       prog->LinkStatus = false;
       ralloc_strcat(&prog->InfoLog, gs->fail_msg);
    } else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h	2015-09-16 14:36:09.000000000 +0000
@@ -37,7 +37,6 @@
  */
 struct brw_gs_compile
 {
-   struct brw_vec4_compile base;
    struct brw_gs_prog_key key;
    struct brw_gs_prog_data prog_data;
    struct brw_vue_map input_vue_map;
@@ -68,14 +67,20 @@
 class vec4_gs_visitor : public vec4_visitor
 {
 public:
-   vec4_gs_visitor(struct brw_context *brw,
+   vec4_gs_visitor(const struct brw_compiler *compiler,
+                   void *log_data,
                    struct brw_gs_compile *c,
                    struct gl_shader_program *prog,
                    void *mem_ctx,
-                   bool no_spills);
+                   bool no_spills,
+                   int shader_time_index);
+
+   virtual void nir_setup_inputs(nir_shader *shader);
+   virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
 
 protected:
-   virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
+   virtual dst_reg *make_reg_for_system_value(int location,
+                                              const glsl_type *type);
    virtual void setup_payload();
    virtual void emit_prolog();
    virtual void emit_program_code();
@@ -85,6 +90,9 @@
    virtual int compute_array_stride(ir_dereference_array *ir);
    virtual void visit(ir_emit_vertex *);
    virtual void visit(ir_end_primitive *);
+   virtual void gs_emit_vertex(int stream_id);
+   virtual void gs_end_primitive();
+   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
 
 protected:
    int setup_varying_inputs(int payload_reg, int *attribute_map,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4.h	2015-09-16 14:36:09.000000000 +0000
@@ -45,12 +45,9 @@
 #endif
 
 #include "glsl/ir.h"
+#include "glsl/nir/nir.h"
 
 
-struct brw_vec4_compile {
-   GLuint last_scratch; /**< measured in 32-byte (register size) units */
-};
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -73,11 +70,11 @@
  * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
  * fixed-function) into VS IR.
  */
-class vec4_visitor : public backend_visitor
+class vec4_visitor : public backend_shader, public ir_visitor
 {
 public:
-   vec4_visitor(struct brw_context *brw,
-                struct brw_vec4_compile *c,
+   vec4_visitor(const struct brw_compiler *compiler,
+                void *log_data,
                 struct gl_program *prog,
                 const struct brw_vue_prog_key *key,
                 struct brw_vue_prog_data *prog_data,
@@ -85,9 +82,7 @@
                 gl_shader_stage stage,
 		void *mem_ctx,
                 bool no_spills,
-                shader_time_shader_type st_base,
-                shader_time_shader_type st_written,
-                shader_time_shader_type st_reset);
+                int shader_time_index);
    ~vec4_visitor();
 
    dst_reg dst_null_f()
@@ -105,7 +100,6 @@
       return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
    }
 
-   struct brw_vec4_compile * const c;
    const struct brw_vue_prog_key * const key;
    struct brw_vue_prog_data * const prog_data;
    unsigned int sanity_param_count;
@@ -160,6 +154,7 @@
    virtual void visit(ir_if *);
    virtual void visit(ir_emit_vertex *);
    virtual void visit(ir_end_primitive *);
+   virtual void visit(ir_barrier *);
    /*@}*/
 
    src_reg result;
@@ -178,13 +173,16 @@
 
    struct hash_table *variable_ht;
 
-   bool run(void);
+   bool run(gl_clip_plane *clip_planes);
    void fail(const char *msg, ...);
 
-   void setup_uniform_clipplane_values();
+   void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
+   virtual void setup_vector_uniform_values(const gl_constant_value *values,
+                                            unsigned n);
    void setup_uniform_values(ir_variable *ir);
    void setup_builtin_uniform_values(ir_variable *ir);
    int setup_uniforms(int payload_reg);
+
    bool reg_allocate_trivial();
    bool reg_allocate();
    void evaluate_spill_costs(float *spill_costs, bool *no_spill);
@@ -293,14 +291,17 @@
    void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
    void emit_if_gen6(ir_if *ir);
 
-   void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
-                    src_reg src0, src_reg src1);
+   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
+                                 src_reg src0, src_reg src1);
 
-   void emit_lrp(const dst_reg &dst,
-                 const src_reg &x, const src_reg &y, const src_reg &a);
+   vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
+                              const src_reg &y, const src_reg &a);
 
-   /** Copy any live channel from \p src to the first channel of \p dst. */
-   void emit_uniformize(const dst_reg &dst, const src_reg &src);
+   /**
+    * Copy any live channel from \p src to the first channel of the
+    * result.
+    */
+   src_reg emit_uniformize(const src_reg &src);
 
    void emit_block_move(dst_reg *dst, src_reg *src,
                         const struct glsl_type *type, brw_predicate predicate);
@@ -318,11 +319,13 @@
    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 		    dst_reg dst, src_reg src0, src_reg src1);
 
-   src_reg fix_3src_operand(src_reg src);
+   src_reg fix_3src_operand(const src_reg &src);
+   src_reg resolve_source_modifiers(const src_reg &src);
 
-   void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
-                  const src_reg &src1 = src_reg());
-   src_reg fix_math_operand(src_reg src);
+   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+                               const src_reg &src1 = src_reg());
+
+   src_reg fix_math_operand(const src_reg &src);
 
    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
@@ -331,10 +334,27 @@
    void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
    void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
 
-   uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
-   src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
+   void emit_texture(ir_texture_opcode op,
+                     dst_reg dest,
+                     const glsl_type *dest_type,
+                     src_reg coordinate,
+                     int coord_components,
+                     src_reg shadow_comparitor,
+                     src_reg lod, src_reg lod2,
+                     src_reg sample_index,
+                     uint32_t constant_offset,
+                     src_reg offset_value,
+                     src_reg mcs,
+                     bool is_cube_array,
+                     uint32_t sampler, src_reg sampler_reg);
+
+   uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
+   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
+                          src_reg sampler);
    void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
-   void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
+   void swizzle_result(ir_texture_opcode op, dst_reg dest,
+                       src_reg orig_val, uint32_t sampler,
+                       const glsl_type *dest_type);
 
    void emit_ndc_computation();
    void emit_psiz_and_flags(dst_reg reg);
@@ -344,8 +364,7 @@
 
    void emit_shader_time_begin();
    void emit_shader_time_end();
-   void emit_shader_time_write(enum shader_time_shader_type type,
-                               src_reg value);
+   void emit_shader_time_write(int shader_time_subindex, src_reg value);
 
    void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                             dst_reg dst, src_reg offset, src_reg src0,
@@ -390,13 +409,53 @@
 
    void visit_atomic_counter_intrinsic(ir_call *ir);
 
+   int type_size(const struct glsl_type *type);
+   bool is_high_sampler(src_reg sampler);
+
+   virtual void emit_nir_code();
+   virtual void nir_setup_inputs(nir_shader *shader);
+   virtual void nir_setup_uniforms(nir_shader *shader);
+   virtual void nir_setup_uniform(nir_variable *var);
+   virtual void nir_setup_builtin_uniform(nir_variable *var);
+   virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
+   virtual void nir_setup_system_values(nir_shader *shader);
+   virtual void nir_emit_impl(nir_function_impl *impl);
+   virtual void nir_emit_cf_list(exec_list *list);
+   virtual void nir_emit_if(nir_if *if_stmt);
+   virtual void nir_emit_loop(nir_loop *loop);
+   virtual void nir_emit_block(nir_block *block);
+   virtual void nir_emit_instr(nir_instr *instr);
+   virtual void nir_emit_load_const(nir_load_const_instr *instr);
+   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
+   virtual void nir_emit_alu(nir_alu_instr *instr);
+   virtual void nir_emit_jump(nir_jump_instr *instr);
+   virtual void nir_emit_texture(nir_tex_instr *instr);
+
+   dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
+   dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
+   dst_reg get_nir_dest(nir_dest dest);
+   src_reg get_nir_src(nir_src src, enum brw_reg_type type,
+                       unsigned num_components = 4);
+   src_reg get_nir_src(nir_src src, nir_alu_type type,
+                       unsigned num_components = 4);
+   src_reg get_nir_src(nir_src src,
+                       unsigned num_components = 4);
+
+   virtual dst_reg *make_reg_for_system_value(int location,
+                                              const glsl_type *type) = 0;
+
+   dst_reg *nir_locals;
+   dst_reg *nir_ssa_values;
+   src_reg *nir_inputs;
+   unsigned *nir_uniform_driver_location;
+   dst_reg *nir_system_values;
+
 protected:
    void emit_vertex();
    void lower_attributes_to_hw_regs(const int *attribute_map,
                                     bool interleaved);
    void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                    int reg_node_count);
-   virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
    virtual void assign_binding_table_offsets();
    virtual void setup_payload() = 0;
    virtual void emit_prolog() = 0;
@@ -405,6 +464,8 @@
    virtual void emit_urb_write_header(int mrf) = 0;
    virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
    virtual int compute_array_stride(ir_dereference_array *ir);
+   virtual void gs_emit_vertex(int stream_id);
+   virtual void gs_end_primitive();
 
 private:
    /**
@@ -412,9 +473,9 @@
     */
    const bool no_spills;
 
-   const shader_time_shader_type st_base;
-   const shader_time_shader_type st_written;
-   const shader_time_shader_type st_reset;
+   int shader_time_index;
+
+   unsigned last_scratch; /**< measured in 32-byte (register size) units */
 };
 
 
@@ -426,7 +487,7 @@
 class vec4_generator
 {
 public:
-   vec4_generator(struct brw_context *brw,
+   vec4_generator(const struct brw_compiler *compiler, void *log_data,
                   struct gl_shader_program *shader_prog,
                   struct gl_program *prog,
                   struct brw_vue_prog_data *prog_data,
@@ -508,7 +569,9 @@
                                          struct brw_reg dst);
    void generate_unpack_flags(struct brw_reg dst);
 
-   struct brw_context *brw;
+   const struct brw_compiler *compiler;
+   void *log_data; /* Passed to compiler->*_log functions */
+
    const struct brw_device_info *devinfo;
 
    struct brw_codegen *p;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -96,7 +96,8 @@
 	  * are the things that screen off preceding definitions of a
 	  * variable, and thus qualify for being in def[].
 	  */
-	 if (inst->dst.file == GRF && !inst->predicate) {
+	 if (inst->dst.file == GRF &&
+	     (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
             for (unsigned i = 0; i < inst->regs_written; i++) {
                for (int c = 0; c < 4; c++) {
                   if (inst->dst.writemask & (1 << c)) {
@@ -133,27 +134,9 @@
    while (cont) {
       cont = false;
 
-      foreach_block (block, cfg) {
+      foreach_block_reverse (block, cfg) {
          struct block_data *bd = &block_data[block->num];
 
-	 /* Update livein */
-	 for (int i = 0; i < bitset_words; i++) {
-            BITSET_WORD new_livein = (bd->use[i] |
-                                      (bd->liveout[i] &
-                                       ~bd->def[i]));
-            if (new_livein & ~bd->livein[i]) {
-               bd->livein[i] |= new_livein;
-               cont = true;
-	    }
-	 }
-         BITSET_WORD new_livein = (bd->flag_use[0] |
-                                   (bd->flag_liveout[0] &
-                                    ~bd->flag_def[0]));
-         if (new_livein & ~bd->flag_livein[0]) {
-            bd->flag_livein[0] |= new_livein;
-            cont = true;
-         }
-
 	 /* Update liveout */
 	 foreach_list_typed(bblock_link, child_link, link, &block->children) {
             struct block_data *child_bd = &block_data[child_link->block->num];
@@ -173,6 +156,24 @@
                cont = true;
             }
 	 }
+
+         /* Update livein */
+         for (int i = 0; i < bitset_words; i++) {
+            BITSET_WORD new_livein = (bd->use[i] |
+                                      (bd->liveout[i] &
+                                       ~bd->def[i]));
+            if (new_livein & ~bd->livein[i]) {
+               bd->livein[i] |= new_livein;
+               cont = true;
+            }
+         }
+         BITSET_WORD new_livein = (bd->flag_use[0] |
+                                   (bd->flag_liveout[0] &
+                                    ~bd->flag_def[0]));
+         if (new_livein & ~bd->flag_livein[0]) {
+            bd->flag_livein[0] |= new_livein;
+            cont = true;
+         }
       }
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,1563 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "brw_vec4.h"
+#include "glsl/ir_uniform.h"
+
+namespace brw {
+
+void
+vec4_visitor::emit_nir_code()
+{
+   nir_shader *const nir_prog = prog->nir;
+
+   /* Input and uniform tables are only built when the shader has any. */
+   if (nir_prog->num_inputs > 0)
+      nir_setup_inputs(nir_prog);
+   if (nir_prog->num_uniforms > 0)
+      nir_setup_uniforms(nir_prog);
+
+   /* System values are always scanned for. */
+   nir_setup_system_values(nir_prog);
+   /* Emit every overload's implementation; each is asserted to be "main". */
+   nir_foreach_overload(nir_prog, overload) {
+      assert(strcmp(overload->function->name, "main") == 0);
+      assert(overload->impl);
+      nir_emit_impl(overload->impl);
+   }
+}
+
+void
+vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+   /* System value slot read by this intrinsic; set by the switch below. */
+   int slot;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_vertex_id:
+      unreachable("should be lowered by lower_vertex_id().");
+
+   case nir_intrinsic_load_vertex_id_zero_base:
+      slot = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+      break;
+
+   case nir_intrinsic_load_base_vertex:
+      slot = SYSTEM_VALUE_BASE_VERTEX;
+      break;
+
+   case nir_intrinsic_load_instance_id:
+      slot = SYSTEM_VALUE_INSTANCE_ID;
+      break;
+
+   default:
+      /* Any other intrinsic needs no system value register. */
+      return;
+   }
+
+   /* Allocate the register on first use only: a slot whose file is no
+    * longer BAD_FILE has already been set up and is left untouched.
+    * All three system values handled here are typed as int.
+    */
+   dst_reg *const reg = &this->nir_system_values[slot];
+   if (reg->file == BAD_FILE)
+      *reg = *this->make_reg_for_system_value(slot, glsl_type::int_type);
+}
+
+static bool
+setup_system_values_block(nir_block *block, void *void_visitor)
+{
+   /* The opaque callback argument is the vec4_visitor doing the walk. */
+   vec4_visitor *const v = static_cast<vec4_visitor *>(void_visitor);
+
+   nir_foreach_instr(block, instr) {
+      /* Only intrinsic instructions are handed to the visitor. */
+      if (instr->type == nir_instr_type_intrinsic)
+         v->nir_setup_system_value_intrinsic(nir_instr_as_intrinsic(instr));
+   }
+
+   /* This callback returns true unconditionally. */
+   return true;
+}
+
+void
+vec4_visitor::nir_setup_system_values(nir_shader *shader)
+{
+   nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
+   for (unsigned i = 0; i < SYSTEM_VALUE_MAX; i++)
+      nir_system_values[i] = dst_reg(); /* ralloc memory is uninitialized */
+   nir_foreach_overload(shader, overload) {
+      assert(strcmp(overload->function->name, "main") == 0 && overload->impl);
+      nir_foreach_block(overload->impl, setup_system_values_block, this);
+   }
+}
+
+void
+vec4_visitor::nir_setup_inputs(nir_shader *shader)
+{
+   nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+
+   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+      /* Fill type_size() consecutive entries from the driver location on. */
+      src_reg *const first = &nir_inputs[var->data.driver_location];
+      const unsigned count = type_size(var->type);
+
+      for (unsigned n = 0; n < count; n++)
+         first[n] = src_reg(ATTR, var->data.location + n, var->type);
+   }
+}
+
+void
+vec4_visitor::nir_setup_uniforms(nir_shader *shader)
+{
+   uniforms = 0;
+   /* Zero-filled: uniform slot -> driver_location of its variable. */
+   nir_uniform_driver_location =
+      rzalloc_array(mem_ctx, unsigned, this->uniform_array_size);
+
+   if (shader_prog) {
+      foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
+         /* UBOs, atomics and samplers don't take up space in the
+          * uniform file. */
+         if (var->interface_type != NULL || var->type->contains_atomic() ||
+             type_size(var->type) == 0) {
+            continue;
+         }
+
+         assert(uniforms < uniform_array_size);
+         this->uniform_size[uniforms] = type_size(var->type);
+         /* Variables whose name starts with "gl_" take the builtin path. */
+         if (strncmp(var->name, "gl_", 3) == 0)
+            nir_setup_builtin_uniform(var);
+         else
+            nir_setup_uniform(var);
+      }
+   } else {
+      /* For ARB_vertex_program, only a single "parameters" variable is
+       * generated to support uniform data.
+       */
+      nir_variable *var = (nir_variable *) shader->uniforms.get_head();
+      assert(shader->uniforms.length() == 1 &&
+             strcmp(var->name, "parameters") == 0);
+
+      assert(uniforms < uniform_array_size);
+      this->uniform_size[uniforms] = type_size(var->type);
+
+      struct gl_program_parameter_list *plist = prog->Parameters;
+      for (unsigned p = 0; p < plist->NumParameters; p++) {
+         uniform_vector_size[uniforms] = plist->Parameters[p].Size;
+
+         /* Parameters should be either vec4 uniforms or single component
+          * constants; matrices and other larger types should have been broken
+          * down earlier.
+          */
+         assert(uniform_vector_size[uniforms] <= 4);
+         /* Used components point at the values; the rest share a zero. */
+         int i;
+         for (i = 0; i < uniform_vector_size[uniforms]; i++) {
+            stage_prog_data->param[uniforms * 4 + i] = &plist->ParameterValues[p][i];
+         }
+         for (; i < 4; i++) {
+            static const gl_constant_value zero = { 0.0 };
+            stage_prog_data->param[uniforms * 4 + i] = &zero;
+         }
+
+         nir_uniform_driver_location[uniforms] = var->data.driver_location;
+         uniforms++;
+      }
+   }
+}
+
+void
+vec4_visitor::nir_setup_uniform(nir_variable *var)
+{
+   const int name_len = strlen(var->name);
+
+   /* Non-builtin uniform data lives in gl_uniform_storage entries, one per
+    * subcomponent that glGetUniformLocation() could name, laid out in the
+    * same order we would walk the type.  Scan the whole list and take each
+    * entry whose name is ours, or is ours followed by '.' or '[' (a member
+    * or element of our variable).
+    */
+   for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
+      struct gl_uniform_storage *entry = &shader_prog->UniformStorage[u];
+
+      if (entry->builtin)
+         continue;
+
+      if (strncmp(var->name, entry->name, name_len) != 0)
+         continue;
+
+      const char next = entry->name[name_len];
+      if (next != 0 && next != '.' && next != '[')
+         continue;
+
+      /* One vec4 uniform slot per array element and matrix column. */
+      gl_constant_value *value = entry->storage;
+      const unsigned num_vectors = (MAX2(entry->array_elements, 1) *
+                                    entry->type->matrix_columns);
+
+      for (unsigned v = 0; v < num_vectors; v++) {
+         assert(uniforms < uniform_array_size);
+         uniform_vector_size[uniforms] = entry->type->vector_elements;
+
+         /* Real components first, then pad the vec4 with a shared zero. */
+         int c;
+         for (c = 0; c < uniform_vector_size[uniforms]; c++)
+            stage_prog_data->param[uniforms * 4 + c] = value++;
+         for (; c < 4; c++) {
+            static const gl_constant_value zero = { 0.0 };
+            stage_prog_data->param[uniforms * 4 + c] = &zero;
+         }
+
+         nir_uniform_driver_location[uniforms] = var->data.driver_location;
+         uniforms++;
+      }
+   }
+}
+
+void
+vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
+{
+   const nir_state_slot *const slots = var->state_slots;
+   assert(var->state_slots != NULL);
+
+   for (unsigned int s = 0; s < var->num_state_slots; s++) {
+      /* ir_to_mesa has already set this state reference up, so adding it
+       * again just returns the same index.  Pointing straight into
+       * ParameterValues is safe because, unlike brw_fs.cpp, we never add
+       * new state references during compile.
+       */
+      int index = _mesa_add_state_reference(this->prog->Parameters,
+                                            (gl_state_index *)slots[s].tokens);
+      gl_constant_value *const row =
+         &this->prog->Parameters->ParameterValues[index][0];
+      /* One vec4 slot per state slot, components picked by its swizzle. */
+      assert(uniforms < uniform_array_size);
+      for (unsigned c = 0; c < 4; c++)
+         stage_prog_data->param[uniforms * 4 + c] =
+            &row[GET_SWZ(slots[s].swizzle, c)];
+
+      const bool numeric = var->type->is_scalar() || var->type->is_vector() ||
+                           var->type->is_matrix();
+      this->uniform_vector_size[uniforms] =
+         numeric ? var->type->vector_elements : 4;
+
+      nir_uniform_driver_location[uniforms] = var->data.driver_location;
+      uniforms++;
+   }
+}
+
+void
+vec4_visitor::nir_emit_impl(nir_function_impl *impl)
+{
+   /* Back every NIR register with a GRF allocation.  A register with
+    * num_array_elems == 0 still gets one element.
+    */
+   nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc);
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      const unsigned count = reg->num_array_elems ? reg->num_array_elems : 1;
+      nir_locals[reg->index] = dst_reg(GRF, alloc.allocate(count));
+   }
+
+   /* The SSA table is only allocated here, not filled in. */
+   nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);
+   nir_emit_cf_list(&impl->body);
+}
+
+void
+vec4_visitor::nir_emit_cf_list(exec_list *list)
+{
+   /* Validate the list before walking it. */
+   exec_list_validate(list);
+
+   /* Walk the control-flow nodes in list order and hand each one to the
+    * emitter for its kind.
+    *
+    * Only if, loop and block nodes are expected here; any other node
+    * type hits unreachable().
+    */
+   foreach_list_typed(nir_cf_node, cf, node, list) {
+      if (cf->type == nir_cf_node_if) {
+         nir_emit_if(nir_cf_node_as_if(cf));
+      } else if (cf->type == nir_cf_node_loop) {
+         nir_emit_loop(nir_cf_node_as_loop(cf));
+      } else if (cf->type == nir_cf_node_block) {
+         nir_emit_block(nir_cf_node_as_block(cf));
+      } else {
+         unreachable("Invalid CFG node block");
+      }
+   }
+}
+
+void
+vec4_visitor::nir_emit_if(nir_if *if_stmt)
+{
+   /* Put the condition in the flag register: MOV it to the null register
+    * with an NZ conditional modifier.
+    */
+   src_reg cond = get_nir_src(if_stmt->condition, BRW_REGISTER_TYPE_D, 1);
+   emit(MOV(dst_null_d(), cond))->conditional_mod = BRW_CONDITIONAL_NZ;
+
+   /* Then branch, predicated on that flag. */
+   emit(IF(BRW_PREDICATE_NORMAL));
+   nir_emit_cf_list(&if_stmt->then_list);
+
+   /* Else branch: always emitted; dead CF elimination drops an empty one. */
+   emit(BRW_OPCODE_ELSE);
+   nir_emit_cf_list(&if_stmt->else_list);
+
+   emit(BRW_OPCODE_ENDIF);
+}
+
+void
+vec4_visitor::nir_emit_loop(nir_loop *loop)
+{
+   emit(BRW_OPCODE_DO);
+   /* The loop body is emitted between the DO and WHILE opcodes. */
+   nir_emit_cf_list(&loop->body);
+
+   emit(BRW_OPCODE_WHILE);
+}
+
+void
+vec4_visitor::nir_emit_block(nir_block *block)
+{
+   /* Emit the block's instructions one at a time, in list order. */
+   nir_foreach_instr(block, instr)
+      nir_emit_instr(instr);
+}
+
+void
+vec4_visitor::nir_emit_instr(nir_instr *instr)
+{
+   this->base_ir = instr;
+   /* Dispatch on instruction type; unhandled types only print to stderr. */
+   switch (instr->type) {
+   case nir_instr_type_load_const:
+      nir_emit_load_const(nir_instr_as_load_const(instr));
+      break;
+
+   case nir_instr_type_intrinsic:
+      nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
+      break;
+
+   case nir_instr_type_alu:
+      nir_emit_alu(nir_instr_as_alu(instr));
+      break;
+
+   case nir_instr_type_jump:
+      nir_emit_jump(nir_instr_as_jump(instr));
+      break;
+
+   case nir_instr_type_tex:
+      nir_emit_texture(nir_instr_as_tex(instr));
+      break;
+
+   default:
+      fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
+      break;
+   }
+}
+
+static dst_reg
+dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
+                    unsigned base_offset, nir_src *indirect)
+{
+   /* Start from the register's storage, advanced by the constant offset. */
+   dst_reg result = offset(v->nir_locals[nir_reg->index], base_offset);
+
+   /* An indirect access also gets a relative address: the indirect
+    * source fetched as one BRW_REGISTER_TYPE_D component.
+    */
+   if (indirect != NULL)
+      result.reladdr = new(v->mem_ctx)
+         src_reg(v->get_nir_src(*indirect, BRW_REGISTER_TYPE_D, 1));
+
+   return result;
+}
+
+dst_reg
+vec4_visitor::get_nir_dest(nir_dest dest)
+{
+   assert(!dest.is_ssa);
+   return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+                              dest.reg.indirect);
+}
+
+dst_reg
+vec4_visitor::get_nir_dest(nir_dest dest, enum brw_reg_type type)
+{
+   return retype(get_nir_dest(dest), type);
+}
+
+dst_reg
+vec4_visitor::get_nir_dest(nir_dest dest, nir_alu_type type)
+{
+   return get_nir_dest(dest, brw_type_for_nir_type(type));
+}
+
+src_reg
+vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type type,
+                          unsigned num_components)
+{
+   /* Find the backing register: SSA values come from the SSA table,
+    * everything else from the NIR register (with offset / indirect).
+    */
+   dst_reg backing;
+   if (!src.is_ssa) {
+      backing = dst_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
+                                    src.reg.indirect);
+   } else {
+      assert(src.ssa != NULL);
+      backing = nir_ssa_values[src.ssa->index];
+   }
+
+   /* Retype, convert to a source and select the requested components. */
+   src_reg result = src_reg(retype(backing, type));
+   result.swizzle = brw_swizzle_for_size(num_components);
+   return result;
+}
+
+src_reg
+vec4_visitor::get_nir_src(nir_src src, nir_alu_type type,
+                          unsigned num_components)
+{
+   return get_nir_src(src, brw_type_for_nir_type(type), num_components);
+}
+
+src_reg
+vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
+{
+   /* if type is not specified, default to signed int */
+   return get_nir_src(src, nir_type_int, num_components);
+}
+
+void
+vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
+{
+   const unsigned num_comps = instr->def.num_components;
+   const unsigned full_mask = brw_writemask_for_size(num_comps);
+
+   dst_reg reg = dst_reg(GRF, alloc.allocate(1));
+   reg.type = BRW_REGISTER_TYPE_F;
+
+   /* @FIXME: consider emitting vector operations to save some MOVs in
+    * cases where the components are representable in 8 bits.
+    * For now, emit one MOV per distinct value, writing every component
+    * that holds that value at once.
+    */
+   unsigned pending = full_mask;
+   for (unsigned c = 0; c < num_comps; c++) {
+      /* Skip components an earlier MOV has already written. */
+      if (!(pending & (1 << c)))
+         continue;
+
+      /* Gather this component and all later ones with the same bits. */
+      unsigned same = 0;
+      for (unsigned k = c; k < num_comps; k++) {
+         if (instr->value.u[k] == instr->value.u[c])
+            same |= 1 << k;
+      }
+      reg.writemask = same;
+      emit(MOV(reg, src_reg(instr->value.f[c])));
+      pending &= ~same;
+   }
+
+   /* The stored register exposes all components of the constant. */
+   reg.writemask = full_mask;
+   nir_ssa_values[instr->def.index] = reg;
+}
+
+void
+vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+   dst_reg dest;
+   src_reg src;
+   /* Set by each *_indirect case before it falls through. */
+   bool has_indirect = false;
+
+   switch (instr->intrinsic) {
+
+   case nir_intrinsic_load_input_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_input: {
+      int offset = instr->const_index[0];
+      src = nir_inputs[offset];
+      /* NOTE(review): dest is reassigned below; this reladdr is lost. */
+      if (has_indirect) {
+         dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0],
+                                                         BRW_REGISTER_TYPE_D,
+                                                         1));
+      }
+      dest = get_nir_dest(instr->dest, src.type);
+      dest.writemask = brw_writemask_for_size(instr->num_components);
+
+      emit(MOV(dest, src));
+      break;
+   }
+
+   case nir_intrinsic_store_output_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_output: {
+      int varying = instr->const_index[0];
+      /* No MOV is emitted: the source register itself becomes the output. */
+      src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
+                        instr->num_components);
+      dest = dst_reg(src);
+
+      if (has_indirect) {
+         dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[1],
+                                                         BRW_REGISTER_TYPE_D,
+                                                         1));
+      }
+      output_reg[varying] = dest;
+      break;
+   }
+
+   case nir_intrinsic_load_vertex_id:
+      unreachable("should be lowered by lower_vertex_id()");
+
+   case nir_intrinsic_load_vertex_id_zero_base: {
+      src_reg vertex_id =
+         src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
+      assert(vertex_id.file != BAD_FILE);
+      dest = get_nir_dest(instr->dest, vertex_id.type);
+      emit(MOV(dest, vertex_id));
+      break;
+   }
+
+   case nir_intrinsic_load_base_vertex: {
+      src_reg base_vertex =
+         src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
+      assert(base_vertex.file != BAD_FILE);
+      dest = get_nir_dest(instr->dest, base_vertex.type);
+      emit(MOV(dest, base_vertex));
+      break;
+   }
+
+   case nir_intrinsic_load_instance_id: {
+      src_reg instance_id =
+         src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
+      assert(instance_id.file != BAD_FILE);
+      dest = get_nir_dest(instr->dest, instance_id.type);
+      emit(MOV(dest, instance_id));
+      break;
+   }
+
+   case nir_intrinsic_load_uniform_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_uniform: {
+      int uniform = instr->const_index[0];
+
+      dest = get_nir_dest(instr->dest);
+
+      if (has_indirect) {
+         /* Split addressing into uniform and offset */
+         int offset = uniform - nir_uniform_driver_location[uniform];
+         assert(offset >= 0);
+
+         uniform -= offset;
+         assert(uniform >= 0);
+
+         src = src_reg(dst_reg(UNIFORM, uniform));
+         src.reg_offset = offset;
+         src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
+         src.reladdr = new(mem_ctx) src_reg(tmp);
+      } else {
+         src = src_reg(dst_reg(UNIFORM, uniform));
+      }
+
+      emit(MOV(dest, src));
+      break;
+   }
+
+   case nir_intrinsic_atomic_counter_read:
+   case nir_intrinsic_atomic_counter_inc:
+   case nir_intrinsic_atomic_counter_dec: {
+      unsigned surf_index = prog_data->base.binding_table.abo_start +
+         (unsigned) instr->const_index[0];
+      src_reg offset = get_nir_src(instr->src[0], nir_type_int,
+                                   instr->num_components);
+      dest = get_nir_dest(instr->dest);
+
+      switch (instr->intrinsic) {
+         case nir_intrinsic_atomic_counter_inc:
+            emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
+                                src_reg(), src_reg());
+            break;
+         case nir_intrinsic_atomic_counter_dec:
+            emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
+                                src_reg(), src_reg());
+            break;
+         case nir_intrinsic_atomic_counter_read:
+            emit_untyped_surface_read(surf_index, dest, offset);
+            break;
+         default:
+            unreachable("Unreachable");
+      }
+
+      brw_mark_surface_used(stage_prog_data, surf_index);
+      break;
+   }
+
+   case nir_intrinsic_load_ubo_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_load_ubo: {
+      nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
+      src_reg surf_index;
+
+      dest = get_nir_dest(instr->dest);
+
+      if (const_block_index) {
+         /* The block index is a constant, so just emit the binding table entry
+          * as an immediate.
+          */
+         surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+                              const_block_index->u[0]);
+      } else {
+         /* The block index is not a constant. Evaluate the index expression
+          * per-channel and add the base UBO index; we have to select a value
+          * from any live channel.
+          */
+         surf_index = src_reg(this, glsl_type::uint_type);
+         emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int,
+                                                   instr->num_components),
+                  src_reg(prog_data->base.binding_table.ubo_start)));
+         surf_index = emit_uniformize(surf_index);
+
+         /* Assume this may touch any UBO. It would be nice to provide
+          * a tighter bound, but the array information is already lowered away.
+          */
+         brw_mark_surface_used(&prog_data->base,
+                               prog_data->base.binding_table.ubo_start +
+                               shader_prog->NumUniformBlocks - 1);
+      }
+      /* The load offset is const_offset / 16, or the indirect source >> 4. */
+      unsigned const_offset = instr->const_index[0];
+      src_reg offset;
+
+      if (!has_indirect)  {
+         offset = src_reg(const_offset / 16);
+      } else {
+         offset = src_reg(this, glsl_type::uint_type);
+         emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1),
+                  src_reg(4u)));
+      }
+
+      src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
+      packed_consts.type = dest.type;
+
+      emit_pull_constant_load_reg(dst_reg(packed_consts),
+                                  surf_index,
+                                  offset,
+                                  NULL, NULL /* before_block/inst */);
+      /* Swizzle starts at component (const_offset % 16) / 4. */
+      packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
+      packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
+                                            const_offset % 16 / 4,
+                                            const_offset % 16 / 4,
+                                            const_offset % 16 / 4);
+
+      emit(MOV(dest, packed_consts));
+      break;
+   }
+
+   default:
+      unreachable("Unknown intrinsic");
+   }
+}
+
+static unsigned
+brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
+{
+   return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+}
+
+static enum brw_conditional_mod
+brw_conditional_for_nir_comparison(nir_op op)
+{
+   switch (op) {
+   case nir_op_flt:
+   case nir_op_ilt:
+   case nir_op_ult:
+      return BRW_CONDITIONAL_L;
+
+   case nir_op_fge:
+   case nir_op_ige:
+   case nir_op_uge:
+      return BRW_CONDITIONAL_GE;
+   /* The ball_*equalN ops map to the same condition as feq / ieq. */
+   case nir_op_feq:
+   case nir_op_ieq:
+   case nir_op_ball_fequal2:
+   case nir_op_ball_iequal2:
+   case nir_op_ball_fequal3:
+   case nir_op_ball_iequal3:
+   case nir_op_ball_fequal4:
+   case nir_op_ball_iequal4:
+      return BRW_CONDITIONAL_Z;
+   /* The bany_*nequalN ops map to the same condition as fne / ine. */
+   case nir_op_fne:
+   case nir_op_ine:
+   case nir_op_bany_fnequal2:
+   case nir_op_bany_inequal2:
+   case nir_op_bany_fnequal3:
+   case nir_op_bany_inequal3:
+   case nir_op_bany_fnequal4:
+   case nir_op_bany_inequal4:
+      return BRW_CONDITIONAL_NZ;
+
+   default:
+      unreachable("not reached: bad operation for comparison");
+   }
+}
+
+void
+vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
+{
+   vec4_instruction *inst;
+
+   dst_reg dst = get_nir_dest(instr->dest.dest,
+                              nir_op_infos[instr->op].output_type);
+   dst.writemask = instr->dest.write_mask;
+
+   src_reg op[4];
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      op[i] = get_nir_src(instr->src[i].src,
+                          nir_op_infos[instr->op].input_types[i], 4);
+      op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle);
+      op[i].abs = instr->src[i].abs;
+      op[i].negate = instr->src[i].negate;
+   }
+
+   switch (instr->op) {
+   case nir_op_imov:
+   case nir_op_fmov:
+      inst = emit(MOV(dst, op[0]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_vec2:
+   case nir_op_vec3:
+   case nir_op_vec4:
+      unreachable("not reached: should be handled by lower_vec_to_movs()");
+
+   case nir_op_i2f:
+   case nir_op_u2f:
+      inst = emit(MOV(dst, op[0]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_f2i:
+   case nir_op_f2u:
+      inst = emit(MOV(dst, op[0]));
+      break;
+
+   case nir_op_fadd:
+      /* fall through */
+   case nir_op_iadd:
+      inst = emit(ADD(dst, op[0], op[1]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fmul:
+      inst = emit(MUL(dst, op[0], op[1]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_imul: {
+      if (devinfo->gen < 8) {
+         nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
+         nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
+
+         /* For integer multiplication, the MUL uses the low 16 bits of one of
+          * the operands (src0 through SNB, src1 on IVB and later). The MACH
+          * accumulates in the contribution of the upper 16 bits of that
+          * operand. If we can determine that one of the args is in the low
+          * 16 bits, though, we can just emit a single MUL.
+          */
+         if (value0 && value0->u[0] < (1 << 16)) {
+            if (devinfo->gen < 7)
+               emit(MUL(dst, op[0], op[1]));
+            else
+               emit(MUL(dst, op[1], op[0]));
+         } else if (value1 && value1->u[0] < (1 << 16)) {
+            if (devinfo->gen < 7)
+               emit(MUL(dst, op[1], op[0]));
+            else
+               emit(MUL(dst, op[0], op[1]));
+         } else {
+            struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+            emit(MUL(acc, op[0], op[1]));
+            emit(MACH(dst_null_d(), op[0], op[1]));
+            emit(MOV(dst, src_reg(acc)));
+         }
+      } else {
+	 emit(MUL(dst, op[0], op[1]));
+      }
+      break;
+   }
+
+   case nir_op_imul_high:
+   case nir_op_umul_high: {
+      struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+      emit(MUL(acc, op[0], op[1]));
+      emit(MACH(dst, op[0], op[1]));
+      break;
+   }
+
+   case nir_op_frcp:
+      inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fexp2:
+      inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_flog2:
+      inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fsin:
+      inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fcos:
+      inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_idiv:
+   case nir_op_udiv:
+      emit_math(SHADER_OPCODE_INT_QUOTIENT, dst, op[0], op[1]);
+      break;
+
+   case nir_op_umod:
+      emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
+      break;
+
+   case nir_op_ldexp:
+      unreachable("not reached: should be handled by ldexp_to_arith()");
+
+   case nir_op_fsqrt:
+      inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_frsq:
+      inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fpow:
+      inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_uadd_carry: {
+      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+
+      emit(ADDC(dst_null_ud(), op[0], op[1]));
+      emit(MOV(dst, src_reg(acc)));
+      break;
+   }
+
+   case nir_op_usub_borrow: {
+      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+
+      emit(SUBB(dst_null_ud(), op[0], op[1]));
+      emit(MOV(dst, src_reg(acc)));
+      break;
+   }
+
+   case nir_op_ftrunc:
+      inst = emit(RNDZ(dst, op[0]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fceil: {
+      src_reg tmp = src_reg(this, glsl_type::float_type);
+      tmp.swizzle =
+         brw_swizzle_for_size(instr->src[0].src.is_ssa ?
+                              instr->src[0].src.ssa->num_components :
+                              instr->src[0].src.reg.reg->num_components);
+
+      op[0].negate = !op[0].negate;
+      emit(RNDD(dst_reg(tmp), op[0]));
+      tmp.negate = true;
+      inst = emit(MOV(dst, tmp));
+      inst->saturate = instr->dest.saturate;
+      break;
+   }
+
+   case nir_op_ffloor:
+      inst = emit(RNDD(dst, op[0]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_ffract:
+      inst = emit(FRC(dst, op[0]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fround_even:
+      inst = emit(RNDE(dst, op[0]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fmin:
+   case nir_op_imin:
+   case nir_op_umin:
+      inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fmax:
+   case nir_op_imax:
+   case nir_op_umax:
+      inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fddx:
+   case nir_op_fddx_coarse:
+   case nir_op_fddx_fine:
+   case nir_op_fddy:
+   case nir_op_fddy_coarse:
+   case nir_op_fddy_fine:
+      unreachable("derivatives are not valid in vertex shaders");
+
+   case nir_op_flt:
+   case nir_op_ilt:
+   case nir_op_ult:
+   case nir_op_fge:
+   case nir_op_ige:
+   case nir_op_uge:
+   case nir_op_feq:
+   case nir_op_ieq:
+   case nir_op_fne:
+   case nir_op_ine:
+      emit(CMP(dst, op[0], op[1],
+               brw_conditional_for_nir_comparison(instr->op)));
+      break;
+
+   case nir_op_ball_fequal2:
+   case nir_op_ball_iequal2:
+   case nir_op_ball_fequal3:
+   case nir_op_ball_iequal3:
+   case nir_op_ball_fequal4:
+   case nir_op_ball_iequal4: {
+      dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+
+      switch (instr->op) {
+      case nir_op_ball_fequal2:
+      case nir_op_ball_iequal2:
+         tmp.writemask = WRITEMASK_XY;
+         break;
+      case nir_op_ball_fequal3:
+      case nir_op_ball_iequal3:
+         tmp.writemask = WRITEMASK_XYZ;
+         break;
+      case nir_op_ball_fequal4:
+      case nir_op_ball_iequal4:
+         tmp.writemask = WRITEMASK_XYZW;
+         break;
+      default:
+         unreachable("not reached");
+      }
+
+      emit(CMP(tmp, op[0], op[1],
+               brw_conditional_for_nir_comparison(instr->op)));
+      emit(MOV(dst, src_reg(0)));
+      inst = emit(MOV(dst, src_reg(~0)));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+      break;
+   }
+
+   case nir_op_bany_fnequal2:
+   case nir_op_bany_inequal2:
+   case nir_op_bany_fnequal3:
+   case nir_op_bany_inequal3:
+   case nir_op_bany_fnequal4:
+   case nir_op_bany_inequal4: {
+      dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+
+      switch (instr->op) {
+      case nir_op_bany_fnequal2:
+      case nir_op_bany_inequal2:
+         tmp.writemask = WRITEMASK_XY;
+         break;
+      case nir_op_bany_fnequal3:
+      case nir_op_bany_inequal3:
+         tmp.writemask = WRITEMASK_XYZ;
+         break;
+      case nir_op_bany_fnequal4:
+      case nir_op_bany_inequal4:
+         tmp.writemask = WRITEMASK_XYZW;
+         break;
+      default:
+         unreachable("not reached");
+      }
+
+      emit(CMP(tmp, op[0], op[1],
+               brw_conditional_for_nir_comparison(instr->op)));
+
+      emit(MOV(dst, src_reg(0)));
+      inst = emit(MOV(dst, src_reg(~0)));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      break;
+   }
+
+   case nir_op_inot:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+      }
+      emit(NOT(dst, op[0]));
+      break;
+
+   case nir_op_ixor:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
+      emit(XOR(dst, op[0], op[1]));
+      break;
+
+   case nir_op_ior:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
+      emit(OR(dst, op[0], op[1]));
+      break;
+
+   case nir_op_iand:
+      if (devinfo->gen >= 8) {
+         op[0] = resolve_source_modifiers(op[0]);
+         op[1] = resolve_source_modifiers(op[1]);
+      }
+      emit(AND(dst, op[0], op[1]));
+      break;
+
+   case nir_op_b2i:
+      emit(AND(dst, op[0], src_reg(1)));
+      break;
+
+   case nir_op_b2f:
+      op[0].type = BRW_REGISTER_TYPE_D;
+      dst.type = BRW_REGISTER_TYPE_D;
+      emit(AND(dst, op[0], src_reg(0x3f800000u)));
+      dst.type = BRW_REGISTER_TYPE_F;
+      break;
+
+   case nir_op_f2b:
+      emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+      break;
+
+   case nir_op_i2b:
+      emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+      break;
+
+   case nir_op_fnoise1_1:
+   case nir_op_fnoise1_2:
+   case nir_op_fnoise1_3:
+   case nir_op_fnoise1_4:
+   case nir_op_fnoise2_1:
+   case nir_op_fnoise2_2:
+   case nir_op_fnoise2_3:
+   case nir_op_fnoise2_4:
+   case nir_op_fnoise3_1:
+   case nir_op_fnoise3_2:
+   case nir_op_fnoise3_3:
+   case nir_op_fnoise3_4:
+   case nir_op_fnoise4_1:
+   case nir_op_fnoise4_2:
+   case nir_op_fnoise4_3:
+   case nir_op_fnoise4_4:
+      unreachable("not reached: should be handled by lower_noise");
+
+   case nir_op_unpack_half_2x16_split_x:
+   case nir_op_unpack_half_2x16_split_y:
+   case nir_op_pack_half_2x16_split:
+      unreachable("not reached: should not occur in vertex shader");
+
+   case nir_op_unpack_snorm_2x16:
+   case nir_op_unpack_unorm_2x16:
+   case nir_op_pack_snorm_2x16:
+   case nir_op_pack_unorm_2x16:
+      unreachable("not reached: should be handled by lower_packing_builtins");
+
+   case nir_op_unpack_half_2x16:
+      /* NIR does not guarantee a valid swizzle outside the boundaries of a
+       * vector. emit_unpack_half_2x16 uses the source operand in an operation
+       * with WRITEMASK_Y, but our source operand has only size 1, so it read
+       * incorrect data and caused regressions in Piglit. To avoid that, we
+       * repeat the swizzle of the first component on the remaining
+       * components. In the vec4_visitor IR code path this is not needed
+       * because the operand already has the correct swizzle.
+       */
+      op[0].swizzle = brw_compose_swizzle(BRW_SWIZZLE_XXXX, op[0].swizzle);
+      emit_unpack_half_2x16(dst, op[0]);
+      break;
+
+   case nir_op_pack_half_2x16:
+      emit_pack_half_2x16(dst, op[0]);
+      break;
+
+   case nir_op_unpack_unorm_4x8:
+      emit_unpack_unorm_4x8(dst, op[0]);
+      break;
+
+   case nir_op_pack_unorm_4x8:
+      emit_pack_unorm_4x8(dst, op[0]);
+      break;
+
+   case nir_op_unpack_snorm_4x8:
+      emit_unpack_snorm_4x8(dst, op[0]);
+      break;
+
+   case nir_op_pack_snorm_4x8:
+      emit_pack_snorm_4x8(dst, op[0]);
+      break;
+
+   case nir_op_bitfield_reverse:
+      emit(BFREV(dst, op[0]));
+      break;
+
+   case nir_op_bit_count:
+      emit(CBIT(dst, op[0]));
+      break;
+
+   case nir_op_ufind_msb:
+   case nir_op_ifind_msb: {
+      src_reg temp = src_reg(this, glsl_type::uint_type);
+
+      inst = emit(FBH(dst_reg(temp), op[0]));
+      inst->dst.writemask = WRITEMASK_XYZW;
+
+      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+       * subtract the result from 31 to convert the MSB count into an LSB count.
+       */
+
+      /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
+      temp.swizzle = BRW_SWIZZLE_NOOP;
+      emit(MOV(dst, temp));
+
+      src_reg src_tmp = src_reg(dst);
+      emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
+
+      src_tmp.negate = true;
+      inst = emit(ADD(dst, src_tmp, src_reg(31)));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+   }
+
+   case nir_op_find_lsb:
+      emit(FBL(dst, op[0]));
+      break;
+
+   case nir_op_ubitfield_extract:
+   case nir_op_ibitfield_extract:
+      op[0] = fix_3src_operand(op[0]);
+      op[1] = fix_3src_operand(op[1]);
+      op[2] = fix_3src_operand(op[2]);
+
+      emit(BFE(dst, op[2], op[1], op[0]));
+      break;
+
+   case nir_op_bfm:
+      emit(BFI1(dst, op[0], op[1]));
+      break;
+
+   case nir_op_bfi:
+      op[0] = fix_3src_operand(op[0]);
+      op[1] = fix_3src_operand(op[1]);
+      op[2] = fix_3src_operand(op[2]);
+
+      emit(BFI2(dst, op[0], op[1], op[2]));
+      break;
+
+   case nir_op_bitfield_insert:
+      unreachable("not reached: should be handled by "
+                  "lower_instructions::bitfield_insert_to_bfm_bfi");
+
+   case nir_op_fsign:
+      /* AND(val, 0x80000000) gives the sign bit.
+       *
+       * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+       * zero.
+       */
+      emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+
+      op[0].type = BRW_REGISTER_TYPE_UD;
+      dst.type = BRW_REGISTER_TYPE_UD;
+      emit(AND(dst, op[0], src_reg(0x80000000u)));
+
+      inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u)));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      dst.type = BRW_REGISTER_TYPE_F;
+
+      if (instr->dest.saturate) {
+         inst = emit(MOV(dst, src_reg(dst)));
+         inst->saturate = true;
+      }
+      break;
+
+   case nir_op_isign:
+      /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
+       *               -> non-negative val generates 0x00000000.
+       *  Predicated OR sets 1 if val is positive.
+       */
+      emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
+      emit(ASR(dst, op[0], src_reg(31)));
+      inst = emit(OR(dst, src_reg(dst), src_reg(1)));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+
+   case nir_op_ishl:
+      emit(SHL(dst, op[0], op[1]));
+      break;
+
+   case nir_op_ishr:
+      emit(ASR(dst, op[0], op[1]));
+      break;
+
+   case nir_op_ushr:
+      emit(SHR(dst, op[0], op[1]));
+      break;
+
+   case nir_op_ffma:
+      op[0] = fix_3src_operand(op[0]);
+      op[1] = fix_3src_operand(op[1]);
+      op[2] = fix_3src_operand(op[2]);
+
+      inst = emit(MAD(dst, op[2], op[1], op[0]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_flrp:
+      inst = emit_lrp(dst, op[0], op[1], op[2]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_bcsel:
+      emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+      inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      break;
+
+   case nir_op_fdot2:
+      inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fdot3:
+      inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_fdot4:
+      inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_bany2:
+   case nir_op_bany3:
+   case nir_op_bany4: {
+      dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+      tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]);
+
+      emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+
+      emit(MOV(dst, src_reg(0)));
+      inst = emit(MOV(dst, src_reg(~0)));
+      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+      break;
+   }
+
+   case nir_op_fabs:
+   case nir_op_iabs:
+   case nir_op_fneg:
+   case nir_op_ineg:
+   case nir_op_fsat:
+      unreachable("not reached: should be lowered by lower_source mods");
+
+   case nir_op_fdiv:
+      unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler");
+
+   case nir_op_fmod:
+      unreachable("not reached: should be lowered by MOD_TO_FLOOR in the compiler");
+
+   case nir_op_fsub:
+   case nir_op_isub:
+      unreachable("not reached: should be handled by ir_sub_to_add_neg");
+
+   default:
+      unreachable("Unimplemented ALU operation");
+   }
+
+   /* If we need to do a boolean resolve, replace the result with -(x & 1)
+    * to sign extend the low bit to 0/~0
+    */
+   if (devinfo->gen <= 5 &&
+       (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) ==
+       BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
+      dst_reg masked = dst_reg(this, glsl_type::int_type);
+      masked.writemask = dst.writemask;
+      emit(AND(masked, src_reg(dst), src_reg(1)));
+      src_reg masked_neg = src_reg(masked);
+      masked_neg.negate = true;
+      emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
+   }
+}
+
+/* Emit BREAK or CONTINUE for a NIR jump; no other jump type is handled. */
+void
+vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
+{
+   if (instr->type == nir_jump_break) {
+      emit(BRW_OPCODE_BREAK);
+      return;
+   }
+
+   if (instr->type == nir_jump_continue) {
+      emit(BRW_OPCODE_CONTINUE);
+      return;
+   }
+
+   /* Neither nir_jump_return nor any other jump type is expected here:
+    * both are reported through the same unreachable(). */
+   unreachable("unknown jump");
+}
+
+/* Translate a NIR texture opcode into the matching GLSL IR texture opcode.
+ * Only the opcodes listed in the switch are supported; anything else hits
+ * unreachable().
+ */
+enum ir_texture_opcode
+ir_texture_opcode_for_nir_texop(nir_texop texop)
+{
+   switch (texop) {
+   case nir_texop_lod:          return ir_lod;
+   case nir_texop_query_levels: return ir_query_levels;
+   case nir_texop_tex:          return ir_tex;
+   case nir_texop_tg4:          return ir_tg4;
+   case nir_texop_txb:          return ir_txb;
+   case nir_texop_txd:          return ir_txd;
+   case nir_texop_txf:          return ir_txf;
+   case nir_texop_txf_ms:       return ir_txf_ms;
+   case nir_texop_txl:          return ir_txl;
+   case nir_texop_txs:          return ir_txs;
+   default:
+      unreachable("unknown texture opcode");
+   }
+}
+
+/* Return the components-wide GLSL vector type matching a NIR ALU base type;
+ * every other base type yields glsl_type::error_type. */
+const glsl_type *
+glsl_type_for_nir_alu_type(nir_alu_type alu_type, unsigned components)
+{
+   switch (alu_type) {
+   case nir_type_float:
+      return glsl_type::vec(components);
+   case nir_type_int:
+      return glsl_type::ivec(components);
+   case nir_type_unsigned:
+      return glsl_type::uvec(components);
+   case nir_type_bool:
+      return glsl_type::bvec(components);
+   default:
+      return glsl_type::error_type;
+   }
+}
+
+/* Gather the sources of a NIR texture instruction and emit_texture() it. */
+void
+vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
+{
+   unsigned sampler = instr->sampler_index;
+   src_reg sampler_reg = src_reg(sampler);
+   src_reg coordinate;
+   const glsl_type *coord_type = NULL;
+   src_reg shadow_comparitor;
+   src_reg offset_value;
+   src_reg lod, lod2;     /* lod also receives ddx; lod2 receives ddy */
+   src_reg sample_index;
+   src_reg mcs;           /* only assigned by the ms_index source below */
+   const glsl_type *dest_type =
+      glsl_type_for_nir_alu_type(instr->dest_type,
+                                 nir_tex_instr_dest_size(instr));
+   dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
+
+   /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
+    * emitting anything other than setting up the constant result.
+    */
+   if (instr->op == nir_texop_tg4) {
+      int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+      if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
+         emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
+         return;
+      }
+   }
+
+   /* Load the texture operation sources */
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      switch (instr->src[i].src_type) {
+      case nir_tex_src_comparitor:
+         shadow_comparitor = get_nir_src(instr->src[i].src,
+                                         BRW_REGISTER_TYPE_F, 1);
+         break;
+
+      case nir_tex_src_coord: {
+         unsigned src_size = nir_tex_instr_src_size(instr, i);
+
+         switch (instr->op) {
+         case nir_texop_txf:
+         case nir_texop_txf_ms:
+            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
+                                     src_size);
+            coord_type = glsl_type::ivec(src_size);
+            break;
+
+         default:
+            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+                                     src_size);
+            coord_type = glsl_type::vec(src_size);
+            break;
+         }
+         break;
+      }
+
+      case nir_tex_src_ddx:
+         lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+                           nir_tex_instr_src_size(instr, i));
+         break;
+
+      case nir_tex_src_ddy:
+         lod2 = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+                           nir_tex_instr_src_size(instr, i));
+         break;
+
+      case nir_tex_src_lod:
+         switch (instr->op) {
+         case nir_texop_txs:
+         case nir_texop_txf:
+            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
+            break;
+
+         default:
+            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, 1);
+            break;
+         }
+         break;
+
+      case nir_tex_src_ms_index: {
+         sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
+         assert(coord_type != NULL); /* coord source must precede ms_index */
+         if (devinfo->gen >= 7 &&
+             key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+            mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
+         } else {
+            mcs = src_reg(0u);
+         }
+         mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
+         break;
+      }
+
+      case nir_tex_src_offset:
+         offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
+         break;
+
+      case nir_tex_src_sampler_offset: {
+         /* The highest sampler which may be used by this operation is
+          * the last element of the array. Mark it here, because the generator
+          * doesn't have enough information to determine the bound.
+          */
+         uint32_t array_size = instr->sampler_array_size;
+         uint32_t max_used = sampler + array_size - 1;
+         if (instr->op == nir_texop_tg4) {
+            max_used += prog_data->base.binding_table.gather_texture_start;
+         } else {
+            max_used += prog_data->base.binding_table.texture_start;
+         }
+
+         brw_mark_surface_used(&prog_data->base, max_used);
+
+         /* Emit code to evaluate the actual indexing expression */
+         src_reg src = get_nir_src(instr->src[i].src, 1);
+         src_reg temp(this, glsl_type::uint_type);
+         emit(ADD(dst_reg(temp), src, src_reg(sampler)));
+         sampler_reg = emit_uniformize(temp);
+         break;
+      }
+
+      case nir_tex_src_projector:
+         unreachable("Should be lowered by do_lower_texture_projection");
+
+      case nir_tex_src_bias:
+         unreachable("LOD bias is not valid for vertex shaders.\n");
+
+      default:
+         unreachable("unknown texture source");
+      }
+   }
+
+   uint32_t constant_offset = 0;
+   for (unsigned i = 0; i < 3; i++) {
+      if (instr->const_offset[i] != 0) {
+         constant_offset = brw_texture_offset(instr->const_offset, 3);
+         break;
+      }
+   }
+
+   /* Stuff the channel select bits in the top of the texture offset */
+   if (instr->op == nir_texop_tg4)
+      constant_offset |= gather_channel(instr->component, sampler) << 16;
+
+   ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op);
+   /* Flag size queries (txs) on cube-map array samplers for emit_texture. */
+   bool is_cube_array =
+      instr->op == nir_texop_txs &&
+      instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+      instr->is_array;
+
+   emit_texture(op, dest, dest_type, coordinate, instr->coord_components,
+                shadow_comparitor,
+                lod, lod2, sample_index,
+                constant_offset, offset_value,
+                mcs, is_cube_array, sampler, sampler_reg);
+}
+
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -115,7 +115,7 @@
    ralloc_free(compiler->vec4_reg_set.ra_reg_to_grf);
    compiler->vec4_reg_set.ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count);
    ralloc_free(compiler->vec4_reg_set.regs);
-   compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count);
+   compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count, false);
    if (compiler->devinfo->gen >= 6)
       ra_set_allocate_round_robin(compiler->vec4_reg_set.regs);
    ralloc_free(compiler->vec4_reg_set.classes);
@@ -140,7 +140,7 @@
 	 for (int base_reg = j;
 	      base_reg < j + class_sizes[i];
 	      base_reg++) {
-	    ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
+	    ra_add_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
 	 }
 
 	 reg++;
@@ -158,6 +158,9 @@
    }
    assert(reg == ra_reg_count);
 
+   for (int reg = 0; reg < base_reg_count; reg++)
+      ra_make_reg_conflicts_transitive(compiler->vec4_reg_set.regs, reg);
+
    ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
 
    for (int i = 0; i < MAX_VGRF_SIZE; i++)
@@ -191,7 +194,6 @@
 bool
 vec4_visitor::reg_allocate()
 {
-   struct brw_compiler *compiler = brw->intelScreen->compiler;
    unsigned int hw_reg_mapping[alloc.count];
    int payload_reg_count = this->first_non_payload_grf;
 
@@ -281,15 +283,15 @@
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
-	 if (inst->src[i].file == GRF) {
-	    spill_costs[inst->src[i].reg] += loop_scale;
+         if (inst->src[i].file == GRF) {
+            spill_costs[inst->src[i].reg] += loop_scale;
             if (inst->src[i].reladdr)
                no_spill[inst->src[i].reg] = true;
-	 }
+         }
       }
 
       if (inst->dst.file == GRF) {
-	 spill_costs[inst->dst.reg] += loop_scale;
+         spill_costs[inst->dst.reg] += loop_scale;
          if (inst->dst.reladdr)
             no_spill[inst->dst.reg] = true;
       }
@@ -297,12 +299,12 @@
       switch (inst->opcode) {
 
       case BRW_OPCODE_DO:
-	 loop_scale *= 10;
-	 break;
+         loop_scale *= 10;
+         break;
 
       case BRW_OPCODE_WHILE:
-	 loop_scale /= 10;
-	 break;
+         loop_scale /= 10;
+         break;
 
       case SHADER_OPCODE_GEN4_SCRATCH_READ:
       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
@@ -310,12 +312,12 @@
             if (inst->src[i].file == GRF)
                no_spill[inst->src[i].reg] = true;
          }
-	 if (inst->dst.file == GRF)
-	    no_spill[inst->dst.reg] = true;
-	 break;
+         if (inst->dst.file == GRF)
+            no_spill[inst->dst.reg] = true;
+         break;
 
       default:
-	 break;
+         break;
       }
    }
 }
@@ -340,7 +342,7 @@
 vec4_visitor::spill_reg(int spill_reg_nr)
 {
    assert(alloc.sizes[spill_reg_nr] == 1);
-   unsigned int spill_offset = c->last_scratch++;
+   unsigned int spill_offset = last_scratch++;
 
    /* Generate spill/unspill instructions for the objects being spilled. */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -287,7 +287,7 @@
 }
 
 src_reg
-vec4_visitor::fix_3src_operand(src_reg src)
+vec4_visitor::fix_3src_operand(const src_reg &src)
 {
    /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
     * able to use vertical stride of zero to replicate the vec4 uniform, like
@@ -313,7 +313,20 @@
 }
 
 src_reg
-vec4_visitor::fix_math_operand(src_reg src)
+vec4_visitor::resolve_source_modifiers(const src_reg &src)
+{
+   if (!src.abs && !src.negate)
+      return src;
+   /* Otherwise MOV src into a fresh temporary of the same register type. */
+   dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
+   resolved.type = src.type;
+   emit(MOV(resolved, src));
+
+   return src_reg(resolved);
+}
+
+src_reg
+vec4_visitor::fix_math_operand(const src_reg &src)
 {
    if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
       return src;
@@ -338,7 +351,7 @@
    return src_reg(expanded);
 }
 
-void
+vec4_instruction *
 vec4_visitor::emit_math(enum opcode opcode,
                         const dst_reg &dst,
                         const src_reg &src0, const src_reg &src1)
@@ -350,11 +363,13 @@
       /* MATH on Gen6 must be align1, so we can't do writemasks. */
       math->dst = dst_reg(this, glsl_type::vec4_type);
       math->dst.type = dst.type;
-      emit(MOV(dst, src_reg(math->dst)));
+      math = emit(MOV(dst, src_reg(math->dst)));
    } else if (devinfo->gen < 6) {
       math->base_mrf = 1;
       math->mlen = src1.file == BAD_FILE ? 1 : 2;
    }
+
+   return math;
 }
 
 void
@@ -572,9 +587,18 @@
    }
 }
 
-
-static int
-type_size(const struct glsl_type *type)
+/**
+ * Returns the minimum number of vec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for array and struct, the sum of the vec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ */
+int
+vec4_visitor::type_size(const struct glsl_type *type)
 {
    unsigned int i;
    int size;
@@ -603,6 +627,9 @@
 	 size += type_size(type->fields.structure[i].type);
       }
       return size;
+   case GLSL_TYPE_SUBROUTINE:
+      return 1;
+
    case GLSL_TYPE_SAMPLER:
       /* Samplers take up no register space, since they're baked in at
        * link time.
@@ -611,6 +638,7 @@
    case GLSL_TYPE_ATOMIC_UINT:
       return 0;
    case GLSL_TYPE_IMAGE:
+      return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
    case GLSL_TYPE_VOID:
    case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_ERROR:
@@ -626,7 +654,7 @@
    init();
 
    this->file = GRF;
-   this->reg = v->alloc.allocate(type_size(type));
+   this->reg = v->alloc.allocate(v->type_size(type));
 
    if (type->is_array() || type->is_record()) {
       this->swizzle = BRW_SWIZZLE_NOOP;
@@ -644,7 +672,7 @@
    init();
 
    this->file = GRF;
-   this->reg = v->alloc.allocate(type_size(type) * size);
+   this->reg = v->alloc.allocate(v->type_size(type) * size);
 
    this->swizzle = BRW_SWIZZLE_NOOP;
 
@@ -656,7 +684,7 @@
    init();
 
    this->file = GRF;
-   this->reg = v->alloc.allocate(type_size(type));
+   this->reg = v->alloc.allocate(v->type_size(type));
 
    if (type->is_array() || type->is_record()) {
       this->writemask = WRITEMASK_XYZW;
@@ -667,6 +695,21 @@
    this->type = brw_type_for_base_type(type);
 }
 
+/* Append one uniform vec4 slot: n values from `values`, zero-padded to 4. */
+void
+vec4_visitor::setup_vector_uniform_values(const gl_constant_value *values,
+                                          unsigned n)
+{
+   static const gl_constant_value zero = { 0 };
+
+   /* A slot holds four params; more would spill into the next uniform. */
+   assert(n <= 4 && uniforms < uniform_array_size);
+   for (unsigned i = 0; i < 4; ++i)
+      stage_prog_data->param[4 * uniforms + i] = i < n ? &values[i] : &zero;
+
+   uniform_vector_size[uniforms++] = n;
+}
+
 /* Our support for uniforms is piggy-backed on the struct
  * gl_fragment_program, because that's where the values actually
  * get stored, rather than in some global gl_shader_program uniform
@@ -683,9 +726,12 @@
     * order we'd walk the type, so walk the list of storage and find anything
     * with our name, or the prefix of a component that starts with our name.
     */
-   for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
+   for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
       struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 
+      if (storage->builtin)
+         continue;
+
       if (strncmp(ir->name, storage->name, namelen) != 0 ||
           (storage->name[namelen] != 0 &&
            storage->name[namelen] != '.' &&
@@ -693,34 +739,19 @@
          continue;
       }
 
-      gl_constant_value *components = storage->storage;
-      unsigned vector_count = (MAX2(storage->array_elements, 1) *
-                               storage->type->matrix_columns);
-
-      for (unsigned s = 0; s < vector_count; s++) {
-         assert(uniforms < uniform_array_size);
-         uniform_vector_size[uniforms] = storage->type->vector_elements;
-
-         int i;
-         for (i = 0; i < uniform_vector_size[uniforms]; i++) {
-            stage_prog_data->param[uniforms * 4 + i] = components;
-            components++;
-         }
-         for (; i < 4; i++) {
-            static gl_constant_value zero = { 0.0 };
-            stage_prog_data->param[uniforms * 4 + i] = &zero;
-         }
+      const unsigned vector_count = (MAX2(storage->array_elements, 1) *
+                                     storage->type->matrix_columns);
+      const unsigned vector_size = storage->type->vector_elements;
 
-         uniforms++;
-      }
+      for (unsigned s = 0; s < vector_count; s++)
+         setup_vector_uniform_values(&storage->storage[s * vector_size],
+                                     vector_size);
    }
 }
 
 void
-vec4_visitor::setup_uniform_clipplane_values()
+vec4_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
 {
-   gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
-
    for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
       assert(this->uniforms < uniform_array_size);
       this->uniform_vector_size[this->uniforms] = 4;
@@ -1041,8 +1072,6 @@
       for (int i = 0; i < type_size(ir->type); i++) {
 	 output_reg[ir->data.location + i] = *reg;
 	 output_reg[ir->data.location + i].reg_offset = i;
-	 output_reg[ir->data.location + i].type =
-            brw_type_for_base_type(ir->type->get_scalar_type());
 	 output_reg_annotation[ir->data.location + i] = ir->name;
       }
       break;
@@ -1062,7 +1091,7 @@
        * Some uniforms, such as samplers and atomic counters, have no actual
        * storage, so we should ignore them.
        */
-      if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
+      if (ir->is_in_buffer_block() || type_size(ir->type) == 0)
          return;
 
       /* Track how big the whole uniform variable is, in case we need to put a
@@ -1079,7 +1108,7 @@
       break;
 
    case ir_var_system_value:
-      reg = make_reg_for_system_value(ir);
+      reg = make_reg_for_system_value(ir->data.location, ir->type);
       break;
 
    default:
@@ -1251,7 +1280,7 @@
    return true;
 }
 
-void
+vec4_instruction *
 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                           src_reg src0, src_reg src1)
 {
@@ -1266,9 +1295,11 @@
       inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
       inst->predicate = BRW_PREDICATE_NORMAL;
    }
+
+   return inst;
 }
 
-void
+vec4_instruction *
 vec4_visitor::emit_lrp(const dst_reg &dst,
                        const src_reg &x, const src_reg &y, const src_reg &a)
 {
@@ -1276,8 +1307,8 @@
       /* Note that the instruction's argument order is reversed from GLSL
        * and the IR.
        */
-      emit(LRP(dst,
-               fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x)));
+     return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
+                     fix_3src_operand(x)));
    } else {
       /* Earlier generations don't support three source operations, so we
        * need to emit x*(1-a) + y*a.
@@ -1292,7 +1323,7 @@
       emit(MUL(y_times_a, y, a));
       emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
       emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
-      emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
+      return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
    }
 }
 
@@ -1373,15 +1404,19 @@
       emit(pull);
 }
 
-void
-vec4_visitor::emit_uniformize(const dst_reg &dst, const src_reg &src)
+src_reg
+vec4_visitor::emit_uniformize(const src_reg &src)
 {
    const src_reg chan_index(this, glsl_type::uint_type);
+   const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
+                              src.type);
 
    emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
       ->force_writemask_all = true;
    emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
       ->force_writemask_all = true;
+
+   return src_reg(dst);
 }
 
 void
@@ -1553,6 +1588,10 @@
    case ir_unop_noise:
       unreachable("not reached: should be handled by lower_noise");
 
+   case ir_unop_subroutine_to_int:
+      emit(MOV(result_dst, op[0]));
+      break;
+
    case ir_binop_add:
       emit(ADD(result_dst, op[0], op[1]));
       break;
@@ -1600,20 +1639,13 @@
       assert(ir->type->is_integer());
       emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
       break;
-   case ir_binop_carry: {
-      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
 
-      emit(ADDC(dst_null_ud(), op[0], op[1]));
-      emit(MOV(result_dst, src_reg(acc)));
-      break;
-   }
-   case ir_binop_borrow: {
-      struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+   case ir_binop_carry:
+      unreachable("Should have been lowered by carry_to_arith().");
+
+   case ir_binop_borrow:
+      unreachable("Should have been lowered by borrow_to_arith().");
 
-      emit(SUBB(dst_null_ud(), op[0], op[1]));
-      emit(MOV(result_dst, src_reg(acc)));
-      break;
-   }
    case ir_binop_mod:
       /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
       assert(ir->type->is_integer());
@@ -1732,16 +1764,11 @@
       emit(MOV(result_dst, op[0]));
       break;
    case ir_unop_b2i:
-      emit(AND(result_dst, op[0], src_reg(1)));
-      break;
    case ir_unop_b2f:
       if (devinfo->gen <= 5) {
          resolve_bool_comparison(ir->operands[0], &op[0]);
       }
-      op[0].type = BRW_REGISTER_TYPE_D;
-      result_dst.type = BRW_REGISTER_TYPE_D;
-      emit(AND(result_dst, op[0], src_reg(0x3f800000u)));
-      result_dst.type = BRW_REGISTER_TYPE_F;
+      emit(MOV(result_dst, negate(op[0])));
       break;
    case ir_unop_f2b:
       emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
@@ -1837,7 +1864,7 @@
          surf_index = src_reg(this, glsl_type::uint_type);
          emit(ADD(dst_reg(surf_index), op[0],
                   src_reg(prog_data->base.binding_table.ubo_start)));
-         emit_uniformize(dst_reg(surf_index), surf_index);
+         surf_index = emit_uniformize(surf_index);
 
          /* Assume this may touch any UBO. It would be nice to provide
           * a tighter bound, but the array information is already lowered away.
@@ -2437,6 +2464,8 @@
       emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
                           src_reg(), src_reg());
    }
+
+   brw_mark_surface_used(stage_prog_data, surf_index);
 }
 
 void
@@ -2454,7 +2483,8 @@
 }
 
 src_reg
-vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler)
+vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
+                             src_reg coordinate, src_reg sampler)
 {
    vec4_instruction *inst =
       new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
@@ -2481,21 +2511,21 @@
    }
 
    /* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
-   int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
+   int coord_mask = (1 << coordinate_type->vector_elements) - 1;
    int zero_mask = 0xf & ~coord_mask;
 
-   emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
+   emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
             coordinate));
 
-   emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
+   emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
             src_reg(0)));
 
    emit(inst);
    return src_reg(inst->dst);
 }
 
-static bool
-is_high_sampler(const struct brw_device_info *devinfo, src_reg sampler)
+bool
+vec4_visitor::is_high_sampler(src_reg sampler)
 {
    if (devinfo->gen < 8 && !devinfo->is_haswell)
       return false;
@@ -2504,6 +2534,183 @@
 }
 
 void
+vec4_visitor::emit_texture(ir_texture_opcode op,
+                           dst_reg dest,
+                           const glsl_type *dest_type,
+                           src_reg coordinate,
+                           int coord_components,
+                           src_reg shadow_comparitor,
+                           src_reg lod, src_reg lod2,
+                           src_reg sample_index,
+                           uint32_t constant_offset,
+                           src_reg offset_value,
+                           src_reg mcs,
+                           bool is_cube_array,
+                           uint32_t sampler,
+                           src_reg sampler_reg)
+{
+   enum opcode opcode;
+   switch (op) {
+   case ir_tex: opcode = SHADER_OPCODE_TXL; break;
+   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
+   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
+   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
+   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
+   case ir_tg4: opcode = offset_value.file != BAD_FILE
+                         ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
+   case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+   case ir_txb:
+      unreachable("TXB is not valid for vertex shaders.");
+   case ir_lod:
+      unreachable("LOD is not valid for vertex shaders.");
+   default:
+      unreachable("Unrecognized tex op");
+   }
+
+   vec4_instruction *inst = new(mem_ctx) vec4_instruction(
+      opcode, dst_reg(this, dest_type));
+
+   inst->offset = constant_offset;
+
+   /* The message header is necessary for:
+    * - Gen4 (always)
+    * - Gen9+ for selecting SIMD4x2
+    * - Texel offsets
+    * - Gather channel selection
+    * - Sampler indices too large to fit in a 4-bit value.
+    */
+   inst->header_size =
+      (devinfo->gen < 5 || devinfo->gen >= 9 ||
+       inst->offset != 0 || op == ir_tg4 ||
+       is_high_sampler(sampler_reg)) ? 1 : 0;
+   inst->base_mrf = 2;
+   inst->mlen = inst->header_size + 1; /* always at least one */
+   inst->dst.writemask = WRITEMASK_XYZW;
+   inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
+
+   inst->src[1] = sampler_reg;
+
+   /* MRF for the first parameter */
+   int param_base = inst->base_mrf + inst->header_size;
+
+   if (op == ir_txs || op == ir_query_levels) {
+      int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
+      emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+   } else {
+      /* Load the coordinate */
+      /* FINISHME: gl_clamp_mask and saturate */
+      int coord_mask = (1 << coord_components) - 1;
+      int zero_mask = 0xf & ~coord_mask;
+
+      emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
+               coordinate));
+
+      if (zero_mask != 0) {
+         emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
+                  src_reg(0)));
+      }
+      /* Load the shadow comparitor */
+      if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
+	 emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
+			  WRITEMASK_X),
+		  shadow_comparitor));
+	 inst->mlen++;
+      }
+
+      /* Load the LOD info */
+      if (op == ir_tex || op == ir_txl) {
+	 int mrf, writemask;
+	 if (devinfo->gen >= 5) {
+	    mrf = param_base + 1;
+	    if (shadow_comparitor.file != BAD_FILE) {
+	       writemask = WRITEMASK_Y;
+	       /* mlen already incremented */
+	    } else {
+	       writemask = WRITEMASK_X;
+	       inst->mlen++;
+	    }
+	 } else /* devinfo->gen == 4 */ {
+	    mrf = param_base;
+	    writemask = WRITEMASK_W;
+	 }
+         lod.swizzle = BRW_SWIZZLE_XXXX;
+	 emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
+      } else if (op == ir_txf) {
+         emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
+      } else if (op == ir_txf_ms) {
+         emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
+                  sample_index));
+         if (devinfo->gen >= 7) {
+            /* MCS data is in the first channel of `mcs`, but we need to get it into
+             * the .y channel of the second vec4 of params, so replicate .x across
+             * the whole vec4 and then mask off everything except .y
+             */
+            mcs.swizzle = BRW_SWIZZLE_XXXX;
+            emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
+                     mcs));
+         }
+         inst->mlen++;
+      } else if (op == ir_txd) {
+         const brw_reg_type type = lod.type;
+
+	 if (devinfo->gen >= 5) {
+	    lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
+	    lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
+	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
+	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
+	    inst->mlen++;
+
+	    if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
+	       lod.swizzle = BRW_SWIZZLE_ZZZZ;
+	       lod2.swizzle = BRW_SWIZZLE_ZZZZ;
+	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
+	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
+	       inst->mlen++;
+
+               if (shadow_comparitor.file != BAD_FILE) {
+                  emit(MOV(dst_reg(MRF, param_base + 2,
+                                   shadow_comparitor.type, WRITEMASK_Z),
+                           shadow_comparitor));
+               }
+	    }
+	 } else /* devinfo->gen == 4 */ {
+	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
+	    emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
+	    inst->mlen += 2;
+	 }
+      } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
+         if (shadow_comparitor.file != BAD_FILE) {
+            emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
+                     shadow_comparitor));
+         }
+
+         emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
+                  offset_value));
+         inst->mlen++;
+      }
+   }
+
+   emit(inst);
+
+   /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
+    * spec requires layers.
+    */
+   if (op == ir_txs && is_cube_array) {
+      emit_math(SHADER_OPCODE_INT_QUOTIENT,
+                writemask(inst->dst, WRITEMASK_Z),
+                src_reg(inst->dst), src_reg(6));
+   }
+
+   if (devinfo->gen == 6 && op == ir_tg4) {
+      emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+   }
+
+   swizzle_result(op, dest,
+                  src_reg(inst->dst), sampler, dest_type);
+}
+
+void
 vec4_visitor::visit(ir_texture *ir)
 {
    uint32_t sampler =
@@ -2533,11 +2740,9 @@
 
       /* Emit code to evaluate the actual indexing expression */
       nonconst_sampler_index->accept(this);
-      dst_reg temp(this, glsl_type::uint_type);
-      emit(ADD(temp, this->result, src_reg(sampler)));
-      emit_uniformize(temp, src_reg(temp));
-
-      sampler_reg = src_reg(temp);
+      src_reg temp(this, glsl_type::uint_type);
+      emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
+      sampler_reg = emit_uniformize(temp);
    } else {
       /* Single sampler, or constant array index; the indexing expression
        * is just an immediate.
@@ -2570,7 +2775,9 @@
     * generating these values may involve SEND messages that need the MRFs.
     */
    src_reg coordinate;
+   int coord_components = 0;
    if (ir->coordinate) {
+      coord_components = ir->coordinate->type->vector_elements;
       ir->coordinate->accept(this);
       coordinate = this->result;
    }
@@ -2588,42 +2795,35 @@
       offset_value = src_reg(this->result);
    }
 
-   const glsl_type *lod_type = NULL, *sample_index_type = NULL;
-   src_reg lod, dPdx, dPdy, sample_index, mcs;
+   src_reg lod, lod2, sample_index, mcs;
    switch (ir->op) {
    case ir_tex:
       lod = src_reg(0.0f);
-      lod_type = glsl_type::float_type;
       break;
    case ir_txf:
    case ir_txl:
    case ir_txs:
       ir->lod_info.lod->accept(this);
       lod = this->result;
-      lod_type = ir->lod_info.lod->type;
       break;
    case ir_query_levels:
       lod = src_reg(0);
-      lod_type = glsl_type::int_type;
       break;
    case ir_txf_ms:
       ir->lod_info.sample_index->accept(this);
       sample_index = this->result;
-      sample_index_type = ir->lod_info.sample_index->type;
 
       if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
-         mcs = emit_mcs_fetch(ir, coordinate, sampler_reg);
+         mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
       else
          mcs = src_reg(0u);
       break;
    case ir_txd:
       ir->lod_info.grad.dPdx->accept(this);
-      dPdx = this->result;
+      lod = this->result;
 
       ir->lod_info.grad.dPdy->accept(this);
-      dPdy = this->result;
-
-      lod_type = ir->lod_info.grad.dPdx->type;
+      lod2 = this->result;
       break;
    case ir_txb:
    case ir_lod:
@@ -2631,175 +2831,31 @@
       break;
    }
 
-   enum opcode opcode;
-   switch (ir->op) {
-   case ir_tex: opcode = SHADER_OPCODE_TXL; break;
-   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
-   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
-   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
-   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
-   case ir_tg4: opcode = has_nonconstant_offset
-                         ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
-   case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
-   case ir_txb:
-      unreachable("TXB is not valid for vertex shaders.");
-   case ir_lod:
-      unreachable("LOD is not valid for vertex shaders.");
-   default:
-      unreachable("Unrecognized tex op");
-   }
-
-   vec4_instruction *inst = new(mem_ctx) vec4_instruction(
-      opcode, dst_reg(this, ir->type));
-
+   uint32_t constant_offset = 0;
    if (ir->offset != NULL && !has_nonconstant_offset) {
-      inst->offset =
+      constant_offset  =
          brw_texture_offset(ir->offset->as_constant()->value.i,
                             ir->offset->type->vector_elements);
    }
 
    /* Stuff the channel select bits in the top of the texture offset */
    if (ir->op == ir_tg4)
-      inst->offset |= gather_channel(ir, sampler) << 16;
-
-   /* The message header is necessary for:
-    * - Gen4 (always)
-    * - Gen9+ for selecting SIMD4x2
-    * - Texel offsets
-    * - Gather channel selection
-    * - Sampler indices too large to fit in a 4-bit value.
-    */
-   inst->header_size =
-      (devinfo->gen < 5 || devinfo->gen >= 9 ||
-       inst->offset != 0 || ir->op == ir_tg4 ||
-       is_high_sampler(devinfo, sampler_reg)) ? 1 : 0;
-   inst->base_mrf = 2;
-   inst->mlen = inst->header_size + 1; /* always at least one */
-   inst->dst.writemask = WRITEMASK_XYZW;
-   inst->shadow_compare = ir->shadow_comparitor != NULL;
-
-   inst->src[1] = sampler_reg;
-
-   /* MRF for the first parameter */
-   int param_base = inst->base_mrf + inst->header_size;
+      constant_offset |=
+         gather_channel( ir->lod_info.component->as_constant()->value.i[0],
+                         sampler) << 16;
+
+   glsl_type const *type = ir->sampler->type;
+   bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
+      type->sampler_array;
 
-   if (ir->op == ir_txs || ir->op == ir_query_levels) {
-      int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
-      emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
-   } else {
-      /* Load the coordinate */
-      /* FINISHME: gl_clamp_mask and saturate */
-      int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
-      int zero_mask = 0xf & ~coord_mask;
-
-      emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
-               coordinate));
-
-      if (zero_mask != 0) {
-         emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
-                  src_reg(0)));
-      }
-      /* Load the shadow comparitor */
-      if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) {
-	 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
-			  WRITEMASK_X),
-		  shadow_comparitor));
-	 inst->mlen++;
-      }
-
-      /* Load the LOD info */
-      if (ir->op == ir_tex || ir->op == ir_txl) {
-	 int mrf, writemask;
-	 if (devinfo->gen >= 5) {
-	    mrf = param_base + 1;
-	    if (ir->shadow_comparitor) {
-	       writemask = WRITEMASK_Y;
-	       /* mlen already incremented */
-	    } else {
-	       writemask = WRITEMASK_X;
-	       inst->mlen++;
-	    }
-	 } else /* devinfo->gen == 4 */ {
-	    mrf = param_base;
-	    writemask = WRITEMASK_W;
-	 }
-	 emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
-      } else if (ir->op == ir_txf) {
-         emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
-      } else if (ir->op == ir_txf_ms) {
-         emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
-                  sample_index));
-         if (devinfo->gen >= 7) {
-            /* MCS data is in the first channel of `mcs`, but we need to get it into
-             * the .y channel of the second vec4 of params, so replicate .x across
-             * the whole vec4 and then mask off everything except .y
-             */
-            mcs.swizzle = BRW_SWIZZLE_XXXX;
-            emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
-                     mcs));
-         }
-         inst->mlen++;
-      } else if (ir->op == ir_txd) {
-	 const glsl_type *type = lod_type;
-
-	 if (devinfo->gen >= 5) {
-	    dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
-	    dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
-	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
-	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
-	    inst->mlen++;
-
-	    if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
-	       dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
-	       dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
-	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
-	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
-	       inst->mlen++;
-
-               if (ir->shadow_comparitor) {
-                  emit(MOV(dst_reg(MRF, param_base + 2,
-                                   ir->shadow_comparitor->type, WRITEMASK_Z),
-                           shadow_comparitor));
-               }
-	    }
-	 } else /* devinfo->gen == 4 */ {
-	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
-	    emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
-	    inst->mlen += 2;
-	 }
-      } else if (ir->op == ir_tg4 && has_nonconstant_offset) {
-         if (ir->shadow_comparitor) {
-            emit(MOV(dst_reg(MRF, param_base, ir->shadow_comparitor->type, WRITEMASK_W),
-                     shadow_comparitor));
-         }
-
-         emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
-                  offset_value));
-         inst->mlen++;
-      }
-   }
-
-   emit(inst);
-
-   /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
-    * spec requires layers.
-    */
-   if (ir->op == ir_txs) {
-      glsl_type const *type = ir->sampler->type;
-      if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
-          type->sampler_array) {
-         emit_math(SHADER_OPCODE_INT_QUOTIENT,
-                   writemask(inst->dst, WRITEMASK_Z),
-                   src_reg(inst->dst), src_reg(6));
-      }
-   }
-
-   if (devinfo->gen == 6 && ir->op == ir_tg4) {
-      emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
-   }
+   this->result = src_reg(this, ir->type);
+   dst_reg dest = dst_reg(this->result);
 
-   swizzle_result(ir, src_reg(inst->dst), sampler);
+   emit_texture(ir->op, dest, ir->type, coordinate, coord_components,
+                shadow_comparitor,
+                lod, lod2, sample_index,
+                constant_offset, offset_value,
+                mcs, is_cube_array, sampler, sampler_reg);
 }
 
 /**
@@ -2833,10 +2889,9 @@
  * Set up the gather channel based on the swizzle, for gather4.
  */
 uint32_t
-vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler)
+vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
 {
-   ir_constant *chan = ir->lod_info.component->as_constant();
-   int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+   int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
    switch (swiz) {
       case SWIZZLE_X: return 0;
       case SWIZZLE_Y:
@@ -2854,22 +2909,23 @@
 }
 
 void
-vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler)
+vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
+                             src_reg orig_val, uint32_t sampler,
+                             const glsl_type *dest_type)
 {
    int s = key->tex.swizzles[sampler];
 
-   this->result = src_reg(this, ir->type);
-   dst_reg swizzled_result(this->result);
+   dst_reg swizzled_result = dest;
 
-   if (ir->op == ir_query_levels) {
+   if (op == ir_query_levels) {
       /* # levels is in .w */
       orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
       emit(MOV(swizzled_result, orig_val));
       return;
    }
 
-   if (ir->op == ir_txs || ir->type == glsl_type::float_type
-			|| s == SWIZZLE_NOOP || ir->op == ir_tg4) {
+   if (op == ir_txs || dest_type == glsl_type::float_type
+			|| s == SWIZZLE_NOOP || op == ir_tg4) {
       emit(MOV(swizzled_result, orig_val));
       return;
    }
@@ -2952,18 +3008,37 @@
 }
 
 void
+vec4_visitor::gs_emit_vertex(int stream_id)
+{
+   unreachable("not reached");
+}
+
+void
 vec4_visitor::visit(ir_emit_vertex *)
 {
    unreachable("not reached");
 }
 
 void
+vec4_visitor::gs_end_primitive()
+{
+   unreachable("not reached");
+}
+
+
+void
 vec4_visitor::visit(ir_end_primitive *)
 {
    unreachable("not reached");
 }
 
 void
+vec4_visitor::visit(ir_barrier *)
+{
+   unreachable("not reached");
+}
+
+void
 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                                   dst_reg dst, src_reg offset,
                                   src_reg src0, src_reg src1)
@@ -3086,6 +3161,7 @@
          vec4_instruction *inst;
          inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
          inst->predicate = BRW_PREDICATE_NORMAL;
+         output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
          inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
          inst->predicate = BRW_PREDICATE_NORMAL;
       }
@@ -3098,18 +3174,23 @@
       if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
          dst_reg reg_w = reg;
          reg_w.writemask = WRITEMASK_W;
-         emit(MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ])));
+         src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
+         reg_as_src.type = reg_w.type;
+         reg_as_src.swizzle = brw_swizzle_for_size(1);
+         emit(MOV(reg_w, reg_as_src));
       }
       if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
          dst_reg reg_y = reg;
          reg_y.writemask = WRITEMASK_Y;
          reg_y.type = BRW_REGISTER_TYPE_D;
+         output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
          emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
       }
       if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
          dst_reg reg_z = reg;
          reg_z.writemask = WRITEMASK_Z;
          reg_z.type = BRW_REGISTER_TYPE_D;
+         output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
          emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
       }
    }
@@ -3147,8 +3228,8 @@
 vec4_instruction *
 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
 {
-   assert (varying < VARYING_SLOT_MAX);
-   reg.type = output_reg[varying].type;
+   assert(varying < VARYING_SLOT_MAX);
+   assert(output_reg[varying].type == reg.type);
    current_annotation = output_reg_annotation[varying];
    /* Copy the register, saturating if necessary */
    return emit(MOV(reg, src_reg(output_reg[varying])));
@@ -3158,6 +3239,7 @@
 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
    reg.type = BRW_REGISTER_TYPE_F;
+   output_reg[varying].type = reg.type;
 
    switch (varying) {
    case VARYING_SLOT_PSIZ:
@@ -3414,7 +3496,8 @@
    dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
 				       inst->dst.writemask));
    vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
-   write->predicate = inst->predicate;
+   if (inst->opcode != BRW_OPCODE_SEL)
+      write->predicate = inst->predicate;
    write->ir = inst->ir;
    write->annotation = inst->annotation;
    inst->insert_after(block, write);
@@ -3477,16 +3560,16 @@
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       if (inst->dst.file == GRF && inst->dst.reladdr) {
          if (scratch_loc[inst->dst.reg] == -1) {
-            scratch_loc[inst->dst.reg] = c->last_scratch;
-            c->last_scratch += this->alloc.sizes[inst->dst.reg];
+            scratch_loc[inst->dst.reg] = last_scratch;
+            last_scratch += this->alloc.sizes[inst->dst.reg];
          }
 
          for (src_reg *iter = inst->dst.reladdr;
               iter->reladdr;
               iter = iter->reladdr) {
             if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
-               scratch_loc[iter->reg] = c->last_scratch;
-               c->last_scratch += this->alloc.sizes[iter->reg];
+               scratch_loc[iter->reg] = last_scratch;
+               last_scratch += this->alloc.sizes[iter->reg];
             }
          }
       }
@@ -3496,8 +3579,8 @@
               iter->reladdr;
               iter = iter->reladdr) {
             if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
-               scratch_loc[iter->reg] = c->last_scratch;
-               c->last_scratch += this->alloc.sizes[iter->reg];
+               scratch_loc[iter->reg] = last_scratch;
+               last_scratch += this->alloc.sizes[iter->reg];
             }
          }
       }
@@ -3670,8 +3753,8 @@
    *reg = neg_result;
 }
 
-vec4_visitor::vec4_visitor(struct brw_context *brw,
-                           struct brw_vec4_compile *c,
+vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
+                           void *log_data,
                            struct gl_program *prog,
                            const struct brw_vue_prog_key *key,
                            struct brw_vue_prog_data *prog_data,
@@ -3679,11 +3762,9 @@
                            gl_shader_stage stage,
 			   void *mem_ctx,
                            bool no_spills,
-                           shader_time_shader_type st_base,
-                           shader_time_shader_type st_written,
-                           shader_time_shader_type st_reset)
-   : backend_visitor(brw, shader_prog, prog, &prog_data->base, stage),
-     c(c),
+                           int shader_time_index)
+   : backend_shader(compiler, log_data, mem_ctx,
+                    shader_prog, prog, &prog_data->base, stage),
      key(key),
      prog_data(prog_data),
      sanity_param_count(0),
@@ -3691,11 +3772,9 @@
      first_non_payload_grf(0),
      need_all_constants_in_pull_buffer(false),
      no_spills(no_spills),
-     st_base(st_base),
-     st_written(st_written),
-     st_reset(st_reset)
+     shader_time_index(shader_time_index),
+     last_scratch(0)
 {
-   this->mem_ctx = mem_ctx;
    this->failed = false;
 
    this->base_ir = NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -381,8 +381,7 @@
          break;
 
       default:
-         _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
-                       _mesa_opcode_string(vpi->Opcode));
+         assert(!"Unsupported opcode in vertex program");
       }
 
       /* Copy the temporary back into the actual destination register. */
@@ -395,8 +394,7 @@
     * pull constants.  Do that now.
     */
    if (this->need_all_constants_in_pull_buffer) {
-      const struct gl_program_parameter_list *params =
-         vs_compile->vp->program.Base.Parameters;
+      const struct gl_program_parameter_list *params = vp->Base.Parameters;
       unsigned i;
       for (i = 0; i < params->NumParameters * 4; i++) {
          stage_prog_data->pull_param[i] =
@@ -416,8 +414,7 @@
       vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
 
    /* PROGRAM_STATE_VAR etc. */
-   struct gl_program_parameter_list *plist =
-      vs_compile->vp->program.Base.Parameters;
+   struct gl_program_parameter_list *plist = vp->Base.Parameters;
    for (unsigned p = 0; p < plist->NumParameters; p++) {
       unsigned components = plist->Parameters[p].Size;
 
@@ -487,8 +484,7 @@
 src_reg
 vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
 {
-   struct gl_program_parameter_list *plist =
-      vs_compile->vp->program.Base.Parameters;
+   struct gl_program_parameter_list *plist = vp->Base.Parameters;
 
    src_reg result;
 
@@ -574,15 +570,13 @@
          break;
 
       default:
-         _mesa_problem(ctx, "bad uniform src register file: %s\n",
-                       _mesa_register_file_name((gl_register_file)src.File));
+         assert(!"Bad uniform in src register file");
          return src_reg(this, glsl_type::vec4_type);
       }
       break;
 
    default:
-      _mesa_problem(ctx, "bad src register file: %s\n",
-                    _mesa_register_file_name((gl_register_file)src.File));
+      assert(!"Bad src register file");
       return src_reg(this, glsl_type::vec4_type);
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -23,7 +23,6 @@
 
 
 #include "brw_vs.h"
-#include "main/context.h"
 
 
 namespace brw {
@@ -37,7 +36,7 @@
 
    for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
       if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
-         uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
+         uint8_t wa_flags = key->gl_attrib_wa_flags[i];
          dst_reg reg(ATTR, i);
          dst_reg reg_d = reg;
          reg_d.type = BRW_REGISTER_TYPE_D;
@@ -78,7 +77,7 @@
             /* ES 3.0 has different rules for converting signed normalized
              * fixed-point numbers than desktop GL.
              */
-            if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
+            if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) {
                /* According to equation 2.2 of the ES 3.0 specification,
                 * signed normalization conversion is done by:
                 *
@@ -144,7 +143,8 @@
 
 
 dst_reg *
-vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
+vec4_vs_visitor::make_reg_for_system_value(int location,
+                                           const glsl_type *type)
 {
    /* VertexID is stored by the VF as the last vertex element, but
     * we don't represent it with a flag in inputs_read, so we call
@@ -152,7 +152,7 @@
     */
    dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
 
-   switch (ir->data.location) {
+   switch (location) {
    case SYSTEM_VALUE_BASE_VERTEX:
       reg->writemask = WRITEMASK_X;
       vs_prog_data->uses_vertexid = true;
@@ -212,18 +212,32 @@
 }
 
 
-vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
-                                 struct brw_vs_compile *vs_compile,
+/**
+ * Construct the vertex-shader vec4 visitor.
+ *
+ * Forwards the compiler, log data, the bases of the vertex program, key and
+ * prog_data, the shader program, mem_ctx and shader_time_index to the
+ * vec4_visitor base class for MESA_SHADER_VERTEX (with no_spills = false),
+ * and stores key, vs_prog_data, vp and use_legacy_snorm_formula as members.
+ */
+vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
+                                 void *log_data,
+                                 const struct brw_vs_prog_key *key,
                                  struct brw_vs_prog_data *vs_prog_data,
+                                 struct gl_vertex_program *vp,
                                  struct gl_shader_program *prog,
-                                 void *mem_ctx)
-   : vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
-                  &vs_compile->key.base, &vs_prog_data->base, prog,
+                                 void *mem_ctx,
+                                 int shader_time_index,
+                                 bool use_legacy_snorm_formula)
+   : vec4_visitor(compiler, log_data,
+                  &vp->Base, &key->base, &vs_prog_data->base, prog,
                   MESA_SHADER_VERTEX,
                   mem_ctx, false /* no_spills */,
-                  ST_VS, ST_VS_WRITTEN, ST_VS_RESET),
-     vs_compile(vs_compile),
-     vs_prog_data(vs_prog_data)
+                  shader_time_index),
+     key(key),
+     vs_prog_data(vs_prog_data),
+     vp(vp),
+     use_legacy_snorm_formula(use_legacy_snorm_formula)
 {
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vs.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vs.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vs.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vs.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,108 +40,6 @@
 
 #include "util/ralloc.h"
 
-static inline void assign_vue_slot(struct brw_vue_map *vue_map,
-                                   int varying)
-{
-   /* Make sure this varying hasn't been assigned a slot already */
-   assert (vue_map->varying_to_slot[varying] == -1);
-
-   vue_map->varying_to_slot[varying] = vue_map->num_slots;
-   vue_map->slot_to_varying[vue_map->num_slots++] = varying;
-}
-
-/**
- * Compute the VUE map for vertex shader program.
- */
-void
-brw_compute_vue_map(const struct brw_device_info *devinfo,
-                    struct brw_vue_map *vue_map,
-                    GLbitfield64 slots_valid)
-{
-   vue_map->slots_valid = slots_valid;
-   int i;
-
-   /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
-    * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
-    */
-   slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
-
-   /* Make sure that the values we store in vue_map->varying_to_slot and
-    * vue_map->slot_to_varying won't overflow the signed chars that are used
-    * to store them.  Note that since vue_map->slot_to_varying sometimes holds
-    * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
-    * BRW_VARYING_SLOT_COUNT is <= 127, not 128.
-    */
-   STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
-
-   vue_map->num_slots = 0;
-   for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
-      vue_map->varying_to_slot[i] = -1;
-      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
-   }
-
-   /* VUE header: format depends on chip generation and whether clipping is
-    * enabled.
-    */
-   if (devinfo->gen < 6) {
-      /* There are 8 dwords in VUE header pre-Ironlake:
-       * dword 0-3 is indices, point width, clip flags.
-       * dword 4-7 is ndc position
-       * dword 8-11 is the first vertex data.
-       *
-       * On Ironlake the VUE header is nominally 20 dwords, but the hardware
-       * will accept the same header layout as Gen4 [and should be a bit faster]
-       */
-      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
-      assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
-      assign_vue_slot(vue_map, VARYING_SLOT_POS);
-   } else {
-      /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
-       * dword 0-3 of the header is indices, point width, clip flags.
-       * dword 4-7 is the 4D space position
-       * dword 8-15 of the vertex header is the user clip distance if
-       * enabled.
-       * dword 8-11 or 16-19 is the first vertex element data we fill.
-       */
-      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
-      assign_vue_slot(vue_map, VARYING_SLOT_POS);
-      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
-         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
-      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
-         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
-
-      /* front and back colors need to be consecutive so that we can use
-       * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
-       * two-sided color.
-       */
-      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
-         assign_vue_slot(vue_map, VARYING_SLOT_COL0);
-      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
-         assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
-      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
-         assign_vue_slot(vue_map, VARYING_SLOT_COL1);
-      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
-         assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
-   }
-
-   /* The hardware doesn't care about the rest of the vertex outputs, so just
-    * assign them contiguously.  Don't reassign outputs that already have a
-    * slot.
-    *
-    * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
-    * since it's encoded as the clip distances by emit_clip_distances().
-    * However, it may be output by transform feedback, and we'd rather not
-    * recompute state when TF changes, so we just always include it.
-    */
-   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
-      if ((slots_valid & BITFIELD64_BIT(i)) &&
-          vue_map->varying_to_slot[i] == -1) {
-         assign_vue_slot(vue_map, i);
-      }
-   }
-}
-
-
 /**
  * Decide which set of clip planes should be used when clipping via
  * gl_Position or gl_ClipVertex.
@@ -196,7 +94,6 @@
 {
    GLuint program_size;
    const GLuint *program;
-   struct brw_vs_compile c;
    struct brw_vs_prog_data prog_data;
    struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
    void *mem_ctx;
@@ -206,8 +103,6 @@
    if (prog)
       vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
 
-   memset(&c, 0, sizeof(c));
-   memcpy(&c.key, key, sizeof(*key));
    memset(&prog_data, 0, sizeof(prog_data));
 
    /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
@@ -216,8 +111,6 @@
 
    mem_ctx = ralloc_context(NULL);
 
-   c.vp = vp;
-
    /* Allocate the references to the uniforms that will end up in the
     * prog_data associated with the compiled program, and which will be freed
     * by the state cache.
@@ -228,26 +121,30 @@
        * case being a float value that gets blown up to a vec4, so be
        * conservative here.
        */
-      param_count = vs->num_uniform_components * 4;
-
+      param_count = vs->num_uniform_components * 4 +
+                    vs->NumImages * BRW_IMAGE_PARAM_SIZE;
+      stage_prog_data->nr_image_params = vs->NumImages;
    } else {
       param_count = vp->program.Base.Parameters->NumParameters * 4;
    }
    /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
     * planes as uniforms.
     */
-   param_count += c.key.base.nr_userclip_plane_consts * 4;
+   param_count += key->base.nr_userclip_plane_consts * 4;
 
    stage_prog_data->param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    stage_prog_data->pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   stage_prog_data->image_param =
+      rzalloc_array(NULL, struct brw_image_param,
+                    stage_prog_data->nr_image_params);
    stage_prog_data->nr_params = param_count;
 
    GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
    prog_data.inputs_read = vp->program.Base.InputsRead;
 
-   if (c.key.copy_edgeflag) {
+   if (key->copy_edgeflag) {
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
       prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
    }
@@ -260,7 +157,7 @@
        * coords, which would be a pain to handle.
        */
       for (i = 0; i < 8; i++) {
-         if (c.key.point_coord_replace & (1 << i))
+         if (key->point_coord_replace & (1 << i))
             outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
       }
 
@@ -275,7 +172,7 @@
     * distance varying slots whenever clipping is enabled, even if the vertex
     * shader doesn't write to gl_ClipDistance.
     */
-   if (c.key.base.userclip_active) {
+   if (key->base.userclip_active) {
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
    }
@@ -284,34 +181,28 @@
                        &prog_data.base.vue_map, outputs_written);
 
    if (0) {
-      _mesa_fprint_program_opt(stderr, &c.vp->program.Base, PROG_PRINT_DEBUG,
+      _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
 			       true);
    }
 
    /* Emit GEN4 code.
     */
-   program = brw_vs_emit(brw, prog, &c, &prog_data, mem_ctx, &program_size);
+   program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
+                         &vp->program, prog, &program_size);
    if (program == NULL) {
       ralloc_free(mem_ctx);
       return false;
    }
 
    /* Scratch space is used for register spilling */
-   if (c.base.last_scratch) {
-      perf_debug("Vertex shader triggered register spilling.  "
-                 "Try reducing the number of live vec4 values to "
-                 "improve performance.\n");
-
-      prog_data.base.base.total_scratch
-         = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
-
+   if (prog_data.base.base.total_scratch) {
       brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
 			 prog_data.base.base.total_scratch *
                          brw->max_vs_threads);
    }
 
    brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
-		    &c.key, sizeof(c.key),
+		    key, sizeof(struct brw_vs_prog_key),
 		    program, program_size,
 		    &prog_data, sizeof(prog_data),
 		    &brw->vs.base.prog_offset, &brw->vs.prog_data);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vs.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vs.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vs.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vs.h	2015-09-16 14:36:09.000000000 +0000
@@ -50,22 +50,16 @@
 #define BRW_ATTRIB_WA_SIGN          32  /* interpret as signed in shader */
 #define BRW_ATTRIB_WA_SCALE         64  /* interpret as scaled in shader */
 
-struct brw_vs_compile {
-   struct brw_vec4_compile base;
-   struct brw_vs_prog_key key;
-
-   struct brw_vertex_program *vp;
-};
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 const unsigned *brw_vs_emit(struct brw_context *brw,
-                            struct gl_shader_program *prog,
-                            struct brw_vs_compile *c,
-                            struct brw_vs_prog_data *prog_data,
                             void *mem_ctx,
+                            const struct brw_vs_prog_key *key,
+                            struct brw_vs_prog_data *prog_data,
+                            struct gl_vertex_program *vp,
+                            struct gl_shader_program *shader_prog,
                             unsigned *program_size);
 void brw_vs_debug_recompile(struct brw_context *brw,
                             struct gl_shader_program *prog,
@@ -90,14 +84,19 @@
 class vec4_vs_visitor : public vec4_visitor
 {
 public:
-   vec4_vs_visitor(struct brw_context *brw,
-                   struct brw_vs_compile *vs_compile,
+   vec4_vs_visitor(const struct brw_compiler *compiler,
+                   void *log_data,
+                   const struct brw_vs_prog_key *key,
                    struct brw_vs_prog_data *vs_prog_data,
+                   struct gl_vertex_program *vp,
                    struct gl_shader_program *prog,
-                   void *mem_ctx);
+                   void *mem_ctx,
+                   int shader_time_index,
+                   bool use_legacy_snorm_formula);
 
 protected:
-   virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
+   virtual dst_reg *make_reg_for_system_value(int location,
+                                              const glsl_type *type);
    virtual void setup_payload();
    virtual void emit_prolog();
    virtual void emit_program_code();
@@ -111,10 +110,13 @@
    dst_reg get_vp_dst_reg(const prog_dst_register &dst);
    src_reg get_vp_src_reg(const prog_src_register &src);
 
-   struct brw_vs_compile * const vs_compile;
+   const struct brw_vs_prog_key *const key;
    struct brw_vs_prog_data * const vs_prog_data;
+   struct gl_vertex_program *const vp;
    src_reg *vp_temp_regs;
    src_reg vp_addr_reg;
+
+   bool use_legacy_snorm_formula;
 };
 
 } /* namespace brw */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vs_surface_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vs_surface_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vs_surface_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -56,7 +56,7 @@
                           const struct brw_stage_prog_data *prog_data,
                           bool dword_pitch)
 {
-   int i;
+   unsigned i;
    uint32_t surf_index = prog_data->binding_table.pull_constants_start;
 
    if (!prog_data->nr_pull_params) {
@@ -121,7 +121,7 @@
    /* BRW_NEW_VS_PROG_DATA */
    const struct brw_stage_prog_data *prog_data = &brw->vs.prog_data->base.base;
 
-   dword_pitch = brw->vs.prog_data->base.simd8;
+   dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
 
    /* _NEW_PROGRAM_CONSTANTS */
    brw_upload_pull_constants(brw, BRW_NEW_VS_CONSTBUF, &vp->program.Base,
@@ -151,7 +151,7 @@
       return;
 
    /* BRW_NEW_VS_PROG_DATA */
-   dword_pitch = brw->vs.prog_data->base.simd8;
+   dword_pitch = brw->vs.prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
    brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
                            &brw->vs.base, &brw->vs.prog_data->base.base,
                            dword_pitch);
@@ -191,3 +191,39 @@
    },
    .emit = brw_upload_vs_abo_surfaces,
 };
+
+/**
+ * Upload image surface state for the vertex shader stage.
+ *
+ * Does nothing unless a shader program is current for MESA_SHADER_VERTEX;
+ * otherwise passes that program's linked vertex shader, the VS stage state
+ * and the VS prog_data to brw_upload_image_surfaces().
+ */
+static void
+brw_upload_vs_image_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* BRW_NEW_VERTEX_PROGRAM */
+   struct gl_shader_program *prog =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+
+   if (prog) {
+      /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
+                                &brw->vs.base, &brw->vs.prog_data->base.base);
+   }
+}
+
+/**
+ * Tracked-state atom pairing brw_upload_vs_image_surfaces() with the
+ * BRW_NEW_* dirty bits listed below.
+ */
+const struct brw_tracked_state brw_vs_image_surfaces = {
+   .dirty = {
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_IMAGE_UNITS |
+             BRW_NEW_VERTEX_PROGRAM |
+             BRW_NEW_VS_PROG_DATA,
+   },
+   .emit = brw_upload_vs_image_surfaces,
+};
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vue_map.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vue_map.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_vue_map.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_vue_map.c	2015-09-16 14:36:09.000000000 +0000
@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file brw_vue_map.c
+ *
+ * This file computes the "VUE map" for a (non-fragment) shader stage, which
+ * describes the layout of its output varyings.  The VUE map is used to match
+ * outputs from one stage with the inputs of the next.
+ *
+ * Largely, varyings can be placed however we like - producers/consumers simply
+ * have to agree on the layout.  However, there is also a "VUE Header" that
+ * prescribes a fixed layout for items that interact with fixed function
+ * hardware, such as the clipper and rasterizer.
+ *
+ * Authors:
+ *   Paul Berry <stereotype441@gmail.com>
+ *   Chris Forbes <chrisf@ijw.co.nz>
+ *   Eric Anholt <eric@anholt.net>
+ */
+
+
+#include "main/compiler.h"
+#include "brw_context.h"
+
+/**
+ * Append a varying to the VUE map as the next free slot.
+ *
+ * Records the mapping in both directions (varying_to_slot and
+ * slot_to_varying) and increments num_slots.  The varying must not already
+ * have a slot; this is only checked by an assert.
+ */
+static inline void
+assign_vue_slot(struct brw_vue_map *vue_map, int varying)
+{
+   /* Make sure this varying hasn't been assigned a slot already */
+   assert (vue_map->varying_to_slot[varying] == -1);
+
+   vue_map->varying_to_slot[varying] = vue_map->num_slots;
+   vue_map->slot_to_varying[vue_map->num_slots++] = varying;
+}
+
+/**
+ * Compute the VUE map for a shader stage.
+ *
+ * @param devinfo      device info; only devinfo->gen is consulted here, to
+ *                     pick the VUE header layout (gen < 6 vs. gen >= 6).
+ * @param vue_map      output; slots_valid, num_slots and both lookup tables
+ *                     are overwritten.
+ * @param slots_valid  bitfield of VARYING_SLOT_* bits to place.  The layer
+ *                     and viewport bits are masked off before slots are
+ *                     assigned, but vue_map->slots_valid keeps them.
+ */
+void
+brw_compute_vue_map(const struct brw_device_info *devinfo,
+                    struct brw_vue_map *vue_map,
+                    GLbitfield64 slots_valid)
+{
+   vue_map->slots_valid = slots_valid;
+   int i;
+
+   /* gl_Layer and gl_ViewportIndex don't get their own varying slots -- they
+    * are stored in the first VUE slot (VARYING_SLOT_PSIZ).
+    */
+   slots_valid &= ~(VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+   /* Make sure that the values we store in vue_map->varying_to_slot and
+    * vue_map->slot_to_varying won't overflow the signed chars that are used
+    * to store them.  Note that since vue_map->slot_to_varying sometimes holds
+    * values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
+    * BRW_VARYING_SLOT_COUNT is <= 127, not 128.
+    */
+   STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
+
+   vue_map->num_slots = 0;
+   for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
+      vue_map->varying_to_slot[i] = -1;
+      vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
+   }
+
+   /* VUE header: format depends on chip generation and whether clipping is
+    * enabled.
+    *
+    * See the Sandybridge PRM, Volume 2 Part 1, section 1.5.1 (page 30),
+    * "Vertex URB Entry (VUE) Formats" which describes the VUE header layout.
+    */
+   if (devinfo->gen < 6) {
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 is indices, point width, clip flags.
+       * dword 4-7 is ndc position
+       * dword 8-11 is the first vertex data.
+       *
+       * On Ironlake the VUE header is nominally 20 dwords, but the hardware
+       * will accept the same header layout as Gen4 [and should be a bit faster]
+       */
+      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
+      assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
+      assign_vue_slot(vue_map, VARYING_SLOT_POS);
+   } else {
+      /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+       * dword 0-3 of the header is indices, point width, clip flags.
+       * dword 4-7 is the 4D space position
+       * dword 8-15 of the vertex header is the user clip distance if
+       * enabled.
+       * dword 8-11 or 16-19 is the first vertex element data we fill.
+       */
+      assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
+      assign_vue_slot(vue_map, VARYING_SLOT_POS);
+      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
+         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
+      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
+         assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
+
+      /* front and back colors need to be consecutive so that we can use
+       * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
+       * two-sided color.
+       */
+      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
+         assign_vue_slot(vue_map, VARYING_SLOT_COL0);
+      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
+         assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
+      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
+         assign_vue_slot(vue_map, VARYING_SLOT_COL1);
+      if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
+         assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
+   }
+
+   /* The hardware doesn't care about the rest of the vertex outputs, so just
+    * assign them contiguously.  Don't reassign outputs that already have a
+    * slot.
+    *
+    * We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
+    * since it's encoded as the clip distances by emit_clip_distances().
+    * However, it may be output by transform feedback, and we'd rather not
+    * recompute state when TF changes, so we just always include it.
+    */
+   for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
+      if ((slots_valid & BITFIELD64_BIT(i)) &&
+          vue_map->varying_to_slot[i] == -1) {
+         assign_vue_slot(vue_map, i);
+      }
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_wm.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_wm.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_wm.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_wm.c	2015-09-16 14:36:09.000000000 +0000
@@ -1,34 +1,28 @@
 /*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keithw@vmware.com>
-  */
-
+ * Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ * Intel funded Tungsten Graphics to
+ * develop this 3D driver.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
 #include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_state.h"
@@ -36,6 +30,7 @@
 #include "main/formats.h"
 #include "main/fbobject.h"
 #include "main/samplerobj.h"
+#include "main/framebuffer.h"
 #include "program/prog_parameter.h"
 #include "program/program.h"
 #include "intel_mipmap_tree.h"
@@ -180,9 +175,12 @@
     * so the shader definitely kills pixels.
     */
    prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func;
-
+   prog_data.uses_omask =
+      fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
    prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
 
+   prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+
    /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
    if (!prog)
       prog_data.base.use_alt_mode = true;
@@ -193,7 +191,9 @@
     */
    int param_count;
    if (fs) {
-      param_count = fs->num_uniform_components;
+      param_count = fs->num_uniform_components +
+                    fs->NumImages * BRW_IMAGE_PARAM_SIZE;
+      prog_data.base.nr_image_params = fs->NumImages;
    } else {
       param_count = fp->program.Base.Parameters->NumParameters * 4;
    }
@@ -203,6 +203,9 @@
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    prog_data.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
+   prog_data.base.image_param =
+      rzalloc_array(NULL, struct brw_image_param,
+                    prog_data.base.nr_image_params);
    prog_data.base.nr_params = param_count;
 
    prog_data.barycentric_interp_modes =
@@ -348,13 +351,15 @@
 gen6_gather_workaround(GLenum internalformat)
 {
    switch (internalformat) {
-      case GL_R8I: return WA_SIGN | WA_8BIT;
-      case GL_R8UI: return WA_8BIT;
-      case GL_R16I: return WA_SIGN | WA_16BIT;
-      case GL_R16UI: return WA_16BIT;
-      /* note that even though GL_R32I and GL_R32UI have format overrides
-       * in the surface state, there is no shader w/a required */
-      default: return 0;
+   case GL_R8I: return WA_SIGN | WA_8BIT;
+   case GL_R8UI: return WA_8BIT;
+   case GL_R16I: return WA_SIGN | WA_16BIT;
+   case GL_R16UI: return WA_16BIT;
+   default:
+      /* Note that even though GL_R32I and GL_R32UI have format overrides in
+       * the surface state, there is no shader w/a required.
+       */
+      return 0;
    }
 }
 
@@ -401,8 +406,9 @@
 	       key->gl_clamp_mask[2] |= 1 << s;
 	 }
 
-         /* gather4's channel select for green from RG32F is broken;
-          * requires a shader w/a on IVB; fixable with just SCS on HSW. */
+         /* gather4's channel select for green from RG32F is broken; requires
+          * a shader w/a on IVB; fixable with just SCS on HSW.
+          */
          if (brw->gen == 7 && !brw->is_haswell && prog->UsesGather) {
             if (img->InternalFormat == GL_RG32F)
                key->gather_channel_quirk_mask |= 1 << s;
@@ -451,18 +457,18 @@
                           BRW_NEW_VUE_MAP_GEOM_OUT);
 }
 
-static void brw_wm_populate_key( struct brw_context *brw,
-				 struct brw_wm_prog_key *key )
+static void
+brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
 {
    struct gl_context *ctx = &brw->ctx;
    /* BRW_NEW_FRAGMENT_PROGRAM */
    const struct brw_fragment_program *fp =
-      (struct brw_fragment_program *)brw->fragment_program;
+      (struct brw_fragment_program *) brw->fragment_program;
    const struct gl_program *prog = (struct gl_program *) brw->fragment_program;
    GLuint lookup = 0;
    GLuint line_aa;
    bool program_uses_dfdy = fp->program.UsesDFdy;
-   bool multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
+   const bool multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 
    memset(key, 0, sizeof(*key));
 
@@ -561,7 +567,7 @@
     * drawable height in order to invert the Y axis.
     */
    if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
-      key->drawable_height = ctx->DrawBuffer->Height;
+      key->drawable_height = _mesa_geometric_height(ctx->DrawBuffer);
    }
 
    if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
@@ -580,7 +586,7 @@
    key->persample_shading =
       _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
    if (key->persample_shading)
-      key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
+      key->persample_2x = _mesa_geometric_samples(ctx->DrawBuffer) == 2;
 
    key->compute_pos_offset =
       _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
@@ -603,7 +609,8 @@
     * like GL requires.  Fix that by building the alpha test into the
     * shader, and we'll skip enabling the fixed function alpha test.
     */
-   if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) {
+   if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
+       ctx->Color.AlphaEnabled) {
       key->alpha_test_func = ctx->Color.AlphaFunc;
       key->alpha_test_ref = ctx->Color.AlphaRef;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_wm_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_wm_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_wm_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_wm_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -47,7 +47,7 @@
    struct gl_context *ctx = &brw->ctx;
    /* BRW_NEW_FRAGMENT_PROGRAM */
    const struct gl_fragment_program *fp = brw->fragment_program;
-   int i;
+   unsigned i;
 
    /* _NEW_BUFFERS */
    for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_wm_surface_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/brw_wm_surface_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/brw_wm_surface_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -35,6 +35,7 @@
 #include "main/mtypes.h"
 #include "main/samplerobj.h"
 #include "program/prog_parameter.h"
+#include "main/framebuffer.h"
 
 #include "intel_mipmap_tree.h"
 #include "intel_batchbuffer.h"
@@ -738,6 +739,9 @@
                                  uint32_t *surf_offset)
 {
    GLuint i;
+   const unsigned int w = _mesa_geometric_width(fb);
+   const unsigned int h = _mesa_geometric_height(fb);
+   const unsigned int s = _mesa_geometric_samples(fb);
 
    /* Update surfaces for drawing buffers */
    if (fb->_NumColorDrawBuffers >= 1) {
@@ -748,17 +752,15 @@
             surf_offset[surf_index] = 
                brw->vtbl.update_renderbuffer_surface(
                   brw, fb->_ColorDrawBuffers[i],
-                  fb->MaxNumLayers > 0, i, surf_index);
+                  _mesa_geometric_layers(fb) > 0, i, surf_index);
 	 } else {
-            brw->vtbl.emit_null_surface_state(
-               brw, fb->Width, fb->Height, fb->Visual.samples,
+            brw->vtbl.emit_null_surface_state(brw, w, h, s,
                &surf_offset[surf_index]);
 	 }
       }
    } else {
       const uint32_t surf_index = render_target_start;
-      brw->vtbl.emit_null_surface_state(
-         brw, fb->Width, fb->Height, fb->Visual.samples,
+      brw->vtbl.emit_null_surface_state(brw, w, h, s,
          &surf_offset[surf_index]);
    }
 }
@@ -896,7 +898,7 @@
    uint32_t *surf_offsets =
       &stage_state->surf_offset[prog_data->binding_table.ubo_start];
 
-   for (int i = 0; i < shader->NumUniformBlocks; i++) {
+   for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
       struct gl_uniform_buffer_binding *binding;
       struct intel_buffer_object *intel_bo;
 
@@ -956,7 +958,7 @@
    uint32_t *surf_offsets =
       &stage_state->surf_offset[prog_data->binding_table.abo_start];
 
-   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
+   for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
       struct gl_atomic_buffer_binding *binding =
          &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
       struct intel_buffer_object *intel_bo =
@@ -1022,6 +1024,257 @@
    .emit = brw_upload_cs_abo_surfaces,
 };
 
+static void
+brw_upload_cs_image_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* _NEW_PROGRAM: the bound compute program; may be NULL (checked below). */
+   struct gl_shader_program *prog =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+   if (prog) {
+      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS: upload its image state. */
+      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+                                &brw->cs.base, &brw->cs.prog_data->base);
+   }
+}
+
+const struct brw_tracked_state brw_cs_image_surfaces = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM, /* read by brw_upload_cs_image_surfaces() */
+      .brw = BRW_NEW_BATCH | /* presumably: re-emit per batch -- confirm */
+             BRW_NEW_CS_PROG_DATA |
+             BRW_NEW_IMAGE_UNITS
+   },
+   .emit = brw_upload_cs_image_surfaces, /* compute-stage image upload */
+};
+
+static uint32_t
+get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
+{
+   if (access == GL_WRITE_ONLY) { /* never read back: no lowering applied */
+      return brw_format_for_mesa_format(format);
+   } else {
+      /* Typed surface reads support a very limited subset of the shader
+       * image formats.  Use RAW for texels too wide for this hardware
+       * generation, otherwise the closest format the hardware supports.
+       */
+      if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
+          (_mesa_get_format_bytes(format) >= 8 &&
+           (brw->gen == 7 && !brw->is_haswell)))
+         return BRW_SURFACEFORMAT_RAW;
+      else
+         return brw_format_for_mesa_format(
+            brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
+   }
+}
+
+static void
+update_default_image_param(struct brw_context *brw, /* unused here */
+                           struct gl_image_unit *u, /* unused here */
+                           unsigned surface_idx,
+                           struct brw_image_param *param)
+{
+   memset(param, 0, sizeof(*param)); /* every other field defaults to 0 */
+   param->surface_idx = surface_idx;
+   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
+    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
+    * detailed explanation of these parameters.
+    */
+   param->swizzling[0] = 0xff;
+   param->swizzling[1] = 0xff;
+}
+
+static void
+update_buffer_image_param(struct brw_context *brw,
+                          struct gl_image_unit *u, /* buffer-backed unit */
+                          unsigned surface_idx,
+                          struct brw_image_param *param) /* out */
+{
+   struct gl_buffer_object *obj = u->TexObj->BufferObject;
+
+   update_default_image_param(brw, u, surface_idx, param); /* reset first */
+
+   param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
+   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat); /* bytes */
+}
+
+static void
+update_texture_image_param(struct brw_context *brw,
+                           struct gl_image_unit *u,
+                           unsigned surface_idx,
+                           struct brw_image_param *param) /* out */
+{
+   struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
+
+   update_default_image_param(brw, u, surface_idx, param); /* reset first */
+
+   param->size[0] = minify(mt->logical_width0, u->Level); /* at u->Level */
+   param->size[1] = minify(mt->logical_height0, u->Level); /* at u->Level */
+   param->size[2] = (!u->Layered ? 1 : /* non-layered binding: one layer */
+                     u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
+                     u->TexObj->Target == GL_TEXTURE_3D ?
+                     minify(mt->logical_depth0, u->Level) :
+                     mt->logical_depth0); /* other targets: not minified */
+
+   intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
+                                  &param->offset[0],
+                                  &param->offset[1]);
+
+   param->stride[0] = mt->cpp; /* presumably bytes per texel -- confirm */
+   param->stride[1] = mt->pitch / mt->cpp; /* pitch in units of cpp */
+   param->stride[2] =
+      brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
+   param->stride[3] =
+      brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
+
+   if (mt->tiling == I915_TILING_X) {
+      /* An X tile is a rectangular block of 512x8 bytes. */
+      param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
+      param->tiling[1] = _mesa_logbase2(8);
+
+      if (brw->has_swizzling) {
+         /* Right shifts required to swizzle bits 9 and 10 of the memory
+          * address with bit 6.
+          */
+         param->swizzling[0] = 3;
+         param->swizzling[1] = 4;
+      }
+   } else if (mt->tiling == I915_TILING_Y) {
+      /* The layout of a Y-tiled surface in memory isn't really fundamentally
+       * different to the layout of an X-tiled surface, we simply pretend that
+       * the surface is broken up in a number of smaller 16Bx32 tiles, each
+       * one arranged in X-major order just like is the case for X-tiling.
+       */
+      param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
+      param->tiling[1] = _mesa_logbase2(32);
+
+      if (brw->has_swizzling) {
+         /* Right shift required to swizzle bit 9 of the memory address with
+          * bit 6.
+          */
+         param->swizzling[0] = 3; /* swizzling[1] stays disabled (0xff) */
+      }
+   } /* other tilings keep the zeroed tiling[] and disabled swizzling */
+
+   /* 3D textures are arranged in 2D in memory with 2^lod slices per row.  The
+    * address calculation algorithm (emit_address_calculation() in
+    * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
+    * modulus equal to the LOD.
+    */
+   param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
+                       0);
+}
+
+static void
+update_image_surface(struct brw_context *brw,
+                     struct gl_image_unit *u,
+                     GLenum access,
+                     unsigned surface_idx,
+                     uint32_t *surf_offset, /* out: emitted surface state */
+                     struct brw_image_param *param) /* out: image params */
+{
+   if (u->_Valid) {
+      struct gl_texture_object *obj = u->TexObj;
+      const unsigned format = get_image_format(brw, u->_ActualFormat, access);
+
+      if (obj->Target == GL_TEXTURE_BUFFER) {
+         struct intel_buffer_object *intel_obj =
+            intel_buffer_object(obj->BufferObject);
+         const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
+                                      _mesa_get_format_bytes(u->_ActualFormat));
+
+         brw->vtbl.emit_buffer_surface_state(
+            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
+            format, intel_obj->Base.Size / texel_size, texel_size,
+            access != GL_READ_ONLY); /* NOTE(review): size ignores offset? */
+
+         update_buffer_image_param(brw, u, surface_idx, param);
+
+      } else {
+         struct intel_texture_object *intel_obj = intel_texture_object(obj);
+         struct intel_mipmap_tree *mt = intel_obj->mt;
+
+         if (format == BRW_SURFACEFORMAT_RAW) { /* untyped byte view */
+            brw->vtbl.emit_buffer_surface_state(
+               brw, surf_offset, mt->bo, mt->offset,
+               format, mt->bo->size - mt->offset, 1 /* pitch */,
+               access != GL_READ_ONLY);
+
+         } else {
+            const unsigned min_layer = obj->MinLayer + u->_Layer;
+            const unsigned min_level = obj->MinLevel + u->Level;
+            const unsigned num_layers = (!u->Layered ? 1 :
+                                         obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
+                                         mt->logical_depth0);
+            const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
+                                   obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
+                                   GL_TEXTURE_2D_ARRAY : obj->Target);
+
+            brw->vtbl.emit_texture_surface_state(
+               brw, mt, target,
+               min_layer, min_layer + num_layers,
+               min_level, min_level + 1, /* presumably one mip level */
+               format, SWIZZLE_XYZW,
+               surf_offset, access != GL_READ_ONLY, false);
+         }
+
+         update_texture_image_param(brw, u, surface_idx, param);
+      }
+
+   } else { /* invalid unit: null surface plus default parameters */
+      brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
+      update_default_image_param(brw, u, surface_idx, param);
+   }
+}
+
+void
+brw_upload_image_surfaces(struct brw_context *brw,
+                          struct gl_shader *shader, /* may be NULL */
+                          struct brw_stage_state *stage_state,
+                          struct brw_stage_prog_data *prog_data)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   if (shader && shader->NumImages) { /* otherwise nothing to upload */
+      for (unsigned i = 0; i < shader->NumImages; i++) {
+         struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
+         const unsigned surf_idx = prog_data->binding_table.image_start + i;
+
+         update_image_surface(brw, u, shader->ImageAccess[i],
+                              surf_idx, /* also indexes surf_offset[] */
+                              &stage_state->surf_offset[surf_idx],
+                              &prog_data->image_param[i]);
+      }
+
+      brw->ctx.NewDriverState |= BRW_NEW_SURFACES; /* only if images exist */
+   }
+}
+
+static void
+brw_upload_wm_image_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+   /* BRW_NEW_FRAGMENT_PROGRAM: may be NULL, in which case nothing is done. */
+   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
+
+   if (prog) {
+      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS: upload its image state. */
+      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+                                &brw->wm.base, &brw->wm.prog_data->base);
+   }
+}
+
+const struct brw_tracked_state brw_wm_image_surfaces = {
+   .dirty = {
+      .brw = BRW_NEW_BATCH | /* presumably: re-emit per batch -- confirm */
+             BRW_NEW_FRAGMENT_PROGRAM |
+             BRW_NEW_FS_PROG_DATA |
+             BRW_NEW_IMAGE_UNITS
+   },
+   .emit = brw_upload_wm_image_surfaces, /* fragment-stage image upload */
+};
+
 void
 gen4_init_vtable_surface_functions(struct brw_context *brw)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_blorp.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_blorp.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_blorp.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_blorp.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -821,7 +821,7 @@
 
    /* 3DSTATE_DEPTH_BUFFER */
    {
-      intel_emit_depth_stall_flushes(brw);
+      brw_emit_depth_stall_flushes(brw);
 
       BEGIN_BATCH(7);
       /* 3DSTATE_DEPTH_BUFFER dw0 */
@@ -896,7 +896,7 @@
 gen6_blorp_emit_depth_disable(struct brw_context *brw,
                               const brw_blorp_params *params)
 {
-   intel_emit_depth_stall_flushes(brw);
+   brw_emit_depth_stall_flushes(brw);
 
    BEGIN_BATCH(7);
    OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -1021,7 +1021,7 @@
    uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
 
    /* Emit workaround flushes when we switch from drawing to blorping. */
-   intel_emit_post_sync_nonzero_flush(brw);
+   brw_emit_post_sync_nonzero_flush(brw);
 
    gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
    gen6_emit_3dstate_sample_mask(brw,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_cc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_cc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_cc.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_cc.c	2015-09-16 14:36:09.000000000 +0000
@@ -97,8 +97,8 @@
                    rb_type != GL_UNSIGNED_NORMALIZED &&
                    rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
                    "renderbuffer\n",
-                   _mesa_lookup_enum_by_nr(ctx->Color.LogicOp),
-                   _mesa_lookup_enum_by_nr(rb_type));
+                   _mesa_enum_to_string(ctx->Color.LogicOp),
+                   _mesa_enum_to_string(rb_type));
 	 if (rb_type == GL_UNSIGNED_NORMALIZED) {
 	    blend[b].blend1.logic_op_enable = 1;
 	    blend[b].blend1.logic_op_func =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_clip_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_clip_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_clip_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_clip_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,6 +31,7 @@
 #include "brw_util.h"
 #include "intel_batchbuffer.h"
 #include "main/fbobject.h"
+#include "main/framebuffer.h"
 
 static void
 upload_clip_state(struct brw_context *brw)
@@ -145,11 +146,14 @@
     * the viewport, so we can ignore this restriction.
     */
    if (brw->gen < 8) {
+      const float fb_width = (float)_mesa_geometric_width(fb);
+      const float fb_height = (float)_mesa_geometric_height(fb);
+
       for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
          if (ctx->ViewportArray[i].X != 0 ||
              ctx->ViewportArray[i].Y != 0 ||
-             ctx->ViewportArray[i].Width != (float) fb->Width ||
-             ctx->ViewportArray[i].Height != (float) fb->Height) {
+             ctx->ViewportArray[i].Width != fb_width ||
+             ctx->ViewportArray[i].Height != fb_height) {
             dw2 &= ~GEN6_CLIP_GB_TEST;
             break;
          }
@@ -179,7 +183,7 @@
 	     dw2);
    OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
              U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
-             (fb->MaxNumLayers > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) |
+             (_mesa_geometric_layers(fb) > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) |
              ((ctx->Const.MaxViewports - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK));
    ADVANCE_BATCH();
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_depth_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_depth_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_depth_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_depth_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -65,7 +65,7 @@
     */
    bool enable_hiz_ss = hiz || separate_stencil;
 
-   intel_emit_depth_stall_flushes(brw);
+   brw_emit_depth_stall_flushes(brw);
 
    irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
    if (!irb)
@@ -73,7 +73,7 @@
    rb = (struct gl_renderbuffer*) irb;
 
    if (rb) {
-      depth = MAX2(rb->Depth, 1);
+      depth = MAX2(irb->layer_count, 1);
       if (rb->TexImage)
          gl_target = rb->TexImage->TexObject->Target;
    }
@@ -89,6 +89,10 @@
       surftype = BRW_SURFACE_2D;
       depth *= 6;
       break;
+   case GL_TEXTURE_3D:
+      assert(mt);
+      depth = MAX2(mt->logical_depth0, 1);
+      /* fallthrough */
    default:
       surftype = translate_tex_target(gl_target);
       break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -147,7 +147,12 @@
 }
 
 void
-gen6_gs_visitor::visit(ir_emit_vertex *)
+gen6_gs_visitor::visit(ir_emit_vertex *ir)
+{
+   gs_emit_vertex(ir->stream_id());
+}
+void
+gen6_gs_visitor::gs_emit_vertex(int stream_id)
 {
    this->current_annotation = "gen6 emit vertex";
    /* Honor max_vertex layout indication in geometry shader by ignoring any
@@ -224,6 +229,12 @@
 void
 gen6_gs_visitor::visit(ir_end_primitive *)
 {
+   gs_end_primitive();
+}
+
+void
+gen6_gs_visitor::gs_end_primitive()
+{
    this->current_annotation = "gen6 end primitive";
    /* Calling EndPrimitive() is optional for point output. In this case we set
     * the PrimEnd flag when we process EmitVertex().
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_gs_visitor.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_gs_visitor.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_gs_visitor.h	2015-09-16 14:36:09.000000000 +0000
@@ -35,12 +35,15 @@
 class gen6_gs_visitor : public vec4_gs_visitor
 {
 public:
-   gen6_gs_visitor(struct brw_context *brw,
+   gen6_gs_visitor(const struct brw_compiler *comp,
+                   void *log_data,
                    struct brw_gs_compile *c,
                    struct gl_shader_program *prog,
                    void *mem_ctx,
-                   bool no_spills) :
-      vec4_gs_visitor(brw, c, prog, mem_ctx, no_spills) {}
+                   bool no_spills,
+                   int shader_time_index) :
+      vec4_gs_visitor(comp, log_data, c, prog, mem_ctx, no_spills,
+                      shader_time_index) {}
 
 protected:
    virtual void assign_binding_table_offsets();
@@ -48,6 +51,8 @@
    virtual void emit_thread_end();
    virtual void visit(ir_emit_vertex *);
    virtual void visit(ir_end_primitive *);
+   virtual void gs_emit_vertex(int stream_id);
+   virtual void gs_end_primitive();
    virtual void emit_urb_write_header(int mrf);
    virtual void emit_urb_write_opcode(bool complete,
                                       int base_mrf,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_multisample_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_multisample_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_multisample_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_multisample_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,7 @@
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_multisample_state.h"
+#include "main/framebuffer.h"
 
 void
 gen6_get_sample_position(struct gl_context *ctx,
@@ -34,7 +35,7 @@
 {
    uint8_t bits;
 
-   switch (fb->Visual.samples) {
+   switch (_mesa_geometric_samples(fb)) {
    case 1:
       result[0] = result[1] = 0.5f;
       return;
@@ -142,12 +143,11 @@
    ADVANCE_BATCH();
 }
 
-
 unsigned
 gen6_determine_sample_mask(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   float coverage = 1.0;
+   float coverage = 1.0f;
    float coverage_invert = false;
    unsigned sample_mask = ~0u;
 
@@ -165,7 +165,7 @@
    }
 
    if (num_samples > 1) {
-      int coverage_int = (int) (num_samples * coverage + 0.5);
+      int coverage_int = (int) (num_samples * coverage + 0.5f);
       uint32_t coverage_bits = (1 << coverage_int) - 1;
       if (coverage_invert)
          coverage_bits ^= (1 << num_samples) - 1;
@@ -175,7 +175,6 @@
    }
 }
 
-
 /**
  * 3DSTATE_SAMPLE_MASK
  */
@@ -188,15 +187,14 @@
    ADVANCE_BATCH();
 }
 
-
-static void upload_multisample_state(struct brw_context *brw)
+static void
+upload_multisample_state(struct brw_context *brw)
 {
    /* BRW_NEW_NUM_SAMPLES */
    gen6_emit_3dstate_multisample(brw, brw->num_samples);
    gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw));
 }
 
-
 const struct brw_tracked_state gen6_multisample_state = {
    .dirty = {
       .mesa = _NEW_MULTISAMPLE,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_queryobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_queryobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_queryobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_queryobj.c	2015-09-16 14:36:09.000000000 +0000
@@ -86,7 +86,7 @@
 write_primitives_generated(struct brw_context *brw,
                            drm_intel_bo *query_bo, int stream, int idx)
 {
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    if (brw->gen >= 7 && stream > 0) {
       brw_store_register_mem64(brw, query_bo,
@@ -100,7 +100,7 @@
 write_xfb_primitives_written(struct brw_context *brw,
                              drm_intel_bo *bo, int stream, int idx)
 {
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    if (brw->gen >= 7) {
       brw_store_register_mem64(brw, bo, GEN7_SO_NUM_PRIMS_WRITTEN(stream), idx);
@@ -157,7 +157,7 @@
    /* Emit a flush to make sure various parts of the pipeline are complete and
     * we get an accurate value
     */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    brw_store_register_mem64(brw, bo, reg, idx);
 }
@@ -246,7 +246,7 @@
        * and correctly emitted the number of pixel shader invocations, but,
        * whomever forgot to undo the multiply by 4.
        */
-      if (brw->gen >= 8 || brw->is_haswell)
+      if (brw->gen == 8 || brw->is_haswell)
          query->Base.Result /= 4;
       break;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_scissor_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_scissor_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_scissor_state.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_scissor_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -39,6 +39,8 @@
    const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
    struct gen6_scissor_rect *scissor;
    uint32_t scissor_state_offset;
+   const unsigned int fb_width= _mesa_geometric_width(ctx->DrawBuffer);
+   const unsigned int fb_height = _mesa_geometric_height(ctx->DrawBuffer);
 
    scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE,
 			     sizeof(*scissor) * ctx->Const.MaxViewports, 32,
@@ -56,7 +58,11 @@
    for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
       int bbox[4];
 
-      _mesa_scissor_bounding_box(ctx, ctx->DrawBuffer, i, bbox);
+      bbox[0] = 0;
+      bbox[1] = fb_width;
+      bbox[2] = 0;
+      bbox[3] = fb_height;
+      _mesa_intersect_scissor_bounding_box(ctx, i, bbox);
 
       if (bbox[0] == bbox[1] || bbox[2] == bbox[3]) {
          /* If the scissor was out of bounds and got clamped to 0 width/height
@@ -80,8 +86,8 @@
          /* memory: Y=0=top */
          scissor[i].xmin = bbox[0];
          scissor[i].xmax = bbox[1] - 1;
-         scissor[i].ymin = ctx->DrawBuffer->Height - bbox[3];
-         scissor[i].ymax = ctx->DrawBuffer->Height - bbox[2] - 1;
+         scissor[i].ymin = fb_height - bbox[3];
+         scissor[i].ymax = fb_height - bbox[2] - 1;
       }
    }
    BEGIN_BATCH(2);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_sf_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_sf_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_sf_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_sf_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,6 +31,7 @@
 #include "brw_util.h"
 #include "main/macros.h"
 #include "main/fbobject.h"
+#include "main/framebuffer.h"
 #include "intel_batchbuffer.h"
 
 /**
@@ -273,7 +274,7 @@
    int i;
    /* _NEW_BUFFER */
    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 
    const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
    float point_size;
@@ -361,27 +362,7 @@
 
    /* _NEW_LINE */
    {
-      float line_width = brw_get_line_width(brw);
-      uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
-
-      /* Line width of 0 is not allowed when MSAA enabled */
-      if (ctx->Multisample._Enabled) {
-         if (line_width_u3_7 == 0)
-             line_width_u3_7 = 1;
-      } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) {
-         /* For 1 pixel line thickness or less, the general
-          * anti-aliasing algorithm gives up, and a garbage line is
-          * generated.  Setting a Line Width of 0.0 specifies the
-          * rasterization of the "thinnest" (one-pixel-wide),
-          * non-antialiased lines.
-          *
-          * Lines rendered with zero Line Width are rasterized using
-          * Grid Intersection Quantization rules as specified by
-          * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
-          * Rasterization.
-          */
-         line_width_u3_7 = 0;
-      }
+      uint32_t line_width_u3_7 = brw_get_line_width(brw);
       dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
    }
    if (ctx->Line.SmoothFlag) {
@@ -402,7 +383,7 @@
    point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
    /* Clamp to the hardware limits and convert to fixed point */
-   dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+   dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
 
    /*
     * Window coordinates in an FBO are inverted, which means point
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_sol.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_sol.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_sol.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_sol.c	2015-09-16 14:36:09.000000000 +0000
@@ -292,5 +292,5 @@
     * simplicity, just do a full flush.
     */
    struct brw_context *brw = brw_context(ctx);
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_surface_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_surface_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_surface_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_surface_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -88,7 +88,8 @@
       break;
    }
 
-   const int min_array_element = layered ? 0 : irb->mt_layer;
+   const int min_array_element = irb->mt_layer;
+   assert(!layered || irb->mt_layer == 0);
 
    surf[0] = SET_FIELD(surftype, BRW_SURFACE_TYPE) |
              SET_FIELD(format, BRW_SURFACE_FORMAT);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_urb.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_urb.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_urb.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_urb.c	2015-09-16 14:36:09.000000000 +0000
@@ -120,7 +120,7 @@
     * a workaround.
     */
    if (brw->urb.gs_present && !gs_present)
-      intel_batchbuffer_emit_mi_flush(brw);
+      brw_emit_mi_flush(brw);
    brw->urb.gs_present = gs_present;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_viewport_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_viewport_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_viewport_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_viewport_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,7 @@
 #include "brw_defines.h"
 #include "intel_batchbuffer.h"
 #include "main/fbobject.h"
+#include "main/framebuffer.h"
 #include "main/viewport.h"
 
 /* The clip VP defines the guardband region where expensive clipping is skipped
@@ -93,14 +94,14 @@
    /* _NEW_BUFFERS */
    if (render_to_fbo) {
       y_scale = 1.0;
-      y_bias = 0;
+      y_bias = 0.0;
    } else {
       y_scale = -1.0;
-      y_bias = ctx->DrawBuffer->Height;
+      y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer);
    }
 
    for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
-      double scale[3], translate[3];
+      float scale[3], translate[3];
 
       /* _NEW_VIEWPORT */
       _mesa_get_viewport_xform(ctx, i, scale, translate);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_vs_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_vs_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_vs_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_vs_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -68,7 +68,7 @@
       _mesa_load_state_parameters(ctx, prog->Parameters);
 
       gl_constant_value *param;
-      int i;
+      unsigned i;
 
       param = brw_state_batch(brw, type,
 			      prog_data->nr_params * sizeof(gl_constant_value),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_wm_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_wm_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen6_wm_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen6_wm_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -33,6 +33,7 @@
 #include "program/program.h"
 #include "program/prog_parameter.h"
 #include "program/prog_statevars.h"
+#include "main/framebuffer.h"
 #include "intel_batchbuffer.h"
 
 static void
@@ -284,7 +285,7 @@
    const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
 
    /* _NEW_BUFFERS */
-   const bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 
    /* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
     * should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_blorp.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_blorp.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_blorp.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_blorp.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -645,7 +645,7 @@
 
    /* 3DSTATE_DEPTH_BUFFER */
    {
-      intel_emit_depth_stall_flushes(brw);
+      brw_emit_depth_stall_flushes(brw);
 
       BEGIN_BATCH(7);
       OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -696,7 +696,7 @@
 static void
 gen7_blorp_emit_depth_disable(struct brw_context *brw)
 {
-   intel_emit_depth_stall_flushes(brw);
+   brw_emit_depth_stall_flushes(brw);
 
    BEGIN_BATCH(7);
    OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -794,6 +794,8 @@
    }
    depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
    gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset);
+   if (brw->use_resource_streamer)
+      gen7_disable_hw_binding_tables(brw);
    if (params->use_wm_prog) {
       uint32_t wm_surf_offset_renderbuffer;
       uint32_t wm_surf_offset_texture = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_disable.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_disable.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_disable.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_disable.c	2015-09-16 14:36:09.000000000 +0000
@@ -52,7 +52,7 @@
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 
    /* Disable the TE */
@@ -85,7 +85,7 @@
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_gs_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_gs_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_gs_state.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_gs_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -59,7 +59,9 @@
       OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
                  GEN6_GS_SAMPLER_COUNT_SHIFT) |
                 ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
-                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+                (brw->is_haswell && prog_data->base.nr_image_params ?
+                 HSW_GS_UAV_ACCESS_ENABLE : 0));
 
       if (brw->gs.prog_data->base.base.total_scratch) {
          OUT_RELOC(stage_state->scratch_bo,
@@ -112,7 +114,7 @@
           GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
          ((brw->gs.prog_data->invocations - 1) <<
           GEN7_GS_INSTANCE_CONTROL_SHIFT) |
-         brw->gs.prog_data->dispatch_mode |
+         SET_FIELD(prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) |
          GEN6_GS_STATISTICS_ENABLE |
          (brw->gs.prog_data->include_primitive_id ?
           GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_misc_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_misc_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_misc_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_misc_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,7 +57,7 @@
       return;
    }
 
-   intel_emit_depth_stall_flushes(brw);
+   brw_emit_depth_stall_flushes(brw);
 
    irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
    if (!irb)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_sf_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_sf_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_sf_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_sf_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -27,6 +27,7 @@
 #include "brw_util.h"
 #include "main/macros.h"
 #include "main/fbobject.h"
+#include "main/framebuffer.h"
 #include "intel_batchbuffer.h"
 
 static void
@@ -109,7 +110,7 @@
    float point_size;
    /* _NEW_BUFFERS */
    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
-   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 
    dw1 = GEN6_SF_STATISTICS_ENABLE;
 
@@ -192,26 +193,7 @@
 
    /* _NEW_LINE */
    {
-      float line_width = brw_get_line_width(brw);
-      uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
-      /* Line width of 0 is not allowed when MSAA enabled */
-      if (ctx->Multisample._Enabled) {
-         if (line_width_u3_7 == 0)
-             line_width_u3_7 = 1;
-      } else if (ctx->Line.SmoothFlag && ctx->Line.Width < 1.5) {
-         /* For 1 pixel line thickness or less, the general
-          * anti-aliasing algorithm gives up, and a garbage line is
-          * generated.  Setting a Line Width of 0.0 specifies the
-          * rasterization of the "thinnest" (one-pixel-wide),
-          * non-antialiased lines.
-          *
-          * Lines rendered with zero Line Width are rasterized using
-          * Grid Intersection Quantization rules as specified by
-          * bspec section 6.3.12.1 Zero-Width (Cosmetic) Line
-          * Rasterization.
-          */
-         line_width_u3_7 = 0;
-      }
+      uint32_t line_width_u3_7 = brw_get_line_width(brw);
       dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
    }
    if (ctx->Line.SmoothFlag) {
@@ -238,7 +220,7 @@
    point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
    /* Clamp to the hardware limits and convert to fixed point */
-   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+   dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
 
    /* _NEW_LIGHT */
    if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_sol_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_sol_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_sol_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_sol_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -116,7 +116,7 @@
    /* Construct the list of SO_DECLs to be emitted.  The formatting of the
     * command is feels strange -- each dword pair contains a SO_DECL per stream.
     */
-   for (int i = 0; i < linked_xfb_info->NumOutputs; i++) {
+   for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
       int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
       uint16_t decl = 0;
       int varying = linked_xfb_info->Outputs[i].OutputRegister;
@@ -365,7 +365,7 @@
    }
 
    /* Flush any drawing so that the counters have the right values. */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
    for (int i = 0; i < streams; i++) {
@@ -502,7 +502,7 @@
       (struct brw_transform_feedback_object *) obj;
 
    /* Flush any drawing so that the counters have the right values. */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    /* Save the SOL buffer offset register values. */
    if (brw->gen < 8) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_urb.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_urb.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_urb.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_urb.c	2015-09-16 14:36:09.000000000 +0000
@@ -228,7 +228,7 @@
       remaining_space = total_wants;
    if (remaining_space > 0) {
       unsigned vs_additional = (unsigned)
-         round(vs_wants * (((double) remaining_space) / total_wants));
+         roundf(vs_wants * (((float) remaining_space) / total_wants));
       vs_chunks += vs_additional;
       remaining_space -= vs_additional;
       gs_chunks += remaining_space;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_viewport_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_viewport_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_viewport_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_viewport_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,7 @@
 #include "brw_defines.h"
 #include "intel_batchbuffer.h"
 #include "main/fbobject.h"
+#include "main/framebuffer.h"
 #include "main/viewport.h"
 
 static void
@@ -45,14 +46,14 @@
    /* _NEW_BUFFERS */
    if (render_to_fbo) {
       y_scale = 1.0;
-      y_bias = 0;
+      y_bias = 0.0;
    } else {
       y_scale = -1.0;
-      y_bias = ctx->DrawBuffer->Height;
+      y_bias = (float)_mesa_geometric_height(ctx->DrawBuffer);
    }
 
    for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
-      double scale[3], translate[3];
+      float scale[3], translate[3];
       _mesa_get_viewport_xform(ctx, i, scale, translate);
 
       /* According to the "Vertex X,Y Clamping and Quantization" section of
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_vs_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_vs_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_vs_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_vs_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -62,6 +62,7 @@
       OUT_BATCH(active ? stage_state->push_const_size : 0);
       OUT_BATCH(0);
    }
+
    /* Pointer to the constant buffer.  Covered by the set of state flags
     * from gen6_prepare_wm_contants
     */
@@ -95,15 +96,14 @@
 
    ADVANCE_BATCH();
 
-  /* On SKL+ the new constants don't take effect until the next corresponding
-   * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
-   * that is sent
-   */
+   /* On SKL+ the new constants don't take effect until the next corresponding
+    * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
+    * that is sent
+    */
    if (brw->gen >= 9)
       brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 }
 
-
 static void
 upload_vs_state(struct brw_context *brw)
 {
@@ -111,6 +111,7 @@
    uint32_t floating_point_mode = 0;
    const int max_threads_shift = brw->is_haswell ?
       HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
+   const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base;
 
    if (!brw->is_haswell && !brw->is_baytrail)
       gen7_emit_vs_workaround_flush(brw);
@@ -125,19 +126,21 @@
 	     ((ALIGN(stage_state->sampler_count, 4)/4) <<
               GEN6_VS_SAMPLER_COUNT_SHIFT) |
              ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
-              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+             (brw->is_haswell && prog_data->base.nr_image_params ?
+              HSW_VS_UAV_ACCESS_ENABLE : 0));
 
-   if (brw->vs.prog_data->base.base.total_scratch) {
+   if (prog_data->base.total_scratch) {
       OUT_RELOC(stage_state->scratch_bo,
 		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		ffs(brw->vs.prog_data->base.base.total_scratch) - 11);
+		ffs(prog_data->base.total_scratch) - 11);
    } else {
       OUT_BATCH(0);
    }
 
-   OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg <<
+   OUT_BATCH((prog_data->base.dispatch_grf_start_reg <<
               GEN6_VS_DISPATCH_START_GRF_SHIFT) |
-	     (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+	     (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
    OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_wm_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_wm_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen7_wm_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen7_wm_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -30,6 +30,7 @@
 #include "program/program.h"
 #include "program/prog_parameter.h"
 #include "program/prog_statevars.h"
+#include "main/framebuffer.h"
 #include "intel_batchbuffer.h"
 
 static void
@@ -45,7 +46,7 @@
    uint32_t dw1, dw2;
 
    /* _NEW_BUFFERS */
-   bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
+   const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
 
    dw1 = dw2 = 0;
    dw1 |= GEN7_WM_STATISTICS_ENABLE;
@@ -76,8 +77,13 @@
       dw1 |= GEN7_WM_KILL_ENABLE;
    }
 
+   if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) {
+      dw1 |= GEN7_WM_DISPATCH_ENABLE;
+   }
+
    /* _NEW_BUFFERS | _NEW_COLOR */
    if (brw_color_buffer_write_enabled(brw) || writes_depth ||
+       prog_data->base.nr_image_params ||
        dw1 & GEN7_WM_KILL_ENABLE) {
       dw1 |= GEN7_WM_DISPATCH_ENABLE;
    }
@@ -101,6 +107,18 @@
       dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK;
    }
 
+   /* BRW_NEW_FS_PROG_DATA */
+   if (prog_data->early_fragment_tests)
+      dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS;
+   else if (prog_data->base.nr_image_params)
+      dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
+
+   /* _NEW_BUFFERS | _NEW_COLOR */
+   if (brw->is_haswell &&
+       !(brw_color_buffer_write_enabled(brw) || writes_depth) &&
+       prog_data->base.nr_image_params)
+      dw2 |= HSW_WM_UAV_ONLY;
+
    BEGIN_BATCH(3);
    OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
    OUT_BATCH(dw1);
@@ -122,7 +140,7 @@
    .emit = upload_wm_state,
 };
 
-void
+static void
 gen7_upload_ps_state(struct brw_context *brw,
                      const struct gl_fragment_program *fp,
                      const struct brw_stage_state *stage_state,
@@ -203,6 +221,9 @@
       _mesa_get_min_invocations_per_fragment(ctx, fp, false);
    assert(min_inv_per_frag >= 1);
 
+   if (brw->is_haswell && prog_data->base.nr_image_params)
+      dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
+
    if (prog_data->prog_offset_16 || prog_data->no_8) {
       dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
       if (!prog_data->no_8 && min_inv_per_frag == 1) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_depth_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_depth_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_depth_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_depth_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -41,7 +41,6 @@
                    bool depth_writable,
                    struct intel_mipmap_tree *stencil_mt,
                    bool stencil_writable,
-                   uint32_t stencil_offset,
                    bool hiz,
                    uint32_t width,
                    uint32_t height,
@@ -57,7 +56,7 @@
       return;
    }
 
-   intel_emit_depth_stall_flushes(brw);
+   brw_emit_depth_stall_flushes(brw);
 
    /* _NEW_BUFFERS, _NEW_DEPTH, _NEW_STENCIL */
    BEGIN_BATCH(8);
@@ -100,7 +99,7 @@
    }
 
    if (stencil_mt == NULL) {
-     BEGIN_BATCH(5);
+      BEGIN_BATCH(5);
       OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2));
       OUT_BATCH(0);
       OUT_BATCH(0);
@@ -127,8 +126,7 @@
       OUT_BATCH(HSW_STENCIL_ENABLED | mocs_wb << 22 |
                 (2 * stencil_mt->pitch - 1));
       OUT_RELOC64(stencil_mt->bo,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                  stencil_offset);
+                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
       OUT_BATCH(stencil_mt ? stencil_mt->qpitch >> 2 : 0);
       ADVANCE_BATCH();
    }
@@ -220,7 +218,6 @@
    emit_depth_packets(brw, depth_mt, brw_depthbuffer_format(brw), surftype,
                       ctx->Depth.Mask != 0,
                       stencil_mt, ctx->Stencil._WriteEnabled,
-                      brw->depthstencil.stencil_offset,
                       hiz, width, height, depth, lod, min_array_element);
 }
 
@@ -253,10 +250,10 @@
     */
    const bool hiz_enabled = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
 
-   /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2).
-    * We always leave this set to EDSC_NORMAL (0).
+   /* BRW_NEW_FS_PROG_DATA:
+    * 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2).
     */
-   const bool edsc_not_preps = true;
+   const bool edsc_not_preps = !brw->wm.prog_data->early_fragment_tests;
 
    /* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */
    const bool pixel_shader_valid = true;
@@ -439,7 +436,7 @@
                       brw_depth_format(brw, mt->format),
                       BRW_SURFACE_2D,
                       true, /* depth writes */
-                      NULL, false, 0, /* no stencil for now */
+                      NULL, false, /* no stencil for now */
                       true, /* hiz */
                       surface_width,
                       surface_height,
@@ -499,7 +496,7 @@
     */
    brw_emit_pipe_control_write(brw,
                                PIPE_CONTROL_WRITE_IMMEDIATE,
-                               brw->batch.workaround_bo, 0, 0, 0);
+                               brw->workaround_bo, 0, 0, 0);
 
    /* Emit 3DSTATE_WM_HZ_OP again to disable the state overrides. */
    BEGIN_BATCH(5);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_disable.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_disable.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_disable.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_disable.c	2015-09-16 14:36:09.000000000 +0000
@@ -66,7 +66,7 @@
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 
    /* Disable the TE */
@@ -101,7 +101,7 @@
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
-   OUT_BATCH(0);
+   OUT_BATCH(brw->hw_bt_pool.next_offset);
    ADVANCE_BATCH();
 
    BEGIN_BATCH(2);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_draw_upload.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_draw_upload.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_draw_upload.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_draw_upload.c	2015-09-16 14:36:09.000000000 +0000
@@ -40,16 +40,25 @@
 {
    struct gl_context *ctx = &brw->ctx;
    uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+   bool uses_edge_flag;
 
    brw_prepare_vertices(brw);
    brw_prepare_shader_draw_parameters(brw);
 
+   uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+                     ctx->Polygon.BackMode != GL_FILL);
+
    if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
       unsigned vue = brw->vb.nr_enabled;
 
-      WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
-                "Using VID/IID with edgeflags, need to reorder the "
-                "vertex attributes");
+      /* The element for the edge flags must always be last, so we have to
+       * insert the SGVS before it in that case.
+       */
+      if (uses_edge_flag) {
+         assert(vue > 0);
+         vue--;
+      }
+
       WARN_ONCE(vue >= 33,
                 "Trying to insert VID/IID past 33rd vertex element, "
                 "need to reorder the vertex attrbutes.");
@@ -138,7 +147,18 @@
       ADVANCE_BATCH();
    }
 
-   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
+   /* Normally we don't need an element for the SGVS attribute because the
+    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
+    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
+    * vertex ID is used then it needs an element for the base vertex buffer.
+    * Additionally if there is an edge flag element then the SGVS can't be
+    * inserted past that so we need a dummy element to ensure that the edge
+    * flag is the last one.
+    */
+   bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
+                              (brw->vs.prog_data->uses_instanceid &&
+                               uses_edge_flag));
+   unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
 
    /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
     * presumably for VertexID/InstanceID.
@@ -192,6 +212,24 @@
                 (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
    }
 
+   if (needs_sgvs_element) {
+      if (brw->vs.prog_data->uses_vertexid) {
+         OUT_BATCH(GEN6_VE0_VALID |
+                   brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                   BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      } else {
+         OUT_BATCH(GEN6_VE0_VALID);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      }
+   }
+
    if (gen6_edgeflag_input) {
       uint32_t format =
          brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
@@ -206,25 +244,26 @@
                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
    }
-
-   if (brw->vs.prog_data->uses_vertexid) {
-      OUT_BATCH(GEN6_VE0_VALID |
-                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
-                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
    ADVANCE_BATCH();
 
-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
+   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
       const struct brw_vertex_element *input = brw->vb.enabled[i];
       const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
+      unsigned element_index;
+
+      /* The edge flag element is reordered to be the last one in the code
+       * above so we need to compensate for that in the element indices used
+       * below.
+       */
+      if (input == gen6_edgeflag_input)
+         element_index = nr_elements - 1;
+      else
+         element_index = j++;
 
       BEGIN_BATCH(3);
       OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-      OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
+      OUT_BATCH(element_index |
+                (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
       OUT_BATCH(buffer->step_rate);
       ADVANCE_BATCH();
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_gs_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_gs_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_gs_state.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_gs_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -48,21 +48,18 @@
       OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2));
       OUT_BATCH(stage_state->prog_offset);
       OUT_BATCH(0);
-      OUT_BATCH(GEN6_GS_VECTOR_MASK_ENABLE |
-                brw->geometry_program->VerticesIn |
+      OUT_BATCH(brw->geometry_program->VerticesIn |
                 ((ALIGN(stage_state->sampler_count, 4)/4) <<
                  GEN6_GS_SAMPLER_COUNT_SHIFT) |
                 ((prog_data->base.binding_table.size_bytes / 4) <<
-                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+                (prog_data->base.nr_image_params ?
+                 HSW_GS_UAV_ACCESS_ENABLE : 0));
 
       if (brw->gs.prog_data->base.base.total_scratch) {
          OUT_RELOC64(stage_state->scratch_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      ffs(brw->gs.prog_data->base.base.total_scratch) - 11);
-         WARN_ONCE(true,
-                   "May need to implement a temporary workaround: GS Number of "
-                   "URB Entries must be less than or equal to the GS Maximum "
-                   "Number of Threads.\n");
       } else {
          OUT_BATCH(0);
          OUT_BATCH(0);
@@ -81,7 +78,8 @@
 
       uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords <<
                       GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
-                      brw->gs.prog_data->dispatch_mode |
+                     SET_FIELD(prog_data->dispatch_mode,
+                               GEN7_GS_DISPATCH_MODE) |
                      ((brw->gs.prog_data->invocations - 1) <<
                       GEN7_GS_INSTANCE_CONTROL_SHIFT) |
                       GEN6_GS_STATISTICS_ENABLE |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_ps_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_ps_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_ps_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_ps_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -61,6 +61,10 @@
    if (brw->gen >= 9 && prog_data->pulls_bary)
       dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
 
+   if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
+       prog_data->base.nr_image_params)
+      dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
    OUT_BATCH(dw1);
@@ -75,7 +79,7 @@
       brw_fragment_program_const(brw->fragment_program);
    /* BRW_NEW_FS_PROG_DATA */
    const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
-   /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */
+   /* BRW_NEW_NUM_SAMPLES */
    const bool multisampled_fbo = brw->num_samples > 1;
 
    gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo);
@@ -83,7 +87,7 @@
 
 const struct brw_tracked_state gen8_ps_extra = {
    .dirty = {
-      .mesa  = _NEW_MULTISAMPLE,
+      .mesa  = 0,
       .brw   = BRW_NEW_CONTEXT |
                BRW_NEW_FRAGMENT_PROGRAM |
                BRW_NEW_FS_PROG_DATA |
@@ -115,6 +119,12 @@
    dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
       GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
+   /* BRW_NEW_FS_PROG_DATA */
+   if (brw->wm.prog_data->early_fragment_tests)
+      dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS;
+   else if (brw->wm.prog_data->base.nr_image_params)
+      dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
+
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2));
    OUT_BATCH(dw1);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_sf_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_sf_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_sf_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_sf_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -154,10 +154,7 @@
        dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
 
    /* _NEW_LINE */
-   float line_width = brw_get_line_width(brw);
-   uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
-   if (line_width_u3_7 == 0)
-      line_width_u3_7 = 1;
+   uint32_t line_width_u3_7 = brw_get_line_width(brw);
    if (brw->gen >= 9 || brw->is_cherryview) {
       dw1 |= line_width_u3_7 << GEN9_SF_LINE_WIDTH_SHIFT;
    } else {
@@ -172,7 +169,7 @@
    point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
    /* Clamp to the hardware limits and convert to fixed point */
-   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+   dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
 
    /* _NEW_PROGRAM | _NEW_POINT */
    if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_surface_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_surface_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_surface_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_surface_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -57,6 +57,19 @@
 }
 
 static uint32_t
+surface_tiling_resource_mode(uint32_t tr_mode)
+{
+   switch (tr_mode) {
+   case INTEL_MIPTREE_TRMODE_YF:
+      return GEN9_SURFACE_TRMODE_TILEYF;
+   case INTEL_MIPTREE_TRMODE_YS:
+      return GEN9_SURFACE_TRMODE_TILEYS;
+   default:
+      return GEN9_SURFACE_TRMODE_NONE;
+   }
+}
+
+static uint32_t
 surface_tiling_mode(uint32_t tiling)
 {
    switch (tiling) {
@@ -70,8 +83,18 @@
 }
 
 static unsigned
-vertical_alignment(const struct intel_mipmap_tree *mt)
+vertical_alignment(const struct brw_context *brw,
+                   const struct intel_mipmap_tree *mt,
+                   uint32_t surf_type)
 {
+   /* On Gen9+ vertical alignment is ignored for 1D surfaces and when
+    * tr_mode is not TRMODE_NONE. Set to an arbitrary non-reserved value.
+    */
+   if (brw->gen > 8 &&
+       (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
+        surf_type == BRW_SURFACE_1D))
+      return GEN8_SURFACE_VALIGN_4;
+
    switch (mt->align_h) {
    case 4:
       return GEN8_SURFACE_VALIGN_4;
@@ -85,8 +108,18 @@
 }
 
 static unsigned
-horizontal_alignment(const struct intel_mipmap_tree *mt)
+horizontal_alignment(const struct brw_context *brw,
+                     const struct intel_mipmap_tree *mt,
+                     uint32_t surf_type)
 {
+   /* On Gen9+ horizontal alignment is ignored when tr_mode is not TRMODE_NONE
+    * or for linear 1D layouts. Set to an arbitrary non-reserved value.
+    */
+   if (brw->gen > 8 &&
+       (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
+        gen9_use_linear_1d_layout(brw, mt)))
+      return GEN8_SURFACE_HALIGN_4;
+
    switch (mt->align_w) {
    case 4:
       return GEN8_SURFACE_HALIGN_4;
@@ -166,6 +199,7 @@
    uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
    int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
    unsigned tiling_mode, pitch;
+   const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
 
    if (mt->format == MESA_FORMAT_S_UINT8) {
       tiling_mode = GEN8_SURFACE_TILING_W;
@@ -178,18 +212,29 @@
    if (mt->mcs_mt) {
       aux_mt = mt->mcs_mt;
       aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
+
+      /*
+       * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
+       * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
+       *
+       * From the hardware spec for GEN9:
+       * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
+       *  16 must be used."
+       */
+      assert(brw->gen < 9 || mt->align_w == 16);
+      assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
    }
 
+   const uint32_t surf_type = translate_tex_target(target);
    uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index);
 
-   surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT |
+   surf[0] = SET_FIELD(surf_type, BRW_SURFACE_TYPE) |
              format << BRW_SURFACE_FORMAT_SHIFT |
-             vertical_alignment(mt) |
-             horizontal_alignment(mt) |
+             vertical_alignment(brw, mt, surf_type) |
+             horizontal_alignment(brw, mt, surf_type) |
              tiling_mode;
 
-   if (target == GL_TEXTURE_CUBE_MAP ||
-       target == GL_TEXTURE_CUBE_MAP_ARRAY) {
+   if (surf_type == BRW_SURFACE_CUBE) {
       surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
    }
 
@@ -210,6 +255,12 @@
    surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) |
              (max_level - min_level - 1); /* mip count */
 
+   if (brw->gen >= 9) {
+      surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);
+      /* Disable Mip Tail by setting a large value. */
+      surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD);
+   }
+
    if (aux_mt) {
       surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
                 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
@@ -340,6 +391,7 @@
    unsigned height = mt->logical_height0;
    unsigned pitch = mt->pitch;
    uint32_t tiling = mt->tiling;
+   unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode);
    uint32_t format = 0;
    uint32_t surf_type;
    uint32_t offset;
@@ -390,6 +442,17 @@
    if (mt->mcs_mt) {
       aux_mt = mt->mcs_mt;
       aux_mode = GEN8_SURFACE_AUX_MODE_MCS;
+
+      /*
+       * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE):
+       * "When MCS is enabled for non-MSRT, HALIGN_16 must be used"
+       *
+       * From the hardware spec for GEN9:
+       * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
+       *  16 must be used."
+       */
+      assert(brw->gen < 9 || mt->align_w == 16);
+      assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
    }
 
    uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
@@ -397,8 +460,8 @@
    surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) |
              (is_array ? GEN7_SURFACE_IS_ARRAY : 0) |
              (format << BRW_SURFACE_FORMAT_SHIFT) |
-             vertical_alignment(mt) |
-             horizontal_alignment(mt) |
+             vertical_alignment(brw, mt, surf_type) |
+             horizontal_alignment(brw, mt, surf_type) |
              surface_tiling_mode(tiling);
 
    surf[1] = SET_FIELD(mocs, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
@@ -417,6 +480,12 @@
 
    surf[5] = irb->mt_level - irb->mt->first_level;
 
+   if (brw->gen >= 9) {
+      surf[5] |= SET_FIELD(tr_mode, GEN9_SURFACE_TRMODE);
+      /* Disable Mip Tail by setting a large value. */
+      surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD);
+   }
+
    if (aux_mt) {
       surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
                 SET_FIELD((aux_mt->pitch / 128) - 1, GEN8_SURFACE_AUX_PITCH) |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_viewport_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_viewport_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_viewport_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_viewport_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,7 @@
 #include "brw_defines.h"
 #include "intel_batchbuffer.h"
 #include "main/fbobject.h"
+#include "main/framebuffer.h"
 #include "main/viewport.h"
 
 static void
@@ -33,6 +34,7 @@
 {
    struct gl_context *ctx = &brw->ctx;
    float y_scale, y_bias;
+   const float fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer);
    const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 
    float *vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
@@ -47,11 +49,11 @@
       y_bias = 0;
    } else {
       y_scale = -1.0;
-      y_bias = ctx->DrawBuffer->Height;
+      y_bias = fb_height;
    }
 
    for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
-      double scale[3], translate[3];
+      float scale[3], translate[3];
       _mesa_get_viewport_xform(ctx, i, scale, translate);
 
       /* _NEW_VIEWPORT: Viewport Matrix Elements */
@@ -116,8 +118,8 @@
       } else {
          vp[12] = ctx->ViewportArray[i].X;
          vp[13] = viewport_Xmax - 1;
-         vp[14] = ctx->DrawBuffer->Height - viewport_Ymax;
-         vp[15] = ctx->DrawBuffer->Height - ctx->ViewportArray[i].Y - 1;
+         vp[14] = fb_height - viewport_Ymax;
+         vp[15] = fb_height - ctx->ViewportArray[i].Y - 1;
       }
 
       vp += 16;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_vs_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_vs_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/gen8_vs_state.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/gen8_vs_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -39,6 +39,9 @@
    /* BRW_NEW_VS_PROG_DATA */
    const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base;
 
+   assert(prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
+          prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);
+
    if (prog_data->base.use_alt_mode)
       floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT;
 
@@ -50,7 +53,9 @@
              ((ALIGN(stage_state->sampler_count, 4) / 4) <<
                GEN6_VS_SAMPLER_COUNT_SHIFT) |
              ((prog_data->base.binding_table.size_bytes / 4) <<
-               GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+               GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+             (prog_data->base.nr_image_params ?
+              HSW_VS_UAV_ACCESS_ENABLE : 0));
 
    if (prog_data->base.total_scratch) {
       OUT_RELOC64(stage_state->scratch_bo,
@@ -66,7 +71,8 @@
              (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
              (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
-   uint32_t simd8_enable = prog_data->simd8 ? GEN8_VS_SIMD8_ENABLE : 0;
+   uint32_t simd8_enable = prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ?
+      GEN8_VS_SIMD8_ENABLE : 0;
    OUT_BATCH(((brw->max_vs_threads - 1) << HSW_VS_MAX_THREADS_SHIFT) |
              GEN6_VS_STATISTICS_ENABLE |
              simd8_enable |
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_batchbuffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_batchbuffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_batchbuffer.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_batchbuffer.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,8 @@
 #include "intel_buffers.h"
 #include "intel_fbo.h"
 #include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
 
 #include <xf86drm.h>
 #include <i915_drm.h>
@@ -44,19 +46,10 @@
 {
    intel_batchbuffer_reset(brw);
 
-   if (brw->gen >= 6) {
-      /* We can't just use brw_state_batch to get a chunk of space for
-       * the gen6 workaround because it involves actually writing to
-       * the buffer, and the kernel doesn't let us write to the batch.
-       */
-      brw->batch.workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
-						      "pipe_control workaround",
-						      4096, 4096);
-   }
-
    if (!brw->has_llc) {
       brw->batch.cpu_map = malloc(BATCH_SZ);
       brw->batch.map = brw->batch.cpu_map;
+      brw->batch.map_next = brw->batch.cpu_map;
    }
 }
 
@@ -77,12 +70,11 @@
       drm_intel_bo_map(brw->batch.bo, true);
       brw->batch.map = brw->batch.bo->virtual;
    }
+   brw->batch.map_next = brw->batch.map;
 
    brw->batch.reserved_space = BATCH_RESERVED;
    brw->batch.state_batch_offset = brw->batch.bo->size;
-   brw->batch.used = 0;
    brw->batch.needs_sol_reset = false;
-   brw->batch.pipe_controls_since_last_cs_stall = 0;
 
    /* We don't know what ring the new batch will be sent to until we see the
     * first BEGIN_BATCH or BEGIN_BATCH_BLT.  Mark it as unknown.
@@ -93,7 +85,7 @@
 void
 intel_batchbuffer_save_state(struct brw_context *brw)
 {
-   brw->batch.saved.used = brw->batch.used;
+   brw->batch.saved.map_next = brw->batch.map_next;
    brw->batch.saved.reloc_count =
       drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
 }
@@ -103,8 +95,8 @@
 {
    drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);
 
-   brw->batch.used = brw->batch.saved.used;
-   if (brw->batch.used == 0)
+   brw->batch.map_next = brw->batch.saved.map_next;
+   if (USED_BATCH(brw->batch) == 0)
       brw->batch.ring = UNKNOWN_RING;
 }
 
@@ -114,7 +106,6 @@
    free(brw->batch.cpu_map);
    drm_intel_bo_unreference(brw->batch.last_bo);
    drm_intel_bo_unreference(brw->batch.bo);
-   drm_intel_bo_unreference(brw->batch.workaround_bo);
 }
 
 static void
@@ -133,7 +124,7 @@
       drm_intel_decode_set_batch_pointer(decode,
 					 batch->bo->virtual,
 					 batch->bo->offset64,
-					 batch->used);
+                                         USED_BATCH(*batch));
    } else {
       fprintf(stderr,
 	      "WARNING: failed to map batchbuffer (%s), "
@@ -142,7 +133,7 @@
       drm_intel_decode_set_batch_pointer(decode,
 					 batch->map,
 					 batch->bo->offset64,
-					 batch->used);
+                                         USED_BATCH(*batch));
    }
 
    drm_intel_decode_set_output_file(decode, stderr);
@@ -218,10 +209,32 @@
     */
    brw_emit_query_end(brw);
 
-   /* We may also need to snapshot and disable OA counters. */
-   if (brw->batch.ring == RENDER_RING)
+   if (brw->batch.ring == RENDER_RING) {
+      /* We may also need to snapshot and disable OA counters. */
       brw_perf_monitor_finish_batch(brw);
 
+      if (brw->is_haswell) {
+         /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
+          * 3DSTATE_CC_STATE_POINTERS > "Note":
+          *
+          * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
+          *  3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
+          *
+          * The example in the docs also seems to expect a regular pipe control
+          * flush here. We may already have emitted one, but emit it again anyway.
+          *
+          * See also WaAvoidRCZCounterRollover.
+          */
+         brw_emit_mi_flush(brw);
+         BEGIN_BATCH(2);
+         OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+         OUT_BATCH(brw->cc.state_offset | 1);
+         ADVANCE_BATCH();
+         brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                          PIPE_CONTROL_CS_STALL);
+      }
+   }
+
    /* Mark that the current program cache BO has been used by the GPU.
     * It will be reallocated if we need to put new programs in for the
     * next batch.
@@ -267,6 +280,11 @@
    }
 }
 
+/* Fallback; drop once libdrm headers define the resource streamer flag */
+#ifndef I915_EXEC_RESOURCE_STREAMER
+#define I915_EXEC_RESOURCE_STREAMER (1<<15)
+#endif
+
 /* TODO: Push this whole function into bufmgr.
  */
 static int
@@ -278,7 +296,7 @@
    if (brw->has_llc) {
       drm_intel_bo_unmap(batch->bo);
    } else {
-      ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
+      ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(*batch), batch->map);
       if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
 	 ret = drm_intel_bo_subdata(batch->bo,
 				    batch->state_batch_offset,
@@ -293,7 +311,8 @@
       if (brw->gen >= 6 && batch->ring == BLT_RING) {
          flags = I915_EXEC_BLT;
       } else {
-         flags = I915_EXEC_RENDER;
+         flags = I915_EXEC_RENDER |
+            (brw->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0);
       }
       if (batch->needs_sol_reset)
 	 flags |= I915_EXEC_GEN7_SOL_RESET;
@@ -303,11 +322,11 @@
             brw_annotate_aub(brw);
 
 	 if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) {
-	    ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
-					flags);
+            ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch),
+                                        NULL, 0, 0, flags);
 	 } else {
 	    ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
-						4 * batch->used, flags);
+                                                4 * USED_BATCH(*batch), flags);
 	 }
       }
 
@@ -331,7 +350,7 @@
 {
    int ret;
 
-   if (brw->batch.used == 0)
+   if (USED_BATCH(brw->batch) == 0)
       return 0;
 
    if (brw->throttle_batch[0] == NULL) {
@@ -340,7 +359,7 @@
    }
 
    if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
-      int bytes_for_commands = 4 * brw->batch.used;
+      int bytes_for_commands = 4 * USED_BATCH(brw->batch);
       int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset;
       int total_bytes = bytes_for_commands + bytes_for_state;
       fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + "
@@ -356,7 +375,7 @@
 
    /* Mark the end of the buffer. */
    intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
-   if (brw->batch.used & 1) {
+   if (USED_BATCH(brw->batch) & 1) {
       /* Round batchbuffer usage to 2 DWORDs. */
       intel_batchbuffer_emit_dword(brw, MI_NOOP);
    }
@@ -373,6 +392,9 @@
       drm_intel_bo_wait_rendering(brw->batch.bo);
    }
 
+   if (brw->use_resource_streamer)
+      gen7_reset_hw_bt_pool_offsets(brw);
+
    /* Start a new batch buffer. */
    brw_new_batch(brw);
 
@@ -382,15 +404,15 @@
 
 /*  This is the only way buffers get added to the validate list.
  */
-bool
-intel_batchbuffer_emit_reloc(struct brw_context *brw,
-                             drm_intel_bo *buffer,
-                             uint32_t read_domains, uint32_t write_domain,
-			     uint32_t delta)
+uint32_t
+intel_batchbuffer_reloc(struct brw_context *brw,
+                        drm_intel_bo *buffer, uint32_t offset,
+                        uint32_t read_domains, uint32_t write_domain,
+                        uint32_t delta)
 {
    int ret;
 
-   ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+   ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
 				 buffer, delta,
 				 read_domains, write_domain);
    assert(ret == 0);
@@ -400,18 +422,16 @@
     * case the buffer doesn't move and we can short-circuit the relocation
     * processing in the kernel
     */
-   intel_batchbuffer_emit_dword(brw, buffer->offset64 + delta);
-
-   return true;
+   return buffer->offset64 + delta;
 }
 
-bool
-intel_batchbuffer_emit_reloc64(struct brw_context *brw,
-                               drm_intel_bo *buffer,
-                               uint32_t read_domains, uint32_t write_domain,
-			       uint32_t delta)
+uint64_t
+intel_batchbuffer_reloc64(struct brw_context *brw,
+                          drm_intel_bo *buffer, uint32_t offset,
+                          uint32_t read_domains, uint32_t write_domain,
+                          uint32_t delta)
 {
-   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+   int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
                                      buffer, delta,
                                      read_domains, write_domain);
    assert(ret == 0);
@@ -421,11 +441,7 @@
     * case the buffer doesn't move and we can short-circuit the relocation
     * processing in the kernel
     */
-   uint64_t offset = buffer->offset64 + delta;
-   intel_batchbuffer_emit_dword(brw, offset);
-   intel_batchbuffer_emit_dword(brw, offset >> 32);
-
-   return true;
+   return buffer->offset64 + delta;
 }
 
 
@@ -435,312 +451,8 @@
 {
    assert((bytes & 3) == 0);
    intel_batchbuffer_require_space(brw, bytes, ring);
-   memcpy(brw->batch.map + brw->batch.used, data, bytes);
-   brw->batch.used += bytes >> 2;
-}
-
-/**
- * According to the latest documentation, any PIPE_CONTROL with the
- * "Command Streamer Stall" bit set must also have another bit set,
- * with five different options:
- *
- *  - Render Target Cache Flush
- *  - Depth Cache Flush
- *  - Stall at Pixel Scoreboard
- *  - Post-Sync Operation
- *  - Depth Stall
- *
- * I chose "Stall at Pixel Scoreboard" since we've used it effectively
- * in the past, but the choice is fairly arbitrary.
- */
-static void
-gen8_add_cs_stall_workaround_bits(uint32_t *flags)
-{
-   uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
-                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                      PIPE_CONTROL_WRITE_IMMEDIATE |
-                      PIPE_CONTROL_WRITE_DEPTH_COUNT |
-                      PIPE_CONTROL_WRITE_TIMESTAMP |
-                      PIPE_CONTROL_STALL_AT_SCOREBOARD |
-                      PIPE_CONTROL_DEPTH_STALL;
-
-   /* If we're doing a CS stall, and don't already have one of the
-    * workaround bits set, add "Stall at Pixel Scoreboard."
-    */
-   if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
-      *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
-}
-
-/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
- *
- * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
- *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
- *
- * Note that the kernel does CS stalls between batches, so we only need
- * to count them within a batch.
- */
-static uint32_t
-gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
-{
-   if (brw->gen == 7 && !brw->is_haswell) {
-      if (flags & PIPE_CONTROL_CS_STALL) {
-         /* If we're doing a CS stall, reset the counter and carry on. */
-         brw->batch.pipe_controls_since_last_cs_stall = 0;
-         return 0;
-      }
-
-      /* If this is the fourth pipe control without a CS stall, do one now. */
-      if (++brw->batch.pipe_controls_since_last_cs_stall == 4) {
-         brw->batch.pipe_controls_since_last_cs_stall = 0;
-         return PIPE_CONTROL_CS_STALL;
-      }
-   }
-   return 0;
-}
-
-/**
- * Emit a PIPE_CONTROL with various flushing flags.
- *
- * The caller is responsible for deciding what flags are appropriate for the
- * given generation.
- */
-void
-brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
-{
-   if (brw->gen >= 8) {
-      gen8_add_cs_stall_workaround_bits(&flags);
-
-      BEGIN_BATCH(6);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
-      OUT_BATCH(flags);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else if (brw->gen >= 6) {
-      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
-      BEGIN_BATCH(5);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-      OUT_BATCH(flags);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-}
-
-/**
- * Emit a PIPE_CONTROL that writes to a buffer object.
- *
- * \p flags should contain one of the following items:
- *  - PIPE_CONTROL_WRITE_IMMEDIATE
- *  - PIPE_CONTROL_WRITE_TIMESTAMP
- *  - PIPE_CONTROL_WRITE_DEPTH_COUNT
- */
-void
-brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
-                            drm_intel_bo *bo, uint32_t offset,
-                            uint32_t imm_lower, uint32_t imm_upper)
-{
-   if (brw->gen >= 8) {
-      gen8_add_cs_stall_workaround_bits(&flags);
-
-      BEGIN_BATCH(6);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
-      OUT_BATCH(flags);
-      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                  offset);
-      OUT_BATCH(imm_lower);
-      OUT_BATCH(imm_upper);
-      ADVANCE_BATCH();
-   } else if (brw->gen >= 6) {
-      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
-      /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
-       * on later platforms.  We always use PPGTT on Gen7+.
-       */
-      unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
-
-      BEGIN_BATCH(5);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-      OUT_BATCH(flags);
-      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                gen6_gtt | offset);
-      OUT_BATCH(imm_lower);
-      OUT_BATCH(imm_upper);
-      ADVANCE_BATCH();
-   } else {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
-      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
-      OUT_BATCH(imm_lower);
-      OUT_BATCH(imm_upper);
-      ADVANCE_BATCH();
-   }
-}
-
-/**
- * Restriction [DevSNB, DevIVB]:
- *
- * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
- * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
- * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
- * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
- * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
- * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
- * unless SW can otherwise guarantee that the pipeline from WM onwards is
- * already flushed (e.g., via a preceding MI_FLUSH).
- */
-void
-intel_emit_depth_stall_flushes(struct brw_context *brw)
-{
-   assert(brw->gen >= 6 && brw->gen <= 9);
-
-   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
-   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-}
-
-/**
- * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
- * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
- *  stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
- *  3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
- *  3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs
- *  to be sent before any combination of VS associated 3DSTATE."
- */
-void
-gen7_emit_vs_workaround_flush(struct brw_context *brw)
-{
-   assert(brw->gen == 7);
-   brw_emit_pipe_control_write(brw,
-                               PIPE_CONTROL_WRITE_IMMEDIATE
-                               | PIPE_CONTROL_DEPTH_STALL,
-                               brw->batch.workaround_bo, 0,
-                               0, 0);
-}
-
-
-/**
- * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
- */
-void
-gen7_emit_cs_stall_flush(struct brw_context *brw)
-{
-   brw_emit_pipe_control_write(brw,
-                               PIPE_CONTROL_CS_STALL
-                               | PIPE_CONTROL_WRITE_IMMEDIATE,
-                               brw->batch.workaround_bo, 0,
-                               0, 0);
-}
-
-
-/**
- * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
- * implementing two workarounds on gen6.  From section 1.4.7.1
- * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
- *
- * [DevSNB-C+{W/A}] Before any depth stall flush (including those
- * produced by non-pipelined state commands), software needs to first
- * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
- * 0.
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
- * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
- *
- * And the workaround for these two requires this workaround first:
- *
- * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
- * BEFORE the pipe-control with a post-sync op and no write-cache
- * flushes.
- *
- * And this last workaround is tricky because of the requirements on
- * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
- * volume 2 part 1:
- *
- *     "1 of the following must also be set:
- *      - Render Target Cache Flush Enable ([12] of DW1)
- *      - Depth Cache Flush Enable ([0] of DW1)
- *      - Stall at Pixel Scoreboard ([1] of DW1)
- *      - Depth Stall ([13] of DW1)
- *      - Post-Sync Operation ([13] of DW1)
- *      - Notify Enable ([8] of DW1)"
- *
- * The cache flushes require the workaround flush that triggered this
- * one, so we can't use it.  Depth stall would trigger the same.
- * Post-sync nonzero is what triggered this second workaround, so we
- * can't use that one either.  Notify enable is IRQs, which aren't
- * really our business.  That leaves only stall at scoreboard.
- */
-void
-intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
-{
-   brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_CS_STALL |
-                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
-
-   brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
-                               brw->batch.workaround_bo, 0, 0, 0);
-}
-
-/* Emit a pipelined flush to either flush render and texture cache for
- * reading from a FBO-drawn texture, or flush so that frontbuffer
- * render appears on the screen in DRI1.
- *
- * This is also used for the always_flush_cache driconf debug option.
- */
-void
-intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
-{
-   if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
-      BEGIN_BATCH_BLT(4);
-      OUT_BATCH(MI_FLUSH_DW);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else {
-      int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
-      if (brw->gen >= 6) {
-         if (brw->gen == 9) {
-            /* Hardware workaround: SKL
-             *
-             * Emit Pipe Control with all bits set to zero before emitting
-             * a Pipe Control with VF Cache Invalidate set.
-             */
-            brw_emit_pipe_control_flush(brw, 0);
-         }
-
-         flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
-                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-                  PIPE_CONTROL_VF_CACHE_INVALIDATE |
-                  PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
-                  PIPE_CONTROL_CS_STALL;
-
-         if (brw->gen == 6) {
-            /* Hardware workaround: SNB B-Spec says:
-             *
-             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
-             * Flush Enable =1, a PIPE_CONTROL with any non-zero
-             * post-sync-op is required.
-             */
-            intel_emit_post_sync_nonzero_flush(brw);
-         }
-      }
-      brw_emit_pipe_control_flush(brw, flags);
-   }
-
-   brw_render_cache_set_clear(brw);
+   memcpy(brw->batch.map_next, data, bytes);
+   brw->batch.map_next += bytes >> 2;
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_batchbuffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_batchbuffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_batchbuffer.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_batchbuffer.h	2015-09-16 14:36:09.000000000 +0000
@@ -26,6 +26,10 @@
  *     - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+.  ==> 12 bytes.
  *       On Ironlake, it's 6 DWords, but we have some slack due to the lack of
  *       Sandybridge PIPE_CONTROL madness.
+ *   - CC_STATE workaround on HSW (12 * 4 = 48 bytes)
+ *     - 5 dwords for initial mi_flush
+ *     - 2 dwords for CC state setup
+ *     - 5 dwords for the required pipe control at the end
  */
 #define BATCH_RESERVED 152
 
@@ -53,25 +57,20 @@
                             const void *data, GLuint bytes,
                             enum brw_gpu_ring ring);
 
-bool intel_batchbuffer_emit_reloc(struct brw_context *brw,
-                                       drm_intel_bo *buffer,
-				       uint32_t read_domains,
-				       uint32_t write_domain,
-				       uint32_t offset);
-bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
-                                    drm_intel_bo *buffer,
-                                    uint32_t read_domains,
-                                    uint32_t write_domain,
-                                    uint32_t offset);
-void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
-void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
-                                 drm_intel_bo *bo, uint32_t offset,
-                                 uint32_t imm_lower, uint32_t imm_upper);
-void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
-void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
-void intel_emit_depth_stall_flushes(struct brw_context *brw);
-void gen7_emit_vs_workaround_flush(struct brw_context *brw);
-void gen7_emit_cs_stall_flush(struct brw_context *brw);
+uint32_t intel_batchbuffer_reloc(struct brw_context *brw,
+                                 drm_intel_bo *buffer,
+                                 uint32_t offset,
+                                 uint32_t read_domains,
+                                 uint32_t write_domain,
+                                 uint32_t delta);
+uint64_t intel_batchbuffer_reloc64(struct brw_context *brw,
+                                   drm_intel_bo *buffer,
+                                   uint32_t offset,
+                                   uint32_t read_domains,
+                                   uint32_t write_domain,
+                                   uint32_t delta);
+
+#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map)) /* in dwords */
 
 static inline uint32_t float_as_int(float f)
 {
@@ -93,7 +92,7 @@
 intel_batchbuffer_space(struct brw_context *brw)
 {
    return (brw->batch.state_batch_offset - brw->batch.reserved_space)
-      - brw->batch.used*4;
+      - USED_BATCH(brw->batch) * 4;
 }
 
 
@@ -103,7 +102,7 @@
 #ifdef DEBUG
    assert(intel_batchbuffer_space(brw) >= 4);
 #endif
-   brw->batch.map[brw->batch.used++] = dword;
+   *brw->batch.map_next++ = dword;
    assert(brw->batch.ring != UNKNOWN_RING);
 }
 
@@ -144,8 +143,8 @@
 {
    intel_batchbuffer_require_space(brw, n * 4, ring);
 
-   brw->batch.emit = brw->batch.used;
 #ifdef DEBUG
+   brw->batch.emit = USED_BATCH(brw->batch);
    brw->batch.total = n;
 #endif
 }
@@ -155,7 +154,7 @@
 {
 #ifdef DEBUG
    struct intel_batchbuffer *batch = &brw->batch;
-   unsigned int _n = batch->used - batch->emit;
+   unsigned int _n = USED_BATCH(*batch) - batch->emit;
    assert(batch->total != 0);
    if (_n != batch->total) {
       fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
@@ -166,21 +165,42 @@
 #endif
 }
 
-#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING)
-#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING)
-#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
-#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f)
-#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
-   intel_batchbuffer_emit_reloc(brw, buf,			\
-				read_domains, write_domain, delta);	\
+#define BEGIN_BATCH(n) do { /* closed by ADVANCE_BATCH() */ \
+   intel_batchbuffer_begin(brw, (n), RENDER_RING);          \
+   uint32_t *__map = brw->batch.map_next;                   \
+   brw->batch.map_next += (n) /* reserve all n dwords up front */
+
+#define BEGIN_BATCH_BLT(n) do { /* closed by ADVANCE_BATCH() */ \
+   intel_batchbuffer_begin(brw, (n), BLT_RING);                 \
+   uint32_t *__map = brw->batch.map_next;                       \
+   brw->batch.map_next += (n) /* reserve all n dwords up front */
+
+#define OUT_BATCH(d) *__map++ = (d) /* uses __map from BEGIN_BATCH* */
+#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))
+
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do {         \
+   uint32_t __offset = (__map - brw->batch.map) * 4; /* byte offset */ \
+   OUT_BATCH(intel_batchbuffer_reloc(brw, (buf), __offset,             \
+                                     (read_domains),                   \
+                                     (write_domain),                   \
+                                     (delta)));                        \
 } while (0)
 
 /* Handle 48-bit address relocations for Gen8+ */
-#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \
-   intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta);	\
+#define OUT_RELOC64(buf, read_domains, write_domain, delta) do {        \
+   uint32_t __offset = (__map - brw->batch.map) * 4; /* byte offset */  \
+   uint64_t __reloc64 = intel_batchbuffer_reloc64(brw, (buf), __offset, \
+                                                  (read_domains),       \
+                                                  (write_domain),       \
+                                                  (delta));             \
+   OUT_BATCH(__reloc64);       /* low 32 bits */                        \
+   OUT_BATCH(__reloc64 >> 32); /* high 32 bits */                       \
 } while (0)
 
-#define ADVANCE_BATCH() intel_batchbuffer_advance(brw);
+#define ADVANCE_BATCH() /* closes the block opened by BEGIN_BATCH* */     \
+   assert(__map == brw->batch.map_next); /* all reserved dwords written */ \
+   intel_batchbuffer_advance(brw);                                         \
+} while (0)
 
 #ifdef __cplusplus
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -27,6 +27,7 @@
 
 
 #include "main/mtypes.h"
+#include "main/blit.h"
 #include "main/context.h"
 #include "main/enums.h"
 #include "main/colormac.h"
@@ -43,6 +44,23 @@
 
 #define FILE_DEBUG_FLAG DEBUG_BLIT
 
+#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type)           \
+({                                                                   \
+   switch (tiling) {                                                 \
+   case I915_TILING_X:                                               \
+      CMD |= type ## _TILED_X;                                       \
+      break;                                                         \
+   case I915_TILING_Y:                                               \
+      if (tr_mode == INTEL_MIPTREE_TRMODE_YS)                        \
+         CMD |= type ## _TILED_64K;                                  \
+      else                                                           \
+         CMD |= type ## _TILED_Y;                                    \
+      break;                                                         \
+   default:                                                          \
+      unreachable("not reached");                                    \
+   }                                                                 \
+})
+
 static void
 intel_miptree_set_alpha_to_one(struct brw_context *brw,
                                struct intel_mipmap_tree *mt,
@@ -75,20 +93,79 @@
 br13_for_cpp(int cpp)
 {
    switch (cpp) {
+   case 16:
+      return BR13_32323232;
+   case 8:
+      return BR13_16161616;
    case 4:
       return BR13_8888;
-      break;
    case 2:
       return BR13_565;
-      break;
    case 1:
       return BR13_8;
-      break;
    default:
       unreachable("not reached");
    }
 }
 
+/* XY_FAST_COPY_BLT horizontal-alignment field for a surface with the given
+ * tr_mode and bytes per pixel (source or destination slot); 0 for NONE. */
+static uint32_t
+get_tr_horizontal_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+   /* Yf horizontal alignments, indexed by log2(cpp). */
+   const uint32_t yf_halign[] = {64, 64, 32, 32, 16};
+   const uint32_t bits_per_pixel = cpp * 8;
+   /* Bit position of the field: source at 17, destination at 10. */
+   const uint32_t field_shift = is_src ? 17 : 10;
+   uint32_t halign;
+   int idx = 0;
+
+   if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+      return 0;
+
+   assert (bits_per_pixel >= 8 && bits_per_pixel <= 128 &&
+           _mesa_is_pow_two(bits_per_pixel));
+   idx = ffs(bits_per_pixel / 8) - 1;
+
+   /* Any mode other than Yf gets four times the Yf alignment. */
+   halign = yf_halign[idx];
+   if (tr_mode != INTEL_MIPTREE_TRMODE_YF)
+      halign *= 4;
+   assert(_mesa_is_pow_two(halign));
+
+   /* XY_FAST_COPY_BLT doesn't support horizontal alignment of 16. */
+   return (ffs(halign == 16 ? 32 : halign) - 6) << field_shift;
+}
+
+/* XY_FAST_COPY_BLT vertical-alignment field for a surface with the given
+ * tr_mode and bytes per pixel (source or destination slot); 0 for NONE. */
+static uint32_t
+get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+   /* Yf vertical alignments, indexed by log2(cpp). */
+   const unsigned yf_valign[] = {64, 32, 32, 16, 16};
+   const uint32_t bits_per_pixel = cpp * 8;
+   const uint32_t field_shift = is_src ? 15 : 8;
+   uint32_t valign;
+   int idx = 0;
+
+   if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+      return 0;
+
+   assert (bits_per_pixel >= 8 && bits_per_pixel <= 128 &&
+           _mesa_is_pow_two(bits_per_pixel));
+   idx = ffs(bits_per_pixel / 8) - 1;
+
+   /* Any mode other than Yf gets four times the Yf alignment. */
+   valign = yf_valign[idx];
+   if (tr_mode != INTEL_MIPTREE_TRMODE_YF)
+      valign *= 4;
+   assert(_mesa_is_pow_two(valign));
+
+   /* XY_FAST_COPY_BLT doesn't support vertical alignments of 16 and 32. */
+   return (ffs(valign == 16 || valign == 32 ? 64 : valign) - 7)
+          << field_shift;
+}
+
 /**
  * Emits the packet for switching the blitter from X to Y tiled or back.
  *
@@ -99,9 +176,10 @@
  * tiling state would leak into other unsuspecting applications (like the X
  * server).
  */
-static void
+static uint32_t *
 set_blitter_tiling(struct brw_context *brw,
-                   bool dst_y_tiled, bool src_y_tiled)
+                   bool dst_y_tiled, bool src_y_tiled,
+                   uint32_t *__map)
 {
    assert(brw->gen >= 6);
 
@@ -116,19 +194,19 @@
    OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
              (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
              (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
+   return __map;
 }
+#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map)
 
-#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do {         \
+#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled)              \
       BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0));     \
       if (dst_y_tiled || src_y_tiled)                                   \
-         set_blitter_tiling(brw, dst_y_tiled, src_y_tiled);             \
-   } while (0)
+         SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled)
 
-#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do {              \
+#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled)                   \
       if (dst_y_tiled || src_y_tiled)                                   \
-         set_blitter_tiling(brw, false, false);                         \
-      ADVANCE_BATCH();                                                  \
-   } while (0)
+         SET_BLITTER_TILING(brw, false, false);                         \
+      ADVANCE_BATCH()
 
 static int
 blt_pitch(struct intel_mipmap_tree *mt)
@@ -281,9 +359,11 @@
                           src_pitch,
                           src_mt->bo, src_mt->offset,
                           src_mt->tiling,
+                          src_mt->tr_mode,
                           dst_mt->pitch,
                           dst_mt->bo, dst_mt->offset,
                           dst_mt->tiling,
+                          dst_mt->tr_mode,
                           src_x, src_y,
                           dst_x, dst_y,
                           width, height,
@@ -316,6 +396,112 @@
    return true;
 }
 
+/* Decide whether a copy between the two described surfaces may be emitted
+ * as XY_FAST_COPY_BLT; every restriction checked below must hold.
+ */
+static bool
+can_fast_copy_blit(struct brw_context *brw,
+                   drm_intel_bo *src_buffer,
+                   int16_t src_x, int16_t src_y,
+                   uintptr_t src_offset, uint32_t src_pitch,
+                   uint32_t src_tiling, uint32_t src_tr_mode,
+                   drm_intel_bo *dst_buffer,
+                   int16_t dst_x, int16_t dst_y,
+                   uintptr_t dst_offset, uint32_t dst_pitch,
+                   uint32_t dst_tiling, uint32_t dst_tr_mode,
+                   int16_t w, int16_t h, uint32_t cpp)
+{
+   /* Pitch granularity: linear surfaces need an OWord (16 byte) multiple;
+    * tiled surfaces need a multiple of the tile width (X direction width
+    * of the tile), which keeps the pitch cache line (64 byte) aligned.
+    */
+   const uint32_t src_pitch_align = src_tiling == I915_TILING_NONE ? 16 : 64;
+   const uint32_t dst_pitch_align = dst_tiling == I915_TILING_NONE ? 16 : 64;
+
+   /* The fast copy path is only taken on Gen9 and later. */
+   if (brw->gen < 9)
+      return false;
+
+   /* Reject overlapping rectangles within the same buffer object. */
+   if (src_buffer->handle == dst_buffer->handle &&
+       _mesa_regions_overlap(src_x, src_y, src_x + w, src_y + h,
+                             dst_x, dst_y, dst_x + w, dst_y + h))
+      return false;
+
+   /* Only use the fast copy blit when at least one surface is Yf/Ys tiled.
+    * FIXME: Based on performance data, remove this condition later to
+    * enable for all types of surfaces.
+    */
+   if (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE &&
+       dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+      return false;
+
+   /* For all surface types buffers must be cacheline-aligned. */
+   if ((src_offset & 63) != 0 || (dst_offset & 63) != 0)
+      return false;
+
+   /* Color depth greater than 128 bits not supported. */
+   if (cpp > 16)
+      return false;
+
+   /* For Fast Copy Blits the pitch cannot be a negative number, so bit 15
+    * of both the source and the destination pitch must be zero.
+    */
+   if (((src_pitch | dst_pitch) >> 15 & 1) != 0)
+      return false;
+
+   /* Apply the per-surface pitch granularity chosen above. */
+   if (src_pitch % src_pitch_align != 0 || dst_pitch % dst_pitch_align != 0)
+      return false;
+
+   return true;
+}
+
+/* Compute the CMD value for a copy blit: XY_FAST_COPY_BLT with tiling and
+ * alignment fields when use_fast_copy_blit is set, XY_SRC_COPY_BLT otherwise.
+ */
+static uint32_t
+xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode,
+            uint32_t dst_tiling, uint32_t dst_tr_mode,
+            uint32_t cpp, bool use_fast_copy_blit)
+{
+   /* Must be called CMD: SET_TILING_XY_FAST_COPY_BLT() updates it by name. */
+   uint32_t CMD = 0;
+   const bool src_tiled = src_tiling != I915_TILING_NONE;
+   const bool dst_tiled = dst_tiling != I915_TILING_NONE;
+
+   if (!use_fast_copy_blit) {
+      assert(cpp <= 4);
+      if (cpp == 4)
+         CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+      else if (cpp == 1 || cpp == 2)
+         CMD = XY_SRC_COPY_BLT_CMD;
+      else
+         unreachable("not reached");
+
+      if (dst_tiled)
+         CMD |= XY_DST_TILED;
+      if (src_tiled)
+         CMD |= XY_SRC_TILED;
+      return CMD;
+   }
+
+   CMD = XY_FAST_COPY_BLT_CMD;
+
+   if (dst_tiled)
+      SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST);
+   if (src_tiled)
+      SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC);
+
+   /* Alignment fields for the source, then for the destination. */
+   CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */);
+   CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */);
+   CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */);
+   CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */);
+
+   return CMD;
+}
+
 /* Copy BitBlt
  */
 bool
@@ -325,10 +511,12 @@
 		  drm_intel_bo *src_buffer,
 		  GLuint src_offset,
 		  uint32_t src_tiling,
+		  uint32_t src_tr_mode,
 		  GLshort dst_pitch,
 		  drm_intel_bo *dst_buffer,
 		  GLuint dst_offset,
 		  uint32_t dst_tiling,
+		  uint32_t dst_tr_mode,
 		  GLshort src_x, GLshort src_y,
 		  GLshort dst_x, GLshort dst_y,
 		  GLshort w, GLshort h,
@@ -340,18 +528,11 @@
    drm_intel_bo *aper_array[3];
    bool dst_y_tiled = dst_tiling == I915_TILING_Y;
    bool src_y_tiled = src_tiling == I915_TILING_Y;
-
-   if (!alignment_valid(brw, dst_offset, dst_tiling))
-      return false;
-   if (!alignment_valid(brw, src_offset, src_tiling))
-      return false;
+   bool use_fast_copy_blit = false;
 
    if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
       return false;
 
-   assert(!dst_y_tiled || (dst_pitch % 128) == 0);
-   assert(!src_y_tiled || (src_pitch % 128) == 0);
-
    /* do space check before going any further */
    do {
        aper_array[0] = brw->batch.bo;
@@ -376,52 +557,98 @@
        src_buffer, src_pitch, src_offset, src_x, src_y,
        dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
 
-   /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
-    * the low bits.  Offsets must be naturally aligned.
-    */
-   if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
-       dst_pitch % 4 != 0 || dst_offset % cpp != 0)
-      return false;
+   use_fast_copy_blit = can_fast_copy_blit(brw,
+                                           src_buffer,
+                                           src_x, src_y,
+                                           src_offset, src_pitch,
+                                           src_tiling, src_tr_mode,
+                                           dst_buffer,
+                                           dst_x, dst_y,
+                                           dst_offset, dst_pitch,
+                                           dst_tiling, dst_tr_mode,
+                                           w, h, cpp);
+   assert(use_fast_copy_blit ||
+          (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE &&
+           dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE));
+
+   if (use_fast_copy_blit) {
+      /* When two sequential fast copy blits have different source surfaces,
+       * but their destinations refer to the same destination surfaces and
+       * therefore destinations overlap it is imperative that a flush be
+       * inserted between the two blits.
+       *
+       * FIXME: Figure out a way to avoid flushing when not required.
+       */
+      brw_emit_mi_flush(brw);
+
+      assert(cpp <= 16);
+      BR13 = br13_for_cpp(cpp);
+
+      if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+         BR13 |= XY_FAST_SRC_TRMODE_YF;
+
+      if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+         BR13 |= XY_FAST_DST_TRMODE_YF;
+
+      CMD = xy_blit_cmd(src_tiling, src_tr_mode,
+                        dst_tiling, dst_tr_mode,
+                        cpp, use_fast_copy_blit);
+
+      /* For tiled source and destination, pitch value should be specified
+       * as a number of Dwords.
+       */
+      if (dst_tiling != I915_TILING_NONE)
+         dst_pitch /= 4;
 
-   /* For big formats (such as floating point), do the copy using 16 or 32bpp
-    * and multiply the coordinates.
-    */
-   if (cpp > 4) {
-      if (cpp % 4 == 2) {
-         dst_x *= cpp / 2;
-         dst_x2 *= cpp / 2;
-         src_x *= cpp / 2;
-         cpp = 2;
-      } else {
-         assert(cpp % 4 == 0);
-         dst_x *= cpp / 4;
-         dst_x2 *= cpp / 4;
-         src_x *= cpp / 4;
-         cpp = 4;
+      if (src_tiling != I915_TILING_NONE)
+         src_pitch /= 4;
+
+   } else {
+      assert(!dst_y_tiled || (dst_pitch % 128) == 0);
+      assert(!src_y_tiled || (src_pitch % 128) == 0);
+
+      /* For big formats (such as floating point), do the copy using 16 or
+       * 32bpp and multiply the coordinates.
+       */
+      if (cpp > 4) {
+         if (cpp % 4 == 2) {
+            dst_x *= cpp / 2;
+            dst_x2 *= cpp / 2;
+            src_x *= cpp / 2;
+            cpp = 2;
+         } else {
+            assert(cpp % 4 == 0);
+            dst_x *= cpp / 4;
+            dst_x2 *= cpp / 4;
+            src_x *= cpp / 4;
+            cpp = 4;
+         }
       }
-   }
 
-   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+      if (!alignment_valid(brw, dst_offset, dst_tiling))
+         return false;
+      if (!alignment_valid(brw, src_offset, src_tiling))
+         return false;
+
+      /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
+       * the low bits.  Offsets must be naturally aligned.
+       */
+      if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
+          dst_pitch % 4 != 0 || dst_offset % cpp != 0)
+         return false;
+
+      assert(cpp <= 4);
+      BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+
+      CMD = xy_blit_cmd(src_tiling, src_tr_mode,
+                        dst_tiling, dst_tr_mode,
+                        cpp, use_fast_copy_blit);
 
-   switch (cpp) {
-   case 1:
-   case 2:
-      CMD = XY_SRC_COPY_BLT_CMD;
-      break;
-   case 4:
-      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
-      break;
-   default:
-      return false;
-   }
+      if (dst_tiling != I915_TILING_NONE)
+         dst_pitch /= 4;
 
-   if (dst_tiling != I915_TILING_NONE) {
-      CMD |= XY_DST_TILED;
-      dst_pitch /= 4;
-   }
-   if (src_tiling != I915_TILING_NONE) {
-      CMD |= XY_SRC_TILED;
-      src_pitch /= 4;
+      if (src_tiling != I915_TILING_NONE)
+         src_pitch /= 4;
    }
 
    if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
@@ -463,7 +690,7 @@
 
    ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    return true;
 }
@@ -547,7 +774,7 @@
 
    intel_batchbuffer_data(brw, src_bits, dwords * 4, BLT_RING);
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    return true;
 }
@@ -569,43 +796,43 @@
    int16_t src_x, dst_x;
    bool ok;
 
-   /* The pitch given to the GPU must be DWORD aligned, and
-    * we want width to match pitch. Max width is (1 << 15 - 1),
-    * rounding that down to the nearest DWORD is 1 << 15 - 4
-    */
-   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
-   height = (pitch == 0) ? 1 : size / pitch;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   ok = intelEmitCopyBlit(brw, 1,
-			  pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-			  pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-			  src_x, 0, /* src x/y */
-			  dst_x, 0, /* dst x/y */
-			  pitch, height, /* w, h */
-			  GL_COPY);
-   if (!ok)
-      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
-
-   src_offset += pitch * height;
-   dst_offset += pitch * height;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   pitch = ALIGN(size, 4);
+   do {
+      /* The pitch given to the GPU must be DWORD aligned, and
+       * we want width to match pitch. src_x/dst_x may be up to 63 and
+       * x + pitch must stay below 1 << 15, so cap it at (1 << 15) - 64.
+       */
+      pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+      height = (size < pitch || pitch == 0) ? 1 : size / pitch;
+
+      src_x = src_offset % 64;
+      dst_x = dst_offset % 64;
+      pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+      assert(src_x + pitch < 1 << 15);
+      assert(dst_x + pitch < 1 << 15);
 
-   if (size != 0) {
       ok = intelEmitCopyBlit(brw, 1,
-			     pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-			     pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-			     src_x, 0, /* src x/y */
-			     dst_x, 0, /* dst x/y */
-			     size, 1, /* w, h */
-			     GL_COPY);
-      if (!ok)
-         _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
-   }
+                             pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+                             INTEL_MIPTREE_TRMODE_NONE,
+                             pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+                             INTEL_MIPTREE_TRMODE_NONE,
+                             src_x, 0, /* src x/y */
+                             dst_x, 0, /* dst x/y */
+                             MIN2(size, pitch), height, /* w, h */
+                             GL_COPY);
+      if (!ok) {
+         _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+                       MIN2(size, pitch), height);
+         return;
+      }
+
+      pitch *= height;
+      if (size <= pitch)
+         return;
+
+      src_offset += pitch;
+      dst_offset += pitch;
+      size -= pitch;
+   } while (1);
 }
 
 /**
@@ -670,5 +897,5 @@
    OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
    ADVANCE_BATCH_TILED(dst_y_tiled, false);
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_blit.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_blit.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_blit.h	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_blit.h	2015-09-16 14:36:09.000000000 +0000
@@ -32,19 +32,21 @@
 
 bool
 intelEmitCopyBlit(struct brw_context *brw,
-                              GLuint cpp,
-                              GLshort src_pitch,
-                              drm_intel_bo *src_buffer,
-                              GLuint src_offset,
-			      uint32_t src_tiling,
-                              GLshort dst_pitch,
-                              drm_intel_bo *dst_buffer,
-                              GLuint dst_offset,
-			      uint32_t dst_tiling,
-                              GLshort srcx, GLshort srcy,
-                              GLshort dstx, GLshort dsty,
-                              GLshort w, GLshort h,
-			      GLenum logicop );
+                  GLuint cpp,
+                  GLshort src_pitch,
+                  drm_intel_bo *src_buffer,
+                  GLuint src_offset,
+                  uint32_t src_tiling,
+                  uint32_t src_tr_mode,
+                  GLshort dst_pitch,
+                  drm_intel_bo *dst_buffer,
+                  GLuint dst_offset,
+                  uint32_t dst_tiling,
+                  uint32_t dst_tr_mode,
+                  GLshort srcx, GLshort srcy,
+                  GLshort dstx, GLshort dsty,
+                  GLshort w, GLshort h,
+                  GLenum logicop);
 
 bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_buffer_objects.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_buffer_objects.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_buffer_objects.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_buffer_objects.c	2015-09-16 14:36:09.000000000 +0000
@@ -560,7 +560,7 @@
        * flush.  Once again, we wish for a domain tracker in libdrm to cover
        * usage inside of a batchbuffer.
        */
-      intel_batchbuffer_emit_mi_flush(brw);
+      brw_emit_mi_flush(brw);
 
       drm_intel_bo_unreference(intel_obj->range_map_bo[index]);
       intel_obj->range_map_bo[index] = NULL;
@@ -632,7 +632,7 @@
     * flush.  Once again, we wish for a domain tracker in libdrm to cover
     * usage inside of a batchbuffer.
     */
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_copy_image.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_copy_image.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_copy_image.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_copy_image.c	2015-09-16 14:36:09.000000000 +0000
@@ -126,9 +126,11 @@
                             src_mt->pitch,
                             src_mt->bo, src_mt->offset,
                             src_mt->tiling,
+                            src_mt->tr_mode,
                             dst_mt->pitch,
                             dst_mt->bo, dst_mt->offset,
                             dst_mt->tiling,
+                            dst_mt->tr_mode,
                             src_x, src_y,
                             dst_x, dst_y,
                             src_width, src_height,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_debug.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_debug.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_debug.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_debug.c	2015-09-16 14:36:09.000000000 +0000
@@ -79,34 +79,33 @@
 {
    uint64_t flags[] = {
       [MESA_SHADER_VERTEX] = DEBUG_VS,
+      [MESA_SHADER_TESS_CTRL] = 0,
+      [MESA_SHADER_TESS_EVAL] = 0,
       [MESA_SHADER_GEOMETRY] = DEBUG_GS,
       [MESA_SHADER_FRAGMENT] = DEBUG_WM,
       [MESA_SHADER_COMPUTE] = DEBUG_CS,
    };
-   STATIC_ASSERT(MESA_SHADER_STAGES == 4);
+   STATIC_ASSERT(MESA_SHADER_STAGES == 6);
    return flags[stage];
 }
 
 void
-brw_process_intel_debug_variable(struct brw_context *brw)
+brw_process_intel_debug_variable(struct intel_screen *screen)
 {
    uint64_t intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
    (void) p_atomic_cmpxchg(&INTEL_DEBUG, 0, intel_debug);
 
    if (INTEL_DEBUG & DEBUG_BUFMGR)
-      dri_bufmgr_set_debug(brw->bufmgr, true);
+      dri_bufmgr_set_debug(screen->bufmgr, true);
 
-   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && brw->gen < 7) {
+   if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && screen->devinfo->gen < 7) {
       fprintf(stderr,
               "shader_time debugging requires gen7 (Ivybridge) or better.\n");
       INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
    }
 
-   if (INTEL_DEBUG & DEBUG_PERF)
-      brw->perf_debug = true;
-
    if (INTEL_DEBUG & DEBUG_AUB)
-      drm_intel_bufmgr_gem_set_aub_dump(brw->bufmgr, true);
+      drm_intel_bufmgr_gem_set_aub_dump(screen->bufmgr, true);
 }
 
 /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_debug.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_debug.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_debug.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_debug.h	2015-09-16 14:36:09.000000000 +0000
@@ -114,8 +114,8 @@
 
 extern uint64_t intel_debug_flag_for_shader_stage(gl_shader_stage stage);
 
-struct brw_context;
+struct intel_screen;
 
-extern void brw_process_intel_debug_variable(struct brw_context *brw);
+extern void brw_process_intel_debug_variable(struct intel_screen *);
 
 extern bool brw_env_var_as_boolean(const char *var_name, bool default_value);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_extensions.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_extensions.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_extensions.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_extensions.c	2015-09-16 14:36:09.000000000 +0000
@@ -64,10 +64,10 @@
    /* Set a value in a BO to a known quantity.  The workaround BO already
     * exists and doesn't contain anything important, so we may as well use it.
     */
-   drm_intel_bo_map(brw->batch.workaround_bo, true);
-   data = brw->batch.workaround_bo->virtual;
+   drm_intel_bo_map(brw->workaround_bo, true);
+   data = brw->workaround_bo->virtual;
    data[offset] = 0xffffffff;
-   drm_intel_bo_unmap(brw->batch.workaround_bo);
+   drm_intel_bo_unmap(brw->workaround_bo);
 
    /* Write the register. */
    BEGIN_BATCH(3);
@@ -76,13 +76,13 @@
    OUT_BATCH(expected_value);
    ADVANCE_BATCH();
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    /* Save the register's value back to the buffer. */
    BEGIN_BATCH(3);
    OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
    OUT_BATCH(reg);
-   OUT_RELOC(brw->batch.workaround_bo,
+   OUT_RELOC(brw->workaround_bo,
              I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
              offset * sizeof(uint32_t));
    ADVANCE_BATCH();
@@ -90,10 +90,10 @@
    intel_batchbuffer_flush(brw);
 
    /* Check whether the value got written. */
-   drm_intel_bo_map(brw->batch.workaround_bo, false);
-   data = brw->batch.workaround_bo->virtual;
+   drm_intel_bo_map(brw->workaround_bo, false);
+   data = brw->workaround_bo->virtual;
    bool success = data[offset] == expected_value;
-   drm_intel_bo_unmap(brw->batch.workaround_bo);
+   drm_intel_bo_unmap(brw->workaround_bo);
 
    result = success;
 
@@ -120,10 +120,10 @@
    /* Set a value in a BO to a known quantity.  The workaround BO already
     * exists and doesn't contain anything important, so we may as well use it.
     */
-   drm_intel_bo_map(brw->batch.workaround_bo, true);
-   data = brw->batch.workaround_bo->virtual;
+   drm_intel_bo_map(brw->workaround_bo, true);
+   data = brw->workaround_bo->virtual;
    data[offset] = 0xffffffff;
-   drm_intel_bo_unmap(brw->batch.workaround_bo);
+   drm_intel_bo_unmap(brw->workaround_bo);
 
    /* Write OACONTROL. */
    BEGIN_BATCH(3);
@@ -132,18 +132,18 @@
    OUT_BATCH(expected_value);
    ADVANCE_BATCH();
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    /* Save the register's value back to the buffer. */
    BEGIN_BATCH(3);
    OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
    OUT_BATCH(OACONTROL);
-   OUT_RELOC(brw->batch.workaround_bo,
+   OUT_RELOC(brw->workaround_bo,
              I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
              offset * sizeof(uint32_t));
    ADVANCE_BATCH();
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 
    /* Set OACONTROL back to zero (everything off). */
    BEGIN_BATCH(3);
@@ -155,10 +155,10 @@
    intel_batchbuffer_flush(brw);
 
    /* Check whether the value got written. */
-   drm_intel_bo_map(brw->batch.workaround_bo, false);
-   data = brw->batch.workaround_bo->virtual;
+   drm_intel_bo_map(brw->workaround_bo, false);
+   data = brw->workaround_bo->virtual;
    bool success = data[offset] == expected_value;
-   drm_intel_bo_unmap(brw->batch.workaround_bo);
+   drm_intel_bo_unmap(brw->workaround_bo);
 
    result = success;
 
@@ -282,8 +282,6 @@
    }
 
    if (brw->gen >= 6) {
-      uint64_t dummy;
-
       ctx->Extensions.ARB_blend_func_extended =
          !driQueryOptionb(&brw->optionCache, "disable_blend_func_extended");
       ctx->Extensions.ARB_conditional_render_inverted = true;
@@ -307,14 +305,13 @@
       ctx->Extensions.EXT_transform_feedback = true;
       ctx->Extensions.OES_depth_texture_cube_map = true;
 
-      /* Test if the kernel has the ioctl. */
-      if (drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &dummy) == 0)
-         ctx->Extensions.ARB_timer_query = true;
+      ctx->Extensions.ARB_timer_query = brw->intelScreen->hw_has_timestamp;
 
       /* Only enable this in core profile because other parts of Mesa behave
        * slightly differently when the extension is enabled.
        */
       if (ctx->API == API_OPENGL_CORE) {
+         ctx->Extensions.ARB_shader_subroutine = true;
          ctx->Extensions.ARB_viewport_array = true;
          ctx->Extensions.AMD_vertex_shader_viewport_index = true;
       }
@@ -325,8 +322,11 @@
    if (brw->gen >= 7) {
       ctx->Extensions.ARB_conservative_depth = true;
       ctx->Extensions.ARB_derivative_control = true;
+      ctx->Extensions.ARB_framebuffer_no_attachments = true;
       ctx->Extensions.ARB_gpu_shader5 = true;
       ctx->Extensions.ARB_shader_atomic_counters = true;
+      ctx->Extensions.ARB_shader_image_load_store = true;
+      ctx->Extensions.ARB_shader_image_size = true;
       ctx->Extensions.ARB_texture_compression_bptc = true;
       ctx->Extensions.ARB_texture_view = true;
 
@@ -346,6 +346,7 @@
       if (ctx->API == API_OPENGL_CORE) {
          ctx->Extensions.ARB_viewport_array = true;
          ctx->Extensions.AMD_vertex_shader_viewport_index = true;
+         ctx->Extensions.ARB_shader_subroutine = true;
       }
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_fbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_fbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_fbo.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_fbo.c	2015-09-16 14:36:09.000000000 +0000
@@ -310,7 +310,7 @@
    intel_miptree_release(&irb->mt);
 
    DBG("%s: %s: %s (%dx%d)\n", __func__,
-       _mesa_lookup_enum_by_nr(internalFormat),
+       _mesa_enum_to_string(internalFormat),
        _mesa_get_format_name(rb->Format), width, height);
 
    if (width == 0 || height == 0)
@@ -390,7 +390,7 @@
                                          image->height,
                                          1,
                                          image->pitch,
-                                         true /*disable_aux_buffers*/);
+                                         MIPTREE_LAYOUT_DISABLE_AUX);
    if (!irb->mt)
       return;
 
@@ -551,10 +551,12 @@
 
    irb->mt_layer = layer_multiplier * layer;
 
-   if (layered) {
-      irb->layer_count = image->TexObject->NumLayers ?: mt->level[level].depth / layer_multiplier;
-   } else {
+   if (!layered) {
       irb->layer_count = 1;
+   } else if (image->TexObject->NumLayers > 0) {
+      irb->layer_count = image->TexObject->NumLayers;
+   } else {
+      irb->layer_count = mt->level[level].depth / layer_multiplier;
    }
 
    intel_miptree_reference(&irb->mt, mt);
@@ -660,7 +662,7 @@
    struct intel_renderbuffer *stencilRb =
       intel_get_renderbuffer(fb, BUFFER_STENCIL);
    struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
-   int i;
+   unsigned i;
 
    DBG("%s() on fb %p (%s)\n", __func__,
        fb, (fb == ctx->DrawBuffer ? "drawbuffer" :
@@ -795,7 +797,7 @@
    intel_prepare_render(brw);
 
    if (mask & GL_COLOR_BUFFER_BIT) {
-      GLint i;
+      unsigned i;
       struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
       struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
 
@@ -1020,6 +1022,9 @@
    struct intel_mipmap_tree *new_mt;
    int width, height, depth;
 
+   uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+                           MIPTREE_LAYOUT_TILING_ANY;
+
    intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth);
 
    new_mt = intel_miptree_create(brw, rb->TexImage->TexObject->Target,
@@ -1027,10 +1032,8 @@
                                  intel_image->base.Base.Level,
                                  intel_image->base.Base.Level,
                                  width, height, depth,
-                                 true,
                                  irb->mt->num_samples,
-                                 INTEL_MIPTREE_TILING_ANY,
-                                 false);
+                                 layout_flags);
 
    if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
       intel_miptree_alloc_hiz(brw, new_mt);
@@ -1077,7 +1080,7 @@
    if (!_mesa_set_search(brw->render_cache, bo))
       return;
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
 }
 
 /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_mipmap_tree.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_mipmap_tree.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_mipmap_tree.c	2015-09-16 14:36:09.000000000 +0000
@@ -158,15 +158,32 @@
    }
 }
 
+bool
+intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
+{
+   /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
+    * Target(s)", beneath the "Fast Color Clear" bullet (p326):
+    *
+    *     - Support is limited to tiled render targets.
+    *
+    * Gen9 changes the restriction to Y-tile only.
+    */
+   if (brw->gen >= 9)
+      return tiling == I915_TILING_Y;
+   else if (brw->gen >= 7)
+      return tiling != I915_TILING_NONE;
+   else
+      return false;
+}
 
 /**
  * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
- * can be used.
+ * can be used. This doesn't (and should not) inspect any of the properties of
+ * the miptree's BO.
  *
  * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
  * beneath the "Fast Color Clear" bullet (p326):
  *
- *     - Support is limited to tiled render targets.
  *     - Support is for non-mip-mapped and non-array surface types only.
  *
  * And then later, on p327:
@@ -175,8 +192,8 @@
  *       64bpp, and 128bpp.
  */
 bool
-intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
-                                       struct intel_mipmap_tree *mt)
+intel_miptree_is_fast_clear_capable(struct brw_context *brw,
+                                    struct intel_mipmap_tree *mt)
 {
    /* MCS support does not exist prior to Gen7 */
    if (brw->gen < 7)
@@ -193,15 +210,25 @@
       return false;
    }
 
-   if (mt->tiling != I915_TILING_X &&
-       mt->tiling != I915_TILING_Y)
-      return false;
    if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
       return false;
-   if (mt->first_level != 0 || mt->last_level != 0)
+   if (mt->first_level != 0 || mt->last_level != 0) {
+      if (brw->gen >= 8) {
+         perf_debug("Multi-LOD fast clear - giving up (%dx%dx%d).\n",
+                    mt->logical_width0, mt->logical_height0, mt->last_level);
+      }
+
       return false;
-   if (mt->physical_depth0 != 1)
+   }
+   if (mt->physical_depth0 != 1) {
+      if (brw->gen >= 8) {
+         perf_debug("Layered fast clear - giving up. (%dx%dx%d)\n",
+                    mt->logical_width0, mt->logical_height0,
+                    mt->physical_depth0);
+      }
+
       return false;
+   }
 
    /* There's no point in using an MCS buffer if the surface isn't in a
     * renderable format.
@@ -244,17 +271,15 @@
                             GLuint width0,
                             GLuint height0,
                             GLuint depth0,
-                            bool for_bo,
                             GLuint num_samples,
-                            bool force_all_slices_at_each_lod,
-                            bool disable_aux_buffers)
+                            uint32_t layout_flags)
 {
    struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
    if (!mt)
       return NULL;
 
    DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__,
-       _mesa_lookup_enum_by_nr(target),
+       _mesa_enum_to_string(target),
        _mesa_get_format_name(format),
        first_level, last_level, depth0, mt);
 
@@ -286,7 +311,7 @@
    mt->logical_height0 = height0;
    mt->logical_depth0 = depth0;
    mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
-   mt->disable_aux_buffers = disable_aux_buffers;
+   mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0;
    exec_list_make_empty(&mt->hiz_map);
 
    /* The cpp is bytes per (1, blockheight)-sized block for compressed
@@ -422,12 +447,17 @@
    mt->physical_height0 = height0;
    mt->physical_depth0 = depth0;
 
-   if (!for_bo &&
+   if (!(layout_flags & MIPTREE_LAYOUT_FOR_BO) &&
        _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
        (brw->must_use_separate_stencil ||
 	(brw->has_separate_stencil &&
          intel_miptree_wants_hiz_buffer(brw, mt)))) {
-      const bool force_all_slices_at_each_lod = brw->gen == 6;
+      uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
+      if (brw->gen == 6) {
+         stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD |
+                          MIPTREE_LAYOUT_TILING_ANY;
+      }
+
       mt->stencil_mt = intel_miptree_create(brw,
                                             mt->target,
                                             MESA_FORMAT_S_UINT8,
@@ -436,10 +466,9 @@
                                             mt->logical_width0,
                                             mt->logical_height0,
                                             mt->logical_depth0,
-                                            true,
                                             num_samples,
-                                            INTEL_MIPTREE_TILING_ANY,
-                                            force_all_slices_at_each_lod);
+                                            stencil_flags);
+
       if (!mt->stencil_mt) {
 	 intel_miptree_release(&mt);
 	 return NULL;
@@ -457,10 +486,31 @@
       }
    }
 
-   if (force_all_slices_at_each_lod)
+   if (layout_flags & MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD)
       mt->array_layout = ALL_SLICES_AT_EACH_LOD;
 
-   brw_miptree_layout(brw, mt);
+   /*
+    * Obey HALIGN_16 constraints for Gen8 and Gen9 buffers which are
+    * multisampled or have an AUX buffer attached to it.
+    *
+    * GEN  |    MSRT        | AUX_CCS_* or AUX_MCS
+    *  -------------------------------------------
+    *  9   |  HALIGN_16     |    HALIGN_16
+    *  8   |  HALIGN_ANY    |    HALIGN_16
+    *  7   |      ?         |        ?
+    *  6   |      ?         |        ?
+    */
+   if (intel_miptree_is_fast_clear_capable(brw, mt)) {
+      if (brw->gen >= 9 || (brw->gen == 8 && num_samples <= 1))
+         layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+   } else if (brw->gen >= 9 && num_samples > 1) {
+      layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+   } else {
+      /* For now, nothing else has this requirement */
+      assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
+   }
+
+   brw_miptree_layout(brw, mt, layout_flags);
 
    if (mt->disable_aux_buffers)
       assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
@@ -468,110 +518,6 @@
    return mt;
 }
 
-/**
- * \brief Helper function for intel_miptree_create().
- */
-static uint32_t
-intel_miptree_choose_tiling(struct brw_context *brw,
-                            mesa_format format,
-                            uint32_t width0,
-                            uint32_t num_samples,
-                            enum intel_miptree_tiling_mode requested,
-                            struct intel_mipmap_tree *mt)
-{
-   if (format == MESA_FORMAT_S_UINT8) {
-      /* The stencil buffer is W tiled. However, we request from the kernel a
-       * non-tiled buffer because the GTT is incapable of W fencing.
-       */
-      return I915_TILING_NONE;
-   }
-
-   /* Some usages may want only one type of tiling, like depth miptrees (Y
-    * tiled), or temporary BOs for uploading data once (linear).
-    */
-   switch (requested) {
-   case INTEL_MIPTREE_TILING_ANY:
-      break;
-   case INTEL_MIPTREE_TILING_Y:
-      return I915_TILING_Y;
-   case INTEL_MIPTREE_TILING_NONE:
-      return I915_TILING_NONE;
-   }
-
-   if (num_samples > 1) {
-      /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
-       * Surface"):
-       *
-       *   [DevSNB+]: For multi-sample render targets, this field must be
-       *   1. MSRTs can only be tiled.
-       *
-       * Our usual reason for preferring X tiling (fast blits using the
-       * blitting engine) doesn't apply to MSAA, since we'll generally be
-       * downsampling or upsampling when blitting between the MSAA buffer
-       * and another buffer, and the blitting engine doesn't support that.
-       * So use Y tiling, since it makes better use of the cache.
-       */
-      return I915_TILING_Y;
-   }
-
-   GLenum base_format = _mesa_get_format_base_format(format);
-   if (base_format == GL_DEPTH_COMPONENT ||
-       base_format == GL_DEPTH_STENCIL_EXT)
-      return I915_TILING_Y;
-
-   /* 1D textures (and 1D array textures) don't get any benefit from tiling,
-    * in fact it leads to a less efficient use of memory space and bandwidth
-    * due to tile alignment.
-    */
-   if (mt->logical_height0 == 1)
-      return I915_TILING_NONE;
-
-   int minimum_pitch = mt->total_width * mt->cpp;
-
-   /* If the width is much smaller than a tile, don't bother tiling. */
-   if (minimum_pitch < 64)
-      return I915_TILING_NONE;
-
-   if (ALIGN(minimum_pitch, 512) >= 32768 ||
-       mt->total_width >= 32768 || mt->total_height >= 32768) {
-      perf_debug("%dx%d miptree too large to blit, falling back to untiled",
-                 mt->total_width, mt->total_height);
-      return I915_TILING_NONE;
-   }
-
-   /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
-   if (brw->gen < 6)
-      return I915_TILING_X;
-
-   /* From the Sandybridge PRM, Volume 1, Part 2, page 32:
-    * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
-    *  or Linear."
-    * 128 bits per pixel translates to 16 bytes per pixel. This is necessary
-    * all the way back to 965, but is permitted on Gen7+.
-    */
-   if (brw->gen < 7 && mt->cpp >= 16)
-      return I915_TILING_X;
-
-   /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
-    * messages), on p64, under the heading "Surface Vertical Alignment":
-    *
-    *     This field must be set to VALIGN_4 for all tiled Y Render Target
-    *     surfaces.
-    *
-    * So if the surface is renderable and uses a vertical alignment of 2,
-    * force it to be X tiled.  This is somewhat conservative (it's possible
-    * that the client won't ever render to this surface), but it's difficult
-    * to know that ahead of time.  And besides, since we use a vertical
-    * alignment of 4 as often as we can, this shouldn't happen very often.
-    */
-   if (brw->gen == 7 && mt->align_h == 2 &&
-       brw->format_supported_as_render_target[format]) {
-      return I915_TILING_X;
-   }
-
-   return I915_TILING_Y | I915_TILING_X;
-}
-
 
 /**
  * Choose an appropriate uncompressed format for a requested
@@ -612,36 +558,82 @@
    }
 }
 
+/* This function computes Yf/Ys tiled bo size, alignment and pitch. */
+static unsigned long
+intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
+                        unsigned long *pitch)
+{
+   const uint32_t bpp = mt->cpp * 8;
+   const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
+   uint32_t tile_width, tile_height;
+   unsigned long stride, size, aligned_y;
+
+   assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
+
+   switch (bpp) {
+   case 8:
+      tile_height = 64;
+      break;
+   case 16:
+   case 32:
+      tile_height = 32;
+      break;
+   case 64:
+   case 128:
+      tile_height = 16;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
+      tile_height *= 4;
+
+   aligned_y = ALIGN(mt->total_height, tile_height);
+   stride = mt->total_width * mt->cpp;
+   tile_width = tile_height * mt->cpp * aspect_ratio;
+   stride = ALIGN(stride, tile_width);
+   size = stride * aligned_y;
+
+   if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YF) {
+      assert(size % 4096 == 0);
+      *alignment = 4096;
+   } else {
+      assert(size % (64 * 1024) == 0);
+      *alignment = 64 * 1024;
+   }
+   *pitch = stride;
+   return size;
+}
 
 struct intel_mipmap_tree *
 intel_miptree_create(struct brw_context *brw,
-		     GLenum target,
-		     mesa_format format,
-		     GLuint first_level,
-		     GLuint last_level,
-		     GLuint width0,
-		     GLuint height0,
-		     GLuint depth0,
-		     bool expect_accelerated_upload,
+                     GLenum target,
+                     mesa_format format,
+                     GLuint first_level,
+                     GLuint last_level,
+                     GLuint width0,
+                     GLuint height0,
+                     GLuint depth0,
                      GLuint num_samples,
-                     enum intel_miptree_tiling_mode requested_tiling,
-                     bool force_all_slices_at_each_lod)
+                     uint32_t layout_flags)
 {
    struct intel_mipmap_tree *mt;
    mesa_format tex_format = format;
    mesa_format etc_format = MESA_FORMAT_NONE;
    GLuint total_width, total_height;
+   uint32_t alloc_flags = 0;
 
    format = intel_lower_compressed_format(brw, format);
 
    etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 
+   assert((layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) == 0);
+   assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
    mt = intel_miptree_create_layout(brw, target, format,
-				      first_level, last_level, width0,
-				      height0, depth0,
-                                    false, num_samples,
-                                    force_all_slices_at_each_lod,
-                                    false /*disable_aux_buffers*/);
+                                    first_level, last_level, width0,
+                                    height0, depth0, num_samples,
+                                    layout_flags);
    /*
     * pitch == 0 || height == 0  indicates the null texture
     */
@@ -659,25 +651,33 @@
       total_height = ALIGN(total_height, 64);
    }
 
-   uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
-                                                 num_samples, requested_tiling,
-                                                 mt);
    bool y_or_x = false;
 
-   if (tiling == (I915_TILING_Y | I915_TILING_X)) {
+   if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) {
       y_or_x = true;
       mt->tiling = I915_TILING_Y;
-   } else {
-      mt->tiling = tiling;
    }
 
+   if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD)
+      alloc_flags |= BO_ALLOC_FOR_RENDER;
+
    unsigned long pitch;
    mt->etc_format = etc_format;
-   mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
-                                     total_width, total_height, mt->cpp,
-                                     &mt->tiling, &pitch,
-                                     (expect_accelerated_upload ?
-                                      BO_ALLOC_FOR_RENDER : 0));
+
+   if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
+      unsigned alignment = 0;
+      unsigned long size;
+      size = intel_get_yf_ys_bo_size(mt, &alignment, &pitch);
+      assert(size);
+      mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree",
+                                             size, alignment);
+   } else {
+      mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
+                                        total_width, total_height, mt->cpp,
+                                        &mt->tiling, &pitch,
+                                        alloc_flags);
+   }
+
    mt->pitch = pitch;
 
    /* If the BO is too large to fit in the aperture, we need to use the
@@ -691,10 +691,8 @@
       mt->tiling = I915_TILING_X;
       drm_intel_bo_unreference(mt->bo);
       mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
-                                        total_width, total_height, mt->cpp,
-                                        &mt->tiling, &pitch,
-                                        (expect_accelerated_upload ?
-                                         BO_ALLOC_FOR_RENDER : 0));
+                                  total_width, total_height, mt->cpp,
+                                  &mt->tiling, &pitch, alloc_flags);
       mt->pitch = pitch;
    }
 
@@ -707,6 +705,7 @@
 
 
    if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
+      assert(mt->num_samples > 1);
       if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) {
          intel_miptree_release(&mt);
          return NULL;
@@ -718,8 +717,11 @@
     * Allocation of the MCS miptree will be deferred until the first fast
     * clear actually occurs.
     */
-   if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
+   if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
+       intel_miptree_is_fast_clear_capable(brw, mt)) {
       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
+      assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1);
+   }
 
    return mt;
 }
@@ -733,7 +735,7 @@
                             uint32_t height,
                             uint32_t depth,
                             int pitch,
-                            bool disable_aux_buffers)
+                            uint32_t layout_flags)
 {
    struct intel_mipmap_tree *mt;
    uint32_t tiling, swizzle;
@@ -754,11 +756,17 @@
 
    target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
 
+   /* The BO already has a tiling format and we shouldn't confuse the lower
+    * layers by making it try to find a tiling format again.
+    */
+   assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
+   assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);
+
+   layout_flags |= MIPTREE_LAYOUT_FOR_BO;
    mt = intel_miptree_create_layout(brw, target, format,
                                     0, 0,
-                                    width, height, depth,
-                                    true, 0, false,
-                                    disable_aux_buffers);
+                                    width, height, depth, 0,
+                                    layout_flags);
    if (!mt)
       return NULL;
 
@@ -808,7 +816,7 @@
                                                  height,
                                                  1,
                                                  pitch,
-                                                 false);
+                                                 0);
    if (!singlesample_mt)
       goto fail;
 
@@ -817,7 +825,8 @@
     * Allocation of the MCS miptree will be deferred until the first fast
     * clear actually occurs.
     */
-   if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt))
+   if (intel_tiling_supports_non_msrt_mcs(intel, singlesample_mt->tiling) &&
+       intel_miptree_is_fast_clear_capable(intel, singlesample_mt))
       singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
 
    if (num_samples == 0) {
@@ -864,10 +873,13 @@
    uint32_t depth = 1;
    bool ok;
    GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+   const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+                                 MIPTREE_LAYOUT_TILING_ANY;
+
 
    mt = intel_miptree_create(brw, target, format, 0, 0,
-			     width, height, depth, true, num_samples,
-                             INTEL_MIPTREE_TILING_ANY, false);
+                             width, height, depth, num_samples,
+                             layout_flags);
    if (!mt)
       goto fail;
 
@@ -1258,8 +1270,10 @@
    assert(src_mt->format == dst_mt->format);
 
    if (dst_mt->compressed) {
-      height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
-      width = ALIGN(width, dst_mt->align_w);
+      unsigned int i, j;
+      _mesa_get_format_block_size(dst_mt->format, &i, &j);
+      height = ALIGN(height, j) / j;
+      width = ALIGN(width, i);
    }
 
    /* If it's a packed depth/stencil buffer with separate stencil, the blit
@@ -1370,6 +1384,8 @@
     *
     *     "The MCS surface must be stored as Tile Y."
     */
+   const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+                              MIPTREE_LAYOUT_TILING_Y;
    mt->mcs_mt = intel_miptree_create(brw,
                                      mt->target,
                                      format,
@@ -1378,10 +1394,8 @@
                                      mt->logical_width0,
                                      mt->logical_height0,
                                      mt->logical_depth0,
-                                     true,
                                      0 /* num_samples */,
-                                     INTEL_MIPTREE_TILING_Y,
-                                     false);
+                                     mcs_flags);
 
    /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
     *
@@ -1429,6 +1443,11 @@
    unsigned mcs_height =
       ALIGN(mt->logical_height0, height_divisor) / height_divisor;
    assert(mt->logical_depth0 == 1);
+   uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+                           MIPTREE_LAYOUT_TILING_Y;
+   if (brw->gen >= 8) {
+      layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+   }
    mt->mcs_mt = intel_miptree_create(brw,
                                      mt->target,
                                      format,
@@ -1437,10 +1456,8 @@
                                      mcs_width,
                                      mcs_height,
                                      mt->logical_depth0,
-                                     true,
                                      0 /* num_samples */,
-                                     INTEL_MIPTREE_TILING_Y,
-                                     false);
+                                     layout_flags);
 
    return mt->mcs_mt;
 }
@@ -1500,21 +1517,23 @@
    /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
     * adjustments required for Z_Height and Z_Width based on multisampling.
     */
-   switch (mt->num_samples) {
-   case 0:
-   case 1:
-      break;
-   case 2:
-   case 4:
-      z_width *= 2;
-      z_height *= 2;
-      break;
-   case 8:
-      z_width *= 4;
-      z_height *= 2;
-      break;
-   default:
-      unreachable("unsupported sample count");
+   if (brw->gen < 9) {
+      switch (mt->num_samples) {
+      case 0:
+      case 1:
+         break;
+      case 2:
+      case 4:
+         z_width *= 2;
+         z_height *= 2;
+         break;
+      case 8:
+         z_width *= 4;
+         z_height *= 2;
+         break;
+      default:
+         unreachable("unsupported sample count");
+      }
    }
 
    const unsigned vertical_align = 8; /* 'j' in the docs */
@@ -1682,11 +1701,15 @@
                              struct intel_mipmap_tree *mt)
 {
    struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
-   const bool force_all_slices_at_each_lod = brw->gen == 6;
+   uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
+
+   if (brw->gen == 6)
+      layout_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
 
    if (!buf)
       return NULL;
 
+   layout_flags |= MIPTREE_LAYOUT_TILING_ANY;
    buf->mt = intel_miptree_create(brw,
                                   mt->target,
                                   mt->format,
@@ -1695,10 +1718,8 @@
                                   mt->logical_width0,
                                   mt->logical_height0,
                                   mt->logical_depth0,
-                                  true,
                                   mt->num_samples,
-                                  INTEL_MIPTREE_TILING_ANY,
-                                  force_all_slices_at_each_lod);
+                                  layout_flags);
    if (!buf->mt) {
       free(buf);
       return NULL;
@@ -2128,9 +2149,8 @@
    map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
                                   0, 0,
                                   map->w, map->h, 1,
-                                  false, 0,
-                                  INTEL_MIPTREE_TILING_NONE,
-                                  false);
+                                  0, MIPTREE_LAYOUT_TILING_NONE);
+
    if (!map->mt) {
       fprintf(stderr, "Failed to allocate blit temporary\n");
       goto fail;
@@ -2675,7 +2695,9 @@
    } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
       intel_miptree_map_blit(brw, mt, map, level, slice);
 #if defined(USE_SSE41)
-   } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) {
+   } else if (!(mode & GL_MAP_WRITE_BIT) &&
+              !mt->compressed && cpu_has_sse4_1 &&
+              (mt->pitch % 16 == 0)) {
       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
 #endif
    } else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_mipmap_tree.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_mipmap_tree.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_mipmap_tree.h	2015-09-16 14:36:09.000000000 +0000
@@ -330,6 +330,13 @@
    struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */
 };
 
+/* Tile resource modes */
+enum intel_miptree_tr_mode {
+   INTEL_MIPTREE_TRMODE_NONE,
+   INTEL_MIPTREE_TRMODE_YF,
+   INTEL_MIPTREE_TRMODE_YS
+};
+
 struct intel_mipmap_tree
 {
    /** Buffer object containing the pixel data. */
@@ -338,6 +345,7 @@
    uint32_t pitch; /**< pitch in bytes. */
 
    uint32_t tiling; /**< One of the I915_TILING_* flags */
+   enum intel_miptree_tr_mode tr_mode;
 
    /* Effectively the key:
     */
@@ -508,25 +516,32 @@
    GLuint refcount;
 };
 
-enum intel_miptree_tiling_mode {
-   INTEL_MIPTREE_TILING_ANY,
-   INTEL_MIPTREE_TILING_Y,
-   INTEL_MIPTREE_TILING_NONE,
-};
-
-bool
-intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
-                                       struct intel_mipmap_tree *mt);
-
 void
 intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
                                  struct intel_mipmap_tree *mt,
                                  unsigned *width_px, unsigned *height);
-
+bool
+intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling);
+bool
+intel_miptree_is_fast_clear_capable(struct brw_context *brw,
+                                    struct intel_mipmap_tree *mt);
 bool
 intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
                                  struct intel_mipmap_tree *mt);
 
+enum {
+   MIPTREE_LAYOUT_ACCELERATED_UPLOAD       = 1 << 0,
+   MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD   = 1 << 1,
+   MIPTREE_LAYOUT_FOR_BO                   = 1 << 2,
+   MIPTREE_LAYOUT_DISABLE_AUX              = 1 << 3,
+   MIPTREE_LAYOUT_FORCE_HALIGN16           = 1 << 4,
+
+   MIPTREE_LAYOUT_TILING_Y                 = 1 << 5,
+   MIPTREE_LAYOUT_TILING_NONE              = 1 << 6,
+   MIPTREE_LAYOUT_TILING_ANY               = MIPTREE_LAYOUT_TILING_Y |
+                                             MIPTREE_LAYOUT_TILING_NONE,
+};
+
 struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
                                                GLenum target,
 					       mesa_format format,
@@ -535,10 +550,8 @@
                                                GLuint width0,
                                                GLuint height0,
                                                GLuint depth0,
-					       bool expect_accelerated_upload,
                                                GLuint num_samples,
-                                               enum intel_miptree_tiling_mode,
-                                               bool force_all_slices_at_each_lod);
+                                               uint32_t flags);
 
 struct intel_mipmap_tree *
 intel_miptree_create_for_bo(struct brw_context *brw,
@@ -549,7 +562,7 @@
                             uint32_t height,
                             uint32_t depth,
                             int pitch,
-                            bool disable_aux_buffers);
+                            uint32_t layout_flags);
 
 void
 intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
@@ -753,7 +766,10 @@
                                      const struct intel_mipmap_tree *mt,
                                      unsigned level);
 
-void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt);
+void
+brw_miptree_layout(struct brw_context *brw,
+                   struct intel_mipmap_tree *mt,
+                   uint32_t layout_flags);
 
 void *intel_miptree_map_raw(struct brw_context *brw,
                             struct intel_mipmap_tree *mt);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_pixel_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_pixel_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_pixel_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_pixel_draw.c	2015-09-16 14:36:09.000000000 +0000
@@ -121,7 +121,7 @@
                                   src_offset,
                                   width, height, 1,
                                   src_stride,
-                                  false /*disable_aux_buffers*/);
+                                  0);
    if (!pbo_mt)
       return false;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_pixel_read.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_pixel_read.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_pixel_read.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_pixel_read.c	2015-09-16 14:36:09.000000000 +0000
@@ -109,6 +109,10 @@
        pack->Invert)
       return false;
 
+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
    /* This renderbuffer can come from a texture.  In this case, we impose
     * some of the same restrictions we have for textures and adjust for
     * miplevels.
@@ -247,7 +251,7 @@
           * rendered to via a PBO at any point, so it seems better to just
           * flush here unconditionally.
           */
-         intel_batchbuffer_emit_mi_flush(brw);
+         brw_emit_mi_flush(brw);
          return;
       }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_reg.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_reg.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_reg.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_reg.h	2015-09-16 14:36:09.000000000 +0000
@@ -47,6 +47,9 @@
 /* Load a value from memory into a register.  Only available on Gen7+. */
 #define GEN7_MI_LOAD_REGISTER_MEM	(CMD_MI | (0x29 << 23))
 # define MI_LOAD_REGISTER_MEM_USE_GGTT		(1 << 22)
+/* Haswell resource streamer (RS) control */
+#define MI_RS_CONTROL                   (CMD_MI | (0x6 << 23))
+#define MI_RS_STORE_DATA_IMM            (CMD_MI | (0x2b << 23))
 
 /* Manipulate the predicate bit based on some register values. Only on Gen7+ */
 #define GEN7_MI_PREDICATE		(CMD_MI | (0xC << 23))
@@ -102,6 +105,8 @@
 
 #define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22))
 
+#define XY_FAST_COPY_BLT_CMD             (CMD_2D | (0x42 << 22))
+
 #define XY_TEXT_IMMEDIATE_BLIT_CMD	(CMD_2D | (0x31 << 22))
 # define XY_TEXT_BYTE_PACKED		(1 << 16)
 
@@ -111,10 +116,24 @@
 #define XY_SRC_TILED		(1 << 15)
 #define XY_DST_TILED		(1 << 11)
 
+/* BR00 */
+#define XY_FAST_SRC_TILED_64K        (3 << 20)
+#define XY_FAST_SRC_TILED_Y          (2 << 20)
+#define XY_FAST_SRC_TILED_X          (1 << 20)
+
+#define XY_FAST_DST_TILED_64K        (3 << 13)
+#define XY_FAST_DST_TILED_Y          (2 << 13)
+#define XY_FAST_DST_TILED_X          (1 << 13)
+
 /* BR13 */
 #define BR13_8			(0x0 << 24)
 #define BR13_565		(0x1 << 24)
 #define BR13_8888		(0x3 << 24)
+#define BR13_16161616		(0x4 << 24)
+#define BR13_32323232		(0x5 << 24)
+
+#define XY_FAST_SRC_TRMODE_YF        (1 << 31)
+#define XY_FAST_DST_TRMODE_YF        (1 << 30)
 
 /* Pipeline Statistics Counter Registers */
 #define IA_VERTICES_COUNT               0x2310
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_screen.c	2015-09-16 14:36:09.000000000 +0000
@@ -39,6 +39,7 @@
 #include "swrast/s_renderbuffer.h"
 #include "util/ralloc.h"
 #include "brw_shader.h"
+#include "glsl/nir/nir.h"
 
 #include "utils.h"
 #include "xmlpool.h"
@@ -121,7 +122,7 @@
 {
    struct gl_framebuffer *fb = ctx->DrawBuffer;
 
-   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
       struct intel_renderbuffer *irb =
 	 intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 
@@ -228,6 +229,12 @@
    { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } },
 
+   { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
+
+   { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_COMPONENTS_RG, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } },
+
    { __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
        { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
@@ -1122,6 +1129,50 @@
       return true;
 }
 
+static int /* result: 0 = no usable timestamp, 1 = unshifted, 2 = shifted into upper dword, 3 = full 36bit read */
+intel_detect_timestamp(struct intel_screen *screen)
+{
+   uint64_t dummy = 0, last = 0;
+   int upper, lower, loops;
+
+   /* On 64bit systems, some old kernels trigger a hw bug resulting in the
+    * TIMESTAMP register being shifted and the low 32bits always zero.
+    *
+    * More recent kernels offer an interface to read the full 36bits
+    * everywhere (probed here by a read at TIMESTAMP | 1; success yields 3).
+    */
+   if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0)
+      return 3;
+
+   /* Determine if we have a 32bit or 64bit kernel by inspecting the
+    * upper 32bits for a rapidly changing timestamp.
+    */
+   if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &last))
+      return 0; /* initial read failed: report no timestamp */
+
+   upper = lower = 0;
+   for (loops = 0; loops < 10; loops++) {
+      /* The TIMESTAMP should change every 80ns, so several round trips
+       * through the kernel should be enough to advance it.
+       */
+      if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &dummy))
+         return 0; /* read failed mid-probe: report no timestamp */
+
+      upper += (dummy >> 32) != (last >> 32);
+      if (upper > 1) /* beware 32bit counter overflow: one change alone is not trusted */
+         return 2; /* upper dword holds the low 32bits of the timestamp */
+
+      lower += (dummy & 0xffffffff) != (last & 0xffffffff);
+      if (lower > 1) /* likewise require two observed changes of the low dword */
+         return 1; /* timestamp is unshifted */
+
+      last = dummy; /* next sample is compared against this one */
+   }
+
+   /* No advancement within 10 samples? No timestamp! */
+   return 0;
+}
+
 /**
  * Return array of MSAA modes supported by the hardware. The array is
  * zero-terminated and sorted in decreasing order.
@@ -1168,7 +1219,7 @@
    __DRIconfig **configs = NULL;
 
    /* Generate singlesample configs without accumulation buffer. */
-   for (int i = 0; i < ARRAY_SIZE(formats); i++) {
+   for (unsigned i = 0; i < ARRAY_SIZE(formats); i++) {
       __DRIconfig **new_configs;
       int num_depth_stencil_bits = 2;
 
@@ -1205,7 +1256,7 @@
    /* Generate the minimum possible set of configs that include an
     * accumulation buffer.
     */
-   for (int i = 0; i < ARRAY_SIZE(formats); i++) {
+   for (unsigned i = 0; i < ARRAY_SIZE(formats); i++) {
       __DRIconfig **new_configs;
 
       if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) {
@@ -1237,7 +1288,7 @@
     * supported.  Singlebuffer configs are not supported because no one wants
     * them.
     */
-   for (int i = 0; i < ARRAY_SIZE(formats); i++) {
+   for (unsigned i = 0; i < ARRAY_SIZE(formats); i++) {
       if (devinfo->gen < 6)
          break;
 
@@ -1308,11 +1359,6 @@
    }
 }
 
-/* drop when libdrm 2.4.61 is released */
-#ifndef I915_PARAM_REVISION
-#define I915_PARAM_REVISION 32
-#endif
-
 static int
 brw_get_revision(int fd)
 {
@@ -1331,6 +1377,11 @@
    return revision;
 }
 
+/* Drop when resource streamer (RS) headers get pulled to libdrm */
+#ifndef I915_PARAM_HAS_RESOURCE_STREAMER
+#define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#endif
+
 /**
  * This is the driver specific part of the createNewScreen entry point.
  * Called when using DRI2.
@@ -1372,9 +1423,12 @@
    if (!intelScreen->devinfo)
       return false;
 
+   brw_process_intel_debug_variable(intelScreen);
+
    intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7;
 
    intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
+   intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
 
    const char *force_msaa = getenv("INTEL_FORCE_MSAA");
    if (force_msaa) {
@@ -1420,6 +1474,15 @@
    intelScreen->compiler = brw_compiler_create(intelScreen,
                                                intelScreen->devinfo);
 
+   if (intelScreen->devinfo->has_resource_streamer) {
+      int val = -1;
+      getparam.param = I915_PARAM_HAS_RESOURCE_STREAMER;
+      getparam.value = &val;
+
+      drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam);
+      intelScreen->has_resource_streamer = val > 0;
+   }
+
    return (const __DRIconfig**) intel_screen_make_configs(psp);
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_screen.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_screen.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_screen.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_screen.h	2015-09-16 14:36:09.000000000 +0000
@@ -52,6 +52,13 @@
 
    bool hw_has_swizzling;
 
+   int hw_has_timestamp;
+
+   /**
+    * Does the kernel support resource streamer?
+    */
+   bool has_resource_streamer;
+
    /**
     * Does the kernel support context reset notifications?
     */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_syncobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_syncobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_syncobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_syncobj.c	2015-09-16 14:36:09.000000000 +0000
@@ -69,7 +69,7 @@
    assert(!fence->batch_bo);
    assert(!fence->signalled);
 
-   intel_batchbuffer_emit_mi_flush(brw);
+   brw_emit_mi_flush(brw);
    fence->batch_bo = brw->batch.bo;
    drm_intel_bo_reference(fence->batch_bo);
    intel_batchbuffer_flush(brw);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex.c	2015-09-16 14:36:09.000000000 +0000
@@ -93,7 +93,7 @@
    } else {
       intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj,
                                                           intel_image,
-                                                          false);
+                                                          0);
 
       /* Even if the object currently has a mipmap tree associated
        * with it, this one is a more likely candidate to represent the
@@ -144,10 +144,8 @@
                                               first_image->TexFormat,
                                               0, levels - 1,
                                               width, height, depth,
-                                              false, /* expect_accelerated */
                                               num_samples,
-                                              INTEL_MIPTREE_TILING_ANY,
-                                              false);
+                                              MIPTREE_LAYOUT_TILING_ANY);
 
       if (intel_texobj->mt == NULL) {
          return false;
@@ -341,7 +339,7 @@
                                   buffer_offset,
                                   image->Width, image->Height, image->Depth,
                                   row_stride,
-                                  false /*disable_aux_buffers*/);
+                                  0);
    if (!intel_texobj->mt)
       return false;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_copy.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_copy.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_copy.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_copy.c	2015-09-16 14:36:09.000000000 +0000
@@ -55,6 +55,10 @@
    const GLenum internalFormat = intelImage->base.Base.InternalFormat;
    bool ret;
 
+   /* No pixel transfer operations (scale, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
    intel_prepare_render(brw);
 
    /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex.h	2015-09-16 14:36:09.000000000 +0000
@@ -53,7 +53,7 @@
 intel_miptree_create_for_teximage(struct brw_context *brw,
 				  struct intel_texture_object *intelObj,
 				  struct intel_texture_image *intelImage,
-				  bool expect_accelerated_upload);
+                                  uint32_t layout_flags);
 
 GLuint intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_image.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_image.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_image.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_image.c	2015-09-16 14:36:09.000000000 +0000
@@ -36,7 +36,7 @@
 intel_miptree_create_for_teximage(struct brw_context *brw,
 				  struct intel_texture_object *intelObj,
 				  struct intel_texture_image *intelImage,
-				  bool expect_accelerated_upload)
+                                  uint32_t layout_flags)
 {
    GLuint lastLevel;
    int width, height, depth;
@@ -79,10 +79,8 @@
 			       width,
 			       height,
 			       depth,
-			       expect_accelerated_upload,
                                intelImage->base.Base.NumSamples,
-                               INTEL_MIPTREE_TILING_ANY,
-                               false);
+                               layout_flags | MIPTREE_LAYOUT_TILING_ANY);
 }
 
 static void
@@ -99,8 +97,8 @@
 
    DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
        __func__, _mesa_get_format_name(texImage->TexFormat),
-       _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
-       _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type),
+       _mesa_enum_to_string(texImage->TexObject->Target),
+       _mesa_enum_to_string(format), _mesa_enum_to_string(type),
        texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
 
    /* Allocate storage for texture data. */
@@ -155,7 +153,7 @@
                            GLuint width, GLuint height,
                            GLuint pitch,
                            GLuint tile_x, GLuint tile_y,
-                           bool disable_aux_buffers)
+                           uint32_t layout_flags)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_texture_image *intel_image = intel_texture_image(image);
@@ -171,7 +169,7 @@
 
    intel_image->mt = intel_miptree_create_for_bo(brw, bo, image->TexFormat,
                                                  0, width, height, 1, pitch,
-                                                 disable_aux_buffers);
+                                                 layout_flags);
    if (intel_image->mt == NULL)
        return;
    intel_image->mt->target = target;
@@ -255,8 +253,7 @@
                               rb->Base.Base.Width,
                               rb->Base.Base.Height,
                               rb->mt->pitch,
-                              0, 0,
-                              false /*disable_aux_buffers*/);
+                              0, 0, 0);
    _mesa_unlock_texture(&brw->ctx, texObj);
 }
 
@@ -349,7 +346,7 @@
                               image->width,  image->height,
                               image->pitch,
                               image->tile_x, image->tile_y,
-                              true /*disable_aux_buffers*/);
+                              MIPTREE_LAYOUT_DISABLE_AUX);
 }
 
 /**
@@ -474,39 +471,44 @@
 }
 
 static void
-intel_get_tex_image(struct gl_context *ctx,
-                    GLenum format, GLenum type, GLvoid *pixels,
-                    struct gl_texture_image *texImage) {
+intel_get_tex_sub_image(struct gl_context *ctx,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLint depth,
+                        GLenum format, GLenum type, GLvoid *pixels,
+                        struct gl_texture_image *texImage)
+{ /* Tries, in order: meta PBO path (only if a pack buffer is bound), tiled memcpy, generic meta path. */
    struct brw_context *brw = brw_context(ctx);
    bool ok;
 
    DBG("%s\n", __func__);
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0,
-                                        texImage->Width, texImage->Height,
-                                        texImage->Depth, format, type,
+      if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage,
+                                        xoffset, yoffset, zoffset,
+                                        width, height, depth, format, type,
                                         pixels, &ctx->Pack)) {
          /* Flush to guarantee coherency between the render cache and other
           * caches the PBO could potentially be bound to after this point.
           * See the related comment in intelReadPixels() for a more detailed
           * explanation.
           */
-         intel_batchbuffer_emit_mi_flush(brw);
+         brw_emit_mi_flush(brw);
          return;
       }
 
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
    }
 
-   ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, 0, 0,
-                                          texImage->Width, texImage->Height,
+   ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
+                                          width, height, /* NOTE(review): zoffset and depth are not forwarded to this helper - confirm it rejects anything but single-slice targets */
                                           format, type, pixels, &ctx->Pack);
 
    if(ok)
       return;
 
-   _mesa_meta_GetTexImage(ctx, format, type, pixels, texImage);
+   _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
+                             width, height, depth,
+                             format, type, pixels, texImage); /* reached only when the tiled memcpy path returned false */
 
    DBG("%s - DONE\n", __func__);
 }
@@ -517,5 +519,5 @@
    functions->TexImage = intelTexImage;
    functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
    functions->BindRenderbufferTexImage = intel_bind_renderbuffer_tex_image;
-   functions->GetTexImage = intel_get_tex_image;
+   functions->GetTexSubImage = intel_get_tex_sub_image;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_subimage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_subimage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_subimage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_subimage.c	2015-09-16 14:36:09.000000000 +0000
@@ -118,6 +118,10 @@
        packing->Invert)
       return false;
 
+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
    if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
                          INTEL_UPLOAD))
       return false;
@@ -206,8 +210,8 @@
 
    DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
        __func__, _mesa_get_format_name(texImage->TexFormat),
-       _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
-       _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type),
+       _mesa_enum_to_string(texImage->TexObject->Target),
+       _mesa_enum_to_string(format), _mesa_enum_to_string(type),
        texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
 
    ok = _mesa_meta_pbo_TexSubImage(ctx, dims, texImage,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/intel_tex_validate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/intel_tex_validate.c	2015-09-16 14:36:09.000000000 +0000
@@ -136,6 +136,8 @@
                  _mesa_get_format_name(firstImage->base.Base.TexFormat),
                  width, height, depth, validate_last_level + 1);
 
+      const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+                                    MIPTREE_LAYOUT_TILING_ANY;
       intelObj->mt = intel_miptree_create(brw,
                                           intelObj->base.Target,
 					  firstImage->base.Base.TexFormat,
@@ -144,10 +146,8 @@
                                           width,
                                           height,
                                           depth,
-					  true,
                                           0 /* num_samples */,
-                                          INTEL_MIPTREE_TILING_ANY,
-                                          false);
+                                          layout_flags);
       if (!intelObj->mt)
          return false;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -42,6 +42,7 @@
 	brw_ff_gs.c \
 	brw_ff_gs_emit.c \
 	brw_ff_gs.h \
+	brw_fs_builder.h \
 	brw_fs_channel_expressions.cpp \
 	brw_fs_cmod_propagation.cpp \
 	brw_fs_combine_constants.cpp \
@@ -49,7 +50,6 @@
 	brw_fs.cpp \
 	brw_fs_cse.cpp \
 	brw_fs_dead_code_eliminate.cpp \
-	brw_fs_fp.cpp \
 	brw_fs_generator.cpp \
 	brw_fs.h \
 	brw_fs_live_variables.cpp \
@@ -60,6 +60,8 @@
 	brw_fs_register_coalesce.cpp \
 	brw_fs_saturate_propagation.cpp \
 	brw_fs_sel_peephole.cpp \
+	brw_fs_surface_builder.cpp \
+	brw_fs_surface_builder.h \
 	brw_fs_vector_splitting.cpp \
 	brw_fs_visitor.cpp \
 	brw_gs.c \
@@ -86,6 +88,7 @@
 	brw_object_purgeable.c \
 	brw_packed_float.c \
 	brw_performance_monitor.c \
+	brw_pipe_control.c \
 	brw_primitive_restart.c \
 	brw_program.c \
 	brw_program.h \
@@ -122,6 +125,8 @@
 	brw_vec4.h \
 	brw_vec4_live_variables.cpp \
 	brw_vec4_live_variables.h \
+	brw_vec4_nir.cpp \
+	brw_vec4_gs_nir.cpp \
 	brw_vec4_reg_allocate.cpp \
 	brw_vec4_visitor.cpp \
 	brw_vec4_vp.cpp \
@@ -130,6 +135,7 @@
 	brw_vs.h \
 	brw_vs_state.c \
 	brw_vs_surface_state.c \
+	brw_vue_map.c \
 	brw_wm.c \
 	brw_wm.h \
 	brw_wm_iz.cpp \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -26,11 +26,13 @@
 #include "brw_cfg.h"
 #include "program/program.h"
 
+using namespace brw;
+
 class cmod_propagation_test : public ::testing::Test {
    virtual void SetUp();
 
 public:
-   struct brw_context *brw;
+   struct brw_compiler *compiler;
    struct brw_device_info *devinfo;
    struct gl_context *ctx;
    struct brw_wm_prog_data *prog_data;
@@ -42,31 +44,31 @@
 class cmod_propagation_fs_visitor : public fs_visitor
 {
 public:
-   cmod_propagation_fs_visitor(struct brw_context *brw,
+   cmod_propagation_fs_visitor(struct brw_compiler *compiler, /* test visitor now needs only a compiler, not a brw_context */
                                struct brw_wm_prog_data *prog_data,
                                struct gl_shader_program *shader_prog)
-      : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base,
-                   shader_prog, (struct gl_program *) NULL, 8) {}
+      : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
+                   &prog_data->base, shader_prog,
+                   (struct gl_program *) NULL, 8, -1) {} /* NOTE(review): 8 is presumably the dispatch width and -1 a "none" index - confirm against the fs_visitor constructor */
 };
 
 
 void cmod_propagation_test::SetUp()
 {
-   brw = (struct brw_context *)calloc(1, sizeof(*brw));
-   devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
-   brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
-   brw->intelScreen->devinfo = devinfo;
-   ctx = &brw->ctx;
+   ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); /* zero-filled standalone context; it is no longer embedded in a brw_context */
+   compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+   devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); /* sized by its own type (the removed line used sizeof(*brw)) */
+   compiler->devinfo = devinfo; /* presumably how the visitor reaches devinfo - confirm in fs_visitor */
 
    fp = ralloc(NULL, struct brw_fragment_program);
    prog_data = ralloc(NULL, struct brw_wm_prog_data);
    shader_prog = ralloc(NULL, struct gl_shader_program);
 
-   v = new cmod_propagation_fs_visitor(brw, prog_data, shader_prog);
+   v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog);
 
    _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
 
-   brw->gen = devinfo->gen = 4;
+   devinfo->gen = 4; /* NOTE(review): assigned after the visitor is constructed - confirm the constructor does not read gen */
 }
 
 static fs_inst *
@@ -101,13 +103,13 @@
 
 TEST_F(cmod_propagation_test, basic)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
-   v->emit(BRW_OPCODE_ADD, dest, src0, src1);
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.ADD(dest, src0, src1);
+   bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -133,13 +135,13 @@
 
 TEST_F(cmod_propagation_test, cmp_nonzero)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg nonzero(1.0f);
-   v->emit(BRW_OPCODE_ADD, dest, src0, src1);
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, nonzero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.ADD(dest, src0, src1);
+   bld.CMP(bld.null_reg_f(), dest, nonzero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -166,12 +168,12 @@
 
 TEST_F(cmod_propagation_test, non_cmod_instruction)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::uint_type);
    fs_reg src0 = v->vgrf(glsl_type::uint_type);
    fs_reg zero(0u);
-   v->emit(BRW_OPCODE_FBL, dest, src0);
-   v->emit(BRW_OPCODE_CMP, v->reg_null_ud, dest, zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.FBL(dest, src0);
+   bld.CMP(bld.null_reg_ud(), dest, zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -198,16 +200,15 @@
 
 TEST_F(cmod_propagation_test, intervening_flag_write)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
-   v->emit(BRW_OPCODE_ADD, dest, src0, src1);
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, src2, zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.ADD(dest, src0, src1);
+   bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
+   bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -237,17 +238,16 @@
 
 TEST_F(cmod_propagation_test, intervening_flag_read)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest0 = v->vgrf(glsl_type::float_type);
    fs_reg dest1 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
-   v->emit(BRW_OPCODE_ADD, dest0, src0, src1);
-   v->emit(BRW_OPCODE_SEL, dest1, src2, zero)
-      ->predicate = BRW_PREDICATE_NORMAL;
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.ADD(dest0, src0, src1);
+   set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+   bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -277,16 +277,16 @@
 
 TEST_F(cmod_propagation_test, intervening_dest_write)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::vec4_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::vec2_type);
    fs_reg zero(0.0f);
-   v->emit(BRW_OPCODE_ADD, offset(dest, 2), src0, src1);
-   v->emit(SHADER_OPCODE_TEX, dest, src2)
+   bld.ADD(offset(dest, bld, 2), src0, src1);
+   bld.emit(SHADER_OPCODE_TEX, dest, src2)
       ->regs_written = 4;
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, offset(dest, 2), zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -317,18 +317,16 @@
 
 TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest0 = v->vgrf(glsl_type::float_type);
    fs_reg dest1 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
-   v->emit(BRW_OPCODE_ADD, dest0, src0, src1)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
-   v->emit(BRW_OPCODE_SEL, dest1, src2, zero)
-      ->predicate = BRW_PREDICATE_NORMAL;
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
+   set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+   bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -358,14 +356,14 @@
 
 TEST_F(cmod_propagation_test, negate)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
-   v->emit(BRW_OPCODE_ADD, dest, src0, src1);
+   bld.ADD(dest, src0, src1);
    dest.negate = true;
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest, zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -391,13 +389,13 @@
 
 TEST_F(cmod_propagation_test, movnz)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_CMP, dest, src0, src1)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
-   v->emit(BRW_OPCODE_MOV, v->reg_null_f, dest)
-      ->conditional_mod = BRW_CONDITIONAL_NZ;
+   bld.CMP(dest, src0, src1, BRW_CONDITIONAL_GE);
+   set_condmod(BRW_CONDITIONAL_NZ,
+               bld.MOV(bld.null_reg_f(), dest));
 
    /* = Before =
     *
@@ -423,14 +421,14 @@
 
 TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::int_type);
    fs_reg src1 = v->vgrf(glsl_type::int_type);
    fs_reg zero(0.0f);
-   v->emit(BRW_OPCODE_ADD, dest, src0, src1);
-   v->emit(BRW_OPCODE_CMP, v->reg_null_f, retype(dest, BRW_REGISTER_TYPE_F),
-                                          zero)
-      ->conditional_mod = BRW_CONDITIONAL_GE;
+   bld.ADD(dest, src0, src1);
+   bld.CMP(bld.null_reg_f(), retype(dest, BRW_REGISTER_TYPE_F), zero,
+           BRW_CONDITIONAL_GE);
 
    /* = Before =
     *
@@ -457,15 +455,15 @@
 
 TEST_F(cmod_propagation_test, andnz_one)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
    fs_reg one(1);
 
-   v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
-      ->conditional_mod = BRW_CONDITIONAL_L;
-   v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one)
-      ->conditional_mod = BRW_CONDITIONAL_NZ;
+   bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+   set_condmod(BRW_CONDITIONAL_NZ,
+               bld.AND(bld.null_reg_d(), dest, one));
 
    /* = Before =
     * 0: cmp.l.f0(8)     dest:F  src0:F  0F
@@ -492,15 +490,15 @@
 
 TEST_F(cmod_propagation_test, andnz_non_one)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
    fs_reg nonone(38);
 
-   v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
-      ->conditional_mod = BRW_CONDITIONAL_L;
-   v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, nonone)
-      ->conditional_mod = BRW_CONDITIONAL_NZ;
+   bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+   set_condmod(BRW_CONDITIONAL_NZ,
+               bld.AND(bld.null_reg_d(), dest, nonone));
 
    /* = Before =
     * 0: cmp.l.f0(8)     dest:F  src0:F  0F
@@ -527,15 +525,15 @@
 
 TEST_F(cmod_propagation_test, andz_one)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dest = v->vgrf(glsl_type::int_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg zero(0.0f);
    fs_reg one(1);
 
-   v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
-      ->conditional_mod = BRW_CONDITIONAL_L;
-   v->emit(BRW_OPCODE_AND, v->reg_null_d, dest, one)
-      ->conditional_mod = BRW_CONDITIONAL_Z;
+   bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+   set_condmod(BRW_CONDITIONAL_Z,
+               bld.AND(bld.null_reg_d(), dest, one));
 
    /* = Before =
     * 0: cmp.l.f0(8)     dest:F  src0:F  0F
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -26,11 +26,13 @@
 #include "brw_cfg.h"
 #include "program/program.h"
 
+using namespace brw;
+
 class saturate_propagation_test : public ::testing::Test {
    virtual void SetUp();
 
 public:
-   struct brw_context *brw;
+   struct brw_compiler *compiler;
    struct brw_device_info *devinfo;
    struct gl_context *ctx;
    struct brw_wm_prog_data *prog_data;
@@ -42,31 +44,31 @@
 class saturate_propagation_fs_visitor : public fs_visitor
 {
 public:
-   saturate_propagation_fs_visitor(struct brw_context *brw,
+   saturate_propagation_fs_visitor(struct brw_compiler *compiler,
                                    struct brw_wm_prog_data *prog_data,
                                    struct gl_shader_program *shader_prog)
-      : fs_visitor(brw, NULL, MESA_SHADER_FRAGMENT, NULL, &prog_data->base,
-                   shader_prog, (struct gl_program *) NULL, 8) {}
+      : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
+                   &prog_data->base, shader_prog,
+                   (struct gl_program *) NULL, 8, -1) {}
 };
 
 
 void saturate_propagation_test::SetUp()
 {
-   brw = (struct brw_context *)calloc(1, sizeof(*brw));
-   devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
-   brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
-   brw->intelScreen->devinfo = devinfo;
-   ctx = &brw->ctx;
+   ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+   compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+   devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+   compiler->devinfo = devinfo;
 
    fp = ralloc(NULL, struct brw_fragment_program);
    prog_data = ralloc(NULL, struct brw_wm_prog_data);
    shader_prog = ralloc(NULL, struct gl_shader_program);
 
-   v = new saturate_propagation_fs_visitor(brw, prog_data, shader_prog);
+   v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog);
 
    _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
 
-   brw->gen = devinfo->gen = 4;
+   devinfo->gen = 4;
 }
 
 static fs_inst *
@@ -101,13 +103,13 @@
 
 TEST_F(saturate_propagation_test, basic)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
+   bld.ADD(dst0, src0, src1);
+   set_saturate(true, bld.MOV(dst1, dst0));
 
    /* = Before =
     *
@@ -136,15 +138,15 @@
 
 TEST_F(saturate_propagation_test, other_non_saturated_use)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg dst2 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
-   v->emit(BRW_OPCODE_ADD, dst2, dst0, src0);
+   bld.ADD(dst0, src0, src1);
+   set_saturate(true, bld.MOV(dst1, dst0));
+   bld.ADD(dst2, dst0, src0);
 
    /* = Before =
     *
@@ -174,14 +176,14 @@
 
 TEST_F(saturate_propagation_test, predicated_instruction)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_ADD, dst0, src0, src1)
+   bld.ADD(dst0, src0, src1)
       ->predicate = BRW_PREDICATE_NORMAL;
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
+   set_saturate(true, bld.MOV(dst1, dst0));
 
    /* = Before =
     *
@@ -209,14 +211,14 @@
 
 TEST_F(saturate_propagation_test, neg_mov_sat)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
+   bld.ADD(dst0, src0, src1);
    dst0.negate = true;
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
+   set_saturate(true, bld.MOV(dst1, dst0));
 
    /* = Before =
     *
@@ -244,14 +246,14 @@
 
 TEST_F(saturate_propagation_test, abs_mov_sat)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
+   bld.ADD(dst0, src0, src1);
    dst0.abs = true;
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
+   set_saturate(true, bld.MOV(dst1, dst0));
 
    /* = Before =
     *
@@ -279,16 +281,15 @@
 
 TEST_F(saturate_propagation_test, producer_saturates)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg dst2 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_ADD, dst0, src0, src1)
-      ->saturate = true;
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
-   v->emit(BRW_OPCODE_MOV, dst2, dst0);
+   set_saturate(true, bld.ADD(dst0, src0, src1));
+   set_saturate(true, bld.MOV(dst1, dst0));
+   bld.MOV(dst2, dst0);
 
    /* = Before =
     *
@@ -319,16 +320,15 @@
 
 TEST_F(saturate_propagation_test, intervening_saturating_copy)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg dst2 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_ADD, dst0, src0, src1);
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
-   v->emit(BRW_OPCODE_MOV, dst2, dst0)
-      ->saturate = true;
+   bld.ADD(dst0, src0, src1);
+   set_saturate(true, bld.MOV(dst1, dst0));
+   set_saturate(true, bld.MOV(dst2, dst0));
 
    /* = Before =
     *
@@ -361,16 +361,16 @@
 
 TEST_F(saturate_propagation_test, intervening_dest_write)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::vec4_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
    fs_reg src2 = v->vgrf(glsl_type::vec2_type);
-   v->emit(BRW_OPCODE_ADD, offset(dst0, 2), src0, src1);
-   v->emit(SHADER_OPCODE_TEX, dst0, src2)
+   bld.ADD(offset(dst0, bld, 2), src0, src1);
+   bld.emit(SHADER_OPCODE_TEX, dst0, src2)
       ->regs_written = 4;
-   v->emit(BRW_OPCODE_MOV, dst1, offset(dst0, 2))
-      ->saturate = true;
+   set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2)));
 
    /* = Before =
     *
@@ -401,18 +401,17 @@
 
 TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
 {
+   const fs_builder &bld = v->bld;
    fs_reg dst0 = v->vgrf(glsl_type::float_type);
    fs_reg dst1 = v->vgrf(glsl_type::float_type);
    fs_reg dst2 = v->vgrf(glsl_type::float_type);
    fs_reg src0 = v->vgrf(glsl_type::float_type);
    fs_reg src1 = v->vgrf(glsl_type::float_type);
-   v->emit(BRW_OPCODE_MUL, dst0, src0, src1);
+   bld.MUL(dst0, src0, src1);
    dst0.negate = true;
-   v->emit(BRW_OPCODE_MOV, dst1, dst0)
-      ->saturate = true;
+   set_saturate(true, bld.MOV(dst1, dst0));
    dst0.negate = false;
-   v->emit(BRW_OPCODE_MOV, dst2, dst0)
-      ->saturate = true;
+   set_saturate(true, bld.MOV(dst2, dst0));
 
    /* = Before =
     *
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -33,7 +33,7 @@
    virtual void SetUp();
 
 public:
-   struct brw_context *brw;
+   struct brw_compiler *compiler;
    struct brw_device_info *devinfo;
    struct gl_context *ctx;
    struct gl_shader_program *shader_prog;
@@ -44,17 +44,17 @@
 class copy_propagation_vec4_visitor : public vec4_visitor
 {
 public:
-   copy_propagation_vec4_visitor(struct brw_context *brw,
+   copy_propagation_vec4_visitor(struct brw_compiler *compiler,
                                   struct gl_shader_program *shader_prog)
-      : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
+      : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
                      MESA_SHADER_VERTEX, NULL,
-                     false /* no_spills */,
-                     ST_NONE, ST_NONE, ST_NONE)
+                     false /* no_spills */, -1)
    {
    }
 
 protected:
-   virtual dst_reg *make_reg_for_system_value(ir_variable *ir)
+   virtual dst_reg *make_reg_for_system_value(int location,
+                                              const glsl_type *type)
    {
       unreachable("Not reached");
    }
@@ -93,21 +93,20 @@
 
 void copy_propagation_test::SetUp()
 {
-   brw = (struct brw_context *)calloc(1, sizeof(*brw));
-   devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
-   brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
-   brw->intelScreen->devinfo = devinfo;
-   ctx = &brw->ctx;
+   ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+   compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+   devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+   compiler->devinfo = devinfo;
 
    vp = ralloc(NULL, struct brw_vertex_program);
 
    shader_prog = ralloc(NULL, struct gl_shader_program);
 
-   v = new copy_propagation_vec4_visitor(brw, shader_prog);
+   v = new copy_propagation_vec4_visitor(compiler, shader_prog);
 
    _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
 
-   brw->gen = devinfo->gen = 4;
+   devinfo->gen = 4;
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp	2015-09-16 14:36:09.000000000 +0000
@@ -35,7 +35,7 @@
    virtual void SetUp();
 
 public:
-   struct brw_context *brw;
+   struct brw_compiler *compiler;
    struct brw_device_info *devinfo;
    struct gl_context *ctx;
    struct gl_shader_program *shader_prog;
@@ -47,17 +47,17 @@
 class register_coalesce_vec4_visitor : public vec4_visitor
 {
 public:
-   register_coalesce_vec4_visitor(struct brw_context *brw,
+   register_coalesce_vec4_visitor(struct brw_compiler *compiler,
                                   struct gl_shader_program *shader_prog)
-      : vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog,
+      : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
                      MESA_SHADER_VERTEX, NULL,
-                     false /* no_spills */,
-                     ST_NONE, ST_NONE, ST_NONE)
+                     false /* no_spills */, -1)
    {
    }
 
 protected:
-   virtual dst_reg *make_reg_for_system_value(ir_variable *ir)
+   virtual dst_reg *make_reg_for_system_value(int location,
+                                              const glsl_type *type)
    {
       unreachable("Not reached");
    }
@@ -96,21 +96,20 @@
 
 void register_coalesce_test::SetUp()
 {
-   brw = (struct brw_context *)calloc(1, sizeof(*brw));
-   devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw));
-   brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen));
-   brw->intelScreen->devinfo = devinfo;
-   ctx = &brw->ctx;
+   ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+   compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+   devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+   compiler->devinfo = devinfo;
 
    vp = ralloc(NULL, struct brw_vertex_program);
 
    shader_prog = ralloc(NULL, struct gl_shader_program);
 
-   v = new register_coalesce_vec4_visitor(brw, shader_prog);
+   v = new register_coalesce_vec4_visitor(compiler, shader_prog);
 
    _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
 
-   brw->gen = devinfo->gen = 4;
+   devinfo->gen = 4;
 }
 
 static void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -38,8 +38,8 @@
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	$(DEFINES) \
 	$(VISIBILITY_CFLAGS) \
-	$(NOUVEAU_CFLAGS)
+	$(NVVIEUX_CFLAGS)
 
 noinst_LTLIBRARIES = libnouveau_dri.la
 libnouveau_dri_la_SOURCES = $(NOUVEAU_C_FILES)
-libnouveau_dri_la_LIBADD = $(NOUVEAU_LIBS)
+libnouveau_dri_la_LIBADD = $(NVVIEUX_LIBS)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c	2015-09-16 14:36:09.000000000 +0000
@@ -223,6 +223,7 @@
 		      GLboolean index_bounds_valid,
 		      GLuint min_index, GLuint max_index,
 		      struct gl_transform_feedback_object *tfb_vertcount,
+                      unsigned stream,
 		      struct gl_buffer_object *indirect);
 
 static GLboolean
@@ -455,6 +456,7 @@
 		      GLboolean index_bounds_valid,
 		      GLuint min_index, GLuint max_index,
 		      struct gl_transform_feedback_object *tfb_vertcount,
+                      unsigned stream,
 		      struct gl_buffer_object *indirect)
 {
 	struct nouveau_render_state *render = to_render_state(ctx);
@@ -492,6 +494,7 @@
 			    GLboolean index_bounds_valid,
 			    GLuint min_index, GLuint max_index,
 			    struct gl_transform_feedback_object *tfb_vertcount,
+                            unsigned stream,
 			    struct gl_buffer_object *indirect)
 {
 	struct nouveau_context *nctx = to_nouveau_context(ctx);
@@ -501,12 +504,12 @@
 	if (nctx->fallback == HWTNL)
 		TAG(vbo_render_prims)(ctx, prims, nr_prims, ib,
 				      index_bounds_valid, min_index, max_index,
-				      tfb_vertcount, indirect);
+				      tfb_vertcount, stream, indirect);
 
 	if (nctx->fallback == SWTNL)
 		_tnl_draw_prims(ctx, prims, nr_prims, ib,
 				index_bounds_valid, min_index, max_index,
-				tfb_vertcount, indirect);
+				tfb_vertcount, stream, indirect);
 }
 
 void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c	2015-09-16 14:36:09.000000000 +0000
@@ -31,6 +31,8 @@
 #include "nv10_3d.xml.h"
 #include "nv10_driver.h"
 
+#include "util/simple_list.h"
+
 void
 nv10_emit_clip_plane(struct gl_context *ctx, int emit)
 {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c	2015-09-16 14:36:09.000000000 +0000
@@ -32,6 +32,8 @@
 #include "nv10_driver.h"
 #include "nv20_driver.h"
 
+#include "util/simple_list.h"
+
 #define LIGHT_MODEL_AMBIENT_R(side)			\
 	((side) ? NV20_3D_LIGHT_MODEL_BACK_AMBIENT_R :	\
 	 NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_R)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_blit.c	2015-09-16 14:36:09.000000000 +0000
@@ -28,6 +28,7 @@
 #include "radeon_common.h"
 #include "r200_context.h"
 #include "r200_blit.h"
+#include "r200_tex.h"
 
 static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
                                   int reg, int count)
@@ -40,22 +41,42 @@
 /* common formats supported as both textures and render targets */
 unsigned r200_check_blit(mesa_format mesa_format, uint32_t dst_pitch)
 {
-    /* XXX others?  BE/LE? */
-    switch (mesa_format) {
-    case MESA_FORMAT_B8G8R8A8_UNORM:
-    case MESA_FORMAT_B8G8R8X8_UNORM:
-    case MESA_FORMAT_B5G6R5_UNORM:
-    case MESA_FORMAT_B4G4R4A4_UNORM:
-    case MESA_FORMAT_B5G5R5A1_UNORM:
-    case MESA_FORMAT_A_UNORM8:
-    case MESA_FORMAT_L_UNORM8:
-    case MESA_FORMAT_I_UNORM8:
-    /* swizzled */
-    case MESA_FORMAT_A8B8G8R8_UNORM:
-    case MESA_FORMAT_R8G8B8A8_UNORM:
+    /* XXX others? */
+    if (_mesa_little_endian()) {
+	switch (mesa_format) {
+	case MESA_FORMAT_B8G8R8A8_UNORM:
+	case MESA_FORMAT_B8G8R8X8_UNORM:
+	case MESA_FORMAT_B5G6R5_UNORM:
+	case MESA_FORMAT_B4G4R4A4_UNORM:
+	case MESA_FORMAT_B5G5R5A1_UNORM:
+	case MESA_FORMAT_A_UNORM8:
+	case MESA_FORMAT_L_UNORM8:
+	case MESA_FORMAT_I_UNORM8:
+	/* swizzled - probably can't happen with the disabled Choose8888TexFormat code */
+	case MESA_FORMAT_A8B8G8R8_UNORM:
+	case MESA_FORMAT_R8G8B8A8_UNORM:
 	    break;
-    default:
+	default:
 	    return 0;
+	}
+    }
+    else {
+	switch (mesa_format) {
+	case MESA_FORMAT_A8R8G8B8_UNORM:
+	case MESA_FORMAT_X8R8G8B8_UNORM:
+	case MESA_FORMAT_R5G6B5_UNORM:
+	case MESA_FORMAT_A4R4G4B4_UNORM:
+	case MESA_FORMAT_A1R5G5B5_UNORM:
+	case MESA_FORMAT_A_UNORM8:
+	case MESA_FORMAT_L_UNORM8:
+	case MESA_FORMAT_I_UNORM8:
+	/* swizzled - probably can't happen with the disabled Choose8888TexFormat code */
+	case MESA_FORMAT_R8G8B8A8_UNORM:
+	case MESA_FORMAT_A8B8G8R8_UNORM:
+	   break;
+	default:
+	   return 0;
+	}
     }
 
     /* Rendering to small buffer doesn't work.
@@ -112,41 +133,11 @@
     assert(height <= 2048);
     assert(offset % 32 == 0);
 
-    /* XXX others?  BE/LE? */
-    switch (src_mesa_format) {
-    case MESA_FORMAT_B8G8R8A8_UNORM:
-	    txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_A8B8G8R8_UNORM:
-	    txformat |= R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_R8G8B8A8_UNORM:
-	    txformat |= R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_B8G8R8X8_UNORM:
-	    txformat |= R200_TXFORMAT_ARGB8888;
-	    break;
-    case MESA_FORMAT_B5G6R5_UNORM:
-	    txformat |= R200_TXFORMAT_RGB565;
-	    break;
-    case MESA_FORMAT_B4G4R4A4_UNORM:
-	    txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_B5G5R5A1_UNORM:
-	    txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_A_UNORM8:
-    case MESA_FORMAT_I_UNORM8:
-	    txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_L_UNORM8:
-	    txformat |= R200_TXFORMAT_I8;
-	    break;
-    case MESA_FORMAT_L8A8_UNORM:
-	    txformat |= R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    default:
-	    break;
+    if (_mesa_little_endian()) {
+	txformat |= tx_table_le[src_mesa_format].format;
+    }
+    else {
+	txformat |= tx_table_be[src_mesa_format].format;
     }
 
     if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
@@ -155,11 +146,19 @@
 	offset |= R200_TXO_MICRO_TILE;
 
     switch (dst_mesa_format) {
+    /* le */
     case MESA_FORMAT_B8G8R8A8_UNORM:
     case MESA_FORMAT_B8G8R8X8_UNORM:
     case MESA_FORMAT_B5G6R5_UNORM:
     case MESA_FORMAT_B4G4R4A4_UNORM:
     case MESA_FORMAT_B5G5R5A1_UNORM:
+    /* be */
+    case MESA_FORMAT_A8R8G8B8_UNORM:
+    case MESA_FORMAT_X8R8G8B8_UNORM:
+    case MESA_FORMAT_R5G6B5_UNORM:
+    case MESA_FORMAT_A4R4G4B4_UNORM:
+    case MESA_FORMAT_A1R5G5B5_UNORM:
+    /* little and big */
     case MESA_FORMAT_A_UNORM8:
     case MESA_FORMAT_L_UNORM8:
     case MESA_FORMAT_I_UNORM8:
@@ -183,6 +182,9 @@
 	    END_BATCH();
 	    break;
     case MESA_FORMAT_A8B8G8R8_UNORM:
+    case MESA_FORMAT_R8G8B8A8_UNORM:
+       if ((dst_mesa_format == MESA_FORMAT_A8B8G8R8_UNORM && _mesa_little_endian()) ||
+	   (dst_mesa_format == MESA_FORMAT_R8G8B8A8_UNORM && !_mesa_little_endian())) {
 	    BEGIN_BATCH(10);
 	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
 					      RADEON_TEX_BLEND_0_ENABLE));
@@ -190,6 +192,8 @@
 						  R200_TXC_ARG_B_ZERO |
 						  R200_TXC_ARG_C_R0_COLOR |
 						  R200_TXC_OP_MADD));
+	    /* XXX I don't think this can work. This is output rotation, and alpha contains
+	     * red, not alpha (we'd write gbrr). */
 	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
 						   R200_TXC_OUTPUT_ROTATE_GBA |
 						   R200_TXC_OUTPUT_REG_R0));
@@ -201,8 +205,16 @@
 						   (R200_TXA_REPL_RED << R200_TXA_REPL_ARG_C_SHIFT) |
 						   R200_TXA_OUTPUT_REG_R0));
 	    END_BATCH();
-	    break;
-    case MESA_FORMAT_R8G8B8A8_UNORM:
+       }
+       else {
+	    /* XXX pretty sure this could be done with just 2 instead of 4 instructions.
+	     * Like so:
+	     * 1st: use RGA output rotation, rgb arg replicate b, a arg r, write mask rb.
+	     * That's just one instruction in fact, but I'm not entirely sure it works
+	     * if some of those incoming r0 components are never written (due to mask)
+	     * to r0 in the shader itself.
+	     * In any case, this path (and the one above) may not be reachable with
+	     * the Choose8888TexFormat code disabled. */
 	    BEGIN_BATCH(34);
 	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
 					      RADEON_TEX_BLEND_0_ENABLE |
@@ -272,7 +284,8 @@
 	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_3, (R200_TXA_CLAMP_0_1 |
 						   R200_TXA_OUTPUT_REG_R0));
 	    END_BATCH();
-	    break;
+	}
+	break;
     }
 
     BEGIN_BATCH(18);
@@ -306,21 +319,27 @@
     uint32_t dst_format = 0;
     BATCH_LOCALS(&r200->radeon);
 
-    /* XXX others?  BE/LE? */
     switch (mesa_format) {
+    /* The first of each pair is for little, the second for big endian */
     case MESA_FORMAT_B8G8R8A8_UNORM:
+    case MESA_FORMAT_A8R8G8B8_UNORM:
     case MESA_FORMAT_B8G8R8X8_UNORM:
+    case MESA_FORMAT_X8R8G8B8_UNORM:
+    /* These two are valid both for little and big endian (swizzled) */
     case MESA_FORMAT_A8B8G8R8_UNORM:
     case MESA_FORMAT_R8G8B8A8_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_ARGB8888;
 	    break;
     case MESA_FORMAT_B5G6R5_UNORM:
+    case MESA_FORMAT_R5G6B5_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_RGB565;
 	    break;
     case MESA_FORMAT_B4G4R4A4_UNORM:
+    case MESA_FORMAT_A4R4G4B4_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_ARGB4444;
 	    break;
     case MESA_FORMAT_B5G5R5A1_UNORM:
+    case MESA_FORMAT_A1R5G5B5_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
 	    break;
     case MESA_FORMAT_A_UNORM8:
@@ -547,5 +566,21 @@
 
     radeonFlush(ctx);
 
+    /* We submitted those packets outside our state atom mechanism. Thus
+     * make sure the atoms are resubmitted the next time. */
+    r200->hw.cst.dirty = GL_TRUE;
+    r200->hw.ctx.dirty = GL_TRUE;
+    r200->hw.vap.dirty = GL_TRUE;
+    r200->hw.msk.dirty = GL_TRUE;
+    r200->hw.pix[0].dirty = GL_TRUE;
+    r200->hw.pix[1].dirty = GL_TRUE;
+    r200->hw.pix[2].dirty = GL_TRUE;
+    r200->hw.pix[3].dirty = GL_TRUE;
+    r200->hw.sci.dirty = GL_TRUE;
+    r200->hw.set.dirty = GL_TRUE;
+    r200->hw.tex[0].dirty = GL_TRUE;
+    r200->hw.vte.dirty = GL_TRUE;
+    r200->hw.vtx.dirty = GL_TRUE;
+
     return GL_TRUE;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_context.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_context.c	2015-09-16 14:36:09.000000000 +0000
@@ -225,18 +225,9 @@
    rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
 							"def_max_anisotropy");
 
-   if ( sPriv->drm_version.major == 1
-       && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
-      if ( sPriv->drm_version.minor < 13 )
-	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
-			  "disabling.\n", sPriv->drm_version.minor );
-      else
-	 rmesa->using_hyperz = GL_TRUE;
-   }
+   if (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz"))
+      rmesa->using_hyperz = GL_TRUE;
  
-   if ( sPriv->drm_version.minor >= 15 )
-      rmesa->texmicrotile = GL_TRUE;
-
    /* Init default driver functions then plug in our R200-specific functions
     * (the texture functions are especially important)
     */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_context.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_context.h	2015-09-16 14:36:09.000000000 +0000
@@ -109,7 +109,6 @@
 #define CTX_RB3D_COLOROFFSET  11
 #define CTX_CMD_2             12 /* why */
 #define CTX_RB3D_COLORPITCH   13 /* why */
-#define CTX_STATE_SIZE_OLDDRM 14
 #define CTX_CMD_3             14
 #define CTX_RB3D_BLENDCOLOR   15
 #define CTX_RB3D_ABLENDCNTL   16
@@ -167,9 +166,6 @@
 #define TEX_PP_TXSIZE               4  /*2c0c*/
 #define TEX_PP_TXPITCH              5  /*2c10*/
 #define TEX_PP_BORDER_COLOR         6  /*2c14*/
-#define TEX_CMD_1_OLDDRM            7
-#define TEX_PP_TXOFFSET_OLDDRM      8  /*2d00 */
-#define TEX_STATE_SIZE_OLDDRM       9
 #define TEX_PP_CUBIC_FACES          7
 #define TEX_PP_TXMULTI_CTL          8
 #define TEX_CMD_1_NEWDRM            9
@@ -618,7 +614,6 @@
    struct r200_swtcl_info swtcl;
 
    GLboolean using_hyperz;
-   GLboolean texmicrotile;
 
   struct ati_fragment_shader *afs_loaded;
 };
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_state.c	2015-09-16 14:36:09.000000000 +0000
@@ -1546,7 +1546,7 @@
    GLfloat xoffset = 0;
    GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
    const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
-   double scale[3], translate[3];
+   float scale[3], translate[3];
    GLfloat y_scale, y_bias;
 
    if (render_to_fbo) {
@@ -1669,7 +1669,7 @@
 
    if ( R200_DEBUG & RADEON_STATE )
       fprintf( stderr, "%s( %s = %s )\n", __func__,
-	       _mesa_lookup_enum_by_nr( cap ),
+	       _mesa_enum_to_string( cap ),
 	       state ? "GL_TRUE" : "GL_FALSE" );
 
    switch ( cap ) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_state_init.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_state_init.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_state_init.c	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_state_init.c	2015-09-16 14:36:09.000000000 +0000
@@ -254,7 +254,7 @@
 CHECK( tex_any, ctx->Texture._MaxEnabledTexImageUnit != -1, 0 )
 CHECK( tf, (ctx->Texture._MaxEnabledTexImageUnit != -1 && !ctx->ATIFragmentShader._Enabled), 0 );
 CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 )
-   CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
+CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
 CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
 CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 )
 CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
@@ -453,12 +453,15 @@
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
    else switch (rrb->base.Base.Format) {
    case MESA_FORMAT_B5G6R5_UNORM:
+   case MESA_FORMAT_R5G6B5_UNORM:
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
 	break;
    case MESA_FORMAT_B4G4R4A4_UNORM:
+   case MESA_FORMAT_A4R4G4B4_UNORM:
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
 	break;
    case MESA_FORMAT_B5G5R5A1_UNORM:
+   case MESA_FORMAT_A1R5G5B5_UNORM:
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
 	break;
    default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_tex.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_tex.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_tex.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_tex.c	2015-09-16 14:36:09.000000000 +0000
@@ -68,9 +68,9 @@
    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 		"%s(tex %p) sw %s, tw %s, rw %s\n",
 		__func__, t,
-		_mesa_lookup_enum_by_nr(swrap),
-		_mesa_lookup_enum_by_nr(twrap),
-		_mesa_lookup_enum_by_nr(rwrap));
+		_mesa_enum_to_string(swrap),
+		_mesa_enum_to_string(twrap),
+		_mesa_enum_to_string(rwrap));
 
    t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
 
@@ -225,8 +225,8 @@
    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 	"%s(tex %p) minf %s, maxf %s, anisotropy %d.\n",
 	__func__, t,
-	_mesa_lookup_enum_by_nr(minf),
-	_mesa_lookup_enum_by_nr(magf),
+	_mesa_enum_to_string(minf),
+	_mesa_enum_to_string(magf),
 	anisotropy);
 
    if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) {
@@ -302,7 +302,7 @@
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
    radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s( %s )\n",
-	       __func__, _mesa_lookup_enum_by_nr( pname ) );
+	       __func__, _mesa_enum_to_string( pname ) );
 
    /* This is incorrect: Need to maintain this data for each of
     * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch
@@ -384,7 +384,7 @@
    radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE,
 		"%s(%p, tex %p)  pname %s\n",
 		__func__, ctx, texObj,
-	       _mesa_lookup_enum_by_nr( pname ) );
+	       _mesa_enum_to_string( pname ) );
 
    switch ( pname ) {
    case GL_TEXTURE_MIN_FILTER:
@@ -415,7 +415,7 @@
    radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_NORMAL,
            "%s( %p (target = %s) )\n", __func__,
 	   (void *)texObj,
-	   _mesa_lookup_enum_by_nr(texObj->Target));
+	   _mesa_enum_to_string(texObj->Target));
 
    if (rmesa) {
       int i;
@@ -473,7 +473,7 @@
    radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
            "%s(%p) target %s, new texture %p.\n",
 	   __func__, ctx,
-	   _mesa_lookup_enum_by_nr(target), t);
+	   _mesa_enum_to_string(target), t);
 
    _mesa_initialize_texture_object(ctx, &t->base, name, target);
    t->base.Sampler.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_tex.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_tex.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_tex.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_tex.h	2015-09-16 14:36:09.000000000 +0000
@@ -52,4 +52,68 @@
 
 extern void set_re_cntl_d3d( struct gl_context *ctx, int unit, GLboolean use_d3d );
 
+struct tx_table {
+   GLuint format, filter;
+};
+
+/* Note: the tables (necessarily) contain some invalid entries, for formats
+ * which are only valid on either big or little endian. */
+static const struct tx_table tx_table_be[] =
+{
+   [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_I8, 0 },
+   [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YVYU422, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_VYUY422, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_DXT1, 0 },
+   [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_DXT23 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_DXT45 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+};
+
+static const struct tx_table tx_table_le[] =
+{
+   [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 },
+   [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_I8, 0 },
+   [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YVYU422, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_VYUY422, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_DXT1, 0 },
+   [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_DXT23 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_DXT45 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+};
+
+
+
 #endif /* __R200_TEX_H__ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_texstate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_texstate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/r200_texstate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/r200_texstate.c	2015-09-16 14:36:09.000000000 +0000
@@ -49,80 +49,9 @@
 #include "r200_tex.h"
 #include "r200_tcl.h"
 
-
-#define R200_TXFORMAT_A8        R200_TXFORMAT_I8
-#define R200_TXFORMAT_L8        R200_TXFORMAT_I8
-#define R200_TXFORMAT_AL88      R200_TXFORMAT_AI88
-#define R200_TXFORMAT_YCBCR     R200_TXFORMAT_YVYU422
-#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422
-#define R200_TXFORMAT_RGB_DXT1  R200_TXFORMAT_DXT1
-#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1
-#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
-#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
-
 #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
                              && (tx_table_be[f].format != 0xffffffff) )
 
-struct tx_table {
-   GLuint format, filter;
-};
-
-static const struct tx_table tx_table_be[] =
-{
-   [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
-   [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
-   [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_L8, 0 },
-   [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB },
-   [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB },
-   [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 },
-   [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-};
-
-static const struct tx_table tx_table_le[] =
-{
-   [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 },
-   [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
-   [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
-   [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_L8, 0 },
-   [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB },
-   [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB },
-   [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 },
-   [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-};
-
 /* ================================================================
  * Texture combine functions
  */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_common.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_common.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_common.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_common.c	2015-09-16 14:36:10.000000000 +0000
@@ -164,7 +164,7 @@
 
 	gp.param = RADEON_PARAM_LAST_CLEAR;
 	gp.value = (int *)&age;
-	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+	ret = drmCommandWriteRead(radeon->radeonScreen->driScreen->fd, DRM_RADEON_GETPARAM,
 				  &gp, sizeof(gp));
 	if (ret) {
 		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __func__,
@@ -343,7 +343,7 @@
 {
 	if (RADEON_DEBUG & RADEON_DRI)
 		fprintf(stderr, "%s %s\n", __func__,
-			_mesa_lookup_enum_by_nr( mode ));
+			_mesa_enum_to_string( mode ));
 
 	if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 		radeonContextPtr radeon = RADEON_CONTEXT(ctx);
@@ -358,8 +358,8 @@
        * that the front-buffer has actually been allocated.
        */
 		if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) {
-			radeon_update_renderbuffers(radeon->dri.context,
-				radeon->dri.context->driDrawablePriv, GL_FALSE);
+			radeon_update_renderbuffers(radeon->driContext,
+				radeon->driContext->driDrawablePriv, GL_FALSE);
       }
 	}
 
@@ -375,8 +375,8 @@
 					|| (mode == GL_FRONT);
 
 		if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) {
-			radeon_update_renderbuffers(rmesa->dri.context,
-						    rmesa->dri.context->driReadablePriv, GL_FALSE);
+			radeon_update_renderbuffers(rmesa->driContext,
+						    rmesa->driContext->driReadablePriv, GL_FALSE);
 	 	}
 	}
 	/* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
@@ -399,7 +399,7 @@
 void radeon_viewport(struct gl_context *ctx)
 {
 	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-	__DRIcontext *driContext = radeon->dri.context;
+	__DRIcontext *driContext = radeon->driContext;
 	void (*old_viewport)(struct gl_context *ctx);
 
 	if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
@@ -693,6 +693,7 @@
 {
 	GLuint size;
 	struct drm_radeon_gem_info mminfo = { 0 };
+	int fd = rmesa->radeonScreen->driScreen->fd;
 
 	/* Initialize command buffer */
 	size = 256 * driQueryOptioni(&rmesa->optionCache,
@@ -711,8 +712,7 @@
 			"Allocating %d bytes command buffer (max state is %d bytes)\n",
 			size * 4, rmesa->hw.max_state_size * 4);
 
-	rmesa->cmdbuf.csm =
-		radeon_cs_manager_gem_ctor(rmesa->radeonScreen->driScreen->fd);
+	rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
 	if (rmesa->cmdbuf.csm == NULL) {
 		/* FIXME: fatal error */
 		return;
@@ -725,7 +725,7 @@
 				  (void (*)(void *))rmesa->glCtx.Driver.Flush, &rmesa->glCtx);
 
 
-	if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO,
+	if (!drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO,
 				 &mminfo, sizeof(mminfo))) {
 		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM,
 				    mminfo.vram_visible);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_common_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_common_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_common_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_common_context.c	2015-09-16 14:36:10.000000000 +0000
@@ -162,10 +162,7 @@
 	_mesa_meta_init(ctx);
 
 	/* DRI fields */
-	radeon->dri.context = driContextPriv;
-	radeon->dri.screen = sPriv;
-	radeon->dri.fd = sPriv->fd;
-	radeon->dri.drmMinor = sPriv->drm_version.minor;
+	radeon->driContext = driContextPriv;
 
 	/* Setup IRQs */
 	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
@@ -194,6 +191,29 @@
 
 	radeon_init_dma(radeon);
 
+        /* _mesa_initialize_context calls _mesa_init_queryobj which
+         * initializes all of the counter sizes to 64.  The counters on r100
+         * and r200 are only 32 bits for occlusion queries.  Those are the
+         * only counters, so set the other sizes to zero.
+         */
+        radeon->glCtx.Const.QueryCounterBits.SamplesPassed = 32;
+
+        radeon->glCtx.Const.QueryCounterBits.TimeElapsed = 0;
+        radeon->glCtx.Const.QueryCounterBits.Timestamp = 0;
+        radeon->glCtx.Const.QueryCounterBits.PrimitivesGenerated = 0;
+        radeon->glCtx.Const.QueryCounterBits.PrimitivesWritten = 0;
+        radeon->glCtx.Const.QueryCounterBits.VerticesSubmitted = 0;
+        radeon->glCtx.Const.QueryCounterBits.PrimitivesSubmitted = 0;
+        radeon->glCtx.Const.QueryCounterBits.VsInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.TessPatches = 0;
+        radeon->glCtx.Const.QueryCounterBits.TessInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.GsInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.GsPrimitives = 0;
+        radeon->glCtx.Const.QueryCounterBits.FsInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.ComputeInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.ClInPrimitives = 0;
+        radeon->glCtx.Const.QueryCounterBits.ClOutPrimitives = 0;
+
 	return GL_TRUE;
 }
 
@@ -302,7 +322,7 @@
  */
 void radeon_prepare_render(radeonContextPtr radeon)
 {
-    __DRIcontext *driContext = radeon->dri.context;
+    __DRIcontext *driContext = radeon->driContext;
     __DRIdrawable *drawable;
     __DRIscreen *screen;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_common_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_common_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_common_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_common_context.h	2015-09-16 14:36:10.000000000 +0000
@@ -342,17 +342,6 @@
 	int elts_start;
 };
 
-struct radeon_dri_mirror {
-	__DRIcontext *context;	/* DRI context */
-	__DRIscreen *screen;	/* DRI screen */
-
-	drm_context_t hwContext;
-	drm_hw_lock_t *hwLock;
-	int hwLockCount;
-	int fd;
-	int drmMinor;
-};
-
 typedef void (*radeon_tri_func) (radeonContextPtr,
 				 radeonVertex *,
 				 radeonVertex *, radeonVertex *);
@@ -385,6 +374,7 @@
 
 struct radeon_context {
    struct gl_context glCtx;             /**< base class, must be first */
+   __DRIcontext *driContext;               /* DRI context */
    radeonScreenPtr radeonScreen;	/* Screen private DRI data */
 
    /* Texture object bookkeeping
@@ -407,9 +397,6 @@
    /* Drawable information */
    unsigned int lastStamp;
 
-   /* Mirrors of some DRI state */
-   struct radeon_dri_mirror dri;
-
    /* Busy waiting */
    GLuint do_usleeps;
    GLuint do_irqs;
@@ -502,12 +489,12 @@
 
 static inline __DRIdrawable* radeon_get_drawable(radeonContextPtr radeon)
 {
-	return radeon->dri.context->driDrawablePriv;
+	return radeon->driContext->driDrawablePriv;
 }
 
 static inline __DRIdrawable* radeon_get_readable(radeonContextPtr radeon)
 {
-	return radeon->dri.context->driReadablePriv;
+	return radeon->driContext->driReadablePriv;
 }
 
 extern const char const *radeonVendorString;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_fbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_fbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_fbo.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_fbo.c	2015-09-16 14:36:10.000000000 +0000
@@ -169,6 +169,7 @@
     rrb->map_buffer = malloc(w * h * 4);
     ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
     assert(!ret);
+    (void) ret;
     untiled_s8z24_map = rrb->map_buffer;
     tiled_s8z24_map = rrb->bo->ptr;
 
@@ -207,6 +208,7 @@
     rrb->map_buffer = malloc(w * h * 2);
     ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
     assert(!ret);
+    (void) ret;
 
     untiled_z16_map = rrb->map_buffer;
     tiled_z16_map = rrb->bo->ptr;
@@ -324,6 +326,7 @@
 
    ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
    assert(!ret);
+   (void) ret;
 
    map = rrb->bo->ptr;
    stride = rrb->map_pitch;
@@ -416,7 +419,6 @@
 {
    struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
    struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
-   GLboolean ok;
 
    if ((rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_DEPTH_ALWAYS_TILED) && !rrb->has_surface) {
        if (rb->Format == MESA_FORMAT_Z24_UNORM_S8_UINT || rb->Format == MESA_FORMAT_Z24_UNORM_X8_UINT) {
@@ -438,6 +440,7 @@
    radeon_bo_unmap(rrb->map_bo);
 
    if (rrb->map_mode & GL_MAP_WRITE_BIT) {
+      GLboolean ok;
       ok = rmesa->vtbl.blit(ctx, rrb->map_bo, 0,
 			    rb->Format, rrb->map_pitch / rrb->cpp,
 			    rrb->map_w, rrb->map_h,
@@ -449,6 +452,7 @@
 			    rrb->map_w, rrb->map_h,
 			    GL_FALSE);
       assert(ok);
+      (void) ok;
    }
 
    radeon_bo_unref(rrb->map_bo);
@@ -700,7 +704,7 @@
   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 		"%s(%p, fb %p, target %s) \n",
 		__func__, ctx, fb,
-		_mesa_lookup_enum_by_nr(target));
+		_mesa_enum_to_string(target));
 
    if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
       radeon_draw_buffer(ctx, fb);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c	2015-09-16 14:36:10.000000000 +0000
@@ -276,7 +276,7 @@
 	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 			"%s(%p) target %s, min %d, max %d.\n",
 			__func__, tObj,
-			_mesa_lookup_enum_by_nr(tObj->Target),
+			_mesa_enum_to_string(tObj->Target),
 			minLod, maxLod);
 
 	/* save these values */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_pixel_read.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_pixel_read.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_pixel_read.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_pixel_read.c	2015-09-16 14:36:10.000000000 +0000
@@ -212,7 +212,7 @@
      */
     radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
                  "Falling back to sw for ReadPixels (format %s, type %s)\n",
-                 _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+                 _mesa_enum_to_string(format), _mesa_enum_to_string(type));
 
     if (ctx->NewState)
         _mesa_update_state(ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_screen.c	2015-09-16 14:36:10.000000000 +0000
@@ -135,36 +135,26 @@
 static int
 radeonGetParam(__DRIscreen *sPriv, int param, void *value)
 {
-  int ret;
-  drm_radeon_getparam_t gp = { 0 };
   struct drm_radeon_info info = { 0 };
 
-  if (sPriv->drm_version.major >= 2) {
-      info.value = (uint64_t)(uintptr_t)value;
-      switch (param) {
-      case RADEON_PARAM_DEVICE_ID:
-          info.request = RADEON_INFO_DEVICE_ID;
-          break;
-      case RADEON_PARAM_NUM_GB_PIPES:
-          info.request = RADEON_INFO_NUM_GB_PIPES;
-          break;
-      case RADEON_PARAM_NUM_Z_PIPES:
-          info.request = RADEON_INFO_NUM_Z_PIPES;
-          break;
-      case RADEON_INFO_TILE_CONFIG:
-	  info.request = RADEON_INFO_TILE_CONFIG;
-          break;
-      default:
-          return -EINVAL;
-      }
-      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
-  } else {
-      gp.param = param;
-      gp.value = value;
-
-      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+  info.value = (uint64_t)(uintptr_t)value;
+  switch (param) {
+  case RADEON_PARAM_DEVICE_ID:
+    info.request = RADEON_INFO_DEVICE_ID;
+    break;
+  case RADEON_PARAM_NUM_GB_PIPES:
+    info.request = RADEON_INFO_NUM_GB_PIPES;
+    break;
+  case RADEON_PARAM_NUM_Z_PIPES:
+    info.request = RADEON_INFO_NUM_Z_PIPES;
+    break;
+  case RADEON_INFO_TILE_CONFIG:
+    info.request = RADEON_INFO_TILE_CONFIG;
+    break;
+  default:
+    return -EINVAL;
   }
-  return ret;
+  return drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
 }
 
 #if defined(RADEON_R100)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/r200/radeon_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/r200/radeon_texture.c	2015-09-16 14:36:10.000000000 +0000
@@ -224,7 +224,19 @@
 	const GLuint ui = 1;
 	const GLubyte littleEndian = *((const GLubyte *)&ui);
 
-	if (fbo)
+
+	/* Unfortunately, regardless of the fbo flag, we might still be asked to
+	 * attach the texture to an fbo later, which then won't succeed if we chose
+	 * a format which isn't renderable. And unlike with more exotic formats, apps
+	 * aren't really prepared for the incomplete framebuffer this results in
+	 * (they'd have to retry with the same internalFormat even, just a different
+	 * srcFormat/srcType, which can't really be expected anyway).
+	 * Ideally, we'd defer format selection until later (if the texture is used
+	 * as a render target, it's likely no data is ever uploaded to it before it
+	 * is attached to an fbo), but this isn't really possible, so for now just
+	 * always use a renderable format.
+	 */
+	if (1 || fbo)
 		return _radeon_texformat_argb8888;
 
 	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
@@ -267,8 +279,8 @@
 	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 		"%s InternalFormat=%s(%d) type=%s format=%s\n",
 		__func__,
-		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
-		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+		_mesa_enum_to_string(internalFormat), internalFormat,
+		_mesa_enum_to_string(type), _mesa_enum_to_string(format));
 	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 			"%s do32bpt=%d force16bpt=%d\n",
 			__func__, do32bpt, force16bpt);
@@ -531,7 +543,7 @@
 	__DRIscreen *screen;
 	__DRIimage *image;
 
-	screen = radeon->dri.screen;
+	screen = radeon->radeonScreen->driScreen;
 	image = screen->dri2.image->lookupEGLImage(screen, image_handle,
 						   screen->loaderPrivate);
 	if (image == NULL)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_blit.c	2015-09-16 14:36:10.000000000 +0000
@@ -28,6 +28,7 @@
 #include "radeon_common.h"
 #include "radeon_context.h"
 #include "radeon_blit.h"
+#include "radeon_tex.h"
 
 static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
                                   int reg, int count)
@@ -40,19 +41,36 @@
 /* common formats supported as both textures and render targets */
 unsigned r100_check_blit(mesa_format mesa_format, uint32_t dst_pitch)
 {
-    /* XXX others?  BE/LE? */
-    switch (mesa_format) {
-    case MESA_FORMAT_B8G8R8A8_UNORM:
-    case MESA_FORMAT_B8G8R8X8_UNORM:
-    case MESA_FORMAT_B5G6R5_UNORM:
-    case MESA_FORMAT_B4G4R4A4_UNORM:
-    case MESA_FORMAT_B5G5R5A1_UNORM:
-    case MESA_FORMAT_A_UNORM8:
-    case MESA_FORMAT_L_UNORM8:
-    case MESA_FORMAT_I_UNORM8:
+    /* XXX others?  */
+    if (_mesa_little_endian()) {
+	switch (mesa_format) {
+	case MESA_FORMAT_B8G8R8A8_UNORM:
+	case MESA_FORMAT_B8G8R8X8_UNORM:
+	case MESA_FORMAT_B5G6R5_UNORM:
+	case MESA_FORMAT_B4G4R4A4_UNORM:
+	case MESA_FORMAT_B5G5R5A1_UNORM:
+	case MESA_FORMAT_A_UNORM8:
+	case MESA_FORMAT_L_UNORM8:
+	case MESA_FORMAT_I_UNORM8:
 	    break;
-    default:
+	default:
+	    return 0;
+	}
+    }
+    else {
+	switch (mesa_format) {
+	case MESA_FORMAT_A8R8G8B8_UNORM:
+	case MESA_FORMAT_X8R8G8B8_UNORM:
+	case MESA_FORMAT_R5G6B5_UNORM:
+	case MESA_FORMAT_A4R4G4B4_UNORM:
+	case MESA_FORMAT_A1R5G5B5_UNORM:
+	case MESA_FORMAT_A_UNORM8:
+	case MESA_FORMAT_L_UNORM8:
+	case MESA_FORMAT_I_UNORM8:
+	    break;
+	default:
 	    return 0;
+	}
     }
 
     /* Rendering to small buffer doesn't work.
@@ -106,40 +124,8 @@
     assert(height <= 2048);
     assert(offset % 32 == 0);
 
-    /* XXX others?  BE/LE? */
-    switch (mesa_format) {
-    case MESA_FORMAT_B8G8R8A8_UNORM:
-	    txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_A8B8G8R8_UNORM:
-            txformat |= RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
-            break;
-    case MESA_FORMAT_B8G8R8X8_UNORM:
-	    txformat |= RADEON_TXFORMAT_ARGB8888;
-	    break;
-    case MESA_FORMAT_B5G6R5_UNORM:
-	    txformat |= RADEON_TXFORMAT_RGB565;
-	    break;
-    case MESA_FORMAT_B4G4R4A4_UNORM:
-	    txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_B5G5R5A1_UNORM:
-	    txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_A_UNORM8:
-    case MESA_FORMAT_I_UNORM8:
-	    txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
-	    break;
-    case MESA_FORMAT_L_UNORM8:
-            txformat |= RADEON_TXFORMAT_I8;
-            break;
-    case MESA_FORMAT_L8A8_UNORM:
-            txformat |= RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP;
-            break;
-    default:
-	    break;
-    }
-    
+    txformat |= tx_table[mesa_format].format;
+
     if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
        offset |= RADEON_TXO_MACRO_TILE;
     if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
@@ -184,19 +170,25 @@
     uint32_t dst_format = 0;
     BATCH_LOCALS(&r100->radeon);
 
-    /* XXX others?  BE/LE? */
+    /* XXX others? */
     switch (mesa_format) {
+    /* The first of each pair is for little, the second for big endian. */
     case MESA_FORMAT_B8G8R8A8_UNORM:
+    case MESA_FORMAT_A8R8G8B8_UNORM:
     case MESA_FORMAT_B8G8R8X8_UNORM:
+    case MESA_FORMAT_X8R8G8B8_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_ARGB8888;
 	    break;
     case MESA_FORMAT_B5G6R5_UNORM:
+    case MESA_FORMAT_R5G6B5_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_RGB565;
 	    break;
     case MESA_FORMAT_B4G4R4A4_UNORM:
+    case MESA_FORMAT_A4R4G4B4_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_ARGB4444;
 	    break;
     case MESA_FORMAT_B5G5R5A1_UNORM:
+    case MESA_FORMAT_A1R5G5B5_UNORM:
 	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
 	    break;
     case MESA_FORMAT_A_UNORM8:
@@ -425,5 +417,13 @@
 
     radeonFlush(ctx);
 
+    /* We submitted those packets outside our state atom mechanism. Thus
+     * make sure they are all resubmitted the next time. */
+    r100->hw.ctx.dirty = GL_TRUE;
+    r100->hw.msk.dirty = GL_TRUE;
+    r100->hw.set.dirty = GL_TRUE;
+    r100->hw.tex[0].dirty = GL_TRUE;
+    r100->hw.txr[0].dirty = GL_TRUE;
+
     return GL_TRUE;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_common.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_common.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_common.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_common.c	2015-09-16 14:36:10.000000000 +0000
@@ -164,7 +164,7 @@
 
 	gp.param = RADEON_PARAM_LAST_CLEAR;
 	gp.value = (int *)&age;
-	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+	ret = drmCommandWriteRead(radeon->radeonScreen->driScreen->fd, DRM_RADEON_GETPARAM,
 				  &gp, sizeof(gp));
 	if (ret) {
 		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __func__,
@@ -343,7 +343,7 @@
 {
 	if (RADEON_DEBUG & RADEON_DRI)
 		fprintf(stderr, "%s %s\n", __func__,
-			_mesa_lookup_enum_by_nr( mode ));
+			_mesa_enum_to_string( mode ));
 
 	if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
 		radeonContextPtr radeon = RADEON_CONTEXT(ctx);
@@ -358,8 +358,8 @@
        * that the front-buffer has actually been allocated.
        */
 		if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) {
-			radeon_update_renderbuffers(radeon->dri.context,
-				radeon->dri.context->driDrawablePriv, GL_FALSE);
+			radeon_update_renderbuffers(radeon->driContext,
+				radeon->driContext->driDrawablePriv, GL_FALSE);
       }
 	}
 
@@ -375,8 +375,8 @@
 					|| (mode == GL_FRONT);
 
 		if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) {
-			radeon_update_renderbuffers(rmesa->dri.context,
-						    rmesa->dri.context->driReadablePriv, GL_FALSE);
+			radeon_update_renderbuffers(rmesa->driContext,
+						    rmesa->driContext->driReadablePriv, GL_FALSE);
 	 	}
 	}
 	/* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
@@ -399,7 +399,7 @@
 void radeon_viewport(struct gl_context *ctx)
 {
 	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
-	__DRIcontext *driContext = radeon->dri.context;
+	__DRIcontext *driContext = radeon->driContext;
 	void (*old_viewport)(struct gl_context *ctx);
 
 	if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
@@ -693,6 +693,7 @@
 {
 	GLuint size;
 	struct drm_radeon_gem_info mminfo = { 0 };
+	int fd = rmesa->radeonScreen->driScreen->fd;
 
 	/* Initialize command buffer */
 	size = 256 * driQueryOptioni(&rmesa->optionCache,
@@ -711,8 +712,7 @@
 			"Allocating %d bytes command buffer (max state is %d bytes)\n",
 			size * 4, rmesa->hw.max_state_size * 4);
 
-	rmesa->cmdbuf.csm =
-		radeon_cs_manager_gem_ctor(rmesa->radeonScreen->driScreen->fd);
+	rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
 	if (rmesa->cmdbuf.csm == NULL) {
 		/* FIXME: fatal error */
 		return;
@@ -725,7 +725,7 @@
 				  (void (*)(void *))rmesa->glCtx.Driver.Flush, &rmesa->glCtx);
 
 
-	if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO,
+	if (!drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO,
 				 &mminfo, sizeof(mminfo))) {
 		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM,
 				    mminfo.vram_visible);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_common_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_common_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_common_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_common_context.c	2015-09-16 14:36:10.000000000 +0000
@@ -162,10 +162,7 @@
 	_mesa_meta_init(ctx);
 
 	/* DRI fields */
-	radeon->dri.context = driContextPriv;
-	radeon->dri.screen = sPriv;
-	radeon->dri.fd = sPriv->fd;
-	radeon->dri.drmMinor = sPriv->drm_version.minor;
+	radeon->driContext = driContextPriv;
 
 	/* Setup IRQs */
 	fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
@@ -194,6 +191,29 @@
 
 	radeon_init_dma(radeon);
 
+        /* _mesa_initialize_context calls _mesa_init_queryobj which
+         * initializes all of the counter sizes to 64.  The counters on r100
+         * and r200 are only 32 bits wide for occlusion queries.  Those are the
+         * only counters, so set the other sizes to zero.
+         */
+        radeon->glCtx.Const.QueryCounterBits.SamplesPassed = 32;
+
+        radeon->glCtx.Const.QueryCounterBits.TimeElapsed = 0;
+        radeon->glCtx.Const.QueryCounterBits.Timestamp = 0;
+        radeon->glCtx.Const.QueryCounterBits.PrimitivesGenerated = 0;
+        radeon->glCtx.Const.QueryCounterBits.PrimitivesWritten = 0;
+        radeon->glCtx.Const.QueryCounterBits.VerticesSubmitted = 0;
+        radeon->glCtx.Const.QueryCounterBits.PrimitivesSubmitted = 0;
+        radeon->glCtx.Const.QueryCounterBits.VsInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.TessPatches = 0;
+        radeon->glCtx.Const.QueryCounterBits.TessInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.GsInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.GsPrimitives = 0;
+        radeon->glCtx.Const.QueryCounterBits.FsInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.ComputeInvocations = 0;
+        radeon->glCtx.Const.QueryCounterBits.ClInPrimitives = 0;
+        radeon->glCtx.Const.QueryCounterBits.ClOutPrimitives = 0;
+
 	return GL_TRUE;
 }
 
@@ -302,7 +322,7 @@
  */
 void radeon_prepare_render(radeonContextPtr radeon)
 {
-    __DRIcontext *driContext = radeon->dri.context;
+    __DRIcontext *driContext = radeon->driContext;
     __DRIdrawable *drawable;
     __DRIscreen *screen;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_common_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_common_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_common_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_common_context.h	2015-09-16 14:36:10.000000000 +0000
@@ -342,17 +342,6 @@
 	int elts_start;
 };
 
-struct radeon_dri_mirror {
-	__DRIcontext *context;	/* DRI context */
-	__DRIscreen *screen;	/* DRI screen */
-
-	drm_context_t hwContext;
-	drm_hw_lock_t *hwLock;
-	int hwLockCount;
-	int fd;
-	int drmMinor;
-};
-
 typedef void (*radeon_tri_func) (radeonContextPtr,
 				 radeonVertex *,
 				 radeonVertex *, radeonVertex *);
@@ -385,6 +374,7 @@
 
 struct radeon_context {
    struct gl_context glCtx;             /**< base class, must be first */
+   __DRIcontext *driContext;               /* DRI context */
    radeonScreenPtr radeonScreen;	/* Screen private DRI data */
 
    /* Texture object bookkeeping
@@ -407,9 +397,6 @@
    /* Drawable information */
    unsigned int lastStamp;
 
-   /* Mirrors of some DRI state */
-   struct radeon_dri_mirror dri;
-
    /* Busy waiting */
    GLuint do_usleeps;
    GLuint do_irqs;
@@ -502,12 +489,12 @@
 
 static inline __DRIdrawable* radeon_get_drawable(radeonContextPtr radeon)
 {
-	return radeon->dri.context->driDrawablePriv;
+	return radeon->driContext->driDrawablePriv;
 }
 
 static inline __DRIdrawable* radeon_get_readable(radeonContextPtr radeon)
 {
-	return radeon->dri.context->driReadablePriv;
+	return radeon->driContext->driReadablePriv;
 }
 
 extern const char const *radeonVendorString;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_context.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_context.c	2015-09-16 14:36:10.000000000 +0000
@@ -191,16 +191,8 @@
    rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
                                                  "def_max_anisotropy");
 
-   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
-      if ( sPriv->drm_version.minor < 13 )
-	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
-			  "disabling.\n", sPriv->drm_version.minor );
-      else
-	 rmesa->using_hyperz = GL_TRUE;
-   }
-
-   if ( sPriv->drm_version.minor >= 15 )
-      rmesa->texmicrotile = GL_TRUE;
+   if (driQueryOptionb(&rmesa->radeon.optionCache, "hyperz"))
+      rmesa->using_hyperz = GL_TRUE;
 
    /* Init default driver functions then plug in our Radeon-specific functions
     * (the texture functions are especially important)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_context.h	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_context.h	2015-09-16 14:36:10.000000000 +0000
@@ -426,7 +426,6 @@
 	struct r100_swtcl_info swtcl;
 
 	GLboolean using_hyperz;
-	GLboolean texmicrotile;
 
 	/* Performance counters
 	 */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_fbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_fbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_fbo.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_fbo.c	2015-09-16 14:36:10.000000000 +0000
@@ -169,6 +169,7 @@
     rrb->map_buffer = malloc(w * h * 4);
     ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
     assert(!ret);
+    (void) ret;
     untiled_s8z24_map = rrb->map_buffer;
     tiled_s8z24_map = rrb->bo->ptr;
 
@@ -207,6 +208,7 @@
     rrb->map_buffer = malloc(w * h * 2);
     ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
     assert(!ret);
+    (void) ret;
 
     untiled_z16_map = rrb->map_buffer;
     tiled_z16_map = rrb->bo->ptr;
@@ -324,6 +326,7 @@
 
    ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
    assert(!ret);
+   (void) ret;
 
    map = rrb->bo->ptr;
    stride = rrb->map_pitch;
@@ -416,7 +419,6 @@
 {
    struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
    struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
-   GLboolean ok;
 
    if ((rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_DEPTH_ALWAYS_TILED) && !rrb->has_surface) {
        if (rb->Format == MESA_FORMAT_Z24_UNORM_S8_UINT || rb->Format == MESA_FORMAT_Z24_UNORM_X8_UINT) {
@@ -438,6 +440,7 @@
    radeon_bo_unmap(rrb->map_bo);
 
    if (rrb->map_mode & GL_MAP_WRITE_BIT) {
+      GLboolean ok;
       ok = rmesa->vtbl.blit(ctx, rrb->map_bo, 0,
 			    rb->Format, rrb->map_pitch / rrb->cpp,
 			    rrb->map_w, rrb->map_h,
@@ -449,6 +452,7 @@
 			    rrb->map_w, rrb->map_h,
 			    GL_FALSE);
       assert(ok);
+      (void) ok;
    }
 
    radeon_bo_unref(rrb->map_bo);
@@ -700,7 +704,7 @@
   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 		"%s(%p, fb %p, target %s) \n",
 		__func__, ctx, fb,
-		_mesa_lookup_enum_by_nr(target));
+		_mesa_enum_to_string(target));
 
    if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
       radeon_draw_buffer(ctx, fb);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c	2015-09-16 14:36:10.000000000 +0000
@@ -276,7 +276,7 @@
 	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 			"%s(%p) target %s, min %d, max %d.\n",
 			__func__, tObj,
-			_mesa_lookup_enum_by_nr(tObj->Target),
+			_mesa_enum_to_string(tObj->Target),
 			minLod, maxLod);
 
 	/* save these values */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_pixel_read.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_pixel_read.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_pixel_read.c	2015-09-16 14:36:10.000000000 +0000
@@ -212,7 +212,7 @@
      */
     radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
                  "Falling back to sw for ReadPixels (format %s, type %s)\n",
-                 _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+                 _mesa_enum_to_string(format), _mesa_enum_to_string(type));
 
     if (ctx->NewState)
         _mesa_update_state(ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_screen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_screen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_screen.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_screen.c	2015-09-16 14:36:10.000000000 +0000
@@ -135,36 +135,26 @@
 static int
 radeonGetParam(__DRIscreen *sPriv, int param, void *value)
 {
-  int ret;
-  drm_radeon_getparam_t gp = { 0 };
   struct drm_radeon_info info = { 0 };
 
-  if (sPriv->drm_version.major >= 2) {
-      info.value = (uint64_t)(uintptr_t)value;
-      switch (param) {
-      case RADEON_PARAM_DEVICE_ID:
-          info.request = RADEON_INFO_DEVICE_ID;
-          break;
-      case RADEON_PARAM_NUM_GB_PIPES:
-          info.request = RADEON_INFO_NUM_GB_PIPES;
-          break;
-      case RADEON_PARAM_NUM_Z_PIPES:
-          info.request = RADEON_INFO_NUM_Z_PIPES;
-          break;
-      case RADEON_INFO_TILE_CONFIG:
-	  info.request = RADEON_INFO_TILE_CONFIG;
-          break;
-      default:
-          return -EINVAL;
-      }
-      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
-  } else {
-      gp.param = param;
-      gp.value = value;
-
-      ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+  info.value = (uint64_t)(uintptr_t)value;
+  switch (param) {
+  case RADEON_PARAM_DEVICE_ID:
+    info.request = RADEON_INFO_DEVICE_ID;
+    break;
+  case RADEON_PARAM_NUM_GB_PIPES:
+    info.request = RADEON_INFO_NUM_GB_PIPES;
+    break;
+  case RADEON_PARAM_NUM_Z_PIPES:
+    info.request = RADEON_INFO_NUM_Z_PIPES;
+    break;
+  case RADEON_INFO_TILE_CONFIG:
+    info.request = RADEON_INFO_TILE_CONFIG;
+    break;
+  default:
+    return -EINVAL;
   }
-  return ret;
+  return drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
 }
 
 #if defined(RADEON_R100)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_state.c	2015-09-16 14:36:10.000000000 +0000
@@ -1354,7 +1354,7 @@
    GLfloat xoffset = 0.0;
    GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
    const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
-   double scale[3], translate[3];
+   float scale[3], translate[3];
    GLfloat y_scale, y_bias;
 
    if (render_to_fbo) {
@@ -1452,7 +1452,7 @@
 
    if ( RADEON_DEBUG & RADEON_STATE )
       fprintf( stderr, "%s( %s = %s )\n", __func__,
-	       _mesa_lookup_enum_by_nr( cap ),
+	       _mesa_enum_to_string( cap ),
 	       state ? "GL_TRUE" : "GL_FALSE" );
 
    switch ( cap ) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_state_init.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_state_init.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_state_init.c	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_state_init.c	2015-09-16 14:36:10.000000000 +0000
@@ -336,12 +336,15 @@
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
    else switch (rrb->base.Base.Format) {
    case MESA_FORMAT_B5G6R5_UNORM:
+   case MESA_FORMAT_R5G6B5_UNORM:
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
 	break;
    case MESA_FORMAT_B4G4R4A4_UNORM:
+   case MESA_FORMAT_A4R4G4B4_UNORM:
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
 	break;
    case MESA_FORMAT_B5G5R5A1_UNORM:
+   case MESA_FORMAT_A1R5G5B5_UNORM:
 	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
 	break;
    default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_swtcl.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_swtcl.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_swtcl.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_swtcl.c	2015-09-16 14:36:10.000000000 +0000
@@ -442,7 +442,7 @@
 
       radeon_print(RADEON_SWRENDER, RADEON_NORMAL,
 	  "radeon_render.c: prim %s %d..%d\n",
-		 _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), 
+		 _mesa_enum_to_string(prim & PRIM_MODE_MASK), 
 		 start, start+length);
 
       if (length)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_tex.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_tex.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_tex.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_tex.c	2015-09-16 14:36:10.000000000 +0000
@@ -263,7 +263,7 @@
 
    if ( RADEON_DEBUG & RADEON_STATE ) {
       fprintf( stderr, "%s( %s )\n",
-	       __func__, _mesa_lookup_enum_by_nr( pname ) );
+	       __func__, _mesa_enum_to_string( pname ) );
    }
 
    switch ( pname ) {
@@ -335,7 +335,7 @@
    radeonTexObj* t = radeon_tex_obj(texObj);
 
    radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __func__,
-	       _mesa_lookup_enum_by_nr( pname ) );
+	       _mesa_enum_to_string( pname ) );
 
    switch ( pname ) {
    case GL_TEXTURE_BASE_LEVEL:
@@ -359,7 +359,7 @@
 
    radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
 	 "%s( %p (target = %s) )\n", __func__, (void *)texObj,
-	       _mesa_lookup_enum_by_nr( texObj->Target ) );
+	       _mesa_enum_to_string( texObj->Target ) );
 
    if ( rmesa ) {
      radeon_firevertices(&rmesa->radeon);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_tex.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_tex.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_tex.h	2012-08-30 05:23:52.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_tex.h	2015-09-16 14:36:10.000000000 +0000
@@ -51,4 +51,39 @@
 
 extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
 
+struct tx_table {
+   GLuint format, filter;
+};
+
+/* XXX verify this table against MESA_FORMAT_x values */
+static const struct tx_table tx_table[] =
+{
+   [ MESA_FORMAT_NONE ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_A8B8G8R8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
+   [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_B4G4R4A4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A4R4G4B4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_B5G5R5A1_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A1R5G5B5_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L8A8_UNORM ] = { RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8L8_UNORM ] = { RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L_UNORM8 ] = { RADEON_TXFORMAT_I8, 0 },
+   [ MESA_FORMAT_I_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_YCBCR ] = { RADEON_TXFORMAT_YVYU422, RADEON_YUV_TO_RGB },
+   [ MESA_FORMAT_YCBCR_REV ] = { RADEON_TXFORMAT_VYUY422, RADEON_YUV_TO_RGB },
+   [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
+   [ MESA_FORMAT_RGB_DXT1 ] = { RADEON_TXFORMAT_DXT1, 0 },
+   [ MESA_FORMAT_RGBA_DXT1 ] = { RADEON_TXFORMAT_DXT1 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT3 ] = { RADEON_TXFORMAT_DXT23 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT5 ] = { RADEON_TXFORMAT_DXT45 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+};
+
+
 #endif /* __RADEON_TEX_H__ */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_texstate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_texstate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_texstate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_texstate.c	2015-09-16 14:36:10.000000000 +0000
@@ -53,53 +53,9 @@
 #include "radeon_tcl.h"
 
 
-#define RADEON_TXFORMAT_A8        RADEON_TXFORMAT_I8
-#define RADEON_TXFORMAT_L8        RADEON_TXFORMAT_I8
-#define RADEON_TXFORMAT_AL88      RADEON_TXFORMAT_AI88
-#define RADEON_TXFORMAT_YCBCR     RADEON_TXFORMAT_YVYU422
-#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
-#define RADEON_TXFORMAT_RGB_DXT1  RADEON_TXFORMAT_DXT1
-#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
-#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
-#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
-
 #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
 			     && (tx_table[f].format != 0xffffffff) )
 
-struct tx_table {
-   GLuint format, filter;
-};
-
-/* XXX verify this table against MESA_FORMAT_x values */
-static const struct tx_table tx_table[] =
-{
-   [ MESA_FORMAT_NONE ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_A8B8G8R8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
-   [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
-   [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
-   [ MESA_FORMAT_B4G4R4A4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A4R4G4B4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_B5G5R5A1_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A1R5G5B5_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_L8A8_UNORM ] = { RADEON_TXFORMAT_AL88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A8L8_UNORM ] = { RADEON_TXFORMAT_AL88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_A_UNORM8 ] = { RADEON_TXFORMAT_A8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_L_UNORM8 ] = { RADEON_TXFORMAT_L8, 0 },
-   [ MESA_FORMAT_I_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_YCBCR ] = { RADEON_TXFORMAT_YCBCR, RADEON_YUV_TO_RGB },
-   [ MESA_FORMAT_YCBCR_REV ] = { RADEON_TXFORMAT_YCBCR_REV, RADEON_YUV_TO_RGB },
-   [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
-   [ MESA_FORMAT_RGB_DXT1 ] = { RADEON_TXFORMAT_RGB_DXT1, 0 },
-   [ MESA_FORMAT_RGBA_DXT1 ] = { RADEON_TXFORMAT_RGBA_DXT1 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_RGBA_DXT3 ] = { RADEON_TXFORMAT_RGBA_DXT3 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-   [ MESA_FORMAT_RGBA_DXT5 ] = { RADEON_TXFORMAT_RGBA_DXT5 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-};
-
 /* ================================================================
  * Texture combine functions
  */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/radeon/radeon_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/radeon/radeon_texture.c	2015-09-16 14:36:10.000000000 +0000
@@ -224,7 +224,19 @@
 	const GLuint ui = 1;
 	const GLubyte littleEndian = *((const GLubyte *)&ui);
 
-	if (fbo)
+
+	/* Unfortunately, regardless of the fbo flag, we might still be asked to
+	 * attach a texture to a fbo later, which then won't succeed if we chose
+	 * one which isn't renderable. And unlike more exotic formats, apps aren't
+	 * really prepared for the incomplete framebuffer this results in (they'd
+	 * have to retry with same internalFormat even, just different
+	 * srcFormat/srcType, which can't really be expected anyway).
+	 * Ideally, we'd defer format selection until later (if the texture is
+	 * used as a rt it's likely there's never data uploaded to it before attached
+	 * to a fbo), but this isn't really possible, so for now just always use
+	 * a renderable format.
+	 */
+	if (1 || fbo)
 		return _radeon_texformat_argb8888;
 
 	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
@@ -267,8 +279,8 @@
 	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 		"%s InternalFormat=%s(%d) type=%s format=%s\n",
 		__func__,
-		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
-		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+		_mesa_enum_to_string(internalFormat), internalFormat,
+		_mesa_enum_to_string(type), _mesa_enum_to_string(format));
 	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
 			"%s do32bpt=%d force16bpt=%d\n",
 			__func__, do32bpt, force16bpt);
@@ -531,7 +543,7 @@
 	__DRIscreen *screen;
 	__DRIimage *image;
 
-	screen = radeon->dri.screen;
+	screen = radeon->radeonScreen->driScreen;
 	image = screen->dri2.image->lookupEGLImage(screen, image_handle,
 						   screen->loaderPrivate);
 	if (image == NULL)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/swrast/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/swrast/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/dri/swrast/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/dri/swrast/Makefile.am	2015-09-16 14:36:10.000000000 +0000
@@ -24,7 +24,6 @@
 include Makefile.sources
 
 AM_CFLAGS = \
-	-D__NOT_HAVE_DRM_H \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/ \
 	-I$(top_srcdir)/src/mapi \
@@ -33,6 +32,7 @@
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	-I$(top_builddir)/src/mesa/drivers/dri/common \
+	$(LIBDRM_CFLAGS) \
 	$(DEFINES) \
 	$(VISIBILITY_CFLAGS)
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/x11/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/x11/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/drivers/x11/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/drivers/x11/Makefile.am	2015-09-16 14:36:10.000000000 +0000
@@ -25,6 +25,11 @@
 
 EXTRA_DIST = SConscript
 
+if HAVE_SHARED_GLAPI
+SHARED_GLAPI_CFLAGS = -DGLX_SHARED_GLAPI
+SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
+endif
+
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/mapi \
@@ -34,11 +39,10 @@
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/mesa/main \
 	$(X11_INCLUDES) \
+	$(SHARED_GLAPI_CFLAGS) \
 	$(DEFINES)
 
-if HAVE_X11_DRIVER
 lib_LTLIBRARIES = lib@GL_LIB@.la
-endif
 
 lib@GL_LIB@_la_SOURCES = \
 	glxapi.h \
@@ -66,6 +70,7 @@
 lib@GL_LIB@_la_LIBADD = \
 	$(top_builddir)/src/mesa/libmesa.la \
 	$(top_builddir)/src/mapi/glapi/libglapi.la \
+	$(SHARED_GLAPI_LIB) \
 	$(GL_LIB_DEPS)
 
 lib@GL_LIB@_la_LDFLAGS = \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/api_validate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/api_validate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/api_validate.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/api_validate.c	2015-09-16 14:36:10.000000000 +0000
@@ -69,6 +69,25 @@
          return false;
       }
 
+      /* The spec argues that this is allowed because a tess ctrl shader
+       * without a tess eval shader can be used with transform feedback.
+       * However, glBeginTransformFeedback doesn't allow GL_PATCHES and
+       * therefore doesn't allow tessellation.
+       *
+       * Further investigation showed that this is indeed a spec bug and
+       * a tess ctrl shader without a tess eval shader shouldn't have been
+       * allowed, because there is no API in GL 4.0 that can make use of this
+       * to produce something useful.
+       *
+       * Also, all vendors except one don't support a tess ctrl shader without
+       * a tess eval shader anyway.
+       */
+      if (ctx->TessCtrlProgram._Current && !ctx->TessEvalProgram._Current) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "%s(tess eval shader is missing)", function);
+         return false;
+      }
+
       /* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec
        * says:
        *
@@ -127,6 +146,9 @@
    if (mode <= GL_TRIANGLE_STRIP_ADJACENCY)
       return _mesa_has_geometry_shaders(ctx);
 
+   if (mode == GL_PATCHES)
+      return _mesa_has_tessellation(ctx);
+
    return false;
 }
 
@@ -136,6 +158,7 @@
  * etc?  Also, do additional checking related to transformation feedback.
  * Note: this function cannot be called during glNewList(GL_COMPILE) because
  * this code depends on current transform feedback state.
+ * Also, do additional checking related to tessellation shaders.
  */
 GLboolean
 _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name)
@@ -170,11 +193,29 @@
     *   TRIANGLES_ADJACENCY_ARB and <mode> is not
     *   TRIANGLES_ADJACENCY_ARB or TRIANGLE_STRIP_ADJACENCY_ARB.
     *
+    * The GL spec doesn't mention any interaction with tessellation, which
+    * is clearly a spec bug. The same rule should apply, but instead of
+    * the draw primitive mode, the tessellation evaluation shader primitive
+    * mode should be used for the checking.
    */
    if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) {
       const GLenum geom_mode =
          ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]->Geom.InputType;
-      switch (mode) {
+      struct gl_shader_program *tes =
+         ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+      GLenum mode_before_gs = mode;
+
+      if (tes) {
+         if (tes->TessEval.PointMode)
+            mode_before_gs = GL_POINTS;
+         else if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
+            mode_before_gs = GL_LINES;
+         else
+            /* the GL_QUADS mode generates triangles too */
+            mode_before_gs = GL_TRIANGLES;
+      }
+
+      switch (mode_before_gs) {
       case GL_POINTS:
          valid_enum = (geom_mode == GL_POINTS);
          break;
@@ -209,12 +250,42 @@
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "%s(mode=%s vs geometry shader input %s)",
                      name,
-                     _mesa_lookup_prim_by_nr(mode),
+                     _mesa_lookup_prim_by_nr(mode_before_gs),
                      _mesa_lookup_prim_by_nr(geom_mode));
          return GL_FALSE;
       }
    }
 
+   /* From the OpenGL 4.0 (Core Profile) spec (section 2.12):
+    *
+    *     "Tessellation operates only on patch primitives. If tessellation is
+    *      active, any command that transfers vertices to the GL will
+    *      generate an INVALID_OPERATION error if the primitive mode is not
+    *      PATCHES.
+    *      Patch primitives are not supported by pipeline stages below the
+    *      tessellation evaluation shader. If there is no active program
+    *      object or the active program object does not contain a tessellation
+    *      evaluation shader, the error INVALID_OPERATION is generated by any
+    *      command that transfers vertices to the GL if the primitive mode is
+    *      PATCHES."
+    *
+    */
+   if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] ||
+       ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]) {
+      if (mode != GL_PATCHES) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "only GL_PATCHES valid with tessellation");
+         return GL_FALSE;
+      }
+   }
+   else {
+      if (mode == GL_PATCHES) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "GL_PATCHES only valid with tessellation");
+         return GL_FALSE;
+      }
+   }
+
    /* From the GL_EXT_transform_feedback spec:
     *
     *     "The error INVALID_OPERATION is generated if Begin, or any command
@@ -247,6 +318,17 @@
             pass = GL_FALSE;
          }
       }
+      else if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]) {
+         struct gl_shader_program *tes =
+            ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+         if (tes->TessEval.PointMode)
+            pass = ctx->TransformFeedback.Mode == GL_POINTS;
+         else if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
+            pass = ctx->TransformFeedback.Mode == GL_LINES;
+         else
+            pass = ctx->TransformFeedback.Mode == GL_TRIANGLES;
+      }
       else {
          switch (mode) {
          case GL_POINTS:
@@ -291,7 +373,7 @@
 
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(type = %s)", name,
-                  _mesa_lookup_enum_by_nr(type));
+                  _mesa_enum_to_string(type));
       return false;
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/atifragshader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/atifragshader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/atifragshader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/atifragshader.c	2015-09-16 14:36:10.000000000 +0000
@@ -132,21 +132,21 @@
 
   op_name = atifs_ops[(arg_count-1)+(optype?3:0)];
   
-  fprintf(stderr, "%s(%s, %s", op_name, _mesa_lookup_enum_by_nr(op),
-	      _mesa_lookup_enum_by_nr(dst));
+  fprintf(stderr, "%s(%s, %s", op_name, _mesa_enum_to_string(op),
+	      _mesa_enum_to_string(dst));
   if (!optype)
     fprintf(stderr, ", %d", dstMask);
   
   fprintf(stderr, ", %s", create_dst_mod_str(dstMod));
   
-  fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg1),
-	      _mesa_lookup_enum_by_nr(arg1Rep), arg1Mod);
+  fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg1),
+	      _mesa_enum_to_string(arg1Rep), arg1Mod);
   if (arg_count>1)
-    fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg2),
-	      _mesa_lookup_enum_by_nr(arg2Rep), arg2Mod);
+    fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg2),
+	      _mesa_enum_to_string(arg2Rep), arg2Mod);
   if (arg_count>2)
-    fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg3),
-	      _mesa_lookup_enum_by_nr(arg3Rep), arg3Mod);
+    fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg3),
+	      _mesa_enum_to_string(arg3Rep), arg3Mod);
 
   fprintf(stderr,")\n");
 
@@ -383,7 +383,7 @@
    for (j = 0; j < MAX_NUM_PASSES_ATI; j++) {
       for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
 	 GLuint op = curProg->SetupInst[j][i].Opcode;
-	 const char *op_enum = op > 5 ? _mesa_lookup_enum_by_nr(op) : "0";
+	 const char *op_enum = op > 5 ? _mesa_enum_to_string(op) : "0";
 	 GLuint src = curProg->SetupInst[j][i].src;
 	 GLuint swizzle = curProg->SetupInst[j][i].swizzle;
 	 fprintf(stderr, "%2d %04X %s %d %04X\n", i, op, op_enum, src,
@@ -392,8 +392,8 @@
       for (i = 0; i < curProg->numArithInstr[j]; i++) {
 	 GLuint op0 = curProg->Instructions[j][i].Opcode[0];
 	 GLuint op1 = curProg->Instructions[j][i].Opcode[1];
-	 const char *op0_enum = op0 > 5 ? _mesa_lookup_enum_by_nr(op0) : "0";
-	 const char *op1_enum = op1 > 5 ? _mesa_lookup_enum_by_nr(op1) : "0";
+	 const char *op0_enum = op0 > 5 ? _mesa_enum_to_string(op0) : "0";
+	 const char *op1_enum = op1 > 5 ? _mesa_enum_to_string(op1) : "0";
 	 GLuint count0 = curProg->Instructions[j][i].ArgCount[0];
 	 GLuint count1 = curProg->Instructions[j][i].ArgCount[1];
 	 fprintf(stderr, "%2d %04X %s %d %04X %s %d\n", i, op0, op0_enum, count0,
@@ -477,8 +477,8 @@
 
 #if MESA_DEBUG_ATI_FS
    _mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__,
-	       _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(coord),
-	       _mesa_lookup_enum_by_nr(swizzle));
+	       _mesa_enum_to_string(dst), _mesa_enum_to_string(coord),
+	       _mesa_enum_to_string(swizzle));
 #endif
 }
 
@@ -550,8 +550,8 @@
 
 #if MESA_DEBUG_ATI_FS
    _mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__,
-	       _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(interp),
-	       _mesa_lookup_enum_by_nr(swizzle));
+	       _mesa_enum_to_string(dst), _mesa_enum_to_string(interp),
+	       _mesa_enum_to_string(swizzle));
 #endif
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/attrib.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/attrib.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/attrib.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/attrib.c	2015-09-16 14:36:10.000000000 +0000
@@ -177,6 +177,10 @@
 };
 
 
+/** An unused GL_*_BIT value */
+#define DUMMY_BIT 0x10000000
+
+
 /**
  * Allocate new attribute node of given type/kind.  Attach payload data.
  * Insert it into the linked list named by 'head'.
@@ -253,6 +257,15 @@
    /* groups specified by the mask. */
    head = NULL;
 
+   if (mask == 0) {
+      /* if mask is zero we still need to push something so that we
+       * don't get a GL_STACK_UNDERFLOW error in glPopAttrib().
+       */
+      GLuint dummy = 0;
+      if (!push_attrib(ctx, &head, DUMMY_BIT, sizeof(dummy), &dummy))
+         goto end;
+   }
+
    if (mask & GL_ACCUM_BUFFER_BIT) {
       if (!push_attrib(ctx, &head, GL_ACCUM_BUFFER_BIT,
                        sizeof(struct gl_accum_attrib),
@@ -924,10 +937,14 @@
 
       if (MESA_VERBOSE & VERBOSE_API) {
          _mesa_debug(ctx, "glPopAttrib %s\n",
-                     _mesa_lookup_enum_by_nr(attr->kind));
+                     _mesa_enum_to_string(attr->kind));
       }
 
       switch (attr->kind) {
+         case DUMMY_BIT:
+            /* do nothing */
+            break;
+
          case GL_ACCUM_BUFFER_BIT:
             {
                const struct gl_accum_attrib *accum;
@@ -1074,6 +1091,11 @@
                _mesa_ClearDepth(depth->Clear);
                _mesa_set_enable(ctx, GL_DEPTH_TEST, depth->Test);
                _mesa_DepthMask(depth->Mask);
+               if (ctx->Extensions.EXT_depth_bounds_test) {
+                  _mesa_set_enable(ctx, GL_DEPTH_BOUNDS_TEST_EXT,
+                                   depth->BoundsTest);
+                  _mesa_DepthBoundsEXT(depth->BoundsMin, depth->BoundsMax);
+               }
             }
             break;
          case GL_ENABLE_BIT:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/blend.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/blend.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/blend.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/blend.c	2015-09-16 14:36:10.000000000 +0000
@@ -128,28 +128,28 @@
    if (!legal_src_factor(ctx, sfactorRGB)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "%s(sfactorRGB = %s)", func,
-                  _mesa_lookup_enum_by_nr(sfactorRGB));
+                  _mesa_enum_to_string(sfactorRGB));
       return GL_FALSE;
    }
 
    if (!legal_dst_factor(ctx, dfactorRGB)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "%s(dfactorRGB = %s)", func,
-                  _mesa_lookup_enum_by_nr(dfactorRGB));
+                  _mesa_enum_to_string(dfactorRGB));
       return GL_FALSE;
    }
 
    if (sfactorA != sfactorRGB && !legal_src_factor(ctx, sfactorA)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "%s(sfactorA = %s)", func,
-                  _mesa_lookup_enum_by_nr(sfactorA));
+                  _mesa_enum_to_string(sfactorA));
       return GL_FALSE;
    }
 
    if (dfactorA != dfactorRGB && !legal_dst_factor(ctx, dfactorA)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "%s(dfactorA = %s)", func,
-                  _mesa_lookup_enum_by_nr(dfactorA));
+                  _mesa_enum_to_string(dfactorA));
       return GL_FALSE;
    }
 
@@ -208,10 +208,10 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glBlendFuncSeparate %s %s %s %s\n",
-                  _mesa_lookup_enum_by_nr(sfactorRGB),
-                  _mesa_lookup_enum_by_nr(dfactorRGB),
-                  _mesa_lookup_enum_by_nr(sfactorA),
-                  _mesa_lookup_enum_by_nr(dfactorA));
+                  _mesa_enum_to_string(sfactorRGB),
+                  _mesa_enum_to_string(dfactorRGB),
+                  _mesa_enum_to_string(sfactorA),
+                  _mesa_enum_to_string(dfactorA));
 
    if (!validate_blend_factors(ctx, "glBlendFuncSeparate",
                                sfactorRGB, dfactorRGB,
@@ -342,7 +342,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glBlendEquation(%s)\n",
-                  _mesa_lookup_enum_by_nr(mode));
+                  _mesa_enum_to_string(mode));
 
    if (!legal_blend_equation(ctx, mode)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation");
@@ -385,7 +385,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glBlendEquationi(%u, %s)\n",
-                  buf, _mesa_lookup_enum_by_nr(mode));
+                  buf, _mesa_enum_to_string(mode));
 
    if (buf >= ctx->Const.MaxDrawBuffers) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glBlendFuncSeparatei(buffer=%u)",
@@ -421,8 +421,8 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glBlendEquationSeparateEXT(%s %s)\n",
-                  _mesa_lookup_enum_by_nr(modeRGB),
-                  _mesa_lookup_enum_by_nr(modeA));
+                  _mesa_enum_to_string(modeRGB),
+                  _mesa_enum_to_string(modeA));
 
    if ( (modeRGB != modeA) && !ctx->Extensions.EXT_blend_equation_separate ) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -476,8 +476,8 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glBlendEquationSeparatei(%u, %s %s)\n", buf,
-                  _mesa_lookup_enum_by_nr(modeRGB),
-                  _mesa_lookup_enum_by_nr(modeA));
+                  _mesa_enum_to_string(modeRGB),
+                  _mesa_enum_to_string(modeA));
 
    if (buf >= ctx->Const.MaxDrawBuffers) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glBlendEquationSeparatei(buffer=%u)",
@@ -567,7 +567,10 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glAlphaFunc(%s, %f)\n",
-                  _mesa_lookup_enum_by_nr(func), ref);
+                  _mesa_enum_to_string(func), ref);
+
+   if (ctx->Color.AlphaFunc == func && ctx->Color.AlphaRefUnclamped == ref)
+      return; /* no change */
 
    switch (func) {
    case GL_NEVER:
@@ -578,9 +581,6 @@
    case GL_NOTEQUAL:
    case GL_GEQUAL:
    case GL_ALWAYS:
-      if (ctx->Color.AlphaFunc == func && ctx->Color.AlphaRefUnclamped == ref)
-         return; /* no change */
-
       FLUSH_VERTICES(ctx, _NEW_COLOR);
       ctx->Color.AlphaFunc = func;
       ctx->Color.AlphaRefUnclamped = ref;
@@ -613,7 +613,7 @@
    GET_CURRENT_CONTEXT(ctx);
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glLogicOp(%s)\n", _mesa_lookup_enum_by_nr(opcode));
+      _mesa_debug(ctx, "glLogicOp(%s)\n", _mesa_enum_to_string(opcode));
 
    switch (opcode) {
       case GL_CLEAR:
@@ -790,7 +790,7 @@
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "glClampColor(%s)",
-               _mesa_lookup_enum_by_nr(target));
+               _mesa_enum_to_string(target));
 }
 
 static GLboolean
@@ -930,12 +930,10 @@
    ctx->Color._ClampFragmentColor = GL_FALSE;
    ctx->Color.ClampReadColor = GL_FIXED_ONLY_ARB;
 
-   if (ctx->API == API_OPENGLES2) {
-      /* GLES 3 behaves as though GL_FRAMEBUFFER_SRGB is always enabled. */
-      ctx->Color.sRGBEnabled = GL_TRUE;
-   } else {
-      ctx->Color.sRGBEnabled = GL_FALSE;
-   }
+   /* GLES 1/2/3 behaves as though GL_FRAMEBUFFER_SRGB is always enabled
+    * if EGL_KHR_gl_colorspace has been used to request sRGB.
+    */
+   ctx->Color.sRGBEnabled = _mesa_is_gles(ctx);
 }
 
 /*@}*/
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/blit.c	2015-09-16 14:36:10.000000000 +0000
@@ -37,6 +37,7 @@
 #include "framebuffer.h"
 #include "glformats.h"
 #include "mtypes.h"
+#include "macros.h"
 #include "state.h"
 
 
@@ -59,6 +60,31 @@
 
 
 /**
+ * \return true if the regions overlap; merely sharing an edge is not overlap
+ */
+bool
+_mesa_regions_overlap(int srcX0, int srcY0,
+                      int srcX1, int srcY1,
+                      int dstX0, int dstY0,
+                      int dstX1, int dstY1)
+{
+   if (MAX2(srcX0, srcX1) <= MIN2(dstX0, dstX1))
+      return false; /* dst completely right of src */
+
+   if (MAX2(dstX0, dstX1) <= MIN2(srcX0, srcX1))
+      return false; /* dst completely left of src */
+
+   if (MAX2(srcY0, srcY1) <= MIN2(dstY0, dstY1))
+      return false; /* dst completely above src */
+
+   if (MAX2(dstY0, dstY1) <= MIN2(srcY0, srcY1))
+      return false; /* dst completely below src */
+
+   return true; /* some overlap */
+}
+
+
+/**
  * Helper function for checking if the datatypes of color buffers are
  * compatible for glBlitFramebuffer.  From the 3.1 spec, page 198:
  *
@@ -186,7 +212,7 @@
 
    if (!is_valid_blit_filter(ctx, filter)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid filter %s)", func,
-                  _mesa_lookup_enum_by_nr(filter));
+                  _mesa_enum_to_string(filter));
       return;
    }
 
@@ -194,7 +220,7 @@
         filter == GL_SCALED_RESOLVE_NICEST_EXT) &&
         (readFb->Visual.samples == 0 || drawFb->Visual.samples > 0)) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s: invalid samples)", func,
-                  _mesa_lookup_enum_by_nr(filter));
+                  _mesa_enum_to_string(filter));
       return;
    }
 
@@ -522,7 +548,7 @@
                   " %d, %d, %d, %d, 0x%x, %s)\n",
                   srcX0, srcY0, srcX1, srcY1,
                   dstX0, dstY0, dstX1, dstY1,
-                  mask, _mesa_lookup_enum_by_nr(filter));
+                  mask, _mesa_enum_to_string(filter));
 
    _mesa_blit_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
                           srcX0, srcY0, srcX1, srcY1,
@@ -547,7 +573,7 @@
                   readFramebuffer, drawFramebuffer,
                   srcX0, srcY0, srcX1, srcY1,
                   dstX0, dstY0, dstX1, dstY1,
-                  mask, _mesa_lookup_enum_by_nr(filter));
+                  mask, _mesa_enum_to_string(filter));
 
    /*
     * According to PDF page 533 of the OpenGL 4.5 core spec (30.10.2014,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/blit.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/blit.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/blit.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/blit.h	2015-09-16 14:36:10.000000000 +0000
@@ -28,6 +28,12 @@
 
 #include "glheader.h"
 
+extern bool
+_mesa_regions_overlap(int srcX0, int srcY0,
+                      int srcX1, int srcY1,
+                      int dstX0, int dstY0,
+                      int dstX1, int dstY1);
+
 extern void
 _mesa_blit_framebuffer(struct gl_context *ctx,
                        struct gl_framebuffer *readFb,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/bufferobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/bufferobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/bufferobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/bufferobj.c	2015-09-16 14:36:10.000000000 +0000
@@ -91,8 +91,9 @@
    case GL_COPY_WRITE_BUFFER:
       return &ctx->CopyWriteBuffer;
    case GL_DRAW_INDIRECT_BUFFER:
-      if (ctx->API == API_OPENGL_CORE &&
-          ctx->Extensions.ARB_draw_indirect) {
+      if ((ctx->API == API_OPENGL_CORE &&
+           ctx->Extensions.ARB_draw_indirect) ||
+           _mesa_is_gles31(ctx)) {
          return &ctx->DrawIndirectBuffer;
       }
       break;
@@ -112,6 +113,11 @@
          return &ctx->UniformBuffer;
       }
       break;
+   case GL_SHADER_STORAGE_BUFFER:
+      if (ctx->Extensions.ARB_shader_storage_buffer_object) {
+         return &ctx->ShaderStorageBuffer;
+      }
+      break;
    case GL_ATOMIC_COUNTER_BUFFER:
       if (ctx->Extensions.ARB_shader_atomic_counters) {
          return &ctx->AtomicBuffer;
@@ -831,6 +837,9 @@
    _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer,
 				 ctx->Shared->NullBufferObj);
 
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer,
+                                 ctx->Shared->NullBufferObj);
+
    _mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer,
 				 ctx->Shared->NullBufferObj);
 
@@ -845,12 +854,20 @@
       ctx->UniformBufferBindings[i].Size = -1;
    }
 
+   for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) {
+      _mesa_reference_buffer_object(ctx,
+                                    &ctx->ShaderStorageBufferBindings[i].BufferObject,
+                                    ctx->Shared->NullBufferObj);
+      ctx->ShaderStorageBufferBindings[i].Offset = -1;
+      ctx->ShaderStorageBufferBindings[i].Size = -1;
+   }
+
    for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) {
       _mesa_reference_buffer_object(ctx,
 				    &ctx->AtomicBufferBindings[i].BufferObject,
 				    ctx->Shared->NullBufferObj);
-      ctx->AtomicBufferBindings[i].Offset = -1;
-      ctx->AtomicBufferBindings[i].Size = -1;
+      ctx->AtomicBufferBindings[i].Offset = 0;
+      ctx->AtomicBufferBindings[i].Size = 0;
    }
 }
 
@@ -867,6 +884,8 @@
 
    _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, NULL);
 
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, NULL);
+
    _mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer, NULL);
 
    _mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer, NULL);
@@ -877,6 +896,12 @@
 				    NULL);
    }
 
+   for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) {
+      _mesa_reference_buffer_object(ctx,
+                                    &ctx->ShaderStorageBufferBindings[i].BufferObject,
+                                    NULL);
+   }
+
    for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) {
       _mesa_reference_buffer_object(ctx,
 				    &ctx->AtomicBufferBindings[i].BufferObject,
@@ -1158,7 +1183,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glBindBuffer(%s, %u)\n",
-                  _mesa_lookup_enum_by_nr(target), buffer);
+                  _mesa_enum_to_string(target), buffer);
 
    bind_buffer_object(ctx, target, buffer);
 }
@@ -1240,6 +1265,17 @@
             _mesa_BindBuffer( GL_UNIFORM_BUFFER, 0 );
          }
 
+         /* unbind SSBO binding points */
+         for (j = 0; j < ctx->Const.MaxShaderStorageBufferBindings; j++) {
+            if (ctx->ShaderStorageBufferBindings[j].BufferObject == bufObj) {
+               _mesa_BindBufferBase(GL_SHADER_STORAGE_BUFFER, j, 0);
+            }
+         }
+
+         if (ctx->ShaderStorageBuffer == bufObj) {
+            _mesa_BindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+         }
+
          /* unbind Atomci Buffer binding points */
          for (j = 0; j < ctx->Const.MaxAtomicBufferBindings; j++) {
             if (ctx->AtomicBufferBindings[j].BufferObject == bufObj) {
@@ -1500,9 +1536,9 @@
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "%s(%s, %ld, %p, %s)\n",
                   func,
-                  _mesa_lookup_enum_by_nr(target),
+                  _mesa_enum_to_string(target),
                   (long int) size, data,
-                  _mesa_lookup_enum_by_nr(usage));
+                  _mesa_enum_to_string(usage));
 
    if (size < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE, "%s(size < 0)", func);
@@ -1535,7 +1571,7 @@
 
    if (!valid_usage) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid usage: %s)", func,
-                  _mesa_lookup_enum_by_nr(usage));
+                  _mesa_enum_to_string(usage));
       return;
    }
 
@@ -1990,7 +2026,7 @@
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname: %s)", func,
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
    return false;
 }
 
@@ -2337,7 +2373,7 @@
 
    if (offset + length > bufObj->Size) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "%s(offset %ld + length %ld > buffer_size %ld)", func,
+                  "%s(offset %td + length %td > buffer_size %td)", func,
                   offset, length, bufObj->Size);
       return NULL;
    }
@@ -2999,6 +3035,33 @@
 }
 
 /**
+ * Binds a buffer object to a shader storage buffer binding point.
+ *
+ * The caller is responsible for flushing vertices and updating
+ * NewDriverState.
+ */
+static void
+set_ssbo_binding(struct gl_context *ctx,
+                 struct gl_shader_storage_buffer_binding *binding,
+                 struct gl_buffer_object *bufObj,
+                 GLintptr offset,
+                 GLsizeiptr size,
+                 GLboolean autoSize)
+{
+   _mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj);
+
+   binding->Offset = offset;
+   binding->Size = size;
+   binding->AutomaticSize = autoSize;
+
+   /* If this is a real buffer object (size >= 0), mark it as having been
+    * used at some point as an SSBO.
+    */
+   if (size >= 0)
+      bufObj->UsageHistory |= USAGE_SHADER_STORAGE_BUFFER;
+}
+
+/**
  * Binds a buffer object to a uniform buffer binding point.
  *
  * Unlike set_ubo_binding(), this function also flushes vertices
@@ -3030,6 +3093,37 @@
 }
 
 /**
+ * Binds a buffer object to a shader storage buffer binding point.
+ *
+ * Unlike set_ssbo_binding(), this function also flushes vertices
+ * and updates NewDriverState.  It also checks if the binding
+ * has actually changed before updating it.
+ */
+static void
+bind_shader_storage_buffer(struct gl_context *ctx,
+                           GLuint index,
+                           struct gl_buffer_object *bufObj,
+                           GLintptr offset,
+                           GLsizeiptr size,
+                           GLboolean autoSize)
+{
+   struct gl_shader_storage_buffer_binding *binding =
+      &ctx->ShaderStorageBufferBindings[index];
+
+   if (binding->BufferObject == bufObj &&
+       binding->Offset == offset &&
+       binding->Size == size &&
+       binding->AutomaticSize == autoSize) {
+      return; /* binding unchanged: skip the flush below */
+   }
+
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+   set_ssbo_binding(ctx, binding, bufObj, offset, size, autoSize);
+}
+
+/**
  * Bind a region of a buffer object to a uniform block binding point.
  * \param index  the uniform buffer binding point index
  * \param bufObj  the buffer object
@@ -3064,6 +3158,40 @@
    bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
 }
 
+/**
+ * Bind a region of a buffer object to a shader storage block binding point.
+ * \param index  the shader storage buffer binding point index
+ * \param bufObj  the buffer object
+ * \param offset  offset to the start of buffer object region
+ * \param size  size of the buffer object region
+ */
+static void
+bind_buffer_range_shader_storage_buffer(struct gl_context *ctx,
+                                        GLuint index,
+                                        struct gl_buffer_object *bufObj,
+                                        GLintptr offset,
+                                        GLsizeiptr size)
+{
+   if (index >= ctx->Const.MaxShaderStorageBufferBindings) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(index=%u)", index);
+      return;
+   }
+
+   if (offset & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glBindBufferRange(offset misaligned %d/%d)", (int) offset,
+                  ctx->Const.ShaderStorageBufferOffsetAlignment);
+      return;
+   }
+
+   if (bufObj == ctx->Shared->NullBufferObj) {
+      offset = -1; /* unbound slots use -1/-1 */
+      size = -1;
+   }
+
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj);
+   bind_shader_storage_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
+}
 
 /**
  * Bind a buffer object to a uniform block binding point.
@@ -3088,6 +3216,28 @@
 }
 
 /**
+ * Bind a buffer object to a shader storage block binding point.
+ * As above, but offset = 0.
+ */
+static void
+bind_buffer_base_shader_storage_buffer(struct gl_context *ctx,
+                                       GLuint index,
+                                       struct gl_buffer_object *bufObj)
+{
+   if (index >= ctx->Const.MaxShaderStorageBufferBindings) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferBase(index=%u)", index);
+      return;
+   }
+
+   _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj);
+   /* Null buffer: offset/size -1; otherwise 0/0. AutomaticSize set in both. */
+   if (bufObj == ctx->Shared->NullBufferObj)
+      bind_shader_storage_buffer(ctx, index, bufObj, -1, -1, GL_TRUE);
+   else
+      bind_shader_storage_buffer(ctx, index, bufObj, 0, 0, GL_TRUE);
+}
+
+/**
  * Binds a buffer object to an atomic buffer binding point.
  *
  * The caller is responsible for validating the offset,
@@ -3219,6 +3369,35 @@
    return true;
 }
 
+static bool
+error_check_bind_shader_storage_buffers(struct gl_context *ctx,
+                                        GLuint first, GLsizei count,
+                                        const char *caller)
+{
+   if (!ctx->Extensions.ARB_shader_storage_buffer_object) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "%s(target=GL_SHADER_STORAGE_BUFFER)", caller);
+      return false;
+   }
+   /* NOTE(review): first + count may wrap; confirm callers bound count. */
+   /* The ARB_multi_bind spec says:
+    *
+    *     "An INVALID_OPERATION error is generated if <first> + <count> is
+    *      greater than the number of target-specific indexed binding points,
+    *      as described in section 6.7.1."
+    */
+   if (first + count > ctx->Const.MaxShaderStorageBufferBindings) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(first=%u + count=%d > the value of "
+                  "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS=%u)",
+                  caller, first, count,
+                  ctx->Const.MaxShaderStorageBufferBindings);
+      return false;
+   }
+
+   return true;
+}
+
 /**
  * Unbind all uniform buffers in the range
  * <first> through <first>+<count>-1
@@ -3234,6 +3413,22 @@
                       bufObj, -1, -1, GL_TRUE);
 }
 
+/**
+ * Unbind all shader storage buffers in the range
+ * <first> through <first>+<count>-1
+ */
+static void
+unbind_shader_storage_buffers(struct gl_context *ctx, GLuint first,
+                              GLsizei count)
+{
+   struct gl_buffer_object *bufObj = ctx->Shared->NullBufferObj;
+   GLint i;
+   /* Callers flush vertices and set NewDriverState before calling this. */
+   for (i = 0; i < count; i++)
+      set_ssbo_binding(ctx, &ctx->ShaderStorageBufferBindings[first + i],
+                       bufObj, -1, -1, GL_TRUE);
+}
+
 static void
 bind_uniform_buffers_base(struct gl_context *ctx, GLuint first, GLsizei count,
                           const GLuint *buffers)
@@ -3301,6 +3496,73 @@
 }
 
 static void
+bind_shader_storage_buffers_base(struct gl_context *ctx, GLuint first,
+                                 GLsizei count, const GLuint *buffers)
+{
+   GLint i;
+
+   if (!error_check_bind_shader_storage_buffers(ctx, first, count,
+                                                "glBindBuffersBase"))
+      return;
+
+   /* Assume that at least one binding will be changed */
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+   if (!buffers) {
+      /* The ARB_multi_bind spec says:
+       *
+       *   "If <buffers> is NULL, all bindings from <first> through
+       *    <first>+<count>-1 are reset to their unbound (zero) state."
+       */
+      unbind_shader_storage_buffers(ctx, first, count);
+      return;
+   }
+
+   /* Note that the error semantics for multi-bind commands differ from
+    * those of other GL commands.
+    *
+    * The Issues section in the ARB_multi_bind spec says:
+    *
+    *    "(11) Typically, OpenGL specifies that if an error is generated by a
+    *          command, that command has no effect.  This is somewhat
+    *          unfortunate for multi-bind commands, because it would require a
+    *          first pass to scan the entire list of bound objects for errors
+    *          and then a second pass to actually perform the bindings.
+    *          Should we have different error semantics?
+    *
+    *       RESOLVED:  Yes.  In this specification, when the parameters for
+    *       one of the <count> binding points are invalid, that binding point
+    *       is not updated and an error will be generated.  However, other
+    *       binding points in the same command will be updated if their
+    *       parameters are valid and no other error occurs."
+    */
+
+   _mesa_begin_bufferobj_lookups(ctx);
+
+   for (i = 0; i < count; i++) {
+      struct gl_shader_storage_buffer_binding *binding =
+          &ctx->ShaderStorageBufferBindings[first + i];
+      struct gl_buffer_object *bufObj;
+      /* Reuse the object already bound here when its name matches. */
+      if (binding->BufferObject && binding->BufferObject->Name == buffers[i])
+         bufObj = binding->BufferObject;
+      else
+         bufObj = _mesa_multi_bind_lookup_bufferobj(ctx, buffers, i,
+                                                    "glBindBuffersBase");
+      /* presumably NULL on lookup error: leave this binding untouched */
+      if (bufObj) {
+         if (bufObj == ctx->Shared->NullBufferObj)
+            set_ssbo_binding(ctx, binding, bufObj, -1, -1, GL_TRUE);
+         else
+            set_ssbo_binding(ctx, binding, bufObj, 0, 0, GL_TRUE);
+      }
+   }
+
+   _mesa_end_bufferobj_lookups(ctx);
+}
+
+static void
 bind_uniform_buffers_range(struct gl_context *ctx, GLuint first, GLsizei count,
                            const GLuint *buffers,
                            const GLintptr *offsets, const GLsizeiptr *sizes)
@@ -3405,6 +3667,112 @@
    _mesa_end_bufferobj_lookups(ctx);
 }
 
+static void
+bind_shader_storage_buffers_range(struct gl_context *ctx, GLuint first,
+                                  GLsizei count, const GLuint *buffers,
+                                  const GLintptr *offsets,
+                                  const GLsizeiptr *sizes)
+{
+   GLint i;
+
+   if (!error_check_bind_shader_storage_buffers(ctx, first, count,
+                                                "glBindBuffersRange"))
+      return;
+
+   /* Assume that at least one binding will be changed */
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+   if (!buffers) {
+      /* The ARB_multi_bind spec says:
+       *
+       *    "If <buffers> is NULL, all bindings from <first> through
+       *     <first>+<count>-1 are reset to their unbound (zero) state.
+       *     In this case, the offsets and sizes associated with the
+       *     binding points are set to default values, ignoring
+       *     <offsets> and <sizes>."
+       */
+      unbind_shader_storage_buffers(ctx, first, count);
+      return;
+   }
+
+   /* Note that the error semantics for multi-bind commands differ from
+    * those of other GL commands.
+    *
+    * The Issues section in the ARB_multi_bind spec says:
+    *
+    *    "(11) Typically, OpenGL specifies that if an error is generated by a
+    *          command, that command has no effect.  This is somewhat
+    *          unfortunate for multi-bind commands, because it would require a
+    *          first pass to scan the entire list of bound objects for errors
+    *          and then a second pass to actually perform the bindings.
+    *          Should we have different error semantics?
+    *
+    *       RESOLVED:  Yes.  In this specification, when the parameters for
+    *       one of the <count> binding points are invalid, that binding point
+    *       is not updated and an error will be generated.  However, other
+    *       binding points in the same command will be updated if their
+    *       parameters are valid and no other error occurs."
+    */
+
+   _mesa_begin_bufferobj_lookups(ctx);
+
+   for (i = 0; i < count; i++) {
+      struct gl_shader_storage_buffer_binding *binding =
+         &ctx->ShaderStorageBufferBindings[first + i];
+      struct gl_buffer_object *bufObj;
+
+      if (!bind_buffers_check_offset_and_size(ctx, i, offsets, sizes))
+         continue;
+
+      /* The ARB_multi_bind spec says:
+       *
+       *     "An INVALID_VALUE error is generated by BindBuffersRange if any
+       *      pair of values in <offsets> and <sizes> does not respectively
+       *      satisfy the constraints described for those parameters for the
+       *      specified target, as described in section 6.7.1 (per binding)."
+       *
+       * Section 6.7.1 refers to table 6.5, which says:
+       *
+       *     "┌───────────────────────────────────────────────────────────────┐
+       *      │ Shader storage buffer array bindings (see sec. 7.8)           │
+       *      ├─────────────────────┬─────────────────────────────────────────┤
+       *      │  ...                │  ...                                    │
+       *      │  offset restriction │  multiple of value of SHADER_STORAGE_-  │
+       *      │                     │  BUFFER_OFFSET_ALIGNMENT                │
+       *      │  ...                │  ...                                    │
+       *      │  size restriction   │  none                                   │
+       *      └─────────────────────┴─────────────────────────────────────────┘"
+       */
+      if (offsets[i] & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "glBindBuffersRange(offsets[%u]=%" PRId64
+                     " is misaligned; it must be a multiple of the value of "
+                     "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT=%u when "
+                     "target=GL_SHADER_STORAGE_BUFFER)",
+                     i, (int64_t) offsets[i],
+                     ctx->Const.ShaderStorageBufferOffsetAlignment);
+         continue;
+      }
+
+      if (binding->BufferObject && binding->BufferObject->Name == buffers[i])
+         bufObj = binding->BufferObject;
+      else
+         bufObj = _mesa_multi_bind_lookup_bufferobj(ctx, buffers, i,
+                                                    "glBindBuffersRange");
+
+      if (bufObj) {
+         if (bufObj == ctx->Shared->NullBufferObj)
+            set_ssbo_binding(ctx, binding, bufObj, -1, -1, GL_FALSE);
+         else
+            set_ssbo_binding(ctx, binding, bufObj,
+                             offsets[i], sizes[i], GL_FALSE);
+      }
+   }
+
+   _mesa_end_bufferobj_lookups(ctx);
+}
+
 static bool
 error_check_bind_xfb_buffers(struct gl_context *ctx,
                              struct gl_transform_feedback_object *tfObj,
@@ -3894,6 +4262,9 @@
    case GL_UNIFORM_BUFFER:
       bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size);
       return;
+   case GL_SHADER_STORAGE_BUFFER:
+      bind_buffer_range_shader_storage_buffer(ctx, index, bufObj, offset, size);
+      return;
    case GL_ATOMIC_COUNTER_BUFFER:
       bind_atomic_buffer(ctx, index, bufObj, offset, size,
                          "glBindBufferRange");
@@ -3960,6 +4331,9 @@
    case GL_UNIFORM_BUFFER:
       bind_buffer_base_uniform_buffer(ctx, index, bufObj);
       return;
+   case GL_SHADER_STORAGE_BUFFER:
+      bind_buffer_base_shader_storage_buffer(ctx, index, bufObj);
+      return;
    case GL_ATOMIC_COUNTER_BUFFER:
       bind_atomic_buffer(ctx, index, bufObj, 0, 0,
                          "glBindBufferBase");
@@ -3984,13 +4358,17 @@
    case GL_UNIFORM_BUFFER:
       bind_uniform_buffers_range(ctx, first, count, buffers, offsets, sizes);
       return;
+   case GL_SHADER_STORAGE_BUFFER:
+      bind_shader_storage_buffers_range(ctx, first, count, buffers, offsets,
+                                        sizes);
+      return;
    case GL_ATOMIC_COUNTER_BUFFER:
       bind_atomic_buffers_range(ctx, first, count, buffers,
                                 offsets, sizes);
       return;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glBindBuffersRange(target=%s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       break;
    }
 }
@@ -4008,12 +4386,15 @@
    case GL_UNIFORM_BUFFER:
       bind_uniform_buffers_base(ctx, first, count, buffers);
       return;
+   case GL_SHADER_STORAGE_BUFFER:
+      bind_shader_storage_buffers_base(ctx, first, count, buffers);
+      return;
    case GL_ATOMIC_COUNTER_BUFFER:
       bind_atomic_buffers_base(ctx, first, count, buffers);
       return;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glBindBuffersBase(target=%s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       break;
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/buffers.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/buffers.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/buffers.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/buffers.c	2015-09-16 14:36:10.000000000 +0000
@@ -251,7 +251,7 @@
    FLUSH_VERTICES(ctx, 0);
 
    if (MESA_VERBOSE & VERBOSE_API) {
-      _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer));
+      _mesa_debug(ctx, "%s %s\n", caller, _mesa_enum_to_string(buffer));
    }
 
    if (buffer == GL_NONE) {
@@ -264,14 +264,14 @@
       if (destMask == BAD_MASK) {
          /* totally bogus buffer */
          _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", caller,
-                     _mesa_lookup_enum_by_nr(buffer));
+                     _mesa_enum_to_string(buffer));
          return;
       }
       destMask &= supportedMask;
       if (destMask == 0x0) {
          /* none of the named color buffers exist! */
          _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffer %s)",
-                     caller, _mesa_lookup_enum_by_nr(buffer));
+                     caller, _mesa_enum_to_string(buffer));
          return;
       }
    }
@@ -411,7 +411,7 @@
           */
          if (destMask[output] == BAD_MASK) {
             _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
-                        caller, _mesa_lookup_enum_by_nr(buffers[output]));
+                        caller, _mesa_enum_to_string(buffers[output]));
             return;
          }
 
@@ -427,7 +427,7 @@
           */
          if (_mesa_bitcount(destMask[output]) > 1) {
             _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
-                        caller, _mesa_lookup_enum_by_nr(buffers[output]));
+                        caller, _mesa_enum_to_string(buffers[output]));
             return;
          }
 
@@ -445,7 +445,7 @@
          if (destMask[output] == 0) {
             _mesa_error(ctx, GL_INVALID_OPERATION,
                         "%s(unsupported buffer %s)",
-                        caller, _mesa_lookup_enum_by_nr(buffers[output]));
+                        caller, _mesa_enum_to_string(buffers[output]));
             return;
          }
 
@@ -459,7 +459,7 @@
              buffers[output] != GL_COLOR_ATTACHMENT0 + output) {
             _mesa_error(ctx, GL_INVALID_OPERATION,
                         "%s(unsupported buffer %s)",
-                        caller, _mesa_lookup_enum_by_nr(buffers[output]));
+                        caller, _mesa_enum_to_string(buffers[output]));
             return;
          }
 
@@ -471,7 +471,7 @@
          if (destMask[output] & usedBufferMask) {
             _mesa_error(ctx, GL_INVALID_OPERATION,
                         "%s(duplicated buffer %s)",
-                        caller, _mesa_lookup_enum_by_nr(buffers[output]));
+                        caller, _mesa_enum_to_string(buffers[output]));
             return;
          }
 
@@ -700,7 +700,7 @@
    FLUSH_VERTICES(ctx, 0);
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer));
+      _mesa_debug(ctx, "%s %s\n", caller, _mesa_enum_to_string(buffer));
 
    if (buffer == GL_NONE) {
       /* This is legal--it means that no buffer should be bound for reading. */
@@ -712,14 +712,14 @@
       if (srcBuffer == -1) {
          _mesa_error(ctx, GL_INVALID_ENUM,
                      "%s(invalid buffer %s)", caller,
-                     _mesa_lookup_enum_by_nr(buffer));
+                     _mesa_enum_to_string(buffer));
          return;
       }
       supportedMask = supported_buffer_bitmask(ctx, fb);
       if (((1 << srcBuffer) & supportedMask) == 0) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "%s(invalid buffer %s)", caller,
-                     _mesa_lookup_enum_by_nr(buffer));
+                     _mesa_enum_to_string(buffer));
          return;
       }
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/clear.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/clear.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/clear.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/clear.c	2015-09-16 14:36:10.000000000 +0000
@@ -325,6 +325,18 @@
       _mesa_update_state( ctx );
    }
 
+   /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+    * of the OpenGL 4.5 spec states:
+    *
+    *    "An INVALID_ENUM error is generated by ClearBufferiv and
+    *     ClearNamedFramebufferiv if buffer is not COLOR or STENCIL."
+    */
+   if (buffer == GL_DEPTH || buffer == GL_DEPTH_STENCIL) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glClearBufferiv(buffer=GL_DEPTH || GL_DEPTH_STENCIL)");
+      return;
+   }
+
    switch (buffer) {
    case GL_STENCIL:
       /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
@@ -395,7 +407,7 @@
       return;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferiv(buffer=%s)",
-                  _mesa_lookup_enum_by_nr(buffer));
+                  _mesa_enum_to_string(buffer));
       return;
    }
 }
@@ -485,7 +497,7 @@
       return;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferuiv(buffer=%s)",
-                  _mesa_lookup_enum_by_nr(buffer));
+                  _mesa_enum_to_string(buffer));
       return;
    }
 }
@@ -596,7 +608,7 @@
       return;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfv(buffer=%s)",
-                  _mesa_lookup_enum_by_nr(buffer));
+                  _mesa_enum_to_string(buffer));
       return;
    }
 }
@@ -636,7 +648,7 @@
 
    if (buffer != GL_DEPTH_STENCIL) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfi(buffer=%s)",
-                  _mesa_lookup_enum_by_nr(buffer));
+                  _mesa_enum_to_string(buffer));
       return;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/condrender.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/condrender.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/condrender.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/condrender.c	2015-09-16 14:36:10.000000000 +0000
@@ -87,7 +87,7 @@
       /* fallthrough - invalid */
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glBeginConditionalRender(mode=%s)",
-                  _mesa_lookup_enum_by_nr(mode));
+                  _mesa_enum_to_string(mode));
       return;
    }
 
@@ -184,7 +184,7 @@
    default:
       _mesa_problem(ctx, "Bad cond render mode %s in "
                     " _mesa_check_conditional_render()",
-                    _mesa_lookup_enum_by_nr(ctx->Query.CondRenderMode));
+                    _mesa_enum_to_string(ctx->Query.CondRenderMode));
       return GL_TRUE;
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/config.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/config.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/config.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/config.h	2015-09-16 14:36:10.000000000 +0000
@@ -171,8 +171,10 @@
 #define MAX_PROGRAM_LOCAL_PARAMS       4096
 #define MAX_UNIFORMS                   4096
 #define MAX_UNIFORM_BUFFERS            15 /* + 1 default uniform buffer */
+#define MAX_SHADER_STORAGE_BUFFERS     7  /* + 1 default shader storage buffer */
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
+#define MAX_COMBINED_SHADER_STORAGE_BUFFERS   (MAX_SHADER_STORAGE_BUFFERS * 6)
 #define MAX_ATOMIC_COUNTERS            4096
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_COMBINED_ATOMIC_BUFFERS    (MAX_UNIFORM_BUFFERS * 6)
@@ -213,19 +215,10 @@
 /** For GL_ARB_fragment_program */
 /*@{*/
 #define MAX_FRAGMENT_PROGRAM_ADDRESS_REGS 0
+#define MAX_FRAGMENT_PROGRAM_PARAMS       64
+#define MAX_FRAGMENT_PROGRAM_INPUTS       12
 /*@}*/
 
-/** For GL_NV_fragment_program */
-/*@{*/
-#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */
-#define MAX_NV_FRAGMENT_PROGRAM_TEMPS         96
-#define MAX_NV_FRAGMENT_PROGRAM_PARAMS        64
-#define MAX_NV_FRAGMENT_PROGRAM_INPUTS        12
-#define MAX_NV_FRAGMENT_PROGRAM_OUTPUTS        3
-#define MAX_NV_FRAGMENT_PROGRAM_WRITE_ONLYS    2
-/*@}*/
-
-
 /** For GL_ARB_vertex_shader */
 /*@{*/
 #define MAX_VERTEX_GENERIC_ATTRIBS 16
@@ -281,6 +274,12 @@
 #define MAX_VERTEX_STREAMS                  4
 /*@}*/
 
+/** For GL_ARB_shader_subroutine */
+/*@{*/
+#define MAX_SUBROUTINES                   256
+#define MAX_SUBROUTINE_UNIFORM_LOCATIONS  1024
+/*@}*/
+
 /** For GL_INTEL_performance_query */
 /*@{*/
 #define MAX_PERFQUERY_QUERY_NAME_LENGTH     256
@@ -303,6 +302,14 @@
 /** For GL_ARB_pipeline_statistics_query */
 #define MAX_PIPELINE_STATISTICS             11
 
+/** For GL_ARB_tessellation_shader */
+/*@{*/
+#define MAX_TESS_GEN_LEVEL 64
+#define MAX_PATCH_VERTICES 32
+#define MAX_TESS_PATCH_COMPONENTS 120
+#define MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS 4096
+/*@}*/
+
 /*
  * Color channel component order
  * 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/context.c	2015-09-16 14:36:10.000000000 +0000
@@ -120,6 +120,7 @@
 #include "shaderobj.h"
 #include "shaderimage.h"
 #include "util/simple_list.h"
+#include "util/strtod.h"
 #include "state.h"
 #include "stencil.h"
 #include "texcompress_s3tc.h"
@@ -338,31 +339,6 @@
 
 
 /**
- * This is lame.  gdb only seems to recognize enum types that are
- * actually used somewhere.  We want to be able to print/use enum
- * values such as TEXTURE_2D_INDEX in gdb.  But we don't actually use
- * the gl_texture_index type anywhere.  Thus, this lame function.
- */
-static void
-dummy_enum_func(void)
-{
-   gl_buffer_index bi = BUFFER_FRONT_LEFT;
-   gl_face_index fi = FACE_POS_X;
-   gl_frag_result fr = FRAG_RESULT_DEPTH;
-   gl_texture_index ti = TEXTURE_2D_ARRAY_INDEX;
-   gl_vert_attrib va = VERT_ATTRIB_POS;
-   gl_varying_slot vs = VARYING_SLOT_POS;
-
-   (void) bi;
-   (void) fi;
-   (void) fr;
-   (void) ti;
-   (void) va;
-   (void) vs;
-}
-
-
-/**
  * One-time initialization mutex lock.
  *
  * \sa Used by one_time_init().
@@ -370,6 +346,16 @@
 mtx_t OneTimeLock = _MTX_INITIALIZER_NP;
 
 
+/**
+ * Calls all the various one-time-fini functions in Mesa (run via atexit()).
+ */
+
+static void
+one_time_fini(void)
+{
+   _mesa_destroy_shader_compiler();
+   _mesa_locale_fini();
+}
 
 /**
  * Calls all the various one-time-init functions in Mesa.
@@ -391,13 +377,14 @@
    if (!api_init_mask) {
       GLuint i;
 
-      /* do some implementation tests */
-      assert( sizeof(GLbyte) == 1 );
-      assert( sizeof(GLubyte) == 1 );
-      assert( sizeof(GLshort) == 2 );
-      assert( sizeof(GLushort) == 2 );
-      assert( sizeof(GLint) == 4 );
-      assert( sizeof(GLuint) == 4 );
+      STATIC_ASSERT(sizeof(GLbyte) == 1);
+      STATIC_ASSERT(sizeof(GLubyte) == 1);
+      STATIC_ASSERT(sizeof(GLshort) == 2);
+      STATIC_ASSERT(sizeof(GLushort) == 2);
+      STATIC_ASSERT(sizeof(GLint) == 4);
+      STATIC_ASSERT(sizeof(GLuint) == 4);
+
+      _mesa_locale_init();
 
       _mesa_one_time_init_extension_overrides();
 
@@ -407,6 +394,8 @@
          _mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F;
       }
 
+      atexit(one_time_fini);
+
 #if defined(DEBUG) && defined(__DATE__) && defined(__TIME__)
       if (MESA_VERBOSE != 0) {
 	 _mesa_debug(ctx, "Mesa %s DEBUG build %s %s\n",
@@ -429,13 +418,6 @@
    api_init_mask |= 1 << ctx->API;
 
    mtx_unlock(&OneTimeLock);
-
-   /* Hopefully atexit() is widely available.  If not, we may need some
-    * #ifdef tests here.
-    */
-   atexit(_mesa_destroy_shader_compiler);
-
-   dummy_enum_func();
 }
 
 
@@ -489,13 +471,15 @@
       prog->MaxOutputComponents = 16 * 4; /* old limit not to break tnl and swrast */
       break;
    case MESA_SHADER_FRAGMENT:
-      prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS;
-      prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS;
+      prog->MaxParameters = MAX_FRAGMENT_PROGRAM_PARAMS;
+      prog->MaxAttribs = MAX_FRAGMENT_PROGRAM_INPUTS;
       prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS;
       prog->MaxUniformComponents = 4 * MAX_UNIFORMS;
       prog->MaxInputComponents = 16 * 4; /* old limit not to break tnl and swrast */
       prog->MaxOutputComponents = 0; /* value not used */
       break;
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
    case MESA_SHADER_GEOMETRY:
       prog->MaxParameters = MAX_VERTEX_PROGRAM_PARAMS;
       prog->MaxAttribs = MAX_VERTEX_GENERIC_ATTRIBS;
@@ -554,6 +538,8 @@
 
    prog->MaxAtomicBuffers = 0;
    prog->MaxAtomicCounters = 0;
+
+   prog->MaxShaderStorageBlocks = 8;
 }
 
 
@@ -615,6 +601,12 @@
    consts->MaxUniformBlockSize = 16384;
    consts->UniformBufferOffsetAlignment = 1;
 
+   /** GL_ARB_shader_storage_buffer_object */
+   consts->MaxCombinedShaderStorageBlocks = 8;
+   consts->MaxShaderStorageBufferBindings = 8;
+   consts->MaxShaderStorageBlockSize = 128 * 1024 * 1024; /* 2^27 */
+   consts->ShaderStorageBufferOffsetAlignment = 256;
+
    /* GL_ARB_explicit_uniform_location, GL_MAX_UNIFORM_LOCATIONS */
    consts->MaxUserAssignableUniformLocations =
       4 * MESA_SHADER_STAGES * MAX_UNIFORMS;
@@ -724,6 +716,14 @@
 
    /** GL_KHR_context_flush_control */
    consts->ContextReleaseBehavior = GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH;
+
+   /** GL_ARB_tessellation_shader */
+   consts->MaxTessGenLevel = MAX_TESS_GEN_LEVEL;
+   consts->MaxPatchVertices = MAX_PATCH_VERTICES;
+   consts->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits = MAX_TEXTURE_IMAGE_UNITS;
+   consts->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits = MAX_TEXTURE_IMAGE_UNITS;
+   consts->MaxTessPatchComponents = MAX_TESS_PATCH_COMPONENTS;
+   consts->MaxTessControlTotalOutputComponents = MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS;
 }
 
 
@@ -1169,9 +1169,7 @@
       ctx->HasConfig = GL_FALSE;
    }
 
-   if (_mesa_is_desktop_gl(ctx)) {
-      _mesa_override_gl_version(ctx);
-   }
+   _mesa_override_gl_version(ctx);
 
    /* misc one-time initializations */
    one_time_init(ctx);
@@ -1333,7 +1331,8 @@
    _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
    _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL);
 
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
+   _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current, NULL);
+   _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current, NULL);
    _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
 
    _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/context.h	2015-09-16 14:36:10.000000000 +0000
@@ -343,6 +343,26 @@
       (ctx->API == API_OPENGLES2 && ctx->Version >= 31);
 }
 
+/**
+ * Checks if the context supports shader subroutines.
+ */
+static inline bool
+_mesa_has_shader_subroutine(const struct gl_context *ctx)
+{
+   return ctx->API == API_OPENGL_CORE &&
+      (ctx->Version >= 40 || ctx->Extensions.ARB_shader_subroutine);
+}
+
+/**
+ * Checks if the context supports tessellation.
+ */
+static inline GLboolean
+_mesa_has_tessellation(const struct gl_context *ctx)
+{
+   return ctx->API == API_OPENGL_CORE &&
+          ctx->Extensions.ARB_tessellation_shader;
+}
+
 
 #ifdef __cplusplus
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/copyimage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/copyimage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/copyimage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/copyimage.c	2015-09-16 14:36:10.000000000 +0000
@@ -40,14 +40,25 @@
    BLOCK_CLASS_64_BITS
 };
 
+/**
+ * Prepare the source or destination resource, including:
+ * - Error checking
+ * - Creating texture wrappers for renderbuffers
+ * \param name  the texture or renderbuffer name
+ * \param target  GL_TEXTURE target or GL_RENDERBUFFER.  For the latter, will
+ *                be changed to a compatible GL_TEXTURE target.
+ * \param level  mipmap level
+ * \param tex_obj  returns a pointer to a texture object
+ * \param tex_image  returns a pointer to a texture image
+ * \param tmp_tex  returns temporary texture object name
+ * \return true if success, false if error
+ */
 static bool
 prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
                struct gl_texture_object **tex_obj,
                struct gl_texture_image **tex_image, GLuint *tmp_tex,
                const char *dbg_prefix)
 {
-   struct gl_renderbuffer *rb;
-
    if (name == 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(%sName = %d)", dbg_prefix, name);
@@ -82,12 +93,12 @@
    default:
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glCopyImageSubData(%sTarget = %s)", dbg_prefix,
-                  _mesa_lookup_enum_by_nr(*target));
+                  _mesa_enum_to_string(*target));
       return false;
    }
 
    if (*target == GL_RENDERBUFFER) {
-      rb = _mesa_lookup_renderbuffer(ctx, name);
+      struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(ctx, name);
       if (!rb) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sName = %u)", dbg_prefix, name);
@@ -148,7 +159,7 @@
       if ((*tex_obj)->Target != *target) {
          _mesa_error(ctx, GL_INVALID_ENUM,
                      "glCopyImageSubData(%sTarget = %s)", dbg_prefix,
-                     _mesa_lookup_enum_by_nr(*target));
+                     _mesa_enum_to_string(*target));
          return false;
       }
 
@@ -169,8 +180,15 @@
    return true;
 }
 
+
+/**
+ * Check that the x,y,z,width,height,depth region is within the texture image
+ * dimensions.
+ * \return true if bounds OK, false if region is out of bounds
+ */
 static bool
-check_region_bounds(struct gl_context *ctx, struct gl_texture_image *tex_image,
+check_region_bounds(struct gl_context *ctx,
+                    const struct gl_texture_image *tex_image,
                     int x, int y, int z, int width, int height, int depth,
                     const char *dbg_prefix)
 {
@@ -188,6 +206,7 @@
       return false;
    }
 
+   /* Check X direction */
    if (x + width > tex_image->Width) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(%sX or %sWidth exceeds image bounds)",
@@ -195,6 +214,7 @@
       return false;
    }
 
+   /* Check Y direction */
    switch (tex_image->TexObject->Target) {
    case GL_TEXTURE_1D:
    case GL_TEXTURE_1D_ARRAY:
@@ -215,6 +235,7 @@
       break;
    }
 
+   /* Check Z direction */
    switch (tex_image->TexObject->Target) {
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
@@ -260,7 +281,7 @@
 }
 
 static bool
-compressed_format_compatible(struct gl_context *ctx,
+compressed_format_compatible(const struct gl_context *ctx,
                              GLenum compressedFormat, GLenum otherFormat)
 {
    enum mesa_block_class compressedClass, otherClass;
@@ -348,8 +369,8 @@
 }
 
 static bool
-copy_format_compatible(struct gl_context *ctx,
-                                GLenum srcFormat, GLenum dstFormat)
+copy_format_compatible(const struct gl_context *ctx,
+                       GLenum srcFormat, GLenum dstFormat)
 {
    /*
     * From ARB_copy_image spec:
@@ -389,15 +410,15 @@
    struct gl_texture_object *srcTexObj, *dstTexObj;
    struct gl_texture_image *srcTexImage, *dstTexImage;
    GLuint src_bw, src_bh, dst_bw, dst_bh;
-   int i, srcNewZ, dstNewZ;
+   int i;
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glCopyImageSubData(%u, %s, %d, %d, %d, %d, "
                                           "%u, %s, %d, %d, %d, %d, "
                                           "%d, %d, %d)\n",
-                  srcName, _mesa_lookup_enum_by_nr(srcTarget), srcLevel,
+                  srcName, _mesa_enum_to_string(srcTarget), srcLevel,
                   srcX, srcY, srcZ,
-                  dstName, _mesa_lookup_enum_by_nr(dstTarget), dstLevel,
+                  dstName, _mesa_enum_to_string(dstTarget), dstLevel,
                   dstX, dstY, dstZ,
                   srcWidth, srcHeight, srcWidth);
 
@@ -447,6 +468,8 @@
    }
 
    for (i = 0; i < srcDepth; ++i) {
+      int srcNewZ, dstNewZ;
+
       if (srcTexObj->Target == GL_TEXTURE_CUBE_MAP) {
          srcTexImage = srcTexObj->Image[i + srcZ][srcLevel];
          srcNewZ = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/dd.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/dd.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/dd.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/dd.h	2015-09-16 14:36:10.000000000 +0000
@@ -232,11 +232,13 @@
 
 
    /**
-    * Called by glGetTexImage().
+    * Called by glGetTexImage(), glGetTextureSubImage().
     */
-   void (*GetTexImage)( struct gl_context *ctx,
-                        GLenum format, GLenum type, GLvoid *pixels,
-                        struct gl_texture_image *texImage );
+   void (*GetTexSubImage)(struct gl_context *ctx,
+                          GLint xoffset, GLint yoffset, GLint zoffset,
+                          GLsizei width, GLsizei height, GLsizei depth,
+                          GLenum format, GLenum type, GLvoid *pixels,
+                          struct gl_texture_image *texImage);
 
    /**
     * Called by glClearTex[Sub]Image
@@ -326,16 +328,19 @@
    void (*CompressedTexSubImage)(struct gl_context *ctx, GLuint dims,
                                  struct gl_texture_image *texImage,
                                  GLint xoffset, GLint yoffset, GLint zoffset,
-                                 GLsizei width, GLint height, GLint depth,
+                                 GLsizei width, GLsizei height, GLsizei depth,
                                  GLenum format,
                                  GLsizei imageSize, const GLvoid *data);
 
    /**
     * Called by glGetCompressedTexImage.
     */
-   void (*GetCompressedTexImage)(struct gl_context *ctx,
-                                 struct gl_texture_image *texImage,
-                                 GLvoid *data);
+   void (*GetCompressedTexSubImage)(struct gl_context *ctx,
+                                    struct gl_texture_image *texImage,
+                                    GLint xoffset, GLint yoffset,
+                                    GLint zoffset, GLsizei width,
+                                    GLsizei height, GLsizei depth,
+                                    GLvoid *data);
    /*@}*/
 
    /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/debug.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/debug.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/debug.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/debug.c	2015-09-16 14:36:10.000000000 +0000
@@ -272,7 +272,9 @@
       store = ctx->Pack; /* save */
       ctx->Pack = ctx->DefaultPacking;
 
-      ctx->Driver.GetTexImage(ctx, GL_RGBA, GL_UNSIGNED_BYTE, buffer, img);
+      ctx->Driver.GetTexSubImage(ctx,
+                                 0, 0, 0, img->Width, img->Height, img->Depth,
+                                 GL_RGBA, GL_UNSIGNED_BYTE, buffer, img);
 
       /* make filename */
       _mesa_snprintf(s, sizeof(s), "/tmp/tex%u.l%u.f%u.ppm", texObj->Name, level, face);
@@ -411,7 +413,7 @@
 {
    printf("Renderbuffer %u: %u x %u  IntFormat = %s\n",
 	  rb->Name, rb->Width, rb->Height,
-	  _mesa_lookup_enum_by_nr(rb->InternalFormat));
+	  _mesa_enum_to_string(rb->InternalFormat));
    if (writeImage) {
       _mesa_write_renderbuffer_image(rb);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/depth.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/depth.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/depth.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/depth.c	2015-09-16 14:36:10.000000000 +0000
@@ -63,7 +63,10 @@
    GET_CURRENT_CONTEXT(ctx);
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_lookup_enum_by_nr(func));
+      _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_enum_to_string(func));
+
+   if (ctx->Depth.Func == func)
+      return;
 
    switch (func) {
    case GL_LESS:    /* (default) pass if incoming z < stored z */
@@ -80,9 +83,6 @@
       return;
    }
 
-   if (ctx->Depth.Func == func)
-      return;
-
    FLUSH_VERTICES(ctx, _NEW_DEPTH);
    ctx->Depth.Func = func;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/dlist.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/dlist.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/dlist.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/dlist.c	2015-09-16 14:36:10.000000000 +0000
@@ -9000,7 +9000,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glNewList %u %s\n", name,
-                  _mesa_lookup_enum_by_nr(mode));
+                  _mesa_enum_to_string(mode));
 
    if (name == 0) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glNewList");
@@ -9688,7 +9688,7 @@
 static const char *
 enum_string(GLenum k)
 {
-   return _mesa_lookup_enum_by_nr(k);
+   return _mesa_enum_to_string(k);
 }
 
 
@@ -9827,19 +9827,19 @@
             break;
          case OPCODE_BIND_TEXTURE:
             fprintf(f, "BindTexture %s %d\n",
-                         _mesa_lookup_enum_by_nr(n[1].ui), n[2].ui);
+                         _mesa_enum_to_string(n[1].ui), n[2].ui);
             break;
          case OPCODE_SHADE_MODEL:
-            fprintf(f, "ShadeModel %s\n", _mesa_lookup_enum_by_nr(n[1].ui));
+            fprintf(f, "ShadeModel %s\n", _mesa_enum_to_string(n[1].ui));
             break;
          case OPCODE_MAP1:
             fprintf(f, "Map1 %s %.3f %.3f %d %d\n",
-                         _mesa_lookup_enum_by_nr(n[1].ui),
+                         _mesa_enum_to_string(n[1].ui),
                          n[2].f, n[3].f, n[4].i, n[5].i);
             break;
          case OPCODE_MAP2:
             fprintf(f, "Map2 %s %.3f %.3f %.3f %.3f %d %d %d %d\n",
-                         _mesa_lookup_enum_by_nr(n[1].ui),
+                         _mesa_enum_to_string(n[1].ui),
                          n[2].f, n[3].f, n[4].f, n[5].f,
                          n[6].i, n[7].i, n[8].i, n[9].i);
             break;
@@ -9918,7 +9918,7 @@
 
          case OPCODE_PROVOKING_VERTEX:
             fprintf(f, "ProvokingVertex %s\n",
-                         _mesa_lookup_enum_by_nr(n[1].ui));
+                         _mesa_enum_to_string(n[1].ui));
             break;
 
             /*
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/drawpix.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/drawpix.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/drawpix.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/drawpix.c	2015-09-16 14:36:10.000000000 +0000
@@ -53,10 +53,10 @@
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glDrawPixels(%d, %d, %s, %s, %p) // to %s at %d, %d\n",
                   width, height,
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type),
+                  _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type),
                   pixels,
-                  _mesa_lookup_enum_by_nr(ctx->DrawBuffer->ColorDrawBuffer[0]),
+                  _mesa_enum_to_string(ctx->DrawBuffer->ColorDrawBuffer[0]),
                   IROUND(ctx->Current.RasterPos[0]),
                   IROUND(ctx->Current.RasterPos[1]));
 
@@ -96,8 +96,8 @@
    err = _mesa_error_check_format_and_type(ctx, format, type);
    if (err != GL_NO_ERROR) {
       _mesa_error(ctx, err, "glDrawPixels(invalid format %s and/or type %s)",
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type));
+                  _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type));
       goto end;
    }
 
@@ -198,9 +198,9 @@
       _mesa_debug(ctx,
                   "glCopyPixels(%d, %d, %d, %d, %s) // from %s to %s at %d, %d\n",
                   srcx, srcy, width, height,
-                  _mesa_lookup_enum_by_nr(type),
-                  _mesa_lookup_enum_by_nr(ctx->ReadBuffer->ColorReadBuffer),
-                  _mesa_lookup_enum_by_nr(ctx->DrawBuffer->ColorDrawBuffer[0]),
+                  _mesa_enum_to_string(type),
+                  _mesa_enum_to_string(ctx->ReadBuffer->ColorReadBuffer),
+                  _mesa_enum_to_string(ctx->DrawBuffer->ColorDrawBuffer[0]),
                   IROUND(ctx->Current.RasterPos[0]),
                   IROUND(ctx->Current.RasterPos[1]));
 
@@ -218,7 +218,7 @@
        type != GL_STENCIL &&
        type != GL_DEPTH_STENCIL) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glCopyPixels(type=%s)",
-                  _mesa_lookup_enum_by_nr(type));
+                  _mesa_enum_to_string(type));
       return;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/enable.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/enable.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/enable.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/enable.c	2015-09-16 14:36:10.000000000 +0000
@@ -146,7 +146,7 @@
 
 invalid_enum_error:
    _mesa_error(ctx, GL_INVALID_ENUM, "gl%sClientState(%s)",
-               state ? "Enable" : "Disable", _mesa_lookup_enum_by_nr(cap));
+               state ? "Enable" : "Disable", _mesa_enum_to_string(cap));
 }
 
 
@@ -283,7 +283,7 @@
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "%s %s (newstate is %x)\n",
                   state ? "glEnable" : "glDisable",
-                  _mesa_lookup_enum_by_nr(cap),
+                  _mesa_enum_to_string(cap),
                   ctx->NewState);
 
    switch (cap) {
@@ -1001,7 +1001,7 @@
 
       /* ARB_texture_multisample */
       case GL_SAMPLE_MASK:
-         if (!_mesa_is_desktop_gl(ctx))
+         if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
             goto invalid_enum_error;
          CHECK_EXTENSION(ARB_texture_multisample, cap);
          if (ctx->Multisample.SampleMask == state)
@@ -1022,7 +1022,7 @@
 
 invalid_enum_error:
    _mesa_error(ctx, GL_INVALID_ENUM, "gl%s(%s)",
-               state ? "Enable" : "Disable", _mesa_lookup_enum_by_nr(cap));
+               state ? "Enable" : "Disable", _mesa_enum_to_string(cap));
 }
 
 
@@ -1101,7 +1101,7 @@
 invalid_enum_error:
     _mesa_error(ctx, GL_INVALID_ENUM, "%s(cap=%s)",
                 state ? "glEnablei" : "glDisablei",
-                _mesa_lookup_enum_by_nr(cap));
+                _mesa_enum_to_string(cap));
 }
 
 
@@ -1143,7 +1143,7 @@
       return (ctx->Scissor.EnableFlags >> index) & 1;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glIsEnabledIndexed(cap=%s)",
-                  _mesa_lookup_enum_by_nr(cap));
+                  _mesa_enum_to_string(cap));
       return GL_FALSE;
    }
 }
@@ -1603,7 +1603,7 @@
 
       /* ARB_texture_multisample */
       case GL_SAMPLE_MASK:
-         if (!_mesa_is_desktop_gl(ctx))
+         if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
             goto invalid_enum_error;
          CHECK_EXTENSION(ARB_texture_multisample);
          return ctx->Multisample.SampleMask;
@@ -1623,6 +1623,6 @@
 
 invalid_enum_error:
    _mesa_error(ctx, GL_INVALID_ENUM, "glIsEnabled(%s)",
-               _mesa_lookup_enum_by_nr(cap));
+               _mesa_enum_to_string(cap));
    return GL_FALSE;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/enums.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/enums.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/enums.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/enums.h	2015-09-16 14:36:10.000000000 +0000
@@ -42,7 +42,7 @@
 #endif
 
 
-extern const char *_mesa_lookup_enum_by_nr( int nr );
+extern const char *_mesa_enum_to_string( int nr );
 
 /* Get the name of an enum given that it is a primitive type.  Avoids
  * GL_FALSE/GL_POINTS ambiguity and others.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/errors.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/errors.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/errors.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/errors.c	2015-09-16 14:36:10.000000000 +0000
@@ -39,6 +39,7 @@
 #include "mtypes.h"
 #include "version.h"
 #include "util/hash_table.h"
+#include "util/simple_list.h"
 
 static mtx_t DynamicIDMutex = _MTX_INITIALIZER_NP;
 static GLuint NextDynamicID = 1;
@@ -1313,7 +1314,7 @@
    if (ctx->ErrorDebugCount) {
       _mesa_snprintf(s, MAX_DEBUG_MESSAGE_LENGTH, "%d similar %s errors", 
                      ctx->ErrorDebugCount,
-                     _mesa_lookup_enum_by_nr(ctx->ErrorValue));
+                     _mesa_enum_to_string(ctx->ErrorValue));
 
       output_if_debug("Mesa", s, GL_TRUE);
 
@@ -1412,6 +1413,40 @@
 
 
+/**
+ * va_list variant of _mesa_gl_debug().
+ *
+ * Formats \p fmtString with \p args into a local buffer of
+ * MAX_DEBUG_MESSAGE_LENGTH bytes and hands the text to log_msg().
+ * debug_get_id() is called on \p id first; \p *id is then passed to
+ * log_msg() as the message id.
+ *
+ * The caller keeps ownership of \p args; va_end() is not called here.
+ */
 void
+_mesa_gl_vdebug(struct gl_context *ctx,
+                GLuint *id,
+                enum mesa_debug_source source,
+                enum mesa_debug_type type,
+                enum mesa_debug_severity severity,
+                const char *fmtString,
+                va_list args)
+{
+   char s[MAX_DEBUG_MESSAGE_LENGTH];
+   int len;
+
+   debug_get_id(id);
+
+   len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args);
+   if (len >= MAX_DEBUG_MESSAGE_LENGTH) {
+      /* The message was truncated: report only the bytes actually in s. */
+      len = MAX_DEBUG_MESSAGE_LENGTH - 1;
+   }
+
+   log_msg(ctx, source, type, *id, severity, len, s);
+}
+
+
+void
 _mesa_gl_debug(struct gl_context *ctx,
                GLuint *id,
                enum mesa_debug_source source,
@@ -1419,17 +1440,10 @@
                enum mesa_debug_severity severity,
                const char *fmtString, ...)
 {
-   char s[MAX_DEBUG_MESSAGE_LENGTH];
-   int len;
    va_list args;
-
-   debug_get_id(id);
-
    va_start(args, fmtString);
-   len = _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args);
+   _mesa_gl_vdebug(ctx, id, source, type, severity, fmtString, args);
    va_end(args);
-
-   log_msg(ctx, source, type, *id, severity, len, s);
 }
 
 
@@ -1489,7 +1503,7 @@
       }
 
       len = _mesa_snprintf(s2, MAX_DEBUG_MESSAGE_LENGTH, "%s in %s",
-                           _mesa_lookup_enum_by_nr(error), s);
+                           _mesa_enum_to_string(error), s);
       if (len >= MAX_DEBUG_MESSAGE_LENGTH) {
          /* Same as above. */
          assert(0);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/errors.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/errors.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/errors.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/errors.h	2015-09-16 14:36:10.000000000 +0000
@@ -37,6 +37,7 @@
 
 
 #include <stdio.h>
+#include <stdarg.h>
 #include "compiler.h"
 #include "glheader.h"
 #include "mtypes.h"
@@ -76,6 +77,15 @@
 _mesa_get_log_file(void);
 
 extern void
+_mesa_gl_vdebug(struct gl_context *ctx,
+                GLuint *id,
+                enum mesa_debug_source source,
+                enum mesa_debug_type type,
+                enum mesa_debug_severity severity,
+                const char *fmtString,
+                va_list args); /* va_list variant of _mesa_gl_debug() */
+
+extern void
 _mesa_gl_debug(struct gl_context *ctx,
                GLuint *id,
                enum mesa_debug_source source,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/extensions.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/extensions.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/extensions.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/extensions.c	2015-09-16 14:36:10.000000000 +0000
@@ -50,6 +50,7 @@
    ES1 = 1 << API_OPENGLES,
    ES2 = 1 << API_OPENGLES2,
    ES3 = 1 << (API_OPENGL_LAST + 1),
+   ES31 = 1 << (API_OPENGL_LAST + 2),
 };
 
 /**
@@ -117,9 +118,11 @@
    { "GL_ARB_fragment_program",                    o(ARB_fragment_program),                    GLL,            2002 },
    { "GL_ARB_fragment_program_shadow",             o(ARB_fragment_program_shadow),             GLL,            2003 },
    { "GL_ARB_fragment_shader",                     o(ARB_fragment_shader),                     GL,             2002 },
+   { "GL_ARB_framebuffer_no_attachments",          o(ARB_framebuffer_no_attachments),          GL,             2012 },
    { "GL_ARB_framebuffer_object",                  o(ARB_framebuffer_object),                  GL,             2005 },
    { "GL_ARB_framebuffer_sRGB",                    o(EXT_framebuffer_sRGB),                    GL,             1998 },
    { "GL_ARB_get_program_binary",                  o(dummy_true),                              GL,             2010 },
+   { "GL_ARB_get_texture_sub_image",               o(dummy_true),                              GL,             2014 },
    { "GL_ARB_gpu_shader5",                         o(ARB_gpu_shader5),                         GLC,            2010 },
    { "GL_ARB_gpu_shader_fp64",                     o(ARB_gpu_shader_fp64),                     GLC,            2010 },
    { "GL_ARB_half_float_pixel",                    o(dummy_true),                              GL,             2003 },
@@ -150,9 +153,12 @@
    { "GL_ARB_shader_atomic_counters",              o(ARB_shader_atomic_counters),              GL,             2011 },
    { "GL_ARB_shader_bit_encoding",                 o(ARB_shader_bit_encoding),                 GL,             2010 },
    { "GL_ARB_shader_image_load_store",             o(ARB_shader_image_load_store),             GL,             2011 },
+   { "GL_ARB_shader_image_size",                   o(ARB_shader_image_size),                   GL,             2012 },
    { "GL_ARB_shader_objects",                      o(dummy_true),                              GL,             2002 },
    { "GL_ARB_shader_precision",                    o(ARB_shader_precision),                    GL,             2010 },
    { "GL_ARB_shader_stencil_export",               o(ARB_shader_stencil_export),               GL,             2009 },
+   { "GL_ARB_shader_storage_buffer_object",        o(ARB_shader_storage_buffer_object),        GL,             2012 },
+   { "GL_ARB_shader_subroutine",                   o(ARB_shader_subroutine),                   GLC,            2010 },
    { "GL_ARB_shader_texture_lod",                  o(ARB_shader_texture_lod),                  GL,             2009 },
    { "GL_ARB_shading_language_100",                o(dummy_true),                              GLL,            2003 },
    { "GL_ARB_shading_language_packing",            o(ARB_shading_language_packing),            GL,             2011 },
@@ -381,6 +387,9 @@
    { "GL_NV_point_sprite",                         o(NV_point_sprite),                         GL,             2001 },
    { "GL_NV_primitive_restart",                    o(NV_primitive_restart),                    GLL,            2002 },
    { "GL_NV_read_buffer",                          o(dummy_true),                              ES2,            2011 },
+   { "GL_NV_read_depth",                           o(dummy_true),                              ES2,            2011 },
+   { "GL_NV_read_depth_stencil",                   o(dummy_true),                              ES2,            2011 },
+   { "GL_NV_read_stencil",                         o(dummy_true),                              ES2,            2011 },
    { "GL_NV_texgen_reflection",                    o(dummy_true),                              GLL,            1999 },
    { "GL_NV_texture_barrier",                      o(NV_texture_barrier),                      GL,             2009 },
    { "GL_NV_texture_env_combine4",                 o(NV_texture_env_combine4),                 GLL,            1999 },
@@ -766,6 +775,8 @@
    unsigned api_set = (1 << ctx->API);
    if (_mesa_is_gles3(ctx))
       api_set |= ES3;
+   if (_mesa_is_gles31(ctx))
+      api_set |= ES31;
 
    /* Check if the MESA_EXTENSION_MAX_YEAR env var is set */
    {
@@ -847,6 +858,8 @@
    unsigned api_set = (1 << ctx->API);
    if (_mesa_is_gles3(ctx))
       api_set |= ES3;
+   if (_mesa_is_gles31(ctx))
+      api_set |= ES31;
 
    /* only count once */
    if (ctx->Extensions.Count != 0)
@@ -873,6 +886,8 @@
    unsigned api_set = (1 << ctx->API);
    if (_mesa_is_gles3(ctx))
       api_set |= ES3;
+   if (_mesa_is_gles31(ctx))
+      api_set |= ES31;
 
    base = (GLboolean*) &ctx->Extensions;
    n = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/fbobject.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/fbobject.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/fbobject.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/fbobject.c	2015-09-16 14:36:10.000000000 +0000
@@ -957,6 +957,7 @@
    fb->Height = 0;
    fb->_AllColorBuffersFixedPoint = GL_TRUE;
    fb->_HasSNormOrFloatColorBuffer = GL_FALSE;
+   fb->_HasAttachments = true;
 
    /* Start at -2 to more easily loop over all attachment points.
     *  -2: depth buffer
@@ -1155,14 +1156,48 @@
       } else if (att_layer_count > max_layer_count) {
          max_layer_count = att_layer_count;
       }
+
+      /*
+       * The extension GL_ARB_framebuffer_no_attachments places additional
+       * requirements on each attachment. Those additional requirements are
+       * tighter than those of previous versions of GL. In the interest of
+       * better compatibility, we will not enforce these restrictions. For the
+       * record, those additional restrictions are quoted below:
+       *
+       * "The width and height of image are greater than zero and less than or
+       *  equal to the values of the implementation-dependent limits
+       *  MAX_FRAMEBUFFER_WIDTH and MAX_FRAMEBUFFER_HEIGHT, respectively."
+       *
+       * "If <image> is a three-dimensional texture or a one- or two-dimensional
+       *  array texture and the attachment is layered, the depth or layer count
+       *  of the texture is less than or equal to the implementation-dependent
+       *  limit MAX_FRAMEBUFFER_LAYERS."
+       *
+       * "If image has multiple samples, its sample count is less than or equal
+       *  to the value of the implementation-dependent limit
+       *  MAX_FRAMEBUFFER_SAMPLES."
+       *
+       * The same requirements are also in place for GL 4.5,
+       * Section 9.4.1 "Framebuffer Attachment Completeness", pg 310-311
+       */
    }
 
    fb->MaxNumLayers = max_layer_count;
 
    if (numImages == 0) {
-      fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT;
-      fbo_incomplete(ctx, "no attachments", -1);
-      return;
+      fb->_HasAttachments = false;
+
+      if (!ctx->Extensions.ARB_framebuffer_no_attachments) {
+         fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT;
+         fbo_incomplete(ctx, "no attachments", -1);
+         return;
+      }
+
+      if (fb->DefaultGeometry.Width == 0 || fb->DefaultGeometry.Height == 0) {
+         fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT;
+         fbo_incomplete(ctx, "no attachments and default width or height is 0", -1);
+         return;
+      }
    }
 
    if (_mesa_is_desktop_gl(ctx) && !ctx->Extensions.ARB_ES2_compatibility) {
@@ -1227,8 +1262,10 @@
        * renderbuffers/textures are different sizes, the framebuffer
        * width/height will be set to the smallest width/height.
        */
-      fb->Width = minWidth;
-      fb->Height = minHeight;
+      if (numImages != 0) {
+         fb->Width = minWidth;
+         fb->Height = minHeight;
+      }
 
       /* finally, update the visual info for the framebuffer */
       _mesa_update_framebuffer_visual(ctx, fb);
@@ -1334,6 +1371,170 @@
    bind_renderbuffer(target, renderbuffer, true);
 }
 
+/**
+ * Set one default-geometry parameter of a framebuffer object.
+ *
+ * Helper behind glFramebufferParameteri.  Width, height, layers and samples
+ * are range-checked against the corresponding ctx->Const.MaxFramebuffer*
+ * limit; a negative or too-large value raises GL_INVALID_VALUE and leaves
+ * the framebuffer unchanged.  GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS
+ * is stored without any check.  Any other \p pname raises GL_INVALID_ENUM.
+ *
+ * \param ctx    GL context supplying the limits and receiving errors
+ * \param fb     framebuffer whose DefaultGeometry is written
+ * \param pname  GL_FRAMEBUFFER_DEFAULT_* enum selecting the field
+ * \param param  new value
+ * \param func   entry-point name to put in error messages
+ */
+static void
+framebuffer_parameteri(struct gl_context *ctx, struct gl_framebuffer *fb,
+                       GLenum pname, GLint param, const char *func)
+{
+   switch (pname) {
+   case GL_FRAMEBUFFER_DEFAULT_WIDTH:
+      if (param < 0 || param > ctx->Const.MaxFramebufferWidth)
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
+      else
+         fb->DefaultGeometry.Width = param;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_HEIGHT:
+      if (param < 0 || param > ctx->Const.MaxFramebufferHeight)
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
+      else
+         fb->DefaultGeometry.Height = param;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+      if (param < 0 || param > ctx->Const.MaxFramebufferLayers)
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
+      else
+         fb->DefaultGeometry.Layers = param;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_SAMPLES:
+      if (param < 0 || param > ctx->Const.MaxFramebufferSamples)
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s", func);
+      else
+         fb->DefaultGeometry.NumSamples = param;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS:
+      fb->DefaultGeometry.FixedSampleLocations = param;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "%s(pname=0x%x)", func, pname);
+   }
+}
+
+/**
+ * glFramebufferParameteri entry point (ARB_framebuffer_no_attachments).
+ *
+ * Raises GL_INVALID_OPERATION when the extension is not enabled or when
+ * _mesa_is_winsys_fbo() is true for the framebuffer bound to \p target, and
+ * GL_INVALID_ENUM when get_framebuffer_target() finds none.  Otherwise the
+ * value is validated and stored by framebuffer_parameteri().
+ */
+void GLAPIENTRY
+_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_framebuffer *fb;
+
+   if (!ctx->Extensions.ARB_framebuffer_no_attachments) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glFramebufferParameteri not supported "
+                  "(ARB_framebuffer_no_attachments not implemented)");
+      return;
+   }
+
+   fb = get_framebuffer_target(ctx, target);
+   if (!fb) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glFramebufferParameteri(target=0x%x)", target);
+      return;
+   }
+
+   /* check framebuffer binding */
+   if (_mesa_is_winsys_fbo(fb)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glFramebufferParameteri");
+      return;
+   }
+
+   framebuffer_parameteri(ctx, fb, pname, param, "glFramebufferParameteri");
+}
+
+/**
+ * Read one default-geometry parameter of a framebuffer object.
+ *
+ * Writes the requested DefaultGeometry field to \p *params.  An
+ * unrecognized \p pname raises GL_INVALID_ENUM and leaves \p *params
+ * untouched.
+ *
+ * \param func  entry-point name to put in error messages
+ */
+static void
+get_framebuffer_parameteriv(struct gl_context *ctx, struct gl_framebuffer *fb,
+                            GLenum pname, GLint *params, const char *func)
+{
+   switch (pname) {
+   case GL_FRAMEBUFFER_DEFAULT_WIDTH:
+      *params = fb->DefaultGeometry.Width;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_HEIGHT:
+      *params = fb->DefaultGeometry.Height;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_LAYERS:
+      *params = fb->DefaultGeometry.Layers;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_SAMPLES:
+      *params = fb->DefaultGeometry.NumSamples;
+      break;
+   case GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS:
+      *params = fb->DefaultGeometry.FixedSampleLocations;
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "%s(pname=0x%x)", func, pname);
+   }
+}
+
+/**
+ * glGetFramebufferParameteriv entry point (ARB_framebuffer_no_attachments).
+ *
+ * Performs the same extension, target and window-system checks as
+ * glFramebufferParameteri before handing off to
+ * get_framebuffer_parameteriv().
+ */
+void GLAPIENTRY
+_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_framebuffer *fb;
+
+   if (!ctx->Extensions.ARB_framebuffer_no_attachments) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetFramebufferParameteriv not supported "
+                  "(ARB_framebuffer_no_attachments not implemented)");
+      return;
+   }
+
+   fb = get_framebuffer_target(ctx, target);
+   if (!fb) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetFramebufferParameteriv(target=0x%x)", target);
+      return;
+   }
+
+   /* check framebuffer binding */
+   if (_mesa_is_winsys_fbo(fb)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetFramebufferParameteriv");
+      return;
+   }
+
+   get_framebuffer_parameteriv(ctx, fb, pname, params,
+                               "glGetFramebufferParameteriv");
+}
+
 
 /**
  * Remove the specified renderbuffer or texture from any attachment point in
@@ -1845,7 +2007,7 @@
    baseFormat = _mesa_base_fbo_format(ctx, internalFormat);
    if (baseFormat == 0) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(internalFormat=%s)",
-                  func, _mesa_lookup_enum_by_nr(internalFormat));
+                  func, _mesa_enum_to_string(internalFormat));
       return;
    }
 
@@ -1871,6 +2033,16 @@
        */
       sample_count_error = _mesa_check_sample_count(ctx, GL_RENDERBUFFER,
             internalFormat, samples);
+
+      /* Section 2.5 (GL Errors) of OpenGL 3.0 specification, page 16:
+       *
+       * "If a negative number is provided where an argument of type sizei or
+       * sizeiptr is specified, the error INVALID VALUE is generated."
+       */
+      if (samples < 0) {
+         sample_count_error = GL_INVALID_VALUE;
+      }
+
       if (sample_count_error != GL_NO_ERROR) {
          _mesa_error(ctx, sample_count_error, "%s(samples)", func);
          return;
@@ -1933,12 +2105,12 @@
       if (samples == NO_SAMPLES)
          _mesa_debug(ctx, "%s(%u, %s, %d, %d)\n",
                      func, renderbuffer,
-                     _mesa_lookup_enum_by_nr(internalFormat),
+                     _mesa_enum_to_string(internalFormat),
                      width, height);
       else
          _mesa_debug(ctx, "%s(%u, %s, %d, %d, %d)\n",
                      func, renderbuffer,
-                     _mesa_lookup_enum_by_nr(internalFormat),
+                     _mesa_enum_to_string(internalFormat),
                      width, height, samples);
    }
 
@@ -1969,14 +2141,14 @@
       if (samples == NO_SAMPLES)
          _mesa_debug(ctx, "%s(%s, %s, %d, %d)\n",
                      func,
-                     _mesa_lookup_enum_by_nr(target),
-                     _mesa_lookup_enum_by_nr(internalFormat),
+                     _mesa_enum_to_string(target),
+                     _mesa_enum_to_string(internalFormat),
                      width, height);
       else
          _mesa_debug(ctx, "%s(%s, %s, %d, %d, %d)\n",
                      func,
-                     _mesa_lookup_enum_by_nr(target),
-                     _mesa_lookup_enum_by_nr(internalFormat),
+                     _mesa_enum_to_string(target),
+                     _mesa_enum_to_string(internalFormat),
                      width, height, samples);
    }
 
@@ -2149,7 +2321,7 @@
       /* fallthrough */
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname=%s)", func,
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       return;
    }
 }
@@ -2532,13 +2704,13 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glCheckFramebufferStatus(%s)\n",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
 
    fb = get_framebuffer_target(ctx, target);
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glCheckFramebufferStatus(invalid target %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return 0;
    }
 
@@ -2570,7 +2742,7 @@
       default:
          _mesa_error(ctx, GL_INVALID_ENUM,
                      "glCheckNamedFramebufferStatus(invalid target %s)",
-                     _mesa_lookup_enum_by_nr(target));
+                     _mesa_enum_to_string(target));
          return 0;
    }
 
@@ -2689,7 +2861,7 @@
 
    _mesa_error(ctx, GL_INVALID_OPERATION,
                "%s(invalid texture target %s)", caller,
-               _mesa_lookup_enum_by_nr(target));
+               _mesa_enum_to_string(target));
    return false;
 }
 
@@ -2731,7 +2903,7 @@
 
    _mesa_error(ctx, GL_INVALID_OPERATION,
                "%s(invalid texture target %s)", caller,
-               _mesa_lookup_enum_by_nr(target));
+               _mesa_enum_to_string(target));
    return false;
 }
 
@@ -2782,8 +2954,9 @@
          break;
       case GL_TEXTURE_2D_MULTISAMPLE:
       case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-         err = _mesa_is_gles(ctx)
-               || !ctx->Extensions.ARB_texture_multisample;
+         err = (_mesa_is_gles(ctx) ||
+                !ctx->Extensions.ARB_texture_multisample) &&
+               !_mesa_is_gles31(ctx);
          break;
       default:
          err = true;
@@ -2800,7 +2973,7 @@
    if (err) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "%s(invalid textarget %s)",
-                  caller, _mesa_lookup_enum_by_nr(textarget));
+                  caller, _mesa_enum_to_string(textarget));
       return false;
    }
 
@@ -2912,7 +3085,7 @@
    att = get_attachment(ctx, fb, attachment);
    if (att == NULL) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
-                  _mesa_lookup_enum_by_nr(attachment));
+                  _mesa_enum_to_string(attachment));
       return;
    }
 
@@ -2995,7 +3168,7 @@
    fb = get_framebuffer_target(ctx, target);
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller,
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -3063,7 +3236,7 @@
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glFramebufferTextureLayer(invalid target %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -3142,7 +3315,7 @@
    GET_CURRENT_CONTEXT(ctx);
    struct gl_framebuffer *fb;
    struct gl_texture_object *texObj;
-   GLboolean layered;
+   GLboolean layered = GL_FALSE;
 
    const char *func = "FramebufferTexture";
 
@@ -3157,7 +3330,7 @@
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glFramebufferTexture(invalid target %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -3185,7 +3358,7 @@
    GET_CURRENT_CONTEXT(ctx);
    struct gl_framebuffer *fb;
    struct gl_texture_object *texObj;
-   GLboolean layered;
+   GLboolean layered = GL_FALSE;
 
    const char *func = "glNamedFramebufferTexture";
 
@@ -3238,7 +3411,7 @@
    if (att == NULL) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "%s(invalid attachment %s)", func,
-                  _mesa_lookup_enum_by_nr(attachment));
+                  _mesa_enum_to_string(attachment));
       return;
    }
 
@@ -3278,7 +3451,7 @@
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glFramebufferRenderbuffer(invalid target %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -3377,7 +3550,7 @@
           attachment != GL_DEPTH && attachment != GL_STENCIL) {
          _mesa_error(ctx, GL_INVALID_ENUM,
                      "%s(invalid attachment %s)", caller,
-                     _mesa_lookup_enum_by_nr(attachment));
+                     _mesa_enum_to_string(attachment));
          return;
       }
       /* the default / window-system FBO */
@@ -3390,7 +3563,7 @@
 
    if (att == NULL) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
-                  _mesa_lookup_enum_by_nr(attachment));
+                  _mesa_enum_to_string(attachment));
       return;
    }
 
@@ -3447,7 +3620,7 @@
       }
       else if (att->Type == GL_NONE) {
          _mesa_error(ctx, err, "%s(invalid pname %s)", caller,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
       }
       else {
          goto invalid_pname_enum;
@@ -3464,7 +3637,7 @@
       }
       else if (att->Type == GL_NONE) {
          _mesa_error(ctx, err, "%s(invalid pname %s)", caller,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
       }
       else {
          goto invalid_pname_enum;
@@ -3475,7 +3648,7 @@
          goto invalid_pname_enum;
       } else if (att->Type == GL_NONE) {
          _mesa_error(ctx, err, "%s(invalid pname %s)", caller,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
       } else if (att->Type == GL_TEXTURE) {
          if (att->Texture && (att->Texture->Target == GL_TEXTURE_3D ||
              att->Texture->Target == GL_TEXTURE_2D_ARRAY)) {
@@ -3497,7 +3670,7 @@
       }
       else if (att->Type == GL_NONE) {
          _mesa_error(ctx, err, "%s(invalid pname %s)", caller,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
       }
       else {
          if (ctx->Extensions.EXT_framebuffer_sRGB) {
@@ -3520,7 +3693,7 @@
       }
       else if (att->Type == GL_NONE) {
          _mesa_error(ctx, err, "%s(invalid pname %s)", caller,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
       }
       else {
          mesa_format format = att->Renderbuffer->Format;
@@ -3572,7 +3745,7 @@
       }
       else if (att->Type == GL_NONE) {
          _mesa_error(ctx, err, "%s(invalid pname %s)", caller,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
       }
       else if (att->Texture) {
          const struct gl_texture_image *texImage =
@@ -3601,7 +3774,7 @@
          *params = att->Layered;
       } else if (att->Type == GL_NONE) {
          _mesa_error(ctx, err, "%s(invalid pname %s)", caller,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
       } else {
          goto invalid_pname_enum;
       }
@@ -3614,7 +3787,7 @@
 
 invalid_pname_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname %s)", caller,
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
    return;
 }
 
@@ -3630,7 +3803,7 @@
    if (!buffer) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glGetFramebufferAttachmentParameteriv(invalid target %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -3675,14 +3848,22 @@
                                  GLint param)
 {
    GET_CURRENT_CONTEXT(ctx);
+   struct gl_framebuffer *fb = NULL;
 
-   (void) framebuffer;
-   (void) pname;
-   (void) param;
+   if (!ctx->Extensions.ARB_framebuffer_no_attachments) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glNamedFramebufferParameteri("
+                  "ARB_framebuffer_no_attachments not implemented)");
+      return;
+   }
 
-   _mesa_error(ctx, GL_INVALID_OPERATION,
-               "glNamedFramebufferParameteri not supported "
-               "(ARB_framebuffer_no_attachments not implemented)");
+   fb = _mesa_lookup_framebuffer_err(ctx, framebuffer,
+                                     "glNamedFramebufferParameteri");
+
+   if (fb) {
+      framebuffer_parameteri(ctx, fb, pname, param,
+                             "glNamedFramebufferParameteri");
+   }
 }
 
 
@@ -3691,14 +3872,26 @@
                                      GLint *param)
 {
    GET_CURRENT_CONTEXT(ctx);
+   struct gl_framebuffer *fb;
+
+   if (!ctx->Extensions.ARB_framebuffer_no_attachments) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetNamedFramebufferParameteriv("
+                  "ARB_framebuffer_no_attachments not implemented)");
+      return;
+   }
 
-   (void) framebuffer;
-   (void) pname;
-   (void) param;
+   if (framebuffer) {
+      fb = _mesa_lookup_framebuffer_err(ctx, framebuffer,
+                                        "glGetNamedFramebufferParameteriv");
+   } else {
+      fb = ctx->WinSysDrawBuffer;
+   }
 
-   _mesa_error(ctx, GL_INVALID_OPERATION,
-               "glGetNamedFramebufferParameteriv not supported "
-               "(ARB_framebuffer_no_attachments not implemented)");
+   if (fb) {
+      get_framebuffer_parameteriv(ctx, fb, pname, param,
+                                  "glGetNamedFramebufferParameteriv");
+   }
 }
 
 
@@ -3827,7 +4020,7 @@
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", name,
-               _mesa_lookup_enum_by_nr(attachments[i]));
+               _mesa_enum_to_string(attachments[i]));
    return;
 }
 
@@ -3844,7 +4037,7 @@
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glInvalidateSubFramebuffer(invalid target %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -3894,7 +4087,7 @@
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glInvalidateFramebuffer(invalid target %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -3970,7 +4163,7 @@
    if (!fb) {
       _mesa_error(ctx, GL_INVALID_ENUM,
          "glDiscardFramebufferEXT(target %s)",
-         _mesa_lookup_enum_by_nr(target));
+         _mesa_enum_to_string(target));
       return;
    }
 
@@ -4007,5 +4200,5 @@
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM,
                "glDiscardFramebufferEXT(attachment %s)",
-              _mesa_lookup_enum_by_nr(attachments[i]));
+              _mesa_enum_to_string(attachments[i]));
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/fbobject.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/fbobject.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/fbobject.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/fbobject.h	2015-09-16 14:36:10.000000000 +0000
@@ -288,4 +288,10 @@
 _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments,
                             const GLenum *attachments);
 
+extern void GLAPIENTRY
+_mesa_FramebufferParameteri(GLenum target, GLenum pname, GLint param);
+
+extern void GLAPIENTRY
+_mesa_GetFramebufferParameteriv(GLenum target, GLenum pname, GLint *params);
+
 #endif /* FBOBJECT_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/feedback.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/feedback.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/feedback.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/feedback.c	2015-09-16 14:36:10.000000000 +0000
@@ -415,7 +415,7 @@
    ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glRenderMode %s\n", _mesa_lookup_enum_by_nr(mode));
+      _mesa_debug(ctx, "glRenderMode %s\n", _mesa_enum_to_string(mode));
 
    FLUSH_VERTICES(ctx, _NEW_RENDERMODE);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/ffvertex_prog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/ffvertex_prog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/ffvertex_prog.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/ffvertex_prog.c	2015-09-16 14:36:10.000000000 +0000
@@ -189,15 +189,15 @@
 	 if (light->Enabled) {
 	    key->unit[i].light_enabled = 1;
 
-	    if (light->EyePosition[3] == 0.0)
+	    if (light->EyePosition[3] == 0.0F)
 	       key->unit[i].light_eyepos3_is_zero = 1;
 
-	    if (light->SpotCutoff == 180.0)
+	    if (light->SpotCutoff == 180.0F)
 	       key->unit[i].light_spotcutoff_is_180 = 1;
 
-	    if (light->ConstantAttenuation != 1.0 ||
-		light->LinearAttenuation != 0.0 ||
-		light->QuadraticAttenuation != 0.0)
+	    if (light->ConstantAttenuation != 1.0F ||
+		light->LinearAttenuation != 0.0F ||
+		light->QuadraticAttenuation != 0.0F)
 	       key->unit[i].light_attenuated = 1;
 	 }
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/fog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/fog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/fog.c	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/fog.c	2015-09-16 14:36:10.000000000 +0000
@@ -115,7 +115,7 @@
 	 ctx->Fog.Mode = m;
 	 break;
       case GL_FOG_DENSITY:
-	 if (*params<0.0) {
+	 if (*params<0.0F) {
 	    _mesa_error( ctx, GL_INVALID_VALUE, "glFog" );
             return;
 	 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/format_info.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/format_info.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/format_info.py	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/format_info.py	2015-09-16 14:36:10.000000000 +0000
@@ -98,14 +98,6 @@
    else:
       assert False
 
-def get_mesa_layout(fmat):
-   if fmat.layout == 'array':
-      return 'MESA_FORMAT_LAYOUT_ARRAY'
-   elif fmat.layout == 'packed':
-      return 'MESA_FORMAT_LAYOUT_PACKED'
-   else:
-      return 'MESA_FORMAT_LAYOUT_OTHER'
-
 def get_channel_bits(fmat, chan_name):
    if fmat.is_compressed():
       # These values are pretty-much bogus, but OpenGL requires that we
@@ -179,7 +171,7 @@
    print '   {'
    print '      {0},'.format(fmat.name)
    print '      "{0}",'.format(fmat.name)
-   print '      {0},'.format(get_mesa_layout(fmat))
+   print '      {0},'.format('MESA_FORMAT_LAYOUT_' + fmat.layout.upper())
    print '      {0},'.format(get_gl_base_format(fmat))
    print '      {0},'.format(get_gl_data_type(fmat))
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/format_parser.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/format_parser.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/format_parser.py	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/format_parser.py	2015-09-16 14:36:10.000000000 +0000
@@ -40,9 +40,6 @@
 YUV = 'yuv'
 ZS = 'zs'
 
-def is_power_of_two(x):
-   return not bool(x & (x - 1))
-
 VERY_LARGE = 99999999999999999999999
 
 class Channel:
@@ -100,10 +97,6 @@
       else:
          return 1
 
-   def is_power_of_two(self):
-      """Returns true if the size of this channel is a power of two."""
-      return is_power_of_two(self.size)
-
    def datatype(self):
       """Returns the datatype corresponding to a channel type and size"""
       return _get_datatype(self.type, self.size)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formatquery.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formatquery.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formatquery.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formatquery.c	2015-09-16 14:36:10.000000000 +0000
@@ -74,13 +74,15 @@
    case GL_TEXTURE_2D_MULTISAMPLE:
    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
       /* These enums are only valid if ARB_texture_multisample is supported */
-      if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample)
+      if ((_mesa_is_desktop_gl(ctx) &&
+           ctx->Extensions.ARB_texture_multisample) ||
+          _mesa_is_gles31(ctx))
          break;
 
    default:
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glGetInternalformativ(target=%s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -107,7 +109,7 @@
        _mesa_base_fbo_format(ctx, internalformat) == 0) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glGetInternalformativ(internalformat=%s)",
-                  _mesa_lookup_enum_by_nr(internalformat));
+                  _mesa_enum_to_string(internalformat));
       return;
    }
 
@@ -119,7 +121,7 @@
    if (bufSize < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetInternalformativ(target=%s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -168,7 +170,7 @@
    default:
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glGetInternalformativ(pname=%s)",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       return;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formats.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formats.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formats.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formats.c	2015-09-16 14:36:10.000000000 +0000
@@ -188,6 +188,12 @@
  * The return value will be one of:
  *    MESA_FORMAT_LAYOUT_ARRAY
  *    MESA_FORMAT_LAYOUT_PACKED
+ *    MESA_FORMAT_LAYOUT_S3TC
+ *    MESA_FORMAT_LAYOUT_RGTC
+ *    MESA_FORMAT_LAYOUT_FXT1
+ *    MESA_FORMAT_LAYOUT_ETC1
+ *    MESA_FORMAT_LAYOUT_ETC2
+ *    MESA_FORMAT_LAYOUT_BPTC
  *    MESA_FORMAT_LAYOUT_OTHER
  */
 extern enum mesa_format_layout
@@ -406,6 +412,11 @@
    format_array_format_table = _mesa_hash_table_create(NULL, NULL,
                                                        array_formats_equal);
 
+   if (!format_array_format_table) {
+      _mesa_error_no_memory(__func__);
+      return;
+   }
+
    for (f = 1; f < MESA_FORMAT_COUNT; ++f) {
       info = _mesa_get_format_info(f);
       if (!info->ArrayFormat)
@@ -441,6 +452,12 @@
 
    call_once(&format_array_format_table_exists, format_array_format_table_init);
 
+   if (!format_array_format_table) {
+      static const once_flag once_flag_init = ONCE_FLAG_INIT;
+      format_array_format_table_exists = once_flag_init;
+      return MESA_FORMAT_NONE;
+   }
+
    entry = _mesa_hash_table_search_pre_hashed(format_array_format_table,
                                               array_format,
                                               (void *)(intptr_t)array_format);
@@ -996,6 +1013,10 @@
    case MESA_FORMAT_R8G8B8X8_UNORM:
    case MESA_FORMAT_B8G8R8X8_UNORM:
    case MESA_FORMAT_X8R8G8B8_UNORM:
+   case MESA_FORMAT_A8B8G8R8_UINT:
+   case MESA_FORMAT_R8G8B8A8_UINT:
+   case MESA_FORMAT_B8G8R8A8_UINT:
+   case MESA_FORMAT_A8R8G8B8_UINT:
       *datatype = GL_UNSIGNED_BYTE;
       *comps = 4;
       return;
@@ -1006,6 +1027,8 @@
       return;
    case MESA_FORMAT_B5G6R5_UNORM:
    case MESA_FORMAT_R5G6B5_UNORM:
+   case MESA_FORMAT_B5G6R5_UINT:
+   case MESA_FORMAT_R5G6B5_UINT:
       *datatype = GL_UNSIGNED_SHORT_5_6_5;
       *comps = 3;
       return;
@@ -1013,6 +1036,8 @@
    case MESA_FORMAT_B4G4R4A4_UNORM:
    case MESA_FORMAT_A4R4G4B4_UNORM:
    case MESA_FORMAT_B4G4R4X4_UNORM:
+   case MESA_FORMAT_B4G4R4A4_UINT:
+   case MESA_FORMAT_A4R4G4B4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
@@ -1020,6 +1045,8 @@
    case MESA_FORMAT_B5G5R5A1_UNORM:
    case MESA_FORMAT_A1R5G5B5_UNORM:
    case MESA_FORMAT_B5G5R5X1_UNORM:
+   case MESA_FORMAT_B5G5R5A1_UINT:
+   case MESA_FORMAT_A1R5G5B5_UINT:
       *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
       *comps = 4;
       return;
@@ -1030,6 +1057,7 @@
       return;
 
    case MESA_FORMAT_A1B5G5R5_UNORM:
+   case MESA_FORMAT_A1B5G5R5_UINT:
       *datatype = GL_UNSIGNED_SHORT_5_5_5_1;
       *comps = 4;
       return;
@@ -1064,19 +1092,23 @@
       return;
 
    case MESA_FORMAT_R3G3B2_UNORM:
+   case MESA_FORMAT_R3G3B2_UINT:
       *datatype = GL_UNSIGNED_BYTE_2_3_3_REV;
       *comps = 3;
       return;
    case MESA_FORMAT_A4B4G4R4_UNORM:
+   case MESA_FORMAT_A4B4G4R4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
 
    case MESA_FORMAT_R4G4B4A4_UNORM:
+   case MESA_FORMAT_R4G4B4A4_UINT:
       *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
       *comps = 4;
       return;
    case MESA_FORMAT_R5G5B5A1_UNORM:
+   case MESA_FORMAT_R5G5B5A1_UINT:
       *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
       *comps = 4;
       return;
@@ -1092,6 +1124,7 @@
       return;
 
    case MESA_FORMAT_B2G3R3_UNORM:
+   case MESA_FORMAT_B2G3R3_UINT:
       *datatype = GL_UNSIGNED_BYTE_3_3_2;
       *comps = 3;
       return;
@@ -2121,6 +2154,96 @@
               type == GL_UNSIGNED_INT_2_10_10_10_REV &&
               !swapBytes);
 
+   case MESA_FORMAT_B5G6R5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5 && !swapBytes;
+
+   case MESA_FORMAT_R5G6B5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5_REV && !swapBytes;
+
+   case MESA_FORMAT_B2G3R3_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_3_3_2;
+
+   case MESA_FORMAT_R3G3B2_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_2_3_3_REV;
+
+   case MESA_FORMAT_A4B4G4R4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_R4G4B4A4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B4G4R4A4_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A4R4G4B4_UINT:
+      return GL_FALSE;
+
+   case MESA_FORMAT_A1B5G5R5_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_B5G5R5A1_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A1R5G5B5_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_R5G5B5A1_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV && !swapBytes;
+
+   case MESA_FORMAT_A8B8G8R8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_A8R8G8B8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_R8G8B8A8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B8G8R8A8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
    case MESA_FORMAT_R9G9B9E5_FLOAT:
       return format == GL_RGB && type == GL_UNSIGNED_INT_5_9_9_9_REV &&
          !swapBytes;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formats.csv mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formats.csv
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formats.csv	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formats.csv	2015-09-16 14:36:10.000000000 +0000
@@ -186,10 +186,26 @@
 MESA_FORMAT_Z_FLOAT32                     , array , 1, 1, f32 ,     ,     ,     , x___, zs
 
 # Packed signed/unsigned non-normalized integer formats
+MESA_FORMAT_A8B8G8R8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , wzyx, rgb
+MESA_FORMAT_A8R8G8B8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , yzwx, rgb
+MESA_FORMAT_R8G8B8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , xyzw, rgb
+MESA_FORMAT_B8G8R8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , zyxw, rgb
 MESA_FORMAT_B10G10R10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , zyxw, rgb
 MESA_FORMAT_R10G10B10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , xyzw, rgb
 MESA_FORMAT_A2B10G10R10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , wzyx, rgb
 MESA_FORMAT_A2R10G10B10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , yzwx, rgb
+MESA_FORMAT_B5G6R5_UINT                   , packed, 1, 1, u5  , u6  ,  u5 ,     , zyx1, rgb
+MESA_FORMAT_R5G6B5_UINT                   , packed, 1, 1, u5  , u6  ,  u5 ,     , xyz1, rgb
+MESA_FORMAT_B2G3R3_UINT                   , packed, 1, 1, u2  , u3  ,  u3 ,     , zyx1, rgb
+MESA_FORMAT_R3G3B2_UINT                   , packed, 1, 1, u3  , u3  ,  u2 ,     , xyz1, rgb
+MESA_FORMAT_A4B4G4R4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , wzyx, rgb
+MESA_FORMAT_R4G4B4A4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , xyzw, rgb
+MESA_FORMAT_B4G4R4A4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , zyxw, rgb
+MESA_FORMAT_A4R4G4B4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , yzwx, rgb
+MESA_FORMAT_A1B5G5R5_UINT                 , packed, 1, 1, u1  , u5  ,  u5 , u5  , wzyx, rgb
+MESA_FORMAT_B5G5R5A1_UINT                 , packed, 1, 1, u5  , u5  ,  u5 , u1  , zyxw, rgb
+MESA_FORMAT_A1R5G5B5_UINT                 , packed, 1, 1, u1  , u5  ,  u5 , u5  , yzwx, rgb
+MESA_FORMAT_R5G5B5A1_UINT                 , packed, 1, 1, u5  , u5  ,  u5 , u1  , xyzw, rgb
 
 # Array signed/unsigned non-normalized integer formats
 MESA_FORMAT_A_UINT8                       , array , 1, 1, u8  ,     ,     ,     , 000x, rgb
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formats.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formats.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/formats.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/formats.h	2015-09-16 14:36:10.000000000 +0000
@@ -64,6 +64,12 @@
 enum mesa_format_layout {
    MESA_FORMAT_LAYOUT_ARRAY,
    MESA_FORMAT_LAYOUT_PACKED,
+   MESA_FORMAT_LAYOUT_S3TC,
+   MESA_FORMAT_LAYOUT_RGTC,
+   MESA_FORMAT_LAYOUT_FXT1,
+   MESA_FORMAT_LAYOUT_ETC1,
+   MESA_FORMAT_LAYOUT_ETC2,
+   MESA_FORMAT_LAYOUT_BPTC,
    MESA_FORMAT_LAYOUT_OTHER,
 };
 
@@ -464,10 +470,27 @@
    MESA_FORMAT_Z_FLOAT32,
 
    /* Packed signed/unsigned non-normalized integer formats */
+
+   MESA_FORMAT_A8B8G8R8_UINT,    /* RRRR RRRR GGGG GGGG BBBB BBBB AAAA AAAA */
+   MESA_FORMAT_A8R8G8B8_UINT,    /* BBBB BBBB GGGG GGGG RRRR RRRR AAAA AAAA */
+   MESA_FORMAT_R8G8B8A8_UINT,    /* AAAA AAAA BBBB BBBB GGGG GGGG RRRR RRRR */
+   MESA_FORMAT_B8G8R8A8_UINT,    /* AAAA AAAA RRRR RRRR GGGG GGGG BBBB BBBB */
    MESA_FORMAT_B10G10R10A2_UINT, /* AARR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
    MESA_FORMAT_R10G10B10A2_UINT, /* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
    MESA_FORMAT_A2B10G10R10_UINT, /* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
    MESA_FORMAT_A2R10G10B10_UINT, /* BBBB BBBB BBGG GGGG GGGG RRRR RRRR RRAA */
+   MESA_FORMAT_B5G6R5_UINT,                          /* RRRR RGGG GGGB BBBB */
+   MESA_FORMAT_R5G6B5_UINT,                          /* BBBB BGGG GGGR RRRR */
+   MESA_FORMAT_B2G3R3_UINT,                                    /* RRRG GGBB */
+   MESA_FORMAT_R3G3B2_UINT,                                    /* BBGG GRRR */
+   MESA_FORMAT_A4B4G4R4_UINT,                        /* RRRR GGGG BBBB AAAA */
+   MESA_FORMAT_R4G4B4A4_UINT,                        /* AAAA BBBB GGGG RRRR */
+   MESA_FORMAT_B4G4R4A4_UINT,                        /* AAAA RRRR GGGG BBBB */
+   MESA_FORMAT_A4R4G4B4_UINT,                        /* BBBB GGGG RRRR AAAA */
+   MESA_FORMAT_A1B5G5R5_UINT,                        /* RRRR RGGG GGBB BBBA */
+   MESA_FORMAT_B5G5R5A1_UINT,                        /* ARRR RRGG GGGB BBBB */
+   MESA_FORMAT_A1R5G5B5_UINT,                        /* BBBB BGGG GGRR RRRA */
+   MESA_FORMAT_R5G5B5A1_UINT,                        /* ABBB BBGG GGGR RRRR */
 
    /* Array signed/unsigned non-normalized integer formats */
    MESA_FORMAT_A_UINT8,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/format_utils.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/format_utils.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/format_utils.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/format_utils.h	2015-09-16 14:36:10.000000000 +0000
@@ -33,6 +33,7 @@
 
 #include "imports.h"
 #include "macros.h"
+#include "util/rounding.h"
 
 extern const mesa_array_format RGBA32_FLOAT;
 extern const mesa_array_format RGBA8_UBYTE;
@@ -84,7 +85,7 @@
    else if (x > 1.0f)
       return MAX_UINT(dst_bits);
    else
-      return F_TO_I(x * MAX_UINT(dst_bits));
+      return _mesa_lroundevenf(x * MAX_UINT(dst_bits));
 }
 
 static inline unsigned
@@ -98,7 +99,7 @@
 {
    if (src_bits < dst_bits) {
       return EXTEND_NORMALIZED_INT(x, src_bits, dst_bits);
-   } else {
+   } else if (src_bits > dst_bits) {
       unsigned src_half = (1 << (src_bits - 1)) - 1;
 
       if (src_bits + dst_bits > sizeof(x) * 8) {
@@ -108,6 +109,8 @@
       } else {
          return (x * MAX_UINT(dst_bits) + src_half) / MAX_UINT(src_bits);
       }
+   } else {
+      return x;
    }
 }
 
@@ -128,7 +131,7 @@
    else if (x > 1.0f)
       return MAX_INT(dst_bits);
    else
-      return F_TO_I(x * MAX_INT(dst_bits));
+      return _mesa_lroundevenf(x * MAX_INT(dst_bits));
 }
 
 static inline int
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/framebuffer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/framebuffer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/framebuffer.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/framebuffer.c	2015-09-16 14:36:10.000000000 +0000
@@ -157,6 +157,7 @@
    fb->_Status = GL_FRAMEBUFFER_COMPLETE_EXT;
    fb->_AllColorBuffersFixedPoint = !visual->floatMode;
    fb->_HasSNormOrFloatColorBuffer = visual->floatMode;
+   fb->_HasAttachments = true;
 
    compute_depth_max(fb);
 }
@@ -356,30 +357,20 @@
 }
 
 
+
 /**
- * Calculate the inclusive bounding box for the scissor of a specific viewport
+ * Given a bounding box, intersect the bounding box with the scissor of
+ * a specified viewport.
  *
  * \param ctx     GL context.
- * \param buffer  Framebuffer to be checked against
  * \param idx     Index of the desired viewport
  * \param bbox    Bounding box for the scissored viewport.  Stored as xmin,
  *                xmax, ymin, ymax.
- *
- * \warning This function assumes that the framebuffer dimensions are up to
- * date (e.g., update_framebuffer_size has been recently called on \c buffer).
- *
- * \sa _mesa_clip_to_region
  */
 void
-_mesa_scissor_bounding_box(const struct gl_context *ctx,
-                           const struct gl_framebuffer *buffer,
-                           unsigned idx, int *bbox)
+_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx,
+                                     unsigned idx, int *bbox)
 {
-   bbox[0] = 0;
-   bbox[2] = 0;
-   bbox[1] = buffer->Width;
-   bbox[3] = buffer->Height;
-
    if (ctx->Scissor.EnableFlags & (1u << idx)) {
       if (ctx->Scissor.ScissorArray[idx].X > bbox[0]) {
          bbox[0] = ctx->Scissor.ScissorArray[idx].X;
@@ -401,6 +392,33 @@
          bbox[2] = bbox[3];
       }
    }
+}
+
+/**
+ * Calculate the inclusive bounding box for the scissor of a specific viewport
+ *
+ * \param ctx     GL context.
+ * \param buffer  Framebuffer to be checked against
+ * \param idx     Index of the desired viewport
+ * \param bbox    Bounding box for the scissored viewport.  Stored as xmin,
+ *                xmax, ymin, ymax.
+ *
+ * \warning This function assumes that the framebuffer dimensions are up to
+ * date (e.g., update_framebuffer_size has been recently called on \c buffer).
+ *
+ * \sa _mesa_clip_to_region
+ */
+void
+_mesa_scissor_bounding_box(const struct gl_context *ctx,
+                           const struct gl_framebuffer *buffer,
+                           unsigned idx, int *bbox)
+{
+   bbox[0] = 0;
+   bbox[2] = 0;
+   bbox[1] = buffer->Width;
+   bbox[3] = buffer->Height;
+
+   _mesa_intersect_scissor_bounding_box(ctx, idx, bbox);
 
    assert(bbox[0] <= bbox[1]);
    assert(bbox[2] <= bbox[3]);
@@ -920,7 +938,7 @@
 
    fprintf(stderr, "Mesa Framebuffer %u at %p\n", fb->Name, (void *) fb);
    fprintf(stderr, "  Size: %u x %u  Status: %s\n", fb->Width, fb->Height,
-           _mesa_lookup_enum_by_nr(fb->_Status));
+           _mesa_enum_to_string(fb->_Status));
    fprintf(stderr, "  Attachments:\n");
 
    for (i = 0; i < BUFFER_COUNT; i++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/framebuffer.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/framebuffer.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/framebuffer.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/framebuffer.h	2015-09-16 14:36:10.000000000 +0000
@@ -75,6 +75,37 @@
 _mesa_scissor_bounding_box(const struct gl_context *ctx,
                            const struct gl_framebuffer *buffer,
                            unsigned idx, int *bbox);
+extern void
+_mesa_intersect_scissor_bounding_box(const struct gl_context *ctx,
+                                     unsigned idx, int *bbox);
+
+static inline GLuint
+_mesa_geometric_width(const struct gl_framebuffer *buffer)
+{
+   return buffer->_HasAttachments ?
+      buffer->Width : buffer->DefaultGeometry.Width;
+}
+
+static inline GLuint
+_mesa_geometric_height(const struct gl_framebuffer *buffer)
+{
+   return buffer->_HasAttachments ?
+      buffer->Height : buffer->DefaultGeometry.Height;
+}
+
+static inline GLuint
+_mesa_geometric_samples(const struct gl_framebuffer *buffer)
+{
+   return buffer->_HasAttachments ?
+      buffer->Visual.samples : buffer->DefaultGeometry.NumSamples;
+}
+
+static inline GLuint
+_mesa_geometric_layers(const struct gl_framebuffer *buffer)
+{
+   return buffer->_HasAttachments ?
+      buffer->MaxNumLayers : buffer->DefaultGeometry.Layers;
+}
 
 extern void 
 _mesa_update_draw_buffer_bounds(struct gl_context *ctx,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/genmipmap.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/genmipmap.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/genmipmap.c	2015-06-14 10:02:08.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/genmipmap.c	2015-09-16 14:36:10.000000000 +0000
@@ -83,7 +83,7 @@
 
    if (error) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glGenerate%sMipmap(target=%s)",
-                  suffix, _mesa_lookup_enum_by_nr(target));
+                  suffix, _mesa_enum_to_string(target));
       return;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/get.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/get.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/get.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/get.c	2015-09-16 14:36:10.000000000 +0000
@@ -35,6 +35,7 @@
 #include "mtypes.h"
 #include "state.h"
 #include "texcompress.h"
+#include "texstate.h"
 #include "framebuffer.h"
 #include "samplerobj.h"
 #include "stencil.h"
@@ -138,6 +139,7 @@
    EXTRA_API_GL_CORE,
    EXTRA_API_ES2,
    EXTRA_API_ES3,
+   EXTRA_API_ES31,
    EXTRA_NEW_BUFFERS, 
    EXTRA_NEW_FRAG_CLAMP,
    EXTRA_VALID_DRAW_BUFFER,
@@ -148,6 +150,8 @@
    EXTRA_EXT_UBO_GS4,
    EXTRA_EXT_ATOMICS_GS4,
    EXTRA_EXT_SHADER_IMAGE_GS4,
+   EXTRA_EXT_ATOMICS_TESS,
+   EXTRA_EXT_SHADER_IMAGE_TESS,
 };
 
 #define NO_EXTRA NULL
@@ -348,6 +352,58 @@
    EXTRA_END
 };
 
+static const int extra_ARB_shader_atomic_counters_and_tessellation[] = {
+   EXTRA_EXT_ATOMICS_TESS, /* atomic counters ext AND tessellation support */
+   EXTRA_END
+};
+
+static const int extra_ARB_shader_image_load_store_and_tessellation[] = {
+   EXTRA_EXT_SHADER_IMAGE_TESS, /* image load/store ext AND tessellation */
+   EXTRA_END
+};
+
+static const int extra_ARB_draw_indirect_es31[] = {
+   EXT(ARB_draw_indirect), /* offset of the flag in the extension struct */
+   EXTRA_API_ES31, /* matches when _mesa_is_gles31(ctx); same pattern below */
+   EXTRA_END
+};
+
+static const int extra_ARB_shader_image_load_store_es31[] = {
+   EXT(ARB_shader_image_load_store),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
+static const int extra_ARB_shader_atomic_counters_es31[] = {
+   EXT(ARB_shader_atomic_counters),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
+static const int extra_ARB_texture_multisample_es31[] = {
+   EXT(ARB_texture_multisample),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
+static const int extra_ARB_texture_gather_es31[] = {
+   EXT(ARB_texture_gather),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
+static const int extra_ARB_compute_shader_es31[] = {
+   EXT(ARB_compute_shader),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
+static const int extra_ARB_explicit_uniform_location_es31[] = {
+   EXT(ARB_explicit_uniform_location),
+   EXTRA_API_ES31,
+   EXTRA_END
+};
+
 EXTRA_EXT(ARB_texture_cube_map);
 EXTRA_EXT(EXT_texture_array);
 EXTRA_EXT(NV_fog_distance);
@@ -393,6 +449,9 @@
 EXTRA_EXT(ARB_explicit_uniform_location);
 EXTRA_EXT(ARB_clip_control);
 EXTRA_EXT(EXT_polygon_offset_clamp);
+EXTRA_EXT(ARB_framebuffer_no_attachments);
+EXTRA_EXT(ARB_tessellation_shader);
+EXTRA_EXT(ARB_shader_subroutine);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
@@ -618,7 +677,7 @@
       break;
 
    case GL_EDGE_FLAG:
-      v->value_bool = ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0] == 1.0;
+      v->value_bool = ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0] == 1.0F;
       break;
 
    case GL_READ_BUFFER:
@@ -1078,6 +1137,11 @@
          if (_mesa_is_gles3(ctx))
             api_found = GL_TRUE;
 	 break;
+      case EXTRA_API_ES31:
+         api_check = GL_TRUE;
+         if (_mesa_is_gles31(ctx))
+            api_found = GL_TRUE;
+	 break;
       case EXTRA_API_GL:
          api_check = GL_TRUE;
          if (_mesa_is_desktop_gl(ctx))
@@ -1136,6 +1200,16 @@
          api_found = (ctx->Extensions.ARB_shader_image_load_store &&
                       _mesa_has_geometry_shaders(ctx));
          break;
+      case EXTRA_EXT_ATOMICS_TESS:
+         api_check = GL_TRUE;
+         api_found = ctx->Extensions.ARB_shader_atomic_counters &&
+                     _mesa_has_tessellation(ctx);
+         break;
+      case EXTRA_EXT_SHADER_IMAGE_TESS:
+         api_check = GL_TRUE;
+         api_found = ctx->Extensions.ARB_shader_image_load_store &&
+                     _mesa_has_tessellation(ctx);
+         break;
       case EXTRA_END:
 	 break;
       default: /* *e is a offset into the extension struct */
@@ -1148,7 +1222,7 @@
 
    if (api_check && !api_found) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
-                  _mesa_lookup_enum_by_nr(d->pname));
+                  _mesa_enum_to_string(d->pname));
       return GL_FALSE;
    }
 
@@ -1195,10 +1269,13 @@
     * value since it's compatible with GLES2 its entry in table_set[] is at the
     * end.
     */
-   STATIC_ASSERT(ARRAY_SIZE(table_set) == API_OPENGL_LAST + 2);
+   STATIC_ASSERT(ARRAY_SIZE(table_set) == API_OPENGL_LAST + 3);
    if (_mesa_is_gles3(ctx)) {
       api = API_OPENGL_LAST + 1;
    }
+   if (_mesa_is_gles31(ctx)) {
+      api = API_OPENGL_LAST + 2;
+   }
    mask = ARRAY_SIZE(table(api)) - 1;
    hash = (pname * prime_factor);
    while (1) {
@@ -1209,7 +1286,7 @@
        * any valid enum. */
       if (unlikely(idx == 0)) {
          _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
          return &error_value;
       }
 
@@ -1709,6 +1786,52 @@
    }
 }
 
+/**
+ * Convert a GL texture binding enum such as GL_TEXTURE_BINDING_2D into the
+ * corresponding Mesa texture target index, honoring ctx's API and extensions.
+ * \return TEXTURE_x_INDEX, or -1 if the binding is unknown or not available
+ */
+static int
+tex_binding_to_index(const struct gl_context *ctx, GLenum binding)
+{
+   switch (binding) {
+   case GL_TEXTURE_BINDING_1D:
+      return _mesa_is_desktop_gl(ctx) ? TEXTURE_1D_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D: /* accepted unconditionally */
+      return TEXTURE_2D_INDEX;
+   case GL_TEXTURE_BINDING_3D: /* everything except API_OPENGLES */
+      return ctx->API != API_OPENGLES ? TEXTURE_3D_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+      return ctx->Extensions.ARB_texture_cube_map
+         ? TEXTURE_CUBE_INDEX : -1;
+   case GL_TEXTURE_BINDING_RECTANGLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle
+         ? TEXTURE_RECT_INDEX : -1;
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array
+         ? TEXTURE_1D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+      return (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array)
+         || _mesa_is_gles3(ctx) /* ES3 is accepted without the extension test */
+         ? TEXTURE_2D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_BUFFER:
+      return ctx->API == API_OPENGL_CORE && /* core profile only */
+             ctx->Extensions.ARB_texture_buffer_object ?
+             TEXTURE_BUFFER_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array
+         ? TEXTURE_CUBE_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : -1;
+   default: /* not a texture binding enum handled here */
+      return -1;
+   }
+}
+
 static enum value_type
 find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
 {
@@ -1972,6 +2095,45 @@
       v->value_int = ctx->ImageUnits[index].Format;
       return TYPE_INT;
 
+   /* ARB_direct_state_access */
+   case GL_TEXTURE_BINDING_1D:
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+   case GL_TEXTURE_BINDING_2D:
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+   case GL_TEXTURE_BINDING_3D:
+   case GL_TEXTURE_BINDING_BUFFER:
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_BINDING_RECTANGLE: {
+      int target;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      target = tex_binding_to_index(ctx, pname);
+      if (target < 0)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      v->value_int = ctx->Texture.Unit[index].CurrentTex[target]->Name;
+      return TYPE_INT;
+   }
+
+   case GL_SAMPLER_BINDING: {
+      struct gl_sampler_object *samp;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      samp = ctx->Texture.Unit[index].Sampler;
+      v->value_int = samp ? samp->Name : 0;
+      return TYPE_INT;
+   }
+
    case GL_MAX_COMPUTE_WORK_GROUP_COUNT:
       if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_compute_shader)
          goto invalid_enum;
@@ -1991,11 +2153,11 @@
 
  invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
    return TYPE_INVALID;
  invalid_value:
    _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=%s)", func,
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
    return TYPE_INVALID;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/get_hash_generator.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/get_hash_generator.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/get_hash_generator.py	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/get_hash_generator.py	2015-09-16 14:36:10.000000000 +0000
@@ -44,7 +44,7 @@
 prime_step = 281
 hash_table_size = 1024
 
-gl_apis=set(["GL", "GL_CORE", "GLES", "GLES2", "GLES3"])
+gl_apis=set(["GL", "GL_CORE", "GLES", "GLES2", "GLES3", "GLES31"])
 
 def print_header():
    print "typedef const unsigned short table_t[%d];\n" % (hash_table_size)
@@ -68,6 +68,7 @@
    'GLES2',
    'GL_CORE',
    'GLES3', # Not in gl_api enum in mtypes.h
+   'GLES31', # Not in gl_api enum in mtypes.h
 ]
 
 def api_index(api):
@@ -167,10 +168,13 @@
 
          for api in valid_apis:
             add_to_hash_table(tables[api], hash_val, len(params))
-            # Also add GLES2 items to the GLES3 hash table
+            # Also add GLES2 items to the GLES3 and GLES31 hash tables
             if api == "GLES2":
                add_to_hash_table(tables["GLES3"], hash_val, len(params))
-
+               add_to_hash_table(tables["GLES31"], hash_val, len(params))
+            # Also add GLES3 items to the GLES31 hash table
+            if api == "GLES3":
+               add_to_hash_table(tables["GLES31"], hash_val, len(params))
          params.append(["GL_" + enum_name, param[1]])
 
    sorted_tables={}
@@ -206,7 +210,7 @@
       die("missing descriptor file (-f)\n")
 
    # generate the code for all APIs
-   enabled_apis = set(["GLES", "GLES2", "GLES3", "GL", "GL_CORE"])
+   enabled_apis = set(["GLES", "GLES2", "GLES3", "GLES31", "GL", "GL_CORE"])
 
    try:
       api_desc = gl_XML.parse_GL_API(api_desc_file)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/get_hash_params.py mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/get_hash_params.py
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/get_hash_params.py	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/get_hash_params.py	2015-09-16 14:36:10.000000000 +0000
@@ -351,6 +351,9 @@
 # GL_ARB_framebuffer_object
   [ "MAX_SAMPLES", "CONTEXT_INT(Const.MaxSamples), extra_ARB_framebuffer_object_EXT_framebuffer_multisample" ],
 
+# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0
+  [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
+
 # GL_ARB_sync
   [ "MAX_SERVER_WAIT_TIMEOUT", "CONTEXT_INT64(Const.MaxServerWaitTimeout), extra_ARB_sync" ],
 
@@ -404,9 +407,55 @@
   [ "TEXTURE_EXTERNAL_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_OES_EGL_image_external" ],
 ]},
 
-{ "apis": ["GL", "GL_CORE", "GLES3"], "params": [
-# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0
-  [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
+# Enums in OpenGL and ES 3.1
+{ "apis": ["GL", "GL_CORE", "GLES31"], "params": [
+# GL_ARB_shader_image_load_store / GLES 3.1
+  [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store_es31" ],
+  [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ],
+  [ "MAX_FRAGMENT_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ],
+  [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store_es31" ],
+
+# GL_ARB_shader_atomic_counters / GLES 3.1
+  [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ],
+  [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters_es31" ],
+
+# GL_ARB_texture_multisample / GLES 3.1
+  [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample_es31" ],
+  [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample_es31" ],
+  [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample_es31" ],
+  [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample_es31" ],
+  [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample_es31" ],
+  [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample_es31" ],
+
+# GL_ARB_texture_gather / GLES 3.1
+  [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather_es31"],
+  [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather_es31"],
+
+# GL_ARB_compute_shader / GLES 3.1
+  [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ],
+  [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader_es31" ],
+  [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader_es31" ],
+  [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader_es31" ],
+  [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader_es31" ],
+  [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ],
+  [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
+  [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
+
+# GL_ARB_explicit_uniform_location / GLES 3.1
+  [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location_es31" ],
+]},
+
+# Enums in OpenGL Core profile and ES 3.1 (registered as GLES31, not GLES3)
+{ "apis": ["GL_CORE", "GLES31"], "params": [
+# GL_ARB_draw_indirect / GLES 3.1
+  [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect_es31" ],
 ]},
 
 # Remaining enums are only in OpenGL
@@ -492,7 +541,6 @@
   [ "MAX_LIST_NESTING", "CONST(MAX_LIST_NESTING), NO_EXTRA" ],
   [ "MAX_NAME_STACK_DEPTH", "CONST(MAX_NAME_STACK_DEPTH), NO_EXTRA" ],
   [ "MAX_PIXEL_MAP_TABLE", "CONST(MAX_PIXEL_MAP_TABLE), NO_EXTRA" ],
-  [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
   [ "NAME_STACK_DEPTH", "CONTEXT_INT(Select.NameStackDepth), NO_EXTRA" ],
   [ "PACK_LSB_FIRST", "CONTEXT_BOOL(Pack.LsbFirst), NO_EXTRA" ],
   [ "PACK_SWAP_BYTES", "CONTEXT_BOOL(Pack.SwapBytes), NO_EXTRA" ],
@@ -693,13 +741,7 @@
   [ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ],
 
 # GL_ARB_texture_multisample / GL 3.2
-  [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample" ],
   [ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ],
-  [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample" ],
-  [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample" ],
-  [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample" ],
-  [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ],
-  [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ],
 
 # GL 3.0
   [ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ],
@@ -750,62 +792,38 @@
   [ "TEXTURE_BINDING_CUBE_MAP_ARRAY_ARB", "LOC_CUSTOM, TYPE_INT, TEXTURE_CUBE_ARRAY_INDEX, extra_ARB_texture_cube_map_array" ],
 
 # GL_ARB_texture_gather
-  [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather"],
-  [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather"],
   [ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
 
 # GL_ARB_separate_shader_objects
   [ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
 
 # GL_ARB_shader_atomic_counters
-  [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters" ],
-  [ "MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters" ],
-  [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters" ],
-  [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ],
-  [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters" ],
-  [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ],
-  [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), extra_ARB_shader_atomic_counters" ],
   [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
   [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
-  [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters" ],
-  [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters" ],
 
 # GL_ARB_vertex_attrib_binding
   [ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
   [ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
 
 # GL_ARB_shader_image_load_store
-  [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store"],
-  [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedImageUnitsAndFragmentOutputs), extra_ARB_shader_image_load_store"],
-  [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store"],
-  [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store"],
+  [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ],
+  [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
   [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
-  [ "MAX_FRAGMENT_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store"],
-  [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store"],
 
-# GL_ARB_compute_shader
-  [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader" ],
-  [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader" ],
-  [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader" ],
-  [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader" ],
-  [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader" ],
-  [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader" ],
-  [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader" ],
-  [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader" ],
+# GL_ARB_framebuffer_no_attachments
+  ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
+  ["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
+  ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"],
+  ["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
 
-# GL_ARB_gpu_shader5
-  [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ],
-  [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
-  [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
-  [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ],
+# GL_EXT_polygon_offset_clamp
+  [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
 ]},
 
 # Enums restricted to OpenGL Core profile
 { "apis": ["GL_CORE"], "params": [
 # GL_ARB_texture_buffer_range
   [ "TEXTURE_BUFFER_OFFSET_ALIGNMENT", "CONTEXT_INT(Const.TextureBufferOffsetAlignment), extra_ARB_texture_buffer_range" ],
-# GL_ARB_draw_indirect
-  [ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ],
 
 # GL_ARB_viewport_array
   [ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ],
@@ -814,8 +832,43 @@
   [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ],
   [ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ],
 
-# GL_EXT_polygon_offset_clamp
-  [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
+# GL_ARB_gpu_shader5
+  [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5" ],
+  [ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
+  [ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
+  [ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ],
+
+# GL_ARB_tessellation_shader
+  [ "PATCH_VERTICES", "CONTEXT_INT(TessCtrlProgram.patch_vertices), extra_ARB_tessellation_shader" ],
+  [ "PATCH_DEFAULT_OUTER_LEVEL", "CONTEXT_FLOAT4(TessCtrlProgram.patch_default_outer_level), extra_ARB_tessellation_shader" ],
+  [ "PATCH_DEFAULT_INNER_LEVEL", "CONTEXT_FLOAT2(TessCtrlProgram.patch_default_inner_level), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_GEN_LEVEL", "CONTEXT_INT(Const.MaxTessGenLevel), extra_ARB_tessellation_shader" ],
+  [ "MAX_PATCH_VERTICES", "CONTEXT_INT(Const.MaxPatchVertices), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_CONTROL_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_EVALUATION_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_CONTROL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_PATCH_COMPONENTS", "CONTEXT_INT(Const.MaxTessPatchComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxTessControlTotalOutputComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_EVALUATION_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_CONTROL_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_EVALUATION_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_CONTROL_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks), extra_ARB_tessellation_shader" ],
+  [ "MAX_TESS_EVALUATION_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks), extra_ARB_tessellation_shader" ],
+  [ "MAX_COMBINED_TESS_CONTROL_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxCombinedUniformComponents), extra_ARB_tessellation_shader" ],
+  [ "MAX_COMBINED_TESS_EVALUATION_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxCombinedUniformComponents), extra_ARB_tessellation_shader" ],
+# Dependencies on GL_ARB_tessellation_shader
+  [ "MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_tessellation" ],
+  [ "MAX_TESS_CONTROL_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_tessellation" ],
+  [ "MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_tessellation" ],
+  [ "MAX_TESS_EVALUATION_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_tessellation" ],
+  [ "MAX_TESS_CONTROL_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms), extra_ARB_shader_image_load_store_and_tessellation"],
+  [ "MAX_TESS_EVALUATION_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms), extra_ARB_shader_image_load_store_and_tessellation"],
+
+# GL_ARB_shader_subroutine
+  [ "MAX_SUBROUTINES", "CONST(MAX_SUBROUTINES), extra_ARB_shader_subroutine" ],
+  [ "MAX_SUBROUTINE_UNIFORM_LOCATIONS", "CONST(MAX_SUBROUTINE_UNIFORM_LOCATIONS), extra_ARB_shader_subroutine" ],
 ]}
 
 ]
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/getstring.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/getstring.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/getstring.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/getstring.c	2015-09-16 14:36:10.000000000 +0000
@@ -72,10 +72,18 @@
       break;
 
    case API_OPENGLES2:
-      return (ctx->Version < 30)
-         ? (const GLubyte *) "OpenGL ES GLSL ES 1.0.16"
-         : (const GLubyte *) "OpenGL ES GLSL ES 3.00";
-
+      switch (ctx->Version) {
+      case 20:
+         return (const GLubyte *) "OpenGL ES GLSL ES 1.0.16";
+      case 30:
+         return (const GLubyte *) "OpenGL ES GLSL ES 3.00";
+      case 31:
+         return (const GLubyte *) "OpenGL ES GLSL ES 3.10";
+      default:
+         _mesa_problem(ctx,
+                       "Invalid OpenGL ES version in shading_language_version()");
+         return (const GLubyte *) 0;
+      }
    case API_OPENGLES:
       /* fall-through */
 
@@ -200,7 +208,7 @@
       return;
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_lookup_enum_by_nr(pname));
+      _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_enum_to_string(pname));
 
    switch (pname) {
       case GL_VERTEX_ARRAY_POINTER:
@@ -291,7 +299,7 @@
    ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glGetError <-- %s\n", _mesa_lookup_enum_by_nr(e));
+      _mesa_debug(ctx, "glGetError <-- %s\n", _mesa_enum_to_string(e));
 
    ctx->ErrorValue = (GLenum) GL_NO_ERROR;
    ctx->ErrorDebugCount = 0;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/glformats.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/glformats.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/glformats.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/glformats.c	2015-09-16 14:36:10.000000000 +0000
@@ -186,7 +186,7 @@
       return IDX_RG;
    default:
       _mesa_problem(NULL, "Unexpected inFormat %s",
-                    _mesa_lookup_enum_by_nr(value));
+                    _mesa_enum_to_string(value));
       return 0;
    }
 }
@@ -216,8 +216,8 @@
 
 #if 0
    printf("from %x/%s to %x/%s map %d %d %d %d %d %d\n",
-	  inFormat, _mesa_lookup_enum_by_nr(inFormat),
-	  outFormat, _mesa_lookup_enum_by_nr(outFormat),
+	  inFormat, _mesa_enum_to_string(inFormat),
+	  outFormat, _mesa_enum_to_string(outFormat),
 	  map[0],
 	  map[1],
 	  map[2],
@@ -1200,7 +1200,7 @@
  * \return GL_TRUE if compressed, GL_FALSE if uncompressed
  */
 GLboolean
-_mesa_is_compressed_format(struct gl_context *ctx, GLenum format)
+_mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
 {
    switch (format) {
    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
@@ -1278,9 +1278,53 @@
    }
 }
 
+/**
+ * Convert integer (*_INTEGER) unpack formats to the corresponding base format.
+ */
+GLenum
+_mesa_unpack_format_to_base_format(GLenum format)
+{
+   switch(format) {
+   case GL_RED_INTEGER:
+      return GL_RED;
+   case GL_GREEN_INTEGER:
+      return GL_GREEN;
+   case GL_BLUE_INTEGER:
+      return GL_BLUE;
+   case GL_ALPHA_INTEGER:
+      return GL_ALPHA;
+   case GL_RG_INTEGER:
+      return GL_RG;
+   case GL_RGB_INTEGER:
+      return GL_RGB;
+   case GL_RGBA_INTEGER:
+      return GL_RGBA;
+   case GL_BGR_INTEGER:
+      return GL_BGR;
+   case GL_BGRA_INTEGER:
+      return GL_BGRA;
+   case GL_LUMINANCE_INTEGER_EXT:
+      return GL_LUMINANCE;
+   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
+      return GL_LUMINANCE_ALPHA;
+   case GL_RED: /* base formats are listed explicitly and map to themselves */
+   case GL_GREEN:
+   case GL_BLUE:
+   case GL_RG:
+   case GL_RGB:
+   case GL_RGBA:
+   case GL_BGR:
+   case GL_BGRA:
+   case GL_ALPHA:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_ALPHA:
+   default: /* any other value is likewise returned unchanged */
+      return format;
+   }
+}
 
 /**
- * Convert various base formats to the cooresponding integer format.
+ * Convert various base formats to the corresponding integer format.
  */
 GLenum
 _mesa_base_format_to_integer_format(GLenum format)
@@ -1678,6 +1722,10 @@
       case GL_LUMINANCE:
       case GL_ALPHA:
          return GL_NO_ERROR;
+      case GL_RG:
+      case GL_RED:
+	 if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_texture_rg)
+            return GL_NO_ERROR;
       default:
          return GL_INVALID_OPERATION;
       }
@@ -2292,8 +2340,18 @@
          break;
 
       case GL_HALF_FLOAT:
-         if (internalFormat != GL_RG16F)
-            return GL_INVALID_OPERATION;
+      case GL_HALF_FLOAT_OES:
+         switch (internalFormat) {
+            case GL_RG16F:
+               break;
+            case GL_RG:
+               if (ctx->Extensions.ARB_texture_rg &&
+                   ctx->Extensions.OES_texture_half_float)
+                  break;
+            /* fallthrough */
+            default:
+               return GL_INVALID_OPERATION;
+         }
          break;
 
       case GL_FLOAT:
@@ -2301,6 +2359,11 @@
          case GL_RG16F:
          case GL_RG32F:
             break;
+         case GL_RG:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_float)
+               break;
+            /* fallthrough */
          default:
             return GL_INVALID_OPERATION;
          }
@@ -2361,8 +2424,19 @@
          break;
 
       case GL_HALF_FLOAT:
-         if (internalFormat != GL_R16F)
+      case GL_HALF_FLOAT_OES:
+         switch (internalFormat) {
+         case GL_R16F:
+            break;
+         case GL_RG:
+         case GL_RED:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_half_float)
+               break;
+            /* fallthrough */
+         default:
             return GL_INVALID_OPERATION;
+         }
          break;
 
       case GL_FLOAT:
@@ -2370,6 +2444,11 @@
          case GL_R16F:
          case GL_R32F:
             break;
+         case GL_RED:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_float)
+               break;
+            /* fallthrough */
          default:
             return GL_INVALID_OPERATION;
          }
@@ -2639,12 +2718,16 @@
          return MESA_FORMAT_B5G6R5_UNORM;
       else if (format == GL_BGR)
          return MESA_FORMAT_R5G6B5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B5G6R5_UINT;
       break;
    case GL_UNSIGNED_SHORT_5_6_5_REV:
       if (format == GL_RGB)
          return MESA_FORMAT_R5G6B5_UNORM;
       else if (format == GL_BGR)
          return MESA_FORMAT_B5G6R5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R5G6B5_UINT;
       break;
    case GL_UNSIGNED_SHORT_4_4_4_4:
       if (format == GL_RGBA)
@@ -2653,6 +2736,10 @@
          return MESA_FORMAT_A4R4G4B4_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_R4G4B4A4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A4B4G4R4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A4R4G4B4_UINT;
       break;
    case GL_UNSIGNED_SHORT_4_4_4_4_REV:
       if (format == GL_RGBA)
@@ -2661,26 +2748,42 @@
          return MESA_FORMAT_B4G4R4A4_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_A4B4G4R4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R4G4B4A4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B4G4R4A4_UINT;
       break;
    case GL_UNSIGNED_SHORT_5_5_5_1:
       if (format == GL_RGBA)
          return MESA_FORMAT_A1B5G5R5_UNORM;
       else if (format == GL_BGRA)
          return MESA_FORMAT_A1R5G5B5_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A1B5G5R5_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A1R5G5B5_UINT;
       break;
    case GL_UNSIGNED_SHORT_1_5_5_5_REV:
       if (format == GL_RGBA)
          return MESA_FORMAT_R5G5B5A1_UNORM;
       else if (format == GL_BGRA)
          return MESA_FORMAT_B5G5R5A1_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R5G5B5A1_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B5G5R5A1_UINT;
       break;
    case GL_UNSIGNED_BYTE_3_3_2:
       if (format == GL_RGB)
          return MESA_FORMAT_B2G3R3_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B2G3R3_UINT;
       break;
    case GL_UNSIGNED_BYTE_2_3_3_REV:
       if (format == GL_RGB)
          return MESA_FORMAT_R3G3B2_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R3G3B2_UINT;
       break;
    case GL_UNSIGNED_INT_5_9_9_9_REV:
       if (format == GL_RGB)
@@ -2715,6 +2818,10 @@
          return MESA_FORMAT_A8R8G8B8_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_R8G8B8A8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A8B8G8R8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A8R8G8B8_UINT;
       break;
    case GL_UNSIGNED_INT_8_8_8_8_REV:
       if (format == GL_RGBA)
@@ -2723,6 +2830,10 @@
          return MESA_FORMAT_B8G8R8A8_UNORM;
       else if (format == GL_ABGR_EXT)
          return MESA_FORMAT_A8B8G8R8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R8G8B8A8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B8G8R8A8_UINT;
       break;
    case GL_UNSIGNED_SHORT_8_8_MESA:
       if (format == GL_YCBCR_MESA)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/glformats.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/glformats.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/glformats.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/glformats.h	2015-09-16 14:36:10.000000000 +0000
@@ -96,11 +96,14 @@
 _mesa_is_depth_or_stencil_format(GLenum format);
 
 extern GLboolean
-_mesa_is_compressed_format(struct gl_context *ctx, GLenum format);
+_mesa_is_compressed_format(const struct gl_context *ctx, GLenum format);
 
 extern GLenum
 _mesa_base_format_to_integer_format(GLenum format);
 
+extern GLenum
+_mesa_unpack_format_to_base_format(GLenum format);
+
 extern GLboolean
 _mesa_base_format_has_channel(GLenum base_format, GLenum pname);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/glheader.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/glheader.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/glheader.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/glheader.h	2015-09-16 14:36:10.000000000 +0000
@@ -135,12 +135,6 @@
 #define GL_SHADER_PROGRAM_MESA 0x9999
 
 
-/**
- * Internal token for geometry programs.
- * Use the value for GL_GEOMETRY_PROGRAM_NV for now.
- */
-#define MESA_GEOMETRY_PROGRAM 0x8c26
-
 /* Several fields of struct gl_config can take these as values.  Since
  * GLX header files may not be available everywhere they need to be used,
  * redefine them here.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/hash.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/hash.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/hash.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/hash.c	2015-09-16 14:36:10.000000000 +0000
@@ -389,34 +389,6 @@
 
 
 /**
- * Clone all entries in a hash table, into a new table.
- *
- * \param table  the hash table to clone
- */
-struct _mesa_HashTable *
-_mesa_HashClone(const struct _mesa_HashTable *table)
-{
-   /* cast-away const */
-   struct _mesa_HashTable *table2 = (struct _mesa_HashTable *) table;
-   struct hash_entry *entry;
-   struct _mesa_HashTable *clonetable;
-
-   assert(table);
-   mtx_lock(&table2->Mutex);
-
-   clonetable = _mesa_NewHashTable();
-   assert(clonetable);
-   hash_table_foreach(table->ht, entry) {
-      _mesa_HashInsert(clonetable, (GLint)(uintptr_t)entry->key, entry->data);
-   }
-
-   mtx_unlock(&table2->Mutex);
-
-   return clonetable;
-}
-
-
-/**
  * Walk over all entries in a hash table, calling callback function for each.
  * Note: we use a separate mutex in this function to avoid a recursive
  * locking deadlock (in case the callback calls _mesa_HashRemove()) and to
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/hash.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/hash.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/hash.h	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/hash.h	2015-09-16 14:36:10.000000000 +0000
@@ -59,9 +59,6 @@
                     void (*callback)(GLuint key, void *data, void *userData),
                     void *userData);
 
-extern struct _mesa_HashTable *
-_mesa_HashClone(const struct _mesa_HashTable *table);
-
 extern void
 _mesa_HashWalk(const struct _mesa_HashTable *table,
                void (*callback)(GLuint key, void *data, void *userData),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/hint.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/hint.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/hint.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/hint.c	2015-09-16 14:36:10.000000000 +0000
@@ -40,8 +40,8 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glHint %s %s\n",
-                  _mesa_lookup_enum_by_nr(target),
-                  _mesa_lookup_enum_by_nr(mode));
+                  _mesa_enum_to_string(target),
+                  _mesa_enum_to_string(mode));
 
    if (mode != GL_NICEST && mode != GL_FASTEST && mode != GL_DONT_CARE) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glHint(mode)");
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/image.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/image.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/image.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/image.c	2015-09-16 14:36:10.000000000 +0000
@@ -49,8 +49,8 @@
  * \param src the array with the source data we want to byte-swap.
  * \param n number of words.
  */
-void
-_mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
+static void
+swap2_copy( GLushort *dst, GLushort *src, GLuint n )
 {
    GLuint i;
    for (i = 0; i < n; i++) {
@@ -58,7 +58,11 @@
    }
 }
 
-
+void
+_mesa_swap2(GLushort *p, GLuint n)
+{
+   swap2_copy(p, p, n); /* in place: p is both dst and src, n words */
+}
 
 /*
  * Flip the order of the 4 bytes in each word in the given array (src) and
@@ -69,8 +73,8 @@
  * \param src the array with the source data we want to byte-swap.
  * \param n number of words.
  */
-void
-_mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
+static void
+swap4_copy( GLuint *dst, GLuint *src, GLuint n )
 {
    GLuint i, a, b;
    for (i = 0; i < n; i++) {
@@ -83,6 +87,11 @@
    }
 }
 
+void
+_mesa_swap4(GLuint *p, GLuint n)
+{
+   swap4_copy(p, p, n); /* in place: p is both dst and src, n words */
+}
 
 /**
  * Return the byte offset of a specific pixel in an image (1D, 2D or 3D).
@@ -958,3 +967,42 @@
 
    return GL_TRUE;
 }
+
+/**
+ * Swap the bytes in a 2D image, reading from src and writing to dst.
+ *
+ * The swap unit is the size of the packed type: 2- and 4-byte units are
+ * swapped; for any other size nothing is written to dst.  Rows are
+ * stepped by the stride from _mesa_image_row_stride(), which is given
+ * the packing parameters.  packing->SwapBytes is asserted to be set.
+ */
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src)
+{
+   GLint swapSize = _mesa_sizeof_packed_type(type);
+
+   assert(packing->SwapBytes);
+
+   if (swapSize == 2 || swapSize == 4) { /* other sizes: no swap, no copy */
+      int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
+      int stride = _mesa_image_row_stride(packing, width, format, type);
+      int row;
+      uint8_t *dstrow;
+      const uint8_t *srcrow;
+      assert(swapsPerPixel > 0);
+      assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
+      dstrow = dst;
+      srcrow = src;
+      for (row = 0; row < height; row++) {
+         if (swapSize == 2)
+            swap2_copy((GLushort *)dstrow, (GLushort *)srcrow, width * swapsPerPixel);
+         else if (swapSize == 4)
+            swap4_copy((GLuint *)dstrow, (GLuint *)srcrow, width * swapsPerPixel);
+         dstrow += stride; /* both pointers advance by the same row stride */
+         srcrow += stride;
+      }
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/image.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/image.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/image.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/image.h	2015-09-16 14:36:10.000000000 +0000
@@ -35,22 +35,11 @@
 struct gl_framebuffer;
 
 extern void
-_mesa_swap2_copy(GLushort *dst, GLushort *src, GLuint n);
+_mesa_swap2(GLushort *p, GLuint n);
 
 extern void
-_mesa_swap4_copy(GLuint *dst, GLuint *src, GLuint n);
+_mesa_swap4(GLuint *p, GLuint n);
 
-static inline void
-_mesa_swap2(GLushort *p, GLuint n)
-{
-   _mesa_swap2_copy(p, p, n);
-}
-
-static inline void
-_mesa_swap4(GLuint *p, GLuint n)
-{
-   _mesa_swap4_copy(p, p, n);
-}
 
 extern GLintptr
 _mesa_image_offset( GLuint dimensions,
@@ -146,5 +135,10 @@
                 GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
                 GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1);
 
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src);
 
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/imports.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/imports.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/imports.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/imports.c	2015-09-16 14:36:10.000000000 +0000
@@ -369,7 +369,7 @@
           * or normal.
           */
          e = 0;
-         m = (int) _mesa_roundevenf((1 << 24) * fabsf(fi.f));
+         m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
       }
       else if (new_exp > 15) {
          /* map this value to infinity */
@@ -383,7 +383,7 @@
           * either normal or infinite.
           */
          e = new_exp + 15;
-         m = (int) _mesa_roundevenf(flt_m / (float) (1 << 13));
+         m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
       }
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/imports.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/imports.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/imports.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/imports.h	2015-09-16 14:36:10.000000000 +0000
@@ -170,34 +170,6 @@
    return (int) (f + 0.5F);
 }
 
-#ifdef __x86_64__
-#  include <xmmintrin.h>
-#endif
-
-/**
- * Convert float to int using a fast method.  The rounding mode may vary.
- */
-static inline int F_TO_I(float f)
-{
-#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
-   int r;
-   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
-   return r;
-#elif defined(USE_X86_ASM) && defined(_MSC_VER)
-   int r;
-   _asm {
-	 fld f
-	 fistp r
-	}
-   return r;
-#elif defined(__x86_64__)
-   return _mm_cvt_ss2si(_mm_load_ss(&f));
-#else
-   return IROUND(f);
-#endif
-}
-
-
 /** Return (as an integer) floor of float */
 static inline int IFLOOR(float f)
 {
@@ -229,38 +201,6 @@
 #endif
 }
 
-
-/** Return (as an integer) ceiling of float */
-static inline int ICEIL(float f)
-{
-#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
-   /*
-    * IEEE ceil for computers that round to nearest or even.
-    * 'f' must be between -4194304 and 4194303.
-    * This ceil operation is done by "(iround(f + .5) + iround(f - .5) + 1) >> 1",
-    * but uses some IEEE specific tricks for better speed.
-    * Contributed by Josh Vanderhoof
-    */
-   int ai, bi;
-   double af, bf;
-   af = (3 << 22) + 0.5 + (double)f;
-   bf = (3 << 22) + 0.5 - (double)f;
-   /* GCC generates an extra fstp/fld without this. */
-   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
-   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
-   return (ai - bi + 1) >> 1;
-#else
-   int ai, bi;
-   double af, bf;
-   fi_type u;
-   af = (3 << 22) + 0.5 + (double)f;
-   bf = (3 << 22) + 0.5 - (double)f;
-   u.f = (float) af; ai = u.i;
-   u.f = (float) bf; bi = u.i;
-   return (ai - bi + 1) >> 1;
-#endif
-}
-
 
 /**
  * Is x a power of two?
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/light.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/light.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/light.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/light.c	2015-09-16 14:36:10.000000000 +0000
@@ -42,16 +42,16 @@
    GET_CURRENT_CONTEXT(ctx);
 
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glShadeModel %s\n", _mesa_lookup_enum_by_nr(mode));
+      _mesa_debug(ctx, "glShadeModel %s\n", _mesa_enum_to_string(mode));
+
+   if (ctx->Light.ShadeModel == mode)
+      return;
 
    if (mode != GL_FLAT && mode != GL_SMOOTH) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glShadeModel");
       return;
    }
 
-   if (ctx->Light.ShadeModel == mode)
-      return;
-
    FLUSH_VERTICES(ctx, _NEW_LIGHT);
    ctx->Light.ShadeModel = mode;
 
@@ -143,7 +143,7 @@
       COPY_3V(light->SpotDirection, params);
       break;
    case GL_SPOT_EXPONENT:
-      assert(params[0] >= 0.0);
+      assert(params[0] >= 0.0F);
       assert(params[0] <= ctx->Const.MaxSpotExponent);
       if (light->SpotExponent == params[0])
 	 return;
@@ -151,12 +151,12 @@
       light->SpotExponent = params[0];
       break;
    case GL_SPOT_CUTOFF:
-      assert(params[0] == 180.0 || (params[0] >= 0.0 && params[0] <= 90.0));
+      assert(params[0] == 180.0F || (params[0] >= 0.0F && params[0] <= 90.0F));
       if (light->SpotCutoff == params[0])
          return;
       FLUSH_VERTICES(ctx, _NEW_LIGHT);
       light->SpotCutoff = params[0];
-      light->_CosCutoff = (GLfloat) (cos(light->SpotCutoff * M_PI / 180.0));
+      light->_CosCutoff = (cosf(light->SpotCutoff * M_PI / 180.0));
       if (light->_CosCutoff < 0)
          light->_CosCutoff = 0;
       if (light->SpotCutoff != 180.0F)
@@ -165,21 +165,21 @@
          light->_Flags &= ~LIGHT_SPOT;
       break;
    case GL_CONSTANT_ATTENUATION:
-      assert(params[0] >= 0.0);
+      assert(params[0] >= 0.0F);
       if (light->ConstantAttenuation == params[0])
 	 return;
       FLUSH_VERTICES(ctx, _NEW_LIGHT);
       light->ConstantAttenuation = params[0];
       break;
    case GL_LINEAR_ATTENUATION:
-      assert(params[0] >= 0.0);
+      assert(params[0] >= 0.0F);
       if (light->LinearAttenuation == params[0])
 	 return;
       FLUSH_VERTICES(ctx, _NEW_LIGHT);
       light->LinearAttenuation = params[0];
       break;
    case GL_QUADRATIC_ATTENUATION:
-      assert(params[0] >= 0.0);
+      assert(params[0] >= 0.0F);
       if (light->QuadraticAttenuation == params[0])
 	 return;
       FLUSH_VERTICES(ctx, _NEW_LIGHT);
@@ -238,31 +238,31 @@
       params = temp;
       break;
    case GL_SPOT_EXPONENT:
-      if (params[0] < 0.0 || params[0] > ctx->Const.MaxSpotExponent) {
+      if (params[0] < 0.0F || params[0] > ctx->Const.MaxSpotExponent) {
 	 _mesa_error(ctx, GL_INVALID_VALUE, "glLight");
 	 return;
       }
       break;
    case GL_SPOT_CUTOFF:
-      if ((params[0] < 0.0 || params[0] > 90.0) && params[0] != 180.0) {
+      if ((params[0] < 0.0F || params[0] > 90.0F) && params[0] != 180.0F) {
 	 _mesa_error(ctx, GL_INVALID_VALUE, "glLight");
 	 return;
       }
       break;
    case GL_CONSTANT_ATTENUATION:
-      if (params[0] < 0.0) {
+      if (params[0] < 0.0F) {
 	 _mesa_error(ctx, GL_INVALID_VALUE, "glLight");
 	 return;
       }
       break;
    case GL_LINEAR_ATTENUATION:
-      if (params[0] < 0.0) {
+      if (params[0] < 0.0F) {
 	 _mesa_error(ctx, GL_INVALID_VALUE, "glLight");
 	 return;
       }
       break;
    case GL_QUADRATIC_ATTENUATION:
-      if (params[0] < 0.0) {
+      if (params[0] < 0.0F) {
 	 _mesa_error(ctx, GL_INVALID_VALUE, "glLight");
 	 return;
       }
@@ -463,14 +463,14 @@
       case GL_LIGHT_MODEL_LOCAL_VIEWER:
          if (ctx->API != API_OPENGL_COMPAT)
             goto invalid_pname;
-         newbool = (params[0]!=0.0);
+         newbool = (params[0] != 0.0F);
 	 if (ctx->Light.Model.LocalViewer == newbool)
 	    return;
 	 FLUSH_VERTICES(ctx, _NEW_LIGHT);
 	 ctx->Light.Model.LocalViewer = newbool;
          break;
       case GL_LIGHT_MODEL_TWO_SIDE:
-         newbool = (params[0]!=0.0);
+         newbool = (params[0] != 0.0F);
 	 if (ctx->Light.Model.TwoSide == newbool)
 	    return;
 	 FLUSH_VERTICES(ctx, _NEW_LIGHT);
@@ -723,8 +723,8 @@
 
    if (MESA_VERBOSE&VERBOSE_API)
       _mesa_debug(ctx, "glColorMaterial %s %s\n",
-                  _mesa_lookup_enum_by_nr(face),
-                  _mesa_lookup_enum_by_nr(mode));
+                  _mesa_enum_to_string(face),
+                  _mesa_enum_to_string(mode));
 
    bitmask = _mesa_material_bitmask(ctx, face, mode, legal, "glColorMaterial");
    if (bitmask == 0)
@@ -975,7 +975,7 @@
       }
       else {
          /* positional light w/ homogeneous coordinate, divide by W */
-         GLfloat wInv = (GLfloat)1.0 / light->_Position[3];
+         GLfloat wInv = 1.0F / light->_Position[3];
          light->_Position[0] *= wInv;
          light->_Position[1] *= wInv;
          light->_Position[2] *= wInv;
@@ -1024,7 +1024,7 @@
    if (!_math_matrix_is_length_preserving(ctx->ModelviewMatrixStack.Top)) {
       const GLfloat *m = ctx->ModelviewMatrixStack.Top->inv;
       GLfloat f = m[2] * m[2] + m[6] * m[6] + m[10] * m[10];
-      if (f < 1e-12) f = 1.0;
+      if (f < 1e-12f) f = 1.0f;
       if (ctx->_NeedEyeCoords)
 	 ctx->_ModelViewInvScale = 1.0f / sqrtf(f);
       else
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/lines.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/lines.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/lines.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/lines.c	2015-09-16 14:36:10.000000000 +0000
@@ -45,7 +45,7 @@
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glLineWidth %f\n", width);
 
-   if (width<=0.0) {
+   if (width <= 0.0F) {
       _mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" );
       return;
    }
@@ -63,7 +63,7 @@
    if (ctx->API == API_OPENGL_CORE
        && ((ctx->Const.ContextFlags & GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT)
            != 0)
-       && width > 1.0) {
+       && width > 1.0F) {
       _mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" );
       return;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/macros.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/macros.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/macros.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/macros.h	2015-09-16 14:36:10.000000000 +0000
@@ -33,6 +33,7 @@
 
 #include "util/macros.h"
 #include "util/u_math.h"
+#include "util/rounding.h"
 #include "imports.h"
 
 
@@ -131,12 +132,12 @@
 #define INT_TO_USHORT(i)   ((i) < 0 ? 0 : ((GLushort) ((i) >> 15)))
 #define UINT_TO_USHORT(i)  ((i) < 0 ? 0 : ((GLushort) ((i) >> 16)))
 #define UNCLAMPED_FLOAT_TO_USHORT(us, f)  \
-        us = ( (GLushort) F_TO_I( CLAMP((f), 0.0F, 1.0F) * 65535.0F) )
+        us = ( (GLushort) _mesa_lroundevenf( CLAMP((f), 0.0F, 1.0F) * 65535.0F) )
 #define CLAMPED_FLOAT_TO_USHORT(us, f)  \
-        us = ( (GLushort) F_TO_I( (f) * 65535.0F) )
+        us = ( (GLushort) _mesa_lroundevenf( (f) * 65535.0F) )
 
 #define UNCLAMPED_FLOAT_TO_SHORT(s, f)  \
-        s = ( (GLshort) F_TO_I( CLAMP((f), -1.0F, 1.0F) * 32767.0F) )
+        s = ( (GLshort) _mesa_lroundevenf( CLAMP((f), -1.0F, 1.0F) * 32767.0F) )
 
 /***
  *** UNCLAMPED_FLOAT_TO_UBYTE: clamp float to [0,1] and map to ubyte in [0,255]
@@ -167,9 +168,9 @@
         } while (0)
 #else
 #define UNCLAMPED_FLOAT_TO_UBYTE(ub, f) \
-	ub = ((GLubyte) F_TO_I(CLAMP((f), 0.0F, 1.0F) * 255.0F))
+	ub = ((GLubyte) _mesa_lroundevenf(CLAMP((f), 0.0F, 1.0F) * 255.0F))
 #define CLAMPED_FLOAT_TO_UBYTE(ub, f) \
-	ub = ((GLubyte) F_TO_I((f) * 255.0F))
+	ub = ((GLubyte) _mesa_lroundevenf((f) * 255.0F))
 #endif
 
 static fi_type UINT_AS_UNION(GLuint u)
@@ -679,17 +680,6 @@
 }
 
 /**
- * Return true if the given value is a power of two.
- *
- * Note that this considers 0 a power of two.
- */
-static inline bool
-is_power_of_two(unsigned value)
-{
-   return (value & (value - 1)) == 0;
-}
-
-/**
  * Align a value up to an alignment value
  *
  * If \c value is not already aligned to the requested alignment value, it
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/matrix.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/matrix.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/matrix.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/matrix.c	2015-09-16 14:36:10.000000000 +0000
@@ -229,7 +229,7 @@
 
    if (MESA_VERBOSE&VERBOSE_API)
       _mesa_debug(ctx, "glPushMatrix %s\n",
-                  _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+                  _mesa_enum_to_string(ctx->Transform.MatrixMode));
 
    if (stack->Depth + 1 >= stack->MaxDepth) {
       if (ctx->Transform.MatrixMode == GL_TEXTURE) {
@@ -239,7 +239,7 @@
       }
       else {
          _mesa_error(ctx,  GL_STACK_OVERFLOW, "glPushMatrix(mode=%s)",
-                     _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+                     _mesa_enum_to_string(ctx->Transform.MatrixMode));
       }
       return;
    }
@@ -270,7 +270,7 @@
 
    if (MESA_VERBOSE&VERBOSE_API)
       _mesa_debug(ctx, "glPopMatrix %s\n",
-                  _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+                  _mesa_enum_to_string(ctx->Transform.MatrixMode));
 
    if (stack->Depth == 0) {
       if (ctx->Transform.MatrixMode == GL_TEXTURE) {
@@ -280,7 +280,7 @@
       }
       else {
          _mesa_error(ctx,  GL_STACK_UNDERFLOW, "glPopMatrix(mode=%s)",
-                     _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+                     _mesa_enum_to_string(ctx->Transform.MatrixMode));
       }
       return;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/mipmap.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/mipmap.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/mipmap.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/mipmap.c	2015-09-16 14:36:10.000000000 +0000
@@ -2077,9 +2077,12 @@
 
       /* Get the uncompressed image */
       assert(srcImage->Level == texObj->BaseLevel);
-      ctx->Driver.GetTexImage(ctx,
-                              temp_base_format, temp_datatype,
-                              temp_src, srcImage);
+      ctx->Driver.GetTexSubImage(ctx,
+                                 0, 0, 0,
+                                 srcImage->Width, srcImage->Height,
+                                 srcImage->Depth,
+                                 temp_base_format, temp_datatype,
+                                 temp_src, srcImage);
       /* restore packing mode */
       ctx->Pack = save;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/mtypes.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/mtypes.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/mtypes.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/mtypes.h	2015-09-16 14:36:10.000000000 +0000
@@ -43,7 +43,6 @@
 #include "glapi/glapi.h"
 #include "math/m_matrix.h"	/* GLmatrix */
 #include "glsl/shader_enums.h"
-#include "util/simple_list.h"	/* struct simple_node */
 #include "main/formats.h"       /* MESA_FORMAT_COUNT */
 
 
@@ -91,12 +90,10 @@
 
 
 /** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */
-#define PRIM_MAX                 GL_TRIANGLE_STRIP_ADJACENCY
+#define PRIM_MAX                 GL_PATCHES
 #define PRIM_OUTSIDE_BEGIN_END   (PRIM_MAX + 1)
 #define PRIM_UNKNOWN             (PRIM_MAX + 2)
 
-
-
 /**
  * Indexes for vertex program attributes.
  * GL_NV_vertex_program aliases generic attributes over the conventional
@@ -204,81 +201,10 @@
 /*@}*/
 
 
-/**
- * Indexes for vertex shader outputs, geometry shader inputs/outputs, and
- * fragment shader inputs.
- *
- * Note that some of these values are not available to all pipeline stages.
- *
- * When this enum is updated, the following code must be updated too:
- * - vertResults (in prog_print.c's arb_output_attrib_string())
- * - fragAttribs (in prog_print.c's arb_input_attrib_string())
- * - _mesa_varying_slot_in_fs()
- */
-typedef enum
-{
-   VARYING_SLOT_POS,
-   VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */
-   VARYING_SLOT_COL1,
-   VARYING_SLOT_FOGC,
-   VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */
-   VARYING_SLOT_TEX1,
-   VARYING_SLOT_TEX2,
-   VARYING_SLOT_TEX3,
-   VARYING_SLOT_TEX4,
-   VARYING_SLOT_TEX5,
-   VARYING_SLOT_TEX6,
-   VARYING_SLOT_TEX7,
-   VARYING_SLOT_PSIZ, /* Does not appear in FS */
-   VARYING_SLOT_BFC0, /* Does not appear in FS */
-   VARYING_SLOT_BFC1, /* Does not appear in FS */
-   VARYING_SLOT_EDGE, /* Does not appear in FS */
-   VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */
-   VARYING_SLOT_CLIP_DIST0,
-   VARYING_SLOT_CLIP_DIST1,
-   VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */
-   VARYING_SLOT_LAYER, /* Appears as VS or GS output */
-   VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */
-   VARYING_SLOT_FACE, /* FS only */
-   VARYING_SLOT_PNTC, /* FS only */
-   VARYING_SLOT_VAR0, /* First generic varying slot */
-   VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING
-} gl_varying_slot;
-
-
-/**
- * Bitflags for varying slots.
- */
-/*@{*/
-#define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS)
-#define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0)
-#define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1)
-#define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC)
-#define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0)
-#define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1)
-#define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2)
-#define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3)
-#define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4)
-#define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5)
-#define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6)
-#define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7)
-#define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U))
-#define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \
-                                              MAX_TEXTURE_COORD_UNITS)
-#define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
-#define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
-#define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
-#define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
-#define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
-#define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
-#define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)
-#define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID)
-#define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER)
-#define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)
-#define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE)
-#define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC)
-#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V))
-/*@}*/
+#define VARYING_SLOT_MAX	(VARYING_SLOT_VAR0 + MAX_VARYING)
+#define VARYING_SLOT_PATCH0	(VARYING_SLOT_MAX)
+#define VARYING_SLOT_TESS_MAX	(VARYING_SLOT_PATCH0 + MAX_VARYING)
+#define FRAG_RESULT_MAX		(FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
 
 /**
  * Determine if the given gl_varying_slot appears in the fragment shader.
@@ -293,35 +219,14 @@
    case VARYING_SLOT_EDGE:
    case VARYING_SLOT_CLIP_VERTEX:
    case VARYING_SLOT_LAYER:
+   case VARYING_SLOT_TESS_LEVEL_OUTER:
+   case VARYING_SLOT_TESS_LEVEL_INNER:
       return GL_FALSE;
    default:
       return GL_TRUE;
    }
 }
 
-
-/**
- * Fragment program results
- */
-typedef enum
-{
-   FRAG_RESULT_DEPTH = 0,
-   FRAG_RESULT_STENCIL = 1,
-   /* If a single color should be written to all render targets, this
-    * register is written.  No FRAG_RESULT_DATAn will be written.
-    */
-   FRAG_RESULT_COLOR = 2,
-   FRAG_RESULT_SAMPLE_MASK = 3,
-
-   /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
-    * or ARB_fragment_program fragment.color[n]) color results.  If
-    * any are written, FRAG_RESULT_COLOR will not be written.
-    */
-   FRAG_RESULT_DATA0 = 4,
-   FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
-} gl_frag_result;
-
-
 /**
  * Indexes for all renderbuffers
  */
@@ -398,7 +303,6 @@
 {
    GLboolean rgbMode;
    GLboolean floatMode;
-   GLboolean colorIndexMode;  /* XXX is this used anywhere? */
    GLuint doubleBufferMode;
    GLuint stereoMode;
 
@@ -1461,6 +1365,7 @@
    USAGE_UNIFORM_BUFFER = 0x1,
    USAGE_TEXTURE_BUFFER = 0x2,
    USAGE_ATOMIC_COUNTER_BUFFER = 0x4,
+   USAGE_SHADER_STORAGE_BUFFER = 0x8,
 } gl_buffer_usage;
 
 
@@ -1861,6 +1766,11 @@
     * multiple transform feedback outputs in the same buffer.
     */
    unsigned BufferStride[MAX_FEEDBACK_BUFFERS];
+
+   /**
+    * Which transform feedback stream this buffer binding is associated with.
+    */
+   unsigned BufferStream[MAX_FEEDBACK_BUFFERS];
 };
 
 
@@ -2098,9 +2008,9 @@
    GLbitfield64 InputsRead;     /**< Bitmask of which input regs are read */
    GLbitfield64 DoubleInputsRead;     /**< Bitmask of which input regs are read  and are doubles */
    GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */
+   GLbitfield PatchInputsRead;  /**< VAR[0..31] usage for patch inputs (user-defined only) */
+   GLbitfield PatchOutputsWritten; /**< VAR[0..31] usage for patch outputs (user-defined only) */
    GLbitfield SystemValuesRead;   /**< Bitmask of SYSTEM_VALUE_x inputs used */
-   GLbitfield InputFlags[MAX_PROGRAM_INPUTS];   /**< PROG_PARAM_BIT_x flags */
-   GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */
    GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS];  /**< TEXTURE_x_BIT bitmask */
    GLbitfield SamplersUsed;   /**< Bitfield of which samplers are used */
    GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
@@ -2167,6 +2077,29 @@
 };
 
 
+/** Tessellation control program object */
+struct gl_tess_ctrl_program
+{
+   struct gl_program Base;   /**< base class */
+
+   /* output layout */
+   GLint VerticesOut;   /**< NOTE(review): presumably the output patch size */
+};
+
+
+/** Tessellation evaluation program object */
+struct gl_tess_eval_program
+{
+   struct gl_program Base;   /**< base class */
+
+   /* input layout */
+   GLenum PrimitiveMode; /**< GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
+   GLenum Spacing;       /**< GL_EQUAL, GL_FRACTIONAL_EVEN, GL_FRACTIONAL_ODD */
+   GLenum VertexOrder;   /**< GL_CW or GL_CCW */
+   bool PointMode;       /**< NOTE(review): presumably point_mode layout flag */
+};
+
+
 /** Geometry program object */
 struct gl_geometry_program
 {
@@ -2269,22 +2202,37 @@
    GLboolean _Overriden;
 };
 
+/**
+ * Context state for tessellation control programs.
+ */
+struct gl_tess_ctrl_program_state
+{
+   /** Currently bound and valid tessellation control program. */
+   struct gl_tess_ctrl_program *_Current;
+
+   GLint patch_vertices;   /**< NOTE(review): presumably GL_PATCH_VERTICES */
+   GLfloat patch_default_outer_level[4];   /**< default outer levels */
+   GLfloat patch_default_inner_level[2];   /**< default inner levels */
+};
+
+/**
+ * Context state for tessellation evaluation programs.
+ */
+struct gl_tess_eval_program_state
+{
+   /** Currently bound and valid tessellation evaluation program. */
+   struct gl_tess_eval_program *_Current;
+};
 
 /**
  * Context state for geometry programs.
  */
 struct gl_geometry_program_state
 {
-   GLboolean Enabled;               /**< GL_ARB_GEOMETRY_SHADER4 */
-   GLboolean _Enabled;              /**< Enabled and valid program? */
-   struct gl_geometry_program *Current;  /**< user-bound geometry program */
-
    /** Currently enabled and valid program (including internal programs
     * and compiled shader programs).
     */
    struct gl_geometry_program *_Current;
-
-   GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
 };
 
 /**
@@ -2320,8 +2268,6 @@
  */
 struct gl_compute_program_state
 {
-   struct gl_compute_program *Current;  /**< user-bound compute program */
-
    /** Currently enabled and valid program (including internal programs
     * and compiled shader programs).
     */
@@ -2371,13 +2317,23 @@
    struct ati_fragment_shader *Current;
 };
 
+/**
+ * Shader subroutine function definition.
+ */
+struct gl_subroutine_function
+{
+   char *name;                      /**< subroutine function name */
+   int num_compat_types;            /**< presumably length of types; confirm */
+   const struct glsl_type **types;  /**< NOTE(review): compatible types? */
+};
 
 /**
  * A GLSL vertex or fragment shader object.
  */
 struct gl_shader
 {
-   /** GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB.
+   /** GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB ||
+    *  GL_TESS_CONTROL_SHADER || GL_TESS_EVALUATION_SHADER.
     * Must be the first field.
     */
    GLenum Type;
@@ -2457,6 +2413,41 @@
    bool pixel_center_integer;
 
    /**
+    * Tessellation Control shader state from layout qualifiers.
+    */
+   struct {
+      /**
+       * 0 - vertices not declared in shader, or
+       * 1 .. GL_MAX_PATCH_VERTICES
+       */
+      GLint VerticesOut;
+   } TessCtrl;
+
+   /**
+    * Tessellation Evaluation shader state from layout qualifiers.
+    */
+   struct {
+      /**
+       * GL_TRIANGLES, GL_QUADS, GL_ISOLINES or PRIM_UNKNOWN if it's not set
+       * in this shader.
+       */
+      GLenum PrimitiveMode;
+      /**
+       * GL_EQUAL, GL_FRACTIONAL_ODD, GL_FRACTIONAL_EVEN, or 0 if it's not set
+       * in this shader.
+       */
+      GLenum Spacing;
+      /**
+       * GL_CW, GL_CCW, or 0 if it's not set in this shader.
+       */
+      GLenum VertexOrder;
+      /**
+       * 1, 0, or -1 if it's not set in this shader.
+       */
+      int PointMode;
+   } TessEval;
+
+   /**
     * Geometry shader state from GLSL 1.50 layout qualifiers.
     */
    struct {
@@ -2521,6 +2512,25 @@
        */
       unsigned LocalSize[3];
    } Comp;
+
+   /**
+    * Number of types for subroutine uniforms.
+    */
+   GLuint NumSubroutineUniformTypes;
+
+   /**
+    * Subroutine uniform remap table
+    * based on the program level uniform remap table.
+    */
+   GLuint NumSubroutineUniformRemapTable;
+   struct gl_uniform_storage **SubroutineUniformRemapTable;
+
+   /**
+    * Number of subroutine functions for this stage
+    * and storage for them.
+    */
+   GLuint NumSubroutineFunctions;
+   struct gl_subroutine_function *SubroutineFunctions;
 };
 
 
@@ -2577,6 +2587,11 @@
    GLuint UniformBufferSize;
 
    /**
+    * Is this actually an interface block for a shader storage buffer?
+    */
+   bool IsShaderStorage;
+
+   /**
     * Layout specified in the shader
     *
     * This isn't accessible through the API, but it is used while
@@ -2680,6 +2695,37 @@
    enum gl_frag_depth_layout FragDepthLayout;
 
    /**
+    * Tessellation Control shader state from layout qualifiers.
+    */
+   struct {
+      /**
+       * 0 - vertices not declared in shader, or
+       * 1 .. GL_MAX_PATCH_VERTICES
+       */
+      GLint VerticesOut;
+   } TessCtrl;
+
+   /**
+    * Tessellation Evaluation shader state from layout qualifiers.
+    */
+   struct {
+      /** GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
+      GLenum PrimitiveMode;
+      /** GL_EQUAL, GL_FRACTIONAL_ODD or GL_FRACTIONAL_EVEN */
+      GLenum Spacing;
+      /** GL_CW or GL_CCW */
+      GLenum VertexOrder;
+      bool PointMode;
+      /**
+       * True if gl_ClipDistance is written to.  Copied into
+       * gl_tess_eval_program by _mesa_copy_linked_program_data().
+       */
+      GLboolean UsesClipDistance;
+      GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
+                                         0 if not present. */
+   } TessEval;
+
+   /**
     * Geometry shader state - copied into gl_geometry_program by
     * _mesa_copy_linked_program_data().
     */
@@ -2728,7 +2774,7 @@
    } Comp;
 
    /* post-link info: */
-   unsigned NumUserUniformStorage;
+   unsigned NumUniformStorage;
    unsigned NumHiddenUniforms;
    struct gl_uniform_storage *UniformStorage;
 
@@ -3007,7 +3053,6 @@
    struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */
    struct gl_vertex_program *DefaultVertexProgram;
    struct gl_fragment_program *DefaultFragmentProgram;
-   struct gl_geometry_program *DefaultGeometryProgram;
    /*@}*/
 
    /* GL_ATI_fragment_shader */
@@ -3149,12 +3194,29 @@
     */
    struct gl_config Visual;
 
-   GLuint Width, Height;	/**< size of frame buffer in pixels */
+   /**
+    * Size of frame buffer in pixels. If there are no attachments, then both
+    * of these are 0.
+    */
+   GLuint Width, Height;
 
-   /** \name  Drawing bounds (Intersection of buffer size and scissor box) */
+   /**
+    * In the case that the framebuffer has no attachment (i.e.
+    * GL_ARB_framebuffer_no_attachments) then the geometry of
+    * the framebuffer is specified by the default values.
+    */
+   struct {
+     GLuint Width, Height, Layers, NumSamples;
+     GLboolean FixedSampleLocations;
+   } DefaultGeometry;
+
+   /** \name  Drawing bounds (Intersection of buffer size and scissor box)
+    * The drawing region is given by [_Xmin, _Xmax) x [_Ymin, _Ymax),
+    * (inclusive for _Xmin and _Ymin while exclusive for _Xmax and _Ymax)
+    */
    /*@{*/
-   GLint _Xmin, _Xmax;  /**< inclusive */
-   GLint _Ymin, _Ymax;  /**< exclusive */
+   GLint _Xmin, _Xmax;
+   GLint _Ymin, _Ymax;
    /*@}*/
 
    /** \name  Derived Z buffer stuff */
@@ -3167,6 +3229,22 @@
    /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */
    GLenum _Status;
 
+   /** Whether at least one Attachment has Type != GL_NONE.
+    * NOTE: Width and Height are set to 0 when there are no attachments.
+    * A backend driver supporting GL_ARB_framebuffer_no_attachments must
+    * therefore check _HasAttachments and, if it is false, use the values
+    * in DefaultGeometry to initialize its viewport, scissor and so on
+    * (in particular _Xmin, _Xmax, _Ymin and _Ymax do NOT take
+    * _HasAttachments being false into account). To get the geometry of
+    * the framebuffer, use the helper functions
+    *   _mesa_geometric_width(),
+    *   _mesa_geometric_height(),
+    *   _mesa_geometric_samples() and
+    *   _mesa_geometric_layers(),
+    * which check _HasAttachments.
+    */
+   bool _HasAttachments;
+
    /** Integer color values */
    GLboolean _IntegerColor;
 
@@ -3177,7 +3255,9 @@
    /**
     * The maximum number of layers in the framebuffer, or 0 if the framebuffer
     * is not layered.  For cube maps and cube map arrays, each cube face
-    * counts as a layer.
+    * counts as a layer. As with Width and Height, a backend driver
+    * supporting GL_ARB_framebuffer_no_attachments must use DefaultGeometry
+    * when _HasAttachments is false.
     */
    GLuint MaxNumLayers;
 
@@ -3279,6 +3359,9 @@
 
    /* GL_ARB_shader_image_load_store */
    GLuint MaxImageUniforms;
+
+   /* GL_ARB_shader_storage_buffer_object */
+   GLuint MaxShaderStorageBlocks;
 };
 
 
@@ -3356,6 +3439,14 @@
    GLuint MaxRenderbufferSize;   /**< GL_EXT_framebuffer_object */
    GLuint MaxSamples;            /**< GL_ARB_framebuffer_object */
 
+   /**
+    * GL_ARB_framebuffer_no_attachments
+    */
+   GLuint MaxFramebufferWidth;
+   GLuint MaxFramebufferHeight;
+   GLuint MaxFramebufferLayers;
+   GLuint MaxFramebufferSamples;
+
    /** Number of varying vectors between any two shader stages. */
    GLuint MaxVarying;
 
@@ -3368,6 +3459,15 @@
    GLuint UniformBufferOffsetAlignment;
    /** @} */
 
+   /** @{
+    * GL_ARB_shader_storage_buffer_object
+    */
+   GLuint MaxCombinedShaderStorageBlocks;
+   GLuint MaxShaderStorageBufferBindings;
+   GLuint MaxShaderStorageBlockSize;
+   GLuint ShaderStorageBufferOffsetAlignment;
+   /** @} */
+
    /**
     * GL_ARB_explicit_uniform_location
     */
@@ -3575,7 +3675,7 @@
 
    /* GL_ARB_shader_image_load_store */
    GLuint MaxImageUnits;
-   GLuint MaxCombinedImageUnitsAndFragmentOutputs;
+   GLuint MaxCombinedShaderOutputResources;
    GLuint MaxImageSamples;
    GLuint MaxCombinedImageUniforms;
 
@@ -3594,6 +3694,13 @@
    GLenum ContextReleaseBehavior;
 
    struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_STAGES];
+
+   /** GL_ARB_tessellation_shader */
+   GLuint MaxPatchVertices;
+   GLuint MaxTessGenLevel;
+   GLuint MaxTessPatchComponents;
+   GLuint MaxTessControlTotalOutputComponents;
+   bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */
 };
 
 
@@ -3633,6 +3740,7 @@
    GLboolean ARB_fragment_program;
    GLboolean ARB_fragment_program_shadow;
    GLboolean ARB_fragment_shader;
+   GLboolean ARB_framebuffer_no_attachments;
    GLboolean ARB_framebuffer_object;
    GLboolean ARB_explicit_attrib_location;
    GLboolean ARB_explicit_uniform_location;
@@ -3652,8 +3760,11 @@
    GLboolean ARB_shader_atomic_counters;
    GLboolean ARB_shader_bit_encoding;
    GLboolean ARB_shader_image_load_store;
+   GLboolean ARB_shader_image_size;
    GLboolean ARB_shader_precision;
    GLboolean ARB_shader_stencil_export;
+   GLboolean ARB_shader_storage_buffer_object;
+   GLboolean ARB_shader_subroutine;
    GLboolean ARB_shader_texture_lod;
    GLboolean ARB_shading_language_packing;
    GLboolean ARB_shading_language_420pack;
@@ -3985,6 +4096,12 @@
     */
    uint64_t NewUniformBuffer;
 
+   /**
+    * gl_context::ShaderStorageBufferBindings
+    * gl_shader_program::ShaderStorageBlocks
+    */
+   uint64_t NewShaderStorageBuffer;
+
    uint64_t NewTextureBuffer;
 
    /**
@@ -3996,6 +4113,11 @@
     * gl_context::ImageUnits
     */
    uint64_t NewImageUnits;
+
+   /**
+    * gl_context::TessCtrlProgram::patch_default_*
+    */
+   uint64_t NewDefaultTessLevels;
 };
 
 struct gl_uniform_buffer_binding
@@ -4012,6 +4134,20 @@
    GLboolean AutomaticSize;
 };
 
+struct gl_shader_storage_buffer_binding
+{
+   struct gl_buffer_object *BufferObject;   /**< the bound buffer object */
+   /** Start of shader storage block data in the buffer */
+   GLintptr Offset;
+   /** Size of data allowed to be referenced from the buffer (in bytes) */
+   GLsizeiptr Size;
+   /**
+    * glBindBufferBase() indicates that the Size should be ignored and only
+    * limited by the current size of the BufferObject.
+    */
+   GLboolean AutomaticSize;
+};
+
 /**
  * ARB_shader_image_load_store image unit.
  */
@@ -4042,10 +4178,16 @@
    GLboolean _Valid;
 
    /**
+    * Layer of the texture object bound to this unit as specified by the
+    * application.
+    */
+   GLuint Layer;
+
+   /**
     * Layer of the texture object bound to this unit, or zero if the
     * whole level is bound.
     */
-   GLuint Layer;
+   GLuint _Layer;
 
    /**
     * Access allowed to this texture image.  Either \c GL_READ_ONLY,
@@ -4217,6 +4359,8 @@
    struct gl_fragment_program_state FragmentProgram;
    struct gl_geometry_program_state GeometryProgram;
    struct gl_compute_program_state ComputeProgram;
+   struct gl_tess_ctrl_program_state TessCtrlProgram;
+   struct gl_tess_eval_program_state TessEvalProgram;
    struct gl_ati_fragment_shader_state ATIFragmentShader;
 
    struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */
@@ -4259,6 +4403,12 @@
    struct gl_buffer_object *UniformBuffer;
 
    /**
+    * Current GL_ARB_shader_storage_buffer_object binding referenced by
+    * GL_SHADER_STORAGE_BUFFER target for glBufferData, glMapBuffer, etc.
+    */
+   struct gl_buffer_object *ShaderStorageBuffer;
+
+   /**
     * Array of uniform buffers for GL_ARB_uniform_buffer_object and GL 3.1.
     * This is set up using glBindBufferRange() or glBindBufferBase().  They are
     * associated with uniform blocks by glUniformBlockBinding()'s state in the
@@ -4268,6 +4418,15 @@
       UniformBufferBindings[MAX_COMBINED_UNIFORM_BUFFERS];
 
    /**
+    * Array of shader storage buffers for ARB_shader_storage_buffer_object
+    * and GL 4.3. This is set up using glBindBufferRange() or
+    * glBindBufferBase().  They are associated with shader storage blocks by
+    * glShaderStorageBlockBinding()'s state in the shader program.
+    */
+   struct gl_shader_storage_buffer_binding
+      ShaderStorageBufferBindings[MAX_COMBINED_SHADER_STORAGE_BUFFERS];
+
+   /**
     * Object currently associated with the GL_ATOMIC_COUNTER_BUFFER
     * target.
     */
@@ -4420,7 +4579,12 @@
    DEBUG_INCOMPLETE_FBO         = (1 << 3)
 };
 
-
+static inline bool
+_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
+{
+   return ctx->Shader._CurrentFragmentProgram == NULL ? false :
+      ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0;
+}
 
 #ifdef __cplusplus
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/multisample.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/multisample.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/multisample.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/multisample.c	2015-09-16 14:36:10.000000000 +0000
@@ -43,7 +43,7 @@
 
    FLUSH_VERTICES(ctx, 0);
 
-   ctx->Multisample.SampleCoverageValue = (GLfloat) CLAMP(value, 0.0, 1.0);
+   ctx->Multisample.SampleCoverageValue = CLAMP(value, 0.0f, 1.0f);
    ctx->Multisample.SampleCoverageInvert = invert;
    ctx->NewState |= _NEW_MULTISAMPLE;
 }
@@ -134,7 +134,7 @@
 
    FLUSH_VERTICES(ctx, 0);
 
-   ctx->Multisample.MinSampleShadingValue = CLAMP(value, 0.0, 1.0);
+   ctx->Multisample.MinSampleShadingValue = CLAMP(value, 0.0f, 1.0f);
    ctx->NewState |= _NEW_MULTISAMPLE;
 }
 
@@ -150,22 +150,16 @@
 _mesa_check_sample_count(struct gl_context *ctx, GLenum target,
                          GLenum internalFormat, GLsizei samples)
 {
-   /* Section 2.5 (GL Errors) of OpenGL 3.0 specification, page 16:
-    *
-    * "If a negative number is provided where an argument of type sizei or
-    * sizeiptr is specified, the error INVALID VALUE is generated."
-    */
-   if (samples < 0) {
-      return GL_INVALID_VALUE;
-   }
-
    /* Section 4.4 (Framebuffer objects), page 198 of the OpenGL ES 3.0.0
     * specification says:
     *
     *     "If internalformat is a signed or unsigned integer format and samples
     *     is greater than zero, then the error INVALID_OPERATION is generated."
+    *
+    * This restriction is relaxed for OpenGL ES 3.1.
     */
-   if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalFormat)
+   if ((ctx->API == API_OPENGLES2 && ctx->Version == 30) &&
+       _mesa_is_enum_format_integer(internalFormat)
        && samples > 0) {
       return GL_INVALID_OPERATION;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/objectlabel.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/objectlabel.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/objectlabel.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/objectlabel.c	2015-09-16 14:36:10.000000000 +0000
@@ -234,7 +234,7 @@
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(identifier = %s)",
-               caller, _mesa_lookup_enum_by_nr(identifier));
+               caller, _mesa_enum_to_string(identifier));
    return NULL;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pack.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pack.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pack.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pack.c	2015-09-16 14:36:10.000000000 +0000
@@ -470,7 +470,7 @@
 static inline GLuint
 clamp_float_to_uint(GLfloat f)
 {
-   return f < 0.0F ? 0 : F_TO_I(f);
+   return f < 0.0F ? 0 : _mesa_lroundevenf(f);
 }
 
 
@@ -478,7 +478,7 @@
 clamp_half_to_uint(GLhalfARB h)
 {
    GLfloat f = _mesa_half_to_float(h);
-   return f < 0.0F ? 0 : F_TO_I(f);
+   return f < 0.0F ? 0 : _mesa_lroundevenf(f);
 }
 
 
@@ -796,7 +796,7 @@
     * back to an int type can introduce errors that will show up as
     * artifacts in things like depth peeling which uses glCopyTexImage.
     */
-   if (ctx->Pixel.DepthScale == 1.0 && ctx->Pixel.DepthBias == 0.0) {
+   if (ctx->Pixel.DepthScale == 1.0F && ctx->Pixel.DepthBias == 0.0F) {
       if (srcType == GL_UNSIGNED_INT && dstType == GL_UNSIGNED_SHORT) {
          const GLuint *src = (const GLuint *) source;
          GLushort *dst = (GLushort *) dest;
@@ -874,8 +874,8 @@
       case GL_UNSIGNED_INT_24_8_EXT: /* GL_EXT_packed_depth_stencil */
          if (dstType == GL_UNSIGNED_INT_24_8_EXT &&
              depthMax == 0xffffff &&
-             ctx->Pixel.DepthScale == 1.0 &&
-             ctx->Pixel.DepthBias == 0.0) {
+             ctx->Pixel.DepthScale == 1.0F &&
+             ctx->Pixel.DepthBias == 0.0F) {
             const GLuint *src = (const GLuint *) source;
             GLuint *zValues = (GLuint *) dest;
             GLuint i;
@@ -945,7 +945,7 @@
    {
       const GLfloat scale = ctx->Pixel.DepthScale;
       const GLfloat bias = ctx->Pixel.DepthBias;
-      if (scale != 1.0 || bias != 0.0) {
+      if (scale != 1.0F || bias != 0.0F) {
          GLuint i;
          for (i = 0; i < n; i++) {
             depthValues[i] = depthValues[i] * scale + bias;
@@ -958,7 +958,7 @@
    if (needClamp) {
       GLuint i;
       for (i = 0; i < n; i++) {
-         depthValues[i] = (GLfloat)CLAMP(depthValues[i], 0.0, 1.0);
+         depthValues[i] = CLAMP(depthValues[i], 0.0F, 1.0F);
       }
    }
 
@@ -1025,7 +1025,7 @@
       return;
    }
 
-   if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) {
+   if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F) {
       memcpy(depthCopy, depthSpan, n * sizeof(GLfloat));
       _mesa_scale_and_bias_depth(ctx, n, depthCopy);
       depthSpan = depthCopy;
@@ -1153,7 +1153,7 @@
       return;
    }
 
-   if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) {
+   if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F) {
       memcpy(depthCopy, depthVals, n * sizeof(GLfloat));
       _mesa_scale_and_bias_depth(ctx, n, depthCopy);
       depthVals = depthCopy;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pipelineobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pipelineobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pipelineobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pipelineobj.c	2015-09-16 14:36:10.000000000 +0000
@@ -244,14 +244,13 @@
     *
     *     "If stages is not the special value ALL_SHADER_BITS, and has a bit
     *     set that is not recognized, the error INVALID_VALUE is generated."
-    *
-    * NOT YET SUPPORTED:
-    * GL_TESS_CONTROL_SHADER_BIT
-    * GL_TESS_EVALUATION_SHADER_BIT
     */
    any_valid_stages = GL_VERTEX_SHADER_BIT | GL_FRAGMENT_SHADER_BIT;
    if (_mesa_has_geometry_shaders(ctx))
       any_valid_stages |= GL_GEOMETRY_SHADER_BIT;
+   if (_mesa_has_tessellation(ctx))
+      any_valid_stages |= GL_TESS_CONTROL_SHADER_BIT |
+                          GL_TESS_EVALUATION_SHADER_BIT;
 
    if (stages != GL_ALL_SHADER_BITS && (stages & ~any_valid_stages) != 0) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glUseProgramStages(Stages)");
@@ -327,6 +326,12 @@
 
    if ((stages & GL_GEOMETRY_SHADER_BIT) != 0)
       _mesa_use_shader_program(ctx, GL_GEOMETRY_SHADER, shProg, pipe);
+
+   if ((stages & GL_TESS_CONTROL_SHADER_BIT) != 0)
+      _mesa_use_shader_program(ctx, GL_TESS_CONTROL_SHADER, shProg, pipe);
+
+   if ((stages & GL_TESS_EVALUATION_SHADER_BIT) != 0)
+      _mesa_use_shader_program(ctx, GL_TESS_EVALUATION_SHADER, shProg, pipe);
 }
 
 /**
@@ -588,6 +593,7 @@
    /* Are geometry shaders available in this context?
     */
    const bool has_gs = _mesa_has_geometry_shaders(ctx);
+   const bool has_tess = _mesa_has_tessellation(ctx); /* tess available? */
 
    if (!pipe) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -615,11 +621,17 @@
          ? pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name : 0;
       return;
    case GL_TESS_EVALUATION_SHADER:
-      /* NOT YET SUPPORTED */
-      break;
+      if (!has_tess)
+         break;
+      *params = pipe->CurrentProgram[MESA_SHADER_TESS_EVAL]
+         ? pipe->CurrentProgram[MESA_SHADER_TESS_EVAL]->Name : 0;
+      return;
    case GL_TESS_CONTROL_SHADER:
-      /* NOT YET SUPPORTED */
-      break;
+      if (!has_tess)
+         break;
+      *params = pipe->CurrentProgram[MESA_SHADER_TESS_CTRL]
+         ? pipe->CurrentProgram[MESA_SHADER_TESS_CTRL]->Name : 0;
+      return;
    case GL_GEOMETRY_SHADER:
       if (!has_gs)
          break;
@@ -635,7 +647,7 @@
    }
 
    _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramPipelineiv(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
 /**
@@ -674,6 +686,38 @@
    return status;
 }
 
+static bool
+program_stages_interleaved_illegally(const struct gl_pipeline_object *pipe)
+{
+   struct gl_shader_program *last_seen = NULL;
+   unsigned stage;
+
+   /* Detect the forbidden binding pattern A -> B -> A, where any run of
+    * empty stages or unrelated programs may sit between the three.
+    */
+   for (stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+      struct gl_shader_program *here = pipe->CurrentProgram[stage];
+      unsigned next;
+
+      /* Unbound stages, and stages continuing the same program, are fine. */
+      if (here == NULL || here == last_seen)
+         continue;
+
+      /* A different program took over at this stage.  If the previous
+       * program shows up again further down the pipe, it is interleaved.
+       * (With no previous program yet, there is nothing to search for.)
+       */
+      for (next = stage + 1; last_seen && next < MESA_SHADER_STAGES; next++) {
+         if (pipe->CurrentProgram[next] == last_seen)
+            return true;
+      }
+
+      last_seen = here;
+   }
+
+   return false;
+}
+
 extern GLboolean
 _mesa_validate_program_pipeline(struct gl_context* ctx,
                                 struct gl_pipeline_object *pipe,
@@ -722,24 +766,13 @@
     *         - One program object is active for at least two shader stages
     *           and a second program is active for a shader stage between two
     *           stages for which the first program was active."
-    *
-    * Without Tesselation, the only case where this can occur is the geometry
-    * shader between the fragment shader and vertex shader.
     */
-   if (pipe->CurrentProgram[MESA_SHADER_GEOMETRY]
-       && pipe->CurrentProgram[MESA_SHADER_FRAGMENT]
-       && pipe->CurrentProgram[MESA_SHADER_VERTEX]) {
-      if (pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name == pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name &&
-          pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name != pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name) {
-         pipe->InfoLog =
-            ralloc_asprintf(pipe,
-                            "Program %d is active for geometry stage between "
-                            "two stages for which another program %d is "
-                            "active",
-                            pipe->CurrentProgram[MESA_SHADER_GEOMETRY]->Name,
-                            pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name);
-         goto err;
-      }
+   if (program_stages_interleaved_illegally(pipe)) {
+      pipe->InfoLog =
+         ralloc_strdup(pipe,
+                       "Program is active for multiple shader stages with an "
+                       "intervening stage provided by another program");
+      goto err;
    }
 
    /* Section 2.11.11 (Shader Execution), subheading "Validation," of the
@@ -756,7 +789,9 @@
     *           executable vertex shader."
     */
    if (!pipe->CurrentProgram[MESA_SHADER_VERTEX]
-       && pipe->CurrentProgram[MESA_SHADER_GEOMETRY]) {
+       && (pipe->CurrentProgram[MESA_SHADER_GEOMETRY] ||
+           pipe->CurrentProgram[MESA_SHADER_TESS_CTRL] ||
+           pipe->CurrentProgram[MESA_SHADER_TESS_EVAL])) {
       pipe->InfoLog = ralloc_strdup(pipe, "Program lacks a vertex shader");
       goto err;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pixel.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pixel.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pixel.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pixel.c	2015-09-16 14:36:10.000000000 +0000
@@ -455,12 +455,12 @@
    /* special cases */
    case GL_PIXEL_MAP_I_TO_I:
       for (i = 0; i < mapsize; i++) {
-         values[i] = (GLushort) CLAMP(ctx->PixelMaps.ItoI.Map[i], 0.0, 65535.);
+         values[i] = (GLushort) CLAMP(ctx->PixelMaps.ItoI.Map[i], 0.0F, 65535.0F);
       }
       break;
    case GL_PIXEL_MAP_S_TO_S:
       for (i = 0; i < mapsize; i++) {
-         values[i] = (GLushort) CLAMP(ctx->PixelMaps.StoS.Map[i], 0.0, 65535.);
+         values[i] = (GLushort) CLAMP(ctx->PixelMaps.StoS.Map[i], 0.0F, 65535.0F);
       }
       break;
    default:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pixeltransfer.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pixeltransfer.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/pixeltransfer.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/pixeltransfer.c	2015-09-16 14:36:10.000000000 +0000
@@ -35,6 +35,7 @@
 #include "pixeltransfer.h"
 #include "imports.h"
 #include "mtypes.h"
+#include "util/rounding.h"
 
 
 /*
@@ -47,25 +48,25 @@
                           GLfloat rBias, GLfloat gBias,
                           GLfloat bBias, GLfloat aBias)
 {
-   if (rScale != 1.0 || rBias != 0.0) {
+   if (rScale != 1.0F || rBias != 0.0F) {
       GLuint i;
       for (i = 0; i < n; i++) {
          rgba[i][RCOMP] = rgba[i][RCOMP] * rScale + rBias;
       }
    }
-   if (gScale != 1.0 || gBias != 0.0) {
+   if (gScale != 1.0F || gBias != 0.0F) {
       GLuint i;
       for (i = 0; i < n; i++) {
          rgba[i][GCOMP] = rgba[i][GCOMP] * gScale + gBias;
       }
    }
-   if (bScale != 1.0 || bBias != 0.0) {
+   if (bScale != 1.0F || bBias != 0.0F) {
       GLuint i;
       for (i = 0; i < n; i++) {
          rgba[i][BCOMP] = rgba[i][BCOMP] * bScale + bBias;
       }
    }
-   if (aScale != 1.0 || aBias != 0.0) {
+   if (aScale != 1.0F || aBias != 0.0F) {
       GLuint i;
       for (i = 0; i < n; i++) {
          rgba[i][ACOMP] = rgba[i][ACOMP] * aScale + aBias;
@@ -94,10 +95,10 @@
       GLfloat g = CLAMP(rgba[i][GCOMP], 0.0F, 1.0F);
       GLfloat b = CLAMP(rgba[i][BCOMP], 0.0F, 1.0F);
       GLfloat a = CLAMP(rgba[i][ACOMP], 0.0F, 1.0F);
-      rgba[i][RCOMP] = rMap[F_TO_I(r * rscale)];
-      rgba[i][GCOMP] = gMap[F_TO_I(g * gscale)];
-      rgba[i][BCOMP] = bMap[F_TO_I(b * bscale)];
-      rgba[i][ACOMP] = aMap[F_TO_I(a * ascale)];
+      rgba[i][RCOMP] = rMap[(int)_mesa_lroundevenf(r * rscale)];
+      rgba[i][GCOMP] = gMap[(int)_mesa_lroundevenf(g * gscale)];
+      rgba[i][BCOMP] = bMap[(int)_mesa_lroundevenf(b * bscale)];
+      rgba[i][ACOMP] = aMap[(int)_mesa_lroundevenf(a * ascale)];
    }
 }
 
@@ -236,7 +237,7 @@
       GLuint i;
       for (i = 0; i < n; i++) {
          const GLuint j = indexes[i] & mask;
-         indexes[i] = F_TO_I(ctx->PixelMaps.ItoI.Map[j]);
+         indexes[i] = _mesa_lroundevenf(ctx->PixelMaps.ItoI.Map[j]);
       }
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/points.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/points.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/points.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/points.c	2015-09-16 14:36:10.000000000 +0000
@@ -45,7 +45,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   if (size <= 0.0) {
+   if (size <= 0.0F) {
       _mesa_error( ctx, GL_INVALID_VALUE, "glPointSize" );
       return;
    }
@@ -119,9 +119,9 @@
             return;
          FLUSH_VERTICES(ctx, _NEW_POINT);
          COPY_3V(ctx->Point.Params, params);
-         ctx->Point._Attenuated = (ctx->Point.Params[0] != 1.0 ||
-                                   ctx->Point.Params[1] != 0.0 ||
-                                   ctx->Point.Params[2] != 0.0);
+         ctx->Point._Attenuated = (ctx->Point.Params[0] != 1.0F ||
+                                   ctx->Point.Params[1] != 0.0F ||
+                                   ctx->Point.Params[2] != 0.0F);
          break;
       case GL_POINT_SIZE_MIN_EXT:
          if (params[0] < 0.0F) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/polygon.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/polygon.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/polygon.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/polygon.c	2015-09-16 14:36:10.000000000 +0000
@@ -56,7 +56,7 @@
    GET_CURRENT_CONTEXT(ctx);
 
    if (MESA_VERBOSE&VERBOSE_API)
-      _mesa_debug(ctx, "glCullFace %s\n", _mesa_lookup_enum_by_nr(mode));
+      _mesa_debug(ctx, "glCullFace %s\n", _mesa_enum_to_string(mode));
 
    if (mode!=GL_FRONT && mode!=GL_BACK && mode!=GL_FRONT_AND_BACK) {
       _mesa_error( ctx, GL_INVALID_ENUM, "glCullFace" );
@@ -91,16 +91,16 @@
    GET_CURRENT_CONTEXT(ctx);
 
    if (MESA_VERBOSE&VERBOSE_API)
-      _mesa_debug(ctx, "glFrontFace %s\n", _mesa_lookup_enum_by_nr(mode));
+      _mesa_debug(ctx, "glFrontFace %s\n", _mesa_enum_to_string(mode));
+
+   if (ctx->Polygon.FrontFace == mode)
+      return;
 
    if (mode!=GL_CW && mode!=GL_CCW) {
       _mesa_error( ctx, GL_INVALID_ENUM, "glFrontFace" );
       return;
    }
 
-   if (ctx->Polygon.FrontFace == mode)
-      return;
-
    FLUSH_VERTICES(ctx, _NEW_POLYGON);
    ctx->Polygon.FrontFace = mode;
 
@@ -128,8 +128,8 @@
 
    if (MESA_VERBOSE&VERBOSE_API)
       _mesa_debug(ctx, "glPolygonMode %s %s\n",
-                  _mesa_lookup_enum_by_nr(face),
-                  _mesa_lookup_enum_by_nr(mode));
+                  _mesa_enum_to_string(face),
+                  _mesa_enum_to_string(mode));
 
    if (mode!=GL_POINT && mode!=GL_LINE && mode!=GL_FILL) {
       _mesa_error( ctx, GL_INVALID_ENUM, "glPolygonMode(mode)" );
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/program_resource.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/program_resource.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/program_resource.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/program_resource.c	2015-09-16 14:36:10.000000000 +0000
@@ -28,10 +28,11 @@
 #include "main/mtypes.h"
 #include "main/shaderapi.h"
 #include "main/shaderobj.h"
+#include "main/context.h"
 #include "program_resource.h"
-
+#include "ir_uniform.h"
 static bool
-supported_interface_enum(GLenum iface)
+supported_interface_enum(struct gl_context *ctx, GLenum iface)
 {
    switch (iface) {
    case GL_UNIFORM:
@@ -42,17 +43,21 @@
    case GL_ATOMIC_COUNTER_BUFFER:
       return true;
    case GL_VERTEX_SUBROUTINE:
-   case GL_TESS_CONTROL_SUBROUTINE:
-   case GL_TESS_EVALUATION_SUBROUTINE:
-   case GL_GEOMETRY_SUBROUTINE:
    case GL_FRAGMENT_SUBROUTINE:
-   case GL_COMPUTE_SUBROUTINE:
    case GL_VERTEX_SUBROUTINE_UNIFORM:
-   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
-   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
-   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
    case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+      return _mesa_has_shader_subroutine(ctx);
+   case GL_GEOMETRY_SUBROUTINE:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+      return _mesa_has_geometry_shaders(ctx) && _mesa_has_shader_subroutine(ctx);
+   case GL_COMPUTE_SUBROUTINE:
    case GL_COMPUTE_SUBROUTINE_UNIFORM:
+      return _mesa_has_compute_shaders(ctx) && _mesa_has_shader_subroutine(ctx);
+   case GL_TESS_CONTROL_SUBROUTINE:
+   case GL_TESS_EVALUATION_SUBROUTINE:
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+      return _mesa_has_tessellation(ctx) && _mesa_has_shader_subroutine(ctx);
    case GL_BUFFER_VARIABLE:
    case GL_SHADER_STORAGE_BLOCK:
    default:
@@ -79,9 +84,9 @@
    }
 
    /* Validate interface. */
-   if (!supported_interface_enum(programInterface)) {
+   if (!supported_interface_enum(ctx, programInterface)) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s)",
-                  _mesa_lookup_enum_by_nr(programInterface));
+                  _mesa_enum_to_string(programInterface));
       return;
    }
 
@@ -96,8 +101,8 @@
       if (programInterface == GL_ATOMIC_COUNTER_BUFFER) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glGetProgramInterfaceiv(%s pname %s)",
-                     _mesa_lookup_enum_by_nr(programInterface),
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(programInterface),
+                     _mesa_enum_to_string(pname));
          return;
       }
       /* Name length consists of base name, 3 additional chars '[0]' if
@@ -138,15 +143,40 @@
       default:
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "glGetProgramInterfaceiv(%s pname %s)",
-                    _mesa_lookup_enum_by_nr(programInterface),
-                    _mesa_lookup_enum_by_nr(pname));
+                    _mesa_enum_to_string(programInterface),
+                    _mesa_enum_to_string(pname));
       };
       break;
    case GL_MAX_NUM_COMPATIBLE_SUBROUTINES:
+      switch (programInterface) {
+      case GL_VERTEX_SUBROUTINE_UNIFORM:
+      case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+      case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+      case GL_COMPUTE_SUBROUTINE_UNIFORM:
+      case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+      case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: {
+         for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
+            if (shProg->ProgramResourceList[i].Type == programInterface) {
+               struct gl_uniform_storage *uni =
+                  (struct gl_uniform_storage *)
+                  shProg->ProgramResourceList[i].Data;
+               *params = MAX2(*params, uni->num_compatible_subroutines);
+            }
+         }
+         break;
+      }
+
+      default:
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glGetProgramInterfaceiv(%s pname %s)",
+                     _mesa_enum_to_string(programInterface),
+                     _mesa_enum_to_string(pname));
+      }
+      break;
    default:
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glGetProgramInterfaceiv(pname %s)",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
    }
 }
 
@@ -173,32 +203,12 @@
    return false;
 }
 
-/**
- * Checks if given name index is legal for GetProgramResourceIndex,
- * check is written to be compatible with GL_ARB_array_of_arrays.
- */
-static bool
-valid_program_resource_index_name(const GLchar *name)
-{
-   const char *array = strstr(name, "[");
-   const char *close = strrchr(name, ']');
-
-   /* Not array, no need for the check. */
-   if (!array)
-      return true;
-
-   /* Last array index has to be zero. */
-   if (!close || *--close != '0')
-      return false;
-
-   return true;
-}
-
 GLuint GLAPIENTRY
 _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface,
                               const GLchar *name)
 {
    GET_CURRENT_CONTEXT(ctx);
+   unsigned array_index = 0;
    struct gl_program_resource *res;
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
@@ -206,6 +216,11 @@
    if (!shProg || !name)
       return GL_INVALID_INDEX;
 
+   if (!supported_interface_enum(ctx, programInterface)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)",
+                  _mesa_enum_to_string(programInterface));
+      return GL_INVALID_INDEX;
+   }
    /*
     * For the interface TRANSFORM_FEEDBACK_VARYING, the value INVALID_INDEX
     * should be returned when querying the index assigned to the special names
@@ -217,24 +232,33 @@
       return GL_INVALID_INDEX;
 
    switch (programInterface) {
+   case GL_TESS_CONTROL_SUBROUTINE:
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+   case GL_TESS_EVALUATION_SUBROUTINE:
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+   case GL_COMPUTE_SUBROUTINE:
+   case GL_COMPUTE_SUBROUTINE_UNIFORM:
+   case GL_GEOMETRY_SUBROUTINE:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+   case GL_VERTEX_SUBROUTINE:
+   case GL_FRAGMENT_SUBROUTINE:
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
    case GL_PROGRAM_INPUT:
    case GL_PROGRAM_OUTPUT:
    case GL_UNIFORM:
    case GL_TRANSFORM_FEEDBACK_VARYING:
-      /* Validate name syntax for array variables */
-      if (!valid_program_resource_index_name(name))
-         return GL_INVALID_INDEX;
-      /* fall-through */
    case GL_UNIFORM_BLOCK:
-      res = _mesa_program_resource_find_name(shProg, programInterface, name);
-      if (!res)
+      res = _mesa_program_resource_find_name(shProg, programInterface, name,
+                                             &array_index);
+      if (!res || array_index > 0)
          return GL_INVALID_INDEX;
 
       return _mesa_program_resource_index(shProg, res);
    case GL_ATOMIC_COUNTER_BUFFER:
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)",
-                  _mesa_lookup_enum_by_nr(programInterface));
+                  _mesa_enum_to_string(programInterface));
    }
 
    return GL_INVALID_INDEX;
@@ -250,19 +274,13 @@
       _mesa_lookup_shader_program_err(ctx, program,
                                       "glGetProgramResourceName");
 
-   /* Set user friendly return values in case of errors. */
-   if (name)
-      *name = '\0';
-   if (length)
-      *length = 0;
-
    if (!shProg || !name)
       return;
 
    if (programInterface == GL_ATOMIC_COUNTER_BUFFER ||
-       !supported_interface_enum(programInterface)) {
+       !supported_interface_enum(ctx, programInterface)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceName(%s)",
-                  _mesa_lookup_enum_by_nr(programInterface));
+                  _mesa_enum_to_string(programInterface));
       return;
    }
 
@@ -300,36 +318,6 @@
                                 propCount, props, bufSize, length, params);
 }
 
-/**
- * Function verifies syntax of given name for GetProgramResourceLocation
- * and GetProgramResourceLocationIndex for the following cases:
- *
- * "array element portion of a string passed to GetProgramResourceLocation
- * or GetProgramResourceLocationIndex must not have, a "+" sign, extra
- * leading zeroes, or whitespace".
- *
- * Check is written to be compatible with GL_ARB_array_of_arrays.
- */
-static bool
-invalid_array_element_syntax(const GLchar *name)
-{
-   char *first = strchr(name, '[');
-   char *last = strrchr(name, '[');
-
-   if (!first)
-      return false;
-
-   /* No '+' or ' ' allowed anywhere. */
-   if (strchr(first, '+') || strchr(first, ' '))
-      return true;
-
-   /* Check that last array index is 0. */
-   if (last[1] == '0' && last[2] != ']')
-      return true;
-
-   return false;
-}
-
 static struct gl_shader_program *
 lookup_linked_program(GLuint program, const char *caller)
 {
@@ -356,7 +344,7 @@
    struct gl_shader_program *shProg =
       lookup_linked_program(program, "glGetProgramResourceLocation");
 
-   if (!shProg || !name || invalid_array_element_syntax(name))
+   if (!shProg || !name)
       return -1;
 
    /* Validate programInterface. */
@@ -366,24 +354,33 @@
    case GL_PROGRAM_OUTPUT:
       break;
 
-   /* For reference valid cases requiring additional extension support:
-    * GL_ARB_shader_subroutine
-    * GL_ARB_tessellation_shader
-    * GL_ARB_compute_shader
-    */
    case GL_VERTEX_SUBROUTINE_UNIFORM:
-   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
-   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
-   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
    case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+      if (!_mesa_has_shader_subroutine(ctx))
+         goto fail;
+      break;
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+      if (!_mesa_has_geometry_shaders(ctx) || !_mesa_has_shader_subroutine(ctx))
+         goto fail;
+      break;
    case GL_COMPUTE_SUBROUTINE_UNIFORM:
-
+      if (!_mesa_has_compute_shaders(ctx) || !_mesa_has_shader_subroutine(ctx))
+         goto fail;
+      break;
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+      if (!_mesa_has_tessellation(ctx) || !_mesa_has_shader_subroutine(ctx))
+         goto fail;
+      break;
    default:
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceLocation(%s %s)",
-                  _mesa_lookup_enum_by_nr(programInterface), name);
+         goto fail;
    }
 
    return _mesa_program_resource_location(shProg, programInterface, name);
+fail:
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceLocation(%s %s)",
+               _mesa_enum_to_string(programInterface), name);
+   return -1;
 }
 
 /**
@@ -397,7 +394,7 @@
    struct gl_shader_program *shProg =
       lookup_linked_program(program, "glGetProgramResourceLocationIndex");
 
-   if (!shProg || !name || invalid_array_element_syntax(name))
+   if (!shProg || !name)
       return -1;
 
    /* From the GL_ARB_program_interface_query spec:
@@ -408,7 +405,7 @@
    if (programInterface != GL_PROGRAM_OUTPUT) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glGetProgramResourceLocationIndex(%s)",
-                  _mesa_lookup_enum_by_nr(programInterface));
+                  _mesa_enum_to_string(programInterface));
       return -1;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/queryobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/queryobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/queryobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/queryobj.c	2015-09-16 14:36:10.000000000 +0000
@@ -217,7 +217,7 @@
 
    case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
    case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
-      if (ctx->Extensions.ARB_tessellation_shader)
+      if (_mesa_has_tessellation(ctx))
          return get_pipe_stats_binding_point(ctx, target);
       else
          return NULL;
@@ -295,7 +295,7 @@
       break;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = %s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -390,7 +390,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glBeginQueryIndexed(%s, %u, %u)\n",
-                  _mesa_lookup_enum_by_nr(target), index, id);
+                  _mesa_enum_to_string(target), index, id);
 
    if (!query_error_check_index(ctx, target, index))
       return;
@@ -412,7 +412,7 @@
    if (*bindpt) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBeginQuery{Indexed}(target=%s is active)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -496,7 +496,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glEndQueryIndexed(%s, %u)\n",
-                  _mesa_lookup_enum_by_nr(target), index);
+                  _mesa_enum_to_string(target), index);
 
    if (!query_error_check_index(ctx, target, index))
       return;
@@ -516,8 +516,8 @@
    if (q && q->Target != target) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glEndQuery(target=%s with active query of target %s)",
-                  _mesa_lookup_enum_by_nr(target),
-                  _mesa_lookup_enum_by_nr(q->Target));
+                  _mesa_enum_to_string(target),
+                  _mesa_enum_to_string(q->Target));
       return;
    }
 
@@ -553,7 +553,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glQueryCounter(%u, %s)\n", id,
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
 
    /* error checking */
    if (target != GL_TIMESTAMP) {
@@ -628,9 +628,9 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glGetQueryIndexediv(%s, %u, %s)\n",
-                  _mesa_lookup_enum_by_nr(target),
+                  _mesa_enum_to_string(target),
                   index,
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
 
    if (!query_error_check_index(ctx, target, index))
       return;
@@ -712,7 +712,7 @@
          default:
             _mesa_problem(ctx,
                           "Unknown target in glGetQueryIndexediv(target = %s)",
-                          _mesa_lookup_enum_by_nr(target));
+                          _mesa_enum_to_string(target));
             *params = 0;
             break;
          }
@@ -740,7 +740,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glGetQueryObjectiv(%u, %s)\n", id,
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
 
    if (id)
       q = _mesa_lookup_query_object(ctx, id);
@@ -794,7 +794,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glGetQueryObjectuiv(%u, %s)\n", id,
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
 
    if (id)
       q = _mesa_lookup_query_object(ctx, id);
@@ -851,7 +851,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glGetQueryObjecti64v(%u, %s)\n", id,
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
 
    if (id)
       q = _mesa_lookup_query_object(ctx, id);
@@ -894,7 +894,7 @@
 
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glGetQueryObjectui64v(%u, %s)\n", id,
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
 
    if (id)
       q = _mesa_lookup_query_object(ctx, id);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/readpix.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/readpix.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/readpix.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/readpix.c	2015-09-16 14:36:10.000000000 +0000
@@ -47,17 +47,14 @@
  * Return true if the conversion L=R+G+B is needed.
  */
 GLboolean
-_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format)
+_mesa_need_rgb_to_luminance_conversion(GLenum srcBaseFormat,
+                                       GLenum dstBaseFormat)
 {
-   GLenum baseTexFormat = _mesa_get_format_base_format(texFormat);
-
-   return (baseTexFormat == GL_RG ||
-           baseTexFormat == GL_RGB ||
-           baseTexFormat == GL_RGBA) &&
-          (format == GL_LUMINANCE ||
-           format == GL_LUMINANCE_ALPHA ||
-           format == GL_LUMINANCE_INTEGER_EXT ||
-           format == GL_LUMINANCE_ALPHA_INTEGER_EXT);
+   return (srcBaseFormat == GL_RG ||
+           srcBaseFormat == GL_RGB ||
+           srcBaseFormat == GL_RGBA) &&
+          (dstBaseFormat == GL_LUMINANCE ||
+           dstBaseFormat == GL_LUMINANCE_ALPHA);
 }
 
 /**
@@ -89,6 +86,8 @@
                                   GLboolean uses_blit)
 {
    GLbitfield transferOps = ctx->_ImageTransferState;
+   GLenum srcBaseFormat = _mesa_get_format_base_format(texFormat);
+   GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
 
    if (format == GL_DEPTH_COMPONENT ||
        format == GL_DEPTH_STENCIL ||
@@ -125,7 +124,7 @@
     * have any effect anyway.
     */
    if (_mesa_get_format_datatype(texFormat) == GL_UNSIGNED_NORMALIZED &&
-       !_mesa_need_rgb_to_luminance_conversion(texFormat, format)) {
+       !_mesa_need_rgb_to_luminance_conversion(srcBaseFormat, dstBaseFormat)) {
       transferOps &= ~IMAGE_CLAMP_BIT;
    }
 
@@ -148,7 +147,7 @@
 {
    struct gl_renderbuffer *rb =
          _mesa_get_read_renderbuffer_for_format(ctx, format);
-   GLenum srcType;
+   GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
 
    assert(rb);
 
@@ -169,22 +168,8 @@
 
    default:
       /* Color formats. */
-      if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) {
-         return GL_TRUE;
-      }
-
-      /* Conversion between signed and unsigned integers needs masking
-       * (it isn't just memcpy). */
-      srcType = _mesa_get_format_datatype(rb->Format);
-
-      if ((srcType == GL_INT &&
-           (type == GL_UNSIGNED_INT ||
-            type == GL_UNSIGNED_SHORT ||
-            type == GL_UNSIGNED_BYTE)) ||
-          (srcType == GL_UNSIGNED_INT &&
-           (type == GL_INT ||
-            type == GL_SHORT ||
-            type == GL_BYTE))) {
+      if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
+                                                 dstBaseFormat)) {
          return GL_TRUE;
       }
 
@@ -283,7 +268,7 @@
    GLubyte *map, *dst;
    int stride, dstStride, j;
 
-   if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0)
+   if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F)
       return GL_FALSE;
 
    if (packing->SwapBytes)
@@ -452,6 +437,7 @@
    uint8_t rebase_swizzle[4];
    struct gl_framebuffer *fb = ctx->ReadBuffer;
    struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
+   GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
 
    if (!rb)
       return;
@@ -463,7 +449,7 @@
    dst_stride = _mesa_image_row_stride(packing, width, format, type);
    dst_format = _mesa_format_from_format_and_type(format, type);
    convert_rgb_to_lum =
-      _mesa_need_rgb_to_luminance_conversion(rb->Format, format);
+      _mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat, dstBaseFormat);
    dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
                                            format, type, 0, 0);
 
@@ -537,7 +523,8 @@
        * convert to, then we can convert directly into the dst buffer and avoid
        * the final conversion/copy from the rgba buffer to the dst buffer.
        */
-      if (dst_format == rgba_format) {
+      if (dst_format == rgba_format &&
+          dst_stride == rgba_stride) {
          need_convert = false;
          rgba = dst;
       } else {
@@ -627,15 +614,8 @@
 done_swap:
    /* Handle byte swapping if required */
    if (packing->SwapBytes) {
-      GLint swapSize = _mesa_sizeof_packed_type(type);
-      if (swapSize == 2 || swapSize == 4) {
-         int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-         assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-         if (swapSize == 2)
-            _mesa_swap2((GLushort *) dst, width * height * swapsPerPixel);
-         else if (swapSize == 4)
-            _mesa_swap4((GLuint *) dst, width * height * swapsPerPixel);
-      }
+      _mesa_swap_bytes_2d_image(format, type, packing,
+                                width, height, dst, dst);
    }
 
 done_unmap:
@@ -835,7 +815,7 @@
                           const struct gl_pixelstore_attrib *packing )
 {
    const GLboolean scaleOrBias
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+      = ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F;
    const GLboolean stencilTransfer = ctx->Pixel.IndexShift
       || ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag;
    GLubyte *dst;
@@ -930,10 +910,8 @@
    const GLenum data_type = _mesa_get_format_datatype(rb->Format);
    GLboolean is_unsigned_int = GL_FALSE;
    GLboolean is_signed_int = GL_FALSE;
-
-   if (!_mesa_is_color_format(internalFormat)) {
-      return GL_INVALID_OPERATION;
-   }
+   GLboolean is_float_depth = (internalFormat == GL_DEPTH_COMPONENT32F) ||
+         (internalFormat == GL_DEPTH32F_STENCIL8);
 
    is_unsigned_int = _mesa_is_enum_format_unsigned_int(internalFormat);
    if (!is_unsigned_int) {
@@ -964,6 +942,43 @@
           (is_unsigned_int && type == GL_UNSIGNED_INT))
          return GL_NO_ERROR;
       break;
+   case GL_DEPTH_STENCIL:
+      switch (type) {
+      case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+         if (is_float_depth)
+            return GL_NO_ERROR;
+         break;
+      case GL_UNSIGNED_INT_24_8:
+         if (!is_float_depth)
+            return GL_NO_ERROR;
+         break;
+      default:
+         return GL_INVALID_ENUM;
+      }
+      break;
+   case GL_DEPTH_COMPONENT:
+      switch (type) {
+      case GL_FLOAT:
+         if (is_float_depth)
+            return GL_NO_ERROR;
+         break;
+      case GL_UNSIGNED_SHORT:
+      case GL_UNSIGNED_INT_24_8:
+         if (!is_float_depth)
+            return GL_NO_ERROR;
+         break;
+      default:
+         return GL_INVALID_ENUM;
+      }
+      break;
+   case GL_STENCIL_INDEX:
+      switch (type) {
+      case GL_UNSIGNED_BYTE:
+         return GL_NO_ERROR;
+      default:
+         return GL_INVALID_ENUM;
+      }
+      break;
    }
 
    return GL_INVALID_OPERATION;
@@ -986,8 +1001,8 @@
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glReadPixels(%d, %d, %s, %s, %p)\n",
                   width, height,
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type),
+                  _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type),
                   pixels);
 
    if (width < 0 || height < 0) {
@@ -1037,15 +1052,10 @@
          err = read_pixels_es3_error_check(format, type, rb);
       }
 
-      if (err == GL_NO_ERROR && (format == GL_DEPTH_COMPONENT
-          || format == GL_DEPTH_STENCIL)) {
-         err = GL_INVALID_ENUM;
-      }
-
       if (err != GL_NO_ERROR) {
          _mesa_error(ctx, err, "glReadPixels(invalid format %s and/or type %s)",
-                     _mesa_lookup_enum_by_nr(format),
-                     _mesa_lookup_enum_by_nr(type));
+                     _mesa_enum_to_string(format),
+                     _mesa_enum_to_string(type));
          return;
       }
    }
@@ -1053,8 +1063,8 @@
    err = _mesa_error_check_format_and_type(ctx, format, type);
    if (err != GL_NO_ERROR) {
       _mesa_error(ctx, err, "glReadPixels(invalid format %s and/or type %s)",
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type));
+                  _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type));
       return;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/readpix.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/readpix.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/readpix.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/readpix.h	2015-09-16 14:36:10.000000000 +0000
@@ -38,7 +38,8 @@
                                  GLenum type, GLboolean uses_blit);
 
 extern GLboolean
-_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format);
+_mesa_need_rgb_to_luminance_conversion(GLenum srcBaseFormat,
+                                       GLenum dstBaseFormat);
 
 extern GLboolean
 _mesa_need_luminance_to_rgb_conversion(GLenum srcBaseFormat,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/samplerobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/samplerobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/samplerobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/samplerobj.c	2015-09-16 14:36:10.000000000 +0000
@@ -689,7 +689,7 @@
    if (samp->MaxAnisotropy == param)
       return GL_FALSE;
 
-   if (param < 1.0)
+   if (param < 1.0F)
       return INVALID_VALUE;
 
    flush(ctx);
@@ -813,7 +813,7 @@
       break;
    case INVALID_PNAME:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteri(pname=%s)\n",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       break;
    case INVALID_PARAM:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteri(param=%d)\n",
@@ -906,7 +906,7 @@
       break;
    case INVALID_PNAME:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterf(pname=%s)\n",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       break;
    case INVALID_PARAM:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterf(param=%f)\n",
@@ -1006,7 +1006,7 @@
       break;
    case INVALID_PNAME:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteriv(pname=%s)\n",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       break;
    case INVALID_PARAM:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteriv(param=%d)\n",
@@ -1099,7 +1099,7 @@
       break;
    case INVALID_PNAME:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterfv(pname=%s)\n",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       break;
    case INVALID_PARAM:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterfv(param=%f)\n",
@@ -1184,7 +1184,7 @@
       break;
    case INVALID_PNAME:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIiv(pname=%s)\n",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       break;
    case INVALID_PARAM:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIiv(param=%d)\n",
@@ -1270,7 +1270,7 @@
       break;
    case INVALID_PNAME:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIuiv(pname=%s)\n",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       break;
    case INVALID_PARAM:
       _mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIuiv(param=%u)\n",
@@ -1380,7 +1380,7 @@
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameteriv(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
 
@@ -1466,7 +1466,7 @@
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterfv(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
 
@@ -1545,7 +1545,7 @@
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterIiv(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
 
@@ -1624,7 +1624,7 @@
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterIuiv(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderapi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderapi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderapi.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderapi.c	2015-09-16 14:36:10.000000000 +0000
@@ -110,6 +110,7 @@
     */
    struct gl_shader_compiler_options options;
    gl_shader_stage sh;
+   int i;
 
    memset(&options, 0, sizeof(options));
    options.MaxUnrollIterations = 32;
@@ -126,6 +127,12 @@
    /* Extended for ARB_separate_shader_objects */
    ctx->Shader.RefCount = 1;
    mtx_init(&ctx->Shader.Mutex, mtx_plain);
+
+   ctx->TessCtrlProgram.patch_vertices = 3;
+   for (i = 0; i < 4; ++i)
+      ctx->TessCtrlProgram.patch_default_outer_level[i] = 1.0;
+   for (i = 0; i < 2; ++i)
+      ctx->TessCtrlProgram.patch_default_inner_level[i] = 1.0;
 }
 
 
@@ -199,6 +206,9 @@
       return ctx == NULL || ctx->Extensions.ARB_vertex_shader;
    case GL_GEOMETRY_SHADER_ARB:
       return ctx == NULL || _mesa_has_geometry_shaders(ctx);
+   case GL_TESS_CONTROL_SHADER:
+   case GL_TESS_EVALUATION_SHADER:
+      return ctx == NULL || _mesa_has_tessellation(ctx);
    case GL_COMPUTE_SHADER:
       return ctx == NULL || ctx->Extensions.ARB_compute_shader;
    default:
@@ -415,6 +425,8 @@
          /* sanity check - make sure the new list's entries are sensible */
          for (j = 0; j < shProg->NumShaders; j++) {
             assert(shProg->Shaders[j]->Type == GL_VERTEX_SHADER ||
+                   shProg->Shaders[j]->Type == GL_TESS_CONTROL_SHADER ||
+                   shProg->Shaders[j]->Type == GL_TESS_EVALUATION_SHADER ||
                    shProg->Shaders[j]->Type == GL_GEOMETRY_SHADER ||
                    shProg->Shaders[j]->Type == GL_FRAGMENT_SHADER);
             assert(shProg->Shaders[j]->RefCount > 0);
@@ -511,6 +523,57 @@
 
 
 /**
+ * Check if a tessellation control shader query is valid at this time.
+ * If not, report an error and return false.
+ *
+ * From GL 4.0 section 6.1.12 (Shader and Program Queries):
+ *
+ *     "If TESS_CONTROL_OUTPUT_VERTICES is queried for a program which has
+ *     not been linked successfully, or which does not contain objects to
+ *     form a tessellation control shader, then an INVALID_OPERATION error is
+ *     generated."
+ */
+static bool
+check_tcs_query(struct gl_context *ctx, const struct gl_shader_program *shProg)
+{
+   if (shProg->LinkStatus &&
+       shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL] != NULL) {
+      return true;
+   }
+
+   _mesa_error(ctx, GL_INVALID_OPERATION,
+               "glGetProgramiv(linked tessellation control shader required)");
+   return false;
+}
+
+
+/**
+ * Check if a tessellation evaluation shader query is valid at this time.
+ * If not, report an error and return false.
+ *
+ * From GL 4.0 section 6.1.12 (Shader and Program Queries):
+ *
+ *     "If any of the pname values in this paragraph are queried for a program
+ *     which has not been linked successfully, or which does not contain
+ *     objects to form a tessellation evaluation shader, then an
+ *     INVALID_OPERATION error is generated."
+ *
+ */
+static bool
+check_tes_query(struct gl_context *ctx, const struct gl_shader_program *shProg)
+{
+   if (shProg->LinkStatus &&
+       shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL] != NULL) {
+      return true;
+   }
+
+   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramiv(linked tessellation "
+               "evaluation shader required)");
+   return false;
+}
+
+
+/**
  * glGetProgramiv() - get shader program state.
  * Note that this is for GLSL shader programs, not ARB vertex/fragment
  * programs (see glGetProgramivARB).
@@ -532,7 +595,8 @@
    /* True if geometry shaders (of the form that was adopted into GLSL 1.50
     * and GL 3.2) are available in this context
     */
-   const bool has_core_gs = _mesa_is_desktop_gl(ctx) && ctx->Version >= 32;
+   const bool has_core_gs = _mesa_has_geometry_shaders(ctx);
+   const bool has_tess = _mesa_has_tessellation(ctx);
 
    /* Are uniform buffer objects available in this context?
     */
@@ -569,13 +633,13 @@
       *params = _mesa_longest_attribute_name_length(shProg);
       return;
    case GL_ACTIVE_UNIFORMS:
-      *params = shProg->NumUserUniformStorage - shProg->NumHiddenUniforms;
+      *params = shProg->NumUniformStorage - shProg->NumHiddenUniforms;
       return;
    case GL_ACTIVE_UNIFORM_MAX_LENGTH: {
       unsigned i;
       GLint max_len = 0;
       const unsigned num_uniforms =
-         shProg->NumUserUniformStorage - shProg->NumHiddenUniforms;
+         shProg->NumUniformStorage - shProg->NumHiddenUniforms;
 
       for (i = 0; i < num_uniforms; i++) {
 	 /* Add one for the terminating NUL character for a non-array, and
@@ -711,12 +775,44 @@
    case GL_PROGRAM_SEPARABLE:
       *params = shProg->SeparateShader;
       return;
+
+   /* ARB_tessellation_shader */
+   case GL_TESS_CONTROL_OUTPUT_VERTICES:
+      if (!has_tess)
+         break;
+      if (check_tcs_query(ctx, shProg))
+         *params = shProg->TessCtrl.VerticesOut;
+      return;
+   case GL_TESS_GEN_MODE:
+      if (!has_tess)
+         break;
+      if (check_tes_query(ctx, shProg))
+         *params = shProg->TessEval.PrimitiveMode;
+      return;
+   case GL_TESS_GEN_SPACING:
+      if (!has_tess)
+         break;
+      if (check_tes_query(ctx, shProg))
+         *params = shProg->TessEval.Spacing;
+      return;
+   case GL_TESS_GEN_VERTEX_ORDER:
+      if (!has_tess)
+         break;
+      if (check_tes_query(ctx, shProg))
+         *params = shProg->TessEval.VertexOrder;
+      return;
+   case GL_TESS_GEN_POINT_MODE:
+      if (!has_tess)
+         break;
+      if (check_tes_query(ctx, shProg))
+         *params = shProg->TessEval.PointMode;
+      return;
    default:
       break;
    }
 
    _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramiv(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
 
@@ -992,6 +1088,12 @@
    if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY])
       printf("  geom prog %u\n",
 	     shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program->Id);
+   if (shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL])
+      printf("  tesc prog %u\n",
+	     shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program->Id);
+   if (shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL])
+      printf("  tese prog %u\n",
+	     shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->Program->Id);
 }
 
 
@@ -1037,11 +1139,9 @@
        */
       switch (stage) {
       case MESA_SHADER_VERTEX:
-	 /* Empty for now. */
-	 break;
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
       case MESA_SHADER_GEOMETRY:
-	 /* Empty for now. */
-	 break;
       case MESA_SHADER_COMPUTE:
          /* Empty for now. */
          break;
@@ -1071,6 +1171,7 @@
       use_shader_program(ctx, i, shProg, &ctx->Shader);
    _mesa_active_program(ctx, shProg, "glUseProgram");
 
+   _mesa_shader_program_init_subroutine_defaults(shProg);
    if (ctx->Driver.UseProgram)
       ctx->Driver.UseProgram(ctx, shProg);
 }
@@ -1172,7 +1273,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    if (MESA_VERBOSE & VERBOSE_API)
-      _mesa_debug(ctx, "glCreateShader %s\n", _mesa_lookup_enum_by_nr(type));
+      _mesa_debug(ctx, "glCreateShader %s\n", _mesa_enum_to_string(type));
    return create_shader(ctx, type);
 }
 
@@ -1331,7 +1432,7 @@
 _mesa_GetObjectParameterfvARB(GLhandleARB object, GLenum pname,
                               GLfloat *params)
 {
-   GLint iparams[1];  /* XXX is one element enough? */
+   GLint iparams[1] = {0};  /* XXX is one element enough? */
    _mesa_GetObjectParameterivARB(object, pname, iparams);
    params[0] = (GLfloat) iparams[0];
 }
@@ -1460,7 +1561,7 @@
  */
 void GLAPIENTRY
 _mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
-                      const GLcharARB * const * string, const GLint * length)
+                   const GLcharARB * const * string, const GLint * length)
 {
    GET_CURRENT_CONTEXT(ctx);
    GLint *offsets;
@@ -1868,7 +1969,7 @@
 
    default:
       _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameteri(pname=%s)",
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       return;
    }
 
@@ -1876,7 +1977,7 @@
    _mesa_error(ctx, GL_INVALID_VALUE,
                "glProgramParameteri(pname=%s, value=%d): "
                "value must be 0 or 1.",
-               _mesa_lookup_enum_by_nr(pname),
+               _mesa_enum_to_string(pname),
                value);
 }
 
@@ -1894,13 +1995,85 @@
 }
 
 
-static GLuint
-_mesa_create_shader_program(struct gl_context* ctx, GLboolean separate,
-                            GLenum type, GLsizei count, const GLchar* const *strings)
+/**
+ * Copy program-specific data generated by linking from the gl_shader_program
+ * object to a specific gl_program object.
+ */
+void
+_mesa_copy_linked_program_data(gl_shader_stage type,
+                               const struct gl_shader_program *src,
+                               struct gl_program *dst)
+{
+   switch (type) {
+   case MESA_SHADER_VERTEX:
+      dst->UsesClipDistanceOut = src->Vert.UsesClipDistance;
+      break;
+   case MESA_SHADER_TESS_CTRL: {
+      struct gl_tess_ctrl_program *dst_tcp =
+         (struct gl_tess_ctrl_program *) dst;
+      dst_tcp->VerticesOut = src->TessCtrl.VerticesOut;
+      break;
+   }
+   case MESA_SHADER_TESS_EVAL: {
+      struct gl_tess_eval_program *dst_tep =
+         (struct gl_tess_eval_program *) dst;
+      dst_tep->PrimitiveMode = src->TessEval.PrimitiveMode;
+      dst_tep->Spacing = src->TessEval.Spacing;
+      dst_tep->VertexOrder = src->TessEval.VertexOrder;
+      dst_tep->PointMode = src->TessEval.PointMode;
+      dst->UsesClipDistanceOut = src->TessEval.UsesClipDistance;
+      break;
+   }
+   case MESA_SHADER_GEOMETRY: {
+      struct gl_geometry_program *dst_gp = (struct gl_geometry_program *) dst;
+      dst_gp->VerticesIn = src->Geom.VerticesIn;
+      dst_gp->VerticesOut = src->Geom.VerticesOut;
+      dst_gp->Invocations = src->Geom.Invocations;
+      dst_gp->InputType = src->Geom.InputType;
+      dst_gp->OutputType = src->Geom.OutputType;
+      dst->UsesClipDistanceOut = src->Geom.UsesClipDistance;
+      dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
+      dst_gp->UsesStreams = src->Geom.UsesStreams;
+      break;
+   }
+   case MESA_SHADER_FRAGMENT: {
+      struct gl_fragment_program *dst_fp = (struct gl_fragment_program *) dst;
+      dst_fp->FragDepthLayout = src->FragDepthLayout;
+      break;
+   }
+   case MESA_SHADER_COMPUTE: {
+      struct gl_compute_program *dst_cp = (struct gl_compute_program *) dst;
+      int i;
+      for (i = 0; i < 3; i++)
+         dst_cp->LocalSize[i] = src->Comp.LocalSize[i];
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+/**
+ * ARB_separate_shader_objects: Compile & Link Program
+ */
+GLuint GLAPIENTRY
+_mesa_CreateShaderProgramv(GLenum type, GLsizei count,
+                           const GLchar* const *strings)
 {
+   GET_CURRENT_CONTEXT(ctx);
+
    const GLuint shader = create_shader(ctx, type);
    GLuint program = 0;
 
+   /*
+    * According to OpenGL 4.5 and OpenGL ES 3.1 standards, section 7.3:
+    * GL_INVALID_VALUE should be generated if count < 0
+    */
+   if (count < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glCreateShaderProgram (count < 0)");
+      return program;
+   }
+
    if (shader) {
       _mesa_ShaderSource(shader, count, strings, NULL);
 
@@ -1915,7 +2088,7 @@
 	 shProg = _mesa_lookup_shader_program(ctx, program);
 	 sh = _mesa_lookup_shader(ctx, shader);
 
-	 shProg->SeparateShader = separate;
+	 shProg->SeparateShader = GL_TRUE;
 
 	 get_shaderiv(ctx, shader, GL_COMPILE_STATUS, &compiled);
 	 if (compiled) {
@@ -1931,8 +2104,8 @@
 	    }
 #endif
 	 }
-
-	 ralloc_strcat(&shProg->InfoLog, sh->InfoLog);
+         if (sh->InfoLog)
+            ralloc_strcat(&shProg->InfoLog, sh->InfoLog);
       }
 
       delete_shader(ctx, shader);
@@ -1943,55 +2116,565 @@
 
 
 /**
- * Copy program-specific data generated by linking from the gl_shader_program
- * object to a specific gl_program object.
+ * For GL_ARB_tessellation_shader
  */
-void
-_mesa_copy_linked_program_data(gl_shader_stage type,
-                               const struct gl_shader_program *src,
-                               struct gl_program *dst)
+extern void GLAPIENTRY
+_mesa_PatchParameteri(GLenum pname, GLint value)
 {
-   switch (type) {
-   case MESA_SHADER_VERTEX:
-      dst->UsesClipDistanceOut = src->Vert.UsesClipDistance;
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!_mesa_has_tessellation(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glPatchParameteri");
+      return;
+   }
+
+   if (pname != GL_PATCH_VERTICES) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glPatchParameteri");
+      return;
+   }
+
+   if (value <= 0 || value > ctx->Const.MaxPatchVertices) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glPatchParameteri");
+      return;
+   }
+
+   ctx->TessCtrlProgram.patch_vertices = value;
+}
+
+
+extern void GLAPIENTRY
+_mesa_PatchParameterfv(GLenum pname, const GLfloat *values)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!_mesa_has_tessellation(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glPatchParameterfv");
+      return;
+   }
+
+   switch(pname) {
+   case GL_PATCH_DEFAULT_OUTER_LEVEL:
+      FLUSH_VERTICES(ctx, 0);
+      memcpy(ctx->TessCtrlProgram.patch_default_outer_level, values,
+             4 * sizeof(GLfloat));
+      ctx->NewDriverState |= ctx->DriverFlags.NewDefaultTessLevels;
+      return;
+   case GL_PATCH_DEFAULT_INNER_LEVEL:
+      FLUSH_VERTICES(ctx, 0);
+      memcpy(ctx->TessCtrlProgram.patch_default_inner_level, values,
+             2 * sizeof(GLfloat));
+      ctx->NewDriverState |= ctx->DriverFlags.NewDefaultTessLevels;
+      return;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM, "glPatchParameterfv");
+      return;
+   }
+}
+
+/**
+ * ARB_shader_subroutine
+ */
+GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+                                   const GLchar *name)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetSubroutineUniformLocation";
+   struct gl_shader_program *shProg;
+   GLenum resource_type;
+   gl_shader_stage stage;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return -1;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return -1;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+      return -1;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return -1;
+   }
+
+   resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+   return _mesa_program_resource_location(shProg, resource_type, name);
+}
+
+GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+                         const GLchar *name)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetSubroutineIndex";
+   struct gl_shader_program *shProg;
+   struct gl_program_resource *res;
+   GLenum resource_type;
+   gl_shader_stage stage;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return -1;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return -1;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+      return -1;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return -1;
+   }
+
+   resource_type = _mesa_shader_stage_to_subroutine(stage);
+   res = _mesa_program_resource_find_name(shProg, resource_type, name, NULL);
+   if (!res) {
+      /* Unknown name is not an error; (GLuint)-1 is GL_INVALID_INDEX. */
+      return -1;
+   }
+
+   return _mesa_program_resource_index(shProg, res);
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+                                   GLuint index, GLenum pname, GLint *values)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetActiveSubroutineUniformiv";
+   struct gl_shader_program *shProg;
+   struct gl_shader *sh;
+   gl_shader_stage stage;
+   struct gl_program_resource *res;
+   const struct gl_uniform_storage *uni;
+   GLenum resource_type;
+   int count, i, j;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+      return;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+
+   sh = shProg->_LinkedShaders[stage];
+   if (!sh) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   switch (pname) {
+   case GL_NUM_COMPATIBLE_SUBROUTINES: {
+      res = _mesa_program_resource_find_index(shProg, resource_type, index);
+      if (res) {
+         uni = res->Data;
+         values[0] = uni->num_compatible_subroutines;
+      }
       break;
-   case MESA_SHADER_GEOMETRY: {
-      struct gl_geometry_program *dst_gp = (struct gl_geometry_program *) dst;
-      dst_gp->VerticesIn = src->Geom.VerticesIn;
-      dst_gp->VerticesOut = src->Geom.VerticesOut;
-      dst_gp->Invocations = src->Geom.Invocations;
-      dst_gp->InputType = src->Geom.InputType;
-      dst_gp->OutputType = src->Geom.OutputType;
-      dst->UsesClipDistanceOut = src->Geom.UsesClipDistance;
-      dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
-      dst_gp->UsesStreams = src->Geom.UsesStreams;
    }
+   case GL_COMPATIBLE_SUBROUTINES: {
+      res = _mesa_program_resource_find_index(shProg, resource_type, index);
+      if (res) {
+         uni = res->Data;
+         count = 0;
+         for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+            struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i];
+            for (j = 0; j < fn->num_compat_types; j++) {
+               if (fn->types[j] == uni->type) {
+                  values[count++] = i;   /* one entry per compatible function */
+                  break;
+               }
+            }
+         }
+      }
       break;
-   case MESA_SHADER_FRAGMENT: {
-      struct gl_fragment_program *dst_fp = (struct gl_fragment_program *) dst;
-      dst_fp->FragDepthLayout = src->FragDepthLayout;
    }
+   case GL_UNIFORM_SIZE:
+      res = _mesa_program_resource_find_index(shProg, resource_type, index);
+      if (res) {
+         uni = res->Data;
+         values[0] = uni->array_elements ? uni->array_elements : 1;
+      }
       break;
-   case MESA_SHADER_COMPUTE: {
-      struct gl_compute_program *dst_cp = (struct gl_compute_program *) dst;
-      int i;
-      for (i = 0; i < 3; i++)
-         dst_cp->LocalSize[i] = src->Comp.LocalSize[i];
+   case GL_UNIFORM_NAME_LENGTH:
+      res = _mesa_program_resource_find_index(shProg, resource_type, index);
+      if (res) {
+         values[0] = strlen(_mesa_program_resource_name(res)) + 1
+            + ((_mesa_program_resource_array_size(res) != 0) ? 3 : 0);
+      }
+      break;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s", api_name);   /* bad pname */
+      return;
+   }
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+                                     GLuint index, GLsizei bufsize,
+                                     GLsizei *length, GLchar *name)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetActiveSubroutineUniformName";
+   struct gl_shader_program *shProg;
+   GLenum resource_type;
+   gl_shader_stage stage;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+      return;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+   /* get program resource name */
+   _mesa_get_program_resource_name(shProg, resource_type,
+                                   index, bufsize,
+                                   length, name, api_name);
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+                              GLuint index, GLsizei bufsize,
+                              GLsizei *length, GLchar *name)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetActiveSubroutineName";
+   struct gl_shader_program *shProg;
+   GLenum resource_type;
+   gl_shader_stage stage;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+      return;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   if (!shProg->_LinkedShaders[stage]) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+   resource_type = _mesa_shader_stage_to_subroutine(stage);
+   _mesa_get_program_resource_name(shProg, resource_type,
+                                   index, bufsize,
+                                   length, name, api_name);
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+                            const GLuint *indices)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glUniformSubroutinesuiv";
+   struct gl_shader_program *shProg;
+   struct gl_shader *sh;
+   gl_shader_stage stage;
+   int i;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   shProg = ctx->_Shader->CurrentProgram[stage];
+   if (!shProg) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   sh = shProg->_LinkedShaders[stage];
+   if (!sh) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (count != sh->NumSubroutineUniformRemapTable) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name);
+      return;
+   }
+
+   i = 0;
+   while (i < count) {   /* pre-test: validate nothing when count is 0 */
+      struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i];
+      int uni_count = (uni && uni->array_elements) ? uni->array_elements : 1;
+      int j, k;
+      if (!uni) { i++; continue; }   /* empty remap slot: nothing to check */
+      for (j = i; j < i + uni_count; j++) {
+         struct gl_subroutine_function *subfn;
+         if (indices[j] >= sh->NumSubroutineFunctions) {
+            _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name);
+            return;
+         }
+
+         subfn = &sh->SubroutineFunctions[indices[j]];
+         for (k = 0; k < subfn->num_compat_types; k++) {
+            if (subfn->types[k] == uni->type)
+               break;
+         }
+         if (k == subfn->num_compat_types) {
+            _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+            return;
+         }
+      }
+      i += uni_count;
+   }
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
+   i = 0;
+   while (i < count) {   /* second pass: commit the validated indices */
+      struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i];
+      int uni_count = (uni && uni->array_elements) ? uni->array_elements : 1;
+      if (!uni) { i++; continue; }   /* empty remap slot: nothing to store */
+      memcpy(&uni->storage[0], &indices[i],
+             sizeof(GLuint) * uni_count);
+
+      uni->initialized = true;
+      _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count);
+      i += uni_count;
+   }
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+                              GLuint *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetUniformSubroutineuiv";
+   struct gl_shader_program *shProg;
+   struct gl_shader *sh;
+   gl_shader_stage stage;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   shProg = ctx->_Shader->CurrentProgram[stage];
+   if (!shProg) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   sh = shProg->_LinkedShaders[stage];
+   if (!sh) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (location >= sh->NumSubroutineUniformRemapTable) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name);
+      return;
+   }
+
+   {
+      struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[location];
+      int offset = location - uni->subroutine[stage].index;
+      memcpy(params, &uni->storage[offset],
+	     sizeof(GLuint));
+   }
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+                        GLenum pname, GLint *values)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   const char *api_name = "glGetProgramStageiv";
+   struct gl_shader_program *shProg;
+   struct gl_shader *sh;
+   gl_shader_stage stage;
+
+   if (!_mesa_has_shader_subroutine(ctx)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   if (!_mesa_validate_shader_target(ctx, shadertype)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
+   }
+
+   shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+   if (!shProg)
+      return;
+
+   stage = _mesa_shader_enum_to_shader_stage(shadertype);
+   sh = shProg->_LinkedShaders[stage];
+   if (!sh) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+      return;
    }
+
+   switch (pname) {
+   case GL_ACTIVE_SUBROUTINES:
+      values[0] = sh->NumSubroutineFunctions;
+      break;
+   case GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS:
+      values[0] = sh->NumSubroutineUniformRemapTable;
       break;
+   case GL_ACTIVE_SUBROUTINE_UNIFORMS:
+      values[0] = sh->NumSubroutineUniformTypes;
+      break;
+   case GL_ACTIVE_SUBROUTINE_MAX_LENGTH:
+   {
+      unsigned i;
+      GLint max_len = 0;
+      GLenum resource_type;
+      struct gl_program_resource *res;
+
+      resource_type = _mesa_shader_stage_to_subroutine(stage);
+      for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+         res = _mesa_program_resource_find_index(shProg, resource_type, i);
+         if (res) {
+            const GLint len = strlen(_mesa_program_resource_name(res)) + 1;
+            if (len > max_len)
+               max_len = len;
+         }
+      }
+      values[0] = max_len;
+      break;
+   }
+   case GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH:
+   {
+      unsigned i;
+      GLint max_len = 0;
+      GLenum resource_type;
+      struct gl_program_resource *res;
+
+      resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+      for (i = 0; i < sh->NumSubroutineUniformRemapTable; i++) {
+         res = _mesa_program_resource_find_index(shProg, resource_type, i);
+         if (res) {
+            const GLint len = strlen(_mesa_program_resource_name(res)) + 1
+               + ((_mesa_program_resource_array_size(res) != 0) ? 3 : 0);
+
+            if (len > max_len)
+               max_len = len;
+         }
+      }
+      values[0] = max_len;
+      break;
+   }
    default:
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s", api_name);
+      values[0] = -1;
       break;
    }
 }
 
-/**
- * ARB_separate_shader_objects: Compile & Link Program
- */
-GLuint GLAPIENTRY
-_mesa_CreateShaderProgramv(GLenum type, GLsizei count,
-                           const GLchar* const *strings)
+static int
+find_compat_subroutine(struct gl_shader *sh, const struct glsl_type *type)
 {
-   GET_CURRENT_CONTEXT(ctx);
+   int i, j;
+
+   for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+      struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i];
+      for (j = 0; j < fn->num_compat_types; j++) {
+         if (fn->types[j] == type)
+            return i;
+      }
+   }
+   return 0;
+}
+
+static void
+_mesa_shader_init_subroutine_defaults(struct gl_shader *sh)
+{
+   int i, j;
+
+   for (i = 0; i < sh->NumSubroutineUniformRemapTable; i++) {
+      struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i];
+      int uni_count;
+      int val;
+
+      if (!uni)
+         continue;
+      uni_count = uni->array_elements ? uni->array_elements : 1;
+      val = find_compat_subroutine(sh, uni->type);
+
+      for (j = 0; j < uni_count; j++)
+         memcpy(&uni->storage[j], &val, sizeof(int));
+      uni->initialized = true;
+      _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count);
+   }
+}
+
+void
+_mesa_shader_program_init_subroutine_defaults(struct gl_shader_program *shProg)
+{
+   int i;
 
-   return _mesa_create_shader_program(ctx, GL_TRUE, type, count, strings);
+   if (!shProg)
+      return;
+
+   for (i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (!shProg->_LinkedShaders[i])
+         continue;
+
+      _mesa_shader_init_subroutine_defaults(shProg->_LinkedShaders[i]);
+   }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderapi.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderapi.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderapi.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderapi.h	2015-09-16 14:36:10.000000000 +0000
@@ -232,7 +232,8 @@
 
 extern struct gl_program_resource *
 _mesa_program_resource_find_name(struct gl_shader_program *shProg,
-                                 GLenum programInterface, const char *name);
+                                 GLenum programInterface, const char *name,
+                                 unsigned *array_index);
 
 extern struct gl_program_resource *
 _mesa_program_resource_find_index(struct gl_shader_program *shProg,
@@ -264,6 +265,51 @@
                              GLsizei bufSize, GLsizei *length,
                              GLint *params);
 
+/* GL_ARB_tessellation_shader */
+extern void GLAPIENTRY
+_mesa_PatchParameteri(GLenum pname, GLint value);
+
+extern void GLAPIENTRY
+_mesa_PatchParameterfv(GLenum pname, const GLfloat *values);
+
+/* GL_ARB_shader_subroutine */
+void
+_mesa_shader_program_init_subroutine_defaults(struct gl_shader_program *shProg);
+
+extern GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+                                   const GLchar *name);
+
+extern GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+                         const GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+                                   GLuint index, GLenum pname, GLint *values);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+                                     GLuint index, GLsizei bufsize,
+                                     GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+                              GLuint index, GLsizei bufsize,
+                              GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+                            const GLuint *indices);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+                              GLuint *params);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+                        GLenum pname, GLint *values);
+
 #ifdef __cplusplus
 }
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderimage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderimage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderimage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderimage.c	2015-09-16 14:36:10.000000000 +0000
@@ -331,17 +331,88 @@
    }
 }
 
+/**
+ * Return whether \p format is accepted as a shader image format in \p ctx:
+ * the first group below always, the second group only on desktop GL.
+ */
+static bool
+is_image_format_supported(const struct gl_context *ctx, GLenum format)
+{
+   switch (format) {
+   /* Formats supported on both desktop and ES GL, c.f. table 8.27 of the
+    * OpenGL ES 3.1 specification.
+    */
+   case GL_RGBA32F:
+   case GL_RGBA16F:
+   case GL_R32F:
+   case GL_RGBA32UI:
+   case GL_RGBA16UI:
+   case GL_RGBA8UI:
+   case GL_R32UI:
+   case GL_RGBA32I:
+   case GL_RGBA16I:
+   case GL_RGBA8I:
+   case GL_R32I:
+   case GL_RGBA8:
+   case GL_RGBA8_SNORM:
+      return true;
+
+   /* Formats supported on unextended desktop GL and the original
+    * ARB_shader_image_load_store extension, c.f. table 3.21 of the OpenGL 4.2
+    * specification.
+    */
+   case GL_RG32F:
+   case GL_RG16F:
+   case GL_R11F_G11F_B10F:
+   case GL_R16F:
+   case GL_RGB10_A2UI:
+   case GL_RG32UI:
+   case GL_RG16UI:
+   case GL_RG8UI:
+   case GL_R16UI:
+   case GL_R8UI:
+   case GL_RG32I:
+   case GL_RG16I:
+   case GL_RG8I:
+   case GL_R16I:
+   case GL_R8I:
+   case GL_RGBA16:
+   case GL_RGB10_A2:
+   case GL_RG16:
+   case GL_RG8:
+   case GL_R16:
+   case GL_R8:
+   case GL_RGBA16_SNORM:
+   case GL_RG16_SNORM:
+   case GL_RG8_SNORM:
+   case GL_R16_SNORM:
+   case GL_R8_SNORM:
+      return _mesa_is_desktop_gl(ctx);
+
+   default:
+      return false;
+   }
+}
+
+struct gl_image_unit
+_mesa_default_image_unit(struct gl_context *ctx)
+{
+   /* Read-only; GL_R8 on desktop GL, else GL_R32UI; other fields zeroed. */
+   struct gl_image_unit unit = { .Access = GL_READ_ONLY };
+
+   unit.Format = _mesa_is_desktop_gl(ctx) ? GL_R8 : GL_R32UI;
+   unit._ActualFormat = _mesa_get_shader_image_format(unit.Format);
+
+   return unit;
+}
+
 void
 _mesa_init_image_units(struct gl_context *ctx)
 {
    unsigned i;
 
-   for (i = 0; i < ARRAY_SIZE(ctx->ImageUnits); ++i) {
-      struct gl_image_unit *u = &ctx->ImageUnits[i];
-      u->Access = GL_READ_ONLY;
-      u->Format = GL_R8;
-      u->_ActualFormat = _mesa_get_shader_image_format(u->Format);
-   }
+   for (i = 0; i < ARRAY_SIZE(ctx->ImageUnits); ++i)
+      ctx->ImageUnits[i] = _mesa_default_image_unit(ctx);
 }
 
 static GLboolean
@@ -362,7 +433,7 @@
       return GL_FALSE;
 
    if (_mesa_tex_target_is_layered(t->Target) &&
-       u->Layer >= _mesa_get_texture_layers(t, u->Level))
+       u->_Layer >= _mesa_get_texture_layers(t, u->Level))
       return GL_FALSE;
 
    if (t->Target == GL_TEXTURE_BUFFER) {
@@ -370,7 +441,7 @@
 
    } else {
       struct gl_texture_image *img = (t->Target == GL_TEXTURE_CUBE_MAP ?
-                                      t->Image[u->Layer][u->Level] :
+                                      t->Image[u->_Layer][u->Level] :
                                       t->Image[0][u->Level]);
 
       if (!img || img->Border || img->NumSamples > ctx->Const.MaxImageSamples)
@@ -442,7 +513,7 @@
       return GL_FALSE;
    }
 
-   if (!_mesa_get_shader_image_format(format)) {
+   if (!is_image_format_supported(ctx, format)) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glBindImageTexture(format)");
       return GL_FALSE;
    }
@@ -475,6 +546,18 @@
          return;
       }
 
+      /* From section 8.22 "Texture Image Loads and Stores" of the OpenGL ES
+       * 3.1 spec:
+       *
+       * "An INVALID_OPERATION error is generated if texture is not the name
+       *  of an immutable texture object."
+       */
+      if (_mesa_is_gles(ctx) && !t->Immutable) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glBindImageTexture(!immutable)");
+         return;
+      }
+
       _mesa_reference_texobj(&u->TexObj, t);
    } else {
       _mesa_reference_texobj(&u->TexObj, NULL);
@@ -488,7 +571,8 @@
 
    if (u->TexObj && _mesa_tex_target_is_layered(u->TexObj->Target)) {
       u->Layered = layered;
-      u->Layer = (layered ? 0 : layer);
+      u->Layer = layer;
+      u->_Layer = (u->Layered ? 0 : u->Layer);
    } else {
       u->Layered = GL_FALSE;
       u->Layer = 0;
@@ -599,7 +683,7 @@
             tex_format = image->InternalFormat;
          }
 
-         if (_mesa_get_shader_image_format(tex_format) == MESA_FORMAT_NONE) {
+         if (!is_image_format_supported(ctx, tex_format)) {
             /* The ARB_multi_bind spec says:
              *
              *   "An INVALID_OPERATION error is generated if the internal
@@ -610,7 +694,7 @@
                         "glBindImageTextures(the internal format %s of "
                         "the level zero texture image of textures[%d]=%u "
                         "is not supported)",
-                        _mesa_lookup_enum_by_nr(tex_format),
+                        _mesa_enum_to_string(tex_format),
                         i, texture);
             continue;
          }
@@ -619,7 +703,7 @@
          _mesa_reference_texobj(&u->TexObj, texObj);
          u->Level = 0;
          u->Layered = _mesa_tex_target_is_layered(texObj->Target);
-         u->Layer = 0;
+         u->_Layer = u->Layer = 0;
          u->Access = GL_READ_WRITE;
          u->Format = tex_format;
          u->_ActualFormat = _mesa_get_shader_image_format(tex_format);
@@ -629,7 +713,7 @@
          _mesa_reference_texobj(&u->TexObj, NULL);
          u->Level = 0;
          u->Layered = GL_FALSE;
-         u->Layer = 0;
+         u->_Layer = u->Layer = 0;
          u->Access = GL_READ_ONLY;
          u->Format = GL_R8;
          u->_ActualFormat = MESA_FORMAT_R_UNORM8;
@@ -653,3 +737,57 @@
    if (ctx->Driver.MemoryBarrier)
       ctx->Driver.MemoryBarrier(ctx, barriers);
 }
+
+/**
+ * Implementation of glMemoryBarrierByRegion.
+ *
+ * Hands \p barriers to the driver's MemoryBarrier hook when one is set.
+ * GL_ALL_BARRIER_BITS is replaced by the bits accepted by this entry point;
+ * any other value carrying a bit outside that set raises GL_INVALID_VALUE
+ * and issues no barrier.
+ */
+void GLAPIENTRY
+_mesa_MemoryBarrierByRegion(GLbitfield barriers)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT |
+                                 GL_FRAMEBUFFER_BARRIER_BIT |
+                                 GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
+                                 GL_SHADER_STORAGE_BARRIER_BIT |
+                                 GL_TEXTURE_FETCH_BARRIER_BIT |
+                                 GL_UNIFORM_BARRIER_BIT;
+
+   if (ctx->Driver.MemoryBarrier) {
+      /* From section 7.11.2 of the OpenGL ES 3.1 specification:
+       *
+       *    "When barriers is ALL_BARRIER_BITS, shader memory accesses will be
+       *     synchronized relative to all these barrier bits, but not to other
+       *     barrier bits specific to MemoryBarrier."
+       *
+       * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then all
+       * barriers allowed by glMemoryBarrierByRegion should be activated.
+       */
+      if (barriers == GL_ALL_BARRIER_BITS) {
+         ctx->Driver.MemoryBarrier(ctx, all_allowed_bits);
+         return;
+      }
+
+      /* From section 7.11.2 of the OpenGL ES 3.1 specification:
+       *
+       *    "An INVALID_VALUE error is generated if barriers is not the special
+       *     value ALL_BARRIER_BITS, and has any bits set other than those
+       *     described above."
+       *
+       * Bail out after raising the error so the rejected bits never reach
+       * the driver.
+       */
+      if ((barriers & ~all_allowed_bits) != 0) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "glMemoryBarrierByRegion(unsupported barrier bit)");
+         return;
+      }
+
+      ctx->Driver.MemoryBarrier(ctx, barriers);
+   }
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderimage.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderimage.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderimage.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderimage.h	2015-09-16 14:36:10.000000000 +0000
@@ -43,6 +43,12 @@
 _mesa_get_shader_image_format(GLenum format);
 
 /**
+ * Get a single image unit struct with the default state.
+ */
+struct gl_image_unit
+_mesa_default_image_unit(struct gl_context *ctx);
+
+/**
  * Initialize a context's shader image units to the default state.
  */
 void
@@ -68,6 +74,9 @@
 void GLAPIENTRY
 _mesa_MemoryBarrier(GLbitfield barriers);
 
+void GLAPIENTRY
+_mesa_MemoryBarrierByRegion(GLbitfield barriers);
+
 #ifdef __cplusplus
 }
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderobj.c	2015-09-16 14:36:10.000000000 +0000
@@ -282,10 +282,10 @@
    unsigned i;
 
    if (shProg->UniformStorage) {
-      for (i = 0; i < shProg->NumUserUniformStorage; ++i)
+      for (i = 0; i < shProg->NumUniformStorage; ++i)
          _mesa_uniform_detach_all_driver_storage(&shProg->UniformStorage[i]);
       ralloc_free(shProg->UniformStorage);
-      shProg->NumUserUniformStorage = 0;
+      shProg->NumUniformStorage = 0;
       shProg->UniformStorage = NULL;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderobj.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderobj.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shaderobj.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shaderobj.h	2015-09-16 14:36:10.000000000 +0000
@@ -111,6 +111,10 @@
       return MESA_SHADER_FRAGMENT;
    case GL_GEOMETRY_SHADER:
       return MESA_SHADER_GEOMETRY;
+   case GL_TESS_CONTROL_SHADER:
+      return MESA_SHADER_TESS_CTRL;
+   case GL_TESS_EVALUATION_SHADER:
+      return MESA_SHADER_TESS_EVAL;
    case GL_COMPUTE_SHADER:
       return MESA_SHADER_COMPUTE;
    default:
@@ -119,6 +123,134 @@
    }
 }
 
+/**
+ * Number of characters to skip at the start of a subroutine uniform's
+ * internal name: the 8-byte stage prefix below plus another underscore.
+ */
+#define MESA_SUBROUTINE_PREFIX_LEN 9
+
+/**
+ * Return the internal name prefix used for subroutine uniforms of \p stage,
+ * or NULL when \p stage is not one of the six stages listed here.
+ */
+static inline const char *
+_mesa_shader_stage_to_subroutine_prefix(gl_shader_stage stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      return "__subu_v";
+   case MESA_SHADER_GEOMETRY:
+      return "__subu_g";
+   case MESA_SHADER_FRAGMENT:
+      return "__subu_f";
+   case MESA_SHADER_COMPUTE:
+      return "__subu_c";
+   case MESA_SHADER_TESS_CTRL:
+      return "__subu_t";
+   case MESA_SHADER_TESS_EVAL:
+      return "__subu_e";
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Map a GL_*_SUBROUTINE_UNIFORM interface enum to its shader stage.
+ * Unrecognized values map to MESA_SHADER_VERTEX.
+ */
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine_uniform(GLenum subuniform)
+{
+   switch (subuniform) {
+   default:
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+      return MESA_SHADER_VERTEX;
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+      return MESA_SHADER_GEOMETRY;
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+      return MESA_SHADER_FRAGMENT;
+   case GL_COMPUTE_SUBROUTINE_UNIFORM:
+      return MESA_SHADER_COMPUTE;
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+      return MESA_SHADER_TESS_CTRL;
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+      return MESA_SHADER_TESS_EVAL;
+   }
+}
+
+/**
+ * Map a GL_*_SUBROUTINE interface enum to its shader stage.
+ *
+ * Unrecognized values map to MESA_SHADER_VERTEX, matching the other
+ * helpers here, so that control never runs off the end of the function.
+ */
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine(GLenum subroutine)
+{
+   switch (subroutine) {
+   default:
+   case GL_VERTEX_SUBROUTINE:
+      return MESA_SHADER_VERTEX;
+   case GL_GEOMETRY_SUBROUTINE:
+      return MESA_SHADER_GEOMETRY;
+   case GL_FRAGMENT_SUBROUTINE:
+      return MESA_SHADER_FRAGMENT;
+   case GL_COMPUTE_SUBROUTINE:
+      return MESA_SHADER_COMPUTE;
+   case GL_TESS_CONTROL_SUBROUTINE:
+      return MESA_SHADER_TESS_CTRL;
+   case GL_TESS_EVALUATION_SUBROUTINE:
+      return MESA_SHADER_TESS_EVAL;
+   }
+}
+
+/**
+ * Map a shader stage to its GL_*_SUBROUTINE interface enum.
+ * Stages not listed map to GL_VERTEX_SUBROUTINE.
+ */
+static inline GLenum
+_mesa_shader_stage_to_subroutine(gl_shader_stage stage)
+{
+   switch (stage) {
+   default:
+   case MESA_SHADER_VERTEX:
+      return GL_VERTEX_SUBROUTINE;
+   case MESA_SHADER_GEOMETRY:
+      return GL_GEOMETRY_SUBROUTINE;
+   case MESA_SHADER_FRAGMENT:
+      return GL_FRAGMENT_SUBROUTINE;
+   case MESA_SHADER_COMPUTE:
+      return GL_COMPUTE_SUBROUTINE;
+   case MESA_SHADER_TESS_CTRL:
+      return GL_TESS_CONTROL_SUBROUTINE;
+   case MESA_SHADER_TESS_EVAL:
+      return GL_TESS_EVALUATION_SUBROUTINE;
+   }
+}
+
+/**
+ * Map a shader stage to its GL_*_SUBROUTINE_UNIFORM interface enum.
+ * Stages not listed map to GL_VERTEX_SUBROUTINE_UNIFORM.
+ */
+static inline GLenum
+_mesa_shader_stage_to_subroutine_uniform(gl_shader_stage stage)
+{
+   switch (stage) {
+   default:
+   case MESA_SHADER_VERTEX:
+      return GL_VERTEX_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_GEOMETRY:
+      return GL_GEOMETRY_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_FRAGMENT:
+      return GL_FRAGMENT_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_COMPUTE:
+      return GL_COMPUTE_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_TESS_CTRL:
+      return GL_TESS_CONTROL_SUBROUTINE_UNIFORM;
+   case MESA_SHADER_TESS_EVAL:
+      return GL_TESS_EVALUATION_SUBROUTINE_UNIFORM;
+   }
+}
 
 #ifdef __cplusplus
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shader_query.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shader_query.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shader_query.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shader_query.cpp	2015-09-16 14:36:10.000000000 +0000
@@ -44,7 +44,8 @@
 
 static GLint
 program_resource_location(struct gl_shader_program *shProg,
-                          struct gl_program_resource *res, const char *name);
+                          struct gl_program_resource *res, const char *name,
+                          unsigned array_index);
 
 /**
  * Declare convenience functions to return resource data in a given type.
@@ -61,6 +62,7 @@
 DECL_RESOURCE_FUNC(UNI, gl_uniform_storage);
 DECL_RESOURCE_FUNC(ATC, gl_active_atomic_buffer);
 DECL_RESOURCE_FUNC(XFB, gl_transform_feedback_varying_info);
+DECL_RESOURCE_FUNC(SUB, gl_subroutine_function);
 
 void GLAPIENTRY
 _mesa_BindAttribLocation(GLhandleARB program, GLuint index,
@@ -189,63 +191,6 @@
                                   (GLint *) type, "glGetActiveAttrib");
 }
 
-/* Locations associated with shader variables (array or non-array) can be
- * queried using its base name or using the base name appended with the
- * valid array index. For example, in case of below vertex shader, valid
- * queries can be made to know the location of "xyz", "array", "array[0]",
- * "array[1]", "array[2]" and "array[3]". In this example index reurned
- * will be 0, 0, 0, 1, 2, 3 respectively.
- *
- * [Vertex Shader]
- * layout(location=0) in vec4 xyz;
- * layout(location=1) in vec4[4] array;
- * void main()
- * { }
- *
- * This requirement came up with the addition of ARB_program_interface_query
- * to OpenGL 4.3 specification. See page 101 (page 122 of the PDF) for details.
- *
- * This utility function is used by:
- * _mesa_GetAttribLocation
- * _mesa_GetFragDataLocation
- * _mesa_GetFragDataIndex
- *
- * Returns 0:
- *    if the 'name' string matches var->name.
- * Returns 'matched index':
- *    if the 'name' string matches var->name appended with valid array index.
- */
-int static inline
-get_matching_index(const ir_variable *const var, const char *name) {
-   unsigned idx = 0;
-   const char *const paren = strchr(name, '[');
-   const unsigned len = (paren != NULL) ? paren - name : strlen(name);
-
-   if (paren != NULL) {
-      if (!var->type->is_array())
-         return -1;
-
-      char *endptr;
-      idx = (unsigned) strtol(paren + 1, &endptr, 10);
-      const unsigned idx_len = endptr != (paren + 1) ? endptr - paren - 1 : 0;
-
-      /* Validate the sub string representing index in 'name' string */
-      if ((idx > 0 && paren[1] == '0') /* leading zeroes */
-          || (idx == 0 && idx_len > 1) /* all zeroes */
-          || paren[1] == ' ' /* whitespace */
-          || endptr[0] != ']' /* closing brace */
-          || endptr[1] != '\0' /* null char */
-          || idx_len == 0 /* missing index */
-          || idx >= var->type->length) /* exceeding array bound */
-         return -1;
-   }
-
-   if (strncmp(var->name, name, len) == 0 && var->name[len] == '\0')
-      return idx;
-
-   return -1;
-}
-
 GLint GLAPIENTRY
 _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name)
 {
@@ -271,13 +216,15 @@
    if (shProg->_LinkedShaders[MESA_SHADER_VERTEX] == NULL)
       return -1;
 
+   unsigned array_index = 0;
    struct gl_program_resource *res =
-      _mesa_program_resource_find_name(shProg, GL_PROGRAM_INPUT, name);
+      _mesa_program_resource_find_name(shProg, GL_PROGRAM_INPUT, name,
+                                       &array_index);
 
    if (!res)
       return -1;
 
-   GLint loc = program_resource_location(shProg, res, name);
+   GLint loc = program_resource_location(shProg, res, name, array_index);
 
    /* The extra check against against 0 is made because of builtin-attribute
     * locations that have offset applied. Function program_resource_location
@@ -455,13 +402,15 @@
    if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL)
       return -1;
 
+   unsigned array_index = 0;
    struct gl_program_resource *res =
-      _mesa_program_resource_find_name(shProg, GL_PROGRAM_OUTPUT, name);
+      _mesa_program_resource_find_name(shProg, GL_PROGRAM_OUTPUT, name,
+                                       &array_index);
 
    if (!res)
       return -1;
 
-   GLint loc = program_resource_location(shProg, res, name);
+   GLint loc = program_resource_location(shProg, res, name, array_index);
 
    /* The extra check against against 0 is made because of builtin-attribute
     * locations that have offset applied. Function program_resource_location
@@ -479,16 +428,38 @@
 const char*
 _mesa_program_resource_name(struct gl_program_resource *res)
 {
+   const ir_variable *var;
    switch (res->Type) {
    case GL_UNIFORM_BLOCK:
       return RESOURCE_UBO(res)->Name;
    case GL_TRANSFORM_FEEDBACK_VARYING:
       return RESOURCE_XFB(res)->Name;
    case GL_PROGRAM_INPUT:
+      var = RESOURCE_VAR(res);
+      /* Special case gl_VertexIDMESA -> gl_VertexID. */
+      if (var->data.mode == ir_var_system_value &&
+          var->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
+         return "gl_VertexID";
+      }
+   /* fallthrough */
    case GL_PROGRAM_OUTPUT:
       return RESOURCE_VAR(res)->name;
    case GL_UNIFORM:
       return RESOURCE_UNI(res)->name;
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+   case GL_COMPUTE_SUBROUTINE_UNIFORM:
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+      return RESOURCE_UNI(res)->name + MESA_SUBROUTINE_PREFIX_LEN;
+   case GL_VERTEX_SUBROUTINE:
+   case GL_GEOMETRY_SUBROUTINE:
+   case GL_FRAGMENT_SUBROUTINE:
+   case GL_COMPUTE_SUBROUTINE:
+   case GL_TESS_CONTROL_SUBROUTINE:
+   case GL_TESS_EVALUATION_SUBROUTINE:
+      return RESOURCE_SUB(res)->name;
    default:
       assert(!"support for resource type not implemented");
    }
@@ -507,7 +478,19 @@
    case GL_PROGRAM_OUTPUT:
       return RESOURCE_VAR(res)->data.max_array_access;
    case GL_UNIFORM:
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+   case GL_COMPUTE_SUBROUTINE_UNIFORM:
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
       return RESOURCE_UNI(res)->array_elements;
+   case GL_VERTEX_SUBROUTINE:
+   case GL_GEOMETRY_SUBROUTINE:
+   case GL_FRAGMENT_SUBROUTINE:
+   case GL_COMPUTE_SUBROUTINE:
+   case GL_TESS_CONTROL_SUBROUTINE:
+   case GL_TESS_EVALUATION_SUBROUTINE:
    case GL_ATOMIC_COUNTER_BUFFER:
    case GL_UNIFORM_BLOCK:
       return 0;
@@ -517,27 +500,31 @@
    return 0;
 }
 
-static int
-array_index_of_resource(struct gl_program_resource *res,
-                        const char *name)
+/**
+ * Checks if array subscript is valid and if so sets array_index.
+ */
+static bool
+valid_array_index(const GLchar *name, unsigned *array_index)
 {
-   assert(res->Data);
+   long idx = 0;
+   const GLchar *out_base_name_end;
 
-   switch (res->Type) {
-   case GL_PROGRAM_INPUT:
-   case GL_PROGRAM_OUTPUT:
-      return get_matching_index(RESOURCE_VAR(res), name);
-   default:
-      assert(!"support for resource type not implemented");
-      return -1;
-   }
+   idx = parse_program_resource_name(name, &out_base_name_end);
+   if (idx < 0)
+      return false;
+
+   if (array_index)
+      *array_index = idx;
+
+   return true;
 }
 
 /* Find a program resource with specific name in given interface.
  */
 struct gl_program_resource *
 _mesa_program_resource_find_name(struct gl_shader_program *shProg,
-                                 GLenum programInterface, const char *name)
+                                 GLenum programInterface, const char *name,
+                                 unsigned *array_index)
 {
    struct gl_program_resource *res = shProg->ProgramResourceList;
    for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) {
@@ -548,26 +535,46 @@
       const char *rname = _mesa_program_resource_name(res);
       unsigned baselen = strlen(rname);
 
-      switch (programInterface) {
-      case GL_TRANSFORM_FEEDBACK_VARYING:
-      case GL_UNIFORM_BLOCK:
-      case GL_UNIFORM:
-         if (strncmp(rname, name, baselen) == 0) {
+      if (strncmp(rname, name, baselen) == 0) {
+         switch (programInterface) {
+         case GL_UNIFORM_BLOCK:
             /* Basename match, check if array or struct. */
             if (name[baselen] == '\0' ||
                 name[baselen] == '[' ||
                 name[baselen] == '.') {
                return res;
             }
+            break;
+         case GL_TRANSFORM_FEEDBACK_VARYING:
+         case GL_UNIFORM:
+         case GL_VERTEX_SUBROUTINE_UNIFORM:
+         case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+         case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+         case GL_COMPUTE_SUBROUTINE_UNIFORM:
+         case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+         case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+         case GL_VERTEX_SUBROUTINE:
+         case GL_GEOMETRY_SUBROUTINE:
+         case GL_FRAGMENT_SUBROUTINE:
+         case GL_COMPUTE_SUBROUTINE:
+         case GL_TESS_CONTROL_SUBROUTINE:
+         case GL_TESS_EVALUATION_SUBROUTINE:
+            if (name[baselen] == '.') {
+               return res;
+            }
+            /* fall-through */
+         case GL_PROGRAM_INPUT:
+         case GL_PROGRAM_OUTPUT:
+            if (name[baselen] == '\0') {
+               return res;
+            } else if (name[baselen] == '[' &&
+                valid_array_index(name, array_index)) {
+               return res;
+            }
+            break;
+         default:
+            assert(!"not implemented for given interface");
          }
-         break;
-      case GL_PROGRAM_INPUT:
-      case GL_PROGRAM_OUTPUT:
-         if (array_index_of_resource(res, name) >= 0)
-            return res;
-         break;
-      default:
-         assert(!"not implemented for given interface");
       }
    }
    return NULL;
@@ -632,6 +639,18 @@
       case GL_PROGRAM_INPUT:
       case GL_PROGRAM_OUTPUT:
       case GL_UNIFORM:
+      case GL_VERTEX_SUBROUTINE_UNIFORM:
+      case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+      case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+      case GL_COMPUTE_SUBROUTINE_UNIFORM:
+      case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+      case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+      case GL_VERTEX_SUBROUTINE:
+      case GL_GEOMETRY_SUBROUTINE:
+      case GL_FRAGMENT_SUBROUTINE:
+      case GL_COMPUTE_SUBROUTINE:
+      case GL_TESS_CONTROL_SUBROUTINE:
+      case GL_TESS_EVALUATION_SUBROUTINE:
          if (++idx == (int) index)
             return res;
          break;
@@ -700,6 +719,12 @@
    bool add_index = !(((programInterface == GL_PROGRAM_INPUT) &&
                        res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
 
+   /* Transform feedback varyings have array index already appended
+    * in their names.
+    */
+   if (programInterface == GL_TRANSFORM_FEEDBACK_VARYING)
+      add_index = false;
+
    if (add_index && _mesa_program_resource_array_size(res)) {
       int i;
 
@@ -717,16 +742,12 @@
 
 static GLint
 program_resource_location(struct gl_shader_program *shProg,
-                          struct gl_program_resource *res, const char *name)
+                          struct gl_program_resource *res, const char *name,
+                          unsigned array_index)
 {
-   unsigned index, offset;
-   int array_index = -1;
-
-   if (res->Type == GL_PROGRAM_INPUT || res->Type == GL_PROGRAM_OUTPUT) {
-      array_index = array_index_of_resource(res, name);
-      if (array_index < 0)
-         return -1;
-   }
+   /* Built-in locations should report GL_INVALID_INDEX. */
+   if (is_gl_identifier(name))
+      return GL_INVALID_INDEX;
 
    /* VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 are decremented as these
     * offsets are used internally to differentiate between built-in attributes
@@ -734,13 +755,22 @@
     */
    switch (res->Type) {
    case GL_PROGRAM_INPUT:
+      /* If the input is an array, fail if the index is out of bounds. */
+      if (array_index > 0
+          && array_index >= RESOURCE_VAR(res)->type->length) {
+         return -1;
+      }
       return RESOURCE_VAR(res)->data.location + array_index - VERT_ATTRIB_GENERIC0;
    case GL_PROGRAM_OUTPUT:
+      /* If the output is an array, fail if the index is out of bounds. */
+      if (array_index > 0
+          && array_index >= RESOURCE_VAR(res)->type->length) {
+         return -1;
+      }
       return RESOURCE_VAR(res)->data.location + array_index - FRAG_RESULT_DATA0;
    case GL_UNIFORM:
-      index = _mesa_get_uniform_location(shProg, name, &offset);
-
-      if (index == GL_INVALID_INDEX)
+      /* If the uniform is built-in, fail. */
+      if (RESOURCE_UNI(res)->builtin)
          return -1;
 
       /* From the GL_ARB_uniform_buffer_object spec:
@@ -754,9 +784,21 @@
           RESOURCE_UNI(res)->atomic_buffer_index != -1)
          return -1;
 
-      /* location in remap table + array element offset */
-      return RESOURCE_UNI(res)->remap_location + offset;
+      /* fallthrough */
+   case GL_VERTEX_SUBROUTINE_UNIFORM:
+   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+   case GL_COMPUTE_SUBROUTINE_UNIFORM:
+   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+      /* If the uniform is an array, fail if the index is out of bounds. */
+      if (array_index > 0
+          && array_index >= RESOURCE_UNI(res)->array_elements) {
+         return -1;
+      }
 
+      /* location in remap table + array element offset */
+      return RESOURCE_UNI(res)->remap_location + array_index;
    default:
       return -1;
    }
@@ -764,22 +806,22 @@
 
 /**
  * Function implements following location queries:
- *    glGetAttribLocation
- *    glGetFragDataLocation
  *    glGetUniformLocation
  */
 GLint
 _mesa_program_resource_location(struct gl_shader_program *shProg,
                                 GLenum programInterface, const char *name)
 {
+   unsigned array_index = 0;
    struct gl_program_resource *res =
-      _mesa_program_resource_find_name(shProg, programInterface, name);
+      _mesa_program_resource_find_name(shProg, programInterface, name,
+                                       &array_index);
 
    /* Resource not found. */
    if (!res)
       return -1;
 
-   return program_resource_location(shProg, res, name);
+   return program_resource_location(shProg, res, name, array_index);
 }
 
 /**
@@ -791,7 +833,7 @@
                                       GLenum programInterface, const char *name)
 {
    struct gl_program_resource *res =
-      _mesa_program_resource_find_name(shProg, programInterface, name);
+      _mesa_program_resource_find_name(shProg, programInterface, name, NULL);
 
    /* Non-existent variable or resource is not referenced by fragment stage. */
    if (!res || !(res->StageReferences & (1 << MESA_SHADER_FRAGMENT)))
@@ -806,6 +848,10 @@
    switch (ref) {
    case GL_REFERENCED_BY_VERTEX_SHADER:
       return MESA_SHADER_VERTEX;
+   case GL_REFERENCED_BY_TESS_CONTROL_SHADER:
+      return MESA_SHADER_TESS_CTRL;
+   case GL_REFERENCED_BY_TESS_EVALUATION_SHADER:
+      return MESA_SHADER_TESS_EVAL;
    case GL_REFERENCED_BY_GEOMETRY_SHADER:
       return MESA_SHADER_GEOMETRY;
    case GL_REFERENCED_BY_FRAGMENT_SHADER:
@@ -863,7 +909,8 @@
          for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
             const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
             struct gl_program_resource *uni =
-               _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname);
+               _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname,
+                                                NULL);
             if (!uni)
                continue;
             (*val)++;
@@ -873,7 +920,8 @@
          for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
             const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
             struct gl_program_resource *uni =
-               _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname);
+               _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname,
+                                                NULL);
             if (!uni)
                continue;
             *val++ =
@@ -902,8 +950,8 @@
 
 invalid_operation:
    _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s prop %s)", caller,
-               _mesa_lookup_enum_by_nr(res->Type),
-               _mesa_lookup_enum_by_nr(prop));
+               _mesa_enum_to_string(res->Type),
+               _mesa_enum_to_string(prop));
 
    return 0;
 }
@@ -921,11 +969,17 @@
 
    switch(prop) {
    case GL_NAME_LENGTH:
-      if (res->Type == GL_ATOMIC_COUNTER_BUFFER)
+      switch (res->Type) {
+      case GL_ATOMIC_COUNTER_BUFFER:
          goto invalid_operation;
-      /* Base name +3 if array '[0]' + terminator. */
-      *val = strlen(_mesa_program_resource_name(res)) +
-         (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+      case GL_TRANSFORM_FEEDBACK_VARYING:
+         *val = strlen(_mesa_program_resource_name(res)) + 1;
+         break;
+      default:
+         /* Base name +3 if array '[0]' + terminator. */
+         *val = strlen(_mesa_program_resource_name(res)) +
+            (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+      }
       return 1;
    case GL_TYPE:
       switch (res->Type) {
@@ -991,6 +1045,8 @@
          goto invalid_enum;
       /* fallthrough */
    case GL_REFERENCED_BY_VERTEX_SHADER:
+   case GL_REFERENCED_BY_TESS_CONTROL_SHADER:
+   case GL_REFERENCED_BY_TESS_EVALUATION_SHADER:
    case GL_REFERENCED_BY_GEOMETRY_SHADER:
    case GL_REFERENCED_BY_FRAGMENT_SHADER:
       switch (res->Type) {
@@ -1011,7 +1067,8 @@
       case GL_PROGRAM_INPUT:
       case GL_PROGRAM_OUTPUT:
          *val = program_resource_location(shProg, res,
-                                          _mesa_program_resource_name(res));
+                                          _mesa_program_resource_name(res),
+                                          0);
          return 1;
       default:
          goto invalid_operation;
@@ -1022,10 +1079,54 @@
       *val = RESOURCE_VAR(res)->data.index;
       return 1;
 
+   case GL_NUM_COMPATIBLE_SUBROUTINES:
+      if (res->Type != GL_VERTEX_SUBROUTINE_UNIFORM &&
+          res->Type != GL_FRAGMENT_SUBROUTINE_UNIFORM &&
+          res->Type != GL_GEOMETRY_SUBROUTINE_UNIFORM &&
+          res->Type != GL_COMPUTE_SUBROUTINE_UNIFORM &&
+          res->Type != GL_TESS_CONTROL_SUBROUTINE_UNIFORM &&
+          res->Type != GL_TESS_EVALUATION_SUBROUTINE_UNIFORM)
+         goto invalid_operation;
+      *val = RESOURCE_UNI(res)->num_compatible_subroutines;
+      return 1;
+   case GL_COMPATIBLE_SUBROUTINES: {
+      const struct gl_uniform_storage *uni;
+      struct gl_shader *sh;
+      unsigned count, i;
+      int j;
+
+      if (res->Type != GL_VERTEX_SUBROUTINE_UNIFORM &&
+          res->Type != GL_FRAGMENT_SUBROUTINE_UNIFORM &&
+          res->Type != GL_GEOMETRY_SUBROUTINE_UNIFORM &&
+          res->Type != GL_COMPUTE_SUBROUTINE_UNIFORM &&
+          res->Type != GL_TESS_CONTROL_SUBROUTINE_UNIFORM &&
+          res->Type != GL_TESS_EVALUATION_SUBROUTINE_UNIFORM)
+         goto invalid_operation;
+      uni = RESOURCE_UNI(res);
+
+      sh = shProg->_LinkedShaders[_mesa_shader_stage_from_subroutine_uniform(res->Type)];
+      count = 0;
+      for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+         struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i];
+         for (j = 0; j < fn->num_compat_types; j++) {
+            if (fn->types[j] == uni->type) {
+               val[count++] = i;
+               break;
+            }
+         }
+      }
+      return count;
+   }
    /* GL_ARB_tessellation_shader */
    case GL_IS_PER_PATCH:
-   case GL_REFERENCED_BY_TESS_CONTROL_SHADER:
-   case GL_REFERENCED_BY_TESS_EVALUATION_SHADER:
+      switch (res->Type) {
+      case GL_PROGRAM_INPUT:
+      case GL_PROGRAM_OUTPUT:
+         *val = RESOURCE_VAR(res)->data.patch;
+         return 1;
+      default:
+         goto invalid_operation;
+      }
    default:
       goto invalid_enum;
    }
@@ -1034,14 +1135,14 @@
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "%s(%s prop %s)", caller,
-               _mesa_lookup_enum_by_nr(res->Type),
-               _mesa_lookup_enum_by_nr(prop));
+               _mesa_enum_to_string(res->Type),
+               _mesa_enum_to_string(prop));
    return 0;
 
 invalid_operation:
    _mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s prop %s)", caller,
-               _mesa_lookup_enum_by_nr(res->Type),
-               _mesa_lookup_enum_by_nr(prop));
+               _mesa_enum_to_string(res->Type),
+               _mesa_enum_to_string(prop));
    return 0;
 }
 
@@ -1063,7 +1164,7 @@
    if (!res || bufSize < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetProgramResourceiv(%s index %d bufSize %d)",
-                  _mesa_lookup_enum_by_nr(programInterface), index, bufSize);
+                  _mesa_enum_to_string(programInterface), index, bufSize);
       return;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shared.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shared.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/shared.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/shared.c	2015-09-16 14:36:10.000000000 +0000
@@ -313,7 +313,6 @@
    _mesa_DeleteHashTable(shared->Programs);
 
    _mesa_reference_vertprog(ctx, &shared->DefaultVertexProgram, NULL);
-   _mesa_reference_geomprog(ctx, &shared->DefaultGeometryProgram, NULL);
    _mesa_reference_fragprog(ctx, &shared->DefaultFragmentProgram, NULL);
 
    _mesa_HashDeleteAll(shared->ATIShaders, delete_fragshader_cb, ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/state.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/state.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/state.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/state.c	2015-09-16 14:36:10.000000000 +0000
@@ -79,8 +79,8 @@
 
 
 /**
- * Update the ctx->Vertex/Geometry/FragmentProgram._Current pointers to point
- * to the current/active programs.  Then call ctx->Driver.BindProgram() to
+ * Update the ctx->*Program._Current pointers to point to the
+ * current/active programs.  Then call ctx->Driver.BindProgram() to
  * tell the driver which programs to use.
  *
  * Programs may come from 3 sources: GLSL shaders, ARB/NV_vertex/fragment
@@ -97,6 +97,10 @@
 {
    const struct gl_shader_program *vsProg =
       ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+   const struct gl_shader_program *tcsProg =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+   const struct gl_shader_program *tesProg =
+      ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
    const struct gl_shader_program *gsProg =
       ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
    struct gl_shader_program *fsProg =
@@ -106,6 +110,8 @@
    const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
    const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
    const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
+   const struct gl_tess_ctrl_program *prevTCP = ctx->TessCtrlProgram._Current;
+   const struct gl_tess_eval_program *prevTEP = ctx->TessEvalProgram._Current;
    const struct gl_compute_program *prevCP = ctx->ComputeProgram._Current;
    GLbitfield new_state = 0x0;
 
@@ -175,6 +181,30 @@
       _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
    }
 
+   if (tesProg && tesProg->LinkStatus
+       && tesProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]) {
+      /* Use GLSL tessellation evaluation shader */
+      _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current,
+         gl_tess_eval_program(
+            tesProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->Program));
+   }
+   else {
+      /* No tessellation evaluation program */
+      _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current, NULL);
+   }
+
+   if (tcsProg && tcsProg->LinkStatus
+       && tcsProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
+      /* Use GLSL tessellation control shader */
+      _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current,
+         gl_tess_ctrl_program(
+            tcsProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program));
+   }
+   else {
+      /* No tessellation control program */
+      _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current, NULL);
+   }
+
    /* Examine vertex program after fragment program as
     * _mesa_get_fixed_func_vertex_program() needs to know active
     * fragprog inputs.
@@ -225,11 +255,27 @@
    if (ctx->GeometryProgram._Current != prevGP) {
       new_state |= _NEW_PROGRAM;
       if (ctx->Driver.BindProgram) {
-         ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM,
+         ctx->Driver.BindProgram(ctx, GL_GEOMETRY_PROGRAM_NV,
                             (struct gl_program *) ctx->GeometryProgram._Current);
       }
    }
 
+   if (ctx->TessEvalProgram._Current != prevTEP) {
+      new_state |= _NEW_PROGRAM;
+      if (ctx->Driver.BindProgram) {
+         ctx->Driver.BindProgram(ctx, GL_TESS_EVALUATION_PROGRAM_NV,
+                            (struct gl_program *) ctx->TessEvalProgram._Current);
+      }
+   }
+
+   if (ctx->TessCtrlProgram._Current != prevTCP) {
+      new_state |= _NEW_PROGRAM;
+      if (ctx->Driver.BindProgram) {
+         ctx->Driver.BindProgram(ctx, GL_TESS_CONTROL_PROGRAM_NV,
+                            (struct gl_program *) ctx->TessCtrlProgram._Current);
+      }
+   }
+
    if (ctx->VertexProgram._Current != prevVP) {
       new_state |= _NEW_PROGRAM;
       if (ctx->Driver.BindProgram) {
@@ -266,15 +312,9 @@
       }
    }
 
-   if (ctx->GeometryProgram._Current) {
-      const struct gl_program_parameter_list *params =
-         ctx->GeometryProgram._Current->Base.Parameters;
-      /*FIXME: StateFlags is always 0 because we have unnamed constant
-       *       not state changes */
-      if (params /*&& params->StateFlags & ctx->NewState*/) {
-         new_state |= _NEW_PROGRAM_CONSTANTS;
-      }
-   }
+   /* Don't handle tessellation and geometry shaders here. They don't use
+    * any state constants.
+    */
 
    if (ctx->VertexProgram._Current) {
       const struct gl_program_parameter_list *params =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/tests/dispatch_sanity.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/tests/dispatch_sanity.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/tests/dispatch_sanity.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/tests/dispatch_sanity.cpp	2015-09-16 14:36:10.000000000 +0000
@@ -74,6 +74,7 @@
 extern const struct function gles11_functions_possible[];
 extern const struct function gles2_functions_possible[];
 extern const struct function gles3_functions_possible[];
+extern const struct function gles31_functions_possible[];
 
 class DispatchSanity_test : public ::testing::Test {
 public:
@@ -212,6 +213,15 @@
    validate_nops(&ctx, nop_table);
 }
 
+TEST_F(DispatchSanity_test, GLES31)
+{
+   SetUpCtx(API_OPENGLES2, 31);
+   validate_functions(&ctx, gles2_functions_possible, nop_table);
+   validate_functions(&ctx, gles3_functions_possible, nop_table);
+   validate_functions(&ctx, gles31_functions_possible, nop_table);
+   validate_nops(&ctx, nop_table);
+}
+
 const struct function common_desktop_functions_possible[] = {
    { "glCullFace", 10, -1 },
    { "glFrontFace", 10, -1 },
@@ -553,6 +563,8 @@
 
    /* GL 4.0 */
    { "glMinSampleShading", 40, -1 },
+   { "glPatchParameteri", 40, -1 },
+   { "glPatchParameterfv", 40, -1 },
    { "glBlendEquationi", 40, -1 },
    { "glBlendEquationSeparatei", 40, -1 },
    { "glBlendFunci", 40, -1 },
@@ -813,8 +825,8 @@
 // { "glVertexArrayVertexAttribIFormatEXT", 43, -1 },   // XXX: Add to xml
 // { "glVertexArrayVertexAttribBindingEXT", 43, -1 },   // XXX: Add to xml
 // { "glVertexArrayVertexBindingDivisorEXT", 43, -1 },  // XXX: Add to xml
-// { "glFramebufferParameteri", 43, -1 },               // XXX: Add to xml
-// { "glGetFramebufferParameteriv", 43, -1 },           // XXX: Add to xml
+   { "glFramebufferParameteri", 43, -1 },
+   { "glGetFramebufferParameteriv", 43, -1 },
 // { "glNamedFramebufferParameteriEXT", 43, -1 },       // XXX: Add to xml
 // { "glGetNamedFramebufferParameterivEXT", 43, -1 },   // XXX: Add to xml
 // { "glGetInternalformati64v", 43, -1 },               // XXX: Add to xml
@@ -839,6 +851,9 @@
 // { "glTextureStorage2DMultisampleEXT", 43, -1 },      // XXX: Add to xml
 // { "glTextureStorage3DMultisampleEXT", 43, -1 },      // XXX: Add to xml
 
+/* GL 4.5 */
+   { "glMemoryBarrierByRegion", 45, -1 },
+
    /* GL_ARB_internalformat_query */
    { "glGetInternalformativ", 30, -1 },
 
@@ -920,6 +935,11 @@
 
    /* GL_EXT_polygon_offset_clamp */
    { "glPolygonOffsetClampEXT", 11, -1 },
+
+   /* GL_ARB_get_texture_sub_image */
+   { "glGetTextureSubImage", 20, -1 },
+   { "glGetCompressedTextureSubImage", 20, -1 },
+
    { NULL, 0, -1 }
 };
 
@@ -1414,6 +1434,16 @@
    /* GL 3.2 */
    { "glFramebufferTexture", 32, -1 },
 
+   /* GL 4.0 */
+   { "glGetSubroutineUniformLocation", 40, -1 },
+   { "glGetSubroutineIndex", 40, -1 },
+   { "glGetActiveSubroutineUniformiv", 40, -1 },
+   { "glGetActiveSubroutineUniformName", 40, -1 },
+   { "glGetActiveSubroutineName", 40, -1 },
+   { "glUniformSubroutinesuiv", 40, -1 },
+   { "glGetUniformSubroutineuiv", 40, -1 },
+   { "glGetProgramStageiv", 40, -1 },
+
    /* GL 4.3 */
    { "glIsRenderbuffer", 43, -1 },
    { "glBindRenderbuffer", 43, -1 },
@@ -1552,16 +1582,6 @@
    { "glUniformMatrix4x2dv", 40, -1 },
    { "glUniformMatrix4x3dv", 40, -1 },
    { "glGetUniformdv", 43, -1 },
-// { "glGetSubroutineUniformLocation", 43, -1 },        // XXX: Add to xml
-// { "glGetSubroutineIndex", 43, -1 },                  // XXX: Add to xml
-// { "glGetActiveSubroutineUniformiv", 43, -1 },        // XXX: Add to xml
-// { "glGetActiveSubroutineUniformName", 43, -1 },      // XXX: Add to xml
-// { "glGetActiveSubroutineName", 43, -1 },             // XXX: Add to xml
-// { "glUniformSubroutinesuiv", 43, -1 },               // XXX: Add to xml
-// { "glGetUniformSubroutineuiv", 43, -1 },             // XXX: Add to xml
-// { "glGetProgramStageiv", 43, -1 },                   // XXX: Add to xml
-// { "glPatchParameteri", 43, -1 },                     // XXX: Add to xml
-// { "glPatchParameterfv", 43, -1 },                    // XXX: Add to xml
 
    { "glBindTransformFeedback", 43, -1 },
    { "glDeleteTransformFeedbacks", 43, -1 },
@@ -1722,6 +1742,9 @@
 // { "glTextureStorage2DMultisampleEXT", 43, -1 },      // XXX: Add to xml
 // { "glTextureStorage3DMultisampleEXT", 43, -1 },      // XXX: Add to xml
 
+/* GL 4.5 */
+   { "glMemoryBarrierByRegion", 45, -1 },
+
    /* GL_ARB_direct_state_access */
    { "glCreateTransformFeedbacks", 45, -1 },
    { "glTransformFeedbackBufferBase", 45, -1 },
@@ -2375,3 +2398,87 @@
 
    { NULL, 0, -1 }
 };
+
+const struct function gles31_functions_possible[] = {
+   { "glDispatchCompute", 31, -1 },
+   { "glDispatchComputeIndirect", 31, -1 },
+   { "glDrawArraysIndirect", 31, -1 },
+   { "glDrawElementsIndirect", 31, -1 },
+
+   // FINISHME: These two functions have not been implemented yet.  They come
+   // FINISHME: from the ARB_framebuffer_no_attachments extension.
+   // { "glFramebufferParameteri", 31, -1 },
+   // { "glGetFramebufferParameteriv", 31, -1 },
+
+   { "glGetProgramInterfaceiv", 31, -1 },
+   { "glGetProgramResourceIndex", 31, -1 },
+   { "glGetProgramResourceName", 31, -1 },
+   { "glGetProgramResourceiv", 31, -1 },
+   { "glGetProgramResourceLocation", 31, -1 },
+
+   // We check for the aliased EXT versions in GLES 2
+   // { "glUseProgramStages", 31, -1 },
+   // { "glActiveShaderProgram", 31, -1 },
+   // { "glCreateShaderProgramv", 31, -1 },
+   // { "glBindProgramPipeline", 31, -1 },
+   // { "glDeleteProgramPipelines", 31, -1 },
+   // { "glGenProgramPipelines", 31, -1 },
+   // { "glIsProgramPipeline", 31, -1 },
+   // { "glGetProgramPipelineiv", 31, -1 },
+   // { "glProgramUniform1i", 31, -1 },
+   // { "glProgramUniform2i", 31, -1 },
+   // { "glProgramUniform3i", 31, -1 },
+   // { "glProgramUniform4i", 31, -1 },
+   // { "glProgramUniform1f", 31, -1 },
+   // { "glProgramUniform2f", 31, -1 },
+   // { "glProgramUniform3f", 31, -1 },
+   // { "glProgramUniform4f", 31, -1 },
+   // { "glProgramUniform1iv", 31, -1 },
+   // { "glProgramUniform2iv", 31, -1 },
+   // { "glProgramUniform3iv", 31, -1 },
+   // { "glProgramUniform4iv", 31, -1 },
+   // { "glProgramUniform1fv", 31, -1 },
+   // { "glProgramUniform2fv", 31, -1 },
+   // { "glProgramUniform3fv", 31, -1 },
+   // { "glProgramUniform4fv", 31, -1 },
+   // { "glProgramUniformMatrix2fv", 31, -1 },
+   // { "glProgramUniformMatrix3fv", 31, -1 },
+   // { "glProgramUniformMatrix4fv", 31, -1 },
+   // { "glProgramUniformMatrix2x3fv", 31, -1 },
+   // { "glProgramUniformMatrix3x2fv", 31, -1 },
+   // { "glProgramUniformMatrix2x4fv", 31, -1 },
+   // { "glProgramUniformMatrix4x2fv", 31, -1 },
+   // { "glProgramUniformMatrix3x4fv", 31, -1 },
+   // { "glProgramUniformMatrix4x3fv", 31, -1 },
+   // { "glValidateProgramPipeline", 31, -1 },
+   // { "glGetProgramPipelineInfoLog", 31, -1 },
+
+   // We check for the aliased EXT versions in GLES 3
+   // { "glProgramUniform1ui", 31, -1 },
+   // { "glProgramUniform2ui", 31, -1 },
+   // { "glProgramUniform3ui", 31, -1 },
+   // { "glProgramUniform4ui", 31, -1 },
+   // { "glProgramUniform1uiv", 31, -1 },
+   // { "glProgramUniform2uiv", 31, -1 },
+   // { "glProgramUniform3uiv", 31, -1 },
+   // { "glProgramUniform4uiv", 31, -1 },
+
+   { "glBindImageTexture", 31, -1 },
+   { "glGetBooleani_v", 31, -1 },
+   { "glMemoryBarrier", 31, -1 },
+
+   { "glMemoryBarrierByRegion", 31, -1 },
+
+   { "glTexStorage2DMultisample", 31, -1 },
+   { "glGetMultisamplefv", 31, -1 },
+   { "glSampleMaski", 31, -1 },
+   { "glGetTexLevelParameteriv", 31, -1 },
+   { "glGetTexLevelParameterfv", 31, -1 },
+   { "glBindVertexBuffer", 31, -1 },
+   { "glVertexAttribFormat", 31, -1 },
+   { "glVertexAttribIFormat", 31, -1 },
+   { "glVertexAttribBinding", 31, -1 },
+   { "glVertexBindingDivisor", 31, -1 },
+
+   { NULL, 0, -1 },
+ };
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/tests/enum_strings.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/tests/enum_strings.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/tests/enum_strings.cpp	2014-09-10 05:44:12.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/tests/enum_strings.cpp	2015-09-16 14:36:10.000000000 +0000
@@ -39,13 +39,13 @@
 {
    for (unsigned i = 0; everything[i].name != NULL; i++) {
       EXPECT_STREQ(everything[i].name,
-		   _mesa_lookup_enum_by_nr(everything[i].value));
+		   _mesa_enum_to_string(everything[i].value));
    }
 }
 
 TEST(EnumStrings, LookUpUnknownNumber)
 {
-   EXPECT_STRCASEEQ("0xEEEE", _mesa_lookup_enum_by_nr(0xEEEE));
+   EXPECT_STRCASEEQ("0xEEEE", _mesa_enum_to_string(0xEEEE));
 }
 
 /* Please type the name and the value.  This makes it easier to detect
@@ -1731,6 +1731,10 @@
    { 0x8DDF, "GL_MAX_GEOMETRY_UNIFORM_COMPONENTS" },
    { 0x8DE0, "GL_MAX_GEOMETRY_OUTPUT_VERTICES" },
    { 0x8DE1, "GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS" },
+   { 0x8DE5, "GL_ACTIVE_SUBROUTINES" },
+   { 0x8DE6, "GL_ACTIVE_SUBROUTINE_UNIFORMS" },
+   { 0x8DE7, "GL_MAX_SUBROUTINES" },
+   { 0x8DE8, "GL_MAX_SUBROUTINE_UNIFORM_LOCATIONS" },
    { 0x8DF0, "GL_LOW_FLOAT" },
    { 0x8DF1, "GL_MEDIUM_FLOAT" },
    { 0x8DF2, "GL_HIGH_FLOAT" },
@@ -1759,6 +1763,11 @@
    { 0x8E44, "GL_TEXTURE_SWIZZLE_B" },
    { 0x8E45, "GL_TEXTURE_SWIZZLE_A" },
    { 0x8E46, "GL_TEXTURE_SWIZZLE_RGBA" },
+   { 0x8E47, "GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS" },
+   { 0x8E48, "GL_ACTIVE_SUBROUTINE_MAX_LENGTH" },
+   { 0x8E49, "GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH" },
+   { 0x8E4A, "GL_NUM_COMPATIBLE_SUBROUTINES" },
+   { 0x8E4B, "GL_COMPATIBLE_SUBROUTINES" },
    { 0x8E4C, "GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION" },
    { 0x8E4D, "GL_FIRST_VERTEX_CONVENTION" },
    { 0x8E4E, "GL_LAST_VERTEX_CONVENTION" },
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texcompress.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texcompress.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texcompress.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texcompress.c	2015-09-16 14:36:10.000000000 +0000
@@ -586,34 +586,16 @@
 compressed_fetch_func
 _mesa_get_compressed_fetch_func(mesa_format format)
 {
-   switch (format) {
-   case MESA_FORMAT_RGB_DXT1:
-   case MESA_FORMAT_RGBA_DXT1:
-   case MESA_FORMAT_RGBA_DXT3:
-   case MESA_FORMAT_RGBA_DXT5:
-   case MESA_FORMAT_SRGB_DXT1:
-   case MESA_FORMAT_SRGBA_DXT1:
-   case MESA_FORMAT_SRGBA_DXT3:
-   case MESA_FORMAT_SRGBA_DXT5:
+   switch (_mesa_get_format_layout(format)) {
+   case MESA_FORMAT_LAYOUT_S3TC:
       return _mesa_get_dxt_fetch_func(format);
-   case MESA_FORMAT_RGB_FXT1:
-   case MESA_FORMAT_RGBA_FXT1:
+   case MESA_FORMAT_LAYOUT_FXT1:
       return _mesa_get_fxt_fetch_func(format);
-   case MESA_FORMAT_R_RGTC1_UNORM:
-   case MESA_FORMAT_L_LATC1_UNORM:
-   case MESA_FORMAT_R_RGTC1_SNORM:
-   case MESA_FORMAT_L_LATC1_SNORM:
-   case MESA_FORMAT_RG_RGTC2_UNORM:
-   case MESA_FORMAT_LA_LATC2_UNORM:
-   case MESA_FORMAT_RG_RGTC2_SNORM:
-   case MESA_FORMAT_LA_LATC2_SNORM:
+   case MESA_FORMAT_LAYOUT_RGTC:
       return _mesa_get_compressed_rgtc_func(format);
-   case MESA_FORMAT_ETC1_RGB8:
+   case MESA_FORMAT_LAYOUT_ETC1:
       return _mesa_get_etc_fetch_func(format);
-   case MESA_FORMAT_BPTC_RGBA_UNORM:
-   case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
-   case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
-   case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
+   case MESA_FORMAT_LAYOUT_BPTC:
       return _mesa_get_bptc_fetch_func(format);
    default:
       return NULL;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texcompress_fxt1.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texcompress_fxt1.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texcompress_fxt1.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texcompress_fxt1.c	2015-09-16 14:36:10.000000000 +0000
@@ -65,7 +65,7 @@
    if (srcFormat != GL_RGB ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
       GLubyte *tempImageSlices[1];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texcompress_s3tc.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texcompress_s3tc.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texcompress_s3tc.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texcompress_s3tc.c	2015-09-16 14:36:10.000000000 +0000
@@ -130,7 +130,7 @@
    if (srcFormat != GL_RGB ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -187,7 +187,7 @@
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -244,7 +244,7 @@
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
@@ -300,7 +300,7 @@
    if (srcFormat != GL_RGBA ||
        srcType != GL_UNSIGNED_BYTE ||
        ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
       GLubyte *tempImageSlices[1];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texenv.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texenv.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texenv.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texenv.c	2015-09-16 14:36:10.000000000 +0000
@@ -42,7 +42,7 @@
 
 
 #define TE_ERROR(errCode, msg, value)				\
-   _mesa_error(ctx, errCode, msg, _mesa_lookup_enum_by_nr(value));
+   _mesa_error(ctx, errCode, msg, _mesa_enum_to_string(value));
 
 
 /** Set texture env mode */
@@ -482,16 +482,16 @@
    }
    else {
       _mesa_error(ctx, GL_INVALID_ENUM, "glTexEnv(target=%s)",
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
    if (MESA_VERBOSE&(VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glTexEnv %s %s %.1f(%s) ...\n",
-                  _mesa_lookup_enum_by_nr(target),
-                  _mesa_lookup_enum_by_nr(pname),
+                  _mesa_enum_to_string(target),
+                  _mesa_enum_to_string(pname),
                   *param,
-                  _mesa_lookup_enum_by_nr((GLenum) iparam0));
+                  _mesa_enum_to_string((GLenum) iparam0));
 
    /* Tell device driver about the new texture environment */
    if (ctx->Driver.TexEnv) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texformat.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texformat.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texformat.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texformat.c	2015-09-16 14:36:10.000000000 +0000
@@ -847,7 +847,7 @@
    }
 
    _mesa_problem(ctx, "unexpected format %s in _mesa_choose_tex_format()",
-                 _mesa_lookup_enum_by_nr(internalFormat));
+                 _mesa_enum_to_string(internalFormat));
    return MESA_FORMAT_NONE;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texgen.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texgen.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texgen.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texgen.c	2015-09-16 14:36:10.000000000 +0000
@@ -76,10 +76,10 @@
 
    if (MESA_VERBOSE&(VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glTexGen %s %s %.1f(%s)...\n",
-                  _mesa_lookup_enum_by_nr(coord),
-                  _mesa_lookup_enum_by_nr(pname),
+                  _mesa_enum_to_string(coord),
+                  _mesa_enum_to_string(pname),
                   *params,
-		  _mesa_lookup_enum_by_nr((GLenum) (GLint) *params));
+		  _mesa_enum_to_string((GLenum) (GLint) *params));
 
    if (ctx->Texture.CurrentUnit >= ctx->Const.MaxTextureCoordUnits) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glTexGen(current unit)");
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texgetimage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texgetimage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texgetimage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texgetimage.c	2015-09-16 14:36:10.000000000 +0000
@@ -75,12 +75,11 @@
  */
 static void
 get_tex_depth(struct gl_context *ctx, GLuint dimensions,
+              GLint xoffset, GLint yoffset, GLint zoffset,
+              GLsizei width, GLsizei height, GLint depth,
               GLenum format, GLenum type, GLvoid *pixels,
               struct gl_texture_image *texImage)
 {
-   const GLint width = texImage->Width;
-   GLint height = texImage->Height;
-   GLint depth = texImage->Depth;
    GLint img, row;
    GLfloat *depthRow = malloc(width * sizeof(GLfloat));
 
@@ -94,14 +93,15 @@
       height = 1;
    }
 
+   assert(zoffset + depth <= texImage->Depth);
    for (img = 0; img < depth; img++) {
       GLubyte *srcMap;
       GLint srcRowStride;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, img,
-                                  0, 0, width, height, GL_MAP_READ_BIT,
-                                  &srcMap, &srcRowStride);
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+                                  xoffset, yoffset, width, height,
+                                  GL_MAP_READ_BIT, &srcMap, &srcRowStride);
 
       if (srcMap) {
          for (row = 0; row < height; row++) {
@@ -113,7 +113,7 @@
             _mesa_pack_depth_span(ctx, width, dest, type, depthRow, &ctx->Pack);
          }
 
-         ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
       }
       else {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -130,26 +130,26 @@
  */
 static void
 get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions,
+                      GLint xoffset, GLint yoffset, GLint zoffset,
+                      GLsizei width, GLsizei height, GLint depth,
                       GLenum format, GLenum type, GLvoid *pixels,
                       struct gl_texture_image *texImage)
 {
-   const GLint width = texImage->Width;
-   const GLint height = texImage->Height;
-   const GLint depth = texImage->Depth;
    GLint img, row;
 
    assert(format == GL_DEPTH_STENCIL);
    assert(type == GL_UNSIGNED_INT_24_8 ||
           type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
 
+   assert(zoffset + depth <= texImage->Depth);
    for (img = 0; img < depth; img++) {
       GLubyte *srcMap;
       GLint rowstride;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, img,
-                                  0, 0, width, height, GL_MAP_READ_BIT,
-                                  &srcMap, &rowstride);
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+                                  xoffset, yoffset, width, height,
+                                  GL_MAP_READ_BIT, &srcMap, &rowstride);
 
       if (srcMap) {
          for (row = 0; row < height; row++) {
@@ -166,7 +166,7 @@
             }
          }
 
-         ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
       }
       else {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -180,12 +180,11 @@
  */
 static void
 get_tex_stencil(struct gl_context *ctx, GLuint dimensions,
+                GLint xoffset, GLint yoffset, GLint zoffset,
+                GLsizei width, GLsizei height, GLint depth,
                 GLenum format, GLenum type, GLvoid *pixels,
                 struct gl_texture_image *texImage)
 {
-   const GLint width = texImage->Width;
-   const GLint height = texImage->Height;
-   const GLint depth = texImage->Depth;
    GLint img, row;
 
    assert(format == GL_STENCIL_INDEX);
@@ -195,8 +194,9 @@
       GLint rowstride;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, img,
-                                  0, 0, width, height, GL_MAP_READ_BIT,
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+                                  xoffset, yoffset, width, height,
+                                  GL_MAP_READ_BIT,
                                   &srcMap, &rowstride);
 
       if (srcMap) {
@@ -211,7 +211,7 @@
                                            dest);
          }
 
-         ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
       }
       else {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -226,22 +226,22 @@
  */
 static void
 get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions,
+              GLint xoffset, GLint yoffset, GLint zoffset,
+              GLsizei width, GLsizei height, GLint depth,
               GLenum format, GLenum type, GLvoid *pixels,
               struct gl_texture_image *texImage)
 {
-   const GLint width = texImage->Width;
-   const GLint height = texImage->Height;
-   const GLint depth = texImage->Depth;
    GLint img, row;
 
+   assert(zoffset + depth <= texImage->Depth);
    for (img = 0; img < depth; img++) {
       GLubyte *srcMap;
       GLint rowstride;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, img,
-                                  0, 0, width, height, GL_MAP_READ_BIT,
-                                  &srcMap, &rowstride);
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+                                  xoffset, yoffset, width, height,
+                                  GL_MAP_READ_BIT, &srcMap, &rowstride);
 
       if (srcMap) {
          for (row = 0; row < height; row++) {
@@ -264,7 +264,7 @@
             }
          }
 
-         ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
       }
       else {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -279,6 +279,8 @@
  */
 static void
 get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLint depth,
                         GLenum format, GLenum type, GLvoid *pixels,
                         struct gl_texture_image *texImage,
                         GLbitfield transferOps)
@@ -287,9 +289,6 @@
    const mesa_format texFormat =
       _mesa_get_srgb_format_linear(texImage->TexFormat);
    const GLenum baseFormat = _mesa_get_format_base_format(texFormat);
-   const GLuint width = texImage->Width;
-   const GLuint height = texImage->Height;
-   const GLuint depth = texImage->Depth;
    GLfloat *tempImage, *tempSlice;
    GLuint slice;
    int srcStride, dstStride;
@@ -312,15 +311,15 @@
 
       tempSlice = tempImage + slice * 4 * width * height;
 
-      ctx->Driver.MapTextureImage(ctx, texImage, slice,
-                                  0, 0, width, height,
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset + slice,
+                                  xoffset, yoffset, width, height,
                                   GL_MAP_READ_BIT,
                                   &srcMap, &srcRowStride);
       if (srcMap) {
          _mesa_decompress_image(texFormat, width, height,
                                 srcMap, srcRowStride, tempSlice);
 
-         ctx->Driver.UnmapTextureImage(ctx, texImage, slice);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + slice);
       }
       else {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -362,6 +361,13 @@
                            tempSlice, RGBA32_FLOAT, srcStride,
                            width, height,
                            needsRebase ? rebaseSwizzle : NULL);
+
+      /* Handle byte swapping if required */
+      if (ctx->Pack.SwapBytes) {
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);
+      }
+
       tempSlice += 4 * width * height;
    }
 
@@ -409,6 +415,8 @@
  */
 static void
 get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
+                          GLint xoffset, GLint yoffset, GLint zoffset,
+                          GLsizei width, GLsizei height, GLint depth,
                           GLenum format, GLenum type, GLvoid *pixels,
                           struct gl_texture_image *texImage,
                           GLbitfield transferOps)
@@ -416,9 +424,6 @@
    /* don't want to apply sRGB -> RGB conversion here so override the format */
    const mesa_format texFormat =
       _mesa_get_srgb_format_linear(texImage->TexFormat);
-   const GLuint width = texImage->Width;
-   GLuint height = texImage->Height;
-   GLuint depth = texImage->Depth;
    GLuint img;
    GLboolean dst_is_integer;
    uint32_t dst_format;
@@ -430,6 +435,8 @@
    if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
       depth = height;
       height = 1;
+      zoffset = yoffset;
+      yoffset = 0;
    }
 
    /* Depending on the base format involved we may need to apply a rebase
@@ -449,7 +456,8 @@
       rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
       rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
       rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
-    } else if (texImage->_BaseFormat != _mesa_get_format_base_format(texFormat)) {
+    } else if (texImage->_BaseFormat !=
+               _mesa_get_format_base_format(texFormat)) {
       needsRebase =
          _mesa_compute_rgba2base2rgba_component_mapping(texImage->_BaseFormat,
                                                         rebaseSwizzle);
@@ -480,8 +488,9 @@
       uint32_t src_format;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, img,
-                                  0, 0, width, height, GL_MAP_READ_BIT,
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+                                  xoffset, yoffset, width, height,
+                                  GL_MAP_READ_BIT,
                                   &srcMap, &rowstride);
       if (!srcMap) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -530,8 +539,8 @@
          /* If we had to rebase, we have already handled that */
          needsRebase = false;
 
-         /* If we were lucky and our RGBA conversion matches the dst format, then
-          * we are done.
+         /* If we were lucky and our RGBA conversion matches the dst format,
+          * then we are done.
           */
          if (!need_convert)
             goto do_swap;
@@ -555,20 +564,12 @@
 
    do_swap:
       /* Handle byte swapping if required */
-      if (ctx->Pack.SwapBytes) {
-         GLint swapSize = _mesa_sizeof_packed_type(type);
-         if (swapSize == 2 || swapSize == 4) {
-            int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-            assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-            if (swapSize == 2)
-               _mesa_swap2((GLushort *) dest, width * height * swapsPerPixel);
-            else if (swapSize == 4)
-               _mesa_swap4((GLuint *) dest, width * height * swapsPerPixel);
-         }
-      }
+      if (ctx->Pack.SwapBytes)
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);
 
       /* Unmap the src texture buffer */
-      ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+      ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
    }
 
 done:
@@ -583,6 +584,8 @@
  */
 static void
 get_tex_rgba(struct gl_context *ctx, GLuint dimensions,
+             GLint xoffset, GLint yoffset, GLint zoffset,
+             GLsizei width, GLsizei height, GLint depth,
              GLenum format, GLenum type, GLvoid *pixels,
              struct gl_texture_image *texImage)
 {
@@ -604,11 +607,17 @@
    }
 
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
-      get_tex_rgba_compressed(ctx, dimensions, format, type,
+      get_tex_rgba_compressed(ctx, dimensions,
+                              xoffset, yoffset, zoffset,
+                              width, height, depth,
+                              format, type,
                               pixels, texImage, transferOps);
    }
    else {
-      get_tex_rgba_uncompressed(ctx, dimensions, format, type,
+      get_tex_rgba_uncompressed(ctx, dimensions,
+                                xoffset, yoffset, zoffset,
+                                width, height, depth,
+                                format, type,
                                 pixels, texImage, transferOps);
    }
 }
@@ -619,8 +628,10 @@
  * \return GL_TRUE if done, GL_FALSE otherwise
  */
 static GLboolean
-get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
-               GLvoid *pixels,
+get_tex_memcpy(struct gl_context *ctx,
+               GLint xoffset, GLint yoffset, GLint zoffset,
+               GLsizei width, GLsizei height, GLint depth,
+               GLenum format, GLenum type, GLvoid *pixels,
                struct gl_texture_image *texImage)
 {
    const GLenum target = texImage->TexObject->Target;
@@ -642,20 +653,25 @@
                                                      ctx->Pack.SwapBytes);
    }
 
+   if (depth > 1) {
+      /* only a single slice is supported at this time */
+      memCopy = FALSE;
+   }
+
    if (memCopy) {
       const GLuint bpp = _mesa_get_format_bytes(texImage->TexFormat);
-      const GLint bytesPerRow = texImage->Width * bpp;
+      const GLint bytesPerRow = width * bpp;
       GLubyte *dst =
-         _mesa_image_address2d(&ctx->Pack, pixels, texImage->Width,
-                               texImage->Height, format, type, 0, 0);
+         _mesa_image_address2d(&ctx->Pack, pixels, width, height,
+                               format, type, 0, 0);
       const GLint dstRowStride =
-         _mesa_image_row_stride(&ctx->Pack, texImage->Width, format, type);
+         _mesa_image_row_stride(&ctx->Pack, width, format, type);
       GLubyte *src;
       GLint srcRowStride;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, 0,
-                                  0, 0, texImage->Width, texImage->Height,
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset,
+                                  xoffset, yoffset, width, height,
                                   GL_MAP_READ_BIT, &src, &srcRowStride);
 
       if (src) {
@@ -664,7 +680,7 @@
          }
          else {
             GLuint row;
-            for (row = 0; row < texImage->Height; row++) {
+            for (row = 0; row < height; row++) {
                memcpy(dst, src, bytesPerRow);
                dst += dstRowStride;
                src += srcRowStride;
@@ -672,7 +688,7 @@
          }
 
          /* unmap src texture buffer */
-         ctx->Driver.UnmapTextureImage(ctx, texImage, 0);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset);
       }
       else {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -684,15 +700,17 @@
 
 
 /**
- * This is the software fallback for Driver.GetTexImage().
+ * This is the software fallback for Driver.GetTexSubImage().
  * All error checking will have been done before this routine is called.
  * We'll call ctx->Driver.MapTextureImage() to access the data, then
  * unmap with ctx->Driver.UnmapTextureImage().
  */
 void
-_mesa_GetTexImage_sw(struct gl_context *ctx,
-                     GLenum format, GLenum type, GLvoid *pixels,
-                     struct gl_texture_image *texImage)
+_mesa_GetTexSubImage_sw(struct gl_context *ctx,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLint depth,
+                        GLenum format, GLenum type, GLvoid *pixels,
+                        struct gl_texture_image *texImage)
 {
    const GLuint dimensions =
       _mesa_get_texture_dimensions(texImage->TexObject->Target);
@@ -720,23 +738,30 @@
       pixels = ADD_POINTERS(buf, pixels);
    }
 
-   if (get_tex_memcpy(ctx, format, type, pixels, texImage)) {
+   if (get_tex_memcpy(ctx, xoffset, yoffset, zoffset, width, height, depth,
+                      format, type, pixels, texImage)) {
       /* all done */
    }
    else if (format == GL_DEPTH_COMPONENT) {
-      get_tex_depth(ctx, dimensions, format, type, pixels, texImage);
+      get_tex_depth(ctx, dimensions, xoffset, yoffset, zoffset,
+                    width, height, depth, format, type, pixels, texImage);
    }
    else if (format == GL_DEPTH_STENCIL_EXT) {
-      get_tex_depth_stencil(ctx, dimensions, format, type, pixels, texImage);
+      get_tex_depth_stencil(ctx, dimensions, xoffset, yoffset, zoffset,
+                            width, height, depth, format, type, pixels,
+                            texImage);
    }
    else if (format == GL_STENCIL_INDEX) {
-      get_tex_stencil(ctx, dimensions, format, type, pixels, texImage);
+      get_tex_stencil(ctx, dimensions, xoffset, yoffset, zoffset,
+                      width, height, depth, format, type, pixels, texImage);
    }
    else if (format == GL_YCBCR_MESA) {
-      get_tex_ycbcr(ctx, dimensions, format, type, pixels, texImage);
+      get_tex_ycbcr(ctx, dimensions, xoffset, yoffset, zoffset,
+                    width, height, depth, format, type, pixels, texImage);
    }
    else {
-      get_tex_rgba(ctx, dimensions, format, type, pixels, texImage);
+      get_tex_rgba(ctx, dimensions, xoffset, yoffset, zoffset,
+                   width, height, depth, format, type, pixels, texImage);
    }
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
@@ -747,13 +772,16 @@
 
 
 /**
- * This is the software fallback for Driver.GetCompressedTexImage().
+ * This is the software fallback for Driver.GetCompressedTexSubImage().
  * All error checking will have been done before this routine is called.
  */
 void
-_mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
-                               struct gl_texture_image *texImage,
-                               GLvoid *img)
+_mesa_GetCompressedTexSubImage_sw(struct gl_context *ctx,
+                                  struct gl_texture_image *texImage,
+                                  GLint xoffset, GLint yoffset,
+                                  GLint zoffset, GLsizei width,
+                                  GLint height, GLint depth,
+                                  GLvoid *img)
 {
    const GLuint dimensions =
       _mesa_get_texture_dimensions(texImage->TexObject->Target);
@@ -762,10 +790,8 @@
    GLubyte *dest;
 
    _mesa_compute_compressed_pixelstore(dimensions, texImage->TexFormat,
-                                       texImage->Width, texImage->Height,
-                                       texImage->Depth,
-                                       &ctx->Pack,
-                                       &store);
+                                       width, height, depth,
+                                       &ctx->Pack, &store);
 
    if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       /* pack texture image into a PBO */
@@ -791,8 +817,8 @@
       GLubyte *src;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, slice,
-                                  0, 0, texImage->Width, texImage->Height,
+      ctx->Driver.MapTextureImage(ctx, texImage, zoffset + slice,
+                                  xoffset, yoffset, width, height,
                                   GL_MAP_READ_BIT, &src, &srcRowStride);
 
       if (src) {
@@ -803,10 +829,11 @@
             src += srcRowStride;
          }
 
-         ctx->Driver.UnmapTextureImage(ctx, texImage, slice);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + slice);
 
          /* Advance to next slice */
-         dest += store.TotalBytesPerRow * (store.TotalRowsPerSlice - store.CopyRowsPerSlice);
+         dest += store.TotalBytesPerRow * (store.TotalRowsPerSlice -
+                                           store.CopyRowsPerSlice);
 
       } else {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetCompresssedTexImage");
@@ -863,29 +890,299 @@
 
 
 /**
- * Do error checking for a glGetTex(ture)Image() call.
- * \return GL_TRUE if any error, GL_FALSE if no errors.
+ * Wrapper for _mesa_select_tex_image() which can handle target being
+ * GL_TEXTURE_CUBE_MAP_ARB in which case we use zoffset to select a cube face.
+ * This can happen for glGetTextureImage and glGetTextureSubImage (DSA
+ * functions).
  */
-static GLboolean
+static struct gl_texture_image *
+select_tex_image(const struct gl_texture_object *texObj, GLenum target,
+                 GLint level, GLint zoffset)
+{
+   assert(level >= 0);
+   assert(level < MAX_TEXTURE_LEVELS);
+   if (target == GL_TEXTURE_CUBE_MAP) {
+      assert(zoffset >= 0);
+      assert(zoffset < 6);
+      target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + zoffset;
+   }
+   return _mesa_select_tex_image(texObj, target, level);
+}
+
+
+/**
+ * Error-check the offset and size arguments to
+ * glGet[Compressed]TextureSubImage().  Also checks if the specified
+ * texture image is missing.
+ * \return true if error, false if no error.
+ */
+static bool
+dimensions_error_check(struct gl_context *ctx,
+                       struct gl_texture_object *texObj,
+                       GLenum target, GLint level,
+                       GLint xoffset, GLint yoffset, GLint zoffset,
+                       GLsizei width, GLsizei height, GLsizei depth,
+                       const char *caller)
+{
+   const struct gl_texture_image *texImage;
+   int i;
+
+   if (xoffset < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset);
+      return true;
+   }
+
+   if (yoffset < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(yoffset = %d)", caller, yoffset);
+      return true;
+   }
+
+   if (zoffset < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(zoffset = %d)", caller, zoffset);
+      return true;
+   }
+
+   if (width < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(width = %d)", caller, width);
+      return true;
+   }
+
+   if (height < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(height = %d)", caller, height);
+      return true;
+   }
+
+   if (depth < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(depth = %d)", caller, depth);
+      return true;
+   }
+
+   /* do special per-target checks */
+   switch (target) {
+   case GL_TEXTURE_1D:
+      if (yoffset != 0) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "%s(1D, yoffset = %d)", caller, yoffset);
+         return true;
+      }
+      if (height > 1) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "%s(1D, height = %d)", caller, height);
+         return true;
+      }
+      /* fall-through */
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_RECTANGLE:
+      if (zoffset != 0) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "%s(zoffset = %d)", caller, zoffset);
+         return true;
+      }
+      if (depth > 1) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "%s(depth = %d)", caller, depth);
+         return true;
+      }
+      break;
+   case GL_TEXTURE_CUBE_MAP:
+      /* Non-array cube maps are special because we have a gl_texture_image
+       * per face.
+       */
+      if (zoffset + depth > 6) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "%s(zoffset + depth = %d)", caller, zoffset + depth);
+         return true;
+      }
+      /* check that every face in the range exists */
+      for (i = 0; i < depth; i++) {
+         GLenum face = GL_TEXTURE_CUBE_MAP_POSITIVE_X + zoffset + i;
+         if (!_mesa_select_tex_image(texObj, face, level)) {
+            /* non-existent face */
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "%s(missing cube face)", caller);
+            return true;
+         }
+      }
+      break;
+   default:
+      ; /* nothing */
+   }
+
+   texImage = select_tex_image(texObj, target, level, zoffset);
+   if (!texImage) {
+      /* missing texture image */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(missing image)", caller);
+      return true;
+   }
+
+   if (xoffset + width > texImage->Width) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "%s(xoffset %d + width %d > %u)",
+                  caller, xoffset, width, texImage->Width);
+      return true;
+   }
+
+   if (yoffset + height > texImage->Height) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "%s(yoffset %d + height %d > %u)",
+                  caller, yoffset, height, texImage->Height);
+      return true;
+   }
+
+   if (target != GL_TEXTURE_CUBE_MAP) {
+      /* Cube map error checking was done above */
+      if (zoffset + depth > texImage->Depth) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "%s(zoffset %d + depth %d > %u)",
+                     caller, zoffset, depth, texImage->Depth);
+         return true;
+      }
+   }
+
+   /* Extra checks for compressed textures */
+   {
+      GLuint bw, bh;
+      _mesa_get_format_block_size(texImage->TexFormat, &bw, &bh);
+      if (bw > 1 || bh > 1) {
+         /* offset must be multiple of block size */
+         if (xoffset % bw != 0) {
+            _mesa_error(ctx, GL_INVALID_VALUE,
+                        "%s(xoffset = %d)", caller, xoffset);
+            return true;
+         }
+         if (target != GL_TEXTURE_1D && target != GL_TEXTURE_1D_ARRAY) {
+            if (yoffset % bh != 0) {
+               _mesa_error(ctx, GL_INVALID_VALUE,
+                           "%s(yoffset = %d)", caller, yoffset);
+               return true;
+            }
+         }
+
+         /* The size must be a multiple of bw x bh, or we must be using an
+          * offset+size that exactly hits the edge of the image.
+          */
+         if ((width % bw != 0) &&
+             (xoffset + width != (GLint) texImage->Width)) {
+            _mesa_error(ctx, GL_INVALID_VALUE,
+                        "%s(width = %d)", caller, width);
+            return true;
+         }
+
+         if ((height % bh != 0) &&
+             (yoffset + height != (GLint) texImage->Height)) {
+            _mesa_error(ctx, GL_INVALID_VALUE,
+                        "%s(height = %d)", caller, height);
+            return true;
+         }
+      }
+   }
+
+   if (width == 0 || height == 0 || depth == 0) {
+      /* Not an error, but nothing to do.  Return 'true' so that the
+       * caller simply returns.
+       */
+      return true;
+   }
+
+   return false;
+}
+
+
+/**
+ * Do PBO-related error checking for getting uncompressed images.
+ * \return true if there was an error (or the GetTexImage is to be a no-op)
+ */
+static bool
+pbo_error_check(struct gl_context *ctx, GLenum target,
+                GLsizei width, GLsizei height, GLsizei depth,
+                GLenum format, GLenum type, GLsizei clientMemSize,
+                GLvoid *pixels,
+                const char *caller)
+{
+   const GLuint dimensions = (target == GL_TEXTURE_3D) ? 3 : 2;
+
+   if (!_mesa_validate_pbo_access(dimensions, &ctx->Pack, width, height, depth,
+                                  format, type, clientMemSize, pixels)) {
+      if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "%s(out of bounds PBO access)", caller);
+      } else {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "%s(out of bounds access: bufSize (%d) is too small)",
+                     caller, clientMemSize);
+      }
+      return true;
+   }
+
+   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
+      /* PBO should not be mapped */
+      if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "%s(PBO is mapped)", caller);
+         return true;
+      }
+   }
+
+   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
+      /* not an error, do nothing */
+      return true;
+   }
+
+   return false;
+}
+
+
+/**
+ * Do error checking for all (non-compressed) get-texture-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
 getteximage_error_check(struct gl_context *ctx,
-                        struct gl_texture_image *texImage,
+                        struct gl_texture_object *texObj,
                         GLenum target, GLint level,
-                        GLenum format, GLenum type, GLsizei clientMemSize,
-                        GLvoid *pixels, bool dsa)
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        GLenum format, GLenum type, GLsizei bufSize,
+                        GLvoid *pixels, const char *caller)
 {
-   const GLint maxLevels = _mesa_max_texture_levels(ctx, target);
-   const GLuint dimensions = (target == GL_TEXTURE_3D) ? 3 : 2;
-   GLenum baseFormat;
-   const char *suffix = dsa ? "ture" : "";
+   struct gl_texture_image *texImage;
+   GLenum baseFormat, err;
+   GLint maxLevels;
 
-   assert(texImage);
-   assert(maxLevels != 0);
+   assert(texObj);
+
+   if (texObj->Target == 0) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
+      return true;
+   }
+
+   maxLevels = _mesa_max_texture_levels(ctx, target);
    if (level < 0 || level >= maxLevels) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetTex%sImage(level out of range)", suffix);
-      return GL_TRUE;
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
+      return true;
+   }
+
+   err = _mesa_error_check_format_and_type(ctx, format, type);
+   if (err != GL_NO_ERROR) {
+      _mesa_error(ctx, err, "%s(format/type)", caller);
+      return true;
+   }
+
+   if (dimensions_error_check(ctx, texObj, target, level,
+                              xoffset, yoffset, zoffset,
+                              width, height, depth, caller)) {
+      return true;
    }
 
+   if (pbo_error_check(ctx, target, width, height, depth,
+                       format, type, bufSize, pixels, caller)) {
+      return true;
+   }
+
+   texImage = select_tex_image(texObj, target, level, zoffset);
+   assert(texImage);
+
    /*
     * Format and type checking has been moved up to GetnTexImage and
     * GetTextureImage so that it happens before getting the texImage object.
@@ -899,494 +1196,586 @@
    if (_mesa_is_color_format(format)
        && !_mesa_is_color_format(baseFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetTex%sImage(format mismatch)", suffix);
-      return GL_TRUE;
+                  "%s(format mismatch)", caller);
+      return true;
    }
    else if (_mesa_is_depth_format(format)
             && !_mesa_is_depth_format(baseFormat)
             && !_mesa_is_depthstencil_format(baseFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetTex%sImage(format mismatch)", suffix);
-      return GL_TRUE;
+                  "%s(format mismatch)", caller);
+      return true;
    }
    else if (_mesa_is_stencil_format(format)
             && !ctx->Extensions.ARB_texture_stencil8) {
       _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetTex%sImage(format=GL_STENCIL_INDEX)", suffix);
-      return GL_TRUE;
+                  "%s(format=GL_STENCIL_INDEX)", caller);
+      return true;
+   }
+   else if (_mesa_is_stencil_format(format)
+	    && !_mesa_is_depthstencil_format(baseFormat)
+	    && !_mesa_is_stencil_format(baseFormat)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(format mismatch)", caller);
+      return true;
    }
    else if (_mesa_is_ycbcr_format(format)
             && !_mesa_is_ycbcr_format(baseFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetTex%sImage(format mismatch)", suffix);
-      return GL_TRUE;
+                  "%s(format mismatch)", caller);
+      return true;
    }
    else if (_mesa_is_depthstencil_format(format)
             && !_mesa_is_depthstencil_format(baseFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetTex%sImage(format mismatch)", suffix);
-      return GL_TRUE;
+                  "%s(format mismatch)", caller);
+      return true;
    }
-   else if (!_mesa_is_stencil_format(format) && _mesa_is_enum_format_integer(format) !=
+   else if (!_mesa_is_stencil_format(format) &&
+            _mesa_is_enum_format_integer(format) !=
             _mesa_is_format_integer(texImage->TexFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetTex%sImage(format mismatch)", suffix);
-      return GL_TRUE;
+                  "%s(format mismatch)", caller);
+      return true;
    }
 
-   if (!_mesa_validate_pbo_access(dimensions, &ctx->Pack, texImage->Width,
-                                  texImage->Height, texImage->Depth,
-                                  format, type, clientMemSize, pixels)) {
-      if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetTex%sImage(out of bounds PBO access)", suffix);
-      } else {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "%s(out of bounds access:"
-                     " bufSize (%d) is too small)",
-                     dsa ? "glGetTextureImage" : "glGetnTexImageARB",
-                     clientMemSize);
-      }
-      return GL_TRUE;
+   return false;
+}
+
+
+/**
+ * Return the width, height and depth of a texture image.
+ * This function must be resilient to bad parameter values since
+ * this is called before full error checking.
+ */
+static void
+get_texture_image_dims(const struct gl_texture_object *texObj,
+                       GLenum target, GLint level,
+                       GLsizei *width, GLsizei *height, GLsizei *depth)
+{
+   const struct gl_texture_image *texImage = NULL;
+
+   if (level >= 0 && level < MAX_TEXTURE_LEVELS) {
+      texImage = _mesa_select_tex_image(texObj, target, level);
    }
 
-   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      /* PBO should not be mapped */
-      if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetTex%sImage(PBO is mapped)", suffix);
-         return GL_TRUE;
+   if (texImage) {
+      *width = texImage->Width;
+      *height = texImage->Height;
+      if (target == GL_TEXTURE_CUBE_MAP) {
+         *depth = 6;
+      }
+      else {
+         *depth = texImage->Depth;
       }
    }
-
-   return GL_FALSE;
+   else {
+      *width = *height = *depth = 0;
+   }
 }
 
 
 /**
- * This is the implementation for glGetnTexImageARB, glGetTextureImage,
- * and glGetTexImage.
- *
- * Requires caller to pass in texImage object because _mesa_GetTextureImage
- * must handle the GL_TEXTURE_CUBE_MAP target.
- *
- * \param target texture target.
+ * Common code for all (uncompressed) get-texture-image functions.
+ * \param texObj  the texture object (should not be null)
+ * \param target  user-provided target, or the texture's own target for DSA
  * \param level image level.
  * \param format pixel data format for returned image.
  * \param type pixel data type for returned image.
  * \param bufSize size of the pixels data buffer.
  * \param pixels returned pixel data.
- * \param dsa True when the caller is an ARB_direct_state_access function,
- *            false otherwise
+ * \param caller  name of calling function
  */
-void
-_mesa_get_texture_image(struct gl_context *ctx,
-                        struct gl_texture_object *texObj,
-                        struct gl_texture_image *texImage, GLenum target,
-                        GLint level, GLenum format, GLenum type,
-                        GLsizei bufSize, GLvoid *pixels, bool dsa)
+static void
+get_texture_image(struct gl_context *ctx,
+                  struct gl_texture_object *texObj,
+                  GLenum target, GLint level,
+                  GLint xoffset, GLint yoffset, GLint zoffset,
+                  GLsizei width, GLsizei height, GLint depth,
+                  GLenum format, GLenum type,
+                  GLvoid *pixels, const char *caller)
 {
-   assert(texObj);
-   assert(texImage);
+   struct gl_texture_image *texImage;
+   unsigned firstFace, numFaces, i;
+   GLint imageStride;
 
    FLUSH_VERTICES(ctx, 0);
 
-   /*
-    * Legal target checking has been moved up to GetnTexImage and
-    * GetTextureImage so that it can be caught before receiving a NULL
-    * texImage object and exiting.
-    */
-
-   if (getteximage_error_check(ctx, texImage, target, level, format,
-                               type, bufSize, pixels, dsa)) {
-      return;
-   }
+   texImage = select_tex_image(texObj, target, level, zoffset);
+   assert(texImage);  /* should have been error checked already */
 
-   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
-      /* not an error, do nothing */
+   if (_mesa_is_zero_size_texture(texImage)) {
+      /* no image data to return */
       return;
    }
 
-   if (_mesa_is_zero_size_texture(texImage))
-      return;
-
    if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) {
-      _mesa_debug(ctx, "glGetTex%sImage(tex %u) format = %s, w=%d, h=%d,"
+      _mesa_debug(ctx, "%s(tex %u) format = %s, w=%d, h=%d,"
                   " dstFmt=0x%x, dstType=0x%x\n",
-                  dsa ? "ture": "",
-                  texObj->Name,
+                  caller, texObj->Name,
                   _mesa_get_format_name(texImage->TexFormat),
                   texImage->Width, texImage->Height,
                   format, type);
    }
 
+   if (target == GL_TEXTURE_CUBE_MAP) {
+      /* Compute stride between cube faces */
+      imageStride = _mesa_image_image_stride(&ctx->Pack, width, height,
+                                             format, type);
+      firstFace = zoffset;
+      numFaces = depth;
+      zoffset = 0;
+      depth = 1;
+   }
+   else {
+      imageStride = 0;
+      firstFace = _mesa_tex_target_to_face(target);
+      numFaces = 1;
+   }
+
    _mesa_lock_texture(ctx, texObj);
-   {
-      ctx->Driver.GetTexImage(ctx, format, type, pixels, texImage);
+
+   for (i = 0; i < numFaces; i++) {
+      texImage = texObj->Image[firstFace + i][level];
+      assert(texImage);
+
+      ctx->Driver.GetTexSubImage(ctx, xoffset, yoffset, zoffset,
+                                 width, height, depth,
+                                 format, type, pixels, texImage);
+
+      /* next cube face */
+      pixels = (GLubyte *) pixels + imageStride;
    }
+
    _mesa_unlock_texture(ctx, texObj);
 }
 
-/**
- * Get texture image.  Called by glGetTexImage.
- *
- * \param target texture target.
- * \param level image level.
- * \param format pixel data format for returned image.
- * \param type pixel data type for returned image.
- * \param bufSize size of the pixels data buffer.
- * \param pixels returned pixel data.
- */
+
 void GLAPIENTRY
-_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format,
-                      GLenum type, GLsizei bufSize, GLvoid *pixels)
+_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type,
+                      GLsizei bufSize, GLvoid *pixels)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   GLenum err;
    GET_CURRENT_CONTEXT(ctx);
+   static const char *caller = "glGetnTexImageARB";
+   GLsizei width, height, depth;
+   struct gl_texture_object *texObj;
 
-   /*
-    * This has been moved here because a format/type mismatch can cause a NULL
-    * texImage object, which in turn causes the mismatch error to be
-    * ignored.
-    */
-   err = _mesa_error_check_format_and_type(ctx, format, type);
-   if (err != GL_NO_ERROR) {
-      _mesa_error(ctx, err, "glGetnTexImage(format/type)");
-      return;
-   }
-
-   /*
-    * Legal target checking has been moved here to prevent exiting with a NULL
-    * texImage object.
-    */
    if (!legal_getteximage_target(ctx, target, false)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetnTexImage(target=0x%x)",
-                  target);
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
       return;
    }
 
    texObj = _mesa_get_current_tex_object(ctx, target);
-   if (!texObj)
-      return;
+   assert(texObj);
+
+   get_texture_image_dims(texObj, target, level, &width, &height, &depth);
 
-   texImage = _mesa_select_tex_image(texObj, target, level);
-   if (!texImage)
+   if (getteximage_error_check(ctx, texObj, target, level,
+                               0, 0, 0, width, height, depth,
+                               format, type, bufSize, pixels, caller)) {
       return;
+   }
 
-   _mesa_get_texture_image(ctx, texObj, texImage, target, level, format, type,
-                           bufSize, pixels, false);
+   get_texture_image(ctx, texObj, target, level,
+                     0, 0, 0, width, height, depth,
+                     format, type, pixels, caller);
 }
 
 
 void GLAPIENTRY
-_mesa_GetTexImage( GLenum target, GLint level, GLenum format,
-                   GLenum type, GLvoid *pixels )
+_mesa_GetTexImage(GLenum target, GLint level, GLenum format, GLenum type,
+                  GLvoid *pixels )
 {
-   _mesa_GetnTexImageARB(target, level, format, type, INT_MAX, pixels);
+   GET_CURRENT_CONTEXT(ctx);
+   static const char *caller = "glGetTexImage";
+   GLsizei width, height, depth;
+   struct gl_texture_object *texObj;
+
+   if (!legal_getteximage_target(ctx, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
+      return;
+   }
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   assert(texObj);
+
+   get_texture_image_dims(texObj, target, level, &width, &height, &depth);
+
+   if (getteximage_error_check(ctx, texObj, target, level,
+                               0, 0, 0, width, height, depth,
+                               format, type, INT_MAX, pixels, caller)) {
+      return;
+   }
+
+   get_texture_image(ctx, texObj, target, level,
+                     0, 0, 0, width, height, depth,
+                     format, type, pixels, caller);
 }
 
-/**
- * Get texture image.
- *
- * \param texture texture name.
- * \param level image level.
- * \param format pixel data format for returned image.
- * \param type pixel data type for returned image.
- * \param bufSize size of the pixels data buffer.
- * \param pixels returned pixel data.
- */
+
 void GLAPIENTRY
-_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format,
-                      GLenum type, GLsizei bufSize, GLvoid *pixels)
+_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type,
+                      GLsizei bufSize, GLvoid *pixels)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   int i;
-   GLint image_stride;
-   GLenum err;
    GET_CURRENT_CONTEXT(ctx);
+   GLsizei width, height, depth;
+   static const char *caller = "glGetTextureImage";
+   struct gl_texture_object *texObj =
+      _mesa_lookup_texture_err(ctx, texture, caller);
 
-   /*
-    * This has been moved here because a format/type mismatch can cause a NULL
-    * texImage object, which in turn causes the mismatch error to be
-    * ignored.
-    */
-   err = _mesa_error_check_format_and_type(ctx, format, type);
-   if (err != GL_NO_ERROR) {
-      _mesa_error(ctx, err, "glGetTextureImage(format/type)");
+   if (!texObj) {
       return;
    }
 
-   texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureImage");
-   if (!texObj)
-      return;
+   get_texture_image_dims(texObj, texObj->Target, level,
+                          &width, &height, &depth);
 
-   /*
-    * Legal target checking has been moved here to prevent exiting with a NULL
-    * texImage object.
-    */
-   if (!legal_getteximage_target(ctx, texObj->Target, true)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTextureImage(target=%s)",
-                  _mesa_lookup_enum_by_nr(texObj->Target));
+   if (getteximage_error_check(ctx, texObj, texObj->Target, level,
+                               0, 0, 0, width, height, depth,
+                               format, type, bufSize, pixels, caller)) {
       return;
    }
 
-   /* Must handle special case GL_TEXTURE_CUBE_MAP. */
-   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+   get_texture_image(ctx, texObj, texObj->Target, level,
+                     0, 0, 0, width, height, depth,
+                     format, type, pixels, caller);
+}
 
-      /* Make sure the texture object is a proper cube.
-       * (See texturesubimage in teximage.c for details on why this check is
-       * performed.)
-       */
-      if (!_mesa_cube_level_complete(texObj, level)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetTextureImage(cube map incomplete)");
-         return;
-      }
 
-      /* Copy each face. */
-      for (i = 0; i < 6; ++i) {
-         texImage = texObj->Image[i][level];
-         assert(texImage);
-
-         _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level,
-                                 format, type, bufSize, pixels, true);
-
-         image_stride = _mesa_image_image_stride(&ctx->Pack, texImage->Width,
-                                                 texImage->Height, format,
-                                                 type);
-         pixels = (GLubyte *) pixels + image_stride;
-         bufSize -= image_stride;
-      }
+void GLAPIENTRY
+_mesa_GetTextureSubImage(GLuint texture, GLint level,
+                         GLint xoffset, GLint yoffset, GLint zoffset,
+                         GLsizei width, GLsizei height, GLsizei depth,
+                         GLenum format, GLenum type, GLsizei bufSize,
+                         void *pixels)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   static const char *caller = "glGetTextureSubImage";
+   struct gl_texture_object *texObj =
+      _mesa_lookup_texture_err(ctx, texture, caller);
+
+   if (!texObj) {
+      return;
    }
-   else {
-      texImage = _mesa_select_tex_image(texObj, texObj->Target, level);
-      if (!texImage)
-         return;
 
-      _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level,
-                              format, type, bufSize, pixels, true);
+   if (getteximage_error_check(ctx, texObj, texObj->Target, level,
+                               xoffset, yoffset, zoffset, width, height, depth,
+                               format, type, bufSize, pixels, caller)) {
+      return;
    }
+
+   get_texture_image(ctx, texObj, texObj->Target, level,
+                     xoffset, yoffset, zoffset, width, height, depth,
+                     format, type, pixels, caller);
 }
 
+
+
 /**
- * Do error checking for a glGetCompressedTexImage() call.
- * \return GL_TRUE if any error, GL_FALSE if no errors.
+ * Compute the number of bytes which will be written when retrieving
+ * a sub-region of a compressed texture.
  */
-static GLboolean
+static GLsizei
+packed_compressed_size(GLuint dimensions, mesa_format format,
+                       GLsizei width, GLsizei height, GLsizei depth,
+                       const struct gl_pixelstore_attrib *packing)
+{
+   struct compressed_pixelstore st;
+   GLsizei totalBytes;
+
+   _mesa_compute_compressed_pixelstore(dimensions, format,
+                                       width, height, depth,
+                                       packing, &st);
+   totalBytes =
+      (st.CopySlices - 1) * st.TotalRowsPerSlice * st.TotalBytesPerRow +
+      st.SkipBytes +
+      (st.CopyRowsPerSlice - 1) * st.TotalBytesPerRow +
+      st.CopyBytesPerRow;
+
+   return totalBytes;
+}
+
+
+/**
+ * Do error checking for getting compressed texture images.
+ * \return true if any error, false if no errors.
+ */
+static bool
 getcompressedteximage_error_check(struct gl_context *ctx,
-                                  struct gl_texture_image *texImage,
-                                  GLenum target,
-                                  GLint level, GLsizei clientMemSize,
-                                  GLvoid *img, bool dsa)
-{
-   const GLint maxLevels = _mesa_max_texture_levels(ctx, target);
-   GLuint compressedSize, dimensions;
-   const char *suffix = dsa ? "ture" : "";
+                                  struct gl_texture_object *texObj,
+                                  GLenum target, GLint level,
+                                  GLint xoffset, GLint yoffset, GLint zoffset,
+                                  GLsizei width, GLsizei height, GLsizei depth,
+                                  GLsizei bufSize, GLvoid *pixels,
+                                  const char *caller)
+{
+   struct gl_texture_image *texImage;
+   GLint maxLevels;
+   GLsizei totalBytes;
+   GLuint dimensions;
 
-   assert(texImage);
+   assert(texObj);
 
-   if (!legal_getteximage_target(ctx, target, dsa)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetCompressedTex%sImage(target=%s)", suffix,
-                  _mesa_lookup_enum_by_nr(target));
-      return GL_TRUE;
+   if (texObj->Target == 0) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
+      return true;
    }
 
-   assert(maxLevels != 0);
+   maxLevels = _mesa_max_texture_levels(ctx, target);
    if (level < 0 || level >= maxLevels) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetCompressedTex%sImage(bad level = %d)", suffix, level);
-      return GL_TRUE;
+                  "%s(bad level = %d)", caller, level);
+      return true;
    }
 
+   if (dimensions_error_check(ctx, texObj, target, level,
+                              xoffset, yoffset, zoffset,
+                              width, height, depth, caller)) {
+      return true;
+   }
+
+   texImage = select_tex_image(texObj, target, level, zoffset);
+   assert(texImage);
+
    if (!_mesa_is_format_compressed(texImage->TexFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetCompressedTex%sImage(texture is not compressed)",
-                  suffix);
-      return GL_TRUE;
+                  "%s(texture is not compressed)", caller);
+      return true;
    }
 
-   compressedSize = _mesa_format_image_size(texImage->TexFormat,
-                                            texImage->Width,
-                                            texImage->Height,
-                                            texImage->Depth);
-
    /* Check for invalid pixel storage modes */
-   dimensions = _mesa_get_texture_dimensions(texImage->TexObject->Target);
+   dimensions = _mesa_get_texture_dimensions(texObj->Target);
    if (!_mesa_compressed_pixel_storage_error_check(ctx, dimensions,
-                                              &ctx->Pack, dsa ?
-                                              "glGetCompressedTextureImage":
-                                              "glGetCompressedTexImage")) {
-      return GL_TRUE;
+                                                   &ctx->Pack,
+                                                   caller)) {
+      return true;
    }
 
-   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      /* do bounds checking on writing to client memory */
-      if (clientMemSize < (GLsizei) compressedSize) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "%s(out of bounds access: bufSize (%d) is too small)",
-                     dsa ? "glGetCompressedTextureImage" :
-                     "glGetnCompressedTexImageARB", clientMemSize);
-         return GL_TRUE;
-      }
-   } else {
+   /* Compute number of bytes that may be touched in the dest buffer */
+   totalBytes = packed_compressed_size(dimensions, texImage->TexFormat,
+                                       width, height, depth,
+                                       &ctx->Pack);
+
+   /* Do dest buffer bounds checking */
+   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
       /* do bounds checking on PBO write */
-      if ((const GLubyte *) img + compressedSize >
-          (const GLubyte *) ctx->Pack.BufferObj->Size) {
+      if ((GLubyte *) pixels + totalBytes >
+          (GLubyte *) ctx->Pack.BufferObj->Size) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetCompressedTex%sImage(out of bounds PBO access)",
-                     suffix);
-         return GL_TRUE;
+                     "%s(out of bounds PBO access)", caller);
+         return true;
       }
 
       /* make sure PBO is not mapped */
       if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", caller);
+         return true;
+      }
+   }
+   else {
+      /* do bounds checking on writing to client memory */
+      if (totalBytes > bufSize) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetCompressedTex%sImage(PBO is mapped)", suffix);
-         return GL_TRUE;
+                     "%s(out of bounds access: bufSize (%d) is too small)",
+                     caller, bufSize);
+         return true;
       }
    }
 
-   return GL_FALSE;
+   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
+      /* not an error, but do nothing */
+      return true;
+   }
+
+   return false;
 }
 
-/** Implements glGetnCompressedTexImageARB, glGetCompressedTexImage, and
- * glGetCompressedTextureImage.
- *
- * texImage must be passed in because glGetCompressedTexImage must handle the
- * target GL_TEXTURE_CUBE_MAP.
+
+/**
+ * Common helper for all glGetCompressed-teximage functions.
  */
-void
-_mesa_get_compressed_texture_image(struct gl_context *ctx,
-                                   struct gl_texture_object *texObj,
-                                   struct gl_texture_image *texImage,
-                                   GLenum target, GLint level,
-                                   GLsizei bufSize, GLvoid *pixels,
-                                   bool dsa)
+static void
+get_compressed_texture_image(struct gl_context *ctx,
+                             struct gl_texture_object *texObj,
+                             GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset, GLint zoffset,
+                             GLsizei width, GLsizei height, GLint depth,
+                             GLvoid *pixels,
+                             const char *caller)
 {
-   assert(texObj);
-   assert(texImage);
+   struct gl_texture_image *texImage;
+   unsigned firstFace, numFaces, i, imageStride;
 
    FLUSH_VERTICES(ctx, 0);
 
-   if (getcompressedteximage_error_check(ctx, texImage, target, level,
-                                         bufSize, pixels, dsa)) {
-      return;
-   }
-
-   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
-      /* not an error, do nothing */
-      return;
-   }
+   texImage = select_tex_image(texObj, target, level, zoffset);
+   assert(texImage);  /* should have been error checked already */
 
    if (_mesa_is_zero_size_texture(texImage))
       return;
 
    if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) {
       _mesa_debug(ctx,
-                  "glGetCompressedTex%sImage(tex %u) format = %s, w=%d, h=%d\n",
-                  dsa ? "ture" : "", texObj->Name,
+                  "%s(tex %u) format = %s, w=%d, h=%d\n",
+                  caller, texObj->Name,
                   _mesa_get_format_name(texImage->TexFormat),
                   texImage->Width, texImage->Height);
    }
 
+   if (target == GL_TEXTURE_CUBE_MAP) {
+      struct compressed_pixelstore store;
+
+      /* Compute image stride between cube faces */
+      _mesa_compute_compressed_pixelstore(2, texImage->TexFormat,
+                                          width, height, depth,
+                                          &ctx->Pack, &store);
+      imageStride = store.TotalBytesPerRow * store.TotalRowsPerSlice;
+
+      firstFace = zoffset;
+      numFaces = depth;
+      zoffset = 0;
+      depth = 1;
+   }
+   else {
+      imageStride = 0;
+      firstFace = _mesa_tex_target_to_face(target);
+      numFaces = 1;
+   }
+
    _mesa_lock_texture(ctx, texObj);
-   {
-      ctx->Driver.GetCompressedTexImage(ctx, texImage, pixels);
+
+   for (i = 0; i < numFaces; i++) {
+      texImage = texObj->Image[firstFace + i][level];
+      assert(texImage);
+
+      ctx->Driver.GetCompressedTexSubImage(ctx, texImage,
+                                           xoffset, yoffset, zoffset,
+                                           width, height, depth, pixels);
+
+      /* next cube face */
+      pixels = (GLubyte *) pixels + imageStride;
    }
+
    _mesa_unlock_texture(ctx, texObj);
 }
 
+
 void GLAPIENTRY
 _mesa_GetnCompressedTexImageARB(GLenum target, GLint level, GLsizei bufSize,
-                                GLvoid *img)
+                                GLvoid *pixels)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
    GET_CURRENT_CONTEXT(ctx);
+   static const char *caller = "glGetnCompressedTexImageARB";
+   GLsizei width, height, depth;
+   struct gl_texture_object *texObj;
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   if (!texObj)
+   if (!legal_getteximage_target(ctx, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
       return;
+   }
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   assert(texObj);
+
+   get_texture_image_dims(texObj, target, level, &width, &height, &depth);
 
-   texImage = _mesa_select_tex_image(texObj, target, level);
-   if (!texImage)
+   if (getcompressedteximage_error_check(ctx, texObj, target, level,
+                                         0, 0, 0, width, height, depth,
+                                         bufSize, pixels, caller)) {
       return;
+   }
 
-   _mesa_get_compressed_texture_image(ctx, texObj, texImage, target, level,
-                                      bufSize, img, false);
+   get_compressed_texture_image(ctx, texObj, target, level,
+                                0, 0, 0, width, height, depth,
+                                pixels, caller);
 }
 
+
 void GLAPIENTRY
-_mesa_GetCompressedTexImage(GLenum target, GLint level, GLvoid *img)
+_mesa_GetCompressedTexImage(GLenum target, GLint level, GLvoid *pixels)
 {
-   _mesa_GetnCompressedTexImageARB(target, level, INT_MAX, img);
+   GET_CURRENT_CONTEXT(ctx);
+   static const char *caller = "glGetCompressedTexImage";
+   GLsizei width, height, depth;
+   struct gl_texture_object *texObj;
+
+   if (!legal_getteximage_target(ctx, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
+      return;
+   }
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   assert(texObj);
+
+   get_texture_image_dims(texObj, target, level,
+                          &width, &height, &depth);
+
+   if (getcompressedteximage_error_check(ctx, texObj, target, level,
+                                         0, 0, 0, width, height, depth,
+                                         INT_MAX, pixels, caller)) {
+      return;
+   }
+
+   get_compressed_texture_image(ctx, texObj, target, level,
+                                0, 0, 0, width, height, depth,
+                                pixels, caller);
 }
 
-/**
- * Get compressed texture image.
- *
- * \param texture texture name.
- * \param level image level.
- * \param bufSize size of the pixels data buffer.
- * \param pixels returned pixel data.
- */
+
 void GLAPIENTRY
 _mesa_GetCompressedTextureImage(GLuint texture, GLint level,
                                 GLsizei bufSize, GLvoid *pixels)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   int i;
-   GLint image_stride;
    GET_CURRENT_CONTEXT(ctx);
+   static const char *caller = "glGetCompressedTextureImage";
+   GLsizei width, height, depth;
+   struct gl_texture_object *texObj =
+      _mesa_lookup_texture_err(ctx, texture, caller);
 
-   texObj = _mesa_lookup_texture_err(ctx, texture,
-                                     "glGetCompressedTextureImage");
-   if (!texObj)
+   if (!texObj) {
       return;
+   }
 
-   /* Must handle special case GL_TEXTURE_CUBE_MAP. */
-   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+   get_texture_image_dims(texObj, texObj->Target, level,
+                          &width, &height, &depth);
 
-      /* Make sure the texture object is a proper cube.
-       * (See texturesubimage in teximage.c for details on why this check is
-       * performed.)
-       */
-      if (!_mesa_cube_level_complete(texObj, level)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetCompressedTextureImage(cube map incomplete)");
-         return;
-      }
+   if (getcompressedteximage_error_check(ctx, texObj, texObj->Target, level,
+                                         0, 0, 0, width, height, depth,
+                                         bufSize, pixels, caller)) {
+      return;
+   }
 
-      /* Copy each face. */
-      for (i = 0; i < 6; ++i) {
-         texImage = texObj->Image[i][level];
-         assert(texImage);
-
-         _mesa_get_compressed_texture_image(ctx, texObj, texImage,
-                                            texObj->Target, level,
-                                            bufSize, pixels, true);
-
-         /* Compressed images don't have a client format */
-         image_stride = _mesa_format_image_size(texImage->TexFormat,
-                                                texImage->Width,
-                                                texImage->Height, 1);
+   get_compressed_texture_image(ctx, texObj, texObj->Target, level,
+                                0, 0, 0, width, height, depth,
+                                pixels, caller);
+}
 
-         pixels = (GLubyte *) pixels + image_stride;
-         bufSize -= image_stride;
-      }
+
+void GLAPIENTRY
+_mesa_GetCompressedTextureSubImage(GLuint texture, GLint level,
+                                   GLint xoffset, GLint yoffset,
+                                   GLint zoffset, GLsizei width,
+                                   GLsizei height, GLsizei depth,
+                                   GLsizei bufSize, void *pixels)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   static const char *caller = "glGetCompressedTextureSubImage";
+   struct gl_texture_object *texObj;
+
+   texObj = _mesa_lookup_texture_err(ctx, texture, caller);
+   if (!texObj) {
+      return;
    }
-   else {
-      texImage = _mesa_select_tex_image(texObj, texObj->Target, level);
-      if (!texImage)
-         return;
 
-      _mesa_get_compressed_texture_image(ctx, texObj, texImage,
-                                         texObj->Target, level, bufSize,
-                                         pixels, true);
+   if (getcompressedteximage_error_check(ctx, texObj, texObj->Target, level,
+                                         xoffset, yoffset, zoffset,
+                                         width, height, depth,
+                                         bufSize, pixels, caller)) {
+      return;
    }
+
+   get_compressed_texture_image(ctx, texObj, texObj->Target, level,
+                                xoffset, yoffset, zoffset,
+                                width, height, depth,
+                                pixels, caller);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texgetimage.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texgetimage.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texgetimage.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texgetimage.h	2015-09-16 14:36:10.000000000 +0000
@@ -37,22 +37,19 @@
 _mesa_base_pack_format(GLenum format);
 
 extern void
-_mesa_GetTexImage_sw(struct gl_context *ctx,
-                     GLenum format, GLenum type, GLvoid *pixels,
-                     struct gl_texture_image *texImage);
-
-
-extern void
-_mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
-                               struct gl_texture_image *texImage,
-                               GLvoid *data);
+_mesa_GetTexSubImage_sw(struct gl_context *ctx,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLint depth,
+                        GLenum format, GLenum type, GLvoid *pixels,
+                        struct gl_texture_image *texImage);
 
 extern void
-_mesa_get_texture_image(struct gl_context *ctx,
-                        struct gl_texture_object *texObj,
-                        struct gl_texture_image *texImage, GLenum target,
-                        GLint level, GLenum format, GLenum type,
-                        GLsizei bufSize, GLvoid *pixels, bool dsa);
+_mesa_GetCompressedTexSubImage_sw(struct gl_context *ctx,
+                                  struct gl_texture_image *texImage,
+                                  GLint xoffset, GLint yoffset,
+                                  GLint zoffset, GLsizei width,
+                                  GLint height, GLint depth,
+                                  GLvoid *data);
 
 extern void
 _mesa_get_compressed_texture_image( struct gl_context *ctx,
@@ -74,6 +71,14 @@
                       GLenum type, GLsizei bufSize, GLvoid *pixels);
 
 extern void GLAPIENTRY
+_mesa_GetTextureSubImage(GLuint texture, GLint level,
+                         GLint xoffset, GLint yoffset, GLint zoffset,
+                         GLsizei width, GLsizei height, GLsizei depth,
+                         GLenum format, GLenum type, GLsizei bufSize,
+                         void *pixels);
+
+
+extern void GLAPIENTRY
 _mesa_GetCompressedTexImage(GLenum target, GLint lod, GLvoid *img);
 
 extern void GLAPIENTRY
@@ -84,4 +89,11 @@
 _mesa_GetCompressedTextureImage(GLuint texture, GLint level, GLsizei bufSize,
                                 GLvoid *pixels);
 
+extern void GLAPIENTRY
+_mesa_GetCompressedTextureSubImage(GLuint texture, GLint level,
+                                   GLint xoffset, GLint yoffset,
+                                   GLint zoffset, GLsizei width,
+                                   GLsizei height, GLsizei depth,
+                                   GLsizei bufSize, void *pixels);
+
 #endif /* TEXGETIMAGE_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/teximage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/teximage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/teximage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/teximage.c	2015-09-16 14:36:10.000000000 +0000
@@ -222,7 +222,7 @@
       }
    }
 
-   if (ctx->Extensions.ARB_stencil_texturing) {
+   if (ctx->Extensions.ARB_texture_stencil8) {
       switch (internalFormat) {
       case GL_STENCIL_INDEX:
       case GL_STENCIL_INDEX1:
@@ -1008,7 +1008,7 @@
    case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
    case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
-      return _mesa_is_desktop_gl(ctx)
+      return (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx))
          && ctx->Extensions.ARB_texture_multisample
          ? 1 : 0;
    case GL_TEXTURE_EXTERNAL_OES:
@@ -1793,8 +1793,6 @@
 _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
                                GLenum intFormat)
 {
-   (void) intFormat;  /* not used yet */
-
    switch (target) {
    case GL_TEXTURE_2D:
    case GL_PROXY_TEXTURE_2D:
@@ -1814,6 +1812,16 @@
    case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
    case GL_TEXTURE_CUBE_MAP_ARRAY:
       return ctx->Extensions.ARB_texture_cube_map_array;
+   case GL_TEXTURE_3D:
+      switch (intFormat) {
+      case GL_COMPRESSED_RGBA_BPTC_UNORM:
+      case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+      case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+      case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+         return ctx->Extensions.ARB_texture_compression_bptc;
+      default:
+         return GL_FALSE;
+      }
    default:
       return GL_FALSE;
    }
@@ -2081,6 +2089,53 @@
 }
 
 /**
+ * Test the combination of format, type and internal format arguments of
+ * different texture operations on GLES.
+ *
+ * \param ctx GL context.
+ * \param format pixel data format given by the user.
+ * \param type pixel data type given by the user.
+ * \param internalFormat internal format given by the user.
+ * \param dimensions texture image dimensions (must be 1, 2 or 3).
+ * \param callerName name of the caller function to print in the error message
+ *
+ * \return true if a error is found, false otherwise
+ *
+ * Currently, it is used by texture_error_check() and texsubimage_error_check().
+ */
+static bool
+texture_format_error_check_gles(struct gl_context *ctx, GLenum format,
+                                GLenum type, GLenum internalFormat,
+                                GLuint dimensions, const char *callerName)
+{
+   GLenum err;
+
+   if (_mesa_is_gles3(ctx)) {
+      err = _mesa_es3_error_check_format_and_type(ctx, format, type,
+                                                  internalFormat);
+      if (err != GL_NO_ERROR) {
+         _mesa_error(ctx, err,
+                     "%s(format = %s, type = %s, internalformat = %s)",
+                     callerName, _mesa_enum_to_string(format),
+                     _mesa_enum_to_string(type),
+                     _mesa_enum_to_string(internalFormat));
+         return true;
+      }
+   }
+   else {
+      err = _mesa_es_error_check_format_and_type(format, type, dimensions);
+      if (err != GL_NO_ERROR) {
+         _mesa_error(ctx, err, "%s(format = %s, type = %s)",
+                     callerName, _mesa_enum_to_string(format),
+                     _mesa_enum_to_string(type));
+         return true;
+      }
+   }
+
+   return false;
+}
+
+/**
  * Test the glTexImage[123]D() parameters for errors.
  *
  * \param ctx GL context.
@@ -2151,39 +2206,17 @@
     * Formats and types that require additional extensions (e.g., GL_FLOAT
     * requires GL_OES_texture_float) are filtered elsewhere.
     */
-
-   if (_mesa_is_gles(ctx)) {
-      if (_mesa_is_gles3(ctx)) {
-         err = _mesa_es3_error_check_format_and_type(ctx, format, type,
-                                                     internalFormat);
-      } else {
-         if (format != internalFormat) {
-            _mesa_error(ctx, GL_INVALID_OPERATION,
-                        "glTexImage%dD(format = %s, internalFormat = %s)",
-                        dimensions,
-                        _mesa_lookup_enum_by_nr(format),
-                        _mesa_lookup_enum_by_nr(internalFormat));
-            return GL_TRUE;
-         }
-
-         err = _mesa_es_error_check_format_and_type(format, type, dimensions);
-      }
-      if (err != GL_NO_ERROR) {
-         _mesa_error(ctx, err,
-                     "glTexImage%dD(format = %s, type = %s, internalFormat = %s)",
-                     dimensions,
-                     _mesa_lookup_enum_by_nr(format),
-                     _mesa_lookup_enum_by_nr(type),
-                     _mesa_lookup_enum_by_nr(internalFormat));
-         return GL_TRUE;
-      }
+   if (_mesa_is_gles(ctx) &&
+       texture_format_error_check_gles(ctx, format, type, internalFormat,
+                                       dimensions, "glTexImage%dD")) {
+     return GL_TRUE;
    }
 
    /* Check internalFormat */
    if (_mesa_base_tex_format(ctx, internalFormat) < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glTexImage%dD(internalFormat=%s)",
-                  dimensions, _mesa_lookup_enum_by_nr(internalFormat));
+                  dimensions, _mesa_enum_to_string(internalFormat));
       return GL_TRUE;
    }
 
@@ -2192,8 +2225,8 @@
    if (err != GL_NO_ERROR) {
       _mesa_error(ctx, err,
                   "glTexImage%dD(incompatible format = %s, type = %s)",
-                  dimensions, _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type));
+                  dimensions, _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type));
       return GL_TRUE;
    }
 
@@ -2208,8 +2241,8 @@
    if (!texture_formats_agree(internalFormat, format)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glTexImage%dD(incompatible internalFormat = %s, format = %s)",
-                  dimensions, _mesa_lookup_enum_by_nr(internalFormat),
-                  _mesa_lookup_enum_by_nr(format));
+                  dimensions, _mesa_enum_to_string(internalFormat),
+                  _mesa_enum_to_string(format));
       return GL_TRUE;
    }
 
@@ -2324,7 +2357,7 @@
    if (!_mesa_is_compressed_format(ctx, internalFormat)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glCompressedTexImage%dD(internalFormat=%s)",
-                  dimensions, _mesa_lookup_enum_by_nr(internalFormat));
+                  dimensions, _mesa_enum_to_string(internalFormat));
       return GL_TRUE;
    }
 
@@ -2485,27 +2518,32 @@
       return GL_TRUE;
    }
 
-   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
-    * combinations of format and type that can be used.  Formats and types
-    * that require additional extensions (e.g., GL_FLOAT requires
-    * GL_OES_texture_float) are filtered elsewhere.
-    */
-   if (_mesa_is_gles(ctx) && !_mesa_is_gles3(ctx)) {
-      err = _mesa_es_error_check_format_and_type(format, type, dimensions);
-      if (err != GL_NO_ERROR) {
-         _mesa_error(ctx, err, "%s(format = %s, type = %s)",
-                     callerName, _mesa_lookup_enum_by_nr(format),
-                     _mesa_lookup_enum_by_nr(type));
-         return GL_TRUE;
-      }
+   texImage = _mesa_select_tex_image(texObj, target, level);
+   if (!texImage) {
+      /* non-existent texture level */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture image)",
+                  callerName);
+      return GL_TRUE;
    }
 
    err = _mesa_error_check_format_and_type(ctx, format, type);
    if (err != GL_NO_ERROR) {
       _mesa_error(ctx, err,
                   "%s(incompatible format = %s, type = %s)",
-                  callerName, _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type));
+                  callerName, _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type));
+      return GL_TRUE;
+   }
+
+   /* OpenGL ES 1.x, 2.0 and 3.x impose additional restrictions on the
+    * combinations of format, internalFormat, and type that can be used.
+    * Formats and types that require additional extensions (e.g., GL_FLOAT
+    * requires GL_OES_texture_float) are filtered elsewhere.
+    */
+   if (_mesa_is_gles(ctx) &&
+       texture_format_error_check_gles(ctx, format, type,
+                                       texImage->InternalFormat,
+                                       dimensions, callerName)) {
       return GL_TRUE;
    }
 
@@ -2516,14 +2554,6 @@
       return GL_TRUE;
    }
 
-   texImage = _mesa_select_tex_image(texObj, target, level);
-   if (!texImage) {
-      /* non-existant texture level */
-      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture image)",
-                  callerName);
-      return GL_TRUE;
-   }
-
    if (error_check_subtexture_dimensions(ctx, dimensions,
                                          texImage, xoffset, yoffset, zoffset,
                                          width, height, depth, callerName)) {
@@ -2583,7 +2613,7 @@
    /* check target */
    if (!legal_texsubimage_target(ctx, dimensions, target, false)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%uD(target=%s)",
-                  dimensions, _mesa_lookup_enum_by_nr(target));
+                  dimensions, _mesa_enum_to_string(target));
       return GL_TRUE;
    }
 
@@ -2636,7 +2666,7 @@
       default:
          _mesa_error(ctx, GL_INVALID_ENUM,
                      "glCopyTexImage%dD(internalFormat=%s)", dimensions,
-                     _mesa_lookup_enum_by_nr(internalFormat));
+                     _mesa_enum_to_string(internalFormat));
          return GL_TRUE;
       }
    }
@@ -2645,7 +2675,7 @@
    if (baseFormat < 0) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glCopyTexImage%dD(internalFormat=%s)", dimensions,
-                  _mesa_lookup_enum_by_nr(internalFormat));
+                  _mesa_enum_to_string(internalFormat));
       return GL_TRUE;
    }
 
@@ -2662,7 +2692,7 @@
       if (rb_base_format < 0) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyTexImage%dD(internalFormat=%s)", dimensions,
-                     _mesa_lookup_enum_by_nr(internalFormat));
+                     _mesa_enum_to_string(internalFormat));
          return GL_TRUE;
       }
    }
@@ -2689,7 +2719,7 @@
       if (!valid) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glCopyTexImage%dD(internalFormat=%s)", dimensions,
-                     _mesa_lookup_enum_by_nr(internalFormat));
+                     _mesa_enum_to_string(internalFormat));
          return GL_TRUE;
       }
    }
@@ -2728,10 +2758,10 @@
        * types for SNORM formats. Also, conversion to SNORM formats is not
        * allowed by Table 3.2 on Page 110.
        */
-      if(_mesa_is_enum_format_snorm(internalFormat)) {
+      if (_mesa_is_enum_format_snorm(internalFormat)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glCopyTexImage%dD(internalFormat=%s)", dimensions,
-                     _mesa_lookup_enum_by_nr(internalFormat));
+                     _mesa_enum_to_string(internalFormat));
          return GL_TRUE;
       }
    }
@@ -3096,8 +3126,8 @@
                        "DXT compression requested (%s), "
                        "but libtxc_dxtn library not installed.  Using %s "
                        "instead.",
-                       _mesa_lookup_enum_by_nr(before),
-                       _mesa_lookup_enum_by_nr(internalFormat));
+                       _mesa_enum_to_string(before),
+                       _mesa_enum_to_string(internalFormat));
       }
    }
 
@@ -3184,18 +3214,18 @@
          _mesa_debug(ctx,
                      "glCompressedTexImage%uD %s %d %s %d %d %d %d %p\n",
                      dims,
-                     _mesa_lookup_enum_by_nr(target), level,
-                     _mesa_lookup_enum_by_nr(internalFormat),
+                     _mesa_enum_to_string(target), level,
+                     _mesa_enum_to_string(internalFormat),
                      width, height, depth, border, pixels);
       else
          _mesa_debug(ctx,
                      "glTexImage%uD %s %d %s %d %d %d %d %s %s %p\n",
                      dims,
-                     _mesa_lookup_enum_by_nr(target), level,
-                     _mesa_lookup_enum_by_nr(internalFormat),
+                     _mesa_enum_to_string(target), level,
+                     _mesa_enum_to_string(internalFormat),
                      width, height, depth, border,
-                     _mesa_lookup_enum_by_nr(format),
-                     _mesa_lookup_enum_by_nr(type), pixels);
+                     _mesa_enum_to_string(format),
+                     _mesa_enum_to_string(type), pixels);
    }
 
    internalFormat = override_internal_format(internalFormat, width, height);
@@ -3203,7 +3233,7 @@
    /* target error checking */
    if (!legal_teximage_target(ctx, dims, target)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s%uD(target=%s)",
-                  func, dims, _mesa_lookup_enum_by_nr(target));
+                  func, dims, _mesa_enum_to_string(target));
       return;
    }
 
@@ -3306,16 +3336,16 @@
 
       if (!dimensionsOK) {
          _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glTexImage%uD(invalid width or height or depth)",
-                     dims);
+                     "%s%uD(invalid width or height or depth)",
+                     func, dims);
          return;
       }
 
       if (!sizeOK) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY,
-                     "glTexImage%uD(image too large: %d x %d x %d, %s format)",
-                     dims, width, height, depth,
-                     _mesa_lookup_enum_by_nr(internalFormat));
+                     "%s%uD(image too large: %d x %d x %d, %s format)",
+                     func, dims, width, height, depth,
+                     _mesa_enum_to_string(internalFormat));
          return;
       }
 
@@ -3488,7 +3518,6 @@
       _mesa_dirty_texobj(ctx, texObj);
    }
    _mesa_unlock_texture(ctx, texObj);
-
 }
 
 
@@ -3560,7 +3589,7 @@
    /* check target (proxies not allowed) */
    if (!legal_texsubimage_target(ctx, dims, target, false)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)",
-                  dims, _mesa_lookup_enum_by_nr(target));
+                  dims, _mesa_enum_to_string(target));
       return;
    }
 
@@ -3581,10 +3610,10 @@
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glTexSubImage%uD %s %d %d %d %d %d %d %d %s %s %p\n",
                   dims,
-                  _mesa_lookup_enum_by_nr(target), level,
+                  _mesa_enum_to_string(target), level,
                   xoffset, yoffset, zoffset, width, height, depth,
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type), pixels);
+                  _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type), pixels);
 
    _mesa_texture_sub_image(ctx, dims, texObj, texImage, target, level,
                            xoffset, yoffset, zoffset, width, height, depth,
@@ -3613,8 +3642,8 @@
                   "glTextureSubImage%uD %d %d %d %d %d %d %d %d %s %s %p\n",
                   dims, texture, level,
                   xoffset, yoffset, zoffset, width, height, depth,
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type), pixels);
+                  _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type), pixels);
 
    /* Get the texture object by Name. */
    texObj = _mesa_lookup_texture(ctx, texture);
@@ -3627,7 +3656,7 @@
    /* check target (proxies not allowed) */
    if (!legal_texsubimage_target(ctx, dims, texObj->Target, true)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)",
-                  callerName, _mesa_lookup_enum_by_nr(texObj->Target));
+                  callerName, _mesa_enum_to_string(texObj->Target));
       return;
    }
 
@@ -3682,12 +3711,12 @@
       rowStride = _mesa_image_image_stride(&ctx->Unpack, width, height,
                                            format, type);
       /* Copy in each face. */
-      for (i = 0; i < 6; ++i) {
+      for (i = zoffset; i < zoffset + depth; ++i) {
          texImage = texObj->Image[i][level];
          assert(texImage);
 
          _mesa_texture_sub_image(ctx, 3, texObj, texImage, texObj->Target,
-                                 level, xoffset, yoffset, zoffset,
+                                 level, xoffset, yoffset, 0,
                                  width, height, 1, format,
                                  type, pixels, true);
          pixels = (GLubyte *) pixels + rowStride;
@@ -3841,8 +3870,7 @@
 }
 
 static GLboolean
-formats_differ_in_component_sizes (mesa_format f1,
-                                   mesa_format f2)
+formats_differ_in_component_sizes(mesa_format f1, mesa_format f2)
 {
    GLint f1_r_bits = _mesa_get_format_bits(f1, GL_RED_BITS);
    GLint f1_g_bits = _mesa_get_format_bits(f1, GL_GREEN_BITS);
@@ -3882,8 +3910,8 @@
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glCopyTexImage%uD %s %d %s %d %d %d %d %d\n",
                   dims,
-                  _mesa_lookup_enum_by_nr(target), level,
-                  _mesa_lookup_enum_by_nr(internalFormat),
+                  _mesa_enum_to_string(target), level,
+                  _mesa_enum_to_string(internalFormat),
                   x, y, width, height, border);
 
    if (ctx->NewState & NEW_COPY_TEX_STATE)
@@ -3915,8 +3943,8 @@
        */
          if (rb->InternalFormat == GL_RGB10_A2) {
                _mesa_error(ctx, GL_INVALID_OPERATION,
-                           "glCopyTexImage%uD(Reading from GL_RGB10_A2 buffer and"
-                           " writing to unsized internal format)", dims);
+                           "glCopyTexImage%uD(Reading from GL_RGB10_A2 buffer"
+                           " and writing to unsized internal format)", dims);
                return;
          }
       }
@@ -4042,7 +4070,7 @@
 
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "%s %s %d %d %d %d %d %d %d %d\n", caller,
-                  _mesa_lookup_enum_by_nr(target),
+                  _mesa_enum_to_string(target),
                   level, xoffset, yoffset, zoffset, x, y, width, height);
 
    if (ctx->NewState & NEW_COPY_TEX_STATE)
@@ -4104,7 +4132,7 @@
     */
    if (!legal_texsubimage_target(ctx, 1, target, false)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -4132,7 +4160,7 @@
     */
    if (!legal_texsubimage_target(ctx, 2, target, false)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -4161,7 +4189,7 @@
     */
    if (!legal_texsubimage_target(ctx, 3, target, false)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return;
    }
 
@@ -4189,7 +4217,7 @@
    /* Check target (proxies not allowed). */
    if (!legal_texsubimage_target(ctx, 1, texObj->Target, true)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
-                  _mesa_lookup_enum_by_nr(texObj->Target));
+                  _mesa_enum_to_string(texObj->Target));
       return;
    }
 
@@ -4213,7 +4241,7 @@
    /* Check target (proxies not allowed). */
    if (!legal_texsubimage_target(ctx, 2, texObj->Target, true)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
-                  _mesa_lookup_enum_by_nr(texObj->Target));
+                  _mesa_enum_to_string(texObj->Target));
       return;
    }
 
@@ -4240,7 +4268,7 @@
    /* Check target (proxies not allowed). */
    if (!legal_texsubimage_target(ctx, 3, texObj->Target, true)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
-                  _mesa_lookup_enum_by_nr(texObj->Target));
+                  _mesa_enum_to_string(texObj->Target));
       return;
    }
 
@@ -4287,8 +4315,8 @@
       _mesa_error(ctx, err,
                   "%s(incompatible format = %s, type = %s)",
                   function,
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type));
+                  _mesa_enum_to_string(format),
+                  _mesa_enum_to_string(type));
       return false;
    }
 
@@ -4297,8 +4325,8 @@
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "%s(incompatible internalFormat = %s, format = %s)",
                   function,
-                  _mesa_lookup_enum_by_nr(internalFormat),
-                  _mesa_lookup_enum_by_nr(format));
+                  _mesa_enum_to_string(internalFormat),
+                  _mesa_enum_to_string(format));
       return false;
    }
 
@@ -4540,7 +4568,7 @@
 
    if (dsa && target == GL_TEXTURE_RECTANGLE) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid target %s)", caller,
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return GL_TRUE;
    }
 
@@ -4548,13 +4576,15 @@
    case 2:
       switch (target) {
       case GL_TEXTURE_2D:
+         targetOK = GL_TRUE;
+         break;
       case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
       case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
       case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
       case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
       case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
       case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-         targetOK = GL_TRUE;
+         targetOK = ctx->Extensions.ARB_texture_cube_map;
          break;
       default:
          targetOK = GL_FALSE;
@@ -4562,52 +4592,59 @@
       }
       break;
    case 3:
-      targetOK = (target == GL_TEXTURE_3D) ||
-                 (target == GL_TEXTURE_2D_ARRAY) ||
-                 (target == GL_TEXTURE_CUBE_MAP_ARRAY) ||
-                 (target == GL_TEXTURE_CUBE_MAP && dsa);
-
-      /* OpenGL 4.5 spec (30.10.2014) says in Section 8.7 Compressed Texture
-       * Images:
-       *    "An INVALID_OPERATION error is generated by
-       *    CompressedTex*SubImage3D if the internal format of the texture is
-       *    one of the EAC, ETC2, or RGTC formats and either border is
-       *    non-zero, or the effective target for the texture is not
-       *    TEXTURE_2D_ARRAY."
-       */
-      if (target != GL_TEXTURE_2D_ARRAY) {
-         bool invalidformat;
+      switch (target) {
+      case GL_TEXTURE_CUBE_MAP:
+         targetOK = dsa && ctx->Extensions.ARB_texture_cube_map;
+         break;
+      case GL_TEXTURE_2D_ARRAY:
+         targetOK = _mesa_is_gles3(ctx) ||
+            (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array);
+         break;
+      case GL_TEXTURE_CUBE_MAP_ARRAY:
+         targetOK = ctx->Extensions.ARB_texture_cube_map_array;
+         break;
+      case GL_TEXTURE_3D:
+         targetOK = GL_TRUE;
+         /*
+          * OpenGL 4.5 spec (30.10.2014) says in Section 8.7 Compressed Texture
+          * Images:
+          *    "An INVALID_OPERATION error is generated by
+          *    CompressedTex*SubImage3D if the internal format of the texture
+          *    is one of the EAC, ETC2, or RGTC formats and either border is
+          *    non-zero, or the effective target for the texture is not
+          *    TEXTURE_2D_ARRAY."
+          *
+          * NOTE: that's probably a spec error.  It should probably say
+          *    "... or the effective target for the texture is not
+          *    TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP, nor
+          *    GL_TEXTURE_CUBE_MAP_ARRAY."
+          * since those targets are 2D images and they support all compression
+          * formats.
+          *
+          * Instead of listing all these, just list those which are allowed,
+          * which is (at this time) only bptc. Otherwise we'd say s3tc (and
+          * more) are valid here, which they are not, but of course not
+          * mentioned by core spec.
+          */
          switch (format) {
-            /* These came from _mesa_is_compressed_format in glformats.c. */
-            /* EAC formats */
-            case GL_COMPRESSED_RGBA8_ETC2_EAC:
-            case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
-            case GL_COMPRESSED_R11_EAC:
-            case GL_COMPRESSED_RG11_EAC:
-            case GL_COMPRESSED_SIGNED_R11_EAC:
-            case GL_COMPRESSED_SIGNED_RG11_EAC:
-            /* ETC2 formats */
-            case GL_COMPRESSED_RGB8_ETC2:
-            case GL_COMPRESSED_SRGB8_ETC2:
-            case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-            case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-            /* RGTC formats */
-            case GL_COMPRESSED_RED_RGTC1:
-            case GL_COMPRESSED_SIGNED_RED_RGTC1:
-            case GL_COMPRESSED_RG_RGTC2:
-            case GL_COMPRESSED_SIGNED_RG_RGTC2:
-               invalidformat = true;
-               break;
-            default:
-               invalidformat = false;
-         }
-         if (invalidformat) {
+         /* These are the only 3D compression formats supported at this time */
+         case GL_COMPRESSED_RGBA_BPTC_UNORM:
+         case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+         case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+         case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+            /* valid format */
+            break;
+         default:
+            /* invalid format */
             _mesa_error(ctx, GL_INVALID_OPERATION,
                         "%s(invalid target %s for format %s)", caller,
-                        _mesa_lookup_enum_by_nr(target),
-                        _mesa_lookup_enum_by_nr(format));
+                        _mesa_enum_to_string(target),
+                        _mesa_enum_to_string(format));
             return GL_TRUE;
          }
+         break;
+      default:
+         targetOK = GL_FALSE;
       }
 
       break;
@@ -4620,7 +4657,7 @@
 
    if (!targetOK) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller,
-                  _mesa_lookup_enum_by_nr(target));
+                  _mesa_enum_to_string(target));
       return GL_TRUE;
    }
 
@@ -4833,8 +4870,7 @@
    if (!texObj)
       return;
 
-   if (compressed_subtexture_target_check(ctx, texObj->Target, 1, format,
-                                          true,
+   if (compressed_subtexture_target_check(ctx, texObj->Target, 1, format, true,
                                           "glCompressedTextureSubImage1D")) {
       return;
    }
@@ -4911,8 +4947,7 @@
    if (!texObj)
       return;
 
-   if (compressed_subtexture_target_check(ctx, texObj->Target, 2, format,
-                                          true,
+   if (compressed_subtexture_target_check(ctx, texObj->Target, 2, format, true,
                                           "glCompressedTextureSubImage2D")) {
       return;
    }
@@ -4989,8 +5024,7 @@
    if (!texObj)
       return;
 
-   if (compressed_subtexture_target_check(ctx, texObj->Target, 3, format,
-                                          true,
+   if (compressed_subtexture_target_check(ctx, texObj->Target, 3, format, true,
                                           "glCompressedTextureSubImage3D")) {
       return;
    }
@@ -5439,7 +5473,6 @@
          return;
 
    } else {
-
       /* OpenGL 4.5 core spec (02.02.2015) says in Section 8.9 Buffer
        * Textures (PDF page 254):
        *    "If buffer is zero, then any buffer object attached to the buffer
@@ -5507,7 +5540,6 @@
          return;
 
    } else {
-
       /* OpenGL 4.5 core spec (02.02.2015) says in Section 8.9 Buffer
        * Textures (PDF page 254):
        *    "If buffer is zero, then any buffer object attached to the buffer
@@ -5537,10 +5569,13 @@
 is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat)
 {
    /* Everything that is allowed for renderbuffers,
-    * except for a base format of GL_STENCIL_INDEX.
+    * except for a base format of GL_STENCIL_INDEX, unless supported.
     */
    GLenum baseFormat = _mesa_base_fbo_format(ctx, internalformat);
-   return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
+   if (ctx->Extensions.ARB_texture_stencil8)
+      return baseFormat != 0;
+   else
+      return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
 }
 
 
@@ -5553,26 +5588,24 @@
       return dims == 2;
    case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
       return dims == 2 && !dsa;
-
    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
       return dims == 3;
    case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
       return dims == 3 && !dsa;
-
    default:
       return GL_FALSE;
    }
 }
 
 
-void
-_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
-                                struct gl_texture_object *texObj,
-                                GLenum target, GLsizei samples,
-                                GLint internalformat, GLsizei width,
-                                GLsizei height, GLsizei depth,
-                                GLboolean fixedsamplelocations,
-                                GLboolean immutable, const char *func)
+static void
+texture_image_multisample(struct gl_context *ctx, GLuint dims,
+                          struct gl_texture_object *texObj,
+                          GLenum target, GLsizei samples,
+                          GLint internalformat, GLsizei width,
+                          GLsizei height, GLsizei depth,
+                          GLboolean fixedsamplelocations,
+                          GLboolean immutable, const char *func)
 {
    struct gl_texture_image *texImage;
    GLboolean sizeOK, dimensionsOK, samplesOK;
@@ -5580,12 +5613,17 @@
    GLenum sample_count_error;
    bool dsa = strstr(func, "ture") ? true : false;
 
-   if (!(ctx->Extensions.ARB_texture_multisample
-      && _mesa_is_desktop_gl(ctx))) {
+   if (!((ctx->Extensions.ARB_texture_multisample
+         && _mesa_is_desktop_gl(ctx))) && !_mesa_is_gles31(ctx)) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported)", func);
       return;
    }
 
+   if (samples < 1) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(samples < 1)", func);
+      return;
+   }
+
    if (!check_multisample_target(dims, target, dsa)) {
       if (dsa) {
          _mesa_error(ctx, GL_INVALID_OPERATION, "%s(target)", func);
@@ -5604,14 +5642,21 @@
    if (immutable && !_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
             "%s(internalformat=%s not legal for immutable-format)",
-            func, _mesa_lookup_enum_by_nr(internalformat));
+            func, _mesa_enum_to_string(internalformat));
       return;
    }
 
    if (!is_renderable_texture_format(ctx, internalformat)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-            "%s(internalformat=%s)",
-            func, _mesa_lookup_enum_by_nr(internalformat));
+      /* Page 172 of OpenGL ES 3.1 spec says:
+       *   "An INVALID_ENUM error is generated if sizedinternalformat is not
+       *   color-renderable, depth-renderable, or stencil-renderable (as
+       *   defined in section 9.4)."
+       *
+       *  (Same error is also defined for desktop OpenGL for multisample
+       *  teximage/texstorage functions.)
+       */
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s(internalformat=%s)", func,
+                  _mesa_enum_to_string(internalformat));
       return;
    }
 
@@ -5670,13 +5715,12 @@
    else {
       if (!dimensionsOK) {
          _mesa_error(ctx, GL_INVALID_VALUE,
-               "%s(invalid width or height)", func);
+                     "%s(invalid width or height)", func);
          return;
       }
 
       if (!sizeOK) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY,
-               "%s(texture too large)", func);
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s(texture too large)", func);
          return;
       }
 
@@ -5694,7 +5738,7 @@
 
       if (width > 0 && height > 0 && depth > 0) {
          if (!ctx->Driver.AllocTextureStorage(ctx, texObj, 1,
-                  width, height, depth)) {
+                                              width, height, depth)) {
             /* tidy up the texture image state. strictly speaking,
              * we're allowed to just leave this in whatever state we
              * like, but being tidy is good.
@@ -5727,10 +5771,10 @@
    if (!texObj)
       return;
 
-   _mesa_texture_image_multisample(ctx, 2, texObj, target, samples,
-                                   internalformat, width, height, 1,
-                                   fixedsamplelocations, GL_FALSE,
-                                   "glTexImage2DMultisample");
+   texture_image_multisample(ctx, 2, texObj, target, samples,
+                             internalformat, width, height, 1,
+                             fixedsamplelocations, GL_FALSE,
+                             "glTexImage2DMultisample");
 }
 
 
@@ -5747,12 +5791,26 @@
    if (!texObj)
       return;
 
-   _mesa_texture_image_multisample(ctx, 3, texObj, target, samples,
-                                   internalformat, width, height, depth,
-                                   fixedsamplelocations, GL_FALSE,
-                                   "glTexImage3DMultisample");
+   texture_image_multisample(ctx, 3, texObj, target, samples,
+                             internalformat, width, height, depth,
+                             fixedsamplelocations, GL_FALSE,
+                             "glTexImage3DMultisample");
 }
 
+static bool
+valid_texstorage_ms_parameters(GLsizei width, GLsizei height, GLsizei depth,
+                               GLsizei samples, unsigned dims)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!_mesa_valid_tex_storage_dim(width, height, depth)) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glTexStorage%uDMultisample(width=%d,height=%d,depth=%d)",
+                  dims, width, height, depth);
+      return false;
+   }
+   return true;
+}
 
 void GLAPIENTRY
 _mesa_TexStorage2DMultisample(GLenum target, GLsizei samples,
@@ -5766,10 +5824,13 @@
    if (!texObj)
       return;
 
-   _mesa_texture_image_multisample(ctx, 2, texObj, target, samples,
-                                   internalformat, width, height, 1,
-                                   fixedsamplelocations, GL_TRUE,
-                                   "glTexStorage2DMultisample");
+   if (!valid_texstorage_ms_parameters(width, height, 1, samples, 2))
+      return;
+
+   texture_image_multisample(ctx, 2, texObj, target, samples,
+                             internalformat, width, height, 1,
+                             fixedsamplelocations, GL_TRUE,
+                             "glTexStorage2DMultisample");
 }
 
 void GLAPIENTRY
@@ -5785,10 +5846,13 @@
    if (!texObj)
       return;
 
-   _mesa_texture_image_multisample(ctx, 3, texObj, target, samples,
-                                   internalformat, width, height, depth,
-                                   fixedsamplelocations, GL_TRUE,
-                                   "glTexStorage3DMultisample");
+   if (!valid_texstorage_ms_parameters(width, height, depth, samples, 3))
+      return;
+
+   texture_image_multisample(ctx, 3, texObj, target, samples,
+                             internalformat, width, height, depth,
+                             fixedsamplelocations, GL_TRUE,
+                             "glTexStorage3DMultisample");
 }
 
 void GLAPIENTRY
@@ -5805,10 +5869,13 @@
    if (!texObj)
       return;
 
-   _mesa_texture_image_multisample(ctx, 2, texObj, texObj->Target, samples,
-                                   internalformat, width, height, 1,
-                                   fixedsamplelocations, GL_TRUE,
-                                   "glTextureStorage2DMultisample");
+   if (!valid_texstorage_ms_parameters(width, height, 1, samples, 2))
+      return;
+
+   texture_image_multisample(ctx, 2, texObj, texObj->Target, samples,
+                             internalformat, width, height, 1,
+                             fixedsamplelocations, GL_TRUE,
+                             "glTextureStorage2DMultisample");
 }
 
 void GLAPIENTRY
@@ -5826,8 +5893,11 @@
    if (!texObj)
       return;
 
-   _mesa_texture_image_multisample(ctx, 3, texObj, texObj->Target, samples,
-                                   internalformat, width, height, depth,
-                                   fixedsamplelocations, GL_TRUE,
-                                   "glTextureStorage3DMultisample");
+   if (!valid_texstorage_ms_parameters(width, height, depth, samples, 3))
+      return;
+
+   texture_image_multisample(ctx, 3, texObj, texObj->Target, samples,
+                             internalformat, width, height, depth,
+                             fixedsamplelocations, GL_TRUE,
+                             "glTextureStorage3DMultisample");
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/teximage.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/teximage.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/teximage.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/teximage.h	2015-09-16 14:36:10.000000000 +0000
@@ -200,15 +200,6 @@
                              const char *caller);
 
 extern void
-_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
-                                struct gl_texture_object *texObj,
-                                GLenum target, GLsizei samples,
-                                GLint internalformat, GLsizei width,
-                                GLsizei height, GLsizei depth,
-                                GLboolean fixedsamplelocations,
-                                GLboolean immutable, const char *func);
-
-extern void
 _mesa_texture_buffer_range(struct gl_context *ctx,
                            struct gl_texture_object *texObj,
                            GLenum internalFormat,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texobj.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texobj.c	2015-09-16 14:36:10.000000000 +0000
@@ -37,6 +37,7 @@
 #include "hash.h"
 #include "imports.h"
 #include "macros.h"
+#include "shaderimage.h"
 #include "teximage.h"
 #include "texobj.h"
 #include "texstate.h"
@@ -1255,7 +1256,7 @@
          if (targetIndex < 0) { /* Bad Target */
             mtx_unlock(&ctx->Shared->Mutex);
             _mesa_error(ctx, GL_INVALID_ENUM, "gl%sTextures(target = %s)",
-                        func, _mesa_lookup_enum_by_nr(texObj->Target));
+                        func, _mesa_enum_to_string(texObj->Target));
             return;
          }
          assert(targetIndex < NUM_TEXTURE_TARGETS);
@@ -1411,8 +1412,10 @@
    for (i = 0; i < ctx->Const.MaxImageUnits; i++) {
       struct gl_image_unit *unit = &ctx->ImageUnits[i];
 
-      if (texObj == unit->TexObj)
+      if (texObj == unit->TexObj) {
          _mesa_reference_texobj(&unit->TexObj, NULL);
+         *unit = _mesa_default_image_unit(ctx);
+      }
    }
 }
 
@@ -1606,8 +1609,8 @@
       return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array
          ? TEXTURE_CUBE_ARRAY_INDEX : -1;
    case GL_TEXTURE_2D_MULTISAMPLE:
-      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
-         ? TEXTURE_2D_MULTISAMPLE_INDEX: -1;
+      return ((_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample) ||
+              _mesa_is_gles31(ctx)) ? TEXTURE_2D_MULTISAMPLE_INDEX: -1;
    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
       return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: -1;
@@ -1642,7 +1645,7 @@
 
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glBindTexture %s %d\n",
-                  _mesa_lookup_enum_by_nr(target), (GLint) texName);
+                  _mesa_enum_to_string(target), (GLint) texName);
 
    targetIndex = _mesa_tex_target_to_index(ctx, target);
    if (targetIndex < 0) {
@@ -1806,7 +1809,7 @@
 
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glBindTextureUnit %s %d\n",
-                  _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit), (GLint) texture);
+                  _mesa_enum_to_string(GL_TEXTURE0+unit), (GLint) texture);
 
    /* Section 8.1 (Texture Objects) of the OpenGL 4.5 core profile spec
     * (20141030) says:
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texparam.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texparam.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texparam.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texparam.c	2015-09-16 14:36:10.000000000 +0000
@@ -381,7 +381,7 @@
       if (texObj->Target == GL_TEXTURE_RECTANGLE_ARB && params[0] != 0) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glTex%sParameter(target=%s, param=%d)", suffix,
-                     _mesa_lookup_enum_by_nr(texObj->Target), params[0]);
+                     _mesa_enum_to_string(texObj->Target), params[0]);
          return GL_FALSE;
       }
       incomplete(ctx, texObj);
@@ -500,7 +500,9 @@
       goto invalid_pname;
 
    case GL_DEPTH_STENCIL_TEXTURE_MODE:
-      if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_stencil_texturing) {
+      if ((_mesa_is_desktop_gl(ctx) &&
+           ctx->Extensions.ARB_stencil_texturing) ||
+          _mesa_is_gles31(ctx)) {
          bool stencil = params[0] == GL_STENCIL_INDEX;
          if (!stencil && params[0] != GL_DEPTH_COMPONENT)
             goto invalid_param;
@@ -610,22 +612,22 @@
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
-               suffix, _mesa_lookup_enum_by_nr(pname));
+               suffix, _mesa_enum_to_string(pname));
    return GL_FALSE;
 
 invalid_param:
    _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(param=%s)",
-               suffix, _mesa_lookup_enum_by_nr(params[0]));
+               suffix, _mesa_enum_to_string(params[0]));
    return GL_FALSE;
 
 invalid_operation:
    _mesa_error(ctx, GL_INVALID_OPERATION, "glTex%sParameter(pname=%s)",
-               suffix, _mesa_lookup_enum_by_nr(pname));
+               suffix, _mesa_enum_to_string(pname));
    return GL_FALSE;
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
-               suffix, _mesa_lookup_enum_by_nr(pname));
+               suffix, _mesa_enum_to_string(pname));
    return GL_FALSE;
 }
 
@@ -683,7 +685,7 @@
 
          if (texObj->Sampler.MaxAnisotropy == params[0])
             return GL_FALSE;
-         if (params[0] < 1.0) {
+         if (params[0] < 1.0F) {
             _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sParameter(param)",
                         suffix);
             return GL_FALSE;
@@ -745,12 +747,12 @@
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
-               suffix, _mesa_lookup_enum_by_nr(pname));
+               suffix, _mesa_enum_to_string(pname));
    return GL_FALSE;
 
 invalid_enum:
    _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
-               suffix, _mesa_lookup_enum_by_nr(pname));
+               suffix, _mesa_enum_to_string(pname));
    return GL_FALSE;
 }
 
@@ -1395,7 +1397,7 @@
     else {
        _mesa_error(ctx, GL_INVALID_OPERATION,
                    "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
-                   _mesa_lookup_enum_by_nr(pname));
+                   _mesa_enum_to_string(pname));
     }
          break;
       case GL_TEXTURE_COMPRESSED:
@@ -1444,7 +1446,7 @@
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM,
                "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
 
@@ -1528,7 +1530,7 @@
          /* Always illegal for GL_TEXTURE_BUFFER */
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
-                     _mesa_lookup_enum_by_nr(pname));
+                     _mesa_enum_to_string(pname));
          break;
 
       /* GL_ARB_texture_float */
@@ -1557,9 +1559,22 @@
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM,
                "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
-               _mesa_lookup_enum_by_nr(pname));
+               _mesa_enum_to_string(pname));
 }
 
+/* Record GL_INVALID_ENUM and return false when the query target is illegal. */
+static bool
+valid_tex_level_parameteriv_target(struct gl_context *ctx, GLenum target,
+                                   bool dsa)
+{
+   if (legal_get_tex_level_parameter_target(ctx, target, dsa))
+      return true;
+
+   _mesa_error(ctx, GL_INVALID_ENUM,
+               "glGetTex%sLevelParameter[if]v(target=%s)",
+               dsa ? "ture" : "", _mesa_enum_to_string(target));
+   return false;
+}
 
 /**
  * This isn't exposed to the rest of the driver because it is a part of the
@@ -1583,13 +1598,6 @@
       return;
    }
 
-   if (!legal_get_tex_level_parameter_target(ctx, target, dsa)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetTex%sLevelParameter[if]v(target=%s)", suffix,
-                  _mesa_lookup_enum_by_nr(target));
-      return;
-   }
-
    maxLevels = _mesa_max_texture_levels(ctx, target);
    assert(maxLevels != 0);
 
@@ -1617,6 +1625,9 @@
    GLint iparam;
    GET_CURRENT_CONTEXT(ctx);
 
+   if (!valid_tex_level_parameteriv_target(ctx, target, false))
+      return;
+
    texObj = _mesa_get_current_tex_object(ctx, target);
    if (!texObj)
       return;
@@ -1634,6 +1645,9 @@
    struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
+   if (!valid_tex_level_parameteriv_target(ctx, target, false))
+      return;
+
    texObj = _mesa_get_current_tex_object(ctx, target);
    if (!texObj)
       return;
@@ -1655,6 +1669,9 @@
    if (!texObj)
       return;
 
+   if (!valid_tex_level_parameteriv_target(ctx, texObj->Target, true))
+      return;
+
    get_tex_level_parameteriv(ctx, texObj, texObj->Target, level,
                              pname, &iparam, true);
 
@@ -1673,6 +1690,9 @@
    if (!texObj)
       return;
 
+   if (!valid_tex_level_parameteriv_target(ctx, texObj->Target, true))
+      return;
+
    get_tex_level_parameteriv(ctx, texObj, texObj->Target, level,
                              pname, params, true);
 }
@@ -1888,6 +1908,18 @@
          *params = (GLfloat) obj->Sampler.sRGBDecode;
          break;
 
+      case GL_IMAGE_FORMAT_COMPATIBILITY_TYPE:
+         if (!ctx->Extensions.ARB_shader_image_load_store)
+            goto invalid_pname;
+         *params = (GLfloat) obj->ImageFormatCompatibilityType;
+         break;
+
+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = ENUM_TO_FLOAT(obj->Target);
+         break;
+
       default:
          goto invalid_pname;
    }
@@ -2113,6 +2145,12 @@
          *params = obj->ImageFormatCompatibilityType;
          break;
 
+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = (GLint) obj->Target;
+         break;
+
       default:
          goto invalid_pname;
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstate.c	2015-09-16 14:36:10.000000000 +0000
@@ -123,21 +123,21 @@
 {
    const struct gl_texture_unit *texUnit = ctx->Texture.Unit + unit;
    printf("Texture Unit %d\n", unit);
-   printf("  GL_TEXTURE_ENV_MODE = %s\n", _mesa_lookup_enum_by_nr(texUnit->EnvMode));
-   printf("  GL_COMBINE_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.ModeRGB));
-   printf("  GL_COMBINE_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.ModeA));
-   printf("  GL_SOURCE0_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[0]));
-   printf("  GL_SOURCE1_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[1]));
-   printf("  GL_SOURCE2_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[2]));
-   printf("  GL_SOURCE0_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[0]));
-   printf("  GL_SOURCE1_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[1]));
-   printf("  GL_SOURCE2_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[2]));
-   printf("  GL_OPERAND0_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[0]));
-   printf("  GL_OPERAND1_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[1]));
-   printf("  GL_OPERAND2_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[2]));
-   printf("  GL_OPERAND0_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[0]));
-   printf("  GL_OPERAND1_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[1]));
-   printf("  GL_OPERAND2_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[2]));
+   printf("  GL_TEXTURE_ENV_MODE = %s\n", _mesa_enum_to_string(texUnit->EnvMode));
+   printf("  GL_COMBINE_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.ModeRGB));
+   printf("  GL_COMBINE_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.ModeA));
+   printf("  GL_SOURCE0_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[0]));
+   printf("  GL_SOURCE1_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[1]));
+   printf("  GL_SOURCE2_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[2]));
+   printf("  GL_SOURCE0_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[0]));
+   printf("  GL_SOURCE1_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[1]));
+   printf("  GL_SOURCE2_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[2]));
+   printf("  GL_OPERAND0_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[0]));
+   printf("  GL_OPERAND1_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[1]));
+   printf("  GL_OPERAND2_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[2]));
+   printf("  GL_OPERAND0_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[0]));
+   printf("  GL_OPERAND1_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[1]));
+   printf("  GL_OPERAND2_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[2]));
    printf("  GL_RGB_SCALE = %d\n", 1 << texUnit->Combine.ScaleShiftRGB);
    printf("  GL_ALPHA_SCALE = %d\n", 1 << texUnit->Combine.ScaleShiftA);
    printf("  GL_TEXTURE_ENV_COLOR = (%f, %f, %f, %f)\n", texUnit->EnvColor[0], texUnit->EnvColor[1], texUnit->EnvColor[2], texUnit->EnvColor[3]);
@@ -289,23 +289,23 @@
    GLuint k;
    GET_CURRENT_CONTEXT(ctx);
 
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+      _mesa_debug(ctx, "glActiveTexture %s\n",
+                  _mesa_enum_to_string(texture));
+
+   if (ctx->Texture.CurrentUnit == texUnit)
+      return;
+
    k = _mesa_max_tex_unit(ctx);
 
    assert(k <= ARRAY_SIZE(ctx->Texture.Unit));
 
-   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
-      _mesa_debug(ctx, "glActiveTexture %s\n",
-                  _mesa_lookup_enum_by_nr(texture));
-
    if (texUnit >= k) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glActiveTexture(texture=%s)",
-                  _mesa_lookup_enum_by_nr(texture));
+                  _mesa_enum_to_string(texture));
       return;
    }
 
-   if (ctx->Texture.CurrentUnit == texUnit)
-      return;
-
    FLUSH_VERTICES(ctx, _NEW_TEXTURE);
 
    ctx->Texture.CurrentUnit = texUnit;
@@ -325,16 +325,16 @@
 
    if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glClientActiveTexture %s\n",
-                  _mesa_lookup_enum_by_nr(texture));
+                  _mesa_enum_to_string(texture));
+
+   if (ctx->Array.ActiveTexture == texUnit)
+      return;
 
    if (texUnit >= ctx->Const.MaxTextureCoordUnits) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glClientActiveTexture(texture)");
       return;
    }
 
-   if (ctx->Array.ActiveTexture == texUnit)
-      return;
-
    FLUSH_VERTICES(ctx, _NEW_ARRAY);
    ctx->Array.ActiveTexture = texUnit;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstate.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstate.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstate.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstate.h	2015-09-16 14:36:10.000000000 +0000
@@ -77,7 +77,7 @@
     *     implementation."
     */
    _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func,
-               _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit));
+               _mesa_enum_to_string(GL_TEXTURE0+unit));
    return NULL;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstorage.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstorage.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstorage.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstorage.c	2015-09-16 14:36:10.000000000 +0000
@@ -189,6 +189,20 @@
 }
 
 
+/** Run _mesa_update_fbo_texture() on every face of every level slot. */
+static void
+update_fbo_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
+{
+   const unsigned faceCount = _mesa_num_tex_faces(texObj->Target);
+   unsigned face;
+   int lvl;
+
+   for (lvl = 0; lvl < ARRAY_SIZE(texObj->Image[0]); lvl++)
+      for (face = 0; face < faceCount; face++)
+         _mesa_update_fbo_texture(ctx, texObj, face, lvl);
+}
+
+
 GLboolean
 _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
 {
@@ -287,7 +301,7 @@
     * order to allow meta functions to use legacy formats. */
 
    /* size check */
-   if (width < 1 || height < 1 || depth < 1) {
+   if (!_mesa_valid_tex_storage_dim(width, height, depth)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glTex%sStorage%uD(width, height or depth < 1)",
                   suffix, dims);
@@ -308,7 +322,8 @@
       _mesa_error(ctx, _mesa_is_desktop_gl(ctx)?
                   GL_INVALID_ENUM : GL_INVALID_OPERATION,
                   "glTex%sStorage%dD(internalformat = %s)", suffix, dims,
-                  _mesa_lookup_enum_by_nr(internalformat));
+                  _mesa_enum_to_string(internalformat));
+      return GL_TRUE;
    }
 
    /* levels check */
@@ -445,6 +460,7 @@
 
       _mesa_set_texture_view_state(ctx, texObj, target, levels);
 
+      update_fbo_texture(ctx, texObj);
    }
 }
 
@@ -464,21 +480,21 @@
    if (!legal_texobj_target(ctx, dims, target)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTexStorage%uD(illegal target=%s)",
-                  dims, _mesa_lookup_enum_by_nr(target));
+                  dims, _mesa_enum_to_string(target));
       return;
    }
 
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glTexStorage%uD %s %d %s %d %d %d\n",
                   dims,
-                  _mesa_lookup_enum_by_nr(target), levels,
-                  _mesa_lookup_enum_by_nr(internalformat),
+                  _mesa_enum_to_string(target), levels,
+                  _mesa_enum_to_string(internalformat),
                   width, height, depth);
    /* Check the format to make sure it is sized. */
    if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTexStorage%uD(internalformat = %s)", dims,
-                  _mesa_lookup_enum_by_nr(internalformat));
+                  _mesa_enum_to_string(internalformat));
       return;
    }
 
@@ -504,14 +520,14 @@
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glTextureStorage%uD %d %d %s %d %d %d\n",
                   dims, texture, levels,
-                  _mesa_lookup_enum_by_nr(internalformat),
+                  _mesa_enum_to_string(internalformat),
                   width, height, depth);
 
    /* Check the format to make sure it is sized. */
    if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTextureStorage%uD(internalformat = %s)", dims,
-                  _mesa_lookup_enum_by_nr(internalformat));
+                  _mesa_enum_to_string(internalformat));
       return;
    }
 
@@ -529,7 +545,7 @@
    if (!legal_texobj_target(ctx, dims, texObj->Target)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glTextureStorage%uD(illegal target=%s)",
-                  dims, _mesa_lookup_enum_by_nr(texObj->Target));
+                  dims, _mesa_enum_to_string(texObj->Target));
       return;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstorage.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstorage.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstorage.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstorage.h	2015-09-16 14:36:10.000000000 +0000
@@ -38,6 +38,27 @@
                       GLenum internalformat, GLsizei width,
                       GLsizei height, GLsizei depth, bool dsa);
 
+/**
+ * Texture width, height and depth check shared with the
+ * multisample variants of TexStorage functions.
+ *
+ * From OpenGL 4.5 Core spec, page 260 (section 8.19)
+ *
+ *     "An INVALID_VALUE error is generated if width, height, depth
+ *     or levels are less than 1, for commands with the corresponding
+ *     parameters."
+ *
+ * (referring to TextureStorage* commands, these also match values
+ * specified for OpenGL ES 3.1.)
+ */
+static inline bool
+_mesa_valid_tex_storage_dim(GLsizei width, GLsizei height, GLsizei depth)
+{
+   /* Valid only when no dimension is below 1. */
+   const bool all_positive = width >= 1 && height >= 1 && depth >= 1;
+   return all_positive;
+}
+
 /*@}*/
 
 /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstore.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstore.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/texstore.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/texstore.c	2015-09-16 14:36:10.000000000 +0000
@@ -727,19 +727,25 @@
        */
       GLint swapSize = _mesa_sizeof_packed_type(srcType);
       if (swapSize == 2 || swapSize == 4) {
-         int bytesPerPixel = _mesa_bytes_per_pixel(srcFormat, srcType);
-         int swapsPerPixel = bytesPerPixel / swapSize;
-         int elementCount = srcWidth * srcHeight * srcDepth;
-         assert(bytesPerPixel % swapSize == 0);
-         tempImage = malloc(elementCount * bytesPerPixel);
+         int imageStride = _mesa_image_image_stride(srcPacking, srcWidth, srcHeight, srcFormat, srcType);
+         int bufferSize = imageStride * srcDepth;
+         int layer;
+         const uint8_t *src;
+         uint8_t *dst;
+
+         tempImage = malloc(bufferSize);
          if (!tempImage)
             return GL_FALSE;
-         if (swapSize == 2)
-            _mesa_swap2_copy(tempImage, (GLushort *) srcAddr,
-                             elementCount * swapsPerPixel);
-         else
-            _mesa_swap4_copy(tempImage, (GLuint *) srcAddr,
-                             elementCount * swapsPerPixel);
+         src = srcAddr;
+         dst = tempImage;
+         for (layer = 0; layer < srcDepth; layer++) {
+            _mesa_swap_bytes_2d_image(srcFormat, srcType,
+                                      srcPacking,
+                                      srcWidth, srcHeight,
+                                      dst, src);
+            src += imageStride;
+            dst += imageStride;
+         }
          srcAddr = tempImage;
       }
    }
@@ -1004,6 +1010,7 @@
    /* compute slice info (and do some sanity checks) */
    switch (target) {
    case GL_TEXTURE_2D:
+   case GL_TEXTURE_2D_MULTISAMPLE:
    case GL_TEXTURE_RECTANGLE:
    case GL_TEXTURE_CUBE_MAP:
    case GL_TEXTURE_EXTERNAL_OES:
@@ -1025,6 +1032,7 @@
       srcImageStride = _mesa_image_row_stride(packing, width, format, type);
       break;
    case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
       numSlices = depth;
       sliceOffset = zoffset;
       depth = 1;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/textureview.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/textureview.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/textureview.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/textureview.c	2015-09-16 14:36:10.000000000 +0000
@@ -167,7 +167,7 @@
  * \return VIEW_CLASS if internalformat found in table, false otherwise.
  */
 static GLenum
-lookup_view_class(struct gl_context *ctx, GLenum internalformat)
+lookup_view_class(const struct gl_context *ctx, GLenum internalformat)
 {
    GLuint i;
 
@@ -176,9 +176,11 @@
          return compatible_internal_formats[i].view_class;
    }
 
-   if (ctx->Extensions.EXT_texture_compression_s3tc && ctx->Extensions.EXT_texture_sRGB) {
+   if (ctx->Extensions.EXT_texture_compression_s3tc &&
+       ctx->Extensions.EXT_texture_sRGB) {
       for (i = 0; i < ARRAY_SIZE(s3tc_compatible_internal_formats); i++) {
-         if (s3tc_compatible_internal_formats[i].internal_format == internalformat)
+         if (s3tc_compatible_internal_formats[i].internal_format
+             == internalformat)
             return s3tc_compatible_internal_formats[i].view_class;
       }
    }
@@ -226,7 +228,8 @@
                                     0, internalFormat, texFormat);
       }
 
-      _mesa_next_mipmap_level_size(target, 0, levelWidth, levelHeight, levelDepth,
+      _mesa_next_mipmap_level_size(target, 0,
+                                   levelWidth, levelHeight, levelDepth,
                                    &levelWidth, &levelHeight, &levelDepth);
    }
 
@@ -310,7 +313,7 @@
    }
    _mesa_error(ctx, GL_INVALID_OPERATION,
                "glTextureView(illegal target=%s)",
-               _mesa_lookup_enum_by_nr(newTarget));
+               _mesa_enum_to_string(newTarget));
    return false;
 }
 #undef RETURN_IF_SUPPORTED
@@ -320,8 +323,8 @@
  * If an error is found, record it with _mesa_error()
  * \return false if any error, true otherwise.
  */
-GLboolean
-_mesa_texture_view_compatible_format(struct gl_context *ctx,
+bool
+_mesa_texture_view_compatible_format(const struct gl_context *ctx,
                                      GLenum origInternalFormat,
                                      GLenum newInternalFormat)
 {
@@ -334,15 +337,16 @@
     * or an INVALID_OPERATION error is generated.
     */
    if (origInternalFormat == newInternalFormat)
-      return GL_TRUE;
+      return true;
 
    origViewClass = lookup_view_class(ctx, origInternalFormat);
    newViewClass = lookup_view_class(ctx, newInternalFormat);
    if ((origViewClass == newViewClass) && origViewClass != false)
-      return GL_TRUE;
+      return true;
 
-   return GL_FALSE;
+   return false;
 }
+
 /**
  * Helper function for TexStorage and teximagemultisample to set immutable
  * texture state needed by ARB_texture_view.
@@ -357,17 +361,19 @@
    /* Get a reference to what will become this View's base level */
    texImage = _mesa_select_tex_image(texObj, target, 0);
 
-   /* When an immutable texture is created via glTexStorage or glTexImageMultisample,
+   /* When an immutable texture is created via glTexStorage or
+    * glTexImageMultisample,
     * TEXTURE_IMMUTABLE_FORMAT becomes TRUE.
     * TEXTURE_IMMUTABLE_LEVELS and TEXTURE_VIEW_NUM_LEVELS become levels.
     * If the texture target is TEXTURE_1D_ARRAY then
     * TEXTURE_VIEW_NUM_LAYERS becomes height.
     * If the texture target is TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP_ARRAY,
-    * or TEXTURE_2D_MULTISAMPLE_ARRAY then TEXTURE_VIEW_NUM_LAYERS becomes depth.
+    * or TEXTURE_2D_MULTISAMPLE_ARRAY then TEXTURE_VIEW_NUM_LAYERS becomes
+    * depth.
     * If the texture target is TEXTURE_CUBE_MAP, then
     * TEXTURE_VIEW_NUM_LAYERS becomes 6.
     * For any other texture target, TEXTURE_VIEW_NUM_LAYERS becomes 1.
-    * 
+    *
     * ARB_texture_multisample: Multisample textures do
     * not have multiple image levels.
     */
@@ -401,7 +407,6 @@
    case GL_TEXTURE_CUBE_MAP:
       texObj->NumLayers = 6;
       break;
-
    }
 }
 
@@ -430,21 +435,25 @@
 
    if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glTextureView %d %s %d %s %d %d %d %d\n",
-                  texture, _mesa_lookup_enum_by_nr(target), origtexture,
-                  _mesa_lookup_enum_by_nr(internalformat),
+                  texture, _mesa_enum_to_string(target), origtexture,
+                  _mesa_enum_to_string(internalformat),
                   minlevel, numlevels, minlayer, numlayers);
 
    if (origtexture == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", origtexture);
+      _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)",
+                  origtexture);
       return;
    }
 
    /* Need original texture information to validate arguments */
    origTexObj = _mesa_lookup_texture(ctx, origtexture);
 
-   /* If <origtexture> is not the name of a texture, INVALID_VALUE is generated. */
+   /* If <origtexture> is not the name of a texture, INVALID_VALUE
+    * is generated.
+    */
    if (!origTexObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)", origtexture);
+      _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(origtexture = %u)",
+                  origtexture);
       return;
    }
 
@@ -452,7 +461,8 @@
     * INVALID_OPERATION is generated.
     */
    if (!origTexObj->Immutable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(origtexture not immutable)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureView(origtexture not immutable)");
       return;
    }
 
@@ -467,7 +477,8 @@
     */
    texObj = _mesa_lookup_texture(ctx, texture);
    if (texObj == NULL) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(texture = %u non-gen name)", texture);
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureView(texture = %u non-gen name)", texture);
       return;
    }
 
@@ -475,7 +486,8 @@
     * the error INVALID_OPERATION is generated.
     */
    if (texObj->Target) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(texture = %u already bound)", texture);
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureView(texture = %u already bound)", texture);
       return;
    }
 
@@ -484,33 +496,35 @@
       return; /* error was recorded */
    }
 
-   /* minlevel and minlayer are relative to the view of origtexture
+   /* minlevel and minlayer are relative to the view of origtexture.
     * If minlevel or minlayer is greater than level or layer, respectively,
-    * of origtexture return INVALID_VALUE.
+    * return INVALID_VALUE.
     */
    newViewMinLevel = origTexObj->MinLevel + minlevel;
    newViewMinLayer = origTexObj->MinLayer + minlayer;
    if (newViewMinLevel >= (origTexObj->MinLevel + origTexObj->NumLevels)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glTextureView(new minlevel (%d) > orig minlevel (%d) + orig numlevels (%d))",
+                  "glTextureView(new minlevel (%d) > orig minlevel (%d)"
+                  " + orig numlevels (%d))",
                   newViewMinLevel, origTexObj->MinLevel, origTexObj->NumLevels);
       return;
    }
 
    if (newViewMinLayer >= (origTexObj->MinLayer + origTexObj->NumLayers)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glTextureView(new minlayer (%d) > orig minlayer (%d) + orig numlayers (%d))",
+                  "glTextureView(new minlayer (%d) > orig minlayer (%d)"
+                  " + orig numlayers (%d))",
                   newViewMinLayer, origTexObj->MinLayer, origTexObj->NumLayers);
       return;
    }
 
    if (!_mesa_texture_view_compatible_format(ctx,
-                                             origTexObj->Image[0][0]->InternalFormat,
-                                             internalformat)) {
+                                   origTexObj->Image[0][0]->InternalFormat,
+                                   internalformat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glTextureView(internalformat %s not compatible with origtexture %s)",
-                  _mesa_lookup_enum_by_nr(internalformat),
-                  _mesa_lookup_enum_by_nr(origTexObj->Image[0][0]->InternalFormat));
+          "glTextureView(internalformat %s not compatible with origtexture %s)",
+          _mesa_enum_to_string(internalformat),
+          _mesa_enum_to_string(origTexObj->Image[0][0]->InternalFormat));
       return;
    }
 
@@ -569,14 +583,16 @@
    dimensionsOK = _mesa_legal_texture_dimensions(ctx, target, 0,
                                                  width, height, depth, 0);
    if (!dimensionsOK) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(invalid width or height or depth)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureView(invalid width or height or depth)");
       return;
    }
 
    sizeOK = ctx->Driver.TestProxyTexImage(ctx, target, 0, texFormat,
                                           width, height, depth, 0);
    if (!sizeOK) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(invalid texture size)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureView(invalid texture size)");
       return;
    }
 
@@ -591,17 +607,19 @@
    case GL_TEXTURE_RECTANGLE:
    case GL_TEXTURE_2D_MULTISAMPLE:
       if (numlayers != 1) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(numlayers %d != 1)", numlayers);
+         _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(numlayers %d != 1)",
+                     numlayers);
          return;
       }
       break;
 
    case GL_TEXTURE_CUBE_MAP:
-      /* If the new texture's target is TEXTURE_CUBE_MAP, the clamped <numlayers>
-       * must be equal to 6.
+      /* If the new texture's target is TEXTURE_CUBE_MAP, the clamped
+       * <numlayers> must be equal to 6.
        */
       if (newViewNumLayers != 6) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glTextureView(clamped numlayers %d != 6)",
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "glTextureView(clamped numlayers %d != 6)",
                      newViewNumLayers);
          return;
       }
@@ -615,7 +633,8 @@
        */
       if ((newViewNumLayers % 6) != 0) {
          _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glTextureView(clamped numlayers %d is not a multiple of 6)",
+                     "glTextureView(clamped numlayers %d is not"
+                     " a multiple of 6)",
                      newViewNumLayers);
          return;
       }
@@ -628,7 +647,8 @@
     */
    if ((target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY) &&
        (origTexImage->Width != origTexImage->Height)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureView(origtexture width (%d) != height (%d))",
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureView(origtexture width (%d) != height (%d))",
                   origTexImage->Width, origTexImage->Height);
       return;
    }
@@ -662,7 +682,8 @@
    texObj->ImmutableLevels = origTexObj->ImmutableLevels;
    texObj->Target = target;
 
-   if (ctx->Driver.TextureView != NULL && !ctx->Driver.TextureView(ctx, texObj, origTexObj)) {
+   if (ctx->Driver.TextureView != NULL &&
+       !ctx->Driver.TextureView(ctx, texObj, origTexObj)) {
       return; /* driver recorded error */
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/textureview.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/textureview.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/textureview.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/textureview.h	2015-09-16 14:36:10.000000000 +0000
@@ -29,8 +29,8 @@
 #ifndef TEXTUREVIEW_H
 #define TEXTUREVIEW_H
 
-GLboolean
-_mesa_texture_view_compatible_format(struct gl_context *ctx,
+bool
+_mesa_texture_view_compatible_format(const struct gl_context *ctx,
                                      GLenum origInternalFormat,
                                      GLenum newInternalFormat);
 
@@ -41,7 +41,8 @@
                   GLuint minlayer, GLuint numlayers);
 
 extern void
-_mesa_set_texture_view_state(struct gl_context *ctx, struct gl_texture_object *texObj,
-                       GLenum target, GLuint levels);
+_mesa_set_texture_view_state(struct gl_context *ctx,
+                             struct gl_texture_object *texObj,
+                             GLenum target, GLuint levels);
 
 #endif /* TEXTUREVIEW_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/uniform_query.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/uniform_query.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/uniform_query.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/uniform_query.cpp	2015-09-16 14:36:10.000000000 +0000
@@ -237,6 +237,13 @@
 
    struct gl_uniform_storage *const uni = shProg->UniformRemapTable[location];
 
+   /* Even though no location is assigned to a built-in uniform and this
+    * function should already have returned NULL, this test makes it explicit
+    * that updating the value of a built-in is not allowed.
+    */
+   if (uni->builtin)
+      return NULL;
+
    if (uni->array_elements == 0) {
       if (count > 1) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
@@ -312,24 +319,31 @@
 
       return;
    }
+   if ((uni->type->base_type == GLSL_TYPE_DOUBLE &&
+        returnType != GLSL_TYPE_DOUBLE) ||
+       (uni->type->base_type != GLSL_TYPE_DOUBLE &&
+        returnType == GLSL_TYPE_DOUBLE)) {
+	 _mesa_error( ctx, GL_INVALID_OPERATION,
+	             "glGetnUniform*vARB(incompatible uniform types)");
+	return;
+   }
 
    {
       unsigned elements = (uni->type->is_sampler())
 	 ? 1 : uni->type->components();
+      const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;
 
       /* Calculate the source base address *BEFORE* modifying elements to
        * account for the size of the user's buffer.
        */
       const union gl_constant_value *const src =
-	 &uni->storage[offset * elements];
+	 &uni->storage[offset * elements * dmul];
 
       assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT ||
-             returnType == GLSL_TYPE_UINT);
-      /* The three (currently) supported types all have the same size,
-       * which is of course the same as their union. That'll change
-       * with glGetUniformdv()...
-       */
-      unsigned bytes = sizeof(src[0]) * elements;
+             returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE);
+
+      /* doubles take two storage slots each (dmul); the other 3 types one */
+      unsigned bytes = sizeof(src[0]) * elements * dmul;
       if (bufSize < 0 || bytes > (unsigned) bufSize) {
 	 _mesa_error( ctx, GL_INVALID_OPERATION,
 	             "glGetnUniform*vARB(out of bounds: bufSize is %d,"
@@ -670,9 +684,11 @@
       match = (basicType != GLSL_TYPE_DOUBLE);
       break;
    case GLSL_TYPE_SAMPLER:
-   case GLSL_TYPE_IMAGE:
       match = (basicType == GLSL_TYPE_INT);
       break;
+   case GLSL_TYPE_IMAGE:
+      match = (basicType == GLSL_TYPE_INT && _mesa_is_desktop_gl(ctx));
+      break;
    default:
       match = (basicType == uni->type->base_type);
       break;
@@ -857,7 +873,7 @@
 		     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
                      GLboolean transpose,
-                     const GLvoid *values, GLenum type)
+                     const GLvoid *values, enum glsl_base_type basicType)
 {
    unsigned offset;
    unsigned vectors;
@@ -876,8 +892,8 @@
       return;
    }
 
-   assert(type == GL_FLOAT || type == GL_DOUBLE);
-   size_mul = type == GL_DOUBLE ? 2 : 1;
+   assert(basicType == GLSL_TYPE_FLOAT || basicType == GLSL_TYPE_DOUBLE);
+   size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;
 
    assert(!uni->type->is_sampler());
    vectors = uni->type->matrix_columns;
@@ -903,6 +919,31 @@
       }
    }
 
+   /* Section 2.11.7 (Uniform Variables) of the OpenGL 4.2 Core Profile spec
+    * says:
+    *
+    *     "If any of the following conditions occur, an INVALID_OPERATION
+    *     error is generated by the Uniform* commands, and no uniform values
+    *     are changed:
+    *
+    *     ...
+    *
+    *     - if the uniform declared in the shader is not of type boolean and
+    *       the type indicated in the name of the Uniform* command used does
+    *       not match the type of the uniform"
+    *
+    * There are no Boolean matrix types, so we do not need to allow
+    * GLSL_TYPE_BOOL here (as _mesa_uniform does).
+    */
+   if (uni->type->base_type != basicType) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
+                  cols, rows, uni->name, location,
+                  glsl_type_name(uni->type->base_type),
+                  glsl_type_name(basicType));
+      return;
+   }
+
    if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
       log_uniform(values, uni->type->base_type, components, vectors, count,
 		  bool(transpose), shProg, location, uni);
@@ -932,7 +973,7 @@
    if (!transpose) {
       memcpy(&uni->storage[elements * offset], values,
 	     sizeof(uni->storage[0]) * elements * count * size_mul);
-   } else if (type == GL_FLOAT) {
+   } else if (basicType == GLSL_TYPE_FLOAT) {
       /* Copy and transpose the matrix.
        */
       const float *src = (const float *)values;
@@ -949,7 +990,7 @@
 	 src += elements;
       }
    } else {
-      assert(type == GL_DOUBLE);
+      assert(basicType == GLSL_TYPE_DOUBLE);
       const double *src = (const double *)values;
       double *dst = (double *)&uni->storage[elements * offset].f;
 
@@ -971,83 +1012,12 @@
 }
 
 
-/**
- * Called via glGetUniformLocation().
- *
- * Returns the uniform index into UniformStorage (also the
- * glGetActiveUniformsiv uniform index), and stores the referenced
- * array offset in *offset, or GL_INVALID_INDEX (-1).
- */
-extern "C" unsigned
-_mesa_get_uniform_location(struct gl_shader_program *shProg,
-                           const GLchar *name,
-                           unsigned *out_offset)
-{
-   /* Page 80 (page 94 of the PDF) of the OpenGL 2.1 spec says:
-    *
-    *     "The first element of a uniform array is identified using the
-    *     name of the uniform array appended with "[0]". Except if the last
-    *     part of the string name indicates a uniform array, then the
-    *     location of the first element of that array can be retrieved by
-    *     either using the name of the uniform array, or the name of the
-    *     uniform array appended with "[0]"."
-    *
-    * Note: since uniform names are not allowed to use whitespace, and array
-    * indices within uniform names are not allowed to use "+", "-", or leading
-    * zeros, it follows that each uniform has a unique name up to the possible
-    * ambiguity with "[0]" noted above.  Therefore we don't need to worry
-    * about mal-formed inputs--they will properly fail when we try to look up
-    * the uniform name in shProg->UniformHash.
-    */
-
-   const GLchar *base_name_end;
-   long offset = parse_program_resource_name(name, &base_name_end);
-   bool array_lookup = offset >= 0;
-   char *name_copy;
-
-   if (array_lookup) {
-      name_copy = (char *) malloc(base_name_end - name + 1);
-      memcpy(name_copy, name, base_name_end - name);
-      name_copy[base_name_end - name] = '\0';
-   } else {
-      name_copy = (char *) name;
-      offset = 0;
-   }
-
-   unsigned location = 0;
-   const bool found = shProg->UniformHash->get(location, name_copy);
-
-   assert(!found
-	  || strcmp(name_copy, shProg->UniformStorage[location].name) == 0);
-
-   /* Free the temporary buffer *before* possibly returning an error.
-    */
-   if (name_copy != name)
-      free(name_copy);
-
-   if (!found)
-      return GL_INVALID_INDEX;
-
-   /* If the uniform is an array, fail if the index is out of bounds.
-    * (A negative index is caught above.)  This also fails if the uniform
-    * is not an array, but the user is trying to index it, because
-    * array_elements is zero and offset >= 0.
-    */
-   if (array_lookup
-       && offset >= (long) shProg->UniformStorage[location].array_elements) {
-      return GL_INVALID_INDEX;
-   }
-
-   *out_offset = offset;
-   return location;
-}
-
 extern "C" bool
 _mesa_sampler_uniforms_are_valid(const struct gl_shader_program *shProg,
 				 char *errMsg, size_t errMsgLength)
 {
    /* Shader does not have samplers. */
-   if (shProg->NumUserUniformStorage == 0)
+   if (shProg->NumUniformStorage == 0)
       return true;
 
    if (!shProg->SamplersValidated) {
@@ -1087,21 +1057,26 @@
       if (!shProg[idx])
          continue;
 
-      for (unsigned i = 0; i < shProg[idx]->NumUserUniformStorage; i++) {
+      for (unsigned i = 0; i < shProg[idx]->NumUniformStorage; i++) {
          const struct gl_uniform_storage *const storage =
             &shProg[idx]->UniformStorage[i];
-         const glsl_type *const t = (storage->type->is_array())
-            ? storage->type->fields.array : storage->type;
 
-         if (!t->is_sampler())
+         if (!storage->type->is_sampler())
             continue;
 
          active_samplers++;
 
-         const unsigned count = MAX2(1, storage->type->array_size());
+         const unsigned count = MAX2(1, storage->array_elements);
          for (unsigned j = 0; j < count; j++) {
             const unsigned unit = storage->storage[j].i;
 
+            /* FIXME: Samplers are initialized to 0 and Mesa doesn't do a
+             * great job of eliminating unused uniforms currently so for now
+             * don't throw an error if two sampler types both point to 0.
+             */
+            if (unit == 0)
+               continue;
+
             /* The types of the samplers associated with a particular texture
              * unit must be an exact match.  Page 74 (page 89 of the PDF) of
              * the OpenGL 3.3 core spec says:
@@ -1111,13 +1086,14 @@
              *     program object."
              */
             if (unit_types[unit] == NULL) {
-               unit_types[unit] = t;
-            } else if (unit_types[unit] != t) {
+               unit_types[unit] = storage->type;
+            } else if (unit_types[unit] != storage->type) {
                pipeline->InfoLog =
                   ralloc_asprintf(pipeline,
                                   "Texture unit %d is accessed both as %s "
                                   "and %s",
-                                  unit, unit_types[unit]->name, t->name);
+                                  unit, unit_types[unit]->name,
+                                  storage->type->name);
                return false;
             }
          }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/uniforms.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/uniforms.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/uniforms.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/uniforms.c	2015-09-16 14:36:10.000000000 +0000
@@ -553,7 +553,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_FLOAT);
+			2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -562,7 +562,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_FLOAT);
+			3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -571,7 +571,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_FLOAT);
+			4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -683,7 +683,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -694,7 +694,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -705,7 +705,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 
@@ -718,7 +718,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_FLOAT);
+			2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -727,7 +727,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_FLOAT);
+			3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -736,7 +736,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_FLOAT);
+			2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -745,7 +745,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_FLOAT);
+			4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -754,7 +754,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_FLOAT);
+			3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -763,7 +763,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_FLOAT);
+			4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -776,7 +776,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -787,7 +787,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -798,7 +798,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -809,7 +809,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -820,7 +820,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 void GLAPIENTRY
@@ -831,7 +831,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }
 
 
@@ -888,16 +888,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   (void) program;
-   (void) location;
-   (void) bufSize;
-   (void) params;
-
-   /*
    _mesa_get_uniform(ctx, program, location, bufSize, GLSL_TYPE_DOUBLE, params);
-   */
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformdvARB"
-               "(GL_ARB_gpu_shader_fp64 not implemented)");
 }
 
 void GLAPIENTRY
@@ -952,7 +943,7 @@
 
    struct gl_program_resource *res =
       _mesa_program_resource_find_name(shProg, GL_UNIFORM_BLOCK,
-                                       uniformBlockName);
+                                       uniformBlockName, NULL);
    if (!res)
       return GL_INVALID_INDEX;
 
@@ -987,7 +978,8 @@
 
    for (i = 0; i < uniformCount; i++) {
       struct gl_program_resource *res =
-         _mesa_program_resource_find_name(shProg, GL_UNIFORM, uniformNames[i]);
+         _mesa_program_resource_find_name(shProg, GL_UNIFORM, uniformNames[i],
+                                          NULL);
       uniformIndices[i] = _mesa_program_resource_index(shProg, res);
    }
 }
@@ -1092,6 +1084,21 @@
                                   GL_REFERENCED_BY_VERTEX_SHADER, params,
                                   caller);
       return;
+
+   case GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER:
+   case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER:
+      _mesa_program_resource_prop(shProg, res, index,
+                                  GL_REFERENCED_BY_TESS_CONTROL_SHADER, params,
+                                  caller);
+      return;
+
+   case GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER:
+   case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER:
+      _mesa_program_resource_prop(shProg, res, index,
+                                  GL_REFERENCED_BY_TESS_EVALUATION_SHADER, params,
+                                  caller);
+      return;
+
    case GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER:
    case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER:
       _mesa_program_resource_prop(shProg, res, index,
@@ -1104,16 +1111,10 @@
                                   GL_REFERENCED_BY_FRAGMENT_SHADER, params,
                                   caller);
       return;
-   case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER:
-      params[0] = GL_FALSE;
-      return;
-   case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER:
-      params[0] = GL_FALSE;
-      return;
    default:
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "%s(pname 0x%x (%s))", caller, pname,
-                  _mesa_lookup_enum_by_nr(pname));
+                  _mesa_enum_to_string(pname));
       return;
    }
 }
@@ -1302,7 +1303,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_DOUBLE);
+			2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1311,7 +1312,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_DOUBLE);
+			3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1320,7 +1321,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_DOUBLE);
+			4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1329,7 +1330,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_DOUBLE);
+			2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1338,7 +1339,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_DOUBLE);
+			3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1347,7 +1348,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_DOUBLE);
+			2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1356,7 +1357,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_DOUBLE);
+			4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1365,7 +1366,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_DOUBLE);
+			3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1374,7 +1375,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_DOUBLE);
+			4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1480,7 +1481,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1491,7 +1492,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1502,7 +1503,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1513,7 +1514,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1524,7 +1525,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1535,7 +1536,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1546,7 +1547,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1557,7 +1558,7 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
 
 void GLAPIENTRY
@@ -1568,5 +1569,5 @@
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/uniforms.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/uniforms.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/uniforms.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/uniforms.h	2015-09-16 14:36:10.000000000 +0000
@@ -343,14 +343,6 @@
 _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
                                 GLboolean transpose, const GLdouble *value);
 
-long
-_mesa_parse_program_resource_name(const GLchar *name,
-                                  const GLchar **out_base_name_end);
-
-unsigned
-_mesa_get_uniform_location(struct gl_shader_program *shProg,
-			   const GLchar *name, unsigned *offset);
-
 void
 _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shader_program,
 	      GLint location, GLsizei count,
@@ -363,7 +355,7 @@
 		     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
                      GLboolean transpose,
-                     const GLvoid *values, GLenum type);
+                     const GLvoid *values, enum glsl_base_type basicType);
 
 void
 _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/varray.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/varray.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/varray.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/varray.c	2015-09-16 14:36:10.000000000 +0000
@@ -300,7 +300,7 @@
    typeBit = type_to_bit(ctx, type);
    if (typeBit == 0x0 || (typeBit & legalTypesMask) == 0x0) {
       _mesa_error(ctx, GL_INVALID_ENUM, "%s(type = %s)",
-                  func, _mesa_lookup_enum_by_nr(type));
+                  func, _mesa_enum_to_string(type));
       return false;
    }
 
@@ -333,7 +333,7 @@
 
       if (bgra_error) {
          _mesa_error(ctx, GL_INVALID_OPERATION, "%s(size=GL_BGRA and type=%s)",
-                     func, _mesa_lookup_enum_by_nr(type));
+                     func, _mesa_enum_to_string(type));
          return false;
       }
 
@@ -2309,10 +2309,10 @@
       fprintf(stderr, "  %s[%d]: ", name, index);
    else
       fprintf(stderr, "  %s: ", name);
-   fprintf(stderr, "Ptr=%p, Type=0x%x, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n",
-	  array->Ptr, array->Type, array->Size,
-	  array->_ElementSize, array->StrideB,
-	  array->BufferObj->Name, (unsigned long) array->BufferObj->Size);
+   fprintf(stderr, "Ptr=%p, Type=%s, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n",
+           array->Ptr, _mesa_enum_to_string(array->Type), array->Size,
+           array->_ElementSize, array->StrideB, array->BufferObj->Name,
+           (unsigned long) array->BufferObj->Size);
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/version.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/version.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/version.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/version.c	2015-09-16 14:36:10.000000000 +0000
@@ -51,31 +51,51 @@
  * fwd_context is only valid if version > 0
  */
 static void
-get_gl_override(int *version, bool *fwd_context, bool *compat_context)
+get_gl_override(gl_api api, int *version, bool *fwd_context,
+                bool *compat_context)
 {
-   const char *env_var = "MESA_GL_VERSION_OVERRIDE";
+   const char *env_var = (api == API_OPENGL_CORE || api == API_OPENGL_COMPAT)
+      ? "MESA_GL_VERSION_OVERRIDE" : "MESA_GLES_VERSION_OVERRIDE";
    const char *version_str;
    int major, minor, n;
-   static int override_version = -1;
-   static bool fc_suffix = false;
-   static bool compat_suffix = false;
+   static struct override_info {
+      int version;
+      bool fc_suffix;
+      bool compat_suffix;
+   } override[] = {
+      { -1, false, false},
+      { -1, false, false},
+      { -1, false, false},
+      { -1, false, false},
+   };
 
-   if (override_version < 0) {
-      override_version = 0;
+   STATIC_ASSERT(ARRAY_SIZE(override) == API_OPENGL_LAST + 1);
+
+   if (api == API_OPENGLES)
+      goto exit;
+
+   if (override[api].version < 0) {
+      override[api].version = 0;
 
       version_str = getenv(env_var);
       if (version_str) {
-         fc_suffix = check_for_ending(version_str, "FC");
-         compat_suffix = check_for_ending(version_str, "COMPAT");
+         override[api].fc_suffix = check_for_ending(version_str, "FC");
+         override[api].compat_suffix = check_for_ending(version_str, "COMPAT");
 
          n = sscanf(version_str, "%u.%u", &major, &minor);
          if (n != 2) {
             fprintf(stderr, "error: invalid value for %s: %s\n",
                     env_var, version_str);
-            override_version = 0;
+            override[api].version = 0;
          } else {
-            override_version = major * 10 + minor;
-            if (override_version < 30 && fc_suffix) {
+            override[api].version = major * 10 + minor;
+
+            /* There is no such thing as compatibility or forward-compatible for
+             * OpenGL ES 2.0 or 3.x APIs.
+             */
+            if ((override[api].version < 30 && override[api].fc_suffix) ||
+                (api == API_OPENGLES2 && (override[api].fc_suffix ||
+                                          override[api].compat_suffix))) {
                fprintf(stderr, "error: invalid value for %s: %s\n",
                        env_var, version_str);
             }
@@ -83,9 +103,10 @@
       }
    }
 
-   *version = override_version;
-   *fwd_context = fc_suffix;
-   *compat_context = compat_suffix;
+exit:
+   *version = override[api].version;
+   *fwd_context = override[api].fc_suffix;
+   *compat_context = override[api].compat_suffix;
 }
 
 /**
@@ -130,18 +151,26 @@
    int version;
    bool fwd_context, compat_context;
 
-   get_gl_override(&version, &fwd_context, &compat_context);
+   get_gl_override(*apiOut, &version, &fwd_context, &compat_context);
 
    if (version > 0) {
       *versionOut = version;
-      if (version >= 30 && fwd_context) {
-         *apiOut = API_OPENGL_CORE;
-         consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
-      } else if (version >= 31 && !compat_context) {
-         *apiOut = API_OPENGL_CORE;
-      } else {
-         *apiOut = API_OPENGL_COMPAT;
+
+      /* If the API is a desktop API, adjust the context flags.  We may also
+       * need to modify the API depending on the version.  For example, Mesa
+       * does not support a GL 3.3 compatibility profile.
+       */
+      if (*apiOut == API_OPENGL_CORE || *apiOut == API_OPENGL_COMPAT) {
+         if (version >= 30 && fwd_context) {
+            *apiOut = API_OPENGL_CORE;
+            consts->ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
+         } else if (version >= 31 && !compat_context) {
+            *apiOut = API_OPENGL_CORE;
+         } else {
+            *apiOut = API_OPENGL_COMPAT;
+         }
       }
+
       return true;
    }
    return false;
@@ -157,22 +186,6 @@
 }
 
 /**
- * Returns the gl override value
- *
- * version > 0 indicates there is an override requested
- */
-int
-_mesa_get_gl_version_override(void)
-{
-   int version;
-   bool fwd_context, compat_context;
-
-   get_gl_override(&version, &fwd_context, &compat_context);
-
-   return version;
-}
-
-/**
  * Override the context's GLSL version if the environment variable
  * MESA_GLSL_VERSION_OVERRIDE is set. Valid values for
  * MESA_GLSL_VERSION_OVERRIDE are integers, such as "130".
@@ -296,7 +309,7 @@
                          extensions->ARB_gpu_shader5 &&
                          extensions->ARB_gpu_shader_fp64 &&
                          extensions->ARB_sample_shading &&
-                         false /*extensions->ARB_shader_subroutine*/ &&
+                         extensions->ARB_shader_subroutine &&
                          extensions->ARB_tessellation_shader &&
                          extensions->ARB_texture_buffer_object_rgb32 &&
                          extensions->ARB_texture_cube_map_array &&
@@ -433,7 +446,23 @@
                          extensions->EXT_texture_snorm &&
                          extensions->NV_primitive_restart &&
                          extensions->OES_depth_texture_cube_map);
-   if (ver_3_0) {
+   const bool ver_3_1 = (ver_3_0 &&
+                         extensions->ARB_arrays_of_arrays &&
+                         extensions->ARB_compute_shader &&
+                         extensions->ARB_draw_indirect &&
+                         false /*extensions->ARB_framebuffer_no_attachments*/ &&
+                         extensions->ARB_shader_atomic_counters &&
+                         extensions->ARB_shader_image_load_store &&
+                         false /*extensions->ARB_shader_image_size*/ &&
+                         false /*extensions->ARB_shader_storage_buffer_object*/ &&
+                         extensions->ARB_shading_language_packing &&
+                         extensions->ARB_stencil_texturing &&
+                         extensions->ARB_gpu_shader5 &&
+                         extensions->EXT_shader_integer_mix);
+
+   if (ver_3_1) {
+      return 31;
+   } else if (ver_3_0) {
       return 30;
    } else if (ver_2_0) {
       return 20;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/version.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/version.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/version.h	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/version.h	2015-09-16 14:36:10.000000000 +0000
@@ -47,7 +47,4 @@
 extern void
 _mesa_override_glsl_version(struct gl_constants *consts);
 
-extern int
-_mesa_get_gl_version_override(void);
-
 #endif /* VERSION_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/viewport.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/viewport.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/viewport.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/viewport.c	2015-09-16 14:36:10.000000000 +0000
@@ -391,8 +391,8 @@
 
    if (MESA_VERBOSE&VERBOSE_API)
       _mesa_debug(ctx, "glClipControl(%s, %s)\n",
-	          _mesa_lookup_enum_by_nr(origin),
-                  _mesa_lookup_enum_by_nr(depth));
+	          _mesa_enum_to_string(origin),
+                  _mesa_enum_to_string(depth));
 
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
@@ -443,12 +443,12 @@
  */
 void
 _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i,
-                         double scale[3], double translate[3])
+                         float scale[3], float translate[3])
 {
-   double x = ctx->ViewportArray[i].X;
-   double y = ctx->ViewportArray[i].Y;
-   double half_width = 0.5*ctx->ViewportArray[i].Width;
-   double half_height = 0.5*ctx->ViewportArray[i].Height;
+   float x = ctx->ViewportArray[i].X;
+   float y = ctx->ViewportArray[i].Y;
+   float half_width = 0.5f * ctx->ViewportArray[i].Width;
+   float half_height = 0.5f * ctx->ViewportArray[i].Height;
    double n = ctx->ViewportArray[i].Near;
    double f = ctx->ViewportArray[i].Far;
 
@@ -462,8 +462,8 @@
       translate[1] = half_height + y;
    }
    if (ctx->Transform.ClipDepthMode == GL_NEGATIVE_ONE_TO_ONE) {
-      scale[2] = 0.5*(f - n);
-      translate[2] = 0.5*(n + f);
+      scale[2] = 0.5 * (f - n);
+      translate[2] = 0.5 * (n + f);
    } else {
       scale[2] = f - n;
       translate[2] = n;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/viewport.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/viewport.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/main/viewport.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/main/viewport.h	2015-09-16 14:36:10.000000000 +0000
@@ -73,6 +73,6 @@
 
 extern void
 _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i,
-                         double scale[3], double translate[3]);
+                         float scale[3], float translate[3]);
 
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/Makefile.am	2015-09-16 14:36:09.000000000 +0000
@@ -19,8 +19,6 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-AUTOMAKE_OPTIONS = subdir-objects
-
 SUBDIRS = . main/tests
 
 if HAVE_X11_DRIVER
@@ -92,37 +90,24 @@
 	program/program_parse.tab.h \
 	main/git_sha1.h.tmp
 
-GET_HASH_GEN = main/get_hash_generator.py
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
 
-main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py 	\
-		 $(GET_HASH_GEN)
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/$(GET_HASH_GEN)		\
-		-f $< > $@.tmp;						\
-	mv $@.tmp $@;
+main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py \
+                 main/get_hash_generator.py
+	$(PYTHON_GEN) $(srcdir)/main/get_hash_generator.py \
+		-f $(srcdir)/../mapi/glapi/gen/gl_and_es_API.xml > $@
 
-main/format_info.h: main/formats.csv                                    \
+main/format_info.h: main/formats.csv \
                     main/format_parser.py main/format_info.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/main/format_info.py        \
-                   $< > $@.tmp;                                         \
-	mv $@.tmp $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_info.py $(srcdir)/main/formats.csv > $@
 
-main/format_pack.c: main/format_pack.py main/formats.csv		\
+main/format_pack.c: main/format_pack.py main/formats.csv \
                     main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_pack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_pack.py $(srcdir)/main/formats.csv > $@
 
 main/format_unpack.c: main/format_unpack.py main/formats.csv	\
                       main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_unpack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_unpack.py $(srcdir)/main/formats.csv > $@
 
 main/formats.c: main/format_info.h
 
@@ -203,13 +188,17 @@
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc
 
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS)
+
 program/lex.yy.c: program/program_lexer.l
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(LEX) --never-interactive --outfile=$@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/program/program_lexer.l
 
 program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=program/program_parse.tab.c $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "_mesa_program_" --defines=$(builddir)/program/program_parse.tab.h $(srcdir)/program/program_parse.y
 
 if GEN_ASM_OFFSETS
 matypes.h: $(gen_matypes_SOURCES)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/Makefile.sources	2015-09-16 14:36:09.000000000 +0000
@@ -407,6 +407,7 @@
 	state_tracker/st_atom_shader.c \
 	state_tracker/st_atom_shader.h \
 	state_tracker/st_atom_stipple.c \
+	state_tracker/st_atom_tess.c \
 	state_tracker/st_atom_texture.c \
 	state_tracker/st_atom_viewport.c \
 	state_tracker/st_cache.h \
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_clip_tmp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_clip_tmp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_clip_tmp.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_clip_tmp.h	2015-09-16 14:36:10.000000000 +0000
@@ -194,13 +194,13 @@
    STRIDE_LOOP {
       const GLfloat cx = from[0], cy = from[1], cz = from[2];
       GLubyte mask = 0;
-      if (cx >  1.0)       mask |= CLIP_RIGHT_BIT;
-      else if (cx < -1.0)  mask |= CLIP_LEFT_BIT;
-      if (cy >  1.0)       mask |= CLIP_TOP_BIT;
-      else if (cy < -1.0)  mask |= CLIP_BOTTOM_BIT;
+      if (cx >  1.0F)       mask |= CLIP_RIGHT_BIT;
+      else if (cx < -1.0F)  mask |= CLIP_LEFT_BIT;
+      if (cy >  1.0F)       mask |= CLIP_TOP_BIT;
+      else if (cy < -1.0F)  mask |= CLIP_BOTTOM_BIT;
       if (viewport_z_clip) {
-	 if (cz >  1.0)       mask |= CLIP_FAR_BIT;
-	 else if (cz < -1.0)  mask |= CLIP_NEAR_BIT;
+	 if (cz >  1.0F)       mask |= CLIP_FAR_BIT;
+	 else if (cz < -1.0F)  mask |= CLIP_NEAR_BIT;
       }
       clipMask[i] = mask;
       tmpOrMask |= mask;
@@ -230,10 +230,10 @@
    STRIDE_LOOP {
       const GLfloat cx = from[0], cy = from[1];
       GLubyte mask = 0;
-      if (cx >  1.0)       mask |= CLIP_RIGHT_BIT;
-      else if (cx < -1.0)  mask |= CLIP_LEFT_BIT;
-      if (cy >  1.0)       mask |= CLIP_TOP_BIT;
-      else if (cy < -1.0)  mask |= CLIP_BOTTOM_BIT;
+      if (cx >  1.0F)       mask |= CLIP_RIGHT_BIT;
+      else if (cx < -1.0F)  mask |= CLIP_LEFT_BIT;
+      if (cy >  1.0F)       mask |= CLIP_TOP_BIT;
+      else if (cy < -1.0F)  mask |= CLIP_BOTTOM_BIT;
       clipMask[i] = mask;
       tmpOrMask |= mask;
       tmpAndMask &= mask;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_matrix.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_matrix.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_matrix.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_matrix.c	2015-09-16 14:36:10.000000000 +0000
@@ -380,7 +380,7 @@
    if (fabsf(r3[0])>fabsf(r2[0])) SWAP_ROWS(r3, r2);
    if (fabsf(r2[0])>fabsf(r1[0])) SWAP_ROWS(r2, r1);
    if (fabsf(r1[0])>fabsf(r0[0])) SWAP_ROWS(r1, r0);
-   if (0.0 == r0[0])  return GL_FALSE;
+   if (0.0F == r0[0])  return GL_FALSE;
 
    /* eliminate first variable     */
    m1 = r1[0]/r0[0]; m2 = r2[0]/r0[0]; m3 = r3[0]/r0[0];
@@ -388,31 +388,31 @@
    s = r0[2]; r1[2] -= m1 * s; r2[2] -= m2 * s; r3[2] -= m3 * s;
    s = r0[3]; r1[3] -= m1 * s; r2[3] -= m2 * s; r3[3] -= m3 * s;
    s = r0[4];
-   if (s != 0.0) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; }
+   if (s != 0.0F) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; }
    s = r0[5];
-   if (s != 0.0) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; }
+   if (s != 0.0F) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; }
    s = r0[6];
-   if (s != 0.0) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; }
+   if (s != 0.0F) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; }
    s = r0[7];
-   if (s != 0.0) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; }
+   if (s != 0.0F) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; }
 
    /* choose pivot - or die */
    if (fabsf(r3[1])>fabsf(r2[1])) SWAP_ROWS(r3, r2);
    if (fabsf(r2[1])>fabsf(r1[1])) SWAP_ROWS(r2, r1);
-   if (0.0 == r1[1])  return GL_FALSE;
+   if (0.0F == r1[1])  return GL_FALSE;
 
    /* eliminate second variable */
    m2 = r2[1]/r1[1]; m3 = r3[1]/r1[1];
    r2[2] -= m2 * r1[2]; r3[2] -= m3 * r1[2];
    r2[3] -= m2 * r1[3]; r3[3] -= m3 * r1[3];
-   s = r1[4]; if (0.0 != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; }
-   s = r1[5]; if (0.0 != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; }
-   s = r1[6]; if (0.0 != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; }
-   s = r1[7]; if (0.0 != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; }
+   s = r1[4]; if (0.0F != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; }
+   s = r1[5]; if (0.0F != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; }
+   s = r1[6]; if (0.0F != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; }
+   s = r1[7]; if (0.0F != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; }
 
    /* choose pivot - or die */
    if (fabsf(r3[2])>fabsf(r2[2])) SWAP_ROWS(r3, r2);
-   if (0.0 == r2[2])  return GL_FALSE;
+   if (0.0F == r2[2])  return GL_FALSE;
 
    /* eliminate third variable */
    m3 = r3[2]/r2[2];
@@ -421,7 +421,7 @@
    r3[7] -= m3 * r2[7];
 
    /* last check */
-   if (0.0 == r3[3]) return GL_FALSE;
+   if (0.0F == r3[3]) return GL_FALSE;
 
    s = 1.0F/r3[3];             /* now back substitute row 3 */
    r3[4] *= s; r3[5] *= s; r3[6] *= s; r3[7] *= s;
@@ -490,26 +490,26 @@
     */
    pos = neg = 0.0;
    t =  MAT(in,0,0) * MAT(in,1,1) * MAT(in,2,2);
-   if (t >= 0.0) pos += t; else neg += t;
+   if (t >= 0.0F) pos += t; else neg += t;
 
    t =  MAT(in,1,0) * MAT(in,2,1) * MAT(in,0,2);
-   if (t >= 0.0) pos += t; else neg += t;
+   if (t >= 0.0F) pos += t; else neg += t;
 
    t =  MAT(in,2,0) * MAT(in,0,1) * MAT(in,1,2);
-   if (t >= 0.0) pos += t; else neg += t;
+   if (t >= 0.0F) pos += t; else neg += t;
 
    t = -MAT(in,2,0) * MAT(in,1,1) * MAT(in,0,2);
-   if (t >= 0.0) pos += t; else neg += t;
+   if (t >= 0.0F) pos += t; else neg += t;
 
    t = -MAT(in,1,0) * MAT(in,0,1) * MAT(in,2,2);
-   if (t >= 0.0) pos += t; else neg += t;
+   if (t >= 0.0F) pos += t; else neg += t;
 
    t = -MAT(in,0,0) * MAT(in,2,1) * MAT(in,1,2);
-   if (t >= 0.0) pos += t; else neg += t;
+   if (t >= 0.0F) pos += t; else neg += t;
 
    det = pos + neg;
 
-   if (fabsf(det) < 1e-25)
+   if (fabsf(det) < 1e-25F)
       return GL_FALSE;
 
    det = 1.0F / det;
@@ -564,7 +564,7 @@
                        MAT(in,0,1) * MAT(in,0,1) +
                        MAT(in,0,2) * MAT(in,0,2));
 
-      if (scale == 0.0)
+      if (scale == 0.0F)
          return GL_FALSE;
 
       scale = 1.0F / scale;
@@ -799,8 +799,8 @@
    GLfloat m[16];
    GLboolean optimized;
 
-   s = (GLfloat) sin( angle * M_PI / 180.0 );
-   c = (GLfloat) cos( angle * M_PI / 180.0 );
+   s = sinf( angle * M_PI / 180.0 );
+   c = cosf( angle * M_PI / 180.0 );
 
    memcpy(m, Identity, sizeof(GLfloat)*16);
    optimized = GL_FALSE;
@@ -859,7 +859,7 @@
    if (!optimized) {
       const GLfloat mag = sqrtf(x * x + y * y + z * z);
 
-      if (mag <= 1.0e-4) {
+      if (mag <= 1.0e-4F) {
          /* no rotation, leave mat as-is */
          return;
       }
@@ -1070,7 +1070,7 @@
    m[2] *= x;   m[6] *= y;   m[10] *= z;
    m[3] *= x;   m[7] *= y;   m[11] *= z;
 
-   if (fabsf(x - y) < 1e-8 && fabsf(x - z) < 1e-8)
+   if (fabsf(x - y) < 1e-8F && fabsf(x - z) < 1e-8F)
       mat->flags |= MAT_FLAG_UNIFORM_SCALE;
    else
       mat->flags |= MAT_FLAG_GENERAL_SCALE;
@@ -1111,8 +1111,8 @@
  * Transforms Normalized Device Coords to window/Z values.
  */
 void
-_math_matrix_viewport(GLmatrix *m, const double scale[3],
-                      const double translate[3], double depthMax)
+_math_matrix_viewport(GLmatrix *m, const float scale[3],
+                      const float translate[3], double depthMax)
 {
    m->m[MAT_SX] = scale[0];
    m->m[MAT_TX] = translate[0];
@@ -1206,7 +1206,7 @@
    GLuint i;
 
    for (i = 0 ; i < 16 ; i++) {
-      if (m[i] == 0.0) mask |= (1<<i);
+      if (m[i] == 0.0F) mask |= (1<<i);
    }
 
    if (m[0] == 1.0F) mask |= (1<<16);
@@ -1240,12 +1240,12 @@
       mat->type = MATRIX_2D;
 
       /* Check for scale */
-      if (SQ(mm-1) > SQ(1e-6) ||
-	  SQ(m4m4-1) > SQ(1e-6))
+      if (SQ(mm-1) > SQ(1e-6F) ||
+	  SQ(m4m4-1) > SQ(1e-6F))
 	 mat->flags |= MAT_FLAG_GENERAL_SCALE;
 
       /* Check for rotation */
-      if (SQ(mm4) > SQ(1e-6))
+      if (SQ(mm4) > SQ(1e-6F))
 	 mat->flags |= MAT_FLAG_GENERAL_3D;
       else
 	 mat->flags |= MAT_FLAG_ROTATION;
@@ -1255,9 +1255,9 @@
       mat->type = MATRIX_3D_NO_ROT;
 
       /* Check for scale */
-      if (SQ(m[0]-m[5]) < SQ(1e-6) &&
-	  SQ(m[0]-m[10]) < SQ(1e-6)) {
-	 if (SQ(m[0]-1.0) > SQ(1e-6)) {
+      if (SQ(m[0]-m[5]) < SQ(1e-6F) &&
+	  SQ(m[0]-m[10]) < SQ(1e-6F)) {
+	 if (SQ(m[0]-1.0F) > SQ(1e-6F)) {
 	    mat->flags |= MAT_FLAG_UNIFORM_SCALE;
          }
       }
@@ -1275,8 +1275,8 @@
       mat->type = MATRIX_3D;
 
       /* Check for scale */
-      if (SQ(c1-c2) < SQ(1e-6) && SQ(c1-c3) < SQ(1e-6)) {
-	 if (SQ(c1-1.0) > SQ(1e-6))
+      if (SQ(c1-c2) < SQ(1e-6F) && SQ(c1-c3) < SQ(1e-6F)) {
+	 if (SQ(c1-1.0F) > SQ(1e-6F))
 	    mat->flags |= MAT_FLAG_UNIFORM_SCALE;
 	 /* else no scale at all */
       }
@@ -1285,10 +1285,10 @@
       }
 
       /* Check for rotation */
-      if (SQ(d1) < SQ(1e-6)) {
+      if (SQ(d1) < SQ(1e-6F)) {
 	 CROSS3( cp, m, m+4 );
 	 SUB_3V( cp, cp, (m+8) );
-	 if (LEN_SQUARED_3FV(cp) < SQ(1e-6))
+	 if (LEN_SQUARED_3FV(cp) < SQ(1e-6F))
 	    mat->flags |= MAT_FLAG_ROTATION;
 	 else
 	    mat->flags |= MAT_FLAG_GENERAL_3D;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_matrix.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_matrix.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_matrix.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_matrix.h	2015-09-16 14:36:10.000000000 +0000
@@ -122,8 +122,8 @@
 		      GLfloat nearval, GLfloat farval );
 
 extern void
-_math_matrix_viewport( GLmatrix *m, const double scale[3],
-                       const double translate[3], double depthMax );
+_math_matrix_viewport( GLmatrix *m, const float scale[3],
+                       const float translate[3], double depthMax );
 
 extern void
 _math_matrix_set_identity( GLmatrix *dest );
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_norm_tmp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_norm_tmp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_norm_tmp.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_norm_tmp.h	2015-09-16 14:36:10.000000000 +0000
@@ -80,7 +80,7 @@
       }
    }
    else {
-      if (scale != 1.0) {
+      if (scale != 1.0f) {
 	 m0 *= scale,  m4 *= scale,  m8 *= scale;
 	 m1 *= scale,  m5 *= scale,  m9 *= scale;
 	 m2 *= scale,  m6 *= scale,  m10 *= scale;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_vector.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_vector.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/math/m_vector.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/math/m_vector.h	2015-09-16 14:36:10.000000000 +0000
@@ -51,7 +51,7 @@
 
 /**
  * Wrap all the information about vectors up in a struct.  Has
- * additional fields compared to the other vectors to help us track of
+ * additional fields compared to the other vectors to help us track
  * different vertex sizes, and whether we need to clean columns out
  * because they contain non-(0,0,0,1) values.
  *
@@ -61,7 +61,7 @@
  */
 typedef struct {
    GLfloat (*data)[4];	/**< may be malloc'd or point to client data */
-   GLfloat *start;	/**< points somewhere inside of <data> */
+   GLfloat *start;	/**< points somewhere inside of GLvector4f::data */
    GLuint count;	/**< size of the vector (in elements) */
    GLuint stride;	/**< stride from one element to the next (in bytes) */
    GLuint size;		/**< 2-4 for vertices and 1-4 for texcoords */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/ir_to_mesa.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/ir_to_mesa.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/ir_to_mesa.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/ir_to_mesa.cpp	2015-09-16 14:36:10.000000000 +0000
@@ -262,6 +262,7 @@
    virtual void visit(ir_if *);
    virtual void visit(ir_emit_vertex *);
    virtual void visit(ir_end_primitive *);
+   virtual void visit(ir_barrier *);
    /*@}*/
 
    src_reg result;
@@ -405,7 +406,7 @@
 			    dst_reg dst, src_reg src0, src_reg src1,
 			    unsigned elements)
 {
-   static const gl_inst_opcode dot_opcodes[] = {
+   static const enum prog_opcode dot_opcodes[] = {
       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
    };
 
@@ -533,6 +534,7 @@
       return size;
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SUBROUTINE:
       /* Samplers take up one slot in UNIFORMS[], but they're baked in
        * at link time.
        */
@@ -1341,6 +1343,7 @@
    case ir_unop_dFdx_fine:
    case ir_unop_dFdy_coarse:
    case ir_unop_dFdy_fine:
+   case ir_unop_subroutine_to_int:
       assert(!"not supported");
       break;
 
@@ -2117,6 +2120,12 @@
    assert(!"Geometry shaders not supported.");
 }
 
+void
+ir_to_mesa_visitor::visit(ir_barrier *)
+{
+   unreachable("GLSL barrier() not supported.");
+}
+
 ir_to_mesa_visitor::ir_to_mesa_visitor()
 {
    result.file = PROGRAM_UNDEFINED;
@@ -2377,7 +2386,7 @@
       ir_variable *var = node->as_variable();
 
       if ((var == NULL) || (var->data.mode != ir_var_uniform)
-	  || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0))
+	  || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0))
 	 continue;
 
       add.process(var);
@@ -2406,9 +2415,14 @@
       if (!found)
 	 continue;
 
+      struct gl_uniform_storage *storage =
+         &shader_program->UniformStorage[location];
+
+      /* Do not associate any uniform storage to built-in uniforms */
+      if (storage->builtin)
+         continue;
+
       if (location != last_location) {
-	 struct gl_uniform_storage *storage =
-	    &shader_program->UniformStorage[location];
 	 enum gl_uniform_driver_format format = uniform_native;
 
 	 unsigned columns = 0;
@@ -2439,6 +2453,7 @@
 	    break;
 	 case GLSL_TYPE_SAMPLER:
 	 case GLSL_TYPE_IMAGE:
+         case GLSL_TYPE_SUBROUTINE:
 	    format = uniform_native;
 	    columns = 1;
 	    break;
@@ -2720,7 +2735,7 @@
       mesa_inst->Opcode = inst->op;
       mesa_inst->CondUpdate = inst->cond_update;
       if (inst->saturate)
-	 mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
+	 mesa_inst->Saturate = GL_TRUE;
       mesa_inst->DstReg.File = inst->dst.file;
       mesa_inst->DstReg.Index = inst->dst.index;
       mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
@@ -2898,7 +2913,7 @@
 	 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
 	     || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
 	   progress =
-	     lower_variable_index_to_cond_assign(ir,
+	     lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
 						 options->EmitNoIndirectInput,
 						 options->EmitNoIndirectOutput,
 						 options->EmitNoIndirectTemp,
@@ -2963,6 +2978,8 @@
    if (prog->LinkStatus) {
       if (!ctx->Driver.LinkShader(ctx, prog)) {
 	 prog->LinkStatus = GL_FALSE;
+      } else {
+         build_program_resource_list(ctx, prog);
       }
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_execute.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_execute.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_execute.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_execute.c	2015-09-16 14:36:10.000000000 +0000
@@ -397,7 +397,7 @@
               struct gl_program_machine *machine, const GLfloat value[4])
 {
    const struct prog_dst_register *dstReg = &(inst->DstReg);
-   const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
+   const GLboolean clamp = inst->Saturate;
    GLuint writeMask = dstReg->WriteMask;
    GLfloat clampedValue[4];
    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
@@ -623,7 +623,7 @@
             GLfloat a[4], result[4];
             fetch_vector1(&inst->SrcReg[0], machine, a);
             result[0] = result[1] = result[2] = result[3]
-               = (GLfloat) cos(a[0]);
+               = cosf(a[0]);
             store_vector4(inst, machine, result);
          }
          break;
@@ -723,7 +723,7 @@
                 * result.z = result.x * APPX(result.y)
                 * We do what the ARB extension says.
                 */
-               q[2] = (GLfloat) pow(2.0, t[0]);
+               q[2] = exp2f(t[0]);
             }
             q[1] = t[0] - floor_t0;
             q[3] = 1.0F;
@@ -734,7 +734,7 @@
          {
             GLfloat a[4], result[4], val;
             fetch_vector1(&inst->SrcReg[0], machine, a);
-            val = (GLfloat) pow(2.0, a[0]);
+            val = exp2f(a[0]);
             /*
             if (IS_INF_OR_NAN(val))
                val = 1.0e10;
@@ -776,7 +776,7 @@
             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
                GLfloat a[4];
                fetch_vector1(&inst->SrcReg[0], machine, a);
-               cond = (a[0] != 0.0);
+               cond = (a[0] != 0.0F);
             }
             else {
                cond = eval_condition(machine, inst);
@@ -834,7 +834,7 @@
                val = -FLT_MAX;
             }
             else {
-               val = (float)(log(a[0]) * 1.442695F);
+               val = logf(a[0]) * 1.442695F;
             }
             result[0] = result[1] = result[2] = result[3] = val;
             store_vector4(inst, machine, result);
@@ -853,10 +853,10 @@
             result[1] = a[0];
             /* XXX we could probably just use pow() here */
             if (a[0] > 0.0F) {
-               if (a[1] == 0.0 && a[3] == 0.0)
+               if (a[1] == 0.0F && a[3] == 0.0F)
                   result[2] = 1.0F;
                else
-                  result[2] = (GLfloat) pow(a[1], a[3]);
+                  result[2] = powf(a[1], a[3]);
             }
             else {
                result[2] = 0.0F;
@@ -886,12 +886,12 @@
                   int exponent;
                   GLfloat mantissa = frexpf(t[0], &exponent);
                   q[0] = (GLfloat) (exponent - 1);
-                  q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
+                  q[1] = 2.0F * mantissa; /* map [.5, 1) -> [1, 2) */
 
 		  /* The fast LOG2 macro doesn't meet the precision
 		   * requirements.
 		   */
-                  q[2] = (float)(log(t[0]) * 1.442695F);
+                  q[2] = logf(t[0]) * 1.442695F;
                }
             }
             else {
@@ -1051,7 +1051,7 @@
             fetch_vector1(&inst->SrcReg[0], machine, a);
             fetch_vector1(&inst->SrcReg[1], machine, b);
             result[0] = result[1] = result[2] = result[3]
-               = (GLfloat) pow(a[0], b[0]);
+               = powf(a[0], b[0]);
             store_vector4(inst, machine, result);
          }
          break;
@@ -1095,10 +1095,10 @@
          {
             GLfloat a[4], result[4];
             fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] = (GLfloat) cos(a[0]);
-            result[1] = (GLfloat) sin(a[0]);
-            result[2] = 0.0;    /* undefined! */
-            result[3] = 0.0;    /* undefined! */
+            result[0] = cosf(a[0]);
+            result[1] = sinf(a[0]);
+            result[2] = 0.0F;    /* undefined! */
+            result[3] = 0.0F;    /* undefined! */
             store_vector4(inst, machine, result);
          }
          break;
@@ -1161,7 +1161,7 @@
             GLfloat a[4], result[4];
             fetch_vector1(&inst->SrcReg[0], machine, a);
             result[0] = result[1] = result[2] = result[3]
-               = (GLfloat) sin(a[0]);
+               = sinf(a[0]);
             store_vector4(inst, machine, result);
          }
          break;
@@ -1360,7 +1360,7 @@
              * zero, we'd probably be fine except for an assert in
              * IROUND_POS() which gets triggered by the inf values created.
              */
-            if (texcoord[3] != 0.0) {
+            if (texcoord[3] != 0.0F) {
                texcoord[0] /= texcoord[3];
                texcoord[1] /= texcoord[3];
                texcoord[2] /= texcoord[3];
@@ -1380,7 +1380,7 @@
 
             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
-                texcoord[3] != 0.0) {
+                texcoord[3] != 0.0F) {
                texcoord[0] /= texcoord[3];
                texcoord[1] /= texcoord[3];
                texcoord[2] /= texcoord[3];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_instruction.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_instruction.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_instruction.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_instruction.c	2015-09-16 14:36:10.000000000 +0000
@@ -55,7 +55,7 @@
       inst[i].DstReg.CondMask = COND_TR;
       inst[i].DstReg.CondSwizzle = SWIZZLE_NOOP;
 
-      inst[i].SaturateMode = SATURATE_OFF;
+      inst[i].Saturate = GL_FALSE;
       inst[i].Precision = FLOAT32;
    }
 }
@@ -114,7 +114,7 @@
  */
 struct instruction_info
 {
-   gl_inst_opcode Opcode;
+   enum prog_opcode Opcode;
    const char *Name;
    GLuint NumSrcRegs;
    GLuint NumDstRegs;
@@ -198,7 +198,7 @@
  * Return the number of src registers for the given instruction/opcode.
  */
 GLuint
-_mesa_num_inst_src_regs(gl_inst_opcode opcode)
+_mesa_num_inst_src_regs(enum prog_opcode opcode)
 {
    assert(opcode < MAX_OPCODE);
    assert(opcode == InstInfo[opcode].Opcode);
@@ -211,7 +211,7 @@
  * Return the number of dst registers for the given instruction/opcode.
  */
 GLuint
-_mesa_num_inst_dst_regs(gl_inst_opcode opcode)
+_mesa_num_inst_dst_regs(enum prog_opcode opcode)
 {
    assert(opcode < MAX_OPCODE);
    assert(opcode == InstInfo[opcode].Opcode);
@@ -221,7 +221,7 @@
 
 
 GLboolean
-_mesa_is_tex_instruction(gl_inst_opcode opcode)
+_mesa_is_tex_instruction(enum prog_opcode opcode)
 {
    return (opcode == OPCODE_TEX ||
            opcode == OPCODE_TXB ||
@@ -285,7 +285,7 @@
  * Return string name for given program opcode.
  */
 const char *
-_mesa_opcode_string(gl_inst_opcode opcode)
+_mesa_opcode_string(enum prog_opcode opcode)
 {
    if (opcode < MAX_OPCODE)
       return InstInfo[opcode].Name;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_instruction.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_instruction.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_instruction.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_instruction.h	2015-09-16 14:36:10.000000000 +0000
@@ -118,15 +118,6 @@
 
 
 /**
- * Saturation modes when storing values.
- */
-/*@{*/
-#define SATURATE_OFF            0
-#define SATURATE_ZERO_ONE       1
-/*@}*/
-
-
-/**
  * Per-component negation masks
  */
 /*@{*/
@@ -143,7 +134,7 @@
 /**
  * Program instruction opcodes for vertex, fragment and geometry programs.
  */
-typedef enum prog_opcode {
+enum prog_opcode {
                      /* ARB_vp   ARB_fp   NV_vp   NV_fp     GLSL */
                      /*------------------------------------------*/
    OPCODE_NOP = 0,   /*                                      X   */
@@ -213,7 +204,7 @@
    OPCODE_TRUNC,     /*                                      X   */
    OPCODE_XPD,       /*   X        X                             */
    MAX_OPCODE
-} gl_inst_opcode;
+};
 
 
 /**
@@ -300,7 +291,7 @@
  */
 struct prog_instruction
 {
-   gl_inst_opcode Opcode;
+   enum prog_opcode Opcode;
    struct prog_src_register SrcReg[3];
    struct prog_dst_register DstReg;
 
@@ -327,15 +318,12 @@
    GLuint CondDst:1;
 
    /**
-    * Saturate each value of the vectored result to the range [0,1] or the
-    * range [-1,1].  \c SSAT mode (i.e., saturation to the range [-1,1]) is
-    * only available in NV_fragment_program2 mode.
-    * Value is one of the SATURATE_* tokens.
+    * Saturate each value of the vectored result to the range [0,1].
     *
     * \since
     * NV_fragment_program_option, NV_vertex_program3.
     */
-   GLuint SaturateMode:2;
+   GLuint Saturate:1;
 
    /**
     * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12.
@@ -368,9 +356,6 @@
     */
    GLint BranchTarget;
 
-   /** for driver use (try to remove someday) */
-   GLint Aux;
-
    /** for debugging purposes */
    const char *Comment;
 };
@@ -394,19 +379,19 @@
 _mesa_free_instructions(struct prog_instruction *inst, GLuint count);
 
 extern GLuint
-_mesa_num_inst_src_regs(gl_inst_opcode opcode);
+_mesa_num_inst_src_regs(enum prog_opcode opcode);
 
 extern GLuint
-_mesa_num_inst_dst_regs(gl_inst_opcode opcode);
+_mesa_num_inst_dst_regs(enum prog_opcode opcode);
 
 extern GLboolean
-_mesa_is_tex_instruction(gl_inst_opcode opcode);
+_mesa_is_tex_instruction(enum prog_opcode opcode);
 
 extern GLboolean
 _mesa_check_soa_dependencies(const struct prog_instruction *inst);
 
 extern const char *
-_mesa_opcode_string(gl_inst_opcode opcode);
+_mesa_opcode_string(enum prog_opcode opcode);
 
 
 #ifdef __cplusplus
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_optimize.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_optimize.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_optimize.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_optimize.c	2015-09-16 14:36:10.000000000 +0000
@@ -478,7 +478,7 @@
    return
       can_downward_mov_be_modifed(mov) &&
       mov->DstReg.File == PROGRAM_TEMPORARY &&
-      mov->SaturateMode == SATURATE_OFF;
+      !mov->Saturate;
 }
 
 
@@ -653,7 +653,7 @@
    if (mask != (inst->DstReg.WriteMask & mask))
       return GL_FALSE;
 
-   inst->SaturateMode |= mov->SaturateMode;
+   inst->Saturate |= mov->Saturate;
 
    /* Depending on the instruction, we may need to recompute the swizzles.
     * Also, some other instructions (like TEX) are not linear. We will only
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_print.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_print.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_print.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_print.c	2015-09-16 14:36:10.000000000 +0000
@@ -147,6 +147,8 @@
       "fragment.(twenty-one)", /* VARYING_SLOT_VIEWPORT */
       "fragment.(twenty-two)", /* VARYING_SLOT_FACE */
       "fragment.(twenty-three)", /* VARYING_SLOT_PNTC */
+      "fragment.(twenty-four)", /* VARYING_SLOT_TESS_LEVEL_OUTER */
+      "fragment.(twenty-five)", /* VARYING_SLOT_TESS_LEVEL_INNER */
       "fragment.varying[0]",
       "fragment.varying[1]",
       "fragment.varying[2]",
@@ -272,6 +274,8 @@
       "result.(twenty-one)", /* VARYING_SLOT_VIEWPORT */
       "result.(twenty-two)", /* VARYING_SLOT_FACE */
       "result.(twenty-three)", /* VARYING_SLOT_PNTC */
+      "result.(twenty-four)", /* VARYING_SLOT_TESS_LEVEL_OUTER */
+      "result.(twenty-five)", /* VARYING_SLOT_TESS_LEVEL_INNER */
       "result.varying[0]",
       "result.varying[1]",
       "result.varying[2]",
@@ -600,7 +604,7 @@
       fprintf(f, ".C");
 
    /* frag prog only */
-   if (inst->SaturateMode == SATURATE_ZERO_ONE)
+   if (inst->Saturate)
       fprintf(f, "_SAT");
 
    fprintf(f, " ");
@@ -658,7 +662,7 @@
    switch (inst->Opcode) {
    case OPCODE_SWZ:
       fprintf(f, "SWZ");
-      if (inst->SaturateMode == SATURATE_ZERO_ONE)
+      if (inst->Saturate)
          fprintf(f, "_SAT");
       fprintf(f, " ");
       fprint_dst_reg(f, &inst->DstReg, mode, prog);
@@ -675,7 +679,7 @@
    case OPCODE_TXB:
    case OPCODE_TXD:
       fprintf(f, "%s", _mesa_opcode_string(inst->Opcode));
-      if (inst->SaturateMode == SATURATE_ZERO_ONE)
+      if (inst->Saturate)
          fprintf(f, "_SAT");
       fprintf(f, " ");
       fprint_dst_reg(f, &inst->DstReg, mode, prog);
@@ -864,7 +868,7 @@
       else
          fprintf(f, "# Fragment Program/Shader %u\n", prog->Id);
       break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
       fprintf(f, "# Geometry Shader\n");
    }
 
@@ -1015,6 +1019,12 @@
    case MESA_SHADER_FRAGMENT:
       type = "frag";
       break;
+   case MESA_SHADER_TESS_CTRL:
+      type = "tesc";
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      type = "tese";
+      break;
    case MESA_SHADER_VERTEX:
       type = "vert";
       break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program.c	2015-09-16 14:36:10.000000000 +0000
@@ -97,13 +97,6 @@
    assert(ctx->FragmentProgram.Current);
    ctx->FragmentProgram.Cache = _mesa_new_program_cache();
 
-   ctx->GeometryProgram.Enabled = GL_FALSE;
-   /* right now by default we don't have a geometry program */
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current,
-                            NULL);
-
-   _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
-
    /* XXX probably move this stuff */
    ctx->ATIFragmentShader.Enabled = GL_FALSE;
    ctx->ATIFragmentShader.Current = ctx->Shared->DefaultFragmentShader;
@@ -122,8 +115,6 @@
    _mesa_delete_program_cache(ctx, ctx->VertexProgram.Cache);
    _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
    _mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache);
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
-   _mesa_reference_compprog(ctx, &ctx->ComputeProgram.Current, NULL);
 
    /* XXX probably move this stuff */
    if (ctx->ATIFragmentShader.Current) {
@@ -153,9 +144,6 @@
                             ctx->Shared->DefaultFragmentProgram);
    assert(ctx->FragmentProgram.Current);
 
-   _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current,
-                      ctx->Shared->DefaultGeometryProgram);
-
    /* XXX probably move this stuff */
    if (ctx->ATIFragmentShader.Current) {
       ctx->ATIFragmentShader.Current->RefCount--;
@@ -298,6 +286,38 @@
 
 
 /**
+ * Initialize a tessellation control program; returns NULL if prog is NULL.
+ */
+struct gl_program *
+_mesa_init_tess_ctrl_program(struct gl_context *ctx,
+                             struct gl_tess_ctrl_program *prog,
+                             GLenum target, GLuint id)
+{
+   if (prog) {
+      init_program_struct(&prog->Base, target, id);
+      return &prog->Base;
+   }
+   return NULL;
+}
+
+
+/**
+ * Initialize a tessellation evaluation program; returns NULL if prog is NULL.
+ */
+struct gl_program *
+_mesa_init_tess_eval_program(struct gl_context *ctx,
+                             struct gl_tess_eval_program *prog,
+                             GLenum target, GLuint id)
+{
+   if (prog) {
+      init_program_struct(&prog->Base, target, id);
+      return &prog->Base;
+   }
+   return NULL;
+}
+
+
+/**
  * Initialize a new geometry program object.
  */
 struct gl_program *
@@ -340,11 +360,21 @@
                                          CALLOC_STRUCT(gl_fragment_program),
                                          target, id );
       break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
       prog = _mesa_init_geometry_program(ctx,
                                          CALLOC_STRUCT(gl_geometry_program),
                                          target, id);
       break;
+   case GL_TESS_CONTROL_PROGRAM_NV:
+      prog = _mesa_init_tess_ctrl_program(ctx,
+                                          CALLOC_STRUCT(gl_tess_ctrl_program),
+                                          target, id);
+      break;
+   case GL_TESS_EVALUATION_PROGRAM_NV:
+      prog = _mesa_init_tess_eval_program(ctx,
+                                         CALLOC_STRUCT(gl_tess_eval_program),
+                                         target, id);
+      break;
    case GL_COMPUTE_PROGRAM_NV:
       prog = _mesa_init_compute_program(ctx,
                                         CALLOC_STRUCT(gl_compute_program),
@@ -426,8 +456,8 @@
       else if ((*ptr)->Target == GL_FRAGMENT_PROGRAM_ARB)
          assert(prog->Target == GL_FRAGMENT_PROGRAM_ARB ||
                 prog->Target == GL_FRAGMENT_PROGRAM_NV);
-      else if ((*ptr)->Target == MESA_GEOMETRY_PROGRAM)
-         assert(prog->Target == MESA_GEOMETRY_PROGRAM);
+      else if ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV)
+         assert(prog->Target == GL_GEOMETRY_PROGRAM_NV);
    }
 #endif
 
@@ -439,7 +469,7 @@
       printf("Program %p ID=%u Target=%s  Refcount-- to %d\n",
              *ptr, (*ptr)->Id,
              ((*ptr)->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-              ((*ptr)->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")),
+              ((*ptr)->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
              (*ptr)->RefCount - 1);
 #endif
       assert((*ptr)->RefCount > 0);
@@ -464,7 +494,7 @@
       printf("Program %p ID=%u Target=%s  Refcount++ to %d\n",
              prog, prog->Id,
              (prog->Target == GL_VERTEX_PROGRAM_ARB ? "VP" :
-              (prog->Target == MESA_GEOMETRY_PROGRAM ? "GP" : "FP")),
+              (prog->Target == GL_GEOMETRY_PROGRAM_NV ? "GP" : "FP")),
              prog->RefCount);
 #endif
       /*mtx_unlock(&prog->Mutex);*/
@@ -554,7 +584,7 @@
          fpc->PixelCenterInteger = fp->PixelCenterInteger;
       }
       break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
       {
          const struct gl_geometry_program *gp = gl_geometry_program_const(prog);
          struct gl_geometry_program *gpc = gl_geometry_program(clone);
@@ -566,6 +596,23 @@
          gpc->UsesStreams = gp->UsesStreams;
       }
       break;
+   case GL_TESS_CONTROL_PROGRAM_NV:
+      {
+         const struct gl_tess_ctrl_program *tcp = gl_tess_ctrl_program_const(prog);
+         struct gl_tess_ctrl_program *tcpc = gl_tess_ctrl_program(clone);
+         tcpc->VerticesOut = tcp->VerticesOut;
+      }
+      break;
+   case GL_TESS_EVALUATION_PROGRAM_NV:
+      {
+         const struct gl_tess_eval_program *tep = gl_tess_eval_program_const(prog);
+         struct gl_tess_eval_program *tepc = gl_tess_eval_program(clone);
+         tepc->PrimitiveMode = tep->PrimitiveMode;
+         tepc->Spacing = tep->Spacing;
+         tepc->VertexOrder = tep->VertexOrder;
+         tepc->PointMode = tep->PointMode;
+      }
+      break;
    default:
       _mesa_problem(NULL, "Unexpected target in _mesa_clone_program");
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program.h	2015-09-16 14:36:10.000000000 +0000
@@ -79,6 +79,16 @@
                             GLenum target, GLuint id);
 
 extern struct gl_program *
+_mesa_init_tess_ctrl_program(struct gl_context *ctx,
+                            struct gl_tess_ctrl_program *prog,
+                            GLenum target, GLuint id);
+
+extern struct gl_program *
+_mesa_init_tess_eval_program(struct gl_context *ctx,
+                            struct gl_tess_eval_program *prog,
+                            GLenum target, GLuint id);
+
+extern struct gl_program *
 _mesa_init_geometry_program(struct gl_context *ctx,
                             struct gl_geometry_program *prog,
                             GLenum target, GLuint id);
@@ -147,6 +157,25 @@
                            (struct gl_program *) prog);
 }
 
+
+static inline void
+_mesa_reference_tesscprog(struct gl_context *ctx,
+                         struct gl_tess_ctrl_program **ptr,
+                         struct gl_tess_ctrl_program *prog)
+{
+   _mesa_reference_program(ctx, (struct gl_program **) ptr,
+                           (struct gl_program *) prog);
+}
+
+static inline void
+_mesa_reference_tesseprog(struct gl_context *ctx,
+                         struct gl_tess_eval_program **ptr,
+                         struct gl_tess_eval_program *prog)
+{
+   _mesa_reference_program(ctx, (struct gl_program **) ptr,
+                           (struct gl_program *) prog);
+}
+
 extern struct gl_program *
 _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog);
 
@@ -157,6 +186,20 @@
    return (struct gl_vertex_program *) _mesa_clone_program(ctx, &prog->Base);
 }
 
+static inline struct gl_tess_ctrl_program *
+_mesa_clone_tess_ctrl_program(struct gl_context *ctx,
+                             const struct gl_tess_ctrl_program *prog)
+{
+   return (struct gl_tess_ctrl_program *) _mesa_clone_program(ctx, &prog->Base);
+}
+
+static inline struct gl_tess_eval_program *
+_mesa_clone_tess_eval_program(struct gl_context *ctx,
+                             const struct gl_tess_eval_program *prog)
+{
+   return (struct gl_tess_eval_program *) _mesa_clone_program(ctx, &prog->Base);
+}
+
 static inline struct gl_geometry_program *
 _mesa_clone_geometry_program(struct gl_context *ctx,
                              const struct gl_geometry_program *prog)
@@ -216,6 +259,10 @@
       return MESA_SHADER_FRAGMENT;
    case GL_GEOMETRY_PROGRAM_NV:
       return MESA_SHADER_GEOMETRY;
+   case GL_TESS_CONTROL_PROGRAM_NV:
+      return MESA_SHADER_TESS_CTRL;
+   case GL_TESS_EVALUATION_PROGRAM_NV:
+      return MESA_SHADER_TESS_EVAL;
    case GL_COMPUTE_PROGRAM_NV:
       return MESA_SHADER_COMPUTE;
    default:
@@ -235,6 +282,10 @@
       return GL_FRAGMENT_PROGRAM_ARB;
    case MESA_SHADER_GEOMETRY:
       return GL_GEOMETRY_PROGRAM_NV;
+   case MESA_SHADER_TESS_CTRL:
+      return GL_TESS_CONTROL_PROGRAM_NV;
+   case MESA_SHADER_TESS_EVAL:
+      return GL_TESS_EVALUATION_PROGRAM_NV;
    case MESA_SHADER_COMPUTE:
       return GL_COMPUTE_PROGRAM_NV;
    }
@@ -244,7 +295,9 @@
 }
 
 
-/* Cast wrappers from gl_program to gl_vertex/geometry/fragment_program */
+/* Cast wrappers from gl_program to derived program types.
+ * (e.g. gl_vertex_program)
+ */
 
 static inline struct gl_fragment_program *
 gl_fragment_program(struct gl_program *prog)
@@ -297,6 +350,31 @@
    return (const struct gl_compute_program *) prog;
 }
 
+static inline struct gl_tess_ctrl_program *
+gl_tess_ctrl_program(struct gl_program *prog)
+{
+   return (struct gl_tess_ctrl_program *) prog;
+}
+
+static inline const struct gl_tess_ctrl_program *
+gl_tess_ctrl_program_const(const struct gl_program *prog)
+{
+   return (const struct gl_tess_ctrl_program *) prog;
+}
+
+
+static inline struct gl_tess_eval_program *
+gl_tess_eval_program(struct gl_program *prog)
+{
+   return (struct gl_tess_eval_program *) prog;
+}
+
+static inline const struct gl_tess_eval_program *
+gl_tess_eval_program_const(const struct gl_program *prog)
+{
+   return (const struct gl_tess_eval_program *) prog;
+}
+
 
 #ifdef __cplusplus
 } /* extern "C" */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/programopt.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/programopt.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/programopt.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/programopt.c	2015-09-16 14:36:10.000000000 +0000
@@ -305,7 +305,7 @@
          /* change the instruction to write to colorTemp w/ clamping */
          inst->DstReg.File = PROGRAM_TEMPORARY;
          inst->DstReg.Index = colorTemp;
-         inst->SaturateMode = saturate;
+         inst->Saturate = saturate;
          /* don't break (may be several writes to result.color) */
       }
       inst++;
@@ -331,7 +331,7 @@
       inst->SrcReg[2].File = PROGRAM_STATE_VAR;
       inst->SrcReg[2].Index = fogPRefOpt;
       inst->SrcReg[2].Swizzle = SWIZZLE_YYYY;
-      inst->SaturateMode = SATURATE_ZERO_ONE;
+      inst->Saturate = GL_TRUE;
       inst++;
    }
    else {
@@ -374,7 +374,7 @@
       inst->SrcReg[0].Index = fogFactorTemp;
       inst->SrcReg[0].Negate = NEGATE_XYZW;
       inst->SrcReg[0].Swizzle = SWIZZLE_XXXX;
-      inst->SaturateMode = SATURATE_ZERO_ONE;
+      inst->Saturate = GL_TRUE;
       inst++;
    }
    /* LRP result.color.xyz, fogFactorTemp.xxxx, colorTemp, fogColorRef; */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program_parse_extra.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program_parse_extra.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program_parse_extra.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program_parse_extra.c	2015-09-16 14:36:10.000000000 +0000
@@ -40,7 +40,7 @@
 {
    inst->CondUpdate = 0;
    inst->CondDst = 0;
-   inst->SaturateMode = SATURATE_OFF;
+   inst->Saturate = GL_FALSE;
    inst->Precision = FLOAT32;
 
 
@@ -82,7 +82,7 @@
     */
    if (state->mode == ARB_fragment) {
       if (strcmp(suffix, "_SAT") == 0) {
-	 inst->SaturateMode = SATURATE_ZERO_ONE;
+	 inst->Saturate = GL_TRUE;
 	 suffix += 4;
       }
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program_parse.y mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program_parse.y
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/program_parse.y	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/program_parse.y	2015-09-16 14:36:10.000000000 +0000
@@ -84,7 +84,7 @@
     const struct prog_dst_register *dst, const struct asm_src_register *src0,
     const struct asm_src_register *src1, const struct asm_src_register *src2);
 
-static struct asm_instruction *asm_instruction_ctor(gl_inst_opcode op,
+static struct asm_instruction *asm_instruction_ctor(enum prog_opcode op,
     const struct prog_dst_register *dst, const struct asm_src_register *src0,
     const struct asm_src_register *src1, const struct asm_src_register *src2);
 
@@ -139,7 +139,7 @@
    gl_state_index state[STATE_LENGTH];
    int negate;
    struct asm_vector vector;
-   gl_inst_opcode opcode;
+   enum prog_opcode opcode;
 
    struct {
       unsigned swz;
@@ -2275,7 +2275,7 @@
 
 
 struct asm_instruction *
-asm_instruction_ctor(gl_inst_opcode op,
+asm_instruction_ctor(enum prog_opcode op,
 		     const struct prog_dst_register *dst,
 		     const struct asm_src_register *src0,
 		     const struct asm_src_register *src1,
@@ -2308,7 +2308,7 @@
       inst->Base.Opcode = base->Opcode;
       inst->Base.CondUpdate = base->CondUpdate;
       inst->Base.CondDst = base->CondDst;
-      inst->Base.SaturateMode = base->SaturateMode;
+      inst->Base.Saturate = base->Saturate;
       inst->Base.Precision = base->Precision;
 
       asm_instruction_set_operands(inst, dst, src0, src1, src2);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_to_nir.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_to_nir.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/program/prog_to_nir.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/program/prog_to_nir.c	2015-09-16 14:36:10.000000000 +0000
@@ -47,6 +47,7 @@
    nir_builder build;
    bool error;
 
+   nir_variable *parameters;
    nir_variable *input_vars[VARYING_SLOT_MAX];
    nir_variable *output_vars[VARYING_SLOT_MAX];
    nir_register **output_regs;
@@ -112,21 +113,6 @@
    return dest;
 }
 
-/**
- * Multiply the contents of the ADDR register by 4 to convert from the number
- * of vec4s to the number of floating point components.
- */
-static nir_ssa_def *
-ptn_addr_reg_value(struct ptn_compile *c)
-{
-   nir_builder *b = &c->build;
-   nir_alu_src src;
-   memset(&src, 0, sizeof(src));
-   src.src = nir_src_for_reg(c->addr_reg);
-
-   return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4));
-}
-
 static nir_ssa_def *
 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
 {
@@ -180,27 +166,38 @@
          }
          /* FALLTHROUGH */
       case PROGRAM_STATE_VAR: {
-         nir_intrinsic_op load_op =
-            prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
-                                nir_intrinsic_load_uniform;
-         nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op);
+         nir_intrinsic_instr *load =
+            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
          nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
          load->num_components = 4;
 
-         /* Multiply src->Index by 4 to scale from # of vec4s to components. */
-         load->const_index[0] = 4 * prog_src->Index;
-         load->const_index[1] = 1;
+         load->variables[0] = nir_deref_var_create(load, c->parameters);
+         nir_deref_array *deref_arr =
+            nir_deref_array_create(load->variables[0]);
+         deref_arr->deref.type = glsl_vec4_type();
+         load->variables[0]->deref.child = &deref_arr->deref;
 
          if (prog_src->RelAddr) {
-            nir_ssa_def *reladdr = ptn_addr_reg_value(c);
+            deref_arr->deref_array_type = nir_deref_array_type_indirect;
+
+            nir_alu_src addr_src = { NIR_SRC_INIT };
+            addr_src.src = nir_src_for_reg(c->addr_reg);
+            nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);
+
             if (prog_src->Index < 0) {
                /* This is a negative offset which should be added to the address
                 * register's value.
                 */
-               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0]));
-               load->const_index[0] = 0;
+               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));
+
+               deref_arr->base_offset = 0;
+            } else {
+               deref_arr->base_offset = prog_src->Index;
             }
-            load->src[0] = nir_src_for_ssa(reladdr);
+            deref_arr->indirect = nir_src_for_ssa(reladdr);
+         } else {
+            deref_arr->deref_array_type = nir_deref_array_type_direct;
+            deref_arr->base_offset = prog_src->Index;
          }
 
          nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
@@ -700,7 +697,7 @@
    [OPCODE_ADD] = nir_op_fadd,
    [OPCODE_ARL] = 0,
    [OPCODE_CMP] = 0,
-   [OPCODE_COS] = nir_op_fcos,
+   [OPCODE_COS] = 0,
    [OPCODE_DDX] = nir_op_fddx,
    [OPCODE_DDY] = nir_op_fddy,
    [OPCODE_DP2] = 0,
@@ -709,11 +706,11 @@
    [OPCODE_DPH] = 0,
    [OPCODE_DST] = 0,
    [OPCODE_END] = 0,
-   [OPCODE_EX2] = nir_op_fexp2,
+   [OPCODE_EX2] = 0,
    [OPCODE_EXP] = 0,
    [OPCODE_FLR] = nir_op_ffloor,
    [OPCODE_FRC] = nir_op_ffract,
-   [OPCODE_LG2] = nir_op_flog2,
+   [OPCODE_LG2] = 0,
    [OPCODE_LIT] = 0,
    [OPCODE_LOG] = 0,
    [OPCODE_LRP] = 0,
@@ -722,15 +719,15 @@
    [OPCODE_MIN] = nir_op_fmin,
    [OPCODE_MOV] = nir_op_fmov,
    [OPCODE_MUL] = nir_op_fmul,
-   [OPCODE_POW] = nir_op_fpow,
-   [OPCODE_RCP] = nir_op_frcp,
+   [OPCODE_POW] = 0,
+   [OPCODE_RCP] = 0,
 
-   [OPCODE_RSQ] = nir_op_frsq,
+   [OPCODE_RSQ] = 0,
    [OPCODE_SCS] = 0,
    [OPCODE_SEQ] = 0,
    [OPCODE_SGE] = 0,
    [OPCODE_SGT] = 0,
-   [OPCODE_SIN] = nir_op_fsin,
+   [OPCODE_SIN] = 0,
    [OPCODE_SLE] = 0,
    [OPCODE_SLT] = 0,
    [OPCODE_SNE] = 0,
@@ -895,7 +892,7 @@
       break;
 
    default:
-      if (op_trans[op] != 0 || op == OPCODE_MOV) {
+      if (op_trans[op] != 0) {
          ptn_alu(b, op_trans[op], dest, src);
       } else {
          fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
@@ -904,8 +901,8 @@
       break;
    }
 
-   if (prog_inst->SaturateMode) {
-      assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
+   if (prog_inst->Saturate) {
+      /* Saturate is a 1-bit flag, so [0,1] clamping is the only mode. */
       assert(!dest.dest.is_ssa);
       ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
    }
@@ -1074,13 +1071,11 @@
    }
    reg->num_components = 1;
    c->addr_reg = reg;
-
-   /* Set the number of uniforms */
-   shader->num_uniforms = 4 * c->prog->Parameters->NumParameters;
 }
 
 struct nir_shader *
-prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options)
+prog_to_nir(const struct gl_program *prog,
+            const nir_shader_compiler_options *options)
 {
    struct ptn_compile *c;
    struct nir_shader *s;
@@ -1093,6 +1088,14 @@
       goto fail;
    c->prog = prog;
 
+   c->parameters = rzalloc(s, nir_variable);
+   c->parameters->type = glsl_array_type(glsl_vec4_type(),
+                                            prog->Parameters->NumParameters);
+   c->parameters->name = "parameters";
+   c->parameters->data.read_only = true;
+   c->parameters->data.mode = nir_var_uniform;
+   exec_list_push_tail(&s->uniforms, &c->parameters->node);
+
    nir_function *func = nir_function_create(s, "main");
    nir_function_overload *overload = nir_function_overload_create(func);
    nir_function_impl *impl = nir_function_impl_create(overload);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom.c	2015-09-16 14:36:10.000000000 +0000
@@ -46,9 +46,10 @@
    &st_update_depth_stencil_alpha,
    &st_update_clip,
 
-   &st_finalize_textures,
    &st_update_fp,
    &st_update_gp,
+   &st_update_tep,
+   &st_update_tcp,
    &st_update_vp,
 
    &st_update_rasterizer,
@@ -59,17 +60,24 @@
    &st_update_vertex_texture,
    &st_update_fragment_texture,
    &st_update_geometry_texture,
+   &st_update_tessctrl_texture,
+   &st_update_tesseval_texture,
    &st_update_sampler, /* depends on update_*_texture for swizzle */
    &st_update_framebuffer,
    &st_update_msaa,
    &st_update_sample_shading,
    &st_update_vs_constants,
+   &st_update_tcs_constants,
+   &st_update_tes_constants,
    &st_update_gs_constants,
    &st_update_fs_constants,
    &st_bind_vs_ubos,
+   &st_bind_tcs_ubos,
+   &st_bind_tes_ubos,
    &st_bind_fs_ubos,
    &st_bind_gs_ubos,
    &st_update_pixel_transfer,
+   &st_update_tess,
 
    /* this must be done after the vertex program update */
    &st_update_array
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_clip.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_clip.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_clip.c	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_clip.c	2015-09-16 14:36:10.000000000 +0000
@@ -59,8 +59,11 @@
    memcpy(clip.ucp,
           use_eye ? ctx->Transform.EyeUserPlane
                   : ctx->Transform._ClipUserPlane, sizeof(clip.ucp));
-   st->state.clip = clip;
-   cso_set_clip(st->cso_context, &clip);
+
+   if (memcmp(&st->state.clip, &clip, sizeof(clip)) != 0) {
+      st->state.clip = clip;
+      st->pipe->set_clip_state(st->pipe, &clip);
+   }
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_constbuf.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_constbuf.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_constbuf.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_constbuf.c	2015-09-16 14:36:10.000000000 +0000
@@ -59,7 +59,9 @@
 {
    assert(shader_type == PIPE_SHADER_VERTEX ||
           shader_type == PIPE_SHADER_FRAGMENT ||
-          shader_type == PIPE_SHADER_GEOMETRY);
+          shader_type == PIPE_SHADER_GEOMETRY ||
+          shader_type == PIPE_SHADER_TESS_CTRL ||
+          shader_type == PIPE_SHADER_TESS_EVAL);
 
    /* update constants */
    if (params && params->NumParameters) {
@@ -178,6 +180,50 @@
    update_gs_constants					/* update */
 };
 
+/* Tessellation control shader: upload the constants of the program
+ * referenced by st->tcp; nothing is uploaded when that pointer is NULL. */
+static void update_tcs_constants(struct st_context *st )
+{
+   struct st_tessctrl_program *prog = st->tcp;
+
+   if (!prog)
+      return;
+
+   st_upload_constants( st, prog->Base.Base.Parameters,
+                        PIPE_SHADER_TESS_CTRL );
+}
+
+const struct st_tracked_state st_update_tcs_constants = {
+   "st_update_tcs_constants",				/* name */
+   {							/* dirty flags: */
+      _NEW_PROGRAM_CONSTANTS,                           /* mesa (_NEW_*) bits */
+      ST_NEW_TESSCTRL_PROGRAM,				/* st (ST_NEW_*) bits */
+   },
+   update_tcs_constants					/* update: uploads TCS constants */
+};
+
+/* Tessellation evaluation shader: upload the constants of the program
+ * referenced by st->tep; nothing is uploaded when that pointer is NULL. */
+static void update_tes_constants(struct st_context *st )
+{
+   struct st_tesseval_program *prog = st->tep;
+
+   if (!prog)
+      return;
+
+   st_upload_constants( st, prog->Base.Base.Parameters,
+                        PIPE_SHADER_TESS_EVAL );
+}
+
+const struct st_tracked_state st_update_tes_constants = {
+   "st_update_tes_constants",				/* name */
+   {							/* dirty flags: */
+      _NEW_PROGRAM_CONSTANTS,                           /* mesa (_NEW_*) bits */
+      ST_NEW_TESSEVAL_PROGRAM,				/* st (ST_NEW_*) bits */
+   },
+   update_tes_constants					/* update: uploads TES constants */
+};
+
 static void st_bind_ubos(struct st_context *st,
                            struct gl_shader *shader,
                            unsigned shader_type)
@@ -275,3 +321,43 @@
    },
    bind_gs_ubos
 };
+
+/* Hand the current program's linked TCS (if any) to st_bind_ubos(). */
+static void bind_tcs_ubos(struct st_context *st)
+{
+   struct gl_shader_program *current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+
+   if (current)
+      st_bind_ubos(st, current->_LinkedShaders[MESA_SHADER_TESS_CTRL],
+                   PIPE_SHADER_TESS_CTRL);
+}
+
+const struct st_tracked_state st_bind_tcs_ubos = {
+   "st_bind_tcs_ubos",					/* name */
+   {							/* dirty */
+      0,						/* mesa */
+      ST_NEW_TESSCTRL_PROGRAM | ST_NEW_UNIFORM_BUFFER,	/* st */
+   },
+   bind_tcs_ubos					/* update */
+};
+
+/* Hand the current program's linked TES (if any) to st_bind_ubos(). */
+static void bind_tes_ubos(struct st_context *st)
+{
+   struct gl_shader_program *current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+   if (current)
+      st_bind_ubos(st, current->_LinkedShaders[MESA_SHADER_TESS_EVAL],
+                   PIPE_SHADER_TESS_EVAL);
+}
+
+const struct st_tracked_state st_bind_tes_ubos = {
+   "st_bind_tes_ubos",					/* name */
+   {							/* dirty */
+      0,						/* mesa */
+      ST_NEW_TESSEVAL_PROGRAM | ST_NEW_UNIFORM_BUFFER,	/* st */
+   },
+   bind_tes_ubos					/* update */
+};
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_depth.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_depth.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_depth.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_depth.c	2015-09-16 14:36:10.000000000 +0000
@@ -105,10 +105,17 @@
    memset(dsa, 0, sizeof(*dsa));
    memset(&sr, 0, sizeof(sr));
 
-   if (ctx->Depth.Test && ctx->DrawBuffer->Visual.depthBits > 0) {
-      dsa->depth.enabled = 1;
-      dsa->depth.writemask = ctx->Depth.Mask;
-      dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func);
+   if (ctx->DrawBuffer->Visual.depthBits > 0) {
+      if (ctx->Depth.Test) {
+         dsa->depth.enabled = 1;
+         dsa->depth.writemask = ctx->Depth.Mask;
+         dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func);
+      }
+      if (ctx->Depth.BoundsTest) {
+         dsa->depth.bounds_test = 1;
+         dsa->depth.bounds_min = ctx->Depth.BoundsMin;
+         dsa->depth.bounds_max = ctx->Depth.BoundsMax;
+      }
    }
 
    if (ctx->Stencil.Enabled && ctx->DrawBuffer->Visual.stencilBits > 0) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom.h	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom.h	2015-09-16 14:36:10.000000000 +0000
@@ -52,6 +52,8 @@
 extern const struct st_tracked_state st_update_depth_stencil_alpha;
 extern const struct st_tracked_state st_update_fp;
 extern const struct st_tracked_state st_update_gp;
+extern const struct st_tracked_state st_update_tep;
+extern const struct st_tracked_state st_update_tcp;
 extern const struct st_tracked_state st_update_vp;
 extern const struct st_tracked_state st_update_rasterizer;
 extern const struct st_tracked_state st_update_polygon_stipple;
@@ -64,14 +66,20 @@
 extern const struct st_tracked_state st_update_fragment_texture;
 extern const struct st_tracked_state st_update_vertex_texture;
 extern const struct st_tracked_state st_update_geometry_texture;
-extern const struct st_tracked_state st_finalize_textures;
+extern const struct st_tracked_state st_update_tessctrl_texture;
+extern const struct st_tracked_state st_update_tesseval_texture;
 extern const struct st_tracked_state st_update_fs_constants;
 extern const struct st_tracked_state st_update_gs_constants;
+extern const struct st_tracked_state st_update_tes_constants;
+extern const struct st_tracked_state st_update_tcs_constants;
 extern const struct st_tracked_state st_update_vs_constants;
 extern const struct st_tracked_state st_bind_fs_ubos;
 extern const struct st_tracked_state st_bind_vs_ubos;
 extern const struct st_tracked_state st_bind_gs_ubos;
+extern const struct st_tracked_state st_bind_tcs_ubos;
+extern const struct st_tracked_state st_bind_tes_ubos;
 extern const struct st_tracked_state st_update_pixel_transfer;
+extern const struct st_tracked_state st_update_tess;
 
 
 GLuint st_compare_func_to_pipe(GLenum func);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_sampler.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_sampler.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_sampler.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_sampler.c	2015-09-16 14:36:10.000000000 +0000
@@ -245,6 +245,7 @@
    GLuint unit;
    GLbitfield samplers_used;
    const GLuint old_max = *num_samplers;
+   const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];
 
    samplers_used = prog->SamplersUsed;
 
@@ -261,13 +262,11 @@
          const GLuint texUnit = prog->SamplerUnits[unit];
 
          convert_sampler(st, sampler, texUnit);
-
+         states[unit] = sampler;
          *num_samplers = unit + 1;
-
-         cso_single_sampler(st->cso_context, shader_stage, unit, sampler);
       }
       else if (samplers_used != 0 || unit < old_max) {
-         cso_single_sampler(st->cso_context, shader_stage, unit, NULL);
+         states[unit] = NULL;
       }
       else {
          /* if we've reset all the old samplers and we have no more new ones */
@@ -275,7 +274,7 @@
       }
    }
 
-   cso_single_sampler_done(st->cso_context, shader_stage);
+   cso_set_samplers(st->cso_context, shader_stage, *num_samplers, states);
 }
 
 
@@ -306,6 +305,22 @@
                              st->state.samplers[PIPE_SHADER_GEOMETRY],
                              &st->state.num_samplers[PIPE_SHADER_GEOMETRY]);
    }
+   if (ctx->TessCtrlProgram._Current) {
+      update_shader_samplers(st,
+                             PIPE_SHADER_TESS_CTRL,
+                             &ctx->TessCtrlProgram._Current->Base,
+                             ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits,
+                             st->state.samplers[PIPE_SHADER_TESS_CTRL],
+                             &st->state.num_samplers[PIPE_SHADER_TESS_CTRL]);
+   }
+   if (ctx->TessEvalProgram._Current) {
+      update_shader_samplers(st,
+                             PIPE_SHADER_TESS_EVAL,
+                             &ctx->TessEvalProgram._Current->Base,
+                             ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits,
+                             st->state.samplers[PIPE_SHADER_TESS_EVAL],
+                             &st->state.num_samplers[PIPE_SHADER_TESS_EVAL]);
+   }
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_shader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_shader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_shader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_shader.c	2015-09-16 14:36:10.000000000 +0000
@@ -50,24 +50,6 @@
 
 
 /**
- * Return pointer to a pass-through fragment shader.
- * This shader is used when a texture is missing/incomplete.
- */
-static void *
-get_passthrough_fs(struct st_context *st)
-{
-   if (!st->passthrough_fs) {
-      st->passthrough_fs =
-         util_make_fragment_passthrough_shader(st->pipe, TGSI_SEMANTIC_COLOR,
-                                               TGSI_INTERPOLATE_PERSPECTIVE,
-                                               TRUE);
-   }
-
-   return st->passthrough_fs;
-}
-
-
-/**
  * Update fragment program state/atom.  This involves translating the
  * Mesa fragment program into a gallium fragment program and binding it.
  */
@@ -96,15 +78,8 @@
 
    st_reference_fragprog(st, &st->fp, stfp);
 
-   if (st->missing_textures) {
-      /* use a pass-through frag shader that uses no textures */
-      void *fs = get_passthrough_fs(st);
-      cso_set_fragment_shader_handle(st->cso_context, fs);
-   }
-   else {
-      cso_set_fragment_shader_handle(st->cso_context,
-                                     st->fp_variant->driver_shader);
-   }
+   cso_set_fragment_shader_handle(st->cso_context,
+                                  st->fp_variant->driver_shader);
 }
 
 
@@ -189,7 +164,7 @@
    }
 
    stgp = st_geometry_program(st->ctx->GeometryProgram._Current);
-   assert(stgp->Base.Base.Target == MESA_GEOMETRY_PROGRAM);
+   assert(stgp->Base.Base.Target == GL_GEOMETRY_PROGRAM_NV);
 
    memset(&key, 0, sizeof(key));
    key.st = st;
@@ -210,3 +185,75 @@
    },
    update_gp  				/* update */
 };
+
+
+
+/* Bind the driver shader of the current tessellation control program's
+ * variant, or unbind the stage (NULL handle) when no program is current. */
+static void
+update_tcp( struct st_context *st )
+{
+   struct st_tessctrl_program *prog;
+   struct st_tcp_variant_key variant_key;
+
+   if (st->ctx->TessCtrlProgram._Current) {
+      prog = st_tessctrl_program(st->ctx->TessCtrlProgram._Current);
+      assert(prog->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV);
+
+      /* zero the whole key first, then fill in the one field set here */
+      memset(&variant_key, 0, sizeof(variant_key));
+      variant_key.st = st;
+
+      st->tcp_variant = st_get_tcp_variant(st, prog, &variant_key);
+      st_reference_tesscprog(st, &st->tcp, prog);
+      cso_set_tessctrl_shader_handle(st->cso_context,
+                                     st->tcp_variant->driver_shader);
+   } else {
+      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+   }
+}
+
+const struct st_tracked_state st_update_tcp = {
+   "st_update_tcp",			/* name */
+   {					/* dirty flags: */
+      0,				/* mesa: none listed */
+      ST_NEW_TESSCTRL_PROGRAM           /* st */
+   },
+   update_tcp  				/* update: binds the TCS variant */
+};
+
+
+
+/* Bind the driver shader of the current tessellation evaluation program's
+ * variant, or unbind the stage (NULL handle) when no program is current. */
+static void
+update_tep( struct st_context *st )
+{
+   struct st_tesseval_program *prog;
+   struct st_tep_variant_key variant_key;
+
+   if (st->ctx->TessEvalProgram._Current) {
+      prog = st_tesseval_program(st->ctx->TessEvalProgram._Current);
+      assert(prog->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV);
+
+      /* zero the whole key first, then fill in the one field set here */
+      memset(&variant_key, 0, sizeof(variant_key));
+      variant_key.st = st;
+
+      st->tep_variant = st_get_tep_variant(st, prog, &variant_key);
+      st_reference_tesseprog(st, &st->tep, prog);
+      cso_set_tesseval_shader_handle(st->cso_context,
+                                     st->tep_variant->driver_shader);
+   } else {
+      cso_set_tesseval_shader_handle(st->cso_context, NULL);
+   }
+}
+
+const struct st_tracked_state st_update_tep = {
+   "st_update_tep",			/* name */
+   {					/* dirty flags: */
+      0,				/* mesa: none listed */
+      ST_NEW_TESSEVAL_PROGRAM           /* st */
+   },
+   update_tep  				/* update: binds the TES variant */
+};
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_tess.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_tess.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_tess.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_tess.c	2015-09-16 14:36:10.000000000 +0000
@@ -0,0 +1,62 @@
+/**************************************************************************
+ * 
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/*
+ * Authors:
+ *   Marek Olšák <maraeo@gmail.com>
+ */
+
+
+#include "main/macros.h"
+#include "st_context.h"
+#include "pipe/p_context.h"
+#include "st_atom.h"
+
+
+/* Pass the context's default outer/inner patch levels to the driver.
+ * set_tess_state is treated as optional: a NULL hook makes this a no-op. */
+static void
+update_tess(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+
+   if (pipe->set_tess_state) {
+      pipe->set_tess_state(pipe,
+                           st->ctx->TessCtrlProgram.patch_default_outer_level,
+                           st->ctx->TessCtrlProgram.patch_default_inner_level);
+   }
+}
+
+
+const struct st_tracked_state st_update_tess = {
+   "st_update_tess",		/* name: matches the symbol, like sibling atoms */
+   {				/* dirty */
+      0,			/* mesa */
+      ST_NEW_TESS_STATE,	/* st */
+   },
+   update_tess                  /* update: forwards default patch levels */
+};
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_texture.c	2015-09-16 14:36:10.000000000 +0000
@@ -103,7 +103,8 @@
  */
 static unsigned
 compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
-                               enum pipe_format actualFormat)
+                               enum pipe_format actualFormat,
+                               unsigned glsl_version)
 {
    switch (baseFormat) {
    case GL_RGBA:
@@ -157,8 +158,26 @@
       case GL_INTENSITY:
          return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
       case GL_ALPHA:
-         return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO,
-                              SWIZZLE_ZERO, SWIZZLE_X);
+         /* The texture(sampler*Shadow) functions from GLSL 1.30 ignore
+          * the depth mode and return float, while older shadow* functions
+          * and ARB_fp instructions return vec4 according to the depth mode.
+          *
+          * The problem with the GLSL 1.30 functions is that GL_ALPHA forces
+          * them to return 0, breaking them completely.
+          *
+          * A proper fix would increase code complexity and that's not worth
+          * it for a rarely used feature such as the GL_ALPHA depth mode
+          * in GL3. Therefore, change GL_ALPHA to GL_INTENSITY for all
+          * shaders that use GLSL 1.30 or later.
+          *
+          * BTW, it's required that sampler views are updated when
+          * shaders change (check_sampler_swizzle takes care of that).
+          */
+         if (glsl_version && glsl_version >= 130)
+            return SWIZZLE_XXXX;
+         else
+            return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO,
+                                 SWIZZLE_ZERO, SWIZZLE_X);
       case GL_RED:
          return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO,
                               SWIZZLE_ZERO, SWIZZLE_ONE);
@@ -174,7 +193,8 @@
 
 
 static unsigned
-get_texture_format_swizzle(const struct st_texture_object *stObj)
+get_texture_format_swizzle(const struct st_texture_object *stObj,
+                           unsigned glsl_version)
 {
    GLenum baseFormat = _mesa_texture_base_format(&stObj->base);
    unsigned tex_swizzle;
@@ -182,7 +202,8 @@
    if (baseFormat != GL_NONE) {
       tex_swizzle = compute_texture_format_swizzle(baseFormat,
                                                    stObj->base.DepthMode,
-                                                   stObj->pt->format);
+                                                   stObj->pt->format,
+                                                   glsl_version);
    }
    else {
       tex_swizzle = SWIZZLE_XYZW;
@@ -201,9 +222,9 @@
  */
 static boolean
 check_sampler_swizzle(const struct st_texture_object *stObj,
-		      struct pipe_sampler_view *sv)
+		      struct pipe_sampler_view *sv, unsigned glsl_version)
 {
-   unsigned swizzle = get_texture_format_swizzle(stObj);
+   unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
 
    return ((sv->swizzle_r != GET_SWZ(swizzle, 0)) ||
            (sv->swizzle_g != GET_SWZ(swizzle, 1)) ||
@@ -232,11 +253,11 @@
 static struct pipe_sampler_view *
 st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
 					  struct st_texture_object *stObj,
-                                          const struct gl_sampler_object *samp,
-					  enum pipe_format format)
+					  enum pipe_format format,
+                                          unsigned glsl_version)
 {
    struct pipe_sampler_view templ;
-   unsigned swizzle = get_texture_format_swizzle(stObj);
+   unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
 
    u_sampler_view_default_template(&templ,
                                    stObj->pt,
@@ -283,8 +304,8 @@
 static struct pipe_sampler_view *
 st_get_texture_sampler_view_from_stobj(struct st_context *st,
                                        struct st_texture_object *stObj,
-                                       const struct gl_sampler_object *samp,
-				       enum pipe_format format)
+				       enum pipe_format format,
+                                       unsigned glsl_version)
 {
    struct pipe_sampler_view **sv;
    const struct st_texture_image *firstImage;
@@ -306,7 +327,7 @@
 
    /* if sampler view has changed dereference it */
    if (*sv) {
-      if (check_sampler_swizzle(stObj, *sv) ||
+      if (check_sampler_swizzle(stObj, *sv, glsl_version) ||
 	  (format != (*sv)->format) ||
           gl_target_to_pipe(stObj->base.Target) != (*sv)->target ||
           stObj->base.MinLevel + stObj->base.BaseLevel != (*sv)->u.tex.first_level ||
@@ -318,7 +339,8 @@
    }
 
    if (!*sv) {
-      *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj, samp, format);
+      *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj,
+                                                      format, glsl_version);
 
    } else if ((*sv)->context != st->pipe) {
       /* Recreate view in correct context, use existing view as template */
@@ -334,7 +356,7 @@
 static GLboolean
 update_single_texture(struct st_context *st,
                       struct pipe_sampler_view **sampler_view,
-		      GLuint texUnit)
+		      GLuint texUnit, unsigned glsl_version)
 {
    struct gl_context *ctx = st->ctx;
    const struct gl_sampler_object *samp;
@@ -374,8 +396,9 @@
       }
    }
 
-   *sampler_view = st_get_texture_sampler_view_from_stobj(st, stObj, samp,
-							  view_format);
+   *sampler_view =
+      st_get_texture_sampler_view_from_stobj(st, stObj, view_format,
+                                             glsl_version);
    return GL_TRUE;
 }
 
@@ -383,7 +406,7 @@
 
 static void
 update_textures(struct st_context *st,
-                unsigned shader_stage,
+                gl_shader_stage mesa_shader,
                 const struct gl_program *prog,
                 unsigned max_units,
                 struct pipe_sampler_view **sampler_views,
@@ -392,6 +415,10 @@
    const GLuint old_max = *num_textures;
    GLbitfield samplers_used = prog->SamplersUsed;
    GLuint unit;
+   struct gl_shader_program *shader =
+      st->ctx->_Shader->CurrentProgram[mesa_shader];
+   unsigned glsl_version = shader ? shader->Version : 0;
+   unsigned shader_stage = st_shader_stage_to_ptarget(mesa_shader);
 
    if (samplers_used == 0x0 && old_max == 0)
       return;
@@ -406,7 +433,8 @@
          const GLuint texUnit = prog->SamplerUnits[unit];
          GLboolean retval;
 
-         retval = update_single_texture(st, &sampler_view, texUnit);
+         retval = update_single_texture(st, &sampler_view, texUnit,
+                                        glsl_version);
          if (retval == GL_FALSE)
             continue;
 
@@ -435,7 +463,7 @@
 
    if (ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits > 0) {
       update_textures(st,
-                      PIPE_SHADER_VERTEX,
+                      MESA_SHADER_VERTEX,
                       &ctx->VertexProgram._Current->Base,
                       ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits,
                       st->state.sampler_views[PIPE_SHADER_VERTEX],
@@ -450,7 +478,7 @@
    const struct gl_context *ctx = st->ctx;
 
    update_textures(st,
-                   PIPE_SHADER_FRAGMENT,
+                   MESA_SHADER_FRAGMENT,
                    &ctx->FragmentProgram._Current->Base,
                    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits,
                    st->state.sampler_views[PIPE_SHADER_FRAGMENT],
@@ -465,7 +493,7 @@
 
    if (ctx->GeometryProgram._Current) {
       update_textures(st,
-                      PIPE_SHADER_GEOMETRY,
+                      MESA_SHADER_GEOMETRY,
                       &ctx->GeometryProgram._Current->Base,
                       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits,
                       st->state.sampler_views[PIPE_SHADER_GEOMETRY],
@@ -474,6 +502,38 @@
 }
 
 
+/* Run update_textures() for the current TCS program; no-op without one. */
+static void
+update_tessctrl_textures(struct st_context *st)
+{
+   const struct gl_context *ctx = st->ctx;
+
+   if (!ctx->TessCtrlProgram._Current)
+      return;
+
+   update_textures(st, MESA_SHADER_TESS_CTRL, &ctx->TessCtrlProgram._Current->Base,
+                   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits,
+                   st->state.sampler_views[PIPE_SHADER_TESS_CTRL],
+                   &st->state.num_sampler_views[PIPE_SHADER_TESS_CTRL]);
+}
+
+
+/* Run update_textures() for the current TES program; no-op without one. */
+static void
+update_tesseval_textures(struct st_context *st)
+{
+   const struct gl_context *ctx = st->ctx;
+
+   if (!ctx->TessEvalProgram._Current)
+      return;
+
+   update_textures(st, MESA_SHADER_TESS_EVAL, &ctx->TessEvalProgram._Current->Base,
+                   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits,
+                   st->state.sampler_views[PIPE_SHADER_TESS_EVAL],
+                   &st->state.num_sampler_views[PIPE_SHADER_TESS_EVAL]);
+}
+
+
 const struct st_tracked_state st_update_fragment_texture = {
    "st_update_texture",					/* name */
    {							/* dirty */
@@ -504,46 +564,21 @@
 };
 
 
-
-static void
-finalize_textures(struct st_context *st)
-{
-   struct gl_context *ctx = st->ctx;
-   struct gl_fragment_program *fprog = ctx->FragmentProgram._Current;
-   const GLboolean prev_missing_textures = st->missing_textures;
-   GLuint su;
-
-   st->missing_textures = GL_FALSE;
-
-   for (su = 0; su < ctx->Const.MaxTextureCoordUnits; su++) {
-      if (fprog->Base.SamplersUsed & (1 << su)) {
-         const GLuint texUnit = fprog->Base.SamplerUnits[su];
-         struct gl_texture_object *texObj
-            = ctx->Texture.Unit[texUnit]._Current;
-
-         if (texObj) {
-            GLboolean retval;
-
-            retval = st_finalize_texture(ctx, st->pipe, texObj);
-            if (!retval) {
-               /* out of mem */
-               st->missing_textures = GL_TRUE;
-               continue;
-            }
-         }
-      }
-   }
-
-   if (prev_missing_textures != st->missing_textures)
-      st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
-}
+const struct st_tracked_state st_update_tessctrl_texture = {
+   "st_update_tessctrl_texture",			/* name */
+   {							/* dirty flags: */
+      _NEW_TEXTURE,					/* mesa (_NEW_*) bits */
+      ST_NEW_TESSCTRL_PROGRAM | ST_NEW_SAMPLER_VIEWS,	/* st (ST_NEW_*) bits */
+   },
+   update_tessctrl_textures				/* update */
+};
 
 
-const struct st_tracked_state st_finalize_textures = {
-   "st_finalize_textures",		/* name */
-   {					/* dirty */
-      _NEW_TEXTURE,			/* mesa */
-      0,				/* st */
+const struct st_tracked_state st_update_tesseval_texture = {
+   "st_update_tesseval_texture",			/* name */
+   {							/* dirty */
+      _NEW_TEXTURE,					/* mesa */
+      ST_NEW_TESSEVAL_PROGRAM | ST_NEW_SAMPLER_VIEWS,	/* st */
    },
-   finalize_textures			/* update */
+   update_tesseval_textures				/* update */
 };
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_viewport.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_viewport.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_atom_viewport.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_atom_viewport.c	2015-09-16 14:36:10.000000000 +0000
@@ -64,7 +64,7 @@
     */
    for (i = 0; i < ctx->Const.MaxViewports; i++)
    {
-      double scale[3], translate[3];
+      float scale[3], translate[3];
       _mesa_get_viewport_xform(ctx, i, scale, translate);
 
       st->state.viewport[i].scale[0] = scale[0];
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_bitmap.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_bitmap.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_bitmap.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_bitmap.c	2015-09-16 14:36:10.000000000 +0000
@@ -446,8 +446,8 @@
    assert(height <= (GLsizei)maxSize);
 
    cso_save_rasterizer(cso);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(cso);
+   cso_save_fragment_sampler_views(cso);
    cso_save_viewport(cso);
    cso_save_fragment_shader(cso);
    cso_save_stream_outputs(cso);
@@ -535,8 +535,8 @@
 
    /* restore state */
    cso_restore_rasterizer(cso);
-   cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_samplers(cso);
+   cso_restore_fragment_sampler_views(cso);
    cso_restore_viewport(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_blit.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_blit.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_blit.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_blit.c	2015-09-16 14:36:10.000000000 +0000
@@ -192,6 +192,7 @@
 
    blit.filter = pFilter;
    blit.render_condition_enable = TRUE;
+   blit.alpha_blend = FALSE;
 
    if (mask & GL_COLOR_BUFFER_BIT) {
       struct gl_renderbuffer_attachment *srcAtt =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_drawpixels.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_drawpixels.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_drawpixels.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_drawpixels.c	2015-09-16 14:36:10.000000000 +0000
@@ -33,6 +33,7 @@
 #include "main/imports.h"
 #include "main/image.h"
 #include "main/bufferobj.h"
+#include "main/blit.h"
 #include "main/format_pack.h"
 #include "main/macros.h"
 #include "main/mtypes.h"
@@ -688,8 +689,8 @@
 
    cso_save_rasterizer(cso);
    cso_save_viewport(cso);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(cso);
+   cso_save_fragment_sampler_views(cso);
    cso_save_fragment_shader(cso);
    cso_save_stream_outputs(cso);
    cso_save_vertex_shader(cso);
@@ -756,6 +757,8 @@
    /* texture sampling state: */
    {
       struct pipe_sampler_state sampler;
+      const struct pipe_sampler_state *states[2] = {&sampler, &sampler};
+
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
       sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
@@ -765,11 +768,8 @@
       sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
       sampler.normalized_coords = normalized;
 
-      cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 0, &sampler);
-      if (num_sampler_view > 1) {
-         cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 1, &sampler);
-      }
-      cso_single_sampler_done(cso, PIPE_SHADER_FRAGMENT);
+      cso_set_samplers(cso, PIPE_SHADER_FRAGMENT,
+                       num_sampler_view > 1 ? 2 : 1, states);
    }
 
    /* viewport state: viewport matching window dims */
@@ -816,8 +816,8 @@
    /* restore state */
    cso_restore_rasterizer(cso);
    cso_restore_viewport(cso);
-   cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_samplers(cso);
+   cso_restore_fragment_sampler_views(cso);
    cso_restore_fragment_shader(cso);
    cso_restore_vertex_shader(cso);
    cso_restore_tessctrl_shader(cso);
@@ -1313,31 +1313,6 @@
 
 
 /**
- * \return TRUE if two regions overlap, FALSE otherwise
- */
-static boolean
-regions_overlap(int srcX0, int srcY0,
-                int srcX1, int srcY1,
-                int dstX0, int dstY0,
-                int dstX1, int dstY1)
-{
-   if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1))
-      return FALSE; /* src completely left of dst */
-
-   if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1))
-      return FALSE; /* dst completely left of src */
-
-   if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1))
-      return FALSE; /* src completely above dst */
-
-   if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1))
-      return FALSE; /* dst completely above src */
-
-   return TRUE; /* some overlap */
-}
-
-
-/**
  * Try to do a glCopyPixels for simple cases with a blit by calling
  * pipe->blit().
  *
@@ -1420,8 +1395,8 @@
       }
 
       if (rbRead != rbDraw ||
-          !regions_overlap(readX, readY, readX + readW, readY + readH,
-                           drawX, drawY, drawX + drawW, drawY + drawH)) {
+          !_mesa_regions_overlap(readX, readY, readX + readW, readY + readH,
+                                 drawX, drawY, drawX + drawW, drawY + drawH)) {
          struct pipe_blit_info blit;
 
          memset(&blit, 0, sizeof(blit));
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_fbo.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_fbo.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_fbo.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_fbo.c	2015-09-16 14:36:10.000000000 +0000
@@ -511,8 +511,6 @@
    strb->rtt_layered = att->Layered;
    pipe_resource_reference(&strb->texture, pt);
 
-   pipe_surface_release(pipe, &strb->surface);
-
    st_update_renderbuffer_surface(st, strb);
 
    strb->Base.Format = st_pipe_format_to_mesa_format(pt->format);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_perfmon.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_perfmon.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_perfmon.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_perfmon.h	2015-09-16 14:36:10.000000000 +0000
@@ -46,7 +46,7 @@
 /**
  * Cast wrapper
  */
-static INLINE struct st_perf_monitor_object *
+static inline struct st_perf_monitor_object *
 st_perf_monitor_object(struct gl_perf_monitor_object *q)
 {
    return (struct st_perf_monitor_object *)q;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_program.c	2015-09-16 14:36:10.000000000 +0000
@@ -65,9 +65,15 @@
    case GL_FRAGMENT_PROGRAM_ARB:
       st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
       break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
       st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
       break;
+   case GL_TESS_CONTROL_PROGRAM_NV:
+      st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
+      break;
+   case GL_TESS_EVALUATION_PROGRAM_NV:
+      st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
+      break;
    }
 }
 
@@ -84,6 +90,8 @@
    st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
    st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
    st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
+   st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
+   st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
 }
 
 
@@ -105,11 +113,21 @@
       return _mesa_init_fragment_program(ctx, &prog->Base, target, id);
    }
 
-   case MESA_GEOMETRY_PROGRAM: {
+   case GL_GEOMETRY_PROGRAM_NV: {
       struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program);
       return _mesa_init_geometry_program(ctx, &prog->Base, target, id);
    }
 
+   case GL_TESS_CONTROL_PROGRAM_NV: {
+      struct st_tessctrl_program *prog = ST_CALLOC_STRUCT(st_tessctrl_program);
+      return _mesa_init_tess_ctrl_program(ctx, &prog->Base, target, id);
+   }
+
+   case GL_TESS_EVALUATION_PROGRAM_NV: {
+      struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program);
+      return _mesa_init_tess_eval_program(ctx, &prog->Base, target, id);
+   }
+
    default:
       assert(0);
       return NULL;
@@ -135,7 +153,7 @@
             free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
       }
       break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
       {
          struct st_geometry_program *stgp =
             (struct st_geometry_program *) prog;
@@ -157,6 +175,28 @@
             free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
       }
       break;
+   case GL_TESS_CONTROL_PROGRAM_NV:
+      {
+         struct st_tessctrl_program *sttcp =
+            (struct st_tessctrl_program *) prog;
+
+         st_release_tcp_variants(st, sttcp);
+
+         if (sttcp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi);
+      }
+      break;
+   case GL_TESS_EVALUATION_PROGRAM_NV:
+      {
+         struct st_tesseval_program *sttep =
+            (struct st_tesseval_program *) prog;
+
+         st_release_tep_variants(st, sttep);
+
+         if (sttep->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
+      }
+      break;
    default:
       assert(0); /* problem */
    }
@@ -198,7 +238,7 @@
       if (st->fp == stfp)
 	 st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
    }
-   else if (target == MESA_GEOMETRY_PROGRAM) {
+   else if (target == GL_GEOMETRY_PROGRAM_NV) {
       struct st_geometry_program *stgp = (struct st_geometry_program *) prog;
 
       st_release_gp_variants(st, stgp);
@@ -214,6 +254,24 @@
       if (st->vp == stvp)
 	 st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
    }
+   else if (target == GL_TESS_CONTROL_PROGRAM_NV) {
+      struct st_tessctrl_program *sttcp =
+         (struct st_tessctrl_program *) prog;
+
+      st_release_tcp_variants(st, sttcp);
+
+      if (st->tcp == sttcp)
+         st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
+   }
+   else if (target == GL_TESS_EVALUATION_PROGRAM_NV) {
+      struct st_tesseval_program *sttep =
+         (struct st_tesseval_program *) prog;
+
+      st_release_tep_variants(st, sttep);
+
+      if (st->tep == sttep)
+         st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
+   }
 
    if (ST_DEBUG & DEBUG_PRECOMPILE)
       st_precompile_shader_variant(st, prog);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_rasterpos.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_rasterpos.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_rasterpos.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_rasterpos.c	2015-09-16 14:36:10.000000000 +0000
@@ -254,7 +254,7 @@
     * st_feedback_draw_vbo doesn't check for that flag. */
    ctx->Array._DrawArrays = rs->arrays;
    st_feedback_draw_vbo(ctx, &rs->prim, 1, NULL, GL_TRUE, 0, 1,
-                        NULL, NULL);
+                        NULL, 0, NULL);
    ctx->Array._DrawArrays = saved_arrays;
 
    /* restore draw's rasterization stage depending on rendermode */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_readpixels.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_readpixels.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_readpixels.c	2014-08-12 16:09:46.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_readpixels.c	2015-09-16 14:36:10.000000000 +0000
@@ -43,6 +43,30 @@
 #include "state_tracker/st_format.h"
 #include "state_tracker/st_texture.h"
 
+/**
+ * Report whether a read of \p type data from the read renderbuffer chosen for
+ * \p format pairs a GL_INT source with an unsigned destination type, or a
+ * GL_UNSIGNED_INT source with a signed destination type.
+ */
+static boolean
+needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
+                                         GLenum format, GLenum type)
+{
+   struct gl_renderbuffer *rb =
+      _mesa_get_read_renderbuffer_for_format(ctx, format);
+   boolean dst_is_unsigned, dst_is_signed;
+   GLenum src_type;
+
+   assert(rb);
+   src_type = _mesa_get_format_datatype(rb->Format);
+
+   dst_is_unsigned = type == GL_UNSIGNED_INT || type == GL_UNSIGNED_SHORT ||
+                     type == GL_UNSIGNED_BYTE;
+   dst_is_signed = type == GL_INT || type == GL_SHORT || type == GL_BYTE;
+
+   return (src_type == GL_INT && dst_is_unsigned) ||
+          (src_type == GL_UNSIGNED_INT && dst_is_signed);
+}
 
 /**
  * This uses a blit to copy the read buffer to a texture format which matches
@@ -123,6 +147,10 @@
       goto fallback;
    }
 
+   if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
+      goto fallback;
+   }
+
    /* Convert the source format to what is expected by ReadPixels
     * and see if it's supported. */
    src_format = util_format_linear(src->format);
@@ -210,9 +238,9 @@
       GLuint row;
 
       for (row = 0; row < (unsigned) height; row++) {
-         GLvoid *dest = _mesa_image_address3d(pack, pixels,
+         GLvoid *dest = _mesa_image_address2d(pack, pixels,
                                               width, height, format,
-                                              type, 0, row, 0);
+                                              type, row, 0);
          memcpy(dest, map, bytesPerRow);
          map += tex_xfer->stride;
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_syncobj.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_syncobj.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_syncobj.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_syncobj.c	2015-09-16 14:36:10.000000000 +0000
@@ -81,7 +81,13 @@
    struct pipe_screen *screen = st_context(ctx)->pipe->screen;
    struct st_sync_object *so = (struct st_sync_object*)obj;
 
-   if (so->fence && screen->fence_signalled(screen, so->fence)) {
+   /* If the fence doesn't exist, assume it's signalled. */
+   if (!so->fence) {
+      so->b.StatusFlag = GL_TRUE;
+      return;
+   }
+
+   if (screen->fence_finish(screen, so->fence, 0)) {
       screen->fence_reference(screen, &so->fence, NULL);
       so->b.StatusFlag = GL_TRUE;
    }
@@ -94,6 +100,12 @@
    struct pipe_screen *screen = st_context(ctx)->pipe->screen;
    struct st_sync_object *so = (struct st_sync_object*)obj;
 
+   /* If the fence doesn't exist, assume it's signalled. */
+   if (!so->fence) {
+      so->b.StatusFlag = GL_TRUE;
+      return;
+   }
+
    /* We don't care about GL_SYNC_FLUSH_COMMANDS_BIT, because flush is
     * already called when creating a fence. */
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_texture.c	2015-09-16 14:36:10.000000000 +0000
@@ -896,7 +896,7 @@
 
 
 /**
- * Called via ctx->Driver.GetTexImage()
+ * Called via ctx->Driver.GetTexSubImage()
  *
  * This uses a blit to copy the texture to a texture format which matches
  * the format and type combo and then a fast read-back is done using memcpy.
@@ -910,16 +910,15 @@
  *       we do here should be free in such cases.
  */
 static void
-st_GetTexImage(struct gl_context * ctx,
-               GLenum format, GLenum type, GLvoid * pixels,
-               struct gl_texture_image *texImage)
+st_GetTexSubImage(struct gl_context * ctx,
+                  GLint xoffset, GLint yoffset, GLint zoffset,
+                  GLsizei width, GLsizei height, GLint depth,
+                  GLenum format, GLenum type, GLvoid * pixels,
+                  struct gl_texture_image *texImage)
 {
    struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
    struct pipe_screen *screen = pipe->screen;
-   GLuint width = texImage->Width;
-   GLuint height = texImage->Height;
-   GLuint depth = texImage->Depth;
    struct st_texture_image *stImage = st_texture_image(texImage);
    struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
    struct pipe_resource *src = stObj->pt;
@@ -1054,7 +1053,7 @@
       }
    }
 
-   /* create the destination texture */
+   /* create the destination texture of size (width X height X depth) */
    memset(&dst_templ, 0, sizeof(dst_templ));
    dst_templ.target = pipe_target;
    dst_templ.format = dst_format;
@@ -1072,10 +1071,16 @@
 
    /* From now on, we need the gallium representation of dimensions. */
    if (gl_target == GL_TEXTURE_1D_ARRAY) {
+      zoffset = yoffset;
+      yoffset = 0;
       depth = height;
       height = 1;
    }
 
+   assert(texImage->Face == 0 ||
+          texImage->TexObject->MinLayer == 0 ||
+          zoffset == 0);
+
    memset(&blit, 0, sizeof(blit));
    blit.src.resource = src;
    blit.src.level = texImage->Level + texImage->TexObject->MinLevel;
@@ -1083,9 +1088,11 @@
    blit.dst.resource = dst;
    blit.dst.level = 0;
    blit.dst.format = dst->format;
-   blit.src.box.x = blit.dst.box.x = 0;
-   blit.src.box.y = blit.dst.box.y = 0;
-   blit.src.box.z = texImage->Face + texImage->TexObject->MinLayer;
+   blit.src.box.x = xoffset;
+   blit.dst.box.x = 0;
+   blit.src.box.y = yoffset;
+   blit.dst.box.y = 0;
+   blit.src.box.z = texImage->Face + texImage->TexObject->MinLayer + zoffset;
    blit.dst.box.z = 0;
    blit.src.box.width = blit.dst.box.width = width;
    blit.src.box.height = blit.dst.box.height = height;
@@ -1206,7 +1213,9 @@
 
 fallback:
    if (!done) {
-      _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage);
+      _mesa_GetTexSubImage_sw(ctx, xoffset, yoffset, zoffset,
+                              width, height, depth,
+                              format, type, pixels, texImage);
    }
 }
 
@@ -1864,6 +1873,31 @@
    return GL_TRUE;
 }
 
+/* HACK: this is only enough for the most basic uses of CopyImage. Must fix
+ * before actually exposing the extension.
+ *
+ * Issues one pipe->resource_copy_region() from src_image's resource to
+ * dst_image's, with the source box built by u_box_2d_zslice().
+ */
+static void
+st_CopyImageSubData(struct gl_context *ctx,
+                    struct gl_texture_image *src_image,
+                    int src_x, int src_y, int src_z,
+                    struct gl_texture_image *dst_image,
+                    int dst_x, int dst_y, int dst_z,
+                    int src_width, int src_height)
+{
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct st_texture_image *st_src = st_texture_image(src_image);
+   struct st_texture_image *st_dst = st_texture_image(dst_image);
+   struct pipe_box src_box;
+
+   u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &src_box);
+   pipe->resource_copy_region(pipe, st_dst->pt, dst_image->Level,
+                              dst_x, dst_y, dst_z,
+                              st_src->pt, src_image->Level, &src_box);
+}
+
 
 void
 st_init_texture_functions(struct dd_function_table *functions)
@@ -1876,11 +1910,11 @@
    functions->CopyTexSubImage = st_CopyTexSubImage;
    functions->GenerateMipmap = st_generate_mipmap;
 
-   functions->GetTexImage = st_GetTexImage;
+   functions->GetTexSubImage = st_GetTexSubImage;
 
    /* compressed texture functions */
    functions->CompressedTexImage = st_CompressedTexImage;
-   functions->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw;
+   functions->GetCompressedTexSubImage = _mesa_GetCompressedTexSubImage_sw;
 
    functions->NewTextureObject = st_NewTextureObject;
    functions->NewTextureImage = st_NewTextureImage;
@@ -1896,4 +1930,6 @@
 
    functions->AllocTextureStorage = st_AllocTextureStorage;
    functions->TextureView = st_TextureView;
+
+   functions->CopyImageSubData = st_CopyImageSubData;
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_xformfb.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_xformfb.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_xformfb.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_xformfb.c	2015-09-16 14:36:10.000000000 +0000
@@ -54,9 +54,9 @@
    struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
 
    /* This encapsulates the count that can be used as a source for draw_vbo.
-    * It contains a stream output target from the last call of
-    * EndTransformFeedback. */
-   struct pipe_stream_output_target *draw_count;
+    * It contains stream output targets from the last call of
+    * EndTransformFeedback for each stream. */
+   struct pipe_stream_output_target *draw_count[MAX_VERTEX_STREAMS];
 };
 
 static inline struct st_transform_feedback_object *
@@ -88,7 +88,8 @@
          st_transform_feedback_object(obj);
    unsigned i;
 
-   pipe_so_target_reference(&sobj->draw_count, NULL);
+   for (i = 0; i < ARRAY_SIZE(sobj->draw_count); i++)
+      pipe_so_target_reference(&sobj->draw_count[i], NULL);
 
    /* Unreference targets. */
    for (i = 0; i < sobj->num_targets; i++) {
@@ -123,9 +124,12 @@
       struct st_buffer_object *bo = st_buffer_object(sobj->base.Buffers[i]);
 
       if (bo && bo->buffer) {
+         unsigned stream =
+            obj->shader_program->LinkedTransformFeedback.BufferStream[i];
+
          /* Check whether we need to recreate the target. */
          if (!sobj->targets[i] ||
-             sobj->targets[i] == sobj->draw_count ||
+             sobj->targets[i] == sobj->draw_count[stream] ||
              sobj->targets[i]->buffer != bo->buffer ||
              sobj->targets[i]->buffer_offset != sobj->base.Offset[i] ||
              sobj->targets[i]->buffer_size != sobj->base.Size[i]) {
@@ -178,24 +182,6 @@
 }
 
 
-static struct pipe_stream_output_target *
-st_transform_feedback_get_draw_target(struct gl_transform_feedback_object *obj)
-{
-   struct st_transform_feedback_object *sobj =
-         st_transform_feedback_object(obj);
-   unsigned i;
-
-   for (i = 0; i < ARRAY_SIZE(sobj->targets); i++) {
-      if (sobj->targets[i]) {
-         return sobj->targets[i];
-      }
-   }
-
-   assert(0);
-   return NULL;
-}
-
-
 static void
 st_end_transform_feedback(struct gl_context *ctx,
                           struct gl_transform_feedback_object *obj)
@@ -203,22 +189,41 @@
    struct st_context *st = st_context(ctx);
    struct st_transform_feedback_object *sobj =
          st_transform_feedback_object(obj);
+   unsigned i;
 
    cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
 
-   pipe_so_target_reference(&sobj->draw_count,
-                            st_transform_feedback_get_draw_target(obj));
+   /* The next call to glDrawTransformFeedbackStream should use the vertex
+    * count from the last call to glEndTransformFeedback.
+    * Therefore, save the targets for each stream.
+    *
+    * NULL means the vertex counter is 0 (initial state).
+    */
+   for (i = 0; i < ARRAY_SIZE(sobj->draw_count); i++)
+      pipe_so_target_reference(&sobj->draw_count[i], NULL);
+
+   for (i = 0; i < ARRAY_SIZE(sobj->targets); i++) {
+      unsigned stream =
+         obj->shader_program->LinkedTransformFeedback.BufferStream[i];
+
+      /* Is it not bound or already set for this stream? */
+      if (!sobj->targets[i] || sobj->draw_count[stream])
+         continue;
+
+      pipe_so_target_reference(&sobj->draw_count[stream], sobj->targets[i]);
+   }
 }
 
 
-void
+bool
 st_transform_feedback_draw_init(struct gl_transform_feedback_object *obj,
-                                struct pipe_draw_info *out)
+                                unsigned stream, struct pipe_draw_info *out)
 {
    struct st_transform_feedback_object *sobj =
          st_transform_feedback_object(obj);
 
-   out->count_from_stream_output = sobj->draw_count;
+   out->count_from_stream_output = sobj->draw_count[stream];
+   return out->count_from_stream_output != NULL;
 }
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_xformfb.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_xformfb.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_cb_xformfb.h	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_cb_xformfb.h	2015-09-16 14:36:10.000000000 +0000
@@ -38,9 +38,9 @@
 extern void
 st_init_xformfb_functions(struct dd_function_table *functions);
 
-extern void
+extern bool
 st_transform_feedback_draw_init(struct gl_transform_feedback_object *obj,
-                                struct pipe_draw_info *out);
+                                unsigned stream, struct pipe_draw_info *out);
 
 
 #endif /* ST_CB_XFORMFB_H */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_context.c	2015-09-16 14:36:10.000000000 +0000
@@ -313,6 +313,7 @@
    f->NewArray = ST_NEW_VERTEX_ARRAYS;
    f->NewRasterizerDiscard = ST_NEW_RASTERIZER;
    f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER;
+   f->NewDefaultTessLevels = ST_NEW_TESS_STATE;
    f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
 }
 
@@ -375,6 +376,8 @@
    st_reference_fragprog(st, &st->fp, NULL);
    st_reference_geomprog(st, &st->gp, NULL);
    st_reference_vertprog(st, &st->vp, NULL);
+   st_reference_tesscprog(st, &st->tcp, NULL);
+   st_reference_tesseprog(st, &st->tep, NULL);
 
    /* release framebuffer surfaces */
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_context.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_context.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_context.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_context.h	2015-09-16 14:36:10.000000000 +0000
@@ -53,12 +53,13 @@
 #define ST_NEW_FRAGMENT_PROGRAM        (1 << 1)
 #define ST_NEW_VERTEX_PROGRAM          (1 << 2)
 #define ST_NEW_FRAMEBUFFER             (1 << 3)
-/* gap, re-use it */
+#define ST_NEW_TESS_STATE              (1 << 4)
 #define ST_NEW_GEOMETRY_PROGRAM        (1 << 5)
 #define ST_NEW_VERTEX_ARRAYS           (1 << 6)
 #define ST_NEW_RASTERIZER              (1 << 7)
 #define ST_NEW_UNIFORM_BUFFER          (1 << 8)
-/* gap for TESSCTRL/TESSEVAL that's in master only */
+#define ST_NEW_TESSCTRL_PROGRAM        (1 << 9)
+#define ST_NEW_TESSEVAL_PROGRAM        (1 << 10)
 #define ST_NEW_SAMPLER_VIEWS           (1 << 11)
 
 
@@ -139,7 +140,6 @@
 
    struct st_state_flags dirty;
 
-   GLboolean missing_textures;
    GLboolean vertdata_edgeflags;
    GLboolean edgeflag_culls_prims;
 
@@ -149,10 +149,14 @@
    struct st_vertex_program *vp;    /**< Currently bound vertex program */
    struct st_fragment_program *fp;  /**< Currently bound fragment program */
    struct st_geometry_program *gp;  /**< Currently bound geometry program */
+   struct st_tessctrl_program *tcp; /**< Currently bound tess control program */
+   struct st_tesseval_program *tep; /**< Currently bound tess eval program */
 
    struct st_vp_variant *vp_variant;
    struct st_fp_variant *fp_variant;
    struct st_gp_variant *gp_variant;
+   struct st_tcp_variant *tcp_variant;
+   struct st_tep_variant *tep_variant;
 
    struct gl_texture_object *default_texture;
 
@@ -274,6 +278,29 @@
 }
 
 
+/** Translate a Mesa shader stage into the matching PIPE_SHADER_* target. */
+static inline unsigned
+st_shader_stage_to_ptarget(gl_shader_stage stage)
+{
+   if (stage == MESA_SHADER_VERTEX)
+      return PIPE_SHADER_VERTEX;
+   if (stage == MESA_SHADER_FRAGMENT)
+      return PIPE_SHADER_FRAGMENT;
+   if (stage == MESA_SHADER_GEOMETRY)
+      return PIPE_SHADER_GEOMETRY;
+   if (stage == MESA_SHADER_TESS_CTRL)
+      return PIPE_SHADER_TESS_CTRL;
+   if (stage == MESA_SHADER_TESS_EVAL)
+      return PIPE_SHADER_TESS_EVAL;
+   if (stage == MESA_SHADER_COMPUTE)
+      return PIPE_SHADER_COMPUTE;
+
+   /* No stage matched: assert, then fall back to the vertex target. */
+   assert(!"should not be reached");
+   return PIPE_SHADER_VERTEX;
+}
+
+
 /** clear-alloc a struct-sized object, with casting */
 #define ST_CALLOC_STRUCT(T)   (struct T *) calloc(1, sizeof(struct T))
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_draw.c	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_draw.c	2015-09-16 14:36:10.000000000 +0000
@@ -141,7 +141,7 @@
       if (shProg[j] == NULL || !shProg[j]->LinkStatus)
 	 continue;
 
-      for (i = 0; i < shProg[j]->NumUserUniformStorage; i++) {
+      for (i = 0; i < shProg[j]->NumUniformStorage; i++) {
          const struct gl_uniform_storage *u = &shProg[j]->UniformStorage[i];
          if (!u->initialized) {
             _mesa_warning(ctx,
@@ -164,6 +164,7 @@
    STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
    STATIC_ASSERT(GL_QUADS == PIPE_PRIM_QUADS);
    STATIC_ASSERT(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
+   STATIC_ASSERT(GL_PATCHES == PIPE_PRIM_PATCHES);
 
    return prim;
 }
@@ -183,6 +184,7 @@
             GLuint min_index,
             GLuint max_index,
             struct gl_transform_feedback_object *tfb_vertcount,
+            unsigned stream,
             struct gl_buffer_object *indirect)
 {
    struct st_context *st = st_context(ctx);
@@ -241,7 +243,8 @@
       /* Transform feedback drawing is always non-indexed. */
       /* Set info.count_from_stream_output. */
       if (tfb_vertcount) {
-         st_transform_feedback_draw_init(tfb_vertcount, &info);
+         if (!st_transform_feedback_draw_init(tfb_vertcount, stream, &info))
+            return;
       }
    }
 
@@ -260,6 +263,7 @@
       info.count = prims[i].count;
       info.start_instance = prims[i].base_instance;
       info.instance_count = prims[i].num_instances;
+      info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices;
       info.index_bias = prims[i].basevertex;
       if (!ib) {
          info.min_index = info.start;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_draw_feedback.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_draw_feedback.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_draw_feedback.c	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_draw_feedback.c	2015-09-16 14:36:10.000000000 +0000
@@ -117,6 +117,7 @@
                      GLuint min_index,
                      GLuint max_index,
                      struct gl_transform_feedback_object *tfb_vertcount,
+                     unsigned stream,
                      struct gl_buffer_object *indirect)
 {
    struct st_context *st = st_context(ctx);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_draw.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_draw.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_draw.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_draw.h	2015-09-16 14:36:10.000000000 +0000
@@ -56,6 +56,7 @@
             GLuint min_index,
             GLuint max_index,
             struct gl_transform_feedback_object *tfb_vertcount,
+            unsigned stream,
             struct gl_buffer_object *indirect);
 
 extern void
@@ -67,6 +68,7 @@
                      GLuint min_index,
                      GLuint max_index,
                      struct gl_transform_feedback_object *tfb_vertcount,
+                     unsigned stream,
                      struct gl_buffer_object *indirect);
 
 /**
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_extensions.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_extensions.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_extensions.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_extensions.c	2015-09-16 14:36:10.000000000 +0000
@@ -165,6 +165,14 @@
          pc = &c->Program[MESA_SHADER_GEOMETRY];
          options = &c->ShaderCompilerOptions[MESA_SHADER_GEOMETRY];
          break;
+      case PIPE_SHADER_TESS_CTRL:
+         pc = &c->Program[MESA_SHADER_TESS_CTRL];
+         options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_CTRL];
+         break;
+      case PIPE_SHADER_TESS_EVAL:
+         pc = &c->Program[MESA_SHADER_TESS_EVAL];
+         options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_EVAL];
+         break;
       default:
          /* compute shader, etc. */
          continue;
@@ -245,8 +253,12 @@
       options->LowerClipDistance = true;
    }
 
+   c->LowerTessLevel = true;
+
    c->MaxCombinedTextureImageUnits =
          _min(c->Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
+              c->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits +
+              c->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits +
               c->Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
               c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits,
               MAX_COMBINED_TEXTURE_IMAGE_UNITS);
@@ -266,6 +278,9 @@
    c->MaxVarying = MIN2(c->MaxVarying, MAX_VARYING);
    c->MaxGeometryOutputVertices = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES);
    c->MaxGeometryTotalOutputComponents = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS);
+   c->MaxTessPatchComponents =
+      MAX2(screen->get_param(screen, PIPE_CAP_MAX_SHADER_PATCH_VARYINGS),
+           MAX_VARYING) * 4;
 
    c->MinProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MIN_TEXEL_OFFSET);
    c->MaxProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MAX_TEXEL_OFFSET);
@@ -301,6 +316,8 @@
          screen->get_param(screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT);
       c->MaxCombinedUniformBlocks = c->MaxUniformBufferBindings =
          c->Program[MESA_SHADER_VERTEX].MaxUniformBlocks +
+         c->Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks +
+         c->Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks +
          c->Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks +
          c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks;
       assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
@@ -417,12 +434,14 @@
 
    static const struct st_extension_cap_mapping cap_mapping[] = {
       { o(ARB_base_instance),                PIPE_CAP_START_INSTANCE                   },
-      { o(ARB_buffer_storage),               PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT },
+      { o(ARB_buffer_storage),               PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT   },
+      { o(ARB_color_buffer_float),           PIPE_CAP_VERTEX_COLOR_UNCLAMPED           },
       { o(ARB_depth_clamp),                  PIPE_CAP_DEPTH_CLIP_DISABLE               },
       { o(ARB_depth_texture),                PIPE_CAP_TEXTURE_SHADOW_MAP               },
       { o(ARB_draw_buffers_blend),           PIPE_CAP_INDEP_BLEND_FUNC                 },
       { o(ARB_draw_instanced),               PIPE_CAP_TGSI_INSTANCEID                  },
       { o(ARB_fragment_program_shadow),      PIPE_CAP_TEXTURE_SHADOW_MAP               },
+      { o(ARB_framebuffer_object),           PIPE_CAP_MIXED_FRAMEBUFFER_SIZES          },
       { o(ARB_instanced_arrays),             PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR  },
       { o(ARB_occlusion_query),              PIPE_CAP_OCCLUSION_QUERY                  },
       { o(ARB_occlusion_query2),             PIPE_CAP_OCCLUSION_QUERY                  },
@@ -432,6 +451,8 @@
       { o(ARB_shader_stencil_export),        PIPE_CAP_SHADER_STENCIL_EXPORT            },
       { o(ARB_shader_texture_lod),           PIPE_CAP_SM3                              },
       { o(ARB_shadow),                       PIPE_CAP_TEXTURE_SHADOW_MAP               },
+      { o(ARB_texture_buffer_object),        PIPE_CAP_TEXTURE_BUFFER_OBJECTS           },
+      { o(ARB_texture_gather),               PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS    },
       { o(ARB_texture_mirror_clamp_to_edge), PIPE_CAP_TEXTURE_MIRROR_CLAMP             },
       { o(ARB_texture_non_power_of_two),     PIPE_CAP_NPOT_TEXTURES                    },
       { o(ARB_timer_query),                  PIPE_CAP_QUERY_TIMESTAMP                  },
@@ -452,11 +473,14 @@
       { o(ATI_separate_stencil),             PIPE_CAP_TWO_SIDED_STENCIL                },
       { o(ATI_texture_mirror_once),          PIPE_CAP_TEXTURE_MIRROR_CLAMP             },
       { o(NV_conditional_render),            PIPE_CAP_CONDITIONAL_RENDER               },
+      { o(NV_primitive_restart),             PIPE_CAP_PRIMITIVE_RESTART                },
       { o(NV_texture_barrier),               PIPE_CAP_TEXTURE_BARRIER                  },
       /* GL_NV_point_sprite is not supported by gallium because we don't
        * support the GL_POINT_SPRITE_R_MODE_NV option. */
 
       { o(OES_standard_derivatives),         PIPE_CAP_SM3                              },
+      { o(OES_texture_float_linear),         PIPE_CAP_TEXTURE_FLOAT_LINEAR             },
+      { o(OES_texture_half_float_linear),    PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR        },
       { o(ARB_texture_cube_map_array),       PIPE_CAP_CUBE_MAP_ARRAY                   },
       { o(ARB_texture_multisample),          PIPE_CAP_TEXTURE_MULTISAMPLE              },
       { o(ARB_texture_query_lod),            PIPE_CAP_TEXTURE_QUERY_LOD                },
@@ -467,6 +491,7 @@
       { o(ARB_texture_view),                 PIPE_CAP_SAMPLER_VIEW_TARGET              },
       { o(ARB_clip_control),                 PIPE_CAP_CLIP_HALFZ                       },
       { o(EXT_polygon_offset_clamp),         PIPE_CAP_POLYGON_OFFSET_CLAMP             },
+      { o(EXT_depth_bounds_test),            PIPE_CAP_DEPTH_BOUNDS_TEST                },
    };
 
    /* Required: render target and sampler support */
@@ -475,6 +500,12 @@
         { PIPE_FORMAT_R32G32B32A32_FLOAT,
           PIPE_FORMAT_R16G16B16A16_FLOAT } },
 
+      { { o(OES_texture_float) },
+        { PIPE_FORMAT_R32G32B32A32_FLOAT } },
+
+      { { o(OES_texture_half_float) },
+        { PIPE_FORMAT_R16G16B16A16_FLOAT } },
+
       { { o(ARB_texture_rgb10_a2ui) },
         { PIPE_FORMAT_R10G10B10A2_UINT,
           PIPE_FORMAT_B10G10R10A2_UINT },
@@ -556,7 +587,8 @@
           PIPE_FORMAT_R8G8B8A8_UNORM },
         GL_TRUE }, /* at least one format must be supported */
 
-      { { o(ARB_stencil_texturing) },
+      { { o(ARB_stencil_texturing),
+          o(ARB_texture_stencil8) },
         { PIPE_FORMAT_X24S8_UINT,
           PIPE_FORMAT_S8X24_UINT },
         GL_TRUE }, /* at least one format must be supported */
@@ -650,9 +682,6 @@
                           ARRAY_SIZE(vertex_mapping), PIPE_BUFFER,
                           PIPE_BIND_VERTEX_BUFFER);
 
-   if (extensions->ARB_stencil_texturing)
-      extensions->ARB_texture_stencil8 = GL_TRUE;
-
    /* Figure out GLSL support. */
    glsl_feature_level = screen->get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL);
 
@@ -693,6 +722,7 @@
       extensions->OES_depth_texture_cube_map = GL_TRUE;
       extensions->ARB_shading_language_420pack = GL_TRUE;
       extensions->ARB_texture_query_levels = GL_TRUE;
+      extensions->ARB_shader_subroutine = GL_TRUE;
 
       if (!options->disable_shader_bit_encoding) {
          extensions->ARB_shader_bit_encoding = GL_TRUE;
@@ -723,20 +753,9 @@
       extensions->ANGLE_texture_compression_dxt = GL_FALSE;
    }
 
-   if (screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY,
+   if (screen->get_shader_param(screen, PIPE_SHADER_TESS_CTRL,
                                 PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
-#if 0 /* XXX re-enable when GLSL compiler again supports geometry shaders */
-      extensions->ARB_geometry_shader4 = GL_TRUE;
-#endif
-   }
-
-   if (screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART)) {
-      extensions->NV_primitive_restart = GL_TRUE;
-   }
-
-   /* ARB_color_buffer_float. */
-   if (screen->get_param(screen, PIPE_CAP_VERTEX_COLOR_UNCLAMPED)) {
-      extensions->ARB_color_buffer_float = GL_TRUE;
+      extensions->ARB_tessellation_shader = GL_TRUE;
    }
 
    if (screen->fence_finish) {
@@ -823,9 +842,7 @@
    consts->MinMapBufferAlignment =
       screen->get_param(screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
 
-   if (screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS)) {
-      extensions->ARB_texture_buffer_object = GL_TRUE;
-
+   if (extensions->ARB_texture_buffer_object) {
       consts->MaxTextureBufferSize =
          _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE),
               (1u << 31) - 1);
@@ -840,10 +857,6 @@
                              PIPE_BIND_SAMPLER_VIEW);
    }
 
-   if (screen->get_param(screen, PIPE_CAP_MIXED_FRAMEBUFFER_SIZES)) {
-      extensions->ARB_framebuffer_object = GL_TRUE;
-   }
-
    /* Unpacking a varying in the fragment shader costs 1 texture indirection.
     * If the number of available texture indirections is very limited, then we
     * prefer to disable varying packing rather than run the risk of varying
@@ -860,17 +873,19 @@
 
    consts->MaxViewports = screen->get_param(screen, PIPE_CAP_MAX_VIEWPORTS);
    if (consts->MaxViewports >= 16) {
-      consts->ViewportBounds.Min = -16384.0;
-      consts->ViewportBounds.Max = 16384.0;
+      if (glsl_feature_level >= 400) {
+         consts->ViewportBounds.Min = -32768.0;
+         consts->ViewportBounds.Max = 32767.0;
+      } else {
+         consts->ViewportBounds.Min = -16384.0;
+         consts->ViewportBounds.Max = 16383.0;
+      }
       extensions->ARB_viewport_array = GL_TRUE;
       extensions->ARB_fragment_layer_viewport = GL_TRUE;
       if (extensions->AMD_vertex_shader_layer)
          extensions->AMD_vertex_shader_viewport_index = GL_TRUE;
    }
 
-   if (consts->MaxProgramTextureGatherComponents > 0)
-      extensions->ARB_texture_gather = GL_TRUE;
-
    /* GL_ARB_ES3_compatibility.
     *
     * Assume that ES3 is supported if GLSL 3.30 is supported.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_format.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_format.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_format.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_format.c	2015-09-16 14:36:10.000000000 +0000
@@ -1270,46 +1270,40 @@
    /* 32-bit float formats */
    {
       { GL_RGBA32F_ARB, 0 },
-      { PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_RGB32F_ARB, 0 },
       { PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32X32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_LUMINANCE_ALPHA32F_ARB, 0 },
-      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_ALPHA32F_ARB, 0 },
       { PIPE_FORMAT_A32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_A16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_INTENSITY32F_ARB, 0 },
       { PIPE_FORMAT_I32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_I16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_LUMINANCE32F_ARB, 0 },
       { PIPE_FORMAT_L32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_L16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_R32F, 0 },
       { PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
    {
       { GL_RG32F, 0 },
-      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
    },
 
    /* R, RG formats */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_glsl_to_tgsi.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_glsl_to_tgsi.cpp	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_glsl_to_tgsi.cpp	2015-09-16 14:36:10.000000000 +0000
@@ -57,11 +57,6 @@
                            (1 << PROGRAM_CONSTANT) |     \
                            (1 << PROGRAM_UNIFORM))
 
-/**
- * Maximum number of arrays
- */
-#define MAX_ARRAYS        256
-
 #define MAX_GLSL_TEXTURE_OFFSET 4
 
 class st_src_reg;
@@ -89,6 +84,7 @@
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    st_src_reg(gl_register_file file, int index, int type)
@@ -103,6 +99,7 @@
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    st_src_reg(gl_register_file file, int index, int type, int index2D)
@@ -117,6 +114,7 @@
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    st_src_reg()
@@ -131,6 +129,7 @@
       this->reladdr2 = NULL;
       this->has_index2 = false;
       this->double_reg2 = false;
+      this->array_id = 0;
    }
 
    explicit st_src_reg(st_dst_reg reg);
@@ -150,6 +149,7 @@
     * currently used for input mapping only.
     */
    bool double_reg2;
+   unsigned array_id;
 };
 
 class st_dst_reg {
@@ -158,20 +158,28 @@
    {
       this->file = file;
       this->index = index;
+      this->index2D = 0;
       this->writemask = writemask;
       this->cond_mask = COND_TR;
       this->reladdr = NULL;
+      this->reladdr2 = NULL;
+      this->has_index2 = false;
       this->type = type;
+      this->array_id = 0;
    }
 
    st_dst_reg(gl_register_file file, int writemask, int type)
    {
       this->file = file;
       this->index = 0;
+      this->index2D = 0;
       this->writemask = writemask;
       this->cond_mask = COND_TR;
       this->reladdr = NULL;
+      this->reladdr2 = NULL;
+      this->has_index2 = false;
       this->type = type;
+      this->array_id = 0;
    }
 
    st_dst_reg()
@@ -179,20 +187,28 @@
       this->type = GLSL_TYPE_ERROR;
       this->file = PROGRAM_UNDEFINED;
       this->index = 0;
+      this->index2D = 0;
       this->writemask = 0;
       this->cond_mask = COND_TR;
       this->reladdr = NULL;
+      this->reladdr2 = NULL;
+      this->has_index2 = false;
+      this->array_id = 0;
    }
 
    explicit st_dst_reg(st_src_reg reg);
 
    gl_register_file file; /**< PROGRAM_* from Mesa */
    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
+   int index2D;
    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
    GLuint cond_mask:4;
    int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
    /** Register index should be offset by the integer in this reg. */
    st_src_reg *reladdr;
+   st_src_reg *reladdr2;
+   bool has_index2;
+   unsigned array_id;
 };
 
 st_src_reg::st_src_reg(st_dst_reg reg)
@@ -203,10 +219,11 @@
    this->swizzle = SWIZZLE_XYZW;
    this->negate = 0;
    this->reladdr = reg.reladdr;
-   this->index2D = 0;
-   this->reladdr2 = NULL;
-   this->has_index2 = false;
+   this->index2D = reg.index2D;
+   this->reladdr2 = reg.reladdr2;
+   this->has_index2 = reg.has_index2;
    this->double_reg2 = false;
+   this->array_id = reg.array_id;
 }
 
 st_dst_reg::st_dst_reg(st_src_reg reg)
@@ -217,6 +234,10 @@
    this->writemask = WRITEMASK_XYZW;
    this->cond_mask = COND_TR;
    this->reladdr = reg.reladdr;
+   this->index2D = reg.index2D;
+   this->reladdr2 = reg.reladdr2;
+   this->has_index2 = reg.has_index2;
+   this->array_id = reg.array_id;
 }
 
 class glsl_to_tgsi_instruction : public exec_node {
@@ -233,6 +254,7 @@
    st_src_reg sampler; /**< sampler register */
    int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
    int tex_target; /**< One of TEXTURE_*_INDEX */
+   glsl_base_type tex_type;
    GLboolean tex_shadow;
 
    st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
@@ -244,8 +266,9 @@
 
 class variable_storage : public exec_node {
 public:
-   variable_storage(ir_variable *var, gl_register_file file, int index)
-      : file(file), index(index), var(var)
+   variable_storage(ir_variable *var, gl_register_file file, int index,
+                    unsigned array_id = 0)
+      : file(file), index(index), var(var), array_id(array_id)
    {
       /* empty */
    }
@@ -253,6 +276,7 @@
    gl_register_file file;
    int index;
    ir_variable *var; /* variable that maps to this, if any */
+   unsigned array_id;
 };
 
 class immediate_storage : public exec_node {
@@ -302,6 +326,15 @@
    st_src_reg return_reg;
 };
 
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
+
+struct array_decl {
+   unsigned mesa_index;
+   unsigned array_id;
+   unsigned array_size;
+};
+
 struct glsl_to_tgsi_visitor : public ir_visitor {
 public:
    glsl_to_tgsi_visitor();
@@ -317,11 +350,19 @@
 
    int next_temp;
 
-   unsigned array_sizes[MAX_ARRAYS];
+   unsigned *array_sizes;
+   unsigned max_num_arrays;
    unsigned next_array;
 
+   struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS];
+   unsigned num_input_arrays;
+   struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS];
+   unsigned num_output_arrays;
+
    int num_address_regs;
    int samplers_used;
+   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
+   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
    bool indirect_addr_consts;
    int wpos_transform_const;
 
@@ -372,6 +413,7 @@
    virtual void visit(ir_if *);
    virtual void visit(ir_emit_vertex *);
    virtual void visit(ir_end_primitive *);
+   virtual void visit(ir_barrier *);
    /*@}*/
 
    st_src_reg result;
@@ -390,31 +432,19 @@
    /** List of glsl_to_tgsi_instruction */
    exec_list instructions;
 
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
-
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-                                  st_dst_reg dst, st_src_reg src0);
-
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-                                  st_dst_reg dst, st_dst_reg dst1,
-                                  st_src_reg src0);
-
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-                                  st_dst_reg dst, st_src_reg src0, st_src_reg src1);
-
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-                                  st_dst_reg dst,
-                                  st_src_reg src0, st_src_reg src1, st_src_reg src2);
-
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-                                  st_dst_reg dst,
-                                  st_src_reg src0, st_src_reg src1,
-                                  st_src_reg src2, st_src_reg src3);
-
-   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-                                  st_dst_reg dst, st_dst_reg dst1,
-                                  st_src_reg src0, st_src_reg src1,
-                                  st_src_reg src2, st_src_reg src3);
+   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
+                                      st_dst_reg dst = undef_dst,
+                                      st_src_reg src0 = undef_src,
+                                      st_src_reg src1 = undef_src,
+                                      st_src_reg src2 = undef_src,
+                                      st_src_reg src3 = undef_src);
+
+   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
+                                      st_dst_reg dst, st_dst_reg dst1,
+                                      st_src_reg src0 = undef_src,
+                                      st_src_reg src1 = undef_src,
+                                      st_src_reg src2 = undef_src,
+                                      st_src_reg src3 = undef_src);
 
    unsigned get_opcode(ir_instruction *ir, unsigned op,
                     st_dst_reg dst,
@@ -468,10 +498,6 @@
    void *mem_ctx;
 };
 
-static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
-
-static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
-
 static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
 static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
 static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
@@ -526,10 +552,10 @@
 }
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-                           st_dst_reg dst, st_dst_reg dst1,
-                           st_src_reg src0, st_src_reg src1,
-                           st_src_reg src2, st_src_reg src3)
+glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
+                               st_dst_reg dst, st_dst_reg dst1,
+                               st_src_reg src0, st_src_reg src1,
+                               st_src_reg src2, st_src_reg src3)
 {
    glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
    int num_reladdr = 0, i, j;
@@ -540,8 +566,8 @@
     * reg directly for one of the regs, and preload the other reladdr
     * sources into temps.
     */
-   num_reladdr += dst.reladdr != NULL;
-   num_reladdr += dst1.reladdr != NULL;
+   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
+   num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
    num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
    num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
    num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
@@ -552,8 +578,11 @@
    reladdr_to_temp(ir, &src1, &num_reladdr);
    reladdr_to_temp(ir, &src0, &num_reladdr);
 
-   if (dst.reladdr) {
-      emit_arl(ir, address_reg, *dst.reladdr);
+   if (dst.reladdr || dst.reladdr2) {
+      if (dst.reladdr)
+         emit_arl(ir, address_reg, *dst.reladdr);
+      if (dst.reladdr2)
+         emit_arl(ir, address_reg2, *dst.reladdr2);
       num_reladdr--;
    }
    if (dst1.reladdr) {
@@ -571,11 +600,15 @@
    inst->src[3] = src3;
    inst->ir = ir;
    inst->dead_mask = 0;
+   /* default to float, for paths where this is not initialized
+    * (since 0==UINT which is likely wrong):
+    */
+   inst->tex_type = GLSL_TYPE_FLOAT;
 
    inst->function = NULL;
 
    /* Update indirect addressing status used by TGSI */
-   if (dst.reladdr) {
+   if (dst.reladdr || dst.reladdr2) {
       switch(dst.file) {
       case PROGRAM_STATE_VAR:
       case PROGRAM_CONSTANT:
@@ -716,48 +749,12 @@
 }
 
 glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-                           st_dst_reg dst,
-                           st_src_reg src0, st_src_reg src1,
-                           st_src_reg src2, st_src_reg src3)
-{
-   return emit(ir, op, dst, undef_dst, src0, src1, src2, src3);
-}
-
-glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-                           st_dst_reg dst, st_src_reg src0,
-                           st_src_reg src1, st_src_reg src2)
-{
-   return emit(ir, op, dst, undef_dst, src0, src1, src2, undef_src);
-}
-
-glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-                           st_dst_reg dst, st_src_reg src0, st_src_reg src1)
-{
-   return emit(ir, op, dst, undef_dst, src0, src1, undef_src, undef_src);
-}
-
-glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-                           st_dst_reg dst, st_src_reg src0)
+glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
+                               st_dst_reg dst,
+                               st_src_reg src0, st_src_reg src1,
+                               st_src_reg src2, st_src_reg src3)
 {
-   assert(dst.writemask != 0);
-   return emit(ir, op, dst, undef_dst, src0, undef_src, undef_src, undef_src);
-}
-
-glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-                           st_dst_reg dst, st_dst_reg dst1, st_src_reg src0)
-{
-   return emit(ir, op, dst, dst1, src0, undef_src, undef_src, undef_src);
-}
-
-glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
-{
-   return emit(ir, op, undef_dst, undef_dst, undef_src, undef_src, undef_src, undef_src);
+   return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
 }
 
 /**
@@ -818,7 +815,7 @@
    case TGSI_OPCODE_##c: \
       if (type == GLSL_TYPE_DOUBLE) \
          op = TGSI_OPCODE_##d; \
-      else if (type == GLSL_TYPE_INT)       \
+      else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)       \
          op = TGSI_OPCODE_##i; \
       else if (type == GLSL_TYPE_UINT) \
          op = TGSI_OPCODE_##u; \
@@ -879,7 +876,7 @@
       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
    };
 
-   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**
@@ -929,7 +926,7 @@
                                    src1_swiz, src1_swiz);
 
       dst.writemask = this_mask;
-      emit(ir, op, dst, src0, src1);
+      emit_asm(ir, op, dst, src0, src1);
       done_mask |= this_mask;
    }
 }
@@ -958,7 +955,7 @@
    if (dst.index >= this->num_address_regs)
       this->num_address_regs = dst.index + 1;
 
-   emit(NULL, op, dst, src0);
+   emit_asm(NULL, op, dst, src0);
 }
 
 int
@@ -1111,6 +1108,7 @@
       return size;
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SUBROUTINE:
       /* Samplers take up one slot in UNIFORMS[], but they're baked in
        * at link time.
        */
@@ -1142,6 +1140,12 @@
    if (!options->EmitNoIndirectTemp &&
        (type->is_array() || type->is_matrix())) {
 
+      if (next_array >= max_num_arrays) {
+         max_num_arrays += 32;
+         array_sizes = (unsigned*)
+            realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
+      }
+
       src.file = PROGRAM_ARRAY;
       src.index = next_array << 16 | 0x8000;
       array_sizes[next_array] = type_size(type);
@@ -1242,7 +1246,7 @@
              */
             st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
             src.swizzle = slots[i].swizzle;
-            emit(ir, TGSI_OPCODE_MOV, dst, src);
+            emit_asm(ir, TGSI_OPCODE_MOV, dst, src);
             /* even a float takes up a whole vec4 reg in a struct/array. */
             dst.index++;
          }
@@ -1261,11 +1265,11 @@
 void
 glsl_to_tgsi_visitor::visit(ir_loop *ir)
 {
-   emit(NULL, TGSI_OPCODE_BGNLOOP);
+   emit_asm(NULL, TGSI_OPCODE_BGNLOOP);
 
    visit_exec_list(&ir->body_instructions, this);
 
-   emit(NULL, TGSI_OPCODE_ENDLOOP);
+   emit_asm(NULL, TGSI_OPCODE_ENDLOOP);
 }
 
 void
@@ -1273,10 +1277,10 @@
 {
    switch (ir->mode) {
    case ir_loop_jump::jump_break:
-      emit(NULL, TGSI_OPCODE_BRK);
+      emit_asm(NULL, TGSI_OPCODE_BRK);
       break;
    case ir_loop_jump::jump_continue:
-      emit(NULL, TGSI_OPCODE_CONT);
+      emit_asm(NULL, TGSI_OPCODE_CONT);
       break;
    }
 }
@@ -1330,7 +1334,7 @@
    this->result = get_temp(ir->type);
    result_dst = st_dst_reg(this->result);
    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
+   emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
 
    return true;
 }
@@ -1370,7 +1374,7 @@
    b.negate = ~b.negate;
 
    this->result = get_temp(ir->type);
-   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
+   emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
 
    return true;
 }
@@ -1388,7 +1392,7 @@
    if (*num_reladdr != 1) {
       st_src_reg temp = get_temp(glsl_type::vec4_type);
 
-      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
+      emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
       *reg = temp;
    }
 
@@ -1464,7 +1468,7 @@
    switch (ir->operation) {
    case ir_unop_logic_not:
       if (result_dst.type != GLSL_TYPE_FLOAT)
-         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
       else {
          /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
           * older GPUs implement SEQ using multiple instructions (i915 uses two
@@ -1472,24 +1476,27 @@
           * 0.0 and 1.0, 1-x also implements !x.
           */
          op[0].negate = ~op[0].negate;
-         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+         emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
       }
       break;
    case ir_unop_neg:
       if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
-         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
       else if (result_dst.type == GLSL_TYPE_DOUBLE)
-         emit(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
       else {
          op[0].negate = ~op[0].negate;
          result_src = op[0];
       }
       break;
+   case ir_unop_subroutine_to_int:
+      emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+      break;
    case ir_unop_abs:
-      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
       break;
    case ir_unop_sign:
-      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
       break;
    case ir_unop_rcp:
       emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
@@ -1513,17 +1520,17 @@
       break;
    case ir_unop_saturate: {
       glsl_to_tgsi_instruction *inst;
-      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+      inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
       inst->saturate = true;
       break;
    }
 
    case ir_unop_dFdx:
    case ir_unop_dFdx_coarse:
-      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
       break;
    case ir_unop_dFdx_fine:
-      emit(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
       break;
    case ir_unop_dFdy:
    case ir_unop_dFdy_coarse:
@@ -1547,18 +1554,18 @@
 
       st_src_reg temp = get_temp(glsl_type::vec4_type);
 
-      emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
-      emit(ir, ir->operation == ir_unop_dFdy_fine ?
+      emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
+      emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
            TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
       break;
    }
 
    case ir_unop_frexp_sig:
-      emit(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
       break;
 
    case ir_unop_frexp_exp:
-      emit(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
       break;
 
    case ir_unop_noise: {
@@ -1568,50 +1575,50 @@
        * place to do this is in the GL state tracker, not the poor
        * driver.
        */
-      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
+      emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
       break;
    }
 
    case ir_binop_add:
-      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
       break;
    case ir_binop_sub:
-      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_mul:
-      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
    case ir_binop_div:
       if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
          assert(!"not reached: should be handled by ir_div_to_mul_rcp");
       else
-         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
       break;
    case ir_binop_mod:
       if (result_dst.type == GLSL_TYPE_FLOAT)
          assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
       else
-         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_less:
-      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
       break;
    case ir_binop_greater:
-      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
+      emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
       break;
    case ir_binop_lequal:
-      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
+      emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
       break;
    case ir_binop_gequal:
-      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
       break;
    case ir_binop_equal:
-      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       break;
    case ir_binop_nequal:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
    case ir_binop_all_equal:
       /* "==" operator producing a scalar boolean. */
@@ -1625,7 +1632,7 @@
             st_dst_reg temp_dst = st_dst_reg(temp);
             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
 
-            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
+            emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
 
             /* Emit 1-3 AND operations to combine the SEQ results. */
             switch (ir->operands[0]->type->vector_elements) {
@@ -1635,24 +1642,24 @@
                temp_dst.writemask = WRITEMASK_Y;
                temp1.swizzle = SWIZZLE_YYYY;
                temp2.swizzle = SWIZZLE_ZZZZ;
-               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+               emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
                break;
             case 4:
                temp_dst.writemask = WRITEMASK_X;
                temp1.swizzle = SWIZZLE_XXXX;
                temp2.swizzle = SWIZZLE_YYYY;
-               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+               emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
                temp_dst.writemask = WRITEMASK_Y;
                temp1.swizzle = SWIZZLE_ZZZZ;
                temp2.swizzle = SWIZZLE_WWWW;
-               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+               emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
             }
 
             temp1.swizzle = SWIZZLE_XXXX;
             temp2.swizzle = SWIZZLE_YYYY;
-            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
+            emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
          } else {
-            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+            emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
 
             /* After the dot-product, the value will be an integer on the
              * range [0,4].  Zero becomes 1.0, and positive values become zero.
@@ -1665,10 +1672,10 @@
              */
             st_src_reg sge_src = result_src;
             sge_src.negate = ~sge_src.negate;
-            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+            emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
          }
       } else {
-         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
       }
       break;
    case ir_binop_any_nequal:
@@ -1678,7 +1685,7 @@
          st_src_reg temp = get_temp(native_integers ?
                                     glsl_type::uvec4_type :
                                     glsl_type::vec4_type);
-         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
 
          if (native_integers) {
             st_dst_reg temp_dst = st_dst_reg(temp);
@@ -1692,22 +1699,22 @@
                temp_dst.writemask = WRITEMASK_Y;
                temp1.swizzle = SWIZZLE_YYYY;
                temp2.swizzle = SWIZZLE_ZZZZ;
-               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+               emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
                break;
             case 4:
                temp_dst.writemask = WRITEMASK_X;
                temp1.swizzle = SWIZZLE_XXXX;
                temp2.swizzle = SWIZZLE_YYYY;
-               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+               emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
                temp_dst.writemask = WRITEMASK_Y;
                temp1.swizzle = SWIZZLE_ZZZZ;
                temp2.swizzle = SWIZZLE_WWWW;
-               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+               emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
             }
 
             temp1.swizzle = SWIZZLE_XXXX;
             temp2.swizzle = SWIZZLE_YYYY;
-            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
+            emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
          } else {
             /* After the dot-product, the value will be an integer on the
              * range [0,4].  Zero stays zero, and positive values become 1.0.
@@ -1726,11 +1733,11 @@
                 */
                st_src_reg slt_src = result_src;
                slt_src.negate = ~slt_src.negate;
-               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+               emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
             }
          }
       } else {
-         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       }
       break;
 
@@ -1763,7 +1770,7 @@
                                           GET_SWZ(op0_swizzle, 3),
                                           GET_SWZ(op0_swizzle, 3),
                                           GET_SWZ(op0_swizzle, 3));
-            emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+            emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
             accum = st_src_reg(result_dst);
             accum.swizzle = dst_swizzle;
             /* fallthrough */
@@ -1772,7 +1779,7 @@
                                           GET_SWZ(op0_swizzle, 2),
                                           GET_SWZ(op0_swizzle, 2),
                                           GET_SWZ(op0_swizzle, 2));
-            emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+            emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
             accum = st_src_reg(result_dst);
             accum.swizzle = dst_swizzle;
             /* fallthrough */
@@ -1781,7 +1788,7 @@
                                           GET_SWZ(op0_swizzle, 1),
                                           GET_SWZ(op0_swizzle, 1),
                                           GET_SWZ(op0_swizzle, 1));
-            emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
+            emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
             break;
          default:
             assert(!"Unexpected vector size");
@@ -1807,11 +1814,11 @@
              */
             st_src_reg slt_src = result_src;
             slt_src.negate = ~slt_src.negate;
-            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+            emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
          }
          else {
             /* Use SNE 0 if integers are being used as boolean values. */
-            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+            emit_asm(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
          }
       }
       break;
@@ -1819,9 +1826,9 @@
 
    case ir_binop_logic_xor:
       if (native_integers)
-         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
       else
-         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_logic_or: {
@@ -1830,13 +1837,13 @@
           * instruction.
           */
          assert(native_integers);
-         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
       } else {
          /* After the addition, the value will be an integer on the
           * range [0,2].  Zero stays zero, and positive values become 1.0.
           */
          glsl_to_tgsi_instruction *add =
-            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+            emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
             /* The clamping to [0,1] can be done for free in the fragment
              * shader with a saturate if floats are being used as boolean values.
@@ -1849,7 +1856,7 @@
              */
             st_src_reg slt_src = result_src;
             slt_src.negate = ~slt_src.negate;
-            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+            emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
          }
       }
       break;
@@ -1861,9 +1868,9 @@
        * actual AND opcode.
        */
       if (native_integers)
-         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
       else
-         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_dot:
@@ -1879,10 +1886,10 @@
       } else {
          /* sqrt(x) = x * rsq(x). */
          emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
-         emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
+         emit_asm(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
          /* For incoming channels <= 0, set the result to 0. */
          op[0].negate = ~op[0].negate;
-         emit(ir, TGSI_OPCODE_CMP, result_dst,
+         emit_asm(ir, TGSI_OPCODE_CMP, result_dst,
               op[0], result_src, st_src_reg_for_float(0.0));
       }
       break;
@@ -1891,13 +1898,13 @@
       break;
    case ir_unop_i2f:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
          break;
       }
       /* fallthrough to next case otherwise */
    case ir_unop_b2f:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
+         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
          break;
       }
       /* fallthrough to next case otherwise */
@@ -1912,7 +1919,7 @@
           * GLSL requires that int(bool) return 1 for true and 0 for false.
           * This conversion is done with AND, but it could be done with NEG.
           */
-         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
+         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
       } else {
          /* Booleans and integers are both stored as floats when native
           * integers are disabled.
@@ -1922,15 +1929,15 @@
       break;
    case ir_unop_f2i:
       if (native_integers)
-         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
       else
-         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_f2u:
       if (native_integers)
-         emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
       else
-         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_bitcast_f2i:
       result_src = op[0];
@@ -1946,38 +1953,38 @@
       result_src.type = GLSL_TYPE_FLOAT;
       break;
    case ir_unop_f2b:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
       break;
    case ir_unop_d2b:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
+      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
       break;
    case ir_unop_i2b:
       if (native_integers)
-         emit(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
+         emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
       else
-         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+         emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
       break;
    case ir_unop_trunc:
-      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_ceil:
-      emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
       break;
    case ir_unop_floor:
-      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
       break;
    case ir_unop_round_even:
-      emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
       break;
    case ir_unop_fract:
-      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
       break;
 
    case ir_binop_min:
-      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
       break;
    case ir_binop_max:
-      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
       break;
    case ir_binop_pow:
       emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
@@ -1985,37 +1992,37 @@
 
    case ir_unop_bit_not:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
          break;
       }
    case ir_unop_u2f:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
+         emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
          break;
       }
    case ir_binop_lshift:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_rshift:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_bit_and:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_bit_xor:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
          break;
       }
    case ir_binop_bit_or:
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
          break;
       }
 
@@ -2045,7 +2052,7 @@
       }
       else {
          /* Relative/variable index into constant buffer */
-         emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
+         emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
               st_src_reg_for_int(4));
          cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
          memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
@@ -2078,88 +2085,88 @@
                                        const_offset % 16 / 4);
 
       if (ir->type->base_type == GLSL_TYPE_BOOL) {
-         emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
+         emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
       } else {
-         emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
+         emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
       }
       break;
    }
    case ir_triop_lrp:
       /* note: we have to reorder the three args here */
-      emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
+      emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
       break;
    case ir_triop_csel:
       if (this->ctx->Const.NativeIntegers)
-         emit(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
+         emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
       else {
          op[0].negate = ~op[0].negate;
-         emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
+         emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
       }
       break;
    case ir_triop_bitfield_extract:
-      emit(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
+      emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
       break;
    case ir_quadop_bitfield_insert:
-      emit(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
+      emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
       break;
    case ir_unop_bitfield_reverse:
-      emit(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
       break;
    case ir_unop_bit_count:
-      emit(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
       break;
    case ir_unop_find_msb:
-      emit(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
       break;
    case ir_unop_find_lsb:
-      emit(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
       break;
    case ir_binop_imul_high:
-      emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
       break;
    case ir_triop_fma:
       /* In theory, MAD is incorrect here. */
       if (have_fma)
-         emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
+         emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
       else
-         emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
+         emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
       break;
    case ir_unop_interpolate_at_centroid:
-      emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
       break;
    case ir_binop_interpolate_at_offset:
-      emit(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]);
       break;
    case ir_binop_interpolate_at_sample:
-      emit(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
       break;
 
    case ir_unop_d2f:
-      emit(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
       break;
    case ir_unop_f2d:
-      emit(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
       break;
    case ir_unop_d2i:
-      emit(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
       break;
    case ir_unop_i2d:
-      emit(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
       break;
    case ir_unop_d2u:
-      emit(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
       break;
    case ir_unop_u2d:
-      emit(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
       break;
    case ir_unop_unpack_double_2x32:
    case ir_unop_pack_double_2x32:
-      emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+      emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
       break;
 
    case ir_binop_ldexp:
       if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
-         emit(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
+         emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
       } else {
          assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
       }
@@ -2243,11 +2250,41 @@
    this->result = src;
 }
 
+/* Test if the variable is an array or matrix. Non-patch GS/TES inputs and
+ * TCS inputs/outputs have an extra outer array level: for those only the
+ * element type is tested and *is_2d is set. VS inputs/FS outputs: false.
+ */
+static bool
+is_inout_array(unsigned stage, ir_variable *var, bool *is_2d)
+{
+   const glsl_type *type = var->type;
+
+   *is_2d = false; /* written on every return path, including the early one */
+
+   if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
+       (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
+      return false;
+
+   if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
+        (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
+        stage == MESA_SHADER_TESS_CTRL) &&
+       !var->data.patch) {
+      if (!var->type->is_array())
+         return false; /* a system value probably */
+
+      type = var->type->fields.array;
+      *is_2d = true;
+   }
+
+   return type->is_array() || type->is_matrix();
+}
+
 void
 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
 {
    variable_storage *entry = find_variable_storage(ir->var);
    ir_variable *var = ir->var;
+   bool is_2d;
 
    if (!entry) {
       switch (var->data.mode) {
@@ -2263,16 +2300,56 @@
           * user-defined varyings.
           */
          assert(var->data.location != -1);
-         entry = new(mem_ctx) variable_storage(var,
-                                               PROGRAM_INPUT,
-                                               var->data.location);
+
+         if (is_inout_array(shader->Stage, var, &is_2d)) {
+            struct array_decl *decl = &input_arrays[num_input_arrays];
+
+            decl->mesa_index = var->data.location;
+            decl->array_id = num_input_arrays + 1;
+            if (is_2d)
+               decl->array_size = type_size(var->type->fields.array);
+            else
+               decl->array_size = type_size(var->type);
+            num_input_arrays++;
+
+            entry = new(mem_ctx) variable_storage(var,
+                                                  PROGRAM_INPUT,
+                                                  var->data.location,
+                                                  decl->array_id);
+         }
+         else {
+            entry = new(mem_ctx) variable_storage(var,
+                                                  PROGRAM_INPUT,
+                                                  var->data.location);
+         }
+         this->variables.push_tail(entry);
          break;
       case ir_var_shader_out:
          assert(var->data.location != -1);
-         entry = new(mem_ctx) variable_storage(var,
-                                               PROGRAM_OUTPUT,
-                                               var->data.location
-                                               + var->data.index);
+
+         if (is_inout_array(shader->Stage, var, &is_2d)) {
+            struct array_decl *decl = &output_arrays[num_output_arrays];
+
+            decl->mesa_index = var->data.location;
+            decl->array_id = num_output_arrays + 1;
+            if (is_2d)
+               decl->array_size = type_size(var->type->fields.array);
+            else
+               decl->array_size = type_size(var->type);
+            num_output_arrays++;
+
+            entry = new(mem_ctx) variable_storage(var,
+                                                  PROGRAM_OUTPUT,
+                                                  var->data.location,
+                                                  decl->array_id);
+         }
+         else {
+            entry = new(mem_ctx) variable_storage(var,
+                                                  PROGRAM_OUTPUT,
+                                                  var->data.location
+                                                  + var->data.index);
+         }
+         this->variables.push_tail(entry);
          break;
       case ir_var_system_value:
          entry = new(mem_ctx) variable_storage(var,
@@ -2296,32 +2373,92 @@
    }
 
    this->result = st_src_reg(entry->file, entry->index, var->type);
+   this->result.array_id = entry->array_id;
    if (!native_integers)
       this->result.type = GLSL_TYPE_FLOAT;
 }
 
+/* Trim unused elements from both ends of every array declaration, e.g. a
+ * mat4[3] of which only mat[1] is used.  Bit N of usage_mask marks varying
+ * slot N as used; patch slots are looked up in patch_usage_mask, whose bit 0
+ * corresponds to VARYING_SLOT_PATCH0.  A fully unused array gets size 0.
+ */
+static void
+shrink_array_declarations(struct array_decl *arrays, unsigned count,
+                          GLbitfield64 usage_mask,
+                          GLbitfield patch_usage_mask)
+{
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      struct array_decl *decl = &arrays[i];
+
+      /* Shrink the beginning: drop the first element while it is unused. */
+      while (decl->array_size > 0) {
+         if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
+            if (patch_usage_mask &
+                BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0))
+               break;
+         } else if (usage_mask & BITFIELD64_BIT(decl->mesa_index)) {
+            break;
+         }
+
+         decl->mesa_index++;
+         decl->array_size--;
+      }
+
+      /* Shrink the end.  Loop on array_size > 0: an unsigned index tested
+       * with ">= 0" is always true, so an unused array would run past 0.
+       */
+      while (decl->array_size > 0) {
+         unsigned last = decl->array_size - 1;
+         if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
+            if (patch_usage_mask &
+                BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + last))
+               break;
+         } else if (usage_mask & BITFIELD64_BIT(decl->mesa_index + last)) {
+            break;
+         }
+
+         decl->array_size--;
+      }
+   }
+}
+
 void
 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
 {
    ir_constant *index;
    st_src_reg src;
    int element_size = type_size(ir->type);
-   bool is_2D_input;
+   bool is_2D = false;
 
    index = ir->array_index->constant_expression_value();
 
    ir->array->accept(this);
    src = this->result;
 
-   is_2D_input = this->prog->Target == GL_GEOMETRY_PROGRAM_NV &&
-                 src.file == PROGRAM_INPUT &&
-                 ir->array->ir_type != ir_type_dereference_array;
+   if (ir->array->ir_type != ir_type_dereference_array) {
+      switch (this->prog->Target) {
+      case GL_TESS_CONTROL_PROGRAM_NV:
+         is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
+                 !ir->variable_referenced()->data.patch;
+         break;
+      case GL_TESS_EVALUATION_PROGRAM_NV:
+         is_2D = src.file == PROGRAM_INPUT &&
+                 !ir->variable_referenced()->data.patch;
+         break;
+      case GL_GEOMETRY_PROGRAM_NV:
+         is_2D = src.file == PROGRAM_INPUT;
+         break;
+      }
+   }
 
-   if (is_2D_input)
+   if (is_2D)
       element_size = 1;
 
    if (index) {
-      if (is_2D_input) {
+      if (is_2D) {
          src.index2D = index->value.i[0];
          src.has_index2 = true;
       } else
@@ -2341,24 +2478,24 @@
          index_reg = get_temp(native_integers ?
                               glsl_type::int_type : glsl_type::float_type);
 
-         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
+         emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
               this->result, st_src_reg_for_type(index_reg.type, element_size));
       }
 
       /* If there was already a relative address register involved, add the
        * new and the old together to get the new offset.
        */
-      if (!is_2D_input && src.reladdr != NULL) {
+      if (!is_2D && src.reladdr != NULL) {
          st_src_reg accum_reg = get_temp(native_integers ?
                                 glsl_type::int_type : glsl_type::float_type);
 
-         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
+         emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
               index_reg, *src.reladdr);
 
          index_reg = accum_reg;
       }
 
-      if (is_2D_input) {
+      if (is_2D) {
          src.reladdr2 = ralloc(mem_ctx, st_src_reg);
          memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
          src.index2D = 0;
@@ -2589,16 +2726,16 @@
       l_src.swizzle = swizzle_for_size(type->vector_elements);
 
       if (native_integers) {
-         emit(ir, TGSI_OPCODE_UCMP, *l, *cond,
+         emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
               cond_swap ? l_src : *r,
               cond_swap ? *r : l_src);
       } else {
-         emit(ir, TGSI_OPCODE_CMP, *l, *cond,
+         emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond,
               cond_swap ? l_src : *r,
               cond_swap ? *r : l_src);
       }
    } else {
-      emit(ir, TGSI_OPCODE_MOV, *l, *r);
+      emit_asm(ir, TGSI_OPCODE_MOV, *l, *r);
    }
    l->index++;
    r->index++;
@@ -2679,7 +2816,7 @@
        */
       glsl_to_tgsi_instruction *inst, *new_inst;
       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
       new_inst->saturate = inst->saturate;
       inst->dead_mask = inst->dst[0].writemask;
    } else {
@@ -2717,7 +2854,7 @@
          src = this->result;
 
          for (i = 0; i < (unsigned int)size; i++) {
-            emit(ir, TGSI_OPCODE_MOV, temp, src);
+            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
 
             src.index++;
             temp.index++;
@@ -2739,7 +2876,7 @@
          ir->array_elements[i]->accept(this);
          src = this->result;
          for (int j = 0; j < size; j++) {
-            emit(ir, TGSI_OPCODE_MOV, temp, src);
+            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
 
             src.index++;
             temp.index++;
@@ -2764,7 +2901,7 @@
                                   ir->type->vector_elements,
                                   GL_FLOAT,
                                   &src.swizzle);
-         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
+         emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
 
          mat_column.index++;
       }
@@ -2889,7 +3026,7 @@
          l.cond_mask = COND_TR;
 
          for (i = 0; i < type_size(param->type); i++) {
-            emit(ir, TGSI_OPCODE_MOV, l, r);
+            emit_asm(ir, TGSI_OPCODE_MOV, l, r);
             l.index++;
             r.index++;
          }
@@ -2897,7 +3034,7 @@
    }
 
    /* Emit call instruction */
-   call_inst = emit(ir, TGSI_OPCODE_CAL);
+   call_inst = emit_asm(ir, TGSI_OPCODE_CAL);
    call_inst->function = entry;
 
    /* Process out parameters. */
@@ -2922,7 +3059,7 @@
          st_dst_reg l = st_dst_reg(this->result);
 
          for (i = 0; i < type_size(param->type); i++) {
-            emit(ir, TGSI_OPCODE_MOV, l, r);
+            emit_asm(ir, TGSI_OPCODE_MOV, l, r);
             l.index++;
             r.index++;
          }
@@ -2965,7 +3102,7 @@
       coord = get_temp(glsl_type::vec4_type);
       coord_dst = st_dst_reg(coord);
       coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
-      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
    }
 
    if (ir->projector) {
@@ -3074,7 +3211,7 @@
       if (opcode == TGSI_OPCODE_TEX) {
          /* Slot the projector in as the last component of the coord. */
          coord_dst.writemask = WRITEMASK_W;
-         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
+         emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector);
          coord_dst.writemask = WRITEMASK_XYZW;
          opcode = TGSI_OPCODE_TXP;
       } else {
@@ -3086,7 +3223,7 @@
           * projective divide now.
           */
          coord_dst.writemask = WRITEMASK_W;
-         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
+         emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector);
 
          /* In the case where we have to project the coordinates "by hand,"
           * the shadow comparator value must also be projected.
@@ -3105,14 +3242,14 @@
             assert(!sampler_type->sampler_array);
 
             tmp_dst.writemask = WRITEMASK_Z;
-            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
+            emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
 
             tmp_dst.writemask = WRITEMASK_XY;
-            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
+            emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
          }
 
          coord_dst.writemask = WRITEMASK_XYZ;
-         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
+         emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
 
          coord_dst.writemask = WRITEMASK_XYZW;
          coord.swizzle = SWIZZLE_XYZW;
@@ -3133,7 +3270,7 @@
          cube_sc = get_temp(glsl_type::float_type);
          cube_sc_dst = st_dst_reg(cube_sc);
          cube_sc_dst.writemask = WRITEMASK_X;
-         emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
+         emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
          cube_sc_dst.writemask = WRITEMASK_X;
       }
       else {
@@ -3144,20 +3281,20 @@
          } else {
             coord_dst.writemask = WRITEMASK_Z;
          }
-         emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+         emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
          coord_dst.writemask = WRITEMASK_XYZW;
       }
    }
 
    if (ir->op == ir_txf_ms) {
       coord_dst.writemask = WRITEMASK_W;
-      emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
+      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
       coord_dst.writemask = WRITEMASK_XYZW;
    } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
        opcode == TGSI_OPCODE_TXF) {
       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
       coord_dst.writemask = WRITEMASK_W;
-      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
+      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
       coord_dst.writemask = WRITEMASK_XYZW;
    }
 
@@ -3167,30 +3304,30 @@
    }
 
    if (opcode == TGSI_OPCODE_TXD)
-      inst = emit(ir, opcode, result_dst, coord, dx, dy);
+      inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
    else if (opcode == TGSI_OPCODE_TXQ) {
       if (ir->op == ir_query_levels) {
          /* the level is stored in W */
-         inst = emit(ir, opcode, st_dst_reg(levels_src), lod_info);
+         inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info);
          result_dst.writemask = WRITEMASK_X;
          levels_src.swizzle = SWIZZLE_WWWW;
-         emit(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
+         emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
       } else
-         inst = emit(ir, opcode, result_dst, lod_info);
+         inst = emit_asm(ir, opcode, result_dst, lod_info);
    } else if (opcode == TGSI_OPCODE_TXF) {
-      inst = emit(ir, opcode, result_dst, coord);
+      inst = emit_asm(ir, opcode, result_dst, coord);
    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
-      inst = emit(ir, opcode, result_dst, coord, lod_info);
+      inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
    } else if (opcode == TGSI_OPCODE_TEX2) {
-      inst = emit(ir, opcode, result_dst, coord, cube_sc);
+      inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
    } else if (opcode == TGSI_OPCODE_TG4) {
       if (is_cube_array && ir->shadow_comparitor) {
-         inst = emit(ir, opcode, result_dst, coord, cube_sc);
+         inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
       } else {
-         inst = emit(ir, opcode, result_dst, coord, component);
+         inst = emit_asm(ir, opcode, result_dst, coord, component);
       }
    } else
-      inst = emit(ir, opcode, result_dst, coord);
+      inst = emit_asm(ir, opcode, result_dst, coord);
 
    if (ir->shadow_comparitor)
       inst->tex_shadow = GL_TRUE;
@@ -3246,6 +3383,8 @@
       assert(!"Should not get here.");
    }
 
+   inst->tex_type = ir->type->base_type;
+
    this->result = result_src;
 }
 
@@ -3264,13 +3403,13 @@
       l = st_dst_reg(current_function->return_reg);
 
       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
-         emit(ir, TGSI_OPCODE_MOV, l, r);
+         emit_asm(ir, TGSI_OPCODE_MOV, l, r);
          l.index++;
          r.index++;
       }
    }
 
-   emit(ir, TGSI_OPCODE_RET);
+   emit_asm(ir, TGSI_OPCODE_RET);
 }
 
 void
@@ -3283,16 +3422,16 @@
       /* Convert the bool condition to a float so we can negate. */
       if (native_integers) {
          st_src_reg temp = get_temp(ir->condition->type);
-         emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
+         emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
               condition, st_src_reg_for_float(1.0));
          condition = temp;
       }
 
       condition.negate = ~condition.negate;
-      emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
+      emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
    } else {
       /* unconditional kil */
-      emit(ir, TGSI_OPCODE_KILL);
+      emit_asm(ir, TGSI_OPCODE_KILL);
    }
 }
 
@@ -3307,18 +3446,18 @@
 
    if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
 
-   if_inst = emit(ir->condition, if_opcode, undef_dst, this->result);
+   if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result);
 
    this->instructions.push_tail(if_inst);
 
    visit_exec_list(&ir->then_instructions, this);
 
    if (!ir->else_instructions.is_empty()) {
-      emit(ir->condition, TGSI_OPCODE_ELSE);
+      emit_asm(ir->condition, TGSI_OPCODE_ELSE);
       visit_exec_list(&ir->else_instructions, this);
    }
 
-   if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
+   if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF);
 }
 
 
@@ -3328,7 +3467,7 @@
    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
 
    ir->stream->accept(this);
-   emit(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
+   emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
 }
 
 void
@@ -3337,14 +3476,27 @@
    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
 
    ir->stream->accept(this);
-   emit(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
+   emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_barrier *ir)
+{
+   /* Barriers are only expected in compute and tess-control programs. */
+   assert(this->prog->Target == GL_COMPUTE_PROGRAM_NV ||
+          this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV);
+   emit_asm(ir, TGSI_OPCODE_BARRIER);
 }
 
 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
 {
    result.file = PROGRAM_UNDEFINED;
    next_temp = 1;
+   array_sizes = NULL;
+   max_num_arrays = 0;
    next_array = 0;
+   num_input_arrays = 0;
+   num_output_arrays = 0;
    next_signature_id = 1;
    num_immediates = 0;
    current_function = NULL;
@@ -3366,6 +3518,7 @@
 
 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
 {
+   free(array_sizes);
    ralloc_free(mem_ctx);
 }
 
@@ -3387,7 +3540,13 @@
    foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
       if (is_tex_instruction(inst->op)) {
          for (int i = 0; i < inst->sampler_array_size; i++) {
-            v->samplers_used |= 1 << (inst->sampler.index + i);
+            unsigned idx = inst->sampler.index + i;
+            /* Check idx before it is used as shift count and array index. */
+            debug_assert(idx < ARRAY_SIZE(v->sampler_types));
+            v->samplers_used |= 1 << idx;
+            v->sampler_types[idx] = inst->tex_type;
+            v->sampler_targets[idx] =
+               st_translate_texture_target(inst->tex_target, inst->tex_shadow);
 
             if (inst->tex_shadow) {
                prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
@@ -3449,7 +3608,7 @@
 {
    int tempWritesSize = 0;
    unsigned *tempWrites = NULL;
-   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+   unsigned outputWrites[VARYING_SLOT_TESS_MAX];
 
    memset(outputWrites, 0, sizeof(outputWrites));
 
@@ -3457,8 +3616,8 @@
       unsigned prevWriteMask = 0;
 
       /* Give up if we encounter relative addressing or flow control. */
-      if (inst->dst[0].reladdr ||
-          inst->dst[1].reladdr ||
+      if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
+          inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
           tgsi_get_opcode_info(inst->op)->is_branch ||
           inst->op == TGSI_OPCODE_BGNSUB ||
           inst->op == TGSI_OPCODE_CONT ||
@@ -3469,7 +3628,7 @@
       }
 
       if (inst->dst[0].file == PROGRAM_OUTPUT) {
-         assert(inst->dst[0].index < MAX_PROGRAM_OUTPUTS);
+         assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
          prevWriteMask = outputWrites[inst->dst[0].index];
          outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
       } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
@@ -3734,6 +3893,7 @@
             inst->src[r].index2D = first->src[0].index2D;
             inst->src[r].has_index2 = first->src[0].has_index2;
             inst->src[r].double_reg2 = first->src[0].double_reg2;
+            inst->src[r].array_id = first->src[0].array_id;
 
             int swizzle = 0;
             for (int i = 0; i < 4; i++) {
@@ -3835,6 +3995,7 @@
           !(inst->dst[0].file == inst->src[0].file &&
              inst->dst[0].index == inst->src[0].index) &&
           !inst->dst[0].reladdr &&
+          !inst->dst[0].reladdr2 &&
           !inst->saturate &&
           inst->src[0].file != PROGRAM_ARRAY &&
           !inst->src[0].reladdr &&
@@ -4177,7 +4338,7 @@
    coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
    src0 = v->get_temp(glsl_type::vec4_type);
    dst0 = st_dst_reg(src0);
-   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
    inst->sampler_array_size = 1;
    inst->tex_target = TEXTURE_2D_INDEX;
 
@@ -4201,7 +4362,7 @@
       /* MAD colorTemp, colorTemp, scale, bias; */
       scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
       bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
-      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+      inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
    }
 
    if (pixel_maps) {
@@ -4209,6 +4370,7 @@
       st_dst_reg temp_dst = st_dst_reg(temp);
 
       assert(st->pixel_xfer.pixelmap_texture);
+      (void) st;
 
       /* With a little effort, we can do four pixel map look-ups with
        * two TEX instructions:
@@ -4216,7 +4378,7 @@
 
       /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
       temp_dst.writemask = WRITEMASK_XY; /* write R,G */
-      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
       inst->sampler.index = 1;
       inst->sampler_array_size = 1;
       inst->tex_target = TEXTURE_2D_INDEX;
@@ -4224,7 +4386,7 @@
       /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
       src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
       temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
-      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
       inst->sampler.index = 1;
       inst->sampler_array_size = 1;
       inst->tex_target = TEXTURE_2D_INDEX;
@@ -4233,19 +4395,19 @@
       v->samplers_used |= (1 << 1);
 
       /* MOV colorTemp, temp; */
-      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+      inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp);
    }
 
    /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
     * new visitor. */
    foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
       glsl_to_tgsi_instruction *newinst;
-      st_src_reg src_regs[3];
+      st_src_reg src_regs[4];
 
       if (inst->dst[0].file == PROGRAM_OUTPUT)
          prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
 
-      for (int i = 0; i < 3; i++) {
+      for (int i = 0; i < 4; i++) {
          src_regs[i] = inst->src[i];
          if (src_regs[i].file == PROGRAM_INPUT &&
              src_regs[i].index == VARYING_SLOT_COL0) {
@@ -4256,7 +4418,7 @@
             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
       }
 
-      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
       newinst->tex_target = inst->tex_target;
       newinst->sampler_array_size = inst->sampler_array_size;
    }
@@ -4306,7 +4468,7 @@
    coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
    src0 = v->get_temp(glsl_type::vec4_type);
    dst0 = st_dst_reg(src0);
-   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
    inst->sampler.index = samplerIndex;
    inst->sampler_array_size = 1;
    inst->tex_target = TEXTURE_2D_INDEX;
@@ -4319,24 +4481,24 @@
    src0.negate = NEGATE_XYZW;
    if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
       src0.swizzle = SWIZZLE_XXXX;
-   inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
+   inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
 
    /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
     * new visitor. */
    foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
       glsl_to_tgsi_instruction *newinst;
-      st_src_reg src_regs[3];
+      st_src_reg src_regs[4];
 
       if (inst->dst[0].file == PROGRAM_OUTPUT)
          prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
 
-      for (int i = 0; i < 3; i++) {
+      for (int i = 0; i < 4; i++) {
          src_regs[i] = inst->src[i];
          if (src_regs[i].file == PROGRAM_INPUT)
             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
       }
 
-      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
       newinst->tex_target = inst->tex_target;
       newinst->sampler_array_size = inst->sampler_array_size;
    }
@@ -4362,7 +4524,8 @@
    unsigned temps_size;
    struct ureg_dst *temps;
 
-   struct ureg_dst arrays[MAX_ARRAYS];
+   struct ureg_dst *arrays;
+   unsigned num_temp_arrays;
    struct ureg_src *constants;
    int num_constants;
    struct ureg_src *immediates;
@@ -4373,7 +4536,9 @@
    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
    struct ureg_src systemValues[SYSTEM_VALUE_MAX];
    struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
-   unsigned array_sizes[MAX_ARRAYS];
+   unsigned *array_sizes;
+   struct array_decl *input_arrays;
+   struct array_decl *output_arrays;
 
    const GLuint *inputMapping;
    const GLuint *outputMapping;
@@ -4418,6 +4583,14 @@
    TGSI_SEMANTIC_SAMPLEID,
    TGSI_SEMANTIC_SAMPLEPOS,
    TGSI_SEMANTIC_SAMPLEMASK,
+
+   /* Tessellation shaders
+    */
+   TGSI_SEMANTIC_TESSCOORD,
+   TGSI_SEMANTIC_VERTICESIN,
+   TGSI_SEMANTIC_PRIMID,
+   TGSI_SEMANTIC_TESSOUTER,
+   TGSI_SEMANTIC_TESSINNER,
 };
 
 /**
@@ -4497,9 +4670,8 @@
  * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
  */
 static struct ureg_dst
-dst_register(struct st_translate *t,
-             gl_register_file file,
-             GLuint index)
+dst_register(struct st_translate *t, gl_register_file file, unsigned index,
+             unsigned array_id)
 {
    unsigned array;
 
@@ -4530,7 +4702,7 @@
    case PROGRAM_ARRAY:
       array = index >> 16;
 
-      assert(array < ARRAY_SIZE(t->arrays));
+      assert(array < t->num_temp_arrays);
 
       if (ureg_dst_is_undef(t->arrays[array]))
          t->arrays[array] = ureg_DECL_array_temporary(
@@ -4540,16 +4712,28 @@
                                    (int)(index & 0xFFFF) - 0x8000);
 
    case PROGRAM_OUTPUT:
-      if (t->procType == TGSI_PROCESSOR_VERTEX)
-         assert(index < VARYING_SLOT_MAX);
-      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
-         assert(index < FRAG_RESULT_MAX);
-      else
-         assert(index < VARYING_SLOT_MAX);
-
-      assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
+      if (!array_id) {
+         if (t->procType == TGSI_PROCESSOR_FRAGMENT)
+            assert(index < FRAG_RESULT_MAX);
+         else if (t->procType == TGSI_PROCESSOR_TESS_CTRL ||
+                  t->procType == TGSI_PROCESSOR_TESS_EVAL)
+            assert(index < VARYING_SLOT_TESS_MAX);
+         else
+            assert(index < VARYING_SLOT_MAX);
 
-      return t->outputs[t->outputMapping[index]];
+         assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
+         assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL);
+         return t->outputs[t->outputMapping[index]];
+      }
+      else {
+         struct array_decl *decl = &t->output_arrays[array_id-1];
+         unsigned mesa_index = decl->mesa_index;
+         int slot = t->outputMapping[mesa_index];
+
+         assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT);
+         assert(t->outputs[slot].ArrayID == array_id);
+         return ureg_dst_array_offset(t->outputs[slot], index - mesa_index);
+      }
 
    case PROGRAM_ADDRESS:
       return t->address[index];
@@ -4575,7 +4759,8 @@
 
    case PROGRAM_TEMPORARY:
    case PROGRAM_ARRAY:
-      return ureg_src(dst_register(t, reg->file, reg->index));
+   case PROGRAM_OUTPUT:
+      return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));
 
    case PROGRAM_UNIFORM:
       assert(reg->index >= 0);
@@ -4598,12 +4783,20 @@
        * map back to the original index and add the offset after
        * mapping. */
       index -= double_reg2;
-      assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
-      return t->inputs[t->inputMapping[index] + double_reg2];
-
-   case PROGRAM_OUTPUT:
-      assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
-      return ureg_src(t->outputs[t->outputMapping[reg->index]]); /* not needed? */
+      if (!reg->array_id) {
+         assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
+         assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
+         return t->inputs[t->inputMapping[index] + double_reg2];
+      }
+      else {
+         struct array_decl *decl = &t->input_arrays[reg->array_id-1];
+         unsigned mesa_index = decl->mesa_index;
+         int slot = t->inputMapping[mesa_index];
+
+         assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
+         assert(t->inputs[slot].ArrayID == reg->array_id);
+         return ureg_src_array_offset(t->inputs[slot], index - mesa_index);
+      }
 
    case PROGRAM_ADDRESS:
       return ureg_src(t->address[reg->index]);
@@ -4626,9 +4819,8 @@
               const st_dst_reg *dst_reg,
               bool saturate, bool clamp_color)
 {
-   struct ureg_dst dst = dst_register(t,
-                                      dst_reg->file,
-                                      dst_reg->index);
+   struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
+                                      dst_reg->array_id);
 
    if (dst.File == TGSI_FILE_NULL)
       return dst;
@@ -4665,6 +4857,14 @@
       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
    }
 
+   if (dst_reg->has_index2) {
+      if (dst_reg->reladdr2)
+         dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
+                                           dst_reg->index2D);
+      else
+         dst = ureg_dst_dimension(dst, dst_reg->index2D);
+   }
+
    return dst;
 }
 
@@ -4738,7 +4938,7 @@
       array = in_offset->index >> 16;
 
       assert(array >= 0);
-      assert(array < (int) ARRAY_SIZE(t->arrays));
+      assert(array < (int)t->num_temp_arrays);
 
       dst = t->arrays[array];
       offset.File = dst.File;
@@ -5060,6 +5260,25 @@
    ureg_MOV(ureg, edge_dst, edge_src);
 }
 
+/* Look up the array_decl with mesa_index == attr; on a hit, fill in
+ * *array_id and *array_size and return true, otherwise return false. */
+static bool
+find_array(unsigned attr, struct array_decl *arrays, unsigned count,
+           unsigned *array_id, unsigned *array_size)
+{
+   for (unsigned n = 0; n < count; n++) {
+      const struct array_decl *entry = arrays + n;
+
+      if (entry->mesa_index != attr)
+         continue;
+      *array_id = entry->array_id;
+      *array_size = entry->array_size;
+      assert(*array_size);
+      return true;
+   }
+   return false;
+}
+
 /**
  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
  * \param program  the program to translate
@@ -5089,12 +5308,14 @@
    const struct gl_program *proginfo,
    GLuint numInputs,
    const GLuint inputMapping[],
+   const GLuint inputSlotToAttr[],
    const ubyte inputSemanticName[],
    const ubyte inputSemanticIndex[],
    const GLuint interpMode[],
    const GLuint interpLocation[],
    GLuint numOutputs,
    const GLuint outputMapping[],
+   const GLuint outputSlotToAttr[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
    boolean passthrough_edgeflags,
@@ -5125,6 +5346,8 @@
           TGSI_SEMANTIC_VERTEXID_NOBASE);
    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] ==
           TGSI_SEMANTIC_BASEVERTEX);
+   assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
+          TGSI_SEMANTIC_TESSCOORD);
 
    t = CALLOC_STRUCT(st_translate);
    if (!t) {
@@ -5132,25 +5355,105 @@
       goto out;
    }
 
-   memset(t, 0, sizeof *t);
-
    t->procType = procType;
    t->inputMapping = inputMapping;
    t->outputMapping = outputMapping;
    t->ureg = ureg;
+   t->num_temp_arrays = program->next_array;
+   if (t->num_temp_arrays)
+      t->arrays = (struct ureg_dst*)
+                  calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays);
 
    /*
     * Declare input attributes.
     */
-   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+   switch (procType) {
+   case TGSI_PROCESSOR_FRAGMENT:
+      for (i = 0; i < numInputs; i++) {
+         unsigned array_id = 0;
+         unsigned array_size;
+
+         if (find_array(inputSlotToAttr[i], program->input_arrays,
+                        program->num_input_arrays, &array_id, &array_size)) {
+            /* We've found an array. Declare it so. */
+            t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
+                              inputSemanticName[i], inputSemanticIndex[i],
+                              interpMode[i], 0, interpLocation[i],
+                              array_id, array_size);
+            i += array_size - 1;
+         }
+         else {
+            t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
+                              inputSemanticName[i], inputSemanticIndex[i],
+                              interpMode[i], 0, interpLocation[i], 0, 1);
+         }
+      }
+      break;
+   case TGSI_PROCESSOR_GEOMETRY:
+   case TGSI_PROCESSOR_TESS_EVAL:
+   case TGSI_PROCESSOR_TESS_CTRL:
       for (i = 0; i < numInputs; i++) {
-         t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
-                                                        inputSemanticName[i],
-                                                        inputSemanticIndex[i],
-                                                        interpMode[i], 0,
-                                                        interpLocation[i]);
+         unsigned array_id = 0;
+         unsigned array_size;
+
+         if (find_array(inputSlotToAttr[i], program->input_arrays,
+                        program->num_input_arrays, &array_id, &array_size)) {
+            /* We've found an array. Declare it so. */
+            t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
+                                           inputSemanticIndex[i],
+                                           array_id, array_size);
+            i += array_size - 1;
+         }
+         else {
+            t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
+                                           inputSemanticIndex[i], 0, 1);
+         }
+      }
+      break;
+   case TGSI_PROCESSOR_VERTEX:
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
+      }
+      break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Declare output attributes.
+    */
+   switch (procType) {
+   case TGSI_PROCESSOR_FRAGMENT:
+      break;
+   case TGSI_PROCESSOR_GEOMETRY:
+   case TGSI_PROCESSOR_TESS_EVAL:
+   case TGSI_PROCESSOR_TESS_CTRL:
+   case TGSI_PROCESSOR_VERTEX:
+      for (i = 0; i < numOutputs; i++) {
+         unsigned array_id = 0;
+         unsigned array_size;
+
+         if (find_array(outputSlotToAttr[i], program->output_arrays,
+                        program->num_output_arrays, &array_id, &array_size)) {
+            /* We've found an array. Declare it so. */
+            t->outputs[i] = ureg_DECL_output_array(ureg,
+                                                   outputSemanticName[i],
+                                                   outputSemanticIndex[i],
+                                                   array_id, array_size);
+            i += array_size - 1;
+         }
+         else {
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             outputSemanticName[i],
+                                             outputSemanticIndex[i]);
+         }
       }
+      break;
+   default:
+      assert(0);
+   }
 
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
       if (proginfo->InputsRead & VARYING_BIT_POS) {
           /* Must do this after setting up t->inputs. */
           emit_wpos(st_context(ctx), t, proginfo, ureg,
@@ -5160,9 +5463,6 @@
       if (proginfo->InputsRead & VARYING_BIT_FACE)
          emit_face_var(ctx, t);
 
-      /*
-       * Declare output attributes.
-       */
       for (i = 0; i < numOutputs; i++) {
          switch (outputSemanticName[i]) {
          case TGSI_SEMANTIC_POSITION:
@@ -5198,31 +5498,8 @@
          }
       }
    }
-   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
-      for (i = 0; i < numInputs; i++) {
-         t->inputs[i] = ureg_DECL_gs_input(ureg,
-                                           i,
-                                           inputSemanticName[i],
-                                           inputSemanticIndex[i]);
-      }
-
-      for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output(ureg,
-                                          outputSemanticName[i],
-                                          outputSemanticIndex[i]);
-      }
-   }
-   else {
-      assert(procType == TGSI_PROCESSOR_VERTEX);
-
-      for (i = 0; i < numInputs; i++) {
-         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
-      }
-
+   else if (procType == TGSI_PROCESSOR_VERTEX) {
       for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output(ureg,
-                                          outputSemanticName[i],
-                                          outputSemanticIndex[i]);
          if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
             /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
             ureg_MOV(ureg,
@@ -5265,6 +5542,7 @@
                struct pipe_screen *pscreen = st->pipe->screen;
                assert(procType == TGSI_PROCESSOR_VERTEX);
                assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
+               (void) pscreen;
                if (!ctx->Const.NativeIntegers) {
                   struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
                   ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
@@ -5277,9 +5555,9 @@
       }
    }
 
-   /* Copy over array sizes
-    */
-   memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
+   t->array_sizes = program->array_sizes;
+   t->input_arrays = program->input_arrays;
+   t->output_arrays = program->output_arrays;
 
    /* Emit constants and uniforms.  TGSI uses a single index space for these,
     * so we put all the translated regs in t->constants.
@@ -5355,7 +5633,26 @@
    /* texture samplers */
    for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
       if (program->samplers_used & (1 << i)) {
+         unsigned type;
+
          t->samplers[i] = ureg_DECL_sampler(ureg, i);
+
+         switch (program->sampler_types[i]) {
+         case GLSL_TYPE_INT:
+            type = TGSI_RETURN_TYPE_SINT;
+            break;
+         case GLSL_TYPE_UINT:
+            type = TGSI_RETURN_TYPE_UINT;
+            break;
+         case GLSL_TYPE_FLOAT:
+            type = TGSI_RETURN_TYPE_FLOAT;
+            break;
+         default:
+            unreachable("not reached");
+         }
+
+         ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
+                                 type, type, type, type );
       }
    }
 
@@ -5375,6 +5672,7 @@
 
 out:
    if (t) {
+      free(t->arrays);
       free(t->temps);
       free(t->insn);
       free(t->labels);
@@ -5395,25 +5693,6 @@
 /* ----------------------------- End TGSI code ------------------------------ */
 
 
-static unsigned
-shader_stage_to_ptarget(gl_shader_stage stage)
-{
-   switch (stage) {
-   case MESA_SHADER_VERTEX:
-      return PIPE_SHADER_VERTEX;
-   case MESA_SHADER_FRAGMENT:
-      return PIPE_SHADER_FRAGMENT;
-   case MESA_SHADER_GEOMETRY:
-      return PIPE_SHADER_GEOMETRY;
-   case MESA_SHADER_COMPUTE:
-      return PIPE_SHADER_COMPUTE;
-   }
-
-   assert(!"should not be reached");
-   return PIPE_SHADER_VERTEX;
-}
-
-
 /**
  * Convert a shader's GLSL IR into a Mesa gl_program, although without
  * generating Mesa IR.
@@ -5430,7 +5709,7 @@
    struct gl_shader_compiler_options *options =
          &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
    struct pipe_screen *pscreen = ctx->st->pipe->screen;
-   unsigned ptarget = shader_stage_to_ptarget(shader->Stage);
+   unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
 
    validate_ir_tree(shader->ir);
 
@@ -5457,7 +5736,7 @@
                                                prog->Parameters);
 
    /* Remove reads from output registers. */
-   lower_output_reads(shader->ir);
+   lower_output_reads(shader->Stage, shader->ir);
 
    /* Emit intermediate IR for main(). */
    visit_exec_list(shader->ir, v);
@@ -5470,7 +5749,7 @@
          if (!entry->bgn_inst) {
             v->current_function = entry;
 
-            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
+            entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB);
             entry->bgn_inst->function = entry;
 
             visit_exec_list(&entry->sig->body, v);
@@ -5478,10 +5757,10 @@
             glsl_to_tgsi_instruction *last;
             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
             if (last->op != TGSI_OPCODE_RET)
-               v->emit(NULL, TGSI_OPCODE_RET);
+               v->emit_asm(NULL, TGSI_OPCODE_RET);
 
             glsl_to_tgsi_instruction *end;
-            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
+            end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB);
             end->function = entry;
 
             progress = GL_TRUE;
@@ -5505,7 +5784,11 @@
 
    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
    v->simplify_cmp();
-   v->copy_propagate();
+
+   if (shader->Type != GL_TESS_CONTROL_SHADER &&
+       shader->Type != GL_TESS_EVALUATION_SHADER)
+      v->copy_propagate();
+
    while (v->eliminate_dead_code());
 
    v->merge_two_dsts();
@@ -5513,7 +5796,7 @@
    v->renumber_registers();
 
    /* Write the END instruction. */
-   v->emit(NULL, TGSI_OPCODE_END);
+   v->emit_asm(NULL, TGSI_OPCODE_END);
 
    if (ctx->_Shader->Flags & GLSL_DUMP) {
       _mesa_log("\n");
@@ -5528,6 +5811,10 @@
    prog->NumInstructions = 0;
 
    do_set_program_inouts(shader->ir, prog, shader->Stage);
+   shrink_array_declarations(v->input_arrays, v->num_input_arrays,
+                             prog->InputsRead, prog->PatchInputsRead);
+   shrink_array_declarations(v->output_arrays, v->num_output_arrays,
+                             prog->OutputsWritten, prog->PatchOutputsWritten);
    count_resources(v, prog);
 
    /* This must be done before the uniform storage is associated. */
@@ -5556,6 +5843,8 @@
    struct st_vertex_program *stvp;
    struct st_fragment_program *stfp;
    struct st_geometry_program *stgp;
+   struct st_tessctrl_program *sttcp;
+   struct st_tesseval_program *sttep;
 
    switch (shader->Type) {
    case GL_VERTEX_SHADER:
@@ -5570,6 +5859,14 @@
       stgp = (struct st_geometry_program *)prog;
       stgp->glsl_to_tgsi = v;
       break;
+   case GL_TESS_CONTROL_SHADER:
+      sttcp = (struct st_tessctrl_program *)prog;
+      sttcp->glsl_to_tgsi = v;
+      break;
+   case GL_TESS_EVALUATION_SHADER:
+      sttep = (struct st_tesseval_program *)prog;
+      sttep->glsl_to_tgsi = v;
+      break;
    default:
       assert(!"should not be reached");
       return NULL;
@@ -5580,6 +5877,71 @@
 
 extern "C" {
 
+static void
+st_dump_program_for_shader_db(struct gl_context *ctx,
+                              struct gl_shader_program *prog)
+{
+   /* Dump only successfully compiled and linked shaders to the file named
+    * by the ST_DUMP_SHADERS option (programs with Name == 0 are skipped).
+    * This is for shader-db.
+    * ST_DUMP_INSERT and ForceGLSLVersion allow some pre-processing of
+    * shaders while dumping, because some apps have ill-formed shaders.
+    */
+   const char *dump_filename = os_get_option("ST_DUMP_SHADERS");
+   const char *insert_directives = os_get_option("ST_DUMP_INSERT");
+
+   if (dump_filename && prog->Name != 0) {
+      FILE *f = fopen(dump_filename, "a"); /* append, never truncate */
+
+      if (f) {
+         for (unsigned i = 0; i < prog->NumShaders; i++) {
+            const struct gl_shader *sh = prog->Shaders[i];
+            const char *source;
+            bool skip_version = false;
+
+            if (!sh)
+               continue;
+
+            source = sh->Source;
+
+            /* This string mustn't be changed. shader-db uses it to find
+             * where the shader begins.
+             */
+            fprintf(f, "GLSL %s shader %d source for linked program %d:\n",
+                    _mesa_shader_stage_to_string(sh->Stage),
+                    i, prog->Name);
+
+            /* A forced GLSL version replaces the shader's own #version. */
+            if (ctx->Const.ForceGLSLVersion) {
+               fprintf(f, "#version %i\n", ctx->Const.ForceGLSLVersion);
+               skip_version = true;
+            }
+
+            /* Optional ST_DUMP_INSERT text, emitted after any #version line. */
+            if (insert_directives) {
+               if (!ctx->Const.ForceGLSLVersion && prog->Version)
+                  fprintf(f, "#version %i\n", prog->Version);
+               fprintf(f, "%s\n", insert_directives);
+               skip_version = true;
+            }
+            /* Skip past the source's own leading "#version " line. */
+            if (skip_version && strncmp(source, "#version ", 9) == 0) {
+               const char *next_line = strstr(source, "\n");
+
+               if (next_line)
+                  source = next_line + 1;
+               else
+                  continue; /* nothing follows the #version line */
+            }
+
+            fprintf(f, "%s", source);
+            fprintf(f, "\n");
+         }
+         fclose(f);
+      }
+   }
+}
+
 /**
  * Link a shader.
  * Called via ctx->Driver.LinkShader()
@@ -5601,7 +5963,7 @@
       gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type);
       const struct gl_shader_compiler_options *options =
             &ctx->Const.ShaderCompilerOptions[stage];
-      unsigned ptarget = shader_stage_to_ptarget(stage);
+      unsigned ptarget = st_shader_stage_to_ptarget(stage);
       bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
                                                    PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
       bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
@@ -5612,7 +5974,7 @@
        */
       if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
           options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
-         lower_variable_index_to_cond_assign(ir,
+         lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
                                              options->EmitNoIndirectInput,
                                              options->EmitNoIndirectOutput,
                                              options->EmitNoIndirectTemp,
@@ -5700,6 +6062,7 @@
       _mesa_reference_program(ctx, &linked_prog, NULL);
    }
 
+   st_dump_program_for_shader_db(ctx, prog);
    return GL_TRUE;
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_glsl_to_tgsi.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_glsl_to_tgsi.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_glsl_to_tgsi.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_glsl_to_tgsi.h	2015-09-16 14:36:10.000000000 +0000
@@ -43,12 +43,14 @@
    const struct gl_program *proginfo,
    GLuint numInputs,
    const GLuint inputMapping[],
+   const GLuint inputSlotToAttr[],
    const ubyte inputSemanticName[],
    const ubyte inputSemanticIndex[],
    const GLuint interpMode[],
    const GLuint interpLocation[],
    GLuint numOutputs,
    const GLuint outputMapping[],
+   const GLuint outputSlotToAttr[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
    boolean passthrough_edgeflags,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_manager.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_manager.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_manager.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_manager.c	2015-09-16 14:36:10.000000000 +0000
@@ -368,6 +368,7 @@
 
       mode->rgbBits = mode->redBits +
          mode->greenBits + mode->blueBits + mode->alphaBits;
+      mode->sRGBCapable = util_format_is_srgb(visual->color_format);
    }
 
    if (visual->depth_stencil_format != PIPE_FORMAT_NONE) {
@@ -924,8 +925,7 @@
    struct gl_extensions extensions = {0};
    GLuint version;
 
-   if ((api == API_OPENGL_COMPAT || api == API_OPENGL_CORE) &&
-       _mesa_override_gl_version_contextless(&consts, &api, &version)) {
+   if (_mesa_override_gl_version_contextless(&consts, &api, &version)) {
       return version;
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_mesa_to_tgsi.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_mesa_to_tgsi.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_mesa_to_tgsi.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_mesa_to_tgsi.c	2015-09-16 14:36:10.000000000 +0000
@@ -665,7 +665,7 @@
    if (num_dst) 
       dst[0] = translate_dst( t, 
                               &inst->DstReg,
-                              inst->SaturateMode,
+                              inst->Saturate,
                               clamp_dst_color_output);
 
    for (i = 0; i < num_src; i++) 
@@ -1095,10 +1095,9 @@
    }
    else if (procType == TGSI_PROCESSOR_GEOMETRY) {
       for (i = 0; i < numInputs; i++) {
-         t->inputs[i] = ureg_DECL_gs_input(ureg,
-                                           i,
-                                           inputSemanticName[i],
-                                           inputSemanticIndex[i]);
+         t->inputs[i] = ureg_DECL_input(ureg,
+                                        inputSemanticName[i],
+                                        inputSemanticIndex[i], 0, 1);
       }
 
       for (i = 0; i < numOutputs; i++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_program.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_program.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_program.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_program.c	2015-09-16 14:36:10.000000000 +0000
@@ -163,6 +163,68 @@
 }
 
 
+/**
+ * Delete a tessellation control program variant.  Note the caller must unlink
+ * the variant from the linked list.
+ */
+static void
+delete_tcp_variant(struct st_context *st, struct st_tcp_variant *tcpv)
+{
+   if (tcpv->driver_shader)
+      cso_delete_tessctrl_shader(st->cso_context, tcpv->driver_shader);
+
+   free(tcpv);
+}
+
+
+/**
+ * Free all variants of a tessellation control program.
+ */
+void
+st_release_tcp_variants(struct st_context *st, struct st_tessctrl_program *sttcp)
+{
+   struct st_tcp_variant *tcpv;
+
+   for (tcpv = sttcp->variants; tcpv; ) {
+      struct st_tcp_variant *next = tcpv->next;
+      delete_tcp_variant(st, tcpv);
+      tcpv = next;
+   }
+
+   sttcp->variants = NULL;
+}
+
+
+/**
+ * Delete a tessellation evaluation program variant.  Note the caller must
+ * unlink the variant from the linked list.
+ */
+static void
+delete_tep_variant(struct st_context *st, struct st_tep_variant *tepv)
+{
+   if (tepv->driver_shader)
+      cso_delete_tesseval_shader(st->cso_context, tepv->driver_shader);
+
+   free(tepv);
+}
+
+
+/**
+ * Free all variants of a tessellation evaluation program.
+ */
+void
+st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep)
+{
+   struct st_tep_variant *tepv;
+
+   for (tepv = sttep->variants; tepv; ) {
+      struct st_tep_variant *next = tepv->next;
+      delete_tep_variant(st, tepv);
+      tepv = next;
+   }
+
+   sttep->variants = NULL;
+}
 
 
 /**
@@ -215,6 +277,7 @@
          unsigned slot = stvp->num_outputs++;
 
          stvp->result_to_output[attr] = slot;
+         stvp->output_slot_to_attr[slot] = attr;
 
          switch (attr) {
          case VARYING_SLOT_POS:
@@ -285,7 +348,8 @@
             /* fall through */
          case VARYING_SLOT_VAR0:
          default:
-            assert(attr < VARYING_SLOT_MAX);
+            assert(attr >= VARYING_SLOT_VAR0 ||
+                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
             stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
             stvp->output_semantic_index[slot] =
                st_get_generic_varying_index(st, attr);
@@ -321,7 +385,7 @@
       _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
    }
 
-   ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
+   ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen);
    if (ureg == NULL) {
       free(vpv);
       return NULL;
@@ -351,6 +415,7 @@
                                    /* inputs */
                                    vpv->num_inputs,
                                    stvp->input_to_index,
+                                   NULL, /* inputSlotToAttr */
                                    NULL, /* input semantic name */
                                    NULL, /* input semantic index */
                                    NULL, /* interp mode */
@@ -358,6 +423,7 @@
                                    /* outputs */
                                    num_outputs,
                                    stvp->result_to_output,
+                                   stvp->output_slot_to_attr,
                                    stvp->output_semantic_name,
                                    stvp->output_semantic_index,
                                    key->passthrough_edgeflags,
@@ -482,6 +548,7 @@
 
    GLuint outputMapping[FRAG_RESULT_MAX];
    GLuint inputMapping[VARYING_SLOT_MAX];
+   GLuint inputSlotToAttr[VARYING_SLOT_MAX];
    GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
    GLuint interpLocation[PIPE_MAX_SHADER_INPUTS];
    GLuint attr;
@@ -502,6 +569,7 @@
       return NULL;
 
    assert(!(key->bitmap && key->drawpixels));
+   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 
    if (key->bitmap) {
       /* glBitmap drawing */
@@ -543,6 +611,7 @@
          const GLuint slot = fs_num_inputs++;
 
          inputMapping[attr] = slot;
+         inputSlotToAttr[slot] = attr;
          if (stfp->Base.IsCentroid & BITFIELD64_BIT(attr))
             interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTROID;
          else if (stfp->Base.IsSample & BITFIELD64_BIT(attr))
@@ -657,7 +726,8 @@
              * consumed for the TEXi varyings, and we can base the locations of
              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
              */
-            assert(attr >= VARYING_SLOT_TEX0);
+            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
+                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
             if (attr == VARYING_SLOT_PNTC)
@@ -732,7 +802,7 @@
       }
    }
 
-   ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
+   ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen);
    if (ureg == NULL) {
       free(variant);
       return NULL;
@@ -778,6 +848,7 @@
                            /* inputs */
                            fs_num_inputs,
                            inputMapping,
+                           inputSlotToAttr,
                            input_semantic_name,
                            input_semantic_index,
                            interpMode,
@@ -785,6 +856,7 @@
                            /* outputs */
                            fs_num_outputs,
                            outputMapping,
+                           NULL,
                            fs_output_semantic_name,
                            fs_output_semantic_index, FALSE,
                            key->clamp_color );
@@ -860,56 +932,52 @@
 
 
 /**
- * Translate a geometry program to create a new variant.
+ * Translate a program. This is common code for geometry and tessellation
+ * shaders.
  */
-static struct st_gp_variant *
-st_translate_geometry_program(struct st_context *st,
-                              struct st_geometry_program *stgp,
-                              const struct st_gp_variant_key *key)
+static void
+st_translate_program_common(struct st_context *st,
+                            struct gl_program *prog,
+                            struct glsl_to_tgsi_visitor *glsl_to_tgsi,
+                            struct ureg_program *ureg,
+                            unsigned tgsi_processor,
+                            struct pipe_shader_state *out_state)
 {
-   GLuint inputMapping[VARYING_SLOT_MAX];
-   GLuint outputMapping[VARYING_SLOT_MAX];
-   struct pipe_context *pipe = st->pipe;
+   GLuint inputSlotToAttr[VARYING_SLOT_TESS_MAX];
+   GLuint inputMapping[VARYING_SLOT_TESS_MAX];
+   GLuint outputSlotToAttr[VARYING_SLOT_TESS_MAX];
+   GLuint outputMapping[VARYING_SLOT_TESS_MAX];
    GLuint attr;
 
-   uint gs_num_inputs = 0;
-
    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   uint num_inputs = 0;
 
-   ubyte gs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
-   ubyte gs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
-   uint gs_num_outputs = 0;
+   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
+   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+   uint num_outputs = 0;
 
    GLint i;
-   struct ureg_program *ureg;
-   struct pipe_shader_state state = {0};
-   struct st_gp_variant *gpv;
-
-   gpv = CALLOC_STRUCT(st_gp_variant);
-   if (!gpv)
-      return NULL;
-
-   ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
-   if (ureg == NULL) {
-      free(gpv);
-      return NULL;
-   }
 
+   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
    memset(inputMapping, 0, sizeof(inputMapping));
+   memset(outputSlotToAttr, 0, sizeof(outputSlotToAttr));
    memset(outputMapping, 0, sizeof(outputMapping));
+   memset(out_state, 0, sizeof(*out_state));
 
    /*
     * Convert Mesa program inputs to TGSI input register semantics.
     */
    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
-      if ((stgp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) {
-         const GLuint slot = gs_num_inputs++;
+      if ((prog->InputsRead & BITFIELD64_BIT(attr)) != 0) {
+         const GLuint slot = num_inputs++;
 
          inputMapping[attr] = slot;
+         inputSlotToAttr[slot] = attr;
 
          switch (attr) {
          case VARYING_SLOT_PRIMITIVE_ID:
+            assert(tgsi_processor == TGSI_PROCESSOR_GEOMETRY);
             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
             input_semantic_index[slot] = 0;
             break;
@@ -961,19 +1029,33 @@
             /* fall through */
          case VARYING_SLOT_VAR0:
          default:
-            assert(attr >= VARYING_SLOT_VAR0 && attr < VARYING_SLOT_MAX);
+            assert(attr >= VARYING_SLOT_VAR0 ||
+                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
             input_semantic_index[slot] =
                st_get_generic_varying_index(st, attr);
-         break;
+            break;
          }
       }
    }
 
+   /* Also add patch inputs: bit N of PatchInputsRead selects PATCH0 + N. */
+   for (attr = 0; attr < 32; attr++) {
+      if (prog->PatchInputsRead & (1u << attr)) { /* 1 << 31 on int is UB */
+         GLuint slot = num_inputs++;
+         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
+
+         inputMapping[patch_attr] = slot;
+         inputSlotToAttr[slot] = patch_attr;
+         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
+         input_semantic_index[slot] = attr;
+      }
+   }
+
    /* initialize output semantics to defaults */
    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
-      gs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
-      gs_output_semantic_index[i] = 0;
+      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
+      output_semantic_index[i] = 0;
    }
 
    /*
@@ -981,64 +1063,73 @@
     * mapping and the semantic information for each output.
     */
    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
-      if (stgp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) {
-         GLuint slot = gs_num_outputs++;
+      if (prog->OutputsWritten & BITFIELD64_BIT(attr)) {
+         GLuint slot = num_outputs++;
 
          outputMapping[attr] = slot;
+         outputSlotToAttr[slot] = attr;
 
          switch (attr) {
          case VARYING_SLOT_POS:
             assert(slot == 0);
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_COL0:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_COL1:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-            gs_output_semantic_index[slot] = 1;
+            output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            output_semantic_index[slot] = 1;
             break;
          case VARYING_SLOT_BFC0:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_BFC1:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
-            gs_output_semantic_index[slot] = 1;
+            output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+            output_semantic_index[slot] = 1;
             break;
          case VARYING_SLOT_FOGC:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_PSIZ:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_CLIP_VERTEX:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_CLIP_DIST0:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_CLIP_DIST1:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
-            gs_output_semantic_index[slot] = 1;
+            output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+            output_semantic_index[slot] = 1;
             break;
          case VARYING_SLOT_LAYER:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_PRIMITIVE_ID:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_VIEWPORT:
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
-            gs_output_semantic_index[slot] = 0;
+            output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
+            output_semantic_index[slot] = 0;
+            break;
+         case VARYING_SLOT_TESS_LEVEL_OUTER:
+            output_semantic_name[slot] = TGSI_SEMANTIC_TESSOUTER;
+            output_semantic_index[slot] = 0;
+            break;
+         case VARYING_SLOT_TESS_LEVEL_INNER:
+            output_semantic_name[slot] = TGSI_SEMANTIC_TESSINNER;
+            output_semantic_index[slot] = 0;
             break;
          case VARYING_SLOT_TEX0:
          case VARYING_SLOT_TEX1:
@@ -1049,65 +1140,109 @@
          case VARYING_SLOT_TEX6:
          case VARYING_SLOT_TEX7:
             if (st->needs_texcoord_semantic) {
-               gs_output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
-               gs_output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
+               output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
+               output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
                break;
             }
             /* fall through */
          case VARYING_SLOT_VAR0:
          default:
-            assert(slot < ARRAY_SIZE(gs_output_semantic_name));
-            assert(attr >= VARYING_SLOT_VAR0);
-            gs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-            gs_output_semantic_index[slot] =
+            assert(slot < ARRAY_SIZE(output_semantic_name));
+            assert(attr >= VARYING_SLOT_VAR0 ||
+                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
+            output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            output_semantic_index[slot] =
                st_get_generic_varying_index(st, attr);
-         break;
+            break;
          }
       }
    }
 
-   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType);
-   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType);
-   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
-                 stgp->Base.VerticesOut);
-   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations);
+   /* Also add patch outputs: bit N of PatchOutputsWritten is PATCH0 + N. */
+   for (attr = 0; attr < 32; attr++) {
+      if (prog->PatchOutputsWritten & (1u << attr)) { /* 1 << 31: int UB */
+         GLuint slot = num_outputs++;
+         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
+
+         outputMapping[patch_attr] = slot;
+         outputSlotToAttr[slot] = patch_attr;
+         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
+         output_semantic_index[slot] = attr;
+      }
+   }
 
    st_translate_program(st->ctx,
-                        TGSI_PROCESSOR_GEOMETRY,
+                        tgsi_processor,
                         ureg,
-                        stgp->glsl_to_tgsi,
-                        &stgp->Base.Base,
+                        glsl_to_tgsi,
+                        prog,
                         /* inputs */
-                        gs_num_inputs,
+                        num_inputs,
                         inputMapping,
+                        inputSlotToAttr,
                         input_semantic_name,
                         input_semantic_index,
                         NULL,
                         NULL,
                         /* outputs */
-                        gs_num_outputs,
+                        num_outputs,
                         outputMapping,
-                        gs_output_semantic_name,
-                        gs_output_semantic_index,
+                        outputSlotToAttr,
+                        output_semantic_name,
+                        output_semantic_index,
                         FALSE,
                         FALSE);
 
-   state.tokens = ureg_get_tokens(ureg, NULL);
+   out_state->tokens = ureg_get_tokens(ureg, NULL);
    ureg_destroy(ureg);
 
-   st_translate_stream_output_info(stgp->glsl_to_tgsi,
+   st_translate_stream_output_info(glsl_to_tgsi,
                                    outputMapping,
-                                   &state.stream_output);
+                                   &out_state->stream_output);
 
    if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
-      _mesa_print_program(&stgp->Base.Base);
+      _mesa_print_program(prog);
       debug_printf("\n");
    }
 
    if (ST_DEBUG & DEBUG_TGSI) {
-      tgsi_dump(state.tokens, 0);
+      tgsi_dump(out_state->tokens, 0);
       debug_printf("\n");
    }
+}
+
+
+/**
+ * Translate a geometry program to create a new variant.
+ */
+static struct st_gp_variant *
+st_translate_geometry_program(struct st_context *st,
+                              struct st_geometry_program *stgp,
+                              const struct st_gp_variant_key *key)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct ureg_program *ureg;
+   struct st_gp_variant *gpv;
+   struct pipe_shader_state state;
+
+   ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen);
+   if (ureg == NULL)
+      return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
+                 stgp->Base.VerticesOut);
+   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations);
+
+   st_translate_program_common(st, &stgp->Base.Base, stgp->glsl_to_tgsi, ureg,
+                               TGSI_PROCESSOR_GEOMETRY, &state);
+
+   gpv = CALLOC_STRUCT(st_gp_variant);
+   if (!gpv) {
+      ureg_free_tokens(state.tokens);
+      return NULL;
+   }
 
    /* fill in new variant */
    gpv->driver_shader = pipe->create_gs_state(pipe, &state);
@@ -1150,6 +1285,168 @@
 
 
 /**
+ * Translate a tessellation control program to create a new variant.
+ */
+static struct st_tcp_variant *
+st_translate_tessctrl_program(struct st_context *st,
+                              struct st_tessctrl_program *sttcp,
+                              const struct st_tcp_variant_key *key)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct ureg_program *ureg;
+   struct st_tcp_variant *tcpv;
+   struct pipe_shader_state state;
+
+   ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, pipe->screen);
+   if (ureg == NULL) {
+      return NULL;
+   }
+
+   ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
+                 sttcp->Base.VerticesOut);
+
+   st_translate_program_common(st, &sttcp->Base.Base, sttcp->glsl_to_tgsi,
+                               ureg, TGSI_PROCESSOR_TESS_CTRL, &state);
+
+   tcpv = CALLOC_STRUCT(st_tcp_variant);
+   if (!tcpv) {
+      ureg_free_tokens(state.tokens);
+      return NULL;
+   }
+
+   /* fill in new variant */
+   tcpv->driver_shader = pipe->create_tcs_state(pipe, &state);
+   tcpv->key = *key;
+
+   ureg_free_tokens(state.tokens);
+   return tcpv;
+}
+
+
+/**
+ * Get/create tessellation control program variant.
+ */
+struct st_tcp_variant *
+st_get_tcp_variant(struct st_context *st,
+                  struct st_tessctrl_program *sttcp,
+                  const struct st_tcp_variant_key *key)
+{
+   struct st_tcp_variant *tcpv;
+
+   /* Search for existing variant */
+   for (tcpv = sttcp->variants; tcpv; tcpv = tcpv->next) {
+      if (memcmp(&tcpv->key, key, sizeof(*key)) == 0) {
+         break;
+      }
+   }
+
+   if (!tcpv) {
+      /* create new */
+      tcpv = st_translate_tessctrl_program(st, sttcp, key);
+      if (tcpv) {
+         /* insert into list */
+         tcpv->next = sttcp->variants;
+         sttcp->variants = tcpv;
+      }
+   }
+
+   return tcpv;
+}
+
+
+/**
+ * Translate a tessellation evaluation program to create a new variant.
+ */
+static struct st_tep_variant *
+st_translate_tesseval_program(struct st_context *st,
+                              struct st_tesseval_program *sttep,
+                              const struct st_tep_variant_key *key)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct ureg_program *ureg;
+   struct st_tep_variant *tepv;
+   struct pipe_shader_state state;
+
+   ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, pipe->screen);
+   if (ureg == NULL) {
+      return NULL;
+   }
+
+   if (sttep->Base.PrimitiveMode == GL_ISOLINES)
+      ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
+   else
+      ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, sttep->Base.PrimitiveMode);
+
+   switch (sttep->Base.Spacing) {
+   case GL_EQUAL:
+      ureg_property(ureg, TGSI_PROPERTY_TES_SPACING, PIPE_TESS_SPACING_EQUAL);
+      break;
+   case GL_FRACTIONAL_EVEN:
+      ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
+                    PIPE_TESS_SPACING_FRACTIONAL_EVEN);
+      break;
+   case GL_FRACTIONAL_ODD:
+      ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
+                    PIPE_TESS_SPACING_FRACTIONAL_ODD);
+      break;
+   default:
+      assert(0);
+   }
+
+   ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
+                 sttep->Base.VertexOrder == GL_CW);
+   ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE, sttep->Base.PointMode);
+
+   st_translate_program_common(st, &sttep->Base.Base, sttep->glsl_to_tgsi,
+                               ureg, TGSI_PROCESSOR_TESS_EVAL, &state);
+
+   tepv = CALLOC_STRUCT(st_tep_variant);
+   if (!tepv) {
+      ureg_free_tokens(state.tokens);
+      return NULL;
+   }
+
+   /* fill in new variant */
+   tepv->driver_shader = pipe->create_tes_state(pipe, &state);
+   tepv->key = *key;
+
+   ureg_free_tokens(state.tokens);
+   return tepv;
+}
+
+
+/**
+ * Get/create tessellation evaluation program variant.
+ */
+struct st_tep_variant *
+st_get_tep_variant(struct st_context *st,
+                  struct st_tesseval_program *sttep,
+                  const struct st_tep_variant_key *key)
+{
+   struct st_tep_variant *tepv;
+
+   /* Search for existing variant */
+   for (tepv = sttep->variants; tepv; tepv = tepv->next) {
+      if (memcmp(&tepv->key, key, sizeof(*key)) == 0) {
+         break;
+      }
+   }
+
+   if (!tepv) {
+      /* create new */
+      tepv = st_translate_tesseval_program(st, sttep, key);
+      if (tepv) {
+         /* insert into list */
+         tepv->next = sttep->variants;
+         sttep->variants = tepv;
+      }
+   }
+
+   return tepv;
+}
+
+
+/**
  * Vert/Geom/Frag programs have per-context variants.  Free all the
  * variants attached to the given program which match the given context.
  */
@@ -1201,7 +1498,7 @@
          }
       }
       break;
-   case MESA_GEOMETRY_PROGRAM:
+   case GL_GEOMETRY_PROGRAM_NV:
       {
          struct st_geometry_program *stgp =
             (struct st_geometry_program *) program;
@@ -1222,6 +1519,48 @@
          }
       }
       break;
+   case GL_TESS_CONTROL_PROGRAM_NV:
+      {
+         struct st_tessctrl_program *sttcp =
+            (struct st_tessctrl_program *) program;
+         struct st_tcp_variant *tcpv, **prevPtr = &sttcp->variants;
+
+         for (tcpv = sttcp->variants; tcpv; ) {
+            struct st_tcp_variant *next = tcpv->next;
+            if (tcpv->key.st == st) {
+               /* unlink from list */
+               *prevPtr = next;
+               /* destroy this variant */
+               delete_tcp_variant(st, tcpv);
+            }
+            else {
+               prevPtr = &tcpv->next;
+            }
+            tcpv = next;
+         }
+      }
+      break;
+   case GL_TESS_EVALUATION_PROGRAM_NV:
+      {
+         struct st_tesseval_program *sttep =
+            (struct st_tesseval_program *) program;
+         struct st_tep_variant *tepv, **prevPtr = &sttep->variants;
+
+         for (tepv = sttep->variants; tepv; ) {
+            struct st_tep_variant *next = tepv->next;
+            if (tepv->key.st == st) {
+               /* unlink from list */
+               *prevPtr = next;
+               /* destroy this variant */
+               delete_tep_variant(st, tepv);
+            }
+            else {
+               prevPtr = &tepv->next;
+            }
+            tepv = next;
+         }
+      }
+      break;
    default:
       _mesa_problem(NULL, "Unexpected program target 0x%x in "
                     "destroy_program_variants_cb()", program->Target);
@@ -1258,6 +1597,8 @@
    case GL_VERTEX_SHADER:
    case GL_FRAGMENT_SHADER:
    case GL_GEOMETRY_SHADER:
+   case GL_TESS_CONTROL_SHADER:
+   case GL_TESS_EVALUATION_SHADER:
       {
          destroy_program_variants(st, shader->Program);
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_program.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_program.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_program.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_program.h	2015-09-16 14:36:10.000000000 +0000
@@ -163,6 +163,7 @@
 
    /** Maps VARYING_SLOT_x to slot */
    GLuint result_to_output[VARYING_SLOT_MAX];
+   GLuint output_slot_to_attr[VARYING_SLOT_MAX];
    ubyte output_semantic_name[VARYING_SLOT_MAX];
    ubyte output_semantic_index[VARYING_SLOT_MAX];
    GLuint num_outputs;
@@ -187,7 +188,7 @@
  */
 struct st_gp_variant
 {
-   /* Parameters which generated this translated version of a vertex */
+   /* Parameters which generated this variant. */
    struct st_gp_variant_key key;
 
    void *driver_shader;
@@ -209,6 +210,76 @@
 
 
 
+/** Key that selects a tessellation control program variant. */
+struct st_tcp_variant_key
+{
+   struct st_context *st;          /**< variants are per-context */
+   /* no other fields yet: the context alone identifies a variant */
+};
+
+
+/**
+ * Tessellation control program variant (a node in the variant list).
+ */
+struct st_tcp_variant
+{
+   /* Parameters which generated this variant. */
+   struct st_tcp_variant_key key;
+
+   void *driver_shader;  /* opaque; presumably the driver's shader object */
+
+   struct st_tcp_variant *next;  /**< next variant in the list, or NULL */
+};
+
+
+/**
+ * Derived from Mesa gl_tess_ctrl_program; Base must stay the first member.
+ */
+struct st_tessctrl_program
+{
+   struct gl_tess_ctrl_program Base;  /**< The Mesa tess ctrl program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+
+   struct st_tcp_variant *variants;  /**< head of the variant list */
+};
+
+
+
+/** Key that selects a tessellation evaluation program variant. */
+struct st_tep_variant_key
+{
+   struct st_context *st;          /**< variants are per-context */
+   /* no other fields yet; st_get_tep_variant() compares keys via memcmp() */
+};
+
+
+/**
+ * Tessellation evaluation program variant (a node in the variant list).
+ */
+struct st_tep_variant
+{
+   /* Parameters which generated this variant. */
+   struct st_tep_variant_key key;
+
+   void *driver_shader;  /**< result of pipe->create_tes_state() */
+
+   struct st_tep_variant *next;  /**< next variant in the list, or NULL */
+};
+
+
+/**
+ * Derived from Mesa gl_tess_eval_program; Base must stay the first member.
+ */
+struct st_tesseval_program
+{
+   struct gl_tess_eval_program Base;  /**< The Mesa tess eval program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;  /**< used during translation */
+
+   struct st_tep_variant *variants;  /**< head of the variant list */
+};
+
+
+
 static inline struct st_fragment_program *
 st_fragment_program( struct gl_fragment_program *fp )
 {
@@ -228,6 +299,18 @@
    return (struct st_geometry_program *)gp;
 }
 
+static inline struct st_tessctrl_program *
+st_tessctrl_program( struct gl_tess_ctrl_program *tcp )
+{
+   return (struct st_tessctrl_program *)tcp; /* Base is the first member */
+}
+
+static inline struct st_tesseval_program *
+st_tesseval_program( struct gl_tess_eval_program *tep )
+{
+   return (struct st_tesseval_program *)tep; /* Base is the first member */
+}
+
 static inline void
 st_reference_vertprog(struct st_context *st,
                       struct st_vertex_program **ptr,
@@ -258,6 +341,26 @@
                            (struct gl_program *) prog);
 }
 
+/** Forward to _mesa_reference_program() using gl_program base pointers. */
+static inline void
+st_reference_tesscprog(struct st_context *st,
+                       struct st_tessctrl_program **ptr,
+                       struct st_tessctrl_program *prog)
+{
+   struct gl_program **base_ptr = (struct gl_program **) ptr;
+   _mesa_reference_program(st->ctx, base_ptr, (struct gl_program *) prog);
+}
+
+/** Forward to _mesa_reference_program() using gl_program base pointers. */
+static inline void
+st_reference_tesseprog(struct st_context *st,
+                       struct st_tesseval_program **ptr,
+                       struct st_tesseval_program *prog)
+{
+   struct gl_program **base_ptr = (struct gl_program **) ptr;
+   _mesa_reference_program(st->ctx, base_ptr, (struct gl_program *) prog);
+}
+
 /**
  * This defines mapping from Mesa VARYING_SLOTs to TGSI GENERIC slots.
  */
@@ -301,6 +404,16 @@
                   struct st_geometry_program *stgp,
                   const struct st_gp_variant_key *key);
 
+extern struct st_tcp_variant *
+st_get_tcp_variant(struct st_context *st,
+                   struct st_tessctrl_program *sttcp, /* tess ctrl program */
+                   const struct st_tcp_variant_key *key);
+
+extern struct st_tep_variant *
+st_get_tep_variant(struct st_context *st,
+                   struct st_tesseval_program *sttep, /* tess eval program */
+                   const struct st_tep_variant_key *key);
+
 
 extern void
 st_prepare_vertex_program(struct gl_context *ctx,
@@ -324,6 +437,14 @@
                        struct st_geometry_program *stgp);
 
 extern void
+st_release_tcp_variants(struct st_context *st,
+                        struct st_tessctrl_program *sttcp);
+
+extern void
+st_release_tep_variants(struct st_context *st,
+                        struct st_tesseval_program *sttep);
+
+extern void
 st_destroy_program_variants(struct st_context *st);
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_texture.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_texture.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/state_tracker/st_texture.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/state_tracker/st_texture.c	2015-09-16 14:36:10.000000000 +0000
@@ -462,6 +462,11 @@
    return free;
 }
 
+
+/**
+ * For the given texture object, release any sampler views which belong
+ * to the calling context.
+ */
 void
 st_texture_release_sampler_view(struct st_context *st,
                                 struct st_texture_object *stObj)
@@ -478,6 +483,11 @@
    }
 }
 
+
+/**
+ * Release all sampler views attached to the given texture object, regardless
+ * of the context.
+ */
 void
 st_texture_release_all_sampler_views(struct st_context *st,
                                      struct st_texture_object *stObj)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_aaline.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_aaline.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_aaline.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_aaline.c	2015-09-16 14:36:10.000000000 +0000
@@ -116,11 +116,11 @@
    const GLfloat b = pz * py;
    const GLfloat c = px * px + py * py;
    const GLfloat d = -(a * x0 + b * y0 + c * z0);
-   if (a == 0.0 && b == 0.0 && c == 0.0 && d == 0.0) {
-      plane[0] = 0.0;
-      plane[1] = 0.0;
-      plane[2] = 1.0;
-      plane[3] = 0.0;
+   if (a == 0.0F && b == 0.0F && c == 0.0F && d == 0.0F) {
+      plane[0] = 0.0F;
+      plane[1] = 0.0F;
+      plane[2] = 1.0F;
+      plane[3] = 0.0F;
    }
    else {
       plane[0] = a;
@@ -135,9 +135,9 @@
 static inline void
 constant_plane(GLfloat value, GLfloat plane[4])
 {
-   plane[0] = 0.0;
-   plane[1] = 0.0;
-   plane[2] = -1.0;
+   plane[0] = 0.0F;
+   plane[1] = 0.0F;
+   plane[2] = -1.0F;
    plane[3] = value;
 }
 
@@ -160,8 +160,8 @@
 solve_plane_recip(GLfloat x, GLfloat y, const GLfloat plane[4])
 {
    const GLfloat denom = plane[3] + plane[0] * x + plane[1] * y;
-   if (denom == 0.0)
-      return 0.0;
+   if (denom == 0.0F)
+      return 0.0F;
    else
       return -plane[2] / denom;
 }
@@ -374,7 +374,7 @@
       if (x0 < x1) {
          xLeft = x0 - line->halfWidth;
          xRight = x1 + line->halfWidth;
-         if (line->dy >= 0.0) {
+         if (line->dy >= 0.0F) {
             yBot = y0 - 3.0F * line->halfWidth;
             yTop = y0 + line->halfWidth;
          }
@@ -386,7 +386,7 @@
       else {
          xLeft = x1 - line->halfWidth;
          xRight = x0 + line->halfWidth;
-         if (line->dy <= 0.0) {
+         if (line->dy <= 0.0F) {
             yBot = y1 - 3.0F * line->halfWidth;
             yTop = y1 + line->halfWidth;
          }
@@ -420,7 +420,7 @@
       if (y0 < y1) {
          yBot = y0 - line->halfWidth;
          yTop = y1 + line->halfWidth;
-         if (line->dx >= 0.0) {
+         if (line->dx >= 0.0F) {
             xLeft = x0 - 3.0F * line->halfWidth;
             xRight = x0 + line->halfWidth;
          }
@@ -432,7 +432,7 @@
       else {
          yBot = y1 - line->halfWidth;
          yTop = y0 + line->halfWidth;
-         if (line->dx <= 0.0) {
+         if (line->dx <= 0.0F) {
             xLeft = x1 - 3.0F * line->halfWidth;
             xRight = x1 + line->halfWidth;
          }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_aalinetemp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_aalinetemp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_aalinetemp.h	2015-02-25 15:01:21.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_aalinetemp.h	2015-09-16 14:36:10.000000000 +0000
@@ -44,7 +44,7 @@
 
    (void) swrast;
 
-   if (coverage == 0.0)
+   if (coverage == 0.0F)
       return;
 
    line->span.end++;
@@ -123,7 +123,7 @@
                                  ctx->Const.MinLineWidthAA,
                                  ctx->Const.MaxLineWidthAA);
 
-   if (line.len == 0.0 || IS_INF_OR_NAN(line.len))
+   if (line.len == 0.0F || IS_INF_OR_NAN(line.len))
       return;
 
    INIT_SPAN(line.span, GL_LINE);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_atifragshader.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_atifragshader.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_atifragshader.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_atifragshader.c	2015-09-16 14:36:10.000000000 +0000
@@ -436,13 +436,13 @@
 		     for (i = 0; i < 3; i++) {
 			dst[optype][i] =
 			   (src[optype][2][i] >
-			    0.5) ? src[optype][0][i] : src[optype][1][i];
+			    0.5F) ? src[optype][0][i] : src[optype][1][i];
 		     }
 		  }
 		  else {
 		     dst[optype][3] =
 			(src[optype][2][3] >
-			 0.5) ? src[optype][0][3] : src[optype][1][3];
+			 0.5F) ? src[optype][0][3] : src[optype][1][3];
 		  }
 		  break;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_copypix.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_copypix.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_copypix.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_copypix.c	2015-09-16 14:36:10.000000000 +0000
@@ -27,6 +27,7 @@
 #include "main/context.h"
 #include "main/condrender.h"
 #include "main/macros.h"
+#include "main/blit.h"
 #include "main/pixeltransfer.h"
 #include "main/imports.h"
 
@@ -51,20 +52,9 @@
                 GLint width, GLint height,
                 GLfloat zoomX, GLfloat zoomY)
 {
-   if (zoomX == 1.0 && zoomY == 1.0) {
-      /* no zoom */
-      if (srcx >= dstx + width || (srcx + width <= dstx)) {
-         return GL_FALSE;
-      }
-      else if (srcy < dsty) { /* this is OK */
-         return GL_FALSE;
-      }
-      else if (srcy > dsty + height) {
-         return GL_FALSE;
-      }
-      else {
-         return GL_TRUE;
-      }
+   if (zoomX == 1.0F && zoomY == 1.0F) {
+      return _mesa_regions_overlap(srcx, srcy, srcx + width, srcy + height,
+                                   dstx, dsty, dstx + width, dsty + height);
    }
    else {
       /* add one pixel of slop when zooming, just to be safe */
@@ -211,8 +201,8 @@
    GLuint i;
 
    if (depthMax <= 0xffffff &&
-       ctx->Pixel.DepthScale == 1.0 &&
-       ctx->Pixel.DepthBias == 0.0) {
+       ctx->Pixel.DepthScale == 1.0F &&
+       ctx->Pixel.DepthBias == 0.0F) {
       /* no scale or bias and no clamping and no worry of overflow */
       const GLfloat depthMaxF = ctx->DrawBuffer->_DepthMaxF;
       for (i = 0; i < width; i++) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_depth.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_depth.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_depth.c	2014-04-29 19:36:58.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_depth.c	2015-09-16 14:36:10.000000000 +0000
@@ -419,8 +419,8 @@
    struct gl_framebuffer *fb = ctx->DrawBuffer;
    struct gl_renderbuffer *rb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
    GLubyte *zStart;
-   GLuint zMin = (GLuint) (ctx->Depth.BoundsMin * fb->_DepthMaxF + 0.5F);
-   GLuint zMax = (GLuint) (ctx->Depth.BoundsMax * fb->_DepthMaxF + 0.5F);
+   GLuint zMin = (GLuint)((double)ctx->Depth.BoundsMin * 0xffffffff);
+   GLuint zMax = (GLuint)((double)ctx->Depth.BoundsMax * 0xffffffff);
    GLubyte *mask = span->array->mask;
    const GLuint count = span->end;
    GLuint i;
@@ -444,6 +444,16 @@
       zBufferVals = (const GLuint *) zStart;
    }
    else {
+      /* Round the bounds to the precision of the zbuffer. */
+      if (rb->Format == MESA_FORMAT_Z_UNORM16) {
+         zMin = (zMin & 0xffff0000) | (zMin >> 16);
+         zMax = (zMax & 0xffff0000) | (zMax >> 16);
+      } else {
+         /* 24 bits */
+         zMin = (zMin & 0xffffff00) | (zMin >> 24);
+         zMax = (zMax & 0xffffff00) | (zMax >> 24);
+      }
+
       /* unpack Z values into a temporary array */
       if (span->arrayMask & SPAN_XY) {
          get_z32_values(ctx, rb, count, span->array->x, span->array->y,
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_drawpix.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_drawpix.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_drawpix.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_drawpix.c	2015-09-16 14:36:10.000000000 +0000
@@ -264,7 +264,7 @@
                      const struct gl_pixelstore_attrib *unpack,
                      const GLvoid *pixels )
 {
-   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
    const GLenum destType = GL_UNSIGNED_BYTE;
    GLint row;
    GLubyte *values;
@@ -309,8 +309,8 @@
                    const GLvoid *pixels )
 {
    const GLboolean scaleOrBias
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
-   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+      = ctx->Pixel.DepthScale != 1.0f || ctx->Pixel.DepthBias != 0.0f;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0f || ctx->Pixel.ZoomY != 1.0f;
    SWspan span;
 
    INIT_SPAN(span, GL_BITMAP);
@@ -415,7 +415,7 @@
                   const GLvoid *pixels )
 {
    const GLint imgX = x, imgY = y;
-   const GLboolean zoom = ctx->Pixel.ZoomX!=1.0 || ctx->Pixel.ZoomY!=1.0;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
    GLbitfield transferOps = ctx->_ImageTransferState;
    SWspan span;
 
@@ -481,17 +481,17 @@
           */
          GLint swapSize = _mesa_sizeof_packed_type(type);
          if (swapSize == 2 || swapSize == 4) {
-            int components = _mesa_components_in_format(format);
-            int elementCount = width * height * components;
-            tempImage = malloc(elementCount * swapSize);
+            int imageStride = _mesa_image_image_stride(unpack, width, height, format, type);
+
+            tempImage = malloc(imageStride);
             if (!tempImage) {
                _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
                return;
             }
-            if (swapSize == 2)
-               _mesa_swap2_copy(tempImage, (GLushort *) pixels, elementCount);
-            else
-               _mesa_swap4_copy(tempImage, (GLuint *) pixels, elementCount);
+
+            _mesa_swap_bytes_2d_image(format, type, unpack,
+                                      width, height, tempImage, pixels);
+
             pixels = tempImage;
          }
       }
@@ -601,10 +601,10 @@
 {
    const GLint imgX = x, imgY = y;
    const GLboolean scaleOrBias
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+      = ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F;
    const GLuint stencilMask = ctx->Stencil.WriteMask[0];
    const GLenum stencilType = GL_UNSIGNED_BYTE;
-   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
    struct gl_renderbuffer *depthRb, *stencilRb;
    struct gl_pixelstore_attrib clippedUnpack = *unpack;
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_fragprog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_fragprog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_fragprog.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_fragprog.c	2015-09-16 14:36:10.000000000 +0000
@@ -243,9 +243,9 @@
             /* Store result depth/z */
             if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
                const GLfloat depth = machine->Outputs[FRAG_RESULT_DEPTH][2];
-               if (depth <= 0.0)
+               if (depth <= 0.0F)
                   span->array->z[i] = 0;
-               else if (depth >= 1.0)
+               else if (depth >= 1.0F)
                   span->array->z[i] = ctx->DrawBuffer->_DepthMax;
                else
                   span->array->z[i] =
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_lines.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_lines.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_lines.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_lines.c	2015-09-16 14:36:10.000000000 +0000
@@ -241,7 +241,7 @@
          USE(general_line);
       }
       else if (ctx->Depth.Test
-               || ctx->Line.Width != 1.0
+               || ctx->Line.Width != 1.0F
                || ctx->Line.StippleFlag) {
          /* no texture, but Z, fog, width>1, stipple, etc. */
 #if CHAN_BITS == 32
@@ -252,7 +252,7 @@
       }
       else {
          assert(!ctx->Depth.Test);
-         assert(ctx->Line.Width == 1.0);
+         assert(ctx->Line.Width == 1.0F);
          /* simple lines */
          USE(simple_no_z_rgba_line);
       }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_points.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_points.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_points.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_points.c	2015-09-16 14:36:10.000000000 +0000
@@ -208,9 +208,9 @@
       else {
          /* even size */
          /* 0.501 factor allows conformance to pass */
-         xmin = (GLint) (x + 0.501) - iRadius;
+         xmin = (GLint) (x + 0.501F) - iRadius;
          xmax = xmin + iSize - 1;
-         ymin = (GLint) (y + 0.501) - iRadius;
+         ymin = (GLint) (y + 0.501F) - iRadius;
          ymax = ymin + iSize - 1;
       }
 
@@ -423,9 +423,9 @@
       else {
          /* even size */
          /* 0.501 factor allows conformance to pass */
-         xmin = (GLint) (x + 0.501) - iRadius;
+         xmin = (GLint) (x + 0.501F) - iRadius;
          xmax = xmin + iSize - 1;
-         ymin = (GLint) (y + 0.501) - iRadius;
+         ymin = (GLint) (y + 0.501F) - iRadius;
          ymax = ymin + iSize - 1;
       }
 
@@ -552,7 +552,7 @@
       else if (ctx->Point.SmoothFlag) {
          swrast->Point = smooth_point;
       }
-      else if (size > 1.0 ||
+      else if (size > 1.0F ||
                ctx->Point._Attenuated ||
                ctx->VertexProgram.PointSizeEnabled) {
          swrast->Point = large_point;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_span.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_span.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_span.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_span.c	2015-09-16 14:36:10.000000000 +0000
@@ -506,7 +506,7 @@
             /* LOD is calculated directly in the ansiotropic filter, we can
              * skip the normal lambda function as the result is ignored.
              */
-            if (samp->MaxAnisotropy > 1.0 &&
+            if (samp->MaxAnisotropy > 1.0F &&
                 samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
                needLambda = GL_FALSE;
             }
@@ -886,16 +886,16 @@
       GLubyte (*rgba)[4] = span->array->rgba8;
       for (i = 0; i < span->end; i++) {
          const GLfloat a = rgba[i][ACOMP] * coverage[i];
-         rgba[i][ACOMP] = (GLubyte) CLAMP(a, 0.0, 255.0);
-         assert(coverage[i] >= 0.0);
-         assert(coverage[i] <= 1.0);
+         rgba[i][ACOMP] = (GLubyte) CLAMP(a, 0.0F, 255.0F);
+         assert(coverage[i] >= 0.0F);
+         assert(coverage[i] <= 1.0F);
       }
    }
    else if (span->array->ChanType == GL_UNSIGNED_SHORT) {
       GLushort (*rgba)[4] = span->array->rgba16;
       for (i = 0; i < span->end; i++) {
          const GLfloat a = rgba[i][ACOMP] * coverage[i];
-         rgba[i][ACOMP] = (GLushort) CLAMP(a, 0.0, 65535.0);
+         rgba[i][ACOMP] = (GLushort) CLAMP(a, 0.0F, 65535.0F);
       }
    }
    else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_texcombine.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_texcombine.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_texcombine.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_texcombine.c	2015-09-16 14:36:10.000000000 +0000
@@ -670,8 +670,8 @@
                }
             }
 
-            if (samp->MinLod != -1000.0 ||
-                samp->MaxLod != 1000.0) {
+            if (samp->MinLod != -1000.0F ||
+                samp->MaxLod != 1000.0F) {
                /* apply LOD clamping to lambda */
                const GLfloat min = samp->MinLod;
                const GLfloat max = samp->MaxLod;
@@ -682,7 +682,7 @@
                }
             }
          }
-         else if (samp->MaxAnisotropy > 1.0 &&
+         else if (samp->MaxAnisotropy > 1.0F &&
                   samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
             /* sample_lambda_2d_aniso is beeing used as texture_sample_func,
              * it requires the current SWspan *span as an additional parameter.
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_texfetch.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_texfetch.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_texfetch.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_texfetch.c	2015-09-16 14:36:10.000000000 +0000
@@ -282,10 +282,26 @@
    },
 
    /* Packed signed/unsigned non-normalized integer formats */
+   FETCH_NULL(A8B8G8R8_UINT),
+   FETCH_NULL(A8R8G8B8_UINT),
+   FETCH_NULL(R8G8B8A8_UINT),
+   FETCH_NULL(B8G8R8A8_UINT),
    FETCH_NULL(B10G10R10A2_UINT),
    FETCH_NULL(R10G10B10A2_UINT),
    FETCH_NULL(A2B10G10R10_UINT),
    FETCH_NULL(A2R10G10B10_UINT),
+   FETCH_NULL(B5G6R5_UINT),
+   FETCH_NULL(R5G6B5_UINT),
+   FETCH_NULL(B2G3R3_UINT),
+   FETCH_NULL(R3G3B2_UINT),
+   FETCH_NULL(A4B4G4R4_UINT),
+   FETCH_NULL(R4G4B4A4_UINT),
+   FETCH_NULL(B4G4R4A4_UINT),
+   FETCH_NULL(A4R4G4B4_UINT),
+   FETCH_NULL(A1B5G5R5_UINT),
+   FETCH_NULL(B5G5R5A1_UINT),
+   FETCH_NULL(A1R5G5B5_UINT),
+   FETCH_NULL(R5G5B5A1_UINT),
 
    /* Array signed/unsigned non-normalized integer formats */
    FETCH_NULL(A_UINT8),
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_texfilter.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_texfilter.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_texfilter.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_texfilter.c	2015-09-16 14:36:10.000000000 +0000
@@ -1902,7 +1902,7 @@
    const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
    const GLboolean adjustLOD =
       (texUnit->LodBias + samp->LodBias != 0.0F)
-      || (samp->MinLod != -1000.0 || samp->MaxLod != 1000.0);
+      || (samp->MinLod != -1000.0F || samp->MaxLod != 1000.0F);
 
    GLuint i;
    
@@ -1973,8 +1973,8 @@
                      ctx->Const.MaxTextureLodBias);
             lod += bias;
 
-            if (samp->MinLod != -1000.0 ||
-                samp->MaxLod != 1000.0) {
+            if (samp->MinLod != -1000.0F ||
+                samp->MaxLod != 1000.0F) {
                /* apply LOD clamping to lambda */
                lod = CLAMP(lod, samp->MinLod, samp->MaxLod);
             }
@@ -3713,7 +3713,7 @@
                                     const struct gl_sampler_object *sampler)
 {
    if (!t || !_mesa_is_texture_complete(t, sampler)) {
-      return &null_sample_func;
+      return null_sample_func;
    }
    else {
       const GLboolean needLambda =
@@ -3722,32 +3722,32 @@
       switch (t->Target) {
       case GL_TEXTURE_1D:
          if (is_depth_texture(t)) {
-            return &sample_depth_texture;
+            return sample_depth_texture;
          }
          else if (needLambda) {
-            return &sample_lambda_1d;
+            return sample_lambda_1d;
          }
          else if (sampler->MinFilter == GL_LINEAR) {
-            return &sample_linear_1d;
+            return sample_linear_1d;
          }
          else {
             assert(sampler->MinFilter == GL_NEAREST);
-            return &sample_nearest_1d;
+            return sample_nearest_1d;
          }
       case GL_TEXTURE_2D:
          if (is_depth_texture(t)) {
-            return &sample_depth_texture;
+            return sample_depth_texture;
          }
          else if (needLambda) {
             /* Anisotropic filtering extension. Activated only if mipmaps are used */
-            if (sampler->MaxAnisotropy > 1.0 &&
+            if (sampler->MaxAnisotropy > 1.0F &&
                 sampler->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
-               return &sample_lambda_2d_aniso;
+               return sample_lambda_2d_aniso;
             }
-            return &sample_lambda_2d;
+            return sample_lambda_2d;
          }
          else if (sampler->MinFilter == GL_LINEAR) {
-            return &sample_linear_2d;
+            return sample_linear_2d;
          }
          else {
             /* check for a few optimized cases */
@@ -3772,72 +3772,72 @@
          }
       case GL_TEXTURE_3D:
          if (needLambda) {
-            return &sample_lambda_3d;
+            return sample_lambda_3d;
          }
          else if (sampler->MinFilter == GL_LINEAR) {
-            return &sample_linear_3d;
+            return sample_linear_3d;
          }
          else {
             assert(sampler->MinFilter == GL_NEAREST);
-            return &sample_nearest_3d;
+            return sample_nearest_3d;
          }
       case GL_TEXTURE_CUBE_MAP:
          if (needLambda) {
-            return &sample_lambda_cube;
+            return sample_lambda_cube;
          }
          else if (sampler->MinFilter == GL_LINEAR) {
-            return &sample_linear_cube;
+            return sample_linear_cube;
          }
          else {
             assert(sampler->MinFilter == GL_NEAREST);
-            return &sample_nearest_cube;
+            return sample_nearest_cube;
          }
       case GL_TEXTURE_RECTANGLE_NV:
          if (is_depth_texture(t)) {
-            return &sample_depth_texture;
+            return sample_depth_texture;
          }
          else if (needLambda) {
-            return &sample_lambda_rect;
+            return sample_lambda_rect;
          }
          else if (sampler->MinFilter == GL_LINEAR) {
-            return &sample_linear_rect;
+            return sample_linear_rect;
          }
          else {
             assert(sampler->MinFilter == GL_NEAREST);
-            return &sample_nearest_rect;
+            return sample_nearest_rect;
          }
       case GL_TEXTURE_1D_ARRAY_EXT:
          if (is_depth_texture(t)) {
-            return &sample_depth_texture;
+            return sample_depth_texture;
          }
 	 else if (needLambda) {
-            return &sample_lambda_1d_array;
+            return sample_lambda_1d_array;
          }
          else if (sampler->MinFilter == GL_LINEAR) {
-            return &sample_linear_1d_array;
+            return sample_linear_1d_array;
          }
          else {
             assert(sampler->MinFilter == GL_NEAREST);
-            return &sample_nearest_1d_array;
+            return sample_nearest_1d_array;
          }
       case GL_TEXTURE_2D_ARRAY_EXT:
          if (is_depth_texture(t)) {
-            return &sample_depth_texture;
+            return sample_depth_texture;
          }
 	 else if (needLambda) {
-            return &sample_lambda_2d_array;
+            return sample_lambda_2d_array;
          }
          else if (sampler->MinFilter == GL_LINEAR) {
-            return &sample_linear_2d_array;
+            return sample_linear_2d_array;
          }
          else {
             assert(sampler->MinFilter == GL_NEAREST);
-            return &sample_nearest_2d_array;
+            return sample_nearest_2d_array;
          }
       default:
          _mesa_problem(ctx,
                        "invalid target in _swrast_choose_texture_sample_func");
-         return &null_sample_func;
+         return null_sample_func;
       }
    }
 }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_tritemp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_tritemp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_tritemp.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_tritemp.h	2015-09-16 14:36:10.000000000 +0000
@@ -242,7 +242,7 @@
       if (IS_INF_OR_NAN(area) || area == 0.0F)
          return;
 
-      if (area * bf * swrast->_BackfaceCullSign < 0.0)
+      if (area * bf * swrast->_BackfaceCullSign < 0.0F)
          return;
 
       oneOverArea = 1.0F / area;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_zoom.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_zoom.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast/s_zoom.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast/s_zoom.c	2015-09-16 14:36:10.000000000 +0000
@@ -114,7 +114,7 @@
    (zx - imageX) / zoomX = x - imageX;
    */
    GLint x;
-   if (zoomX < 0.0)
+   if (zoomX < 0.0F)
       zx++;
    x = imageX + (GLint) ((zx - imageX) / zoomX);
    return x;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast_setup/ss_tritmp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast_setup/ss_tritmp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/swrast_setup/ss_tritmp.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/swrast_setup/ss_tritmp.h	2015-09-16 14:36:10.000000000 +0000
@@ -58,7 +58,7 @@
 
       if (IND & (SS_TWOSIDE_BIT | SS_UNFILLED_BIT))
       {
-	 facing = (cc < 0.0) ^ ctx->Polygon._FrontBit;
+	 facing = (cc < 0.0F) ^ ctx->Polygon._FrontBit;
 
 	 if (IND & SS_UNFILLED_BIT)
 	    mode = facing ? ctx->Polygon.BackMode : ctx->Polygon.FrontMode;
@@ -138,7 +138,7 @@
           * so no MRD value is used here.
           */
 	 offset = ctx->Polygon.OffsetUnits;
-	 if (cc * cc > 1e-16) {
+	 if (cc * cc > 1e-16F) {
 	    const GLfloat ez = z[0] - z[2];
 	    const GLfloat fz = z[1] - z[2];
 	    const GLfloat oneOverArea = 1.0F / cc;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_context.c	2015-09-16 14:36:10.000000000 +0000
@@ -36,6 +36,7 @@
 #include "math/m_xform.h"
 #include "main/state.h"
 #include "main/viewport.h"
+#include "util/simple_list.h"
 
 #include "tnl.h"
 #include "t_context.h"
@@ -189,7 +190,7 @@
    }
 
    if (new_state & (_NEW_VIEWPORT | _NEW_BUFFERS)) {
-      double scale[3], translate[3];
+      float scale[3], translate[3];
       _mesa_get_viewport_xform(ctx, 0, scale, translate);
       _math_matrix_viewport(&tnl->_WindowMap, scale, translate,
                             ctx->DrawBuffer->_DepthMaxF);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_draw.c	2015-09-16 14:36:10.000000000 +0000
@@ -257,7 +257,7 @@
    GLuint i;
 
    for (i = 0; i < count; i++) {
-      *bptr++ = ((GLfloat *)ptr)[0] == 1.0;
+      *bptr++ = ((GLfloat *)ptr)[0] == 1.0F;
       ptr += stride;
    }
 
@@ -425,6 +425,7 @@
 			 GLuint min_index,
 			 GLuint max_index,
 			 struct gl_transform_feedback_object *tfb_vertcount,
+                         unsigned stream,
 			 struct gl_buffer_object *indirect)
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -451,7 +452,7 @@
       printf("%s %d..%d\n", __func__, min_index, max_index);
       for (i = 0; i < nr_prims; i++)
 	 printf("prim %d: %s start %d count %d\n", i, 
-		_mesa_lookup_enum_by_nr(prim[i].mode),
+		_mesa_enum_to_string(prim[i].mode),
 		prim[i].start,
 		prim[i].count);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/tnl.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/tnl.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/tnl.h	2014-07-15 16:33:02.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/tnl.h	2015-09-16 14:36:10.000000000 +0000
@@ -76,7 +76,7 @@
 struct _mesa_index_buffer;
 
 void
-_tnl_draw_prims( struct gl_context *ctx,
+_tnl_draw_prims(struct gl_context *ctx,
 		     const struct _mesa_prim *prim,
 		     GLuint nr_prims,
 		     const struct _mesa_index_buffer *ib,
@@ -84,6 +84,7 @@
 		     GLuint min_index,
 		     GLuint max_index,
 		     struct gl_transform_feedback_object *tfb_vertcount,
+                     unsigned stream,
 		     struct gl_buffer_object *indirect );
 
 extern void
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_rasterpos.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_rasterpos.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_rasterpos.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_rasterpos.c	2015-09-16 14:36:10.000000000 +0000
@@ -148,7 +148,7 @@
 	 SUB_3V(VP, light->_Position, vertex);
          /* d = length(VP) */
 	 d = (GLfloat) LEN_3FV( VP );
-	 if (d > 1.0e-6) {
+	 if (d > 1.0e-6F) {
             /* normalize VP */
 	    GLfloat invd = 1.0F / d;
 	    SELF_SCALE_SCALAR_3V(VP, invd);
@@ -172,7 +172,7 @@
 	 }
       }
 
-      if (attenuation < 1e-3)
+      if (attenuation < 1e-3F)
 	 continue;
 
       n_dot_VP = DOT3( normal, VP );
@@ -219,7 +219,7 @@
 	    shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0];
 	    spec_coef = powf(n_dot_h, shine);
 
-	    if (spec_coef > 1.0e-10) {
+	    if (spec_coef > 1.0e-10F) {
                if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) {
                   ACC_SCALE_SCALAR_3V( specularContrib, spec_coef,
                                        light->_MatSpecular[0]);
@@ -378,7 +378,7 @@
       GLfloat eye[4], clip[4], ndc[3], d;
       GLfloat *norm, eyenorm[3];
       GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
-      double scale[3], translate[3];
+      float scale[3], translate[3];
 
       /* apply modelview matrix:  eye = MV * obj */
       TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj );
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_fog.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_fog.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_fog.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_fog.c	2015-09-16 14:36:10.000000000 +0000
@@ -45,8 +45,8 @@
 #define FOG_STAGE_DATA(stage) ((struct fog_stage_data *)stage->privatePtr)
 
 #define FOG_EXP_TABLE_SIZE 256
-#define FOG_MAX (10.0)
-#define EXP_FOG_MAX .0006595
+#define FOG_MAX (10.0F)
+#define EXP_FOG_MAX .0006595F
 #define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
 static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
 static GLfloat inited = 0;
@@ -54,7 +54,7 @@
 #if 1
 #define NEG_EXP( result, narg )						\
 do {									\
-   GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR));			\
+   GLfloat f = (GLfloat) (narg * (1.0F / FOG_INCR));			\
    GLint k = (GLint) f;							\
    if (k > FOG_EXP_TABLE_SIZE-2) 					\
       result = (GLfloat) EXP_FOG_MAX;					\
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_light.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_light.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_light.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_light.c	2015-09-16 14:36:10.000000000 +0000
@@ -137,23 +137,23 @@
 	    break;
 
       m = s->tab;
-      m[0] = 0.0;
-      if (shininess == 0.0) {
+      m[0] = 0.0F;
+      if (shininess == 0.0F) {
 	 for (j = 1 ; j <= SHINE_TABLE_SIZE ; j++)
-	    m[j] = 1.0;
+	    m[j] = 1.0F;
       }
       else {
 	 for (j = 1 ; j < SHINE_TABLE_SIZE ; j++) {
-            GLdouble t, x = j / (GLfloat) (SHINE_TABLE_SIZE - 1);
-            if (x < 0.005) /* underflow check */
-               x = 0.005;
-            t = pow(x, shininess);
-	    if (t > 1e-20)
-	       m[j] = (GLfloat) t;
+            GLfloat t, x = j / (GLfloat) (SHINE_TABLE_SIZE - 1);
+            if (x < 0.005F) /* underflow check */
+               x = 0.005F;
+            t = powf(x, shininess);
+	    if (t > 1e-20F)
+	       m[j] = t;
 	    else
-	       m[j] = 0.0;
+	       m[j] = 0.0F;
 	 }
-	 m[SHINE_TABLE_SIZE] = 1.0;
+	 m[SHINE_TABLE_SIZE] = 1.0F;
       }
 
       s->shininess = shininess;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_lighttmp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_lighttmp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_lighttmp.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_lighttmp.h	2015-09-16 14:36:10.000000000 +0000
@@ -112,7 +112,7 @@
 	 GLint side;
 	 GLfloat contrib[3];
 	 GLfloat attenuation;
-	 GLfloat VP[3];  /* unit vector from vertex to light */
+	 GLfloat VP[3];          /* unit vector from vertex to light */
 	 GLfloat n_dot_VP;       /* n dot VP */
 	 GLfloat *h;
 
@@ -129,7 +129,7 @@
 
 	    d = (GLfloat) LEN_3FV( VP );
 
-	    if (d > 1e-6) {
+	    if (d > 1e-6F) {
 	       GLfloat invd = 1.0F / d;
 	       SELF_SCALE_SCALAR_3V(VP, invd);
 	    }
@@ -152,7 +152,7 @@
 	    }
 	 }
 
-	 if (attenuation < 1e-3)
+	 if (attenuation < 1e-3F)
 	    continue;		/* this light makes no contribution */
 
 	 /* Compute dot product or normal and vector from V to light pos */
@@ -204,7 +204,7 @@
 
 	 if (n_dot_h > 0.0F) {
 	    GLfloat spec_coef = lookup_shininess(ctx, side, n_dot_h);
-	    if (spec_coef > 1.0e-10) {
+	    if (spec_coef > 1.0e-10F) {
 	       spec_coef *= attenuation;
 	       ACC_SCALE_SCALAR_3V( spec[side], spec_coef,
 				    light->_MatSpecular[side]);
@@ -283,12 +283,11 @@
 
       /* Add contribution from each enabled light source */
       foreach (light, &ctx->Light.EnabledList) {
-
 	 GLfloat n_dot_h;
 	 GLfloat correction;
 	 GLint side;
 	 GLfloat contrib[3];
-	 GLfloat attenuation = 1.0;
+	 GLfloat attenuation;
 	 GLfloat VP[3];          /* unit vector from vertex to light */
 	 GLfloat n_dot_VP;       /* n dot VP */
 	 GLfloat *h;
@@ -302,12 +301,11 @@
 	 else {
 	    GLfloat d;     /* distance from vertex to light */
 
-
 	    SUB_3V(VP, light->_Position, vertex);
 
 	    d = (GLfloat) LEN_3FV( VP );
 
-	    if ( d > 1e-6) {
+	    if (d > 1e-6F) {
 	       GLfloat invd = 1.0F / d;
 	       SELF_SCALE_SCALAR_3V(VP, invd);
 	    }
@@ -330,7 +328,7 @@
 	    }
 	 }
 
-	 if (attenuation < 1e-3)
+	 if (attenuation < 1e-3F)
 	    continue;		/* this light makes no contribution */
 
 	 /* Compute dot product or normal and vector from V to light pos */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_normals.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_normals.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_normals.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_normals.c	2015-09-16 14:36:10.000000000 +0000
@@ -114,7 +114,7 @@
 	 store->NormalTransform = _mesa_normal_tab[transform | NORM_NORMALIZE];
       }
       else if (ctx->Transform.RescaleNormals &&
-               ctx->_ModelViewInvScale != 1.0) {
+               ctx->_ModelViewInvScale != 1.0F) {
 	 store->NormalTransform = _mesa_normal_tab[transform | NORM_RESCALE];
       }
       else {
@@ -131,7 +131,7 @@
 	 store->NormalTransform = _mesa_normal_tab[NORM_NORMALIZE];
       }
       else if (!ctx->Transform.RescaleNormals &&
-	       ctx->_ModelViewInvScale != 1.0) {
+	       ctx->_ModelViewInvScale != 1.0F) {
 	 store->NormalTransform = _mesa_normal_tab[NORM_RESCALE];
       }
       else {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_render.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_render.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vb_render.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vb_render.c	2015-09-16 14:36:10.000000000 +0000
@@ -315,7 +315,7 @@
 
 	 if (MESA_VERBOSE & VERBOSE_PRIMS) 
 	    _mesa_debug(NULL, "MESA prim %s %d..%d\n", 
-			_mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK), 
+			_mesa_enum_to_string(prim & PRIM_MODE_MASK), 
 			start, start+length);
 
 	 if (length)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vertex_generic.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vertex_generic.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vertex_generic.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vertex_generic.c	2015-09-16 14:36:10.000000000 +0000
@@ -1026,7 +1026,7 @@
 
    if (tnl->NeedNdcCoords) {
       const GLfloat *dstclip = VB->ClipPtr->data[edst];
-      if (dstclip[3] != 0.0) {
+      if (dstclip[3] != 0.0f) {
 	 const GLfloat w = 1.0f / dstclip[3];
 	 GLfloat pos[4];
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vertex_sse.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vertex_sse.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl/t_vertex_sse.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl/t_vertex_sse.c	2015-09-16 14:36:10.000000000 +0000
@@ -592,7 +592,7 @@
 	    break;
 	 case GL_UNSIGNED_SHORT:
 	 default:
-	    printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE));
+	    printf("unknown CHAN_TYPE %s\n", _mesa_enum_to_string(CHAN_TYPE));
 	    return GL_FALSE;
 	 }
 	 break;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl_dd/t_dd_dmatmp.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl_dd/t_dd_dmatmp.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl_dd/t_dd_dmatmp.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl_dd/t_dd_dmatmp.h	2015-09-16 14:36:10.000000000 +0000
@@ -1256,7 +1256,7 @@
       }
       
       if (!ok) {
-/* 	 fprintf(stderr, "not ok %s\n", _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK)); */
+/* 	 fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
 	 return GL_FALSE;
       }
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl_dd/t_dd_unfilled.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl_dd/t_dd_unfilled.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/tnl_dd/t_dd_unfilled.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/tnl_dd/t_dd_unfilled.h	2015-09-16 14:36:10.000000000 +0000
@@ -60,7 +60,7 @@
    }
 
 /*     fprintf(stderr, "%s %s %d %d %d\n", __func__, */
-/*  	   _mesa_lookup_enum_by_nr( mode ), */
+/*  	   _mesa_enum_to_string( mode ), */
 /*  	   ef[e0], ef[e1], ef[e2]); */
 
    if (mode == GL_POINT) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_context.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_context.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_context.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_context.c	2015-09-16 14:36:10.000000000 +0000
@@ -37,9 +37,9 @@
 
 static GLuint check_size( const GLfloat *attr )
 {
-   if (attr[3] != 1.0) return 4;
-   if (attr[2] != 0.0) return 3;
-   if (attr[1] != 0.0) return 2;
+   if (attr[3] != 1.0F) return 4;
+   if (attr[2] != 0.0F) return 3;
+   if (attr[1] != 0.0F) return 2;
    return 1;		
 }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_exec_array.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_exec_array.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_exec_array.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_exec_array.c	2015-09-16 14:36:10.000000000 +0000
@@ -255,7 +255,7 @@
             GLint k;
             for (k = 0; k < array->Size; k++) {
                if (IS_INF_OR_NAN(f[k]) ||
-                   f[k] >= 1.0e20 || f[k] <= -1.0e10) {
+                   f[k] >= 1.0e20F || f[k] <= -1.0e10F) {
                   printf("Bad array data:\n");
                   printf("  Element[%u].%u = %f\n", j, k, f[k]);
                   printf("  Array %u at %p\n", attrib, (void* ) array);
@@ -263,7 +263,7 @@
 			 array->Type, array->Size, array->Stride);
                   printf("  Address/offset %p in Buffer Object %u\n",
 			 array->Ptr, array->BufferObj->Name);
-                  f[k] = 1.0; /* XXX replace the bad value! */
+                  f[k] = 1.0F; /* XXX replace the bad value! */
                }
                /*assert(!IS_INF_OR_NAN(f[k]));*/
             }
@@ -633,7 +633,7 @@
          /* draw one or two prims */
          check_buffers_are_unmapped(exec->array.inputs);
          vbo->draw_prims(ctx, prim, primCount, NULL,
-                         GL_TRUE, start, start + count - 1, NULL, NULL);
+                         GL_TRUE, start, start + count - 1, NULL, 0, NULL);
       }
    }
    else {
@@ -644,7 +644,7 @@
       check_buffers_are_unmapped(exec->array.inputs);
       vbo->draw_prims(ctx, prim, 1, NULL,
                       GL_TRUE, start, start + count - 1,
-                      NULL, NULL);
+                      NULL, 0, NULL);
    }
 
    if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) {
@@ -786,7 +786,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), start, count);
+                  _mesa_enum_to_string(mode), start, count);
 
    if (!_mesa_validate_DrawArrays(ctx, mode, count))
       return;
@@ -813,7 +813,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), start, count, numInstances);
+                  _mesa_enum_to_string(mode), start, count, numInstances);
 
    if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances))
       return;
@@ -839,7 +839,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawArraysInstancedBaseInstance(%s, %d, %d, %d, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), first, count,
+                  _mesa_enum_to_string(mode), first, count,
                   numInstances, baseInstance);
 
    if (!_mesa_validate_DrawArraysInstanced(ctx, mode, first, count,
@@ -990,7 +990,7 @@
 
    check_buffers_are_unmapped(exec->array.inputs);
    vbo->draw_prims(ctx, prim, 1, &ib,
-                   index_bounds_valid, start, end, NULL, NULL);
+                   index_bounds_valid, start, end, NULL, 0, NULL);
 
    if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) {
       _mesa_flush(ctx);
@@ -1021,8 +1021,8 @@
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx,
                 "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n",
-                _mesa_lookup_enum_by_nr(mode), start, end, count,
-                _mesa_lookup_enum_by_nr(type), indices, basevertex);
+                _mesa_enum_to_string(mode), start, end, count,
+                _mesa_enum_to_string(type), indices, basevertex);
 
    if (!_mesa_validate_DrawRangeElements(ctx, mode, start, end, count,
                                          type, indices))
@@ -1099,8 +1099,8 @@
       GET_CURRENT_CONTEXT(ctx);
       _mesa_debug(ctx,
                   "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n",
-                  _mesa_lookup_enum_by_nr(mode), start, end, count,
-                  _mesa_lookup_enum_by_nr(type), indices);
+                  _mesa_enum_to_string(mode), start, end, count,
+                  _mesa_enum_to_string(type), indices);
    }
 
    vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
@@ -1119,8 +1119,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
-                  _mesa_lookup_enum_by_nr(mode), count,
-                  _mesa_lookup_enum_by_nr(type), indices);
+                  _mesa_enum_to_string(mode), count,
+                  _mesa_enum_to_string(type), indices);
 
    if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices))
       return;
@@ -1141,8 +1141,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), count,
-                  _mesa_lookup_enum_by_nr(type), indices, basevertex);
+                  _mesa_enum_to_string(mode), count,
+                  _mesa_enum_to_string(type), indices, basevertex);
 
    if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices))
       return;
@@ -1163,8 +1163,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), count,
-                  _mesa_lookup_enum_by_nr(type), indices, numInstances);
+                  _mesa_enum_to_string(mode), count,
+                  _mesa_enum_to_string(type), indices, numInstances);
 
    if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
                                              numInstances))
@@ -1187,8 +1187,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElementsInstancedBaseVertex(%s, %d, %s, %p, %d; %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), count,
-                  _mesa_lookup_enum_by_nr(type), indices,
+                  _mesa_enum_to_string(mode), count,
+                  _mesa_enum_to_string(type), indices,
                   numInstances, basevertex);
 
    if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
@@ -1212,8 +1212,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElementsInstancedBaseInstance(%s, %d, %s, %p, %d, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), count,
-                  _mesa_lookup_enum_by_nr(type), indices,
+                  _mesa_enum_to_string(mode), count,
+                  _mesa_enum_to_string(type), indices,
                   numInstances, baseInstance);
 
    if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
@@ -1238,8 +1238,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElementsInstancedBaseVertexBaseInstance(%s, %d, %s, %p, %d, %d, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), count,
-                  _mesa_lookup_enum_by_nr(type), indices,
+                  _mesa_enum_to_string(mode), count,
+                  _mesa_enum_to_string(type), indices,
                   numInstances, basevertex, baseInstance);
 
    if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
@@ -1350,7 +1350,7 @@
 
       check_buffers_are_unmapped(exec->array.inputs);
       vbo->draw_prims(ctx, prim, primcount, &ib,
-                      false, ~0, ~0, NULL, NULL);
+                      false, ~0, ~0, NULL, 0, NULL);
    } else {
       /* render one prim at a time */
       for (i = 0; i < primcount; i++) {
@@ -1379,7 +1379,7 @@
 
          check_buffers_are_unmapped(exec->array.inputs);
          vbo->draw_prims(ctx, prim, 1, &ib,
-                         false, ~0, ~0, NULL, NULL);
+                         false, ~0, ~0, NULL, 0, NULL);
       }
    }
 
@@ -1464,7 +1464,7 @@
 
    check_buffers_are_unmapped(exec->array.inputs);
    vbo->draw_prims(ctx, prim, 1, NULL,
-                   GL_TRUE, 0, 0, obj, NULL);
+                   GL_TRUE, 0, 0, obj, stream, NULL);
 
    if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) {
       _mesa_flush(ctx);
@@ -1488,7 +1488,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawTransformFeedback(%s, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), name);
+                  _mesa_enum_to_string(mode), name);
 
    vbo_draw_transform_feedback(ctx, mode, obj, 0, 1);
 }
@@ -1502,7 +1502,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawTransformFeedbackStream(%s, %u, %u)\n",
-                  _mesa_lookup_enum_by_nr(mode), name, stream);
+                  _mesa_enum_to_string(mode), name, stream);
 
    vbo_draw_transform_feedback(ctx, mode, obj, stream, 1);
 }
@@ -1517,7 +1517,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawTransformFeedbackInstanced(%s, %d)\n",
-                  _mesa_lookup_enum_by_nr(mode), name);
+                  _mesa_enum_to_string(mode), name);
 
    vbo_draw_transform_feedback(ctx, mode, obj, 0, primcount);
 }
@@ -1533,7 +1533,7 @@
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawTransformFeedbackStreamInstanced"
                   "(%s, %u, %u, %i)\n",
-                  _mesa_lookup_enum_by_nr(mode), name, stream, primcount);
+                  _mesa_enum_to_string(mode), name, stream, primcount);
 
    vbo_draw_transform_feedback(ctx, mode, obj, stream, primcount);
 }
@@ -1563,7 +1563,7 @@
    check_buffers_are_unmapped(exec->array.inputs);
    vbo->draw_prims(ctx, prim, 1,
                    NULL, GL_TRUE, 0, ~0,
-                   NULL,
+                   NULL, 0,
                    ctx->DrawIndirectBuffer);
 
    if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH)
@@ -1603,7 +1603,7 @@
    check_buffers_are_unmapped(exec->array.inputs);
    vbo->draw_prims(ctx, prim, primcount,
                    NULL, GL_TRUE, 0, ~0,
-                   NULL,
+                   NULL, 0,
                    ctx->DrawIndirectBuffer);
 
    free(prim);
@@ -1640,7 +1640,7 @@
    check_buffers_are_unmapped(exec->array.inputs);
    vbo->draw_prims(ctx, prim, 1,
                    &ib, GL_TRUE, 0, ~0,
-                   NULL,
+                   NULL, 0,
                    ctx->DrawIndirectBuffer);
 
    if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH)
@@ -1689,7 +1689,7 @@
    check_buffers_are_unmapped(exec->array.inputs);
    vbo->draw_prims(ctx, prim, primcount,
                    &ib, GL_TRUE, 0, ~0,
-                   NULL,
+                   NULL, 0,
                    ctx->DrawIndirectBuffer);
 
    free(prim);
@@ -1709,7 +1709,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawArraysIndirect(%s, %p)\n",
-                  _mesa_lookup_enum_by_nr(mode), indirect);
+                  _mesa_enum_to_string(mode), indirect);
 
    if (!_mesa_validate_DrawArraysIndirect(ctx, mode, indirect))
       return;
@@ -1725,8 +1725,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glDrawElementsIndirect(%s, %s, %p)\n",
-                  _mesa_lookup_enum_by_nr(mode),
-                  _mesa_lookup_enum_by_nr(type), indirect);
+                  _mesa_enum_to_string(mode),
+                  _mesa_enum_to_string(type), indirect);
 
    if (!_mesa_validate_DrawElementsIndirect(ctx, mode, type, indirect))
       return;
@@ -1743,7 +1743,7 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glMultiDrawArraysIndirect(%s, %p, %i, %i)\n",
-                  _mesa_lookup_enum_by_nr(mode), indirect, primcount, stride);
+                  _mesa_enum_to_string(mode), indirect, primcount, stride);
 
    /* If <stride> is zero, the array elements are treated as tightly packed. */
    if (stride == 0)
@@ -1768,8 +1768,8 @@
 
    if (MESA_VERBOSE & VERBOSE_DRAW)
       _mesa_debug(ctx, "glMultiDrawElementsIndirect(%s, %s, %p, %i, %i)\n",
-                  _mesa_lookup_enum_by_nr(mode),
-                  _mesa_lookup_enum_by_nr(type), indirect, primcount, stride);
+                  _mesa_enum_to_string(mode),
+                  _mesa_enum_to_string(type), indirect, primcount, stride);
 
    /* If <stride> is zero, the array elements are treated as tightly packed. */
    if (stride == 0)
@@ -1817,9 +1817,12 @@
       SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance);
    }
 
-   if (ctx->API == API_OPENGL_CORE) {
+   if (ctx->API == API_OPENGL_CORE || _mesa_is_gles31(ctx)) {
       SET_DrawArraysIndirect(exec, vbo_exec_DrawArraysIndirect);
       SET_DrawElementsIndirect(exec, vbo_exec_DrawElementsIndirect);
+   }
+
+   if (ctx->API == API_OPENGL_CORE) {
       SET_MultiDrawArraysIndirect(exec, vbo_exec_MultiDrawArraysIndirect);
       SET_MultiDrawElementsIndirect(exec, vbo_exec_MultiDrawElementsIndirect);
    }
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_exec_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_exec_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_exec_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_exec_draw.c	2015-09-16 14:36:10.000000000 +0000
@@ -412,7 +412,7 @@
 				       GL_TRUE,
 				       0,
 				       exec->vtx.vert_count - 1,
-				       NULL, NULL);
+				       NULL, 0, NULL);
 
 	 /* If using a real VBO, get new storage -- unless asked not to.
           */
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo.h mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo.h	2015-09-16 14:36:10.000000000 +0000
@@ -97,7 +97,8 @@
 			       GLuint min_index,
 			       GLuint max_index,
 			       struct gl_transform_feedback_object *tfb_vertcount,
-			       struct gl_buffer_object *indirect );
+                               unsigned stream,
+			       struct gl_buffer_object *indirect);
 
 
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_primitive_restart.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_primitive_restart.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_primitive_restart.c	2015-07-16 07:06:16.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_primitive_restart.c	2015-09-16 14:36:10.000000000 +0000
@@ -251,11 +251,11 @@
                 (temp_prim.count == sub_prim->count)) {
                draw_prims_func(ctx, &temp_prim, 1, ib,
                                GL_TRUE, sub_prim->min_index, sub_prim->max_index,
-                               NULL, NULL);
+                               NULL, 0, NULL);
             } else {
                draw_prims_func(ctx, &temp_prim, 1, ib,
                                GL_FALSE, -1, -1,
-                               NULL, NULL);
+                               NULL, 0, NULL);
             }
          }
          if (sub_end_index >= end_index) {
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_rebase.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_rebase.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_rebase.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_rebase.c	2015-09-16 14:36:10.000000000 +0000
@@ -258,7 +258,7 @@
 	 GL_TRUE,
 	 0, 
 	 max_index - min_index,
-	 NULL, NULL );
+	 NULL, 0, NULL );
 
    ctx->Array._DrawArrays = saved_arrays;
    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_save_draw.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_save_draw.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_save_draw.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_save_draw.c	2015-09-16 14:36:10.000000000 +0000
@@ -314,7 +314,7 @@
                                       GL_TRUE,
                                       0,    /* Node is a VBO, so this is ok */
                                       node->count - 1,
-                                      NULL, NULL);
+                                      NULL, 0, NULL);
       }
    }
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_split_copy.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_split_copy.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_split_copy.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_split_copy.c	2015-09-16 14:36:10.000000000 +0000
@@ -203,7 +203,7 @@
 	       GL_TRUE,
 	       0,
 	       copy->dstbuf_nr - 1,
-	       NULL, NULL );
+	       NULL, 0, NULL );
 
    ctx->Array._DrawArrays = saved_arrays;
    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_split_inplace.c mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_split_inplace.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/mesa/vbo/vbo_split_inplace.c	2014-04-20 07:52:19.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/mesa/vbo/vbo_split_inplace.c	2015-09-16 14:36:10.000000000 +0000
@@ -94,7 +94,7 @@
 	       !split->ib,
 	       split->min_index,
 	       split->max_index,
-	       NULL, NULL);
+	       NULL, 0, NULL);
 
    ctx->Array._DrawArrays = saved_arrays;
    ctx->NewDriverState |= ctx->DriverFlags.NewArray;
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/SConscript	2015-09-16 14:36:08.000000000 +0000
@@ -31,13 +31,8 @@
 if not env['embedded']:
     if env['platform'] not in ('cygwin', 'darwin', 'freebsd', 'haiku', 'windows'):
         SConscript('glx/SConscript')
-    if env['platform'] not in ['darwin', 'haiku', 'sunos', 'windows']:
-        if env['dri']:
-            SConscript('egl/drivers/dri2/SConscript')
-        SConscript('egl/main/SConscript')
     if env['platform'] == 'haiku':
-        SConscript('egl/drivers/haiku/SConscript')
-        SConscript('egl/main/SConscript')
+        SConscript('egl/SConscript')
 
     if env['gles']:
         SConscript('mapi/shared-glapi/SConscript')
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/bitset.h mesa-11.0.0~git20150916+11.0.c4bae579/src/util/bitset.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/bitset.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/bitset.h	2015-09-16 14:36:10.000000000 +0000
@@ -96,4 +96,40 @@
 
 #define BITSET_FFS(x) __bitset_ffs(x, ARRAY_SIZE(x))
 
+static inline unsigned
+__bitset_next_set(unsigned i, BITSET_WORD *tmp,
+                  BITSET_WORD *set, unsigned size)
+{
+   unsigned bit, word;
+
+   /* Helper for BITSET_FOREACH_SET: returns the index of the next set bit,
+    * or size once every word has been scanned.  *tmp caches the bits of the
+    * current word that have not been returned yet.  The initial conditions
+    * are very specific: before the first call, *tmp must hold set[0] and i
+    * must be 0.  Afterwards i is the previously returned index, which only
+    * serves to recover the current word; when *tmp runs empty we advance.
+    */
+   word = BITSET_BITWORD(i);
+   while (*tmp == 0) {
+      word++;
+
+      if (word >= BITSET_WORDS(size))
+         return size;
+
+      *tmp = set[word];
+   }
+
+   /* Lowest set bit of the non-zero word; ffs() is 1-based, hence the -1 */
+   bit = ffs(*tmp) - 1;
+
+   /* Clear it in the cached copy only; the bitset itself is not modified */
+   *tmp &= ~(1ull << bit);
+
+   return word * BITSET_WORDBITS + bit;
+}
+
+#define BITSET_FOREACH_SET(__i, __tmp, __set, __size) \
+   for (__tmp = *(__set), __i = 0; \
+        (__i = __bitset_next_set(__i, &__tmp, __set, __size)) < __size;)
+
 #endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/list.h mesa-11.0.0~git20150916+11.0.c4bae579/src/util/list.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/list.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/list.h	2015-09-16 14:36:10.000000000 +0000
@@ -140,6 +140,13 @@
 	     - ((char *)&(sample)->member - (char *)(sample)))
 #endif
 
+#define list_first_entry(ptr, type, member) \
+        LIST_ENTRY(type, (ptr)->next, member)
+
+#define list_last_entry(ptr, type, member) \
+        LIST_ENTRY(type, (ptr)->prev, member)
+
+
 #define LIST_FOR_EACH_ENTRY(pos, head, member)				\
    for (pos = NULL, pos = container_of((head)->next, pos, member);	\
 	&pos->member != (head);						\
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/macros.h mesa-11.0.0~git20150916+11.0.c4bae579/src/util/macros.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/macros.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/macros.h	2015-09-16 14:36:10.000000000 +0000
@@ -103,6 +103,17 @@
 #define assume(expr) assert(expr)
 #endif
 
+/* Attribute const is used for functions that have no effects other than their
+ * return value, and only rely on the argument values to compute the return
+ * value, so duplicate calls can be common-subexpression eliminated (CSE).
+ * Note that reading memory pointed to by the arguments is not allowed.
+ */
+#ifdef HAVE_FUNC_ATTRIBUTE_CONST
+#define ATTRIBUTE_CONST __attribute__((__const__))
+#else
+#define ATTRIBUTE_CONST
+#endif
+
 #ifdef HAVE_FUNC_ATTRIBUTE_FLATTEN
 #define FLATTEN __attribute__((__flatten__))
 #else
@@ -130,6 +141,15 @@
 #define PACKED
 #endif
 
+/* Attribute pure is used for functions that have no effects other than their
+ * return value.  As a result, calls to it can be dead code eliminated.
+ */
+#ifdef HAVE_FUNC_ATTRIBUTE_PURE
+#define ATTRIBUTE_PURE __attribute__((__pure__))
+#else
+#define ATTRIBUTE_PURE
+#endif
+
 #ifdef __cplusplus
 /**
  * Macro function that evaluates to true if T is a trivially
@@ -182,6 +202,12 @@
 #define UNUSED
 #endif
 
+#ifdef HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT
+#define MUST_CHECK __attribute__((warn_unused_result))
+#else
+#define MUST_CHECK
+#endif
+
 /** Compute ceiling of integer quotient of A divided by B. */
 #define DIV_ROUND_UP( A, B )  ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/Makefile.am mesa-11.0.0~git20150916+11.0.c4bae579/src/util/Makefile.am
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/Makefile.am	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/Makefile.am	2015-09-16 14:36:10.000000000 +0000
@@ -52,5 +52,7 @@
 CLEANFILES = $(BUILT_SOURCES)
 EXTRA_DIST = format_srgb.py SConscript
 
-format_srgb.c: $(srcdir)/format_srgb.py
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+format_srgb.c: format_srgb.py
+	$(PYTHON_GEN) $(srcdir)/format_srgb.py > $@
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/Makefile.sources mesa-11.0.0~git20150916+11.0.c4bae579/src/util/Makefile.sources
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/Makefile.sources	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/Makefile.sources	2015-09-16 14:36:10.000000000 +0000
@@ -15,7 +15,7 @@
 	set.c \
 	set.h \
 	simple_list.h \
-	strtod.cpp \
+	strtod.c \
 	strtod.h \
 	texcompress_rgtc_tmp.h \
 	u_atomic.h
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/register_allocate.c mesa-11.0.0~git20150916+11.0.c4bae579/src/util/register_allocate.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/register_allocate.c	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/register_allocate.c	2015-09-16 14:36:10.000000000 +0000
@@ -183,7 +183,7 @@
  * using ralloc_free().
  */
 struct ra_regs *
-ra_alloc_reg_set(void *mem_ctx, unsigned int count)
+ra_alloc_reg_set(void *mem_ctx, unsigned int count, bool need_conflict_lists)
 {
    unsigned int i;
    struct ra_regs *regs;
@@ -197,9 +197,15 @@
                                               BITSET_WORDS(count));
       BITSET_SET(regs->regs[i].conflicts, i);
 
-      regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
-      regs->regs[i].conflict_list_size = 4;
-      regs->regs[i].conflict_list[0] = i;
+      if (need_conflict_lists) {
+         regs->regs[i].conflict_list = ralloc_array(regs->regs,
+                                                    unsigned int, 4);
+         regs->regs[i].conflict_list_size = 4;
+         regs->regs[i].conflict_list[0] = i;
+      } else {
+         regs->regs[i].conflict_list = NULL;
+         regs->regs[i].conflict_list_size = 0;
+      }
       regs->regs[i].num_conflicts = 1;
    }
 
@@ -227,12 +233,14 @@
 {
    struct ra_reg *reg1 = &regs->regs[r1];
 
-   if (reg1->conflict_list_size == reg1->num_conflicts) {
-      reg1->conflict_list_size *= 2;
-      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
-				     unsigned int, reg1->conflict_list_size);
+   if (reg1->conflict_list) {
+      if (reg1->conflict_list_size == reg1->num_conflicts) {
+         reg1->conflict_list_size *= 2;
+         reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
+                                        unsigned int, reg1->conflict_list_size);
+      }
+      reg1->conflict_list[reg1->num_conflicts++] = r2;
    }
-   reg1->conflict_list[reg1->num_conflicts++] = r2;
    BITSET_SET(reg1->conflicts, r2);
 }
 
@@ -255,7 +263,7 @@
  */
 void
 ra_add_transitive_reg_conflict(struct ra_regs *regs,
-			       unsigned int base_reg, unsigned int reg)
+                               unsigned int base_reg, unsigned int reg)
 {
    unsigned int i;
 
@@ -266,13 +274,37 @@
    }
 }
 
+/**
+ * Makes every conflict on register r transitive: every register that
+ * conflicts with r will now also conflict with every other register
+ * conflicting with r.  Only the conflict bitsets are updated; the
+ * conflict lists and num_conflicts are left untouched.
+ *
+ * This simplifies setting up register classes that are aggregates of
+ * base hardware registers, compared to explicit ra_add_reg_conflict calls.
+ */
+void
+ra_make_reg_conflicts_transitive(struct ra_regs *regs, unsigned int r)
+{
+   struct ra_reg *reg = &regs->regs[r];
+   BITSET_WORD tmp;
+   unsigned int c;
+
+   BITSET_FOREACH_SET(c, tmp, reg->conflicts, regs->count) {
+      struct ra_reg *other = &regs->regs[c];
+      unsigned i;
+      for (i = 0; i < BITSET_WORDS(regs->count); i++)
+         other->conflicts[i] |= reg->conflicts[i];
+   }
+}
+
 unsigned int
 ra_alloc_reg_class(struct ra_regs *regs)
 {
    struct ra_class *class;
 
    regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *,
-			    regs->class_count + 1);
+                            regs->class_count + 1);
 
    class = rzalloc(regs, struct ra_class);
    regs->classes[regs->class_count] = class;
@@ -319,36 +351,40 @@
       for (b = 0; b < regs->class_count; b++) {
          for (c = 0; c < regs->class_count; c++) {
             regs->classes[b]->q[c] = q_values[b][c];
-	 }
+         }
       }
-      return;
-   }
+   } else {
+      /* Compute, for each class B and C, how many regs of B an
+       * allocation to C could conflict with.
+       */
+      for (b = 0; b < regs->class_count; b++) {
+         for (c = 0; c < regs->class_count; c++) {
+            unsigned int rc;
+            int max_conflicts = 0;
 
-   /* Compute, for each class B and C, how many regs of B an
-    * allocation to C could conflict with.
-    */
-   for (b = 0; b < regs->class_count; b++) {
-      for (c = 0; c < regs->class_count; c++) {
-	 unsigned int rc;
-	 int max_conflicts = 0;
-
-	 for (rc = 0; rc < regs->count; rc++) {
-	    int conflicts = 0;
-	    unsigned int i;
-
-            if (!reg_belongs_to_class(rc, regs->classes[c]))
-	       continue;
-
-	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
-	       unsigned int rb = regs->regs[rc].conflict_list[i];
-	       if (reg_belongs_to_class(rb, regs->classes[b]))
-		  conflicts++;
-	    }
-	    max_conflicts = MAX2(max_conflicts, conflicts);
-	 }
-	 regs->classes[b]->q[c] = max_conflicts;
+            for (rc = 0; rc < regs->count; rc++) {
+               int conflicts = 0;
+               unsigned int i;
+
+               if (!reg_belongs_to_class(rc, regs->classes[c]))
+                  continue;
+
+               for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
+                  unsigned int rb = regs->regs[rc].conflict_list[i];
+                  if (reg_belongs_to_class(rb, regs->classes[b]))
+                     conflicts++;
+               }
+               max_conflicts = MAX2(max_conflicts, conflicts);
+            }
+            regs->classes[b]->q[c] = max_conflicts;
+         }
       }
    }
+
+   for (b = 0; b < regs->count; b++) {
+      ralloc_free(regs->regs[b].conflict_list);
+      regs->regs[b].conflict_list = NULL;
+   }
 }
 
 static void
@@ -406,14 +442,14 @@
 
 void
 ra_set_node_class(struct ra_graph *g,
-		  unsigned int n, unsigned int class)
+                  unsigned int n, unsigned int class)
 {
    g->nodes[n].class = class;
 }
 
 void
 ra_add_node_interference(struct ra_graph *g,
-			 unsigned int n1, unsigned int n2)
+                         unsigned int n1, unsigned int n2)
 {
    if (!BITSET_TEST(g->nodes[n1].adjacency, n2)) {
       ra_add_node_adjacency(g, n1, n2);
@@ -441,7 +477,7 @@
 
       if (n != n2 && !g->nodes[n2].in_stack) {
          assert(g->nodes[n2].q_total >= g->regs->classes[n2_class]->q[n_class]);
-	 g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class];
+         g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class];
       }
    }
 }
@@ -648,7 +684,7 @@
       float cost = g->nodes[n].spill_cost;
       float benefit;
 
-      if (cost <= 0.0)
+      if (cost <= 0.0f)
 	 continue;
 
       if (g->nodes[n].in_stack)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/register_allocate.h mesa-11.0.0~git20150916+11.0.c4bae579/src/util/register_allocate.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/register_allocate.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/register_allocate.h	2015-09-16 14:36:10.000000000 +0000
@@ -44,13 +44,15 @@
  * registers, such as aligned register pairs that conflict with the
  * two real registers from which they are composed.
  */
-struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count);
+struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count,
+                                 bool need_conflict_lists);
 void ra_set_allocate_round_robin(struct ra_regs *regs);
 unsigned int ra_alloc_reg_class(struct ra_regs *regs);
 void ra_add_reg_conflict(struct ra_regs *regs,
 			 unsigned int r1, unsigned int r2);
 void ra_add_transitive_reg_conflict(struct ra_regs *regs,
 				    unsigned int base_reg, unsigned int reg);
+void ra_make_reg_conflicts_transitive(struct ra_regs *regs, unsigned int reg);
 void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int reg);
 void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a,
                           unsigned int class_b, unsigned int num_conflicts);
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/rounding.h mesa-11.0.0~git20150916+11.0.c4bae579/src/util/rounding.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/rounding.h	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/rounding.h	2015-09-16 14:36:10.000000000 +0000
@@ -21,7 +21,18 @@
  * IN THE SOFTWARE.
  */
 
-#include <math.h>
+#ifndef _ROUNDING_H
+#define _ROUNDING_H
+
+#include "c99_math.h"
+
+#include <limits.h>
+#include <stdint.h>
+
+#ifdef __x86_64__
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#endif
 
 #ifdef __SSE4_1__
 #include <smmintrin.h>
@@ -76,3 +87,45 @@
    return rint(x);
 #endif
 }
+
+/**
+ * \brief Rounds float \c x to the nearest integer (ties to even) as a long;
+ * x86-64 uses an SSE convert sized to long, other targets use lrintf().
+ */
+static inline long
+_mesa_lroundevenf(float x)
+{
+#ifdef __x86_64__
+#if LONG_MAX == INT64_MAX
+   return _mm_cvtss_si64(_mm_load_ss(&x));
+#elif LONG_MAX == INT32_MAX
+   return _mm_cvtss_si32(_mm_load_ss(&x));
+#else
+#error "Unsupported long size"
+#endif
+#else
+   return lrintf(x);
+#endif
+}
+
+/**
+ * \brief Rounds double \c x to the nearest integer (ties to even) as a long;
+ * x86-64 uses an SSE2 convert sized to long, other targets use lrint().
+ */
+static inline long
+_mesa_lroundeven(double x)
+{
+#ifdef __x86_64__
+#if LONG_MAX == INT64_MAX
+   return _mm_cvtsd_si64(_mm_load_sd(&x));
+#elif LONG_MAX == INT32_MAX
+   return _mm_cvtsd_si32(_mm_load_sd(&x));
+#else
+#error "Unsupported long size"
+#endif
+#else
+   return lrint(x);
+#endif
+}
+
+#endif
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/SConscript mesa-11.0.0~git20150916+11.0.c4bae579/src/util/SConscript
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/SConscript	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/SConscript	2015-09-16 14:36:10.000000000 +0000
@@ -49,3 +49,10 @@
 )
 alias = env.Alias("u_atomic_test", u_atomic_test, u_atomic_test[0].abspath)
 AlwaysBuild(alias)
+
+roundeven_test = env.Program(
+    target = 'roundeven_test',
+    source = ['roundeven_test.c'],
+)
+alias = env.Alias("roundeven_test", roundeven_test, roundeven_test[0].abspath)
+AlwaysBuild(alias)
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/strtod.c mesa-11.0.0~git20150916+11.0.c4bae579/src/util/strtod.c
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/strtod.c	1970-01-01 00:00:00.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/strtod.c	2015-09-16 14:36:10.000000000 +0000
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <stdlib.h>
+
+#ifdef _GNU_SOURCE
+#include <locale.h>
+#ifdef HAVE_XLOCALE_H
+#include <xlocale.h>
+/* "C" locale handle shared by the wrappers below; (locale_t) 0 when unset. */
+static locale_t loc;
+#endif
+#endif
+
+#include "strtod.h"
+
+
+/**
+ * Creates the "C" locale object used by _mesa_strtod() and _mesa_strtof().
+ * If newlocale() fails, loc stays (locale_t) 0 and the wrappers fall back
+ * to the plain, current-locale conversion functions.
+ */
+void
+_mesa_locale_init(void)
+{
+#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+   loc = newlocale(LC_CTYPE_MASK, "C", NULL);
+#endif
+}
+
+/**
+ * Releases the locale object created by _mesa_locale_init(), if any.
+ */
+void
+_mesa_locale_fini(void)
+{
+#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+   /* freelocale() requires a valid handle, so skip it when init never ran
+    * or newlocale() failed, and reset loc so later calls do not use a
+    * freed locale.
+    */
+   if (loc != (locale_t) 0) {
+      freelocale(loc);
+      loc = (locale_t) 0;
+   }
+#endif
+}
+
+/**
+ * Wrapper around strtod which uses the "C" locale so the decimal
+ * point is always '.'
+ *
+ * Falls back to plain strtod() when no "C" locale object is available.
+ */
+double
+_mesa_strtod(const char *s, char **end)
+{
+#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+   if (loc != (locale_t) 0)
+      return strtod_l(s, end, loc);
+#endif
+   return strtod(s, end);
+}
+
+
+/**
+ * Wrapper around strtof which uses the "C" locale so the decimal
+ * point is always '.'
+ *
+ * Falls back to strtof()/strtod() when no "C" locale object is available.
+ */
+float
+_mesa_strtof(const char *s, char **end)
+{
+#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+   if (loc != (locale_t) 0)
+      return strtof_l(s, end, loc);
+#endif
+#if defined(HAVE_STRTOF)
+   return strtof(s, end);
+#else
+   return (float) strtod(s, end);
+#endif
+}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/strtod.cpp mesa-11.0.0~git20150916+11.0.c4bae579/src/util/strtod.cpp
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/strtod.cpp	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/strtod.cpp	1970-01-01 00:00:00.000000000 +0000
@@ -1,75 +0,0 @@
-/*
- * Copyright 2010 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include <stdlib.h>
-
-#ifdef _GNU_SOURCE
-#include <locale.h>
-#ifdef HAVE_XLOCALE_H
-#include <xlocale.h>
-#endif
-#endif
-
-#include "strtod.h"
-
-
-#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
-static struct locale_initializer {
-   locale_initializer() { loc = newlocale(LC_CTYPE_MASK, "C", NULL); }
-   locale_t loc;
-} loc_init;
-#endif
-
-/**
- * Wrapper around strtod which uses the "C" locale so the decimal
- * point is always '.'
- */
-double
-_mesa_strtod(const char *s, char **end)
-{
-#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
-   return strtod_l(s, end, loc_init.loc);
-#else
-   return strtod(s, end);
-#endif
-}
-
-
-/**
- * Wrapper around strtof which uses the "C" locale so the decimal
- * point is always '.'
- */
-float
-_mesa_strtof(const char *s, char **end)
-{
-#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
-   return strtof_l(s, end, loc_init.loc);
-#elif defined(HAVE_STRTOF)
-   return strtof(s, end);
-#else
-   return (float) strtod(s, end);
-#endif
-}
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/src/util/strtod.h mesa-11.0.0~git20150916+11.0.c4bae579/src/util/strtod.h
--- mesa-10.6.5~git20150829+10.6.fa342251/src/util/strtod.h	2015-01-14 13:02:07.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/src/util/strtod.h	2015-09-16 14:36:10.000000000 +0000
@@ -31,6 +31,12 @@
 extern "C" {
 #endif
 
+extern void
+_mesa_locale_init(void);
+
+extern void
+_mesa_locale_fini(void);
+
 extern double
 _mesa_strtod(const char *s, char **end);
 
diff -Nru mesa-10.6.5~git20150829+10.6.fa342251/VERSION mesa-11.0.0~git20150916+11.0.c4bae579/VERSION
--- mesa-10.6.5~git20150829+10.6.fa342251/VERSION	2015-08-29 13:59:39.000000000 +0000
+++ mesa-11.0.0~git20150916+11.0.c4bae579/VERSION	2015-09-16 14:36:08.000000000 +0000
@@ -1 +1 @@
-10.6.5
+11.0.0